Skip to content

Commit a0d9147

Browse files
committed
Initial working prototype
0 parents  commit a0d9147

File tree

16 files changed

+2481
-0
lines changed

16 files changed

+2481
-0
lines changed

.gitignore

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Python
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
*.so
6+
.Python
7+
build/
8+
develop-eggs/
9+
dist/
10+
downloads/
11+
eggs/
12+
.eggs/
13+
lib/
14+
lib64/
15+
parts/
16+
sdist/
17+
var/
18+
wheels/
19+
pip-wheel-metadata/
20+
share/python-wheels/
21+
*.egg-info/
22+
.installed.cfg
23+
*.egg
24+
MANIFEST
25+
26+
# Virtual environments
27+
venv/
28+
env/
29+
ENV/
30+
.venv/
31+
env.bak/
32+
venv.bak/
33+
34+
# Environment variables
35+
.env
36+
.env.local
37+
.env.*.local
38+
39+
# IDE
40+
.vscode/
41+
.idea/
42+
*.swp
43+
*.swo
44+
*~
45+
.DS_Store
46+
47+
# Jupyter Notebook
48+
.ipynb_checkpoints
49+
50+
# pyenv
51+
.python-version
52+
53+
# FAISS data files
54+
data/
55+
*.faiss
56+
*.meta
57+
*.json
58+
!pyproject.toml
59+
60+
# Streamlit
61+
.streamlit/
62+
63+
# Logs
64+
*.log
65+
66+
# OS
67+
.DS_Store
68+
.DS_Store?
69+
._*
70+
.Spotlight-V100
71+
.Trashes
72+
ehthumbs.db
73+
Thumbs.db
74+
75+
# Project specific
76+
*.pdf
77+
uploads/
78+

Dockerfile

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
FROM python:3.11-slim AS base

# Install system deps
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential curl \
    && rm -rf /var/lib/apt/lists/*

# Install uv
RUN pip install --no-cache-dir uv

WORKDIR /app
# `uv sync --frozen` installs exactly what uv.lock pins and refuses to run
# without it, so the lockfile has to be copied together with pyproject.toml.
COPY pyproject.toml uv.lock ./

# Sync deps (done before copying the source so this layer is cached until
# the dependency files change)
RUN uv sync --frozen --no-dev

# App source
COPY app ./app
COPY README.md ./README.md

ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

EXPOSE 8080

CMD ["uv", "run", "streamlit", "run", "app/main.py", "--server.port", "8080", "--server.address", "0.0.0.0"]
27+
28+

README.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# IBM Cloud RAG MVP
2+
3+
Streamlit single-page app with RAG pipeline using watsonx.ai (Granite-13B for answers, `ibm/granite-embedding-30m-english` for embeddings), FAISS vector search, and IBM Cloud Object Storage.
4+
5+
## Prerequisites
6+
- Python 3.11+
7+
- IBM Cloud account with watsonx.ai, COS, watsonx.data (Milvus), Code Engine
8+
- `uv` package manager
9+
10+
## Environment
11+
Set required variables via `.env` or environment. See `app/config.py` and the project plan for details.
12+
13+
## Run locally
14+
```bash
15+
uv run streamlit run app/main.py --server.port 8080 --server.address 0.0.0.0
16+
```
17+
18+
## Container
19+
```bash
20+
docker build -t ibm-rag-mvp:latest .
21+
```
22+
23+
## Deploy (IBM Cloud Code Engine)
24+
- Push image to IBM Cloud Container Registry (`us.icr.io/<namespace>/ibm-rag-mvp:latest`).
25+
- Create a Code Engine project and app, set env vars and bind secrets.
26+
- Expose port 8080.
27+
28+
## Notes
29+
- FAISS index and metadata are persisted to local files (see `FAISS_INDEX_PATH`, `FAISS_META_PATH`). For persistence across deploys, back them up to COS.
30+
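- COS can be accessed with IAM (API key) credentials, so HMAC keys are optional. Presigned URLs do require HMAC credentials; in IAM mode they are unavailable and objects are accessed internally through the app's authenticated client.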
31+
- Keep services in the same region for best latency.
32+
33+

app/config.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import os
2+
from dataclasses import dataclass
3+
4+
5+
@dataclass
6+
class Settings:
7+
ibm_cloud_api_key: str
8+
watsonx_region: str
9+
watsonx_project_id: str
10+
watsonx_embed_model: str
11+
watsonx_gen_model: str
12+
13+
cos_endpoint: str
14+
cos_bucket: str
15+
cos_instance_crn: str
16+
cos_api_key: str | None
17+
cos_auth_endpoint: str
18+
cos_hmac_access_key_id: str
19+
cos_hmac_secret_access_key: str
20+
21+
milvus_host: str
22+
milvus_port: int
23+
milvus_db: str | None
24+
milvus_tls: bool
25+
26+
faiss_index_path: str
27+
faiss_meta_path: str
28+
29+
chunk_size: int
30+
chunk_overlap: int
31+
top_k: int
32+
temperature: float
33+
embedding_dim: int
34+
35+
@staticmethod
36+
def _get_bool(value: str | None, default: bool = False) -> bool:
37+
if value is None:
38+
return default
39+
return value.lower() in {"1", "true", "t", "yes", "y"}
40+
41+
@classmethod
42+
def from_env(cls) -> "Settings":
43+
return cls(
44+
ibm_cloud_api_key=os.getenv("IBM_CLOUD_API_KEY", ""),
45+
watsonx_region=os.getenv("WATSONX_REGION", "us-south"),
46+
watsonx_project_id=os.getenv("WATSONX_PROJECT_ID", ""),
47+
watsonx_embed_model=os.getenv("WATSONX_EMBED_MODEL", "ibm/granite-embedding-30m-english"),
48+
watsonx_gen_model=os.getenv("WATSONX_GEN_MODEL", "ibm/granite-13b-instruct-v2"),
49+
cos_endpoint=os.getenv("COS_ENDPOINT", ""),
50+
cos_bucket=os.getenv("COS_BUCKET", ""),
51+
cos_instance_crn=os.getenv("COS_INSTANCE_CRN", ""),
52+
cos_api_key=os.getenv("COS_API_KEY") or os.getenv("IBM_CLOUD_API_KEY"),
53+
cos_auth_endpoint=os.getenv("COS_AUTH_ENDPOINT", "https://iam.cloud.ibm.com/identity/token"),
54+
cos_hmac_access_key_id=os.getenv("COS_HMAC_ACCESS_KEY_ID", ""),
55+
cos_hmac_secret_access_key=os.getenv("COS_HMAC_SECRET_ACCESS_KEY", ""),
56+
milvus_host=os.getenv("MILVUS_HOST", "localhost"),
57+
milvus_port=int(os.getenv("MILVUS_PORT", "19530")),
58+
milvus_db=os.getenv("MILVUS_DB"),
59+
milvus_tls=cls._get_bool(os.getenv("MILVUS_TLS"), False),
60+
faiss_index_path=os.getenv("FAISS_INDEX_PATH", "data/index.faiss"),
61+
faiss_meta_path=os.getenv("FAISS_META_PATH", "data/meta.json"),
62+
chunk_size=int(os.getenv("CHUNK_SIZE", "1200")),
63+
chunk_overlap=int(os.getenv("CHUNK_OVERLAP", "150")),
64+
top_k=int(os.getenv("TOP_K", "6")),
65+
temperature=float(os.getenv("TEMPERATURE", "0.2")),
66+
embedding_dim=int(os.getenv("EMBEDDING_DIM", "1024")),
67+
)
68+
69+

app/main.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import uuid
2+
from dotenv import load_dotenv
3+
import streamlit as st
4+
5+
from app.config import Settings
6+
from app.rag.pipeline import IngestionPipeline, QueryPipeline
7+
8+
9+
def init_state():
    """Make sure the session-state lists used by the UI exist.

    ``messages`` holds the chat history and ``ingested_docs`` the documents
    ingested so far; each is created as an empty list when absent.
    """
    for key in ("messages", "ingested_docs"):
        if key not in st.session_state:
            st.session_state[key] = []
14+
15+
16+
def sidebar_uploader(ingestion: IngestionPipeline):
    """Render the sidebar: PDF upload, the ingest button, and the document list.

    When the button is pressed, every selected file is uploaded and ingested
    through *ingestion*, and a ``(doc_id, name, source_uri, chunk_count)``
    tuple is appended to ``st.session_state["ingested_docs"]``.
    """
    sidebar = st.sidebar
    sidebar.header("Upload PDFs")
    uploaded_files = sidebar.file_uploader(
        "Choose PDF files", type=["pdf"], accept_multiple_files=True
    )

    # The button is only rendered once at least one file has been selected.
    if uploaded_files and sidebar.button("Ingest Selected PDFs"):
        for pdf in uploaded_files:
            doc_id = str(uuid.uuid4())
            with st.spinner(f"Uploading and ingesting {pdf.name}..."):
                source_uri = ingestion.upload_to_cos(doc_id, pdf.name, pdf)
                chunk_count = ingestion.ingest_pdf(doc_id, pdf.name, source_uri)
                st.session_state["ingested_docs"].append(
                    (doc_id, pdf.name, source_uri, chunk_count)
                )
        sidebar.success("Ingestion complete.")

    ingested = st.session_state["ingested_docs"]
    if ingested:
        sidebar.subheader("Ingested Documents")
        for _doc_id, name, _uri, count in ingested:
            sidebar.write(f"- {name} ({count} chunks)")
34+
35+
36+
def chat_ui(query_pipeline: QueryPipeline):
    """Render the chat page and answer a newly submitted question.

    The stored conversation is replayed first. If the user submitted a
    question, it is answered with ``query_pipeline.answer`` and both the
    question and the answer (with its sources appended as a bullet list)
    are added to ``st.session_state["messages"]``.
    """
    st.title("IBM Cloud RAG MVP")
    st.caption("Granite-13B-Instruct + Milvus + COS + Streamlit")

    # Replay the conversation so far.
    for role, content in st.session_state["messages"]:
        with st.chat_message(role):
            st.markdown(content)

    user_input = st.chat_input("Ask a question about your PDFs…")
    if not user_input:
        return

    st.session_state["messages"].append(("user", user_input))
    with st.chat_message("user"):
        st.markdown(user_input)

    with st.chat_message("assistant"):
        with st.spinner("Thinking…"):
            answer, sources = query_pipeline.answer(user_input)
        if sources:
            citations = "\n\n" + "\n".join(f"- {s}" for s in sources)
        else:
            citations = ""
        reply = answer + citations
        st.markdown(reply)
    st.session_state["messages"].append(("assistant", reply))
56+
57+
58+
def main():
    """Entry point: load configuration, build the pipelines, render the UI."""
    # Pick up a local .env file during development.
    load_dotenv()
    settings = Settings.from_env()
    init_state()

    # Both pipelines are built before any UI code runs.
    # NOTE(review): Streamlit re-executes the script on each interaction, so
    # these are presumably rebuilt every rerun - confirm whether caching is wanted.
    ingestion = IngestionPipeline(settings)
    query_pipeline = QueryPipeline(settings)

    sidebar_uploader(ingestion)
    chat_ui(query_pipeline)


if __name__ == "__main__":
    main()
73+
74+

app/models.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from pydantic import BaseModel
2+
from typing import List, Optional
3+
4+
5+
class ChunkRecord(BaseModel):
    """One chunk of document text together with its embedding vector."""

    id: str
    doc_id: str
    page_num: int
    chunk_index: int
    text: str
    embedding: list[float]
    source_uri: str
13+
14+
15+
class QueryResult(BaseModel):
    """An answer, its source references, and optionally the matched chunks."""

    answer: str
    sources: list[str]
    matched_chunks: list[ChunkRecord] | None = None
19+
20+

app/rag/chunker.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from typing import List
2+
3+
from langchain_text_splitters import RecursiveCharacterTextSplitter
4+
5+
6+
def chunk_pages(pages: List[str], chunk_size: int, chunk_overlap: int) -> List[str]:
    """Split page texts into overlapping chunks.

    The pages are joined with a blank line between them and split as one
    document, so a chunk may span a page boundary. ``chunk_size`` and
    ``chunk_overlap`` are measured in characters (``len``).
    """
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
        # Separators passed in order, coarsest first.
        "separators": ["\n\n", "\n", " ", ""],
    }
    text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
    document = "\n\n".join(pages)
    return text_splitter.split_text(document)
15+
16+

app/rag/cos_client.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import io
2+
import time
3+
from typing import BinaryIO
4+
5+
import ibm_boto3
6+
from ibm_botocore.client import Config
7+
8+
from app.config import Settings
9+
10+
11+
class COSClient:
    """Wrapper around an ``ibm_boto3`` S3 client bound to the configured bucket.

    The client authenticates with HMAC keys when both are configured and with
    an IAM API key otherwise; ``self.mode`` records which one (``"hmac"`` or
    ``"iam"``).
    """

    def __init__(self, settings: "Settings"):
        """Validate the COS settings and create the underlying client.

        Raises:
            ValueError: if the endpoint, bucket or instance CRN is missing, or
                if neither a complete HMAC key pair nor an API key is set.
        """
        self.settings = settings
        if not (settings.cos_endpoint and settings.cos_bucket and settings.cos_instance_crn):
            raise ValueError(
                "Missing COS configuration. Please set COS_ENDPOINT, COS_BUCKET, and COS_INSTANCE_CRN."
            )

        # Prefer HMAC if keys are present; otherwise use IAM
        if settings.cos_hmac_access_key_id and settings.cos_hmac_secret_access_key:
            self.mode = "hmac"
            self.client = ibm_boto3.client(
                "s3",
                aws_access_key_id=settings.cos_hmac_access_key_id,
                aws_secret_access_key=settings.cos_hmac_secret_access_key,
                config=Config(signature_version="s3v4"),
                endpoint_url=settings.cos_endpoint,
            )
        else:
            # Fail early with a clear message instead of handing an empty
            # API key to the SDK.
            if not settings.cos_api_key:
                raise ValueError(
                    "Missing COS credentials. Please set COS_API_KEY (or IBM_CLOUD_API_KEY), "
                    "or both COS_HMAC_ACCESS_KEY_ID and COS_HMAC_SECRET_ACCESS_KEY."
                )
            self.mode = "iam"
            self.client = ibm_boto3.client(
                "s3",
                ibm_api_key_id=settings.cos_api_key,
                ibm_service_instance_id=settings.cos_instance_crn,
                ibm_auth_endpoint=settings.cos_auth_endpoint,
                config=Config(signature_version="oauth"),
                endpoint_url=settings.cos_endpoint,
            )

    def upload_fileobj(self, key: str, fileobj: BinaryIO, content_type: str = "application/pdf") -> str:
        """Upload *fileobj* to the configured bucket under *key*.

        Args:
            key: Object key inside the bucket.
            fileobj: Binary file-like object to upload.
            content_type: Value stored as the object's ``ContentType``.

        Returns:
            The object's location as ``s3://<bucket>/<key>``.
        """
        self.client.upload_fileobj(
            Fileobj=fileobj,
            Bucket=self.settings.cos_bucket,
            Key=key,
            ExtraArgs={"ContentType": content_type},
        )
        return f"s3://{self.settings.cos_bucket}/{key}"

    def generate_presigned_url(self, key: str, expires_in: int = 3600) -> str:
        """Return a presigned ``get_object`` URL for *key*.

        Args:
            key: Object key inside the configured bucket.
            expires_in: Passed to the client as ``ExpiresIn``.

        Raises:
            RuntimeError: if the client was not created with HMAC credentials.
        """
        if self.mode != "hmac":
            raise RuntimeError("Presigned URLs require HMAC credentials; IAM mode does not support presign.")
        return self.client.generate_presigned_url(
            ClientMethod="get_object",
            Params={"Bucket": self.settings.cos_bucket, "Key": key},
            ExpiresIn=expires_in,
        )
56+
57+

0 commit comments

Comments
 (0)