Skip to content

Commit 221b1e5

Browse files
committed
feat: enhance /ingest endpoint to support file paths and file uploads
1 parent ba78be0 commit 221b1e5

File tree

7 files changed

+70
-10
lines changed

7 files changed

+70
-10
lines changed

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,8 @@ raglight serve --port 8080
208208
|---|---|---|---|
209209
| `GET` | `/health` || `{"status": "ok"}` |
210210
| `POST` | `/generate` | `{"question": "..."}` | `{"answer": "..."}` |
211-
| `POST` | `/ingest` | `{"data_path": "...", "github_url": "...", "github_branch": "main"}` | `{"message": "..."}` |
211+
| `POST` | `/ingest` | `{"data_path": "...", "file_paths": [...], "github_url": "...", "github_branch": "main"}` | `{"message": "..."}` |
212+
| `POST` | `/ingest/upload` | `multipart/form-data` — field `files` (one or more files) | `{"message": "..."}` |
212213
| `GET` | `/collections` || `{"collections": [...]}` |
213214

214215
The interactive API documentation (Swagger UI) is automatically available at `http://localhost:8000/docs`.
@@ -234,6 +235,11 @@ curl -X POST http://localhost:8000/ingest \
234235
-H "Content-Type: application/json" \
235236
-d '{"github_url": "https://github.com/Bessouat40/RAGLight", "github_branch": "main"}'
236237

238+
# Upload files directly (multipart)
239+
curl -X POST http://localhost:8000/ingest/upload \
240+
-F "files=@./rapport.pdf" \
241+
-F "files=@./notes.txt"
242+
237243
# List collections
238244
curl http://localhost:8000/collections
239245
```

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ dependencies = [
3838
"nltk>=3.9.2",
3939
"fastapi>=0.115.0",
4040
"uvicorn[standard]>=0.34.0",
41+
"python-multipart>=0.0.9",
4142
]
4243
[project.scripts]
4344
raglight = "raglight.cli.main:app"

src/raglight/config/settings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def setup_logging() -> None:
3434
MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY", "")
3535
LMSTUDIO = "LmStudio"
3636
HUGGINGFACE = "HuggingFace"
37-
DEFAULT_LLM = "llama3"
37+
DEFAULT_LLM = "llama3.2:1b"
3838
DEFAULT_OPENAI_CLIENT = os.environ.get(
3939
"OPENAI_CLIENT_URL", "https://api.openai.com/v1"
4040
)

src/raglight/rag/builder.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ def with_embeddings(self, type: str, **kwargs) -> Builder:
6060
"""
6161
logging.info("⏳ Creating an Embeddings Model...")
6262
if type == Settings.HUGGINGFACE:
63+
kwargs.pop("api_base", None)
6364
self.embeddings = HuggingfaceEmbeddingsModel(**kwargs)
6465
elif type == Settings.OLLAMA:
6566
self.embeddings = OllamaEmbeddingsModel(**kwargs)

tests/tests_api/test_router.py

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ def test_ingest_local(client, pipeline):
6969
def test_ingest_missing_params(client):
7070
response = client.post("/ingest", json={})
7171
assert response.status_code == 400
72-
assert "data_path" in response.json()["detail"] or "github_url" in response.json()["detail"]
72+
detail = response.json()["detail"]
73+
assert any(k in detail for k in ("data_path", "file_paths", "github_url"))
7374

7475

7576
def test_ingest_github(client, pipeline):
@@ -94,6 +95,56 @@ def test_ingest_github(client, pipeline):
9495

9596
# ── /collections ──────────────────────────────────────────────────────────────
9697

98+
def test_ingest_file_paths(client, pipeline):
99+
vector_store = pipeline.get_vector_store.return_value
100+
vector_store.add_documents.return_value = None
101+
vector_store.add_class_documents.return_value = None
102+
vector_store._flatten_metadata.side_effect = lambda docs: docs
103+
vector_store._process_file = lambda fp, factory, flatten: (["chunk"], [])
104+
105+
with patch("os.path.isfile", return_value=True):
106+
response = client.post(
107+
"/ingest",
108+
json={"file_paths": ["/some/file.pdf", "/some/other.txt"]},
109+
)
110+
111+
assert response.status_code == 200
112+
assert "success" in response.json()["message"].lower()
113+
114+
115+
def test_ingest_upload(client, pipeline):
116+
vector_store = pipeline.get_vector_store.return_value
117+
vector_store.add_documents.return_value = None
118+
vector_store.add_class_documents.return_value = None
119+
vector_store._flatten_metadata.side_effect = lambda docs: docs
120+
vector_store._process_file = lambda fp, factory, flatten: (["chunk"], [])
121+
122+
response = client.post(
123+
"/ingest/upload",
124+
files=[
125+
("files", ("doc1.txt", b"hello world", "text/plain")),
126+
("files", ("doc2.txt", b"another doc", "text/plain")),
127+
],
128+
)
129+
assert response.status_code == 200
130+
assert "2 file(s)" in response.json()["message"]
131+
132+
133+
def test_ingest_upload_no_files(client):
134+
response = client.post("/ingest/upload", files=[])
135+
assert response.status_code in (400, 422)
136+
137+
138+
def test_ingest_file_paths_missing_file(client, pipeline):
139+
with patch("os.path.isfile", return_value=False):
140+
response = client.post(
141+
"/ingest",
142+
json={"file_paths": ["/nonexistent/file.pdf"]},
143+
)
144+
assert response.status_code == 500
145+
assert "not found" in response.json()["detail"].lower()
146+
147+
97148
def test_collections(client, pipeline):
98149
vector_store = pipeline.get_vector_store.return_value
99150
vector_store.get_available_collections.return_value = ["default", "project_x"]

tests/tests_api/test_server_config.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,10 @@
1010

1111

1212
def test_defaults():
13-
with patch.dict(os.environ, {}, clear=False):
14-
# Remove RAGLIGHT_* vars if present
15-
env_overrides = {k: "" for k in os.environ if k.startswith("RAGLIGHT_")}
16-
for k in env_overrides:
17-
os.environ.pop(k, None)
13+
clean_env = {k: v for k, v in os.environ.items() if not k.startswith("RAGLIGHT_")}
14+
with patch.dict(os.environ, clean_env, clear=True):
1815
cfg = ServerConfig()
19-
assert cfg.llm_model == "llama3"
16+
assert cfg.llm_model == Settings.DEFAULT_LLM
2017
assert cfg.llm_provider == Settings.OLLAMA
2118
assert cfg.embeddings_model == Settings.DEFAULT_EMBEDDINGS_MODEL
2219
assert cfg.embeddings_provider == Settings.HUGGINGFACE

uv.lock

Lines changed: 5 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)