Skip to content

Commit fe444f3

Browse files
authored
[Feat] RAG API - Add support for using s3 Vectors as Vector Store Provider for /rag/ingest (BerriAI#19888)
* init S3VectorsRAGIngestion as a supported ingestion provider for RAG API
* test: TestRAGS3Vectors
* init S3VectorsVectorStoreOptions
* init s3 vectors
* code clean up + QA
* fix: get_credentials
* S3VectorsRAGIngestion
* TestRAGS3Vectors
* docs: AWS S3 Vectors
* add asyncio QA checks
* fix: S3_VECTORS_DEFAULT_DIMENSION
1 parent 7d5439a commit fe444f3

File tree

7 files changed: +826 additions, -2 deletions

docs/my-website/docs/rag_ingest.md

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ All-in-one document ingestion pipeline: **Upload → Chunk → Embed → Vector
55
| Feature | Supported |
66
|---------|-----------|
77
| Logging | Yes |
8-
| Supported Providers | `openai`, `bedrock`, `vertex_ai`, `gemini` |
8+
| Supported Providers | `openai`, `bedrock`, `vertex_ai`, `gemini`, `s3_vectors` |
99

1010
:::tip
1111
After ingesting documents, use [/rag/query](./rag_query.md) to search and generate responses with your ingested content.
@@ -75,6 +75,31 @@ curl -X POST "http://localhost:4000/v1/rag/ingest" \
7575
}"
7676
```
7777

78+
### AWS S3 Vectors
79+
80+
```bash showLineNumbers title="Ingest to S3 Vectors"
81+
curl -X POST "http://localhost:4000/v1/rag/ingest" \
82+
-H "Authorization: Bearer sk-1234" \
83+
-H "Content-Type: application/json" \
84+
-d "{
85+
\"file\": {
86+
\"filename\": \"document.txt\",
87+
\"content\": \"$(base64 -i document.txt)\",
88+
\"content_type\": \"text/plain\"
89+
},
90+
\"ingest_options\": {
91+
\"embedding\": {
92+
\"model\": \"text-embedding-3-small\"
93+
},
94+
\"vector_store\": {
95+
\"custom_llm_provider\": \"s3_vectors\",
96+
\"vector_bucket_name\": \"my-embeddings\",
97+
\"aws_region_name\": \"us-west-2\"
98+
}
99+
}
100+
}"
101+
```
102+
78103
## Response
79104

80105
```json
@@ -265,6 +290,57 @@ When `vector_store_id` is omitted, LiteLLM automatically creates:
265290
4. Install: `pip install 'google-cloud-aiplatform>=1.60.0'`
266291
:::
267292

293+
### vector_store (AWS S3 Vectors)
294+
295+
| Parameter | Type | Default | Description |
296+
|-----------|------|---------|-------------|
297+
| `custom_llm_provider` | string | - | `"s3_vectors"` |
298+
| `vector_bucket_name` | string | **required** | S3 vector bucket name |
299+
| `index_name` | string | auto-create | Vector index name |
300+
| `dimension` | integer | auto-detect | Vector dimension (auto-detected from embedding model) |
301+
| `distance_metric` | string | `cosine` | Distance metric: `cosine` or `euclidean` |
302+
| `non_filterable_metadata_keys` | array | `["source_text"]` | Metadata keys excluded from filtering |
303+
| `aws_region_name` | string | `us-west-2` | AWS region |
304+
| `aws_access_key_id` | string | env | AWS access key |
305+
| `aws_secret_access_key` | string | env | AWS secret key |
306+
307+
:::info S3 Vectors Auto-Creation
308+
When `index_name` is omitted, LiteLLM automatically creates:
309+
- S3 vector bucket (if it doesn't exist)
310+
- Vector index with auto-detected dimensions from your embedding model
311+
312+
**Dimension Auto-Detection**: The vector dimension is automatically detected by making a test embedding request to your specified model. No need to manually specify dimensions!
313+
314+
**Supported Embedding Models**: Works with any LiteLLM-supported embedding model (OpenAI, Cohere, Bedrock, Azure, etc.)
315+
:::
316+
317+
**Example with auto-detection:**
318+
```json
319+
{
320+
"embedding": {
321+
"model": "text-embedding-3-small" // Dimension auto-detected as 1536
322+
},
323+
"vector_store": {
324+
"custom_llm_provider": "s3_vectors",
325+
"vector_bucket_name": "my-embeddings"
326+
}
327+
}
328+
```
329+
330+
**Example with custom embedding provider:**
331+
```json
332+
{
333+
"embedding": {
334+
"model": "cohere/embed-english-v3.0" // Dimension auto-detected as 1024
335+
},
336+
"vector_store": {
337+
"custom_llm_provider": "s3_vectors",
338+
"vector_bucket_name": "my-embeddings",
339+
"distance_metric": "cosine"
340+
}
341+
}
342+
```
343+
268344
## Input Examples
269345

270346
### File (Base64)

litellm/constants.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1327,6 +1327,13 @@
13271327
DEFAULT_CHUNK_SIZE = int(os.getenv("DEFAULT_CHUNK_SIZE", 1000))
13281328
DEFAULT_CHUNK_OVERLAP = int(os.getenv("DEFAULT_CHUNK_OVERLAP", 200))
13291329

1330+
########################### S3 Vectors RAG Constants ###########################
1331+
S3_VECTORS_DEFAULT_DIMENSION = int(os.getenv("S3_VECTORS_DEFAULT_DIMENSION", 1024))
1332+
S3_VECTORS_DEFAULT_DISTANCE_METRIC = str(
1333+
os.getenv("S3_VECTORS_DEFAULT_DISTANCE_METRIC", "cosine")
1334+
)
1335+
S3_VECTORS_DEFAULT_NON_FILTERABLE_METADATA_KEYS = ["source_text"]
1336+
13301337
########################### Microsoft SSO Constants ###########################
13311338
MICROSOFT_USER_EMAIL_ATTRIBUTE = str(
13321339
os.getenv("MICROSOFT_USER_EMAIL_ATTRIBUTE", "userPrincipalName")

litellm/model_prices_and_context_window_backup.json

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10232,6 +10232,48 @@
1023210232
"mode": "completion",
1023310233
"output_cost_per_token": 5e-07
1023410234
},
10235+
"deepseek-v3-2-251201": {
10236+
"input_cost_per_token": 0.0,
10237+
"litellm_provider": "volcengine",
10238+
"max_input_tokens": 98304,
10239+
"max_output_tokens": 32768,
10240+
"max_tokens": 32768,
10241+
"mode": "chat",
10242+
"output_cost_per_token": 0.0,
10243+
"supports_assistant_prefill": true,
10244+
"supports_function_calling": true,
10245+
"supports_prompt_caching": true,
10246+
"supports_reasoning": true,
10247+
"supports_tool_choice": true
10248+
},
10249+
"glm-4-7-251222": {
10250+
"input_cost_per_token": 0.0,
10251+
"litellm_provider": "volcengine",
10252+
"max_input_tokens": 204800,
10253+
"max_output_tokens": 131072,
10254+
"max_tokens": 131072,
10255+
"mode": "chat",
10256+
"output_cost_per_token": 0.0,
10257+
"supports_assistant_prefill": true,
10258+
"supports_function_calling": true,
10259+
"supports_prompt_caching": true,
10260+
"supports_reasoning": true,
10261+
"supports_tool_choice": true
10262+
},
10263+
"kimi-k2-thinking-251104": {
10264+
"input_cost_per_token": 0.0,
10265+
"litellm_provider": "volcengine",
10266+
"max_input_tokens": 229376,
10267+
"max_output_tokens": 32768,
10268+
"max_tokens": 32768,
10269+
"mode": "chat",
10270+
"output_cost_per_token": 0.0,
10271+
"supports_assistant_prefill": true,
10272+
"supports_function_calling": true,
10273+
"supports_prompt_caching": true,
10274+
"supports_reasoning": true,
10275+
"supports_tool_choice": true
10276+
},
1023510277
"doubao-embedding": {
1023610278
"input_cost_per_token": 0.0,
1023710279
"litellm_provider": "volcengine",

0 commit comments

Comments (0)