Skip to content

Commit fe444f3

Browse files
authored
[Feat] RAG API - Add support for using s3 Vectors as Vector Store Provider for /rag/ingest (BerriAI#19888)
* init S3VectorsRAGIngestion as a supported ingestion provider for RAG API
* test: TestRAGS3Vectors
* init S3VectorsVectorStoreOptions
* init s3 vectors
* code clean up + QA
* fix: get_credentials
* S3VectorsRAGIngestion
* TestRAGS3Vectors
* docs: AWS S3 Vectors
* add asyncio QA checks
* fix: S3_VECTORS_DEFAULT_DIMENSION
1 parent 7d5439a commit fe444f3

File tree

7 files changed: +826 additions, -2 deletions

docs/my-website/docs/rag_ingest.md

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ All-in-one document ingestion pipeline: **Upload → Chunk → Embed → Vector
55
| Feature | Supported |
66
|---------|-----------|
77
| Logging | Yes |
8-
| Supported Providers | `openai`, `bedrock`, `vertex_ai`, `gemini` |
8+
| Supported Providers | `openai`, `bedrock`, `vertex_ai`, `gemini`, `s3_vectors` |
99

1010
:::tip
1111
After ingesting documents, use [/rag/query](./rag_query.md) to search and generate responses with your ingested content.
@@ -75,6 +75,31 @@ curl -X POST "http://localhost:4000/v1/rag/ingest" \
7575
}"
7676
```
7777

78+
### AWS S3 Vectors
79+
80+
```bash showLineNumbers title="Ingest to S3 Vectors"
81+
curl -X POST "http://localhost:4000/v1/rag/ingest" \
82+
-H "Authorization: Bearer sk-1234" \
83+
-H "Content-Type: application/json" \
84+
-d "{
85+
\"file\": {
86+
\"filename\": \"document.txt\",
87+
\"content\": \"$(base64 -i document.txt)\",
88+
\"content_type\": \"text/plain\"
89+
},
90+
\"ingest_options\": {
91+
\"embedding\": {
92+
\"model\": \"text-embedding-3-small\"
93+
},
94+
\"vector_store\": {
95+
\"custom_llm_provider\": \"s3_vectors\",
96+
\"vector_bucket_name\": \"my-embeddings\",
97+
\"aws_region_name\": \"us-west-2\"
98+
}
99+
}
100+
}"
101+
```
102+
78103
## Response
79104

80105
```json
@@ -265,6 +290,57 @@ When `vector_store_id` is omitted, LiteLLM automatically creates:
265290
4. Install: `pip install 'google-cloud-aiplatform>=1.60.0'`
266291
:::
267292

293+
### vector_store (AWS S3 Vectors)
294+
295+
| Parameter | Type | Default | Description |
296+
|-----------|------|---------|-------------|
297+
| `custom_llm_provider` | string | - | `"s3_vectors"` |
298+
| `vector_bucket_name` | string | **required** | S3 vector bucket name |
299+
| `index_name` | string | auto-create | Vector index name |
300+
| `dimension` | integer | auto-detect | Vector dimension (auto-detected from embedding model) |
301+
| `distance_metric` | string | `cosine` | Distance metric: `cosine` or `euclidean` |
302+
| `non_filterable_metadata_keys` | array | `["source_text"]` | Metadata keys excluded from filtering |
303+
| `aws_region_name` | string | `us-west-2` | AWS region |
304+
| `aws_access_key_id` | string | env | AWS access key |
305+
| `aws_secret_access_key` | string | env | AWS secret key |
306+
307+
:::info S3 Vectors Auto-Creation
308+
When `index_name` is omitted, LiteLLM automatically creates:
309+
- S3 vector bucket (if it doesn't exist)
310+
- Vector index with auto-detected dimensions from your embedding model
311+
312+
**Dimension Auto-Detection**: The vector dimension is automatically detected by making a test embedding request to your specified model. No need to manually specify dimensions!
313+
314+
**Supported Embedding Models**: Works with any LiteLLM-supported embedding model (OpenAI, Cohere, Bedrock, Azure, etc.)
315+
:::
316+
317+
**Example with auto-detection:**
318+
```json
319+
{
320+
"embedding": {
321+
"model": "text-embedding-3-small" // Dimension auto-detected as 1536
322+
},
323+
"vector_store": {
324+
"custom_llm_provider": "s3_vectors",
325+
"vector_bucket_name": "my-embeddings"
326+
}
327+
}
328+
```
329+
330+
**Example with custom embedding provider:**
331+
```json
332+
{
333+
"embedding": {
334+
"model": "cohere/embed-english-v3.0" // Dimension auto-detected as 1024
335+
},
336+
"vector_store": {
337+
"custom_llm_provider": "s3_vectors",
338+
"vector_bucket_name": "my-embeddings",
339+
"distance_metric": "cosine"
340+
}
341+
}
342+
```
343+
268344
## Input Examples
269345

270346
### File (Base64)

litellm/constants.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1327,6 +1327,13 @@
13271327
DEFAULT_CHUNK_SIZE = int(os.getenv("DEFAULT_CHUNK_SIZE", 1000))
13281328
DEFAULT_CHUNK_OVERLAP = int(os.getenv("DEFAULT_CHUNK_OVERLAP", 200))
13291329

1330+
########################### S3 Vectors RAG Constants ###########################
1331+
S3_VECTORS_DEFAULT_DIMENSION = int(os.getenv("S3_VECTORS_DEFAULT_DIMENSION", 1024))
1332+
S3_VECTORS_DEFAULT_DISTANCE_METRIC = str(
1333+
os.getenv("S3_VECTORS_DEFAULT_DISTANCE_METRIC", "cosine")
1334+
)
1335+
S3_VECTORS_DEFAULT_NON_FILTERABLE_METADATA_KEYS = ["source_text"]
1336+
13301337
########################### Microsoft SSO Constants ###########################
13311338
MICROSOFT_USER_EMAIL_ATTRIBUTE = str(
13321339
os.getenv("MICROSOFT_USER_EMAIL_ATTRIBUTE", "userPrincipalName")

litellm/model_prices_and_context_window_backup.json

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10232,6 +10232,48 @@
1023210232
"mode": "completion",
1023310233
"output_cost_per_token": 5e-07
1023410234
},
10235+
"deepseek-v3-2-251201": {
10236+
"input_cost_per_token": 0.0,
10237+
"litellm_provider": "volcengine",
10238+
"max_input_tokens": 98304,
10239+
"max_output_tokens": 32768,
10240+
"max_tokens": 32768,
10241+
"mode": "chat",
10242+
"output_cost_per_token": 0.0,
10243+
"supports_assistant_prefill": true,
10244+
"supports_function_calling": true,
10245+
"supports_prompt_caching": true,
10246+
"supports_reasoning": true,
10247+
"supports_tool_choice": true
10248+
},
10249+
"glm-4-7-251222": {
10250+
"input_cost_per_token": 0.0,
10251+
"litellm_provider": "volcengine",
10252+
"max_input_tokens": 204800,
10253+
"max_output_tokens": 131072,
10254+
"max_tokens": 131072,
10255+
"mode": "chat",
10256+
"output_cost_per_token": 0.0,
10257+
"supports_assistant_prefill": true,
10258+
"supports_function_calling": true,
10259+
"supports_prompt_caching": true,
10260+
"supports_reasoning": true,
10261+
"supports_tool_choice": true
10262+
},
10263+
"kimi-k2-thinking-251104": {
10264+
"input_cost_per_token": 0.0,
10265+
"litellm_provider": "volcengine",
10266+
"max_input_tokens": 229376,
10267+
"max_output_tokens": 32768,
10268+
"max_tokens": 32768,
10269+
"mode": "chat",
10270+
"output_cost_per_token": 0.0,
10271+
"supports_assistant_prefill": true,
10272+
"supports_function_calling": true,
10273+
"supports_prompt_caching": true,
10274+
"supports_reasoning": true,
10275+
"supports_tool_choice": true
10276+
},
1023510277
"doubao-embedding": {
1023610278
"input_cost_per_token": 0.0,
1023710279
"litellm_provider": "volcengine",

0 commit comments

Comments (0)