# docker-compose.yml
# Compose stack: FastAPI backend (api), Redis, and an RQ worker.
# An optional local llama.cpp server is kept below, commented out.
services:
  # 1. FastAPI backend service
  api:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: coderag-api
    ports:
      - "8000:8000"
    environment:
      # LLM provider and model; defaults to OpenAI, the actual choice is
      # controlled via .env
      - RAG_LLM_PROVIDER=${RAG_LLM_PROVIDER:-openai}
      - RAG_LLM_MODEL=${RAG_LLM_MODEL:-gpt-4o-mini}
      # Base URL used when running the local llama server (Windows + GPU setup)
      - LOCAL_LLM_BASE=${LOCAL_LLM_BASE:-http://llama:8080/v1}
      # OpenAI API key (set it in .env); falls back to the placeholder "dummy"
      - OPENAI_API_KEY=${OPENAI_API_KEY:-dummy}
      # Redis connection (used by both the cache and RQ)
      - REDIS_URL=redis://redis:6379/0
      - HF_ENDPOINT=https://hf-mirror.com
    volumes:
      # Mount the local data directory so the vector store persists
      - ./data:/app/data
      # Mount the logs directory so logs can be read from the host
      - ./logs:/app/logs
      # Mount the chroma cache directory so the ONNX model is not
      # re-downloaded on every restart
      - ./cache/chroma:/root/.cache/chroma
    depends_on:
      - redis
      # llama is an optional service, so there is no hard dependency on it
      # - llama

  # 2. Local LLM inference service (llama.cpp) - only used in the
  #    Windows + GPU setup. Uncomment to enable.
  # llama:
  #   image: ghcr.io/ggml-org/llama.cpp:server-cuda
  #   container_name: coderag-llama
  #   ports:
  #     - "8081:8080"  # host 8081 -> container 8080
  #   environment:
  #     - LLAMA_ARG_MODEL=/models/qwen2.5-coder-7b-instruct-q6_k.gguf
  #     - LLAMA_ARG_CTX_SIZE=8192
  #     - LLAMA_ARG_N_GPU_LAYERS=35
  #   volumes:
  #     - ./models:/models
  #   deploy:
  #     resources:
  #       reservations:
  #         devices:
  #           - driver: nvidia
  #             count: all
  #             capabilities: [gpu]

  # 3. Redis (cache & RQ)
  redis:
    image: redis:7
    container_name: coderag-redis
    ports:
      - "6379:6379"

  # 4. RQ worker
  worker:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: coderag-worker
    volumes:
      # During development, mount the source tree so the worker imports the
      # latest code
      - .:/app
      - ./data:/app/data
      - ./logs:/app/logs
      # Same chroma cache mount as api, so the ONNX model is only downloaded
      # once
      - ./cache/chroma:/root/.cache/chroma
    environment:
      - RAG_LLM_PROVIDER=${RAG_LLM_PROVIDER:-openai}
      - RAG_LLM_MODEL=${RAG_LLM_MODEL:-gpt-4o-mini}
      - LOCAL_LLM_BASE=${LOCAL_LLM_BASE:-http://llama:8080/v1}
      - OPENAI_API_KEY=${OPENAI_API_KEY:-dummy}
      - REDIS_URL=redis://redis:6379/0
      - HF_ENDPOINT=https://hf-mirror.com
    depends_on:
      - redis
      # Likewise, no hard dependency on llama
      # - llama
    # The rq CLI does not read REDIS_URL; without --url it would try
    # localhost:6379 inside this container. Point it at the redis service
    # explicitly (same URL as REDIS_URL above).
    command: ["rq", "worker", "--url", "redis://redis:6379/0", "coderag"]