-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path.env_template
More file actions
137 lines (100 loc) · 4.7 KB
/
.env_template
File metadata and controls
137 lines (100 loc) · 4.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# ---------- WEBAPP CONFIG ---------- #
WEBAPP_PORT = 8000
UVICORN_WORKERS = 1
# ---------- DJANGO CONFIG ---------- #
DJANGO_DEBUG = False # Set to True for development, False for production
DJANGO_SECRET_KEY = "django-insecure-d)+b7f#u@$@q)(ft*qcz1!%^uvy(_ext-^t4d6i$3l$)21__s(" # Change this to a strong secret key!
DJANGO_ALLOWED_HOSTS = ["127.0.0.1", "localhost"] # List of allowed hosts
# ---------- BIBLE CONFIG ---------- #
ENABLED_VERSIONS = [] # Leave empty to enable all versions. See `frontend/bible_data/` for available versions.
DEFAULT_VERSION = "bsb" # Must be one of the enabled versions (any available version counts when ENABLED_VERSIONS is empty)
DEFAULT_BOOK = "Genesis" # Must be in the books of the default version
DEFAULT_CHAPTER = 1 # Must be in the chapters of the default book
# ---------- API KEYS ---------- #
HF_TOKEN = ""
# ---------- POSTGRESQL CONFIG ---------- #
POSTGRES_HOST = "localhost"
POSTGRES_PORT = 5432
POSTGRES_USER = "faith_user"
POSTGRES_PASSWORD = "postgres-secure-password" # Use a strong password!
POSTGRES_DATABASE = "faith_db"
# ---------- MILVUS ---------- #
MILVUS_URL = "http://localhost"
MILVUS_PORT = 19530
MILVUS_DATABASE_NAME = "fAIth"
MILVUS_USERNAME = "root"
MILVUS_PASSWORD = "Milvus"
MILVUS_SEARCH_LIMIT = 10
# Use only vector embeddings with your embedding model
# DATABASE_TYPE = "dense"
# Use only lexical search with BM25
# DATABASE_TYPE = "sparse"
# Use both vector embeddings and lexical search
DATABASE_TYPE = "hybrid"
# How much influence the vector embeddings and lexical search have on the overall results
# Only available when using "hybrid" search
DENSE_WEIGHT = 0.8
SPARSE_WEIGHT = 0.2
# ---------- EMBEDDING MODEL ---------- #
# Endpoints
BASE_EMBEDDING_URL = ""
EMBEDDING_API_KEY = "" # API key for the embedding service. Leave empty when using local models; set it to your provider's key when using a remote service.
# Localhost options
EMBEDDING_PORT = "11435" # NOTE: IF THIS IS SET, IT WILL OVERRIDE THE BASE_EMBEDDING_URL TO USE THE LOCALHOST ENDPOINT. MAKE THIS EMPTY IF USING A REMOTE EMBEDDING SERVICE.
# Embedding Model
EMBEDDING_MODEL_ID = "Qwen/Qwen3-Embedding-0.6B-GGUF:Q8_0" # The model ID you want to use for the embedding model
EMBEDDING_MODEL_DOCUMENT_PROMPT = "{text}"
EMBEDDING_MODEL_QUERY_PROMPT = "Instruct: Given a Bible-related query, retrieve relevant passages that answer the query.\nQuery: {text}"
EMBEDDING_MAX_CONTEXT_LENGTH = 4096 # The maximum context length you want to allow for the embedding model
# Embedding Model Runners
# EMBEDDING_MODEL_RUNNER = "vllm"
EMBEDDING_MODEL_RUNNER = "llama_cpp"
# EMBEDDING_MODEL_RUNNER = "ollama"
# EMBEDDING_MODEL_RUNNER = "docker_model_runner"
# EMBEDDING_MODEL_RUNNER = "sglang"
# GPU Type
EMBEDDING_GPU_TYPE = "cpu"
# EMBEDDING_GPU_TYPE = "nvidia"
# EMBEDDING_GPU_TYPE = "amd"
# EMBEDDING_GPU_TYPE = "intel"
# GPU Driver
EMBEDDING_DRIVER = "cpu"
# EMBEDDING_DRIVER = "cuda"
# EMBEDDING_DRIVER = "rocm"
# EMBEDDING_DRIVER = "vulkan"
# Llama CPP Python specific things
EMBEDDING_LLAMA_CPP_GPU_LAYERS = -1 # How many layers you want allocated to the GPU. 0 for none (all to CPU), -1 for all possible layers on GPU
EMBEDDING_LLAMA_CPP_CONCURRENCY = 1 # How many concurrent requests you want to allow to the embedding model
# vLLM specific things
EMBEDDING_VLLM_ENFORCE_EAGER = False
# ---------- LLM MODEL ---------- #
# Endpoints
BASE_LLM_URL = ""
LLM_API_KEY = "" # API key for the LLM service. Leave empty when using local models; set it to your provider's key when using a remote service.
# Localhost options
LLM_PORT = "11436" # NOTE: IF THIS IS SET, IT WILL OVERRIDE THE BASE_LLM_URL TO USE THE LOCALHOST ENDPOINT. MAKE THIS EMPTY IF USING A REMOTE LLM SERVICE.
# LLM Model
LLM_MODEL_ID = "unsloth/Qwen3.5-4B-GGUF:Q4_K_M" # The model ID you want to use for the LLM model
LLM_MODEL_ARGUMENTS = '{"chat_template_kwargs": {"enable_thinking": false}}' # The arguments you want to pass to the LLM model
LLM_MAX_CONTEXT_LENGTH = 4096 # The maximum context length you want to allow for the LLM model
# LLM Model Runners
# LLM_MODEL_RUNNER = "vllm"
LLM_MODEL_RUNNER = "llama_cpp"
# LLM_MODEL_RUNNER = "ollama"
# LLM_MODEL_RUNNER = "docker_model_runner"
# LLM_MODEL_RUNNER = "sglang"
# GPU Type
LLM_GPU_TYPE = "cpu"
# LLM_GPU_TYPE = "nvidia"
# LLM_GPU_TYPE = "amd"
# LLM_GPU_TYPE = "intel"
# GPU Driver
LLM_DRIVER = "cpu"
# LLM_DRIVER = "cuda"
# LLM_DRIVER = "rocm"
# LLM_DRIVER = "vulkan"
# Llama CPP Python specific things
LLM_LLAMA_CPP_GPU_LAYERS = -1 # How many layers you want allocated to the GPU. 0 for none (all to CPU), -1 for all possible layers on GPU
LLM_LLAMA_CPP_CONCURRENCY = 1 # How many concurrent requests you want to allow to the LLM model
# vLLM specific things
LLM_VLLM_ENFORCE_EAGER = False