Skip to content

Commit 4fecbec

Browse files
committed
Fix rag notebook
1 parent 58e2cf2 commit 4fecbec

File tree

3 files changed

+56
-41
lines changed

3 files changed

+56
-41
lines changed

docs/module_04_llm_apps/01_retrieval_augmented_llm_app.ipynb

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,35 @@
8686
"# install dependencies\n",
8787
"# !pip install -q chromadb\n",
8888
"# !pip install retry\n",
89+
"# !pip install ollama\n",
90+
"# !pip install openai\n",
8991
"# !pip install -q streamlit \n",
92+
"# !pip install -U huggingface_hub\n",
9093
"# !npm install localtunnel # this is needed if you are working from colab"
9194
]
9295
},
96+
{
97+
"cell_type": "markdown",
98+
"metadata": {},
99+
"source": [
100+
"### Start Chroma DB in Server mode\n",
101+
"```bash\n",
102+
">chroma run --path ./chromadb \n",
103+
"```\n",
104+
"\n",
105+
"### Install NodeJS\n",
106+
"```bash\n",
107+
">curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash\n",
108+
"\n",
109+
">export NVM_DIR=\"$([ -z \"${XDG_CONFIG_HOME-}\" ] && printf %s \"${HOME}/.nvm\" || printf %s \"${XDG_CONFIG_HOME}/nvm\")\"\n",
110+
"[ -s \"$NVM_DIR/nvm.sh\" ] && \\. \"$NVM_DIR/nvm.sh\" # This loads nvm\n",
111+
"\n",
112+
">source ~/.bashrc\n",
113+
">nvm install node\n",
114+
">npm install localtunnel\n",
115+
"```"
116+
]
117+
},
93118
{
94119
"cell_type": "code",
95120
"execution_count": 1,
@@ -105,7 +130,7 @@
105130
"name": "stdout",
106131
"output_type": "stream",
107132
"text": [
108-
"Writing app.py\n"
133+
"Overwriting app.py\n"
109134
]
110135
}
111136
],
@@ -196,6 +221,13 @@
196221
" st.markdown(f\"- <i>{context[:100]}...</i>\", unsafe_allow_html=True)"
197222
]
198223
},
224+
{
225+
"cell_type": "markdown",
226+
"metadata": {},
227+
"source": [
228+
"> Following steps are useful for colab, on local systems start streamlit from terminal"
229+
]
230+
},
199231
{
200232
"cell_type": "code",
201233
"execution_count": null,

docs/module_04_llm_apps/constants.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
#####################
22
## Set Constants
33
#####################
4-
HF_TOKEN = '<YOUR KEY>'
5-
OPENAI_TOKEN = '<YOUR KEY>'
4+
HF_TOKEN = '<YOUR TOKEN>'
5+
OPENAI_TOKEN = '<YOUR TOKEN>'
66
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
7-
87
# Constants for embedding model
98
EMB_MODEL_ID = 'pinecone/mpnet-retriever-discourse'
109
EMB_API_URL = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{EMB_MODEL_ID}"
@@ -13,24 +12,19 @@
1312
QA_MODEL_ID = 'deepset/roberta-base-squad2'
1413

1514
# List of Different Endpoints
16-
HF_QA_ENDPOINT = 'HF-QA'
1715
HF_LM_ENDPOINT = 'HF-LM'
1816
OPENAI_ENDPOINT = 'OPENAI-LM'
1917
LOCAL_OLLAMA_ENDPOINT = 'OLLAMA'
2018
AVAILABLE_LMs = {
2119
'models':
2220
[
23-
'deepset/roberta-base-squad2',
24-
'Intel/dynamic_tinybert',
25-
#'google/gemma-2-2b-it', # this is timing out mostly
21+
'HuggingFaceTB/SmolLM3-3B',
2622
'Local-LLAMA-3.1:8b',
2723
'OpenAI-GPT4o-mini'
2824
],
2925
'endpoints':
3026
[
31-
HF_QA_ENDPOINT,
32-
HF_QA_ENDPOINT,
33-
#HF_LM_ENDPOINT, #this is timing out mostly
27+
HF_LM_ENDPOINT,
3428
LOCAL_OLLAMA_ENDPOINT,
3529
OPENAI_ENDPOINT,
3630
]

docs/module_04_llm_apps/utils.py

Lines changed: 19 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from retry import retry
88
import streamlit as st
99
import chromadb.utils.embedding_functions as embedding_functions
10-
from huggingface_hub import InferenceClient
10+
from huggingface_hub import InferenceClient,login
1111
from openai import OpenAI
1212
import ollama
1313
from constants import (
@@ -16,8 +16,6 @@
1616
HEADERS,
1717
EMB_MODEL_ID,
1818
EMB_API_URL,
19-
QA_MODEL_ID,
20-
HF_QA_ENDPOINT,
2119
HF_LM_ENDPOINT,
2220
OPENAI_ENDPOINT,
2321
LOCAL_OLLAMA_ENDPOINT,
@@ -26,8 +24,8 @@
2624

2725
import chromadb
2826

29-
3027
lm_df = pd.DataFrame.from_dict(AVAILABLE_LMs)
28+
login(token=HF_TOKEN, add_to_git_credential=True)
3129

3230
#####################
3331
## Utility Functions
@@ -77,31 +75,24 @@ def get_relevant_documents(query, db):
7775
except Exception as ex:
7876
return "Apologies but I could not process your query", 0.0, ex
7977

80-
def get_hf_qa_answer(payload,lm_model):
81-
data = json.dumps(payload)
82-
try:
83-
QA_API_URL = f"https://api-inference.huggingface.co/models/{lm_model}"
84-
response = requests.request("POST", QA_API_URL, headers=HEADERS, data=data)
85-
decoded_response = json.loads(response.content.decode("utf-8"))
86-
return decoded_response['answer'], decoded_response['score'], ""
87-
except Exception as ex:
88-
return "Apologies but I could not find any relevant answer", 0.0, ex
89-
9078
# this is mostly timing out
9179
def get_hf_llm_answer(payload,lm_model):
9280
try:
9381
client = InferenceClient(
94-
"google/gemma-2-2b-it",
95-
token=HF_TOKEN,)
96-
97-
content = f"Given the context, answer the question. \ncontext:{payload['context']}\nquestion:{payload['question']}"
98-
response= client.chat_completion(
99-
messages=[{"role": "user", "content": content}],
100-
max_tokens=500,
101-
stream=False,
82+
provider="hf-inference",
83+
api_key=HF_TOKEN
10284
)
103-
104-
return json.loads(message.choices[0].delta.content), 0.0
85+
f"Given the context, perform the following tasks:1.Respond with a summarized answer to the question factually in few words only if the provided context contains the answer\n 2.Generate a relevance score.\n3. Format the output as a json with answer and score as keys. Do not add makrdown syntax.\nThink step by step.\ncontext:{payload['context']}\nquestion:{payload['question']}"
86+
completion = client.chat.completions.create(
87+
model=lm_model,
88+
messages=[
89+
{
90+
"role": "user",
91+
"content": content
92+
}
93+
],
94+
)
95+
return completion.choices[0].message.content, "", ""
10596
except Exception as ex:
10697
return "Apologies but I could not find any relevant answer", 0.0, ex
10798

@@ -134,10 +125,10 @@ def get_opeai_answer(payload,lm_model):
134125
"content": content,
135126
}
136127
],
137-
model="gpt-4o-mini",
128+
max_tokens=500,
129+
model="gpt-4o-2024-11-20",
138130
)
139-
json_output = json.loads(chat_completion.choices[0].message.content)
140-
return json_output['answer'], json_output['score'], ""
131+
return chat_completion.choices[0].message.content,"", ""
141132
except Exception as ex:
142133
return "Apologies but I could not find any relevant answer", 0.0, ex
143134

@@ -153,9 +144,7 @@ def get_answer(question,context,lm_model):
153144
}
154145
try:
155146
endpoint_type = lm_df[lm_df['models']==lm_model]['endpoints'].values[0]
156-
if endpoint_type == HF_QA_ENDPOINT:
157-
return get_hf_qa_answer(payload,lm_model)
158-
elif endpoint_type == HF_LM_ENDPOINT:
147+
if endpoint_type == HF_LM_ENDPOINT:
159148
return get_hf_llm_answer(payload,lm_model)
160149
elif endpoint_type == OPENAI_ENDPOINT:
161150
return get_opeai_answer(payload,lm_model)

0 commit comments

Comments
 (0)