77from retry import retry
88import streamlit as st
99import chromadb .utils .embedding_functions as embedding_functions
10- from huggingface_hub import InferenceClient
10+ from huggingface_hub import InferenceClient , login
1111from openai import OpenAI
1212import ollama
1313from constants import (
1616 HEADERS ,
1717 EMB_MODEL_ID ,
1818 EMB_API_URL ,
19- QA_MODEL_ID ,
20- HF_QA_ENDPOINT ,
2119 HF_LM_ENDPOINT ,
2220 OPENAI_ENDPOINT ,
2321 LOCAL_OLLAMA_ENDPOINT ,
2624
2725import chromadb
2826
29-
# Lookup table of the available language models: one row per model with
# the endpoint type (HF / OpenAI / Ollama) it is served from.
lm_df = pd.DataFrame.from_dict(AVAILABLE_LMs)
# Authenticate this session against the Hugging Face Hub; the token is
# also stored via the git credential helper for later CLI/hub use.
login(token=HF_TOKEN, add_to_git_credential=True)
3129
3230#####################
3331## Utility Functions
@@ -77,31 +75,24 @@ def get_relevant_documents(query, db):
7775 except Exception as ex :
7876 return "Apologies but I could not process your query" , 0.0 , ex
7977
80- def get_hf_qa_answer (payload ,lm_model ):
81- data = json .dumps (payload )
82- try :
83- QA_API_URL = f"https://api-inference.huggingface.co/models/{ lm_model } "
84- response = requests .request ("POST" , QA_API_URL , headers = HEADERS , data = data )
85- decoded_response = json .loads (response .content .decode ("utf-8" ))
86- return decoded_response ['answer' ], decoded_response ['score' ], ""
87- except Exception as ex :
88- return "Apologies but I could not find any relevant answer" , 0.0 , ex
89-
# this is mostly timing out
def get_hf_llm_answer(payload, lm_model):
    """Answer a question via a Hugging Face Inference chat model.

    Args:
        payload: dict with 'context' and 'question' keys used to build
            the prompt.
        lm_model: Hub model id passed to the chat-completion call.

    Returns:
        A ``(answer, score, error)`` tuple. On success the raw model
        text is returned with empty score/error placeholders; on any
        failure an apology string, ``0.0`` and the caught exception are
        returned instead.
    """
    try:
        client = InferenceClient(
            provider="hf-inference",
            api_key=HF_TOKEN,
        )
        # BUG FIX: this prompt was previously a bare (unassigned) f-string
        # expression, so `content` was never defined and every call raised
        # NameError and fell into the except branch below.
        content = (
            "Given the context, perform the following tasks:"
            "1.Respond with a summarized answer to the question factually "
            "in few words only if the provided context contains the answer\n"
            "2.Generate a relevance score.\n"
            "3. Format the output as a json with answer and score as keys. "
            "Do not add markdown syntax.\n"
            "Think step by step.\n"
            f"context:{payload['context']}\n"
            f"question:{payload['question']}"
        )
        completion = client.chat.completions.create(
            model=lm_model,
            messages=[
                {
                    "role": "user",
                    "content": content,
                }
            ],
        )
        return completion.choices[0].message.content, "", ""
    except Exception as ex:
        return "Apologies but I could not find any relevant answer", 0.0, ex
10798
@@ -134,10 +125,10 @@ def get_opeai_answer(payload,lm_model):
134125 "content" : content ,
135126 }
136127 ],
137- model = "gpt-4o-mini" ,
128+ max_tokens = 500 ,
129+ model = "gpt-4o-2024-11-20" ,
138130 )
139- json_output = json .loads (chat_completion .choices [0 ].message .content )
140- return json_output ['answer' ], json_output ['score' ], ""
131+ return chat_completion .choices [0 ].message .content ,"" , ""
141132 except Exception as ex :
142133 return "Apologies but I could not find any relevant answer" , 0.0 , ex
143134
@@ -153,9 +144,7 @@ def get_answer(question,context,lm_model):
153144 }
154145 try :
155146 endpoint_type = lm_df [lm_df ['models' ]== lm_model ]['endpoints' ].values [0 ]
156- if endpoint_type == HF_QA_ENDPOINT :
157- return get_hf_qa_answer (payload ,lm_model )
158- elif endpoint_type == HF_LM_ENDPOINT :
147+ if endpoint_type == HF_LM_ENDPOINT :
159148 return get_hf_llm_answer (payload ,lm_model )
160149 elif endpoint_type == OPENAI_ENDPOINT :
161150 return get_opeai_answer (payload ,lm_model )
0 commit comments