1+ #####################
2+ ## imports
3+ #####################
4+ import pandas as pd
5+ import json
6+ import requests
7+ from retry import retry
8+ import streamlit as st
9+ import chromadb .utils .embedding_functions as embedding_functions
10+ from huggingface_hub import InferenceClient
11+ from openai import OpenAI
12+ import ollama
13+ from constants import (
14+ HF_TOKEN ,
15+ OPENAI_TOKEN ,
16+ HEADERS ,
17+ EMB_MODEL_ID ,
18+ EMB_API_URL ,
19+ QA_MODEL_ID ,
20+ HF_QA_ENDPOINT ,
21+ HF_LM_ENDPOINT ,
22+ OPENAI_ENDPOINT ,
23+ LOCAL_OLLAMA_ENDPOINT ,
24+ AVAILABLE_LMs )
25+
26+
27+ import chromadb
28+
29+
# Registry of available language models and their endpoint types, loaded from
# constants.AVAILABLE_LMs; downstream code reads the 'models' and 'endpoints'
# columns to route a request to the right backend.
lm_df = pd.DataFrame.from_dict(AVAILABLE_LMs)
31+
32+ #####################
33+ ## Utility Functions
34+ #####################
35+
def get_lines(uploaded_file):
    """
    Decode each line of a binary file-like object (e.g. a Streamlit upload)
    into a UTF-8 string.

    Args:
        uploaded_file: iterable yielding bytes lines.

    Returns:
        list[str]: the decoded lines, in order.
    """
    return [chunk.decode("utf-8") for chunk in uploaded_file]
44+
def create_db():
    """
    Instantiate an in-memory Chroma client and return it together with the
    workshop collection (created on first call, reused afterwards).

    Returns:
        tuple: (chroma client, collection handle)
    """
    # NOTE: a HuggingFace embedding function was previously wired in here;
    # the collection currently falls back to Chroma's default embedder.
    client = chromadb.Client()
    collection = client.get_or_create_collection(name="nlp_llm_workshop")
    return client, collection
57+
def load_data(db, documents):
    """
    Add/index documents into the vector DB collection, using their positional
    index (as a string) as the document id.

    Args:
        db: Chroma collection exposing ``add(documents=..., ids=...)``.
        documents: list of raw text strings to index.

    Returns:
        None on success; on failure a 3-tuple
        (apology message, 0.0, exception) mirroring the other helpers.
    """
    if not documents:
        # nothing to index -- avoid calling add() with empty lists
        return None
    try:
        db.add(
            documents=documents,
            ids=[str(i) for i in range(len(documents))],
        )
    except Exception as ex:
        return "Apologies but I could not ingest document", 0.0, ex
69+
def get_relevant_documents(query, db):
    """
    Fetch the single most relevant document for ``query`` from the vector DB.

    Args:
        query: user query string.
        db: Chroma collection exposing ``query(query_texts=..., n_results=...)``.

    Returns:
        The best-matching document string on success; on failure a 3-tuple
        (apology message, 0.0, exception).
    """
    try:
        hits = db.query(query_texts=[query], n_results=1)
        # top hit of the first (and only) query
        return hits["documents"][0][0]
    except Exception as ex:
        return "Apologies but I could not process your query", 0.0, ex
79+
def get_hf_qa_answer(payload, lm_model):
    """
    Answer a question with a HuggingFace-hosted extractive QA model via the
    serverless inference API.

    Args:
        payload: dict with 'question' and 'context' keys.
        lm_model: HF model id, appended to the inference API URL.

    Returns:
        (answer, score, "") on success, or
        (apology message, 0.0, exception) on failure.
    """
    try:
        # bug fix: the URL literal previously ended with a trailing space
        # after the model id, producing a malformed request URL
        qa_api_url = f"https://api-inference.huggingface.co/models/{lm_model}"
        # json= serializes the payload and sets the Content-Type header,
        # replacing the manual json.dumps + requests.request dance
        response = requests.post(qa_api_url, headers=HEADERS, json=payload)
        decoded_response = response.json()
        return decoded_response["answer"], decoded_response["score"], ""
    except Exception as ex:
        return "Apologies but I could not find any relevant answer", 0.0, ex
89+
90+ # this is mostly timing out
# this is mostly timing out
def get_hf_llm_answer(payload, lm_model):
    """
    Answer a question with a HuggingFace-hosted chat LLM.

    Args:
        payload: dict with 'question' and 'context' keys.
        lm_model: currently unused; the model is pinned to
            google/gemma-2-2b-it (TODO: route ``lm_model`` through instead).

    Returns:
        (answer, 0.0, "") on success, or
        (apology message, 0.0, exception) on failure.
    """
    try:
        client = InferenceClient(
            "google/gemma-2-2b-it",
            token=HF_TOKEN,
        )
        content = (
            "Given the context, answer the question."
            f"\ncontext:{payload['context']}"
            f"\nquestion:{payload['question']}"
        )
        response = client.chat_completion(
            messages=[{"role": "user", "content": content}],
            max_tokens=500,
            stream=False,
        )
        # bug fix: the original referenced an undefined name ``message`` and a
        # streaming-only ``delta`` attribute, and json.loads-ed a free-text
        # reply; with stream=False the text lives on choices[0].message.content
        answer = response.choices[0].message.content
        # return the standard 3-tuple used by every sibling backend
        return answer, 0.0, ""
    except Exception as ex:
        return "Apologies but I could not find any relevant answer", 0.0, ex
107+
def get_local_llama_answer(payload, lm_model):
    """
    Answer a question with a locally served Llama model via Ollama.

    The prompt instructs the model to reply with bare JSON containing
    'answer' and 'score' keys, which is then parsed directly.

    Args:
        payload: dict with 'question' and 'context' keys.
        lm_model: unused here; the model is pinned to 'llama3.1:8b'.

    Returns:
        (answer, score, "") on success, or
        (apology message, 0.0, exception) on failure -- e.g. when the model
        emits output that is not valid JSON.
    """
    try:
        content = f"Given the context, perform the following tasks:1.Respond with a summarized answer to the question factually in few words only if the provided context contains the answer\n 2.Check if your answer is really in the provided context, otherwise respond with 'Sorry I could not find the answer'.\n 3.Generate a relevance score between 0 and 1.\n 4. Format the output as a json with answer and score as keys.\n 5.Do not add makrdown syntax only respond with json.\n Be careful and Think step by step.\n context:{ payload ['context' ]} \n question:{ payload ['question' ]} "
        response = ollama.chat(model='llama3.1:8b', messages=[
            {
                'role': 'user',
                'content': content,
            },
        ]
        )
        # the model is asked to emit bare JSON -- parse it straight from the reply
        json_output = json.loads(response['message']['content'])
        return json_output['answer'], json_output['score'], ""
    except Exception as ex:
        # NOTE(review): surfaces the raw exception in the Streamlit UI --
        # looks like a debugging aid; confirm it should stay in production
        st.markdown(ex)
        return "Apologies but I could not find any relevant answer", 0.0, ex
123+
# NOTE(review): function name has a typo ("opeai" -> "openai"); kept as-is
# because get_answer() dispatches to this exact name.
def get_opeai_answer(payload, lm_model):
    """
    Answer a question with an OpenAI chat model.

    The prompt asks the model to respond with a JSON object containing
    'answer' and 'score' keys, which is parsed directly from the reply.

    Args:
        payload: dict with 'question' and 'context' keys.
        lm_model: unused here; the model is pinned to 'gpt-4o-mini'.

    Returns:
        (answer, score, "") on success, or
        (apology message, 0.0, exception) on failure -- e.g. when the model
        emits output that is not valid JSON.
    """
    try:
        client = OpenAI(
            api_key=OPENAI_TOKEN,
        )
        content = f"Given the context, perform the following tasks:1.Respond with a summarized answer to the question factually in few words only if the provided context contains the answer\n 2.Generate a relevance score.\n 3. Format the output as a json with answer and score as keys. Do not add makrdown syntax.\n Think step by step.\n context:{ payload ['context' ]} \n question:{ payload ['question' ]} "
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": content,
                }
            ],
            model="gpt-4o-mini",
        )
        # model is asked for bare JSON -- parse it straight from the reply
        json_output = json.loads(chat_completion.choices[0].message.content)
        return json_output['answer'], json_output['score'], ""
    except Exception as ex:
        return "Apologies but I could not find any relevant answer", 0.0, ex
143+
144+
def get_answer(question, context, lm_model):
    """
    Route a question plus retrieved context to the backend configured for
    ``lm_model`` in the model registry (``lm_df``).

    Args:
        question: user question string.
        context: retrieved context passed to the model.
        lm_model: model name; expected to appear in lm_df['models'].

    Returns:
        3-tuple (answer, score, error) from the selected backend, or an
        apology tuple when the model is unknown or unsupported.
    """
    payload = {
        "question": question,
        "context": context,
    }
    try:
        endpoint_type = lm_df[lm_df["models"] == lm_model]["endpoints"].values[0]
        if endpoint_type == HF_QA_ENDPOINT:
            return get_hf_qa_answer(payload, lm_model)
        elif endpoint_type == HF_LM_ENDPOINT:
            return get_hf_llm_answer(payload, lm_model)
        elif endpoint_type == OPENAI_ENDPOINT:
            return get_opeai_answer(payload, lm_model)
        elif endpoint_type == LOCAL_OLLAMA_ENDPOINT:
            return get_local_llama_answer(payload, lm_model)
        # bug fix: the original else branch was a bare expression with no
        # ``return`` and referenced an undefined name ``ex``
        return "This is not implemented yet", 0.0, ""
    except Exception as ex:
        # also reached when lm_model is absent from the registry (IndexError)
        return "Apologies but I could not find any relevant answer", 0.0, ex
168+
169+
def sidebar():
    """
    Render the Streamlit sidebar: usage instructions, the model picker,
    and an about section.

    Returns:
        str: the model name selected in the dropdown.
    """
    with st.sidebar:
        st.markdown(
            "## How to use\n"
            "1. Upload a txt file📄\n"
            # bug fix: the second step was numbered "3." in the original
            "2. Ask a question about the document💬\n"
        )
        st.markdown("---")
        st.markdown("## Which LM would you like to use?")
        option = st.selectbox(
            "Select a Model",
            lm_df["models"],
            label_visibility="hidden",
        )

        st.markdown("---")
        st.markdown("# About")
        st.markdown(
            "📖PersonalGPT is a demo to showcase retrieval augmented question answering system"
        )
        st.markdown(":heart: Made by [raghav bali](https://raghavbali.github.io)")
        st.markdown("---")

    return option