Skip to content

Commit 8354909

Browse files
committed
added QA version
1 parent 43f04af commit 8354909

File tree

7 files changed

+476
-117
lines changed

7 files changed

+476
-117
lines changed

gpt/utils.py

Lines changed: 111 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,114 @@ def gpt_short_verbalizer(files_dir, llm_api, llm_strong, llm_base, logging):
168168
return gpttext, slides
169169

170170

171+
def gpt_qa_verbalizer(files_dir, llm_api, llm_base, matcher, logging):
    """Generate exam-style Q&A pairs for a paper and map each answer to pages.

    Reads the page-split paper text from *files_dir*, asks the LLM (via an
    OpenAI function call) to produce a list of probing questions, then answers
    each question with a second round of LLM calls. Each answer is stripped of
    markup and matched sentence-by-sentence back to paper pages via *matcher*.

    Args:
        files_dir: directory containing ``original_text_split_pages.txt``.
        llm_api: OpenAI-style chat-completion callable
            (``llm_api(model=..., messages=..., temperature=..., ...)``).
        llm_base: model name forwarded to ``llm_api``.
        matcher: object with a ``match(sentences, pages, ...)`` method
            returning a per-sentence sequence of best-matching page indices.
        logging: logger; kept for signature compatibility (unused here).

    Returns:
        Tuple ``(questions, answers, pages)`` of parallel lists: for each
        question, the cleaned answer text and the per-sentence page indices
        produced by ``matcher.match``.

    Raises:
        Exception: if an LLM call still fails after three attempts.
    """

    def _call_llm(messages, **kwargs):
        # Retry transient API failures up to three times, sleeping 5s between
        # attempts.  Narrowed from a bare `except:` so Ctrl-C still works.
        for _ in range(3):
            try:
                return llm_api(model=llm_base, messages=messages,
                               temperature=0, **kwargs)
            except Exception:
                time.sleep(5)
        raise Exception(f"{llm_base} failed")

    with open(os.path.join(files_dir, "original_text_split_pages.txt"),
              encoding="utf-8") as f:
        paper_text = f.read()

    system_message = 'You are a college professor, known for your expert knowledge in deep learning field. ' \
                     'You are also known for creating very thoughtful and probing questions that examine' \
                     'the actual knowledge of a student based on their submitted paper. Your goal is to come up with ' \
                     'a list of questions, both on intuitive level and on deeper technical level that evaluate if ' \
                     'a student really knows about his or her work. Focus on the knowledge of the main proposed method, ' \
                     'motivation and results. Make sure your list of questions examine the student thoroughly. ' \
                     'Ask at least 10 different and diverse questions. ' \
                     'The questions must cover intuition, main idea and technical details, among others. ' \
                     'Be extremely specific and ask about details presented in the paper, no generic or abstract questions. '

    human_message = f'Below is the student arxiv paper about which the questions needs to be asked: {paper_text}'

    messages = [
        {'role': 'system', 'content': system_message},
        {'role': 'user', 'content': human_message}
    ]

    # Round 1: request the question list through a function call so the
    # result comes back as structured JSON rather than free text.
    response = _call_llm(
        messages,
        functions=[
            {
                "name": "ask_questions",
                "description": "ask questions about provided arxiv paper",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "questions": {
                            "type": "array",
                            "description": "the list of questions to be asked",
                            "items": {
                                "type": "string",
                                "description": "individual question, thoughtful and revealing",
                            }
                        },
                    },
                    "required": ["questions"],
                },
            }
        ],
        function_call="auto",
    )

    Qs = json.loads(response.choices[0].message.function_call.arguments)

    system_message = 'You are a student, who wrote this paper. You are on a very important exam. ' \
                     'You are tasked to explain your work as best as you can. ' \
                     'You will be provided with a text of the paper, split by pages and a question. ' \
                     'You must answer the question using information given in the paper. ' \
                     'The answer should be consice and to the point but still contain details. ' \
                     'And it should answer the question as best as possible. Be extremly specific. ' \
                     'Ground your response to the provided paper text. Do NOT use generic or abstract phrases. ' \
                     'Your career depends on how well you do this job. I will tip you $2000 for an excellent job done. ' \
                     'Make sure to answer using at least 10 (ten) sentences.'

    answers = []
    pages = []

    # Page texts as delimited by the 'PAGE ' marker; the chunk before the
    # first marker is dropped.  Hoisted out of the loop (loop-invariant).
    page_texts = paper_text.split('PAGE ')[1:]

    # Round 2: answer every question independently against the full paper.
    for Q in Qs['questions']:
        human_message = f'Here is the text of the split by pages: {paper_text}. And here is the question you need to answer: {Q}. ' \
                        'Make sure your answer best reflects the provided text.'

        messages = [{'role': 'system', 'content': system_message},
                    {'role': 'user', 'content': human_message}]

        response = _call_llm(messages)

        answer = response.choices[0].message.content

        # Strip markup characters that would confuse downstream TTS/matching.
        answer = answer.replace("$", '').replace("```", '').replace("<<", '').replace(">>", '').replace("**", '')

        # remove words containing double underscores (markup leftovers)
        answer = re.sub(r'\b\w*__\w*\b', '', answer)

        answers.append(answer)

        # Match each answer sentence to its most likely source page.  Split
        # on '.'; the trailing fragment after the final period is dropped.
        sentences = answer.split('.')
        seq = matcher.match(sentences[:-1], page_texts, minilm=1, bert=1,
                            fuzz=1, spacy=1, diff=1, tfidf=1, pnt=True)
        # per-sentence page indices (majority vote left to the caller)
        pages.append(seq)

    return Qs['questions'], answers, pages
277+
278+
171279
def gpt_textvideo_verbalizer(text, llm_api, llm_strong, llm_base, manual, include_summary, pageblockmap, matcher, logging):
172280

173281
encoding = tiktoken.get_encoding("cl100k_base")
@@ -200,7 +308,7 @@ def gpt_textvideo_verbalizer(text, llm_api, llm_strong, llm_base, manual, includ
200308

201309
for i_s, sec in enumerate(sections):
202310

203-
# if there is a mismatch, make them of equal length
311+
# if there is a mismatch, do this hack to make them of equal length
204312
if len(sent_tokenize(sec)) != len(pagemap_sections[i_s]):
205313
minN = min(len(sent_tokenize(sec)), len(pagemap_sections[i_s]))
206314
cleaned_sent_tok = sent_tokenize(sec)[:minN]
@@ -350,8 +458,8 @@ def gpt_textvideo_verbalizer(text, llm_api, llm_strong, llm_base, manual, includ
350458
page_inds = []
351459
curr_upd = []
352460

353-
if len(sent_tokenize(gpttext_all)) != len(gptpagemap):
354-
raise Exception("Something went wrong. Mismatch between map and text")
461+
if len(sent_tokenize(gpttext_all)) != len(gptpagemap):
462+
raise Exception("Something went wrong. Mismatch between map and text")
355463

356464
return gpttext_all, gptpagemap, verbalizer_steps, textpagemap
357465

main.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,25 @@ def main(args):
137137

138138
tmpdata = {'gpttext_short': gpttext_short, 'gptslides_short': slides_short['slides']}
139139

140+
if args.create_qa:
141+
questions, answers, qa_pages = gpt_qa_verbalizer(files_dir, llm_api, args.llm_base, matcher, logging)
142+
143+
create_questions(questions, os.path.join(files_dir, 'questions'))
144+
145+
with open(os.path.join(files_dir, 'qa_pages.pkl'), 'wb') as f:
146+
pickle.dump(qa_pages, f)
147+
148+
with open(os.path.join(files_dir, 'gpt_questions_answers.txt'), 'w') as f:
149+
for q, a in zip(questions, answers):
150+
f.write(f'==== Question ====\n\n')
151+
f.write(q)
152+
f.write("\n\n")
153+
f.write(f'==== Answer ====\n\n')
154+
f.write(a)
155+
f.write("\n\n")
156+
157+
tmpdata = {'gpttext_q': questions, 'gpttext_a': answers, 'qa_pages': qa_pages}
158+
140159
if args.create_video:
141160
(gpttext, gptpagemap,
142161
verbalizer_steps, textpagemap) = gpt_textvideo_verbalizer(text,
@@ -205,6 +224,23 @@ def main(args):
205224

206225
create_slides(slides_short, os.path.join(files_dir, 'slides'))
207226

227+
if args.create_qa:
228+
with open(os.path.join(files_dir, args.chunk_mp3_file_list), 'w') as mp3_list_file:
229+
text_to_speech_qa(questions, answers, mp3_list_file, files_dir, tts_client, args.ffmpeg, logging)
230+
231+
shutil.copy(os.path.join(files_dir, args.chunk_mp3_file_list),
232+
os.path.join(files_dir, f'qa_{args.chunk_mp3_file_list}'))
233+
234+
final_audio_qa = os.path.join(files_dir, f'{args.final_audio_file}_qa.mp3')
235+
os.system(f'{args.ffmpeg} -f concat -i {os.path.join(files_dir, args.chunk_mp3_file_list)} '
236+
f'-c copy {final_audio_qa} {display}')
237+
238+
logging.info(f'Created QA audio file')
239+
240+
if args.gdrive_id:
241+
gdrive_client.upload_audio(f'[QA] {title}', f'{final_audio_qa}')
242+
logging.info(f'Uploaded QA audio to GDrive')
243+
208244
if args.create_video:
209245
with open(os.path.join(files_dir, args.chunk_mp3_file_list), 'w') as mp3_list_file:
210246
text_to_speechvideo(gpttext, mp3_list_file, files_dir, tts_client, gptpagemap, args.voice, logging)
@@ -264,10 +300,11 @@ def main(args):
264300
parser.add_argument("--extract_text_only", action="store_true", help="extract only the text from paper and exit")
265301
parser.add_argument("--create_video", action="store_true", help="create long video")
266302
parser.add_argument("--create_short", action="store_true", help="create short video")
303+
parser.add_argument("--create_qa", action="store_true", help="create qa video")
267304
parser.add_argument("--create_audio_simple", action="store_true", help="create audio")
268305
parser.add_argument("--openai_key", type=str, default="", help='openai key to call GPT API')
269306
parser.add_argument("--llm_strong", type=str, default="gpt-4-0125-preview", help='llm model for complex tasks')
270-
parser.add_argument("--llm_base", type=str, default="gpt-3.5-turbo-1106", help='llm model for basic tasks')
307+
parser.add_argument("--llm_base", type=str, default="gpt-3.5-turbo-0125", help='llm model for basic tasks')
271308

272309
args = parser.parse_args()
273310

makevideo.py

Lines changed: 94 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def main(args):
3737

3838
# Process each line
3939
for line in lines:
40+
4041
# Remove the newline character at the end of the line
4142
line = line.strip()
4243

@@ -151,51 +152,111 @@ def main(args):
151152

152153
# =============== SHORT VIDEO ====================
153154

154-
with open(os.path.join(dr, "shorts_mp3_list.txt"), "r") as f:
155-
lines = f.readlines()
155+
if os.path.exists(os.path.join(dr, "shorts_mp3_list.txt")):
156156

157-
# create list of chunks
158-
outvideo = open(os.path.join(dr, 'short_mp4_list.txt'), 'w')
157+
with open(os.path.join(dr, "shorts_mp3_list.txt"), "r") as f:
158+
lines = f.readlines()
159159

160-
# Process each line
161-
for page_num, line in enumerate(lines):
162-
# Remove the newline character at the end of the line
163-
line = line.strip()
160+
# create list of chunks
161+
outvideo = open(os.path.join(dr, 'short_mp4_list.txt'), 'w')
164162

165-
# Split the line into components
166-
components = line.split()
163+
# Process each line
164+
for page_num, line in enumerate(lines):
165+
# Remove the newline character at the end of the line
166+
line = line.strip()
167167

168-
# The filename is the second component
169-
audio = components[1].replace('.mp3', '')
170-
video = audio.replace('-', '')
168+
# Split the line into components
169+
components = line.split()
171170

172-
# convert to PNG
173-
if page_num == 0:
174-
input_path = os.path.join(dr, str(page_num))
175-
else:
176-
input_path = os.path.join(dr, 'slides', f'slide_{page_num}')
171+
# The filename is the second component
172+
audio = components[1].replace('.mp3', '')
173+
video = audio.replace('-', '')
177174

178-
os.system(f'{args.gs} -sDEVICE=png16m -r500 -o {os.path.join(dr, str(page_num))}.png {input_path}.pdf')
175+
# convert to PNG
176+
if page_num == 0:
177+
input_path = os.path.join(dr, str(page_num))
178+
else:
179+
input_path = os.path.join(dr, 'slides', f'slide_{page_num}')
179180

180-
resolution = "scale=1920:-2"
181-
os.system(f'{args.ffmpeg} -loop 1 -i {os.path.join(dr, str(page_num))}.png -i {os.path.join(dr, audio)}.mp3 '
182-
f'-vf {resolution} -c:v libx264 -tune stillimage -y -c:a aac -b:a 128k -pix_fmt yuv420p '
183-
f'-shortest {os.path.join(dr, video)}.mp4')
181+
os.system(f'{args.gs} -sDEVICE=png16m -r500 -o {os.path.join(dr, str(page_num))}.png {input_path}.pdf')
184182

185-
# ensure that there is no silence at the end of the video, and video len is the same as audio len
186-
os.system(f'audio_duration=$({args.ffprobe} -i {os.path.join(dr, audio)}.mp3 -show_entries format=duration '
187-
f'-v quiet -of csv="p=0"); 'f'audio_duration=$((${{audio_duration%.*}} + 1)); 'f'{args.ffmpeg} '
188-
f'-i {os.path.join(dr, video)}.mp4 -t $audio_duration -y -c copy {os.path.join(dr, video)}_final.mp4')
183+
resolution = "scale=1920:-2"
184+
os.system(f'{args.ffmpeg} -loop 1 -i {os.path.join(dr, str(page_num))}.png -i {os.path.join(dr, audio)}.mp3 '
185+
f'-vf {resolution} -c:v libx264 -tune stillimage -y -c:a aac -b:a 128k -pix_fmt yuv420p '
186+
f'-shortest {os.path.join(dr, video)}.mp4')
189187

190-
# list of all chunks
191-
outvideo.write(f"file '{video}_final.mp4'\n")
188+
# ensure that there is no silence at the end of the video, and video len is the same as audio len
189+
os.system(f'audio_duration=$({args.ffprobe} -i {os.path.join(dr, audio)}.mp3 -show_entries format=duration '
190+
f'-v quiet -of csv="p=0"); 'f'audio_duration=$((${{audio_duration%.*}} + 1)); 'f'{args.ffmpeg} '
191+
f'-i {os.path.join(dr, video)}.mp4 -t $audio_duration -y -c copy {os.path.join(dr, video)}_final.mp4')
192192

193-
outvideo.close()
193+
# list of all chunks
194+
outvideo.write(f"file '{video}_final.mp4'\n")
194195

195-
# joint video
196-
os.system(f'{args.ffmpeg} -f concat -i {os.path.join(dr, "short_mp4_list.txt")} '
197-
f'-y -c copy {os.path.join(dr, "output_short.mp4")}')
196+
outvideo.close()
197+
198+
# joint video
199+
os.system(f'{args.ffmpeg} -f concat -i {os.path.join(dr, "short_mp4_list.txt")} '
200+
f'-y -c copy {os.path.join(dr, "output_short.mp4")}')
201+
202+
# =============== QA VIDEO ====================
203+
204+
if os.path.exists(os.path.join(dr, "qa_mp3_list.txt")):
205+
206+
with open(os.path.join(dr, "qa_mp3_list.txt"), "r") as f:
207+
lines = f.readlines()
208+
209+
# create list of chunks
210+
outvideo = open(os.path.join(dr, 'qa_mp4_list.txt'), 'w')
211+
212+
qa_pages = pickle.load(open(os.path.join(dr, 'qa_pages.pkl'), 'rb'))
213+
214+
# Process each line
215+
turn = -1
216+
for line_num, line in enumerate(lines):
217+
# Remove the newline character at the end of the line
218+
line = line.strip()
219+
220+
# Split the line into components
221+
components = line.split()
222+
223+
# The filename is the second component
224+
audio = components[1].replace('.mp3', '')
225+
video = audio.replace('-', '')
226+
227+
# convert to PNG
228+
if 'question' in audio: # question - get created slide
229+
turn += 1
230+
page_num = 0
231+
input_path = os.path.join(dr, 'questions', f'question_{turn}')
232+
else: # answer - get single page from paper
233+
p_num = qa_pages[turn][page_num]
234+
# extract the page from PDF
235+
os.system(f'{args.gs} -sDEVICE=pdfwrite -dNOPAUSE -dBATCH -dFirstPage={p_num+1} -dLastPage={p_num+1} -sOutputFile={os.path.join(dr, str(p_num))}.pdf {os.path.join(dr, "main.pdf")} > /dev/null 2>&1')
236+
input_path = os.path.join(dr, f'{p_num}')
237+
page_num += 1
238+
239+
qa_page = 'qa_page.png'
240+
os.system(f'{args.gs} -sDEVICE=png16m -r500 -o {os.path.join(dr, qa_page)} {input_path}.pdf')
241+
242+
resolution = "scale=1920:-2"
243+
os.system(f'{args.ffmpeg} -loop 1 -i {os.path.join(dr, qa_page)} -i {os.path.join(dr, audio)}.mp3 '
244+
f'-vf {resolution} -c:v libx264 -tune stillimage -y -c:a aac -b:a 128k -pix_fmt yuv420p '
245+
f'-shortest {os.path.join(dr, video)}.mp4')
246+
247+
# ensure that there is no silence at the end of the video, and video len is the same as audio len
248+
os.system(f'audio_duration=$({args.ffprobe} -i {os.path.join(dr, audio)}.mp3 -show_entries format=duration '
249+
f'-v quiet -of csv="p=0"); 'f'audio_duration=$((${{audio_duration%.*}} + 1)); 'f'{args.ffmpeg} '
250+
f'-i {os.path.join(dr, video)}.mp4 -t $audio_duration -y -c copy {os.path.join(dr, video)}_final.mp4')
251+
252+
# list of all chunks
253+
outvideo.write(f"file '{video}_final.mp4'\n")
254+
255+
outvideo.close()
198256

257+
# joint video
258+
os.system(f'{args.ffmpeg} -f concat -i {os.path.join(dr, "qa_mp4_list.txt")} '
259+
f'-y -c copy {os.path.join(dr, "output_qa.mp4")}')
199260

200261
if __name__ == "__main__":
201262
parser = argparse.ArgumentParser(description='Arguments')

0 commit comments

Comments
 (0)