Skip to content

Commit d3ab651

Browse files
Improve chapter timestamp accuracy by using raw transcript timeline
- Add a timeline skeleton to the chapter generation prompt showing actual timestamps every 30s
- Remove the skip logic for the 00:00:00 chapter heading in transcript insertion
- Reuse the existing time-to-text mapping logic for consistency
- Fix an issue where chapter timestamps could be off by several minutes
1 parent 4589139 commit d3ab651

File tree

1 file changed

+52
-7
lines changed

1 file changed

+52
-7
lines changed

video-transcripts/transcripts.py

Lines changed: 52 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -527,10 +527,6 @@ def insert_timestamps_in_transcript(cleaned_transcript, chapters, raw_transcript
527527
line_to_chapter = {} # Maps line index to (timestamp_str, title) to insert
528528

529529
for seconds, timestamp_str, title in chapters:
530-
# Skip 00:00:00 as it's at the beginning
531-
if seconds == 0:
532-
continue
533-
534530
# Get text from a window around the timestamp (±5 seconds)
535531
window_texts = []
536532
for time_sec in range(max(0, seconds - 5), seconds + 6):
@@ -870,6 +866,55 @@ def create_chapters(transcript, video_id, raw_transcript=None):
870866
duration_str = format_duration(duration_seconds)
871867
duration_constraint = f"\n\nIMPORTANT: This video is {duration_str} long. DO NOT generate any timestamps beyond {duration_str}. All timestamps must be less than or equal to {duration_str}."
872868

869+
# Build a timeline skeleton from raw transcript if available
870+
timeline_context = ""
871+
if raw_transcript:
872+
print(f"Building timeline skeleton from raw transcript for video {video_id}")
873+
874+
# Build mapping of time to text (reusing existing logic)
875+
time_to_texts = {}
876+
for entry in raw_transcript:
877+
if isinstance(entry, dict) and 'text' in entry and 'start' in entry:
878+
text = entry['text'].strip()
879+
if text and text not in ['[Music]', '[Applause]', '[Laughter]']:
880+
start_time = int(entry['start'])
881+
if start_time not in time_to_texts:
882+
time_to_texts[start_time] = []
883+
time_to_texts[start_time].append(text)
884+
885+
# Create timeline samples every 30 seconds
886+
timeline_samples = []
887+
sample_interval = 30 # seconds
888+
max_samples = 40 # Limit to avoid token overflow
889+
890+
current_time = 0
891+
sample_count = 0
892+
893+
while current_time <= (duration_seconds or 3600) and sample_count < max_samples:
894+
# Get text from a small window around this time
895+
window_texts = []
896+
for time_sec in range(current_time, min(current_time + 10, (duration_seconds or 3600) + 1)):
897+
if time_sec in time_to_texts:
898+
window_texts.extend(time_to_texts[time_sec])
899+
900+
if window_texts:
901+
# Take first few words as a sample
902+
sample_text = ' '.join(window_texts)[:150] # Limit length
903+
timestamp_str = format_duration(current_time)
904+
timeline_samples.append(f"[{timestamp_str}]: {sample_text}")
905+
sample_count += 1
906+
907+
current_time += sample_interval
908+
909+
if timeline_samples:
910+
timeline_context = f"""
911+
912+
TIMELINE REFERENCE - These are actual timestamps from the video showing what is being said at different times:
913+
914+
{chr(10).join(timeline_samples)}
915+
916+
USE THESE ACTUAL TIMESTAMPS to determine when topics change. Your chapter timestamps MUST come from the times shown above or nearby times. DO NOT guess or make up timestamps."""
917+
873918
chapters_prompt = f"""
874919
This is a transcript of a YouTube livestream. Could you please identify up to 10 key moments in the stream and give me the timestamps in the format for YouTube like this?:
875920
00:00:00 Introductions
@@ -879,18 +924,18 @@ def create_chapters(transcript, video_id, raw_transcript=None):
879924
880925
CRITICAL INSTRUCTIONS:
881926
- Always start with 00:00:00
882-
- Use the ACTUAL timestamps from where topics begin in the transcript
927+
- Use the ACTUAL timestamps from the timeline reference below
883928
- DO NOT round timestamps to neat intervals like :00 or :30
884929
- Use precise timestamps like 00:05:17, 00:12:43, 00:08:09, etc.
885930
- Look at the actual flow of conversation to determine when topics change
886931
- The chapter description MUST accurately describe what is being said RIGHT AT that timestamp
887932
- Do NOT describe what happens later - only describe what is happening at the exact moment of the timestamp
888-
- Read the text carefully around each timestamp to ensure your description matches what's actually being discussed
933+
- Read the timeline reference carefully to see what's actually being said at each time
889934
- If a guest is introduced at 00:05:30, don't put "Guest introduction" at 00:20:00
890935
- Be precise and honest about what's happening at each moment
891936
- KEEP DESCRIPTIONS CONCISE: Use 2-6 words maximum, not full sentences
892937
- Descriptions should be SHORT PHRASES like "Guest introduction", "Discussion about X", "Demo of Y feature"
893-
- DO NOT write full sentences or lengthy explanations in the chapter titles{duration_constraint}
938+
- DO NOT write full sentences or lengthy explanations in the chapter titles{duration_constraint}{timeline_context}
894939
"""
895940

896941
print(f"Creating chapters for video {video_id}")

0 commit comments

Comments
 (0)