Skip to content

Commit b59c891

Browse files
execsumo and claude
authored and committed
feat(app): merge consecutive same-speaker transcript segments into blocks
Instead of emitting a separate line for each sentence-level segment, consecutive segments from the same speaker are now merged into a single block with combined text and spanning timestamps. This produces cleaner transcripts for protocol generation. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 93894d9 commit b59c891

File tree

3 files changed

+108
-2
lines changed

3 files changed

+108
-2
lines changed

app/MeetingTranscriber/Sources/DiarizationProcess.swift

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,32 @@ enum DiarizationProcess {
129129
)
130130
}
131131

132+
/// Merge consecutive segments from the same speaker into single blocks.
///
/// A block keeps the start timestamp of its first segment and the latest end
/// timestamp among its segments, so a segment that ends before the block's
/// running end cannot pull the block's end backwards. Texts are joined with a
/// single space; empty texts are skipped so they add no stray separators.
///
/// - Parameter segments: Segments in the order they should appear in the
///   transcript. They are compared pairwise as given and are not re-sorted.
/// - Returns: One segment per run of equal `speaker` values, in input order;
///   an empty array when `segments` is empty.
static func mergeConsecutiveSpeakers(
    _ segments: [TimestampedSegment],
) -> [TimestampedSegment] {
    guard var current = segments.first else { return [] }

    var merged: [TimestampedSegment] = []
    for seg in segments.dropFirst() {
        if seg.speaker == current.speaker {
            // Only non-empty texts take part in the join, which avoids leading,
            // trailing, or doubled spaces when a segment carries no text.
            let text = [current.text, seg.text]
                .filter { !$0.isEmpty }
                .joined(separator: " ")
            current = TimestampedSegment(
                start: current.start,
                // A nested or overlapping segment may end earlier than the block so far.
                end: max(current.end, seg.end),
                text: text,
                speaker: current.speaker,
            )
        } else {
            // Speaker changed: close the running block and start a new one.
            merged.append(current)
            current = seg
        }
    }
    // The last running block is never closed inside the loop.
    merged.append(current)
    return merged
}
157+
132158
/// Assign speakers using separate diarizations for app and mic tracks.
133159
/// App segments are matched against appDiarization, mic segments against micDiarization.
134160
static func assignSpeakersDualTrack(

app/MeetingTranscriber/Sources/PipelineQueue.swift

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,12 @@ class PipelineQueue {
184184
}
185185
}
186186

187+
/// Record a warning message on the job identified by `id`.
///
/// Does nothing when no job has that id or when the job already carries an
/// identical message, so each distinct message is stored at most once per job.
///
/// - Parameters:
///   - id: Identifier of the job to annotate.
///   - message: Warning text to append to the job's `warnings`.
func addWarning(id: UUID, _ message: String) {
    guard
        let position = jobs.firstIndex(where: { $0.id == id }),
        !jobs[position].warnings.contains(message)
    else { return }

    jobs[position].warnings.append(message)
}
192+
187193
/// Reset the elapsed timer for a new pipeline stage.
188194
private func startElapsedTimer() {
189195
elapsedTimer?.cancel()
@@ -450,7 +456,8 @@ class PipelineQueue {
450456
appDiarization: namedAppDiar,
451457
micDiarization: namedMicDiar,
452458
)
453-
finalTranscript = labeled.map(\.formattedLine).joined(separator: "\n")
459+
let merged = DiarizationProcess.mergeConsecutiveSpeakers(labeled)
460+
finalTranscript = merged.map(\.formattedLine).joined(separator: "\n")
454461
} else if let currentDiarization = diarization {
455462
// Single-source: standard assignment
456463
let namedDiarization = DiarizationResult(
@@ -468,12 +475,14 @@ class PipelineQueue {
468475
transcript: segments,
469476
diarization: namedDiarization,
470477
)
471-
finalTranscript = labeled.map(\.formattedLine).joined(separator: "\n")
478+
let merged = DiarizationProcess.mergeConsecutiveSpeakers(labeled)
479+
finalTranscript = merged.map(\.formattedLine).joined(separator: "\n")
472480
}
473481
let segCount = diarization?.segments.count ?? 0
474482
logger.info("Diarization complete: \(segCount) segments")
475483
} catch {
476484
logger.warning("Diarization failed, using undiarized transcript: \(error.localizedDescription)")
485+
addWarning(id: jobID, "Diarization failed — speakers not identified")
477486
// Continue with original transcript
478487
}
479488
} else {
@@ -515,6 +524,12 @@ class PipelineQueue {
515524
}
516525
stopElapsedTimer()
517526
updateJobState(id: jobID, to: .done)
527+
if let job = jobs.first(where: { $0.id == jobID }), !job.warnings.isEmpty {
528+
NotificationManager.shared.notify(
529+
title: "Protocol Ready (with warnings)",
530+
body: job.warnings.joined(separator: "; "),
531+
)
532+
}
518533
} catch is CancellationError {
519534
stopElapsedTimer()
520535
logger.info("Job \(jobID) cancelled")

app/MeetingTranscriber/Tests/DiarizationProcessTests.swift

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,71 @@ final class DiarizationProcessTests: XCTestCase {
284284
XCTAssertNil(merged.embeddings)
285285
}
286286

287+
// MARK: - Merge Consecutive Speakers
288+
289+
func testMergeConsecutiveSpeakers() {
290+
let segments = [
291+
TimestampedSegment(start: 0, end: 5, text: "Hello there.", speaker: "Alice"),
292+
TimestampedSegment(start: 5, end: 10, text: "How are you?", speaker: "Alice"),
293+
TimestampedSegment(start: 10, end: 15, text: "I'm fine.", speaker: "Bob"),
294+
TimestampedSegment(start: 15, end: 20, text: "Thanks.", speaker: "Bob"),
295+
TimestampedSegment(start: 20, end: 25, text: "Great!", speaker: "Alice"),
296+
]
297+
298+
let merged = DiarizationProcess.mergeConsecutiveSpeakers(segments)
299+
300+
XCTAssertEqual(merged.count, 3)
301+
XCTAssertEqual(merged[0].speaker, "Alice")
302+
XCTAssertEqual(merged[0].text, "Hello there. How are you?")
303+
XCTAssertEqual(merged[0].start, 0)
304+
XCTAssertEqual(merged[0].end, 10)
305+
XCTAssertEqual(merged[1].speaker, "Bob")
306+
XCTAssertEqual(merged[1].text, "I'm fine. Thanks.")
307+
XCTAssertEqual(merged[1].start, 10)
308+
XCTAssertEqual(merged[1].end, 20)
309+
XCTAssertEqual(merged[2].speaker, "Alice")
310+
XCTAssertEqual(merged[2].text, "Great!")
311+
XCTAssertEqual(merged[2].start, 20)
312+
XCTAssertEqual(merged[2].end, 25)
313+
}
314+
315+
func testMergeConsecutiveSpeakers_empty() {
316+
let merged = DiarizationProcess.mergeConsecutiveSpeakers([])
317+
XCTAssertTrue(merged.isEmpty)
318+
}
319+
320+
func testMergeConsecutiveSpeakers_singleSegment() {
321+
let segments = [
322+
TimestampedSegment(start: 0, end: 5, text: "Hello", speaker: "Alice"),
323+
]
324+
let merged = DiarizationProcess.mergeConsecutiveSpeakers(segments)
325+
XCTAssertEqual(merged.count, 1)
326+
XCTAssertEqual(merged[0].text, "Hello")
327+
}
328+
329+
func testMergeConsecutiveSpeakers_allSameSpeaker() {
330+
let segments = [
331+
TimestampedSegment(start: 0, end: 5, text: "One.", speaker: "Alice"),
332+
TimestampedSegment(start: 5, end: 10, text: "Two.", speaker: "Alice"),
333+
TimestampedSegment(start: 10, end: 15, text: "Three.", speaker: "Alice"),
334+
]
335+
let merged = DiarizationProcess.mergeConsecutiveSpeakers(segments)
336+
XCTAssertEqual(merged.count, 1)
337+
XCTAssertEqual(merged[0].text, "One. Two. Three.")
338+
XCTAssertEqual(merged[0].start, 0)
339+
XCTAssertEqual(merged[0].end, 15)
340+
}
341+
342+
func testMergeConsecutiveSpeakers_allDifferent() {
343+
let segments = [
344+
TimestampedSegment(start: 0, end: 5, text: "A", speaker: "Alice"),
345+
TimestampedSegment(start: 5, end: 10, text: "B", speaker: "Bob"),
346+
TimestampedSegment(start: 10, end: 15, text: "C", speaker: "Carol"),
347+
]
348+
let merged = DiarizationProcess.mergeConsecutiveSpeakers(segments)
349+
XCTAssertEqual(merged.count, 3)
350+
}
351+
287352
func testDiarizationErrorDescription() {
288353
let error: DiarizationError = .notAvailable
289354
XCTAssertEqual(error.errorDescription, "Diarization not available")

0 commit comments

Comments
 (0)