Skip to content

Commit a1b543b

Browse files
committed
πŸ› fix: buffer reasoning chunks before text content in stream
The Auggie SDK may emit agent_thought_chunk events in varying order relative to agent_message_chunk events. Without buffering, thinking blocks could appear at the bottom of the reply in OpenCode. Add a reasoning buffer that collects thinking chunks and flushes them as a single reasoning_content delta before the first text content chunk, ensuring thinking renders at the top in OpenCode. Late reasoning chunks (arriving after text) are still sent immediately to preserve all content.
1 parent 2a42574 commit a1b543b

File tree

1 file changed

+72
-20
lines changed

1 file changed

+72
-20
lines changed

plugin/src/server.ts

Lines changed: 72 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -446,20 +446,62 @@ function createChatResponse(
446446

447447
// ─── Streaming ──────────────────────────────────────────────────────────────
448448

449+
interface StreamHandler {
450+
callback: (notification: SessionNotification) => void;
451+
flush: () => void;
452+
}
453+
449454
function createStreamCallback(
450455
res: ServerResponse,
451456
model: string,
452457
requestId: string
453-
): (notification: SessionNotification) => void {
458+
): StreamHandler {
454459
const chunkId = `chatcmpl-${requestId}`;
455460

456-
return (notification: SessionNotification): void => {
461+
// Reasoning buffer: collect thinking chunks and flush before first text content.
462+
// The Auggie SDK may send agent_thought_chunk events before or after
463+
// agent_message_chunk events. Buffering ensures reasoning appears before
464+
// content in the SSE stream, so OpenCode renders thinking at the top.
465+
const reasoningBuffer: string[] = [];
466+
let hasStartedTextContent = false;
467+
let hasFlushedReasoning = false;
468+
469+
function flushReasoningBuffer(): void {
470+
if (hasFlushedReasoning || reasoningBuffer.length === 0) return;
471+
hasFlushedReasoning = true;
472+
const combined = reasoningBuffer.join("");
473+
reasoningBuffer.length = 0;
474+
const timestamp = Math.floor(Date.now() / 1000);
475+
res.write(
476+
`data: ${JSON.stringify({
477+
id: chunkId,
478+
object: "chat.completion.chunk",
479+
created: timestamp,
480+
model,
481+
choices: [
482+
{
483+
index: 0,
484+
delta: { reasoning_content: combined },
485+
finish_reason: null,
486+
logprobs: null,
487+
},
488+
],
489+
})}\n\n`
490+
);
491+
}
492+
493+
const callback = (notification: SessionNotification): void => {
457494
const update = notification.update;
458495
const timestamp = Math.floor(Date.now() / 1000);
459496

460497
switch (update.sessionUpdate) {
461498
case "agent_message_chunk":
462499
if (update.content?.type === "text" && update.content.text) {
500+
// Flush any buffered reasoning before the first text content
501+
if (!hasStartedTextContent) {
502+
hasStartedTextContent = true;
503+
flushReasoningBuffer();
504+
}
463505
res.write(
464506
`data: ${JSON.stringify({
465507
id: chunkId,
@@ -481,22 +523,28 @@ function createStreamCallback(
481523

482524
case "agent_thought_chunk":
483525
if (update.content?.type === "text" && update.content.text) {
484-
res.write(
485-
`data: ${JSON.stringify({
486-
id: chunkId,
487-
object: "chat.completion.chunk",
488-
created: timestamp,
489-
model,
490-
choices: [
491-
{
492-
index: 0,
493-
delta: { reasoning_content: update.content.text },
494-
finish_reason: null,
495-
logprobs: null,
496-
},
497-
],
498-
})}\n\n`
499-
);
526+
if (!hasStartedTextContent) {
527+
// Buffer reasoning before text content starts
528+
reasoningBuffer.push(update.content.text);
529+
} else {
530+
// Late reasoning: text already started, send immediately
531+
res.write(
532+
`data: ${JSON.stringify({
533+
id: chunkId,
534+
object: "chat.completion.chunk",
535+
created: timestamp,
536+
model,
537+
choices: [
538+
{
539+
index: 0,
540+
delta: { reasoning_content: update.content.text },
541+
finish_reason: null,
542+
logprobs: null,
543+
},
544+
],
545+
})}\n\n`
546+
);
547+
}
500548
}
501549
break;
502550

@@ -513,6 +561,8 @@ function createStreamCallback(
513561
break;
514562
}
515563
};
564+
565+
return { callback, flush: flushReasoningBuffer };
516566
}
517567

518568
// ─── SDK Error Check ────────────────────────────────────────────────────────
@@ -547,8 +597,8 @@ async function callStreamingInternal(
547597
signal?: AbortSignal
548598
): Promise<void> {
549599
const client = await getAuggieClient(modelId, workspaceRoot);
550-
const callback = createStreamCallback(res, model, requestId);
551-
client.onSessionUpdate(callback);
600+
const streamHandler = createStreamCallback(res, model, requestId);
601+
client.onSessionUpdate(streamHandler.callback);
552602
let caughtError: Error | null = null;
553603

554604
const abortPromise = signal
@@ -569,6 +619,8 @@ async function callStreamingInternal(
569619
} catch (err) {
570620
caughtError = err as Error;
571621
} finally {
622+
// Flush any remaining buffered reasoning before cleanup
623+
streamHandler.flush();
572624
client.onSessionUpdate(null);
573625
if (caughtError && (caughtError.message === "Request aborted" || isSessionError(caughtError))) {
574626
discardAuggieClient(modelId, client, workspaceRoot);

0 commit comments

Comments (0)