Skip to content

Commit 3342bb1

Browse files
committed
fix: buffer reasoning content to ensure proper ordering in OpenCode
The Auggie SDK sends agent_thought_chunk and agent_message_chunk events in an interleaved manner. The AI SDK's @ai-sdk/openai-compatible provider expects reasoning content to come BEFORE text content. When text arrives, it ends any active reasoning block. If reasoning chunks arrive after text, they create a new reasoning block at the end, causing thinking to appear at the bottom of the output. This fix:
- Buffers all reasoning content in the reasoningBuffer array
- Flushes all buffered reasoning when the first text content arrives
- Flushes any remaining buffer at end-of-stream in the finally block
- Logs a warning for late reasoning chunks that arrive after text has started
Fixes an issue where thinking progress appeared at the bottom of the output when using auggie-wrapper with OpenCode.
1 parent 6843779 commit 3342bb1

File tree

1 file changed

+97
-22
lines changed

1 file changed

+97
-22
lines changed

src/server.ts

Lines changed: 97 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1094,7 +1094,12 @@ function formatLocations(locations?: ToolCallLocation[]): string {
10941094
const toolCallIndices = new Map<string, number>();
10951095
let toolCallCounter = 0;
10961096
1097-
function createStreamCallback(res: ServerResponse, model: string, requestId: string) {
1097+
interface StreamCallbackResult {
1098+
callback: (notification: SessionNotification) => void;
1099+
flush: () => void;
1100+
}
1101+
1102+
function createStreamCallback(res: ServerResponse, model: string, requestId: string): StreamCallbackResult {
10981103
// Reset tool call tracking for this request
10991104
toolCallIndices.clear();
11001105
toolCallCounter = 0;
@@ -1103,7 +1108,48 @@ function createStreamCallback(res: ServerResponse, model: string, requestId: str
11031108
let chunkCount = 0;
11041109
let lastChunkTime = Date.now();
11051110
1106-
return (notification: SessionNotification): void => {
1111+
// Buffer for reasoning content - we need to send all reasoning BEFORE any text
1112+
// to ensure proper ordering in OpenCode's UI
1113+
let reasoningBuffer: string[] = [];
1114+
let hasStartedTextContent = false;
1115+
let hasFlushedReasoning = false;
1116+
1117+
// Helper to flush buffered reasoning content
1118+
const flushReasoningBuffer = (): void => {
1119+
if (hasFlushedReasoning || reasoningBuffer.length === 0) return;
1120+
hasFlushedReasoning = true;
1121+
1122+
const combinedReasoning = reasoningBuffer.join('');
1123+
const timestamp = Math.floor(Date.now() / 1000);
1124+
1125+
console.log(`[${requestId}] 💭 Flushing ${String(reasoningBuffer.length)} reasoning chunks (${String(combinedReasoning.length)} chars)`);
1126+
1127+
// Send all reasoning as a single chunk before text starts
1128+
const thoughtChunk = {
1129+
id: `chatcmpl-${requestId}`,
1130+
object: 'chat.completion.chunk',
1131+
created: timestamp,
1132+
model,
1133+
system_fingerprint: SYSTEM_FINGERPRINT,
1134+
choices: [
1135+
{
1136+
index: 0,
1137+
delta: {
1138+
role: 'assistant',
1139+
reasoning_content: combinedReasoning,
1140+
},
1141+
finish_reason: null,
1142+
logprobs: null,
1143+
},
1144+
],
1145+
};
1146+
res.write(`data: ${JSON.stringify(thoughtChunk)}\n\n`);
1147+
1148+
// Clear the buffer
1149+
reasoningBuffer = [];
1150+
};
1151+
1152+
const callback = (notification: SessionNotification): void => {
11071153
const update = notification.update;
11081154
const sessionId = notification.sessionId ?? requestId;
11091155
const timestamp = Math.floor(Date.now() / 1000);
@@ -1140,6 +1186,12 @@ function createStreamCallback(res: ServerResponse, model: string, requestId: str
11401186
11411187
case 'agent_message_chunk':
11421188
if (update.content?.type === 'text' && update.content.text) {
1189+
// Flush any buffered reasoning before sending text content
1190+
// This ensures reasoning appears BEFORE text in the UI
1191+
if (!hasStartedTextContent) {
1192+
hasStartedTextContent = true;
1193+
flushReasoningBuffer();
1194+
}
11431195
res.write(createStreamChunk(update.content.text, model));
11441196
}
11451197
break;
@@ -1150,26 +1202,39 @@ function createStreamCallback(res: ServerResponse, model: string, requestId: str
11501202
console.log(
11511203
`[${requestId}] 💭 Thinking: ${text.substring(0, 100)}${text.length > 100 ? '...' : ''}`
11521204
);
1153-
// Send thinking as extended thinking format (Anthropic-style)
1154-
const thoughtChunk = {
1155-
id: `chatcmpl-${requestId}`,
1156-
object: 'chat.completion.chunk',
1157-
created: timestamp,
1158-
model,
1159-
system_fingerprint: SYSTEM_FINGERPRINT,
1160-
choices: [
1161-
{
1162-
index: 0,
1163-
delta: {
1164-
role: 'assistant',
1165-
reasoning_content: text,
1205+
1206+
// If we haven't started text content yet, buffer the reasoning
1207+
// This handles the case where thought chunks come interleaved with message chunks
1208+
if (!hasStartedTextContent) {
1209+
reasoningBuffer.push(text);
1210+
} else {
1211+
// If text has already started and we get more reasoning,
1212+
// we need to send it immediately as a new reasoning block.
1213+
// However, this is suboptimal - the UI may show it at the end.
1214+
// Log a warning for debugging.
1215+
console.log(
1216+
`[${requestId}] ⚠️ Late reasoning chunk received after text started - may appear at end of output`
1217+
);
1218+
const thoughtChunk = {
1219+
id: `chatcmpl-${requestId}`,
1220+
object: 'chat.completion.chunk',
1221+
created: timestamp,
1222+
model,
1223+
system_fingerprint: SYSTEM_FINGERPRINT,
1224+
choices: [
1225+
{
1226+
index: 0,
1227+
delta: {
1228+
role: 'assistant',
1229+
reasoning_content: text,
1230+
},
1231+
finish_reason: null,
1232+
logprobs: null,
11661233
},
1167-
finish_reason: null,
1168-
logprobs: null,
1169-
},
1170-
],
1171-
};
1172-
res.write(`data: ${JSON.stringify(thoughtChunk)}\n\n`);
1234+
],
1235+
};
1236+
res.write(`data: ${JSON.stringify(thoughtChunk)}\n\n`);
1237+
}
11731238
}
11741239
break;
11751240
@@ -1265,6 +1330,11 @@ function createStreamCallback(res: ServerResponse, model: string, requestId: str
12651330
debugLog(`Unknown Update [${requestId}]`, update);
12661331
}
12671332
};
1333+
1334+
return {
1335+
callback,
1336+
flush: flushReasoningBuffer,
1337+
};
12681338
}
12691339
12701340
// Check if response is an SDK error (JSON with error field)
@@ -1298,7 +1368,8 @@ async function callAugmentAPIStreamingInternal(
12981368
console.log(`[${requestId}] 🚀 Starting streaming call to ${modelId} (prompt: ${String(prompt.length)} chars, workspace: ${workspaceRoot ?? 'default'})`);
12991369
13001370
const client = await getAuggieClient(modelId, workspaceRoot);
1301-
client.onSessionUpdate(createStreamCallback(res, model, requestId));
1371+
const streamHandler = createStreamCallback(res, model, requestId);
1372+
client.onSessionUpdate(streamHandler.callback);
13021373
let hasError = false;
13031374
let caughtError: Error | null = null;
13041375
@@ -1334,6 +1405,10 @@ async function callAugmentAPIStreamingInternal(
13341405
hasError = true;
13351406
caughtError = err as Error;
13361407
} finally {
1408+
// Flush any buffered reasoning content before ending the stream
1409+
// This handles the case where reasoning was received but no text content followed
1410+
streamHandler.flush();
1411+
13371412
client.onSessionUpdate(null);
13381413
// Discard client on session errors or aborts, otherwise return to pool
13391414
if (hasError && caughtError) {

0 commit comments

Comments
 (0)