Skip to content

Commit 24f6025

Browse files
committed
fix: stream thinking blocks inline instead of buffering them
Previously, all reasoning_content (agent_thought_chunk) was buffered and flushed as a single consolidated block before the first text chunk. This caused OpenCode to display thinking at the bottom instead of inline.

Changes:
- Stream reasoning chunks immediately as they arrive, preserving the natural interleaved order (think → text → think → text)
- Use consistent chunk IDs per response (chatcmpl-{requestId}) instead of random UUIDs per chunk, per the OpenAI streaming spec
- Remove the unused createStreamChunk helper and the flush/buffering logic
- Simplify the StreamCallbackResult interface
1 parent 1e49ed1 commit 24f6025

File tree

1 file changed

+48
-108
lines changed

1 file changed

+48
-108
lines changed

src/server.ts

Lines changed: 48 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,29 +1040,6 @@ function createChatResponse(content: string, model: string, promptText?: string)
10401040
// System fingerprint for reproducibility tracking
10411041
const SYSTEM_FINGERPRINT = `auggie-wrapper-${process.env['npm_package_version'] ?? '1.0.0'}`;
10421042
1043-
function createStreamChunk(
1044-
content: string,
1045-
model: string,
1046-
isLast = false,
1047-
extraDelta: Record<string, unknown> = {}
1048-
): string {
1049-
const chunk = {
1050-
id: `chatcmpl-${randomUUID()}`,
1051-
object: 'chat.completion.chunk',
1052-
created: Math.floor(Date.now() / 1000),
1053-
model: model || DEFAULT_MODEL,
1054-
system_fingerprint: SYSTEM_FINGERPRINT,
1055-
choices: [
1056-
{
1057-
index: 0,
1058-
delta: isLast ? {} : { content, ...extraDelta },
1059-
finish_reason: isLast ? 'stop' : null,
1060-
logprobs: null,
1061-
},
1062-
],
1063-
};
1064-
return `data: ${JSON.stringify(chunk)}\n\n`;
1065-
}
10661043
10671044
// Helper to format tool call content for streaming
10681045
function formatToolCallContent(toolContent?: ToolCallContent[]): string {
@@ -1096,7 +1073,6 @@ let toolCallCounter = 0;
10961073
10971074
interface StreamCallbackResult {
10981075
callback: (notification: SessionNotification) => void;
1099-
flush: () => void;
11001076
}
11011077
11021078
function createStreamCallback(res: ServerResponse, model: string, requestId: string): StreamCallbackResult {
@@ -1108,46 +1084,8 @@ function createStreamCallback(res: ServerResponse, model: string, requestId: str
11081084
let chunkCount = 0;
11091085
let lastChunkTime = Date.now();
11101086
1111-
// Buffer for reasoning content - we need to send all reasoning BEFORE any text
1112-
// to ensure proper ordering in OpenCode's UI
1113-
let reasoningBuffer: string[] = [];
1114-
let hasStartedTextContent = false;
1115-
let hasFlushedReasoning = false;
1116-
1117-
// Helper to flush buffered reasoning content
1118-
const flushReasoningBuffer = (): void => {
1119-
if (hasFlushedReasoning || reasoningBuffer.length === 0) return;
1120-
hasFlushedReasoning = true;
1121-
1122-
const combinedReasoning = reasoningBuffer.join('');
1123-
const timestamp = Math.floor(Date.now() / 1000);
1124-
1125-
console.log(`[${requestId}] 💭 Flushing ${String(reasoningBuffer.length)} reasoning chunks (${String(combinedReasoning.length)} chars)`);
1126-
1127-
// Send all reasoning as a single chunk before text starts
1128-
const thoughtChunk = {
1129-
id: `chatcmpl-${requestId}`,
1130-
object: 'chat.completion.chunk',
1131-
created: timestamp,
1132-
model,
1133-
system_fingerprint: SYSTEM_FINGERPRINT,
1134-
choices: [
1135-
{
1136-
index: 0,
1137-
delta: {
1138-
role: 'assistant',
1139-
reasoning_content: combinedReasoning,
1140-
},
1141-
finish_reason: null,
1142-
logprobs: null,
1143-
},
1144-
],
1145-
};
1146-
res.write(`data: ${JSON.stringify(thoughtChunk)}\n\n`);
1147-
1148-
// Clear the buffer
1149-
reasoningBuffer = [];
1150-
};
1087+
// Use a consistent chunk ID for all chunks in this response (per OpenAI spec)
1088+
const chunkId = `chatcmpl-${requestId}`;
11511089
11521090
const callback = (notification: SessionNotification): void => {
11531091
const update = notification.update;
@@ -1186,13 +1124,23 @@ function createStreamCallback(res: ServerResponse, model: string, requestId: str
11861124
11871125
case 'agent_message_chunk':
11881126
if (update.content?.type === 'text' && update.content.text) {
1189-
// Flush any buffered reasoning before sending text content
1190-
// This ensures reasoning appears BEFORE text in the UI
1191-
if (!hasStartedTextContent) {
1192-
hasStartedTextContent = true;
1193-
flushReasoningBuffer();
1194-
}
1195-
res.write(createStreamChunk(update.content.text, model));
1127+
// Send text content immediately - preserves natural ordering
1128+
const textChunk = {
1129+
id: chunkId,
1130+
object: 'chat.completion.chunk',
1131+
created: timestamp,
1132+
model,
1133+
system_fingerprint: SYSTEM_FINGERPRINT,
1134+
choices: [
1135+
{
1136+
index: 0,
1137+
delta: { content: update.content.text },
1138+
finish_reason: null,
1139+
logprobs: null,
1140+
},
1141+
],
1142+
};
1143+
res.write(`data: ${JSON.stringify(textChunk)}\n\n`);
11961144
}
11971145
break;
11981146
@@ -1203,38 +1151,26 @@ function createStreamCallback(res: ServerResponse, model: string, requestId: str
12031151
`[${requestId}] 💭 Thinking: ${text.substring(0, 100)}${text.length > 100 ? '...' : ''}`
12041152
);
12051153
1206-
// If we haven't started text content yet, buffer the reasoning
1207-
// This handles the case where thought chunks come interleaved with message chunks
1208-
if (!hasStartedTextContent) {
1209-
reasoningBuffer.push(text);
1210-
} else {
1211-
// If text has already started and we get more reasoning,
1212-
// we need to send it immediately as a new reasoning block.
1213-
// However, this is suboptimal - the UI may show it at the end.
1214-
// Log a warning for debugging.
1215-
console.log(
1216-
`[${requestId}] ⚠️ Late reasoning chunk received after text started - may appear at end of output`
1217-
);
1218-
const thoughtChunk = {
1219-
id: `chatcmpl-${requestId}`,
1220-
object: 'chat.completion.chunk',
1221-
created: timestamp,
1222-
model,
1223-
system_fingerprint: SYSTEM_FINGERPRINT,
1224-
choices: [
1225-
{
1226-
index: 0,
1227-
delta: {
1228-
role: 'assistant',
1229-
reasoning_content: text,
1230-
},
1231-
finish_reason: null,
1232-
logprobs: null,
1154+
// Stream reasoning chunks immediately to preserve interleaved ordering
1155+
// (think → text → think → text) so they appear inline in OpenCode
1156+
const thoughtChunk = {
1157+
id: chunkId,
1158+
object: 'chat.completion.chunk',
1159+
created: timestamp,
1160+
model,
1161+
system_fingerprint: SYSTEM_FINGERPRINT,
1162+
choices: [
1163+
{
1164+
index: 0,
1165+
delta: {
1166+
reasoning_content: text,
12331167
},
1234-
],
1235-
};
1236-
res.write(`data: ${JSON.stringify(thoughtChunk)}\n\n`);
1237-
}
1168+
finish_reason: null,
1169+
logprobs: null,
1170+
},
1171+
],
1172+
};
1173+
res.write(`data: ${JSON.stringify(thoughtChunk)}\n\n`);
12381174
}
12391175
break;
12401176
@@ -1333,7 +1269,6 @@ function createStreamCallback(res: ServerResponse, model: string, requestId: str
13331269
13341270
return {
13351271
callback,
1336-
flush: flushReasoningBuffer,
13371272
};
13381273
}
13391274
@@ -1405,10 +1340,6 @@ async function callAugmentAPIStreamingInternal(
14051340
hasError = true;
14061341
caughtError = err as Error;
14071342
} finally {
1408-
// Flush any buffered reasoning content before ending the stream
1409-
// This handles the case where reasoning was received but no text content followed
1410-
streamHandler.flush();
1411-
14121343
client.onSessionUpdate(null);
14131344
// Discard client on session errors or aborts, otherwise return to pool
14141345
if (hasError && caughtError) {
@@ -1655,7 +1586,16 @@ async function handleChatCompletions(req: IncomingMessage, res: ServerResponse):
16551586
16561587
try {
16571588
await callAugmentAPIStreaming(prompt, model, res, requestId, model, workspaceRoot ?? undefined, abortController.signal);
1658-
res.write(createStreamChunk('', model, true));
1589+
// Send final stop chunk with consistent ID
1590+
const stopChunk = {
1591+
id: `chatcmpl-${requestId}`,
1592+
object: 'chat.completion.chunk',
1593+
created: Math.floor(Date.now() / 1000),
1594+
model: model || DEFAULT_MODEL,
1595+
system_fingerprint: SYSTEM_FINGERPRINT,
1596+
choices: [{ index: 0, delta: {}, finish_reason: 'stop', logprobs: null }],
1597+
};
1598+
res.write(`data: ${JSON.stringify(stopChunk)}\n\n`);
16591599
res.write('data: [DONE]\n\n');
16601600
cleanup(true);
16611601
} catch (err) {

0 commit comments

Comments
 (0)