@@ -1040,29 +1040,6 @@ function createChatResponse(content: string, model: string, promptText?: string)
10401040// System fingerprint for reproducibility tracking
10411041const SYSTEM_FINGERPRINT = `auggie-wrapper-${process.env['npm_package_version'] ?? '1.0.0'}`;
10421042
1043- function createStreamChunk(
1044- content: string,
1045- model: string,
1046- isLast = false,
1047- extraDelta: Record<string, unknown> = {}
1048- ): string {
1049- const chunk = {
1050-    id: `chatcmpl-${randomUUID()}`,
1051- object: 'chat.completion.chunk',
1052- created: Math.floor(Date.now() / 1000),
1053- model: model || DEFAULT_MODEL,
1054- system_fingerprint: SYSTEM_FINGERPRINT,
1055- choices: [
1056- {
1057- index: 0,
1058- delta: isLast ? {} : { content, ...extraDelta },
1059- finish_reason: isLast ? 'stop' : null,
1060- logprobs: null,
1061- },
1062- ],
1063- };
1064-  return `data: ${JSON.stringify(chunk)}\n\n`;
1065- }
10661043
10671044// Helper to format tool call content for streaming
10681045function formatToolCallContent(toolContent?: ToolCallContent[]): string {
@@ -1096,7 +1073,6 @@ let toolCallCounter = 0;
10961073
10971074interface StreamCallbackResult {
10981075 callback: (notification: SessionNotification) => void;
1099- flush: () => void;
11001076}
11011077
11021078function createStreamCallback(res: ServerResponse, model: string, requestId: string): StreamCallbackResult {
@@ -1108,46 +1084,8 @@ function createStreamCallback(res: ServerResponse, model: string, requestId: str
11081084 let chunkCount = 0;
11091085 let lastChunkTime = Date.now();
11101086
1111- // Buffer for reasoning content - we need to send all reasoning BEFORE any text
1112- // to ensure proper ordering in OpenCode's UI
1113- let reasoningBuffer: string[] = [];
1114- let hasStartedTextContent = false;
1115- let hasFlushedReasoning = false;
1116-
1117- // Helper to flush buffered reasoning content
1118- const flushReasoningBuffer = (): void => {
1119- if (hasFlushedReasoning || reasoningBuffer.length === 0) return;
1120- hasFlushedReasoning = true;
1121-
1122- const combinedReasoning = reasoningBuffer.join('');
1123- const timestamp = Math.floor(Date.now() / 1000);
1124-
1125-    console.log(`[${requestId}] 💭 Flushing ${String(reasoningBuffer.length)} reasoning chunks (${String(combinedReasoning.length)} chars)`);
1126-
1127- // Send all reasoning as a single chunk before text starts
1128- const thoughtChunk = {
1129-      id: `chatcmpl-${requestId}`,
1130- object: 'chat.completion.chunk',
1131- created: timestamp,
1132- model,
1133- system_fingerprint: SYSTEM_FINGERPRINT,
1134- choices: [
1135- {
1136- index: 0,
1137- delta: {
1138- role: 'assistant',
1139- reasoning_content: combinedReasoning,
1140- },
1141- finish_reason: null,
1142- logprobs: null,
1143- },
1144- ],
1145- };
1146-    res.write(`data: ${JSON.stringify(thoughtChunk)}\n\n`);
1147-
1148- // Clear the buffer
1149- reasoningBuffer = [];
1150- };
1087+ // Use a consistent chunk ID for all chunks in this response (per OpenAI spec)
1088+  const chunkId = `chatcmpl-${requestId}`;
11511089
11521090 const callback = (notification: SessionNotification): void => {
11531091 const update = notification.update;
@@ -1186,13 +1124,23 @@ function createStreamCallback(res: ServerResponse, model: string, requestId: str
11861124
11871125 case 'agent_message_chunk':
11881126 if (update.content?.type === 'text' && update.content.text) {
1189- // Flush any buffered reasoning before sending text content
1190- // This ensures reasoning appears BEFORE text in the UI
1191- if (!hasStartedTextContent) {
1192- hasStartedTextContent = true;
1193- flushReasoningBuffer();
1194- }
1195- res.write(createStreamChunk(update.content.text, model));
1127+ // Send text content immediately - preserves natural ordering
1128+ const textChunk = {
1129+ id: chunkId,
1130+ object: 'chat.completion.chunk',
1131+ created: timestamp,
1132+ model,
1133+ system_fingerprint: SYSTEM_FINGERPRINT,
1134+ choices: [
1135+ {
1136+ index: 0,
1137+ delta: { content: update.content.text },
1138+ finish_reason: null,
1139+ logprobs: null,
1140+ },
1141+ ],
1142+ };
1143+          res.write(`data: ${JSON.stringify(textChunk)}\n\n`);
11961144 }
11971145 break;
11981146
@@ -1203,38 +1151,26 @@ function createStreamCallback(res: ServerResponse, model: string, requestId: str
12031151            `[${requestId}] 💭 Thinking: ${text.substring(0, 100)}${text.length > 100 ? '...' : ''}`
12041152 );
12051153
1206- // If we haven't started text content yet, buffer the reasoning
1207- // This handles the case where thought chunks come interleaved with message chunks
1208- if (!hasStartedTextContent) {
1209- reasoningBuffer.push(text);
1210- } else {
1211- // If text has already started and we get more reasoning,
1212- // we need to send it immediately as a new reasoning block.
1213- // However, this is suboptimal - the UI may show it at the end.
1214- // Log a warning for debugging.
1215- console.log(
1216-            `[${requestId}] ⚠️ Late reasoning chunk received after text started - may appear at end of output`
1217- );
1218- const thoughtChunk = {
1219-            id: `chatcmpl-${requestId}`,
1220- object: 'chat.completion.chunk',
1221- created: timestamp,
1222- model,
1223- system_fingerprint: SYSTEM_FINGERPRINT,
1224- choices: [
1225- {
1226- index: 0,
1227- delta: {
1228- role: 'assistant',
1229- reasoning_content: text,
1230- },
1231- finish_reason: null,
1232- logprobs: null,
1154+ // Stream reasoning chunks immediately to preserve interleaved ordering
1155+ // (think → text → think → text) so they appear inline in OpenCode
1156+ const thoughtChunk = {
1157+ id: chunkId,
1158+ object: 'chat.completion.chunk',
1159+ created: timestamp,
1160+ model,
1161+ system_fingerprint: SYSTEM_FINGERPRINT,
1162+ choices: [
1163+ {
1164+ index: 0,
1165+ delta: {
1166+ reasoning_content: text,
12331167 },
1234- ],
1235- };
1236-          res.write(`data: ${JSON.stringify(thoughtChunk)}\n\n`);
1237- }
1168+ finish_reason: null,
1169+ logprobs: null,
1170+ },
1171+ ],
1172+ };
1173+          res.write(`data: ${JSON.stringify(thoughtChunk)}\n\n`);
12381174 }
12391175 break;
12401176
@@ -1333,7 +1269,6 @@ function createStreamCallback(res: ServerResponse, model: string, requestId: str
13331269
13341270 return {
13351271 callback,
1336- flush: flushReasoningBuffer,
13371272 };
13381273}
13391274
@@ -1405,10 +1340,6 @@ async function callAugmentAPIStreamingInternal(
14051340 hasError = true;
14061341 caughtError = err as Error;
14071342 } finally {
1408- // Flush any buffered reasoning content before ending the stream
1409- // This handles the case where reasoning was received but no text content followed
1410- streamHandler.flush();
1411-
14121343 client.onSessionUpdate(null);
14131344 // Discard client on session errors or aborts, otherwise return to pool
14141345 if (hasError && caughtError) {
@@ -1655,7 +1586,16 @@ async function handleChatCompletions(req: IncomingMessage, res: ServerResponse):
16551586
16561587 try {
16571588 await callAugmentAPIStreaming(prompt, model, res, requestId, model, workspaceRoot ?? undefined, abortController.signal);
1658- res.write(createStreamChunk('', model, true));
1589+ // Send final stop chunk with consistent ID
1590+ const stopChunk = {
1591+        id: `chatcmpl-${requestId}`,
1592+ object: 'chat.completion.chunk',
1593+ created: Math.floor(Date.now() / 1000),
1594+ model: model || DEFAULT_MODEL,
1595+ system_fingerprint: SYSTEM_FINGERPRINT,
1596+ choices: [{ index: 0, delta: {}, finish_reason: 'stop', logprobs: null }],
1597+ };
1598+      res.write(`data: ${JSON.stringify(stopChunk)}\n\n`);
16591599      res.write('data: [DONE]\n\n');
16601600      cleanup(true);
16611601    } catch (err) {
0 commit comments