@@ -1094,7 +1094,12 @@ function formatLocations(locations?: ToolCallLocation[]): string {
10941094const toolCallIndices = new Map<string, number>();
10951095let toolCallCounter = 0;
10961096
1097- function createStreamCallback(res: ServerResponse, model: string, requestId: string) {
1097+ interface StreamCallbackResult {
1098+ callback: (notification: SessionNotification) => void;
1099+ flush: () => void;
1100+ }
1101+
1102+ function createStreamCallback(res: ServerResponse, model: string, requestId: string): StreamCallbackResult {
10981103 // Reset tool call tracking for this request
10991104 toolCallIndices.clear();
11001105 toolCallCounter = 0;
@@ -1103,7 +1108,48 @@ function createStreamCallback(res: ServerResponse, model: string, requestId: str
11031108 let chunkCount = 0;
11041109 let lastChunkTime = Date.now();
11051110
1106- return (notification: SessionNotification): void => {
1111+ // Buffer for reasoning content - we need to send all reasoning BEFORE any text
1112+ // to ensure proper ordering in OpenCode's UI
1113+ let reasoningBuffer: string[] = [];
1114+ let hasStartedTextContent = false;
1115+ let hasFlushedReasoning = false;
1116+
1117+ // Helper to flush buffered reasoning content
1118+ const flushReasoningBuffer = (): void => {
1119+ if (hasFlushedReasoning || reasoningBuffer.length === 0) return;
1120+ hasFlushedReasoning = true;
1121+
1122+ const combinedReasoning = reasoningBuffer.join('');
1123+ const timestamp = Math.floor(Date.now() / 1000);
1124+
1125+ console.log(`[${requestId}] 💭 Flushing ${String(reasoningBuffer.length)} reasoning chunks (${String(combinedReasoning.length)} chars)`);
1126+
1127+ // Send all reasoning as a single chunk before text starts
1128+ const thoughtChunk = {
1129+ id: `chatcmpl-${requestId}`,
1130+ object: 'chat.completion.chunk',
1131+ created: timestamp,
1132+ model,
1133+ system_fingerprint: SYSTEM_FINGERPRINT,
1134+ choices: [
1135+ {
1136+ index: 0,
1137+ delta: {
1138+ role: 'assistant',
1139+ reasoning_content: combinedReasoning,
1140+ },
1141+ finish_reason: null,
1142+ logprobs: null,
1143+ },
1144+ ],
1145+ };
1146+ res.write(`data: ${JSON.stringify(thoughtChunk)}\n\n`);
1147+
1148+ // Clear the buffer
1149+ reasoningBuffer = [];
1150+ };
1151+
1152+ const callback = (notification: SessionNotification): void => {
11071153 const update = notification.update;
11081154 const sessionId = notification.sessionId ?? requestId;
11091155 const timestamp = Math.floor(Date.now() / 1000);
@@ -1140,6 +1186,12 @@ function createStreamCallback(res: ServerResponse, model: string, requestId: str
11401186
11411187 case 'agent_message_chunk':
11421188 if (update.content?.type === 'text' && update.content.text) {
1189+ // Flush any buffered reasoning before sending text content
1190+ // This ensures reasoning appears BEFORE text in the UI
1191+ if (!hasStartedTextContent) {
1192+ hasStartedTextContent = true;
1193+ flushReasoningBuffer();
1194+ }
11431195 res.write(createStreamChunk(update.content.text, model));
11441196 }
11451197 break;
@@ -1150,26 +1202,39 @@ function createStreamCallback(res: ServerResponse, model: string, requestId: str
11501202 console.log(
11511203 `[${requestId}] 💭 Thinking: ${text.substring(0, 100)}${text.length > 100 ? '...' : ''}`
11521204 );
1153- // Send thinking as extended thinking format (Anthropic-style)
1154- const thoughtChunk = {
1155- id: `chatcmpl-${requestId}`,
1156- object: 'chat.completion.chunk',
1157- created: timestamp,
1158- model,
1159- system_fingerprint: SYSTEM_FINGERPRINT,
1160- choices: [
1161- {
1162- index: 0,
1163- delta: {
1164- role: 'assistant',
1165- reasoning_content: text,
1205+
1206+ // If we haven't started text content yet, buffer the reasoning
1207+ // This handles the case where thought chunks come interleaved with message chunks
1208+ if (!hasStartedTextContent) {
1209+ reasoningBuffer.push(text);
1210+ } else {
1211+ // If text has already started and we get more reasoning,
1212+ // we need to send it immediately as a new reasoning block.
1213+ // However, this is suboptimal - the UI may show it at the end.
1214+ // Log a warning for debugging.
1215+ console.log(
1216+ `[${requestId}] ⚠️ Late reasoning chunk received after text started - may appear at end of output`
1217+ );
1218+ const thoughtChunk = {
1219+ id: `chatcmpl-${requestId}`,
1220+ object: 'chat.completion.chunk',
1221+ created: timestamp,
1222+ model,
1223+ system_fingerprint: SYSTEM_FINGERPRINT,
1224+ choices: [
1225+ {
1226+ index: 0,
1227+ delta: {
1228+ role: 'assistant',
1229+ reasoning_content: text,
1230+ },
1231+ finish_reason: null,
1232+ logprobs: null,
11661233 },
1167- finish_reason: null,
1168- logprobs: null,
1169- },
1170- ],
1171- };
1172- res.write(`data: ${JSON.stringify(thoughtChunk)}\n\n`);
1234+ ],
1235+ };
1236+ res.write(`data: ${JSON.stringify(thoughtChunk)}\n\n`);
1237+ }
11731238 }
11741239 break;
11751240
@@ -1265,6 +1330,11 @@ function createStreamCallback(res: ServerResponse, model: string, requestId: str
12651330 debugLog(`Unknown Update [${requestId}]`, update);
12661331 }
12671332 };
1333+
1334+ return {
1335+ callback,
1336+ flush: flushReasoningBuffer,
1337+ };
12681338}
12691339
12701340// Check if response is an SDK error (JSON with error field)
@@ -1298,7 +1368,8 @@ async function callAugmentAPIStreamingInternal(
12981368 console.log(`[${requestId}] 🚀 Starting streaming call to ${modelId} (prompt: ${String(prompt.length)} chars, workspace: ${workspaceRoot ?? 'default'})`);
12991369
13001370 const client = await getAuggieClient(modelId, workspaceRoot);
1301- client.onSessionUpdate(createStreamCallback(res, model, requestId));
1371+ const streamHandler = createStreamCallback(res, model, requestId);
1372+ client.onSessionUpdate(streamHandler.callback);
13021373 let hasError = false;
13031374 let caughtError: Error | null = null;
13041375
@@ -1334,6 +1405,10 @@ async function callAugmentAPIStreamingInternal(
13341405 hasError = true;
13351406 caughtError = err as Error;
13361407 } finally {
1408+ // Flush any buffered reasoning content before ending the stream
1409+ // This handles the case where reasoning was received but no text content followed
1410+ streamHandler.flush();
1411+
13371412 client.onSessionUpdate(null);
13381413 // Discard client on session errors or aborts, otherwise return to pool
13391414 if (hasError && caughtError) {
0 commit comments