diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts index ec4dfaeab8..1340657775 100644 --- a/src/services/api/claude.ts +++ b/src/services/api/claude.ts @@ -1340,7 +1340,10 @@ async function* queryModel( // media stripping) but before Anthropic-specific logic (betas, thinking, caching). if (getAPIProvider() === 'openai') { const { queryModelOpenAI } = await import('./openai/index.js') - yield* queryModelOpenAI(messagesForAPI, systemPrompt, filteredTools, signal, options) + // OpenAI emulates Anthropic's dynamic tool loading client-side. It needs + // the full tool pool so ToolSearchTool can search deferred MCP tools that + // were intentionally filtered out of the initial API tool list above. + yield* queryModelOpenAI(messagesForAPI, systemPrompt, tools, signal, options) return } diff --git a/src/services/api/openai/__tests__/queryModelOpenAI.isolated.ts b/src/services/api/openai/__tests__/queryModelOpenAI.isolated.ts index 86ccc5d5d9..23f9dff062 100644 --- a/src/services/api/openai/__tests__/queryModelOpenAI.isolated.ts +++ b/src/services/api/openai/__tests__/queryModelOpenAI.isolated.ts @@ -196,10 +196,52 @@ async function runQueryModel( // We mock at module level. Bun's mock.module replaces the module for the // entire file, so we configure the stream per-test via a shared variable. let _nextEvents: BetaRawMessageStreamEvent[] = [] +let _toolSearchEnabled = false /** Captured arguments from the last chat.completions.create() call */ let _lastCreateArgs: Record | null = null +mock.module('@ant/model-provider', () => ({ + resolveOpenAIModel: (m: string) => m, + adaptOpenAIStreamToAnthropic: (_stream: any, _model: string) => + eventStream(_nextEvents), + anthropicMessagesToOpenAI: (messages: any[]) => + messages.map(msg => ({ + role: msg.message?.role ?? 'user', + content: msg.message?.content ?? '', + })), + anthropicToolsToOpenAI: (tools: any[]) => + tools.map(tool => ({ + type: 'function', + function: { + name: tool.name, + description: tool.description ?? '', + parameters: tool.input_schema ?? { type: 'object', properties: {} }, + }, + })), + anthropicToolChoiceToOpenAI: () => undefined, +})) + +mock.module('../../../../utils/envUtils.js', () => ({ + isEnvTruthy: (value: string | undefined) => + value === '1' || value === 'true' || value === 'yes' || value === 'on', + isEnvDefinedFalsy: (value: string | undefined) => + value === '0' || value === 'false' || value === 'no' || value === 'off', +})) + +mock.module('../../../../services/analytics/growthbook.js', () => ({ + getFeatureValue_CACHED_MAY_BE_STALE: (_key: string, fallback: unknown) => + fallback, +})) + +mock.module('src/bootstrap/state.js', () => ({ + isReplBridgeActive: () => false, +})) + +mock.module('bun:bundle', () => ({ + feature: () => false, +})) + mock.module('../client.js', () => ({ getOpenAIClient: () => ({ chat: { @@ -252,6 +294,13 @@ mock.module('../../../../utils/context.js', () => ({ mock.module('../../../../utils/messages.js', () => ({ normalizeMessagesForAPI: (msgs: any) => msgs, normalizeContentFromAPI: (blocks: any[]) => blocks, + createUserMessage: (opts: any) => ({ + type: 'user', + message: { role: 'user', content: opts.content }, + uuid: 'user-uuid', + timestamp: new Date().toISOString(), + isMeta: opts.isMeta, + }), createAssistantAPIErrorMessage: (opts: any) => ({ type: 'assistant', message: { @@ -268,8 +317,9 @@ mock.module('../../../../utils/api.js', () => ({ })) mock.module('../../../../utils/toolSearch.js', () => ({ - isToolSearchEnabled: async () => false, + isToolSearchEnabled: async () => _toolSearchEnabled, extractDiscoveredToolNames: () => new Set(), + isDeferredToolsDeltaEnabled: () => false, })) mock.module('../../../../tools/ToolSearchTool/prompt.js', () => ({ @@ -297,6 +347,16 @@ mock.module('../../../../utils/modelCost.js', () => ({ getModelPricingString: () => undefined, })) +mock.module('../../../../services/langfuse/tracing.js', () => ({ + recordLLMObservation: () => {}, +})) + +mock.module('../../../../services/langfuse/convert.js', () => ({ + convertMessagesToLangfuse: () => [], + convertOutputToLangfuse: () => ({}), + convertToolsToLangfuse: () => [], +})) + mock.module('../../../../utils/debug.js', () => ({ logForDebugging: () => {}, logAntError: () => {}, @@ -543,3 +603,59 @@ describe('queryModelOpenAI — max_tokens forwarded to request', () => { expect(_lastCreateArgs!.max_tokens).toBe(8192) }) }) + +describe('queryModelOpenAI — deferred MCP tool visibility', () => { + test('prepends available deferred MCP tools to OpenAI messages', async () => { + _toolSearchEnabled = true + _nextEvents = [makeMessageStart(), makeMessageStop()] + + try { + const { queryModelOpenAI } = await import('../index.js') + const tools: any[] = [ + { + name: 'ToolSearch', + isMcp: false, + input_schema: { type: 'object', properties: {} }, + prompt: async () => 'Search deferred tools', + }, + { + name: 'mcp__wechat__send_message', + isMcp: true, + input_schema: { type: 'object', properties: {} }, + prompt: async () => 'Send a WeChat message', + }, + ] + + const options: any = { + model: 'test-model', + tools: [], + agents: [], + querySource: 'main_loop', + getToolPermissionContext: async () => ({ + alwaysAllow: [], + alwaysDeny: [], + needsPermission: [], + mode: 'default', + isBypassingPermissions: false, + }), + } + + for await (const _item of queryModelOpenAI( + [], + { type: 'text', text: '' } as any, + tools as any, + new AbortController().signal, + options, + )) { + // Exhaust generator so request body is built. + } + + expect(_lastCreateArgs).not.toBeNull() + expect(JSON.stringify(_lastCreateArgs!.messages)).toContain( + '\\nmcp__wechat__send_message\\n', + ) + } finally { + _toolSearchEnabled = false + } + }) +}) diff --git a/src/services/api/openai/index.ts b/src/services/api/openai/index.ts index 0db0022252..248c2dac3b 100644 --- a/src/services/api/openai/index.ts +++ b/src/services/api/openai/index.ts @@ -5,6 +5,7 @@ import type { StreamEvent, SystemAPIErrorMessage, AssistantMessage, + UserMessage, } from '../../../types/message.js' import type { AgentId } from '../../../types/ids.js' import type { Tools } from '../../../Tool.js' @@ -32,18 +33,58 @@ import type { Options } from '../claude.js' import { randomUUID } from 'crypto' import { createAssistantAPIErrorMessage, + createUserMessage, normalizeContentFromAPI, } from '../../../utils/messages.js' import type { SDKAssistantMessageError } from '../../../entrypoints/agentSdkTypes.js' import { isToolSearchEnabled, extractDiscoveredToolNames, + isDeferredToolsDeltaEnabled, } from '../../../utils/toolSearch.js' import { + formatDeferredToolLine, isDeferredTool, TOOL_SEARCH_TOOL_NAME, } from '@claude-code-best/builtin-tools/tools/ToolSearchTool/prompt.js' +/** + * Mirrors the Anthropic request path's deferred-tool announcement for OpenAI. + * + * OpenAI-compatible endpoints cannot consume Anthropic's `defer_loading` or + * `tool_reference` beta payloads directly, so the model needs the same textual + * list of deferred MCP tool names that Anthropic receives before it can ask + * ToolSearchTool to load their full schemas. + */ +function prependDeferredToolListIfNeeded( + messages: (AssistantMessage | UserMessage)[], + tools: Tools, + deferredToolNames: Set, + useToolSearch: boolean, +): (AssistantMessage | UserMessage)[] { + if (!useToolSearch || isDeferredToolsDeltaEnabled()) return messages + + const deferredToolList = tools + .filter(tool => deferredToolNames.has(tool.name)) + .map(formatDeferredToolLine) + .sort() + .join('\n') + + if (!deferredToolList) return messages + + return [ + createUserMessage({ + content: `\n${deferredToolList}\n`, + isMeta: true, + }), + ...messages, + ] +} + +function isOpenAIConvertibleMessage(msg: Message): msg is AssistantMessage | UserMessage { + return msg.type === 'assistant' || msg.type === 'user' +} + /** * Assemble the final AssistantMessage (and optional max_tokens error) from * accumulated stream state. Extracted to avoid duplication between the @@ -176,9 +217,18 @@ export async function* queryModelOpenAI( // 8. Convert messages and tools to OpenAI format const enableThinking = isOpenAIThinkingEnabled(openaiModel) - const openaiMessages = anthropicMessagesToOpenAI(messagesForAPI, systemPrompt, { - enableThinking, - }) + const openAIConvertibleMessages = messagesForAPI.filter(isOpenAIConvertibleMessage) + const messagesWithDeferredToolList = prependDeferredToolListIfNeeded( + openAIConvertibleMessages, + tools, + deferredToolNames, + useToolSearch, + ) + const openaiMessages = anthropicMessagesToOpenAI( + messagesWithDeferredToolList, + systemPrompt, + { enableThinking }, + ) const openaiTools = anthropicToolsToOpenAI(standardTools) const openaiToolChoice = anthropicToolChoiceToOpenAI(options.toolChoice) @@ -356,7 +406,7 @@ export async function* queryModelOpenAI( recordLLMObservation(options.langfuseTrace ?? null, { model: openaiModel, provider: 'openai', - input: convertMessagesToLangfuse(messagesForAPI, systemPrompt), + input: convertMessagesToLangfuse(openaiMessages), output: convertOutputToLangfuse(collectedMessages), usage: { input_tokens: usage.input_tokens, diff --git a/src/services/langfuse/__tests__/langfuse.test.ts b/src/services/langfuse/__tests__/langfuse.test.ts index 48afa23722..dab33fd60d 100644 --- a/src/services/langfuse/__tests__/langfuse.test.ts +++ b/src/services/langfuse/__tests__/langfuse.test.ts @@ -184,6 +184,100 @@ describe('Langfuse integration', () => { }) }) + describe('convertMessagesToLangfuse', () => { + test('preserves OpenAI-style messages including deferred tool announcements', async () => { + const { convertMessagesToLangfuse } = await import('../convert.js') + const result = convertMessagesToLangfuse([ + { + role: 'system', + content: 'system prompt', + }, + { + role: 'user', + content: + '\nmcp__wechat__send_message\n', + }, + ]) + + expect(result).toEqual([ + { role: 'system', content: 'system prompt' }, + { + role: 'user', + content: + '\nmcp__wechat__send_message\n', + }, + ]) + }) + + test('preserves roles for OpenAI-style array content messages', async () => { + const { convertMessagesToLangfuse } = await import('../convert.js') + const result = convertMessagesToLangfuse([ + { + role: 'system', + content: [{ type: 'text', text: 'system reminder' }], + }, + { + role: 'tool', + tool_call_id: 'call_1', + content: [{ type: 'text', text: 'tool output' }], + }, + ]) + + expect(result).toEqual([ + { role: 'system', content: 'system reminder' }, + { role: 'tool', content: 'tool output', tool_call_id: 'call_1' }, + ]) + }) + + test('merges assistant tool calls from OpenAI-style array content', async () => { + const { convertMessagesToLangfuse } = await import('../convert.js') + const result = convertMessagesToLangfuse([ + { + role: 'assistant', + content: [ + { + type: 'text', + text: 'calling a tool', + tool_calls: [ + { + id: 'call_from_part', + type: 'function', + function: { name: 'part_tool', arguments: '{}' }, + }, + ], + }, + ], + tool_calls: [ + { + id: 'call_from_message', + type: 'function', + function: { name: 'message_tool', arguments: '{"ok":true}' }, + }, + ], + }, + ]) + + expect(result).toEqual([ + { + role: 'assistant', + content: 'calling a tool', + tool_calls: [ + { + id: 'call_from_message', + type: 'function', + function: { name: 'message_tool', arguments: '{"ok":true}' }, + }, + { + id: 'call_from_part', + type: 'function', + function: { name: 'part_tool', arguments: '{}' }, + }, + ], + }, + ]) + }) + }) + // ── client tests ──────────────────────────────────────────────────────────── describe('isLangfuseEnabled', () => { diff --git a/src/services/langfuse/convert.ts b/src/services/langfuse/convert.ts index 89018e4acd..ad324c2a0b 100644 --- a/src/services/langfuse/convert.ts +++ b/src/services/langfuse/convert.ts @@ -10,7 +10,7 @@ * - tool_result blocks → separate { role: 'tool' } messages */ -import type { Message, AssistantMessage, UserMessage } from 'src/types/message.js' +import type { AssistantMessage } from 'src/types/message.js' type LangfuseContentPart = | { type: 'text'; text: string } @@ -30,6 +30,55 @@ type LangfuseChatMessage = { tool_call_id?: string } +function isLangfuseRole(value: unknown): value is LangfuseChatMessage['role'] { + switch (value) { + case 'user': + case 'assistant': + case 'system': + case 'tool': + return true + default: + return false + } +} + +function isRecord(value: unknown): value is Record { + return value !== null && typeof value === 'object' && !Array.isArray(value) +} + +function isLangfuseToolCall(value: unknown): value is LangfuseToolCall { + if (!isRecord(value)) return false + const fn = value.function + return ( + typeof value.id === 'string' && + value.type === 'function' && + isRecord(fn) && + typeof fn.name === 'string' && + typeof fn.arguments === 'string' + ) +} + +function getToolCalls(value: unknown): LangfuseToolCall[] { + return Array.isArray(value) ? value.filter(isLangfuseToolCall) : [] +} + +function getContentToolCalls(content: unknown[]): LangfuseToolCall[] { + return content.flatMap(block => + isRecord(block) ? getToolCalls(block.tool_calls) : [], + ) +} + +function mergeToolCalls( + ...groups: readonly LangfuseToolCall[][] +): LangfuseToolCall[] { + const merged = new Map() + for (const toolCall of groups.flat()) { + const key = toolCall.id || `${toolCall.function.name}:${toolCall.function.arguments}` + if (!merged.has(key)) merged.set(key, toolCall) + } + return [...merged.values()] +} + /** Normalize a content block into a LangfuseContentPart (non-tool_use, non-tool_result) */ function toContentPart(block: Record): LangfuseContentPart | null { const type = block.type as string | undefined @@ -121,15 +170,15 @@ function collapseContent(parts: LangfuseContentPart[]): string | LangfuseContent return parts } -function toRole(msg: Message): 'user' | 'assistant' | 'system' { +function toRoleFromWrappedMessage(msg: Record): 'user' | 'assistant' | 'system' { if (msg.type === 'assistant') return 'assistant' if (msg.type === 'system') return 'system' return 'user' } -/** Convert messagesForAPI (UserMessage | AssistantMessage)[] → Langfuse input format */ +/** Convert internal or OpenAI-style messages → Langfuse input format */ export function convertMessagesToLangfuse( - messages: (UserMessage | AssistantMessage)[], + messages: readonly unknown[], systemPrompt?: readonly string[], ): LangfuseChatMessage[] { const result: LangfuseChatMessage[] = [] @@ -139,18 +188,34 @@ export function convertMessagesToLangfuse( } } for (const msg of messages) { - const inner = msg.message - if (!inner) continue - const role = (inner.role as 'user' | 'assistant' | undefined) ?? toRole(msg) + if (!isRecord(msg)) continue + const wrappedMessage = msg.message + const isWrappedMessage = isRecord(wrappedMessage) + const inner = isWrappedMessage ? wrappedMessage : msg + const role = + isLangfuseRole(inner.role) ? inner.role : isWrappedMessage ? toRoleFromWrappedMessage(msg) : 'user' const rawContent = inner.content if (typeof rawContent === 'string' || !Array.isArray(rawContent)) { - result.push({ role, content: String(rawContent ?? '') }) + const toolCalls = getToolCalls(inner.tool_calls) + result.push({ + role, + content: String(rawContent ?? ''), + ...('tool_call_id' in inner && typeof inner.tool_call_id === 'string' + ? { tool_call_id: inner.tool_call_id } + : {}), + ...(toolCalls.length > 0 ? { tool_calls: toolCalls } : {}), + }) continue } if (role === 'assistant') { // Extract tool_use → tool_calls at message level const { tool_calls, rest } = extractToolCalls(rawContent) + const allToolCalls = mergeToolCalls( + tool_calls, + getToolCalls(inner.tool_calls), + getContentToolCalls(rest), + ) const parts = rest .filter((b): b is Record => b != null && typeof b === 'object') .map(b => toContentPart(b)) @@ -158,7 +223,7 @@ export function convertMessagesToLangfuse( result.push({ role: 'assistant', content: collapseContent(parts), - ...(tool_calls.length > 0 && { tool_calls }), + ...(allToolCalls.length > 0 && { tool_calls: allToolCalls }), }) } else { // User messages: extract tool_result → separate tool messages @@ -168,7 +233,18 @@ export function convertMessagesToLangfuse( .map(b => toContentPart(b)) .filter((p): p is LangfuseContentPart => p !== null) if (parts.length > 0 || toolMessages.length === 0) { - result.push({ role: 'user', content: collapseContent(parts) }) + const toolCalls = mergeToolCalls( + getToolCalls(inner.tool_calls), + getContentToolCalls(rest), + ) + result.push({ + role, + content: collapseContent(parts), + ...('tool_call_id' in inner && typeof inner.tool_call_id === 'string' + ? { tool_call_id: inner.tool_call_id } + : {}), + ...(toolCalls.length > 0 ? { tool_calls: toolCalls } : {}), + }) } result.push(...toolMessages) }