diff --git a/app/actions.tsx b/app/actions.tsx index ffec2a61..9984a779 100644 --- a/app/actions.tsx +++ b/app/actions.tsx @@ -250,8 +250,9 @@ async function submit(formData?: FormData, skip?: boolean) { } const hasImage = messageParts.some(part => part.type === 'image') - const content = hasImage - ? (messageParts as any) + // Properly type the content based on whether it contains images + const content: CoreMessage['content'] = hasImage + ? messageParts as CoreMessage['content'] : messageParts.map(part => part.text).join('\n') const type = skip @@ -278,7 +279,7 @@ async function submit(formData?: FormData, skip?: boolean) { messages.push({ role: 'user', content - }) + } as CoreMessage) } const userId = 'anonymous' diff --git a/lib/agents/researcher.tsx b/lib/agents/researcher.tsx index a5deb967..cbcc740c 100644 --- a/lib/agents/researcher.tsx +++ b/lib/agents/researcher.tsx @@ -96,8 +96,14 @@ export async function researcher( ? dynamicSystemPrompt : getDefaultSystemPrompt(currentDate) + // Check if any message contains an image + const hasImage = messages.some(message => + Array.isArray(message.content) && + message.content.some(part => part.type === 'image') + ) + const result = await nonexperimental_streamText({ - model: getModel() as LanguageModel, + model: getModel(hasImage) as LanguageModel, maxTokens: 4096, system: systemPromptToUse, messages, diff --git a/lib/agents/resolution-search.tsx b/lib/agents/resolution-search.tsx index 595fa5ad..b5682136 100644 --- a/lib/agents/resolution-search.tsx +++ b/lib/agents/resolution-search.tsx @@ -39,9 +39,15 @@ Analyze the user's prompt and the image to provide a holistic understanding of t const filteredMessages = messages.filter(msg => msg.role !== 'system'); + // Check if any message contains an image (resolution search is specifically for image analysis) + const hasImage = messages.some(message => + Array.isArray(message.content) && + message.content.some(part => part.type === 'image') + ) + // Use generateObject to get the full object at once. const { object } = await generateObject({ - model: getModel(), + model: getModel(hasImage), system: systemPrompt, messages: filteredMessages, schema: resolutionSearchSchema, diff --git a/lib/utils/index.ts b/lib/utils/index.ts index 9fcb0f6d..f82b05ea 100644 --- a/lib/utils/index.ts +++ b/lib/utils/index.ts @@ -16,15 +16,16 @@ export function generateUUID(): string { return uuidv4(); } -export function getModel() { +export function getModel(requireVision: boolean = false) { const xaiApiKey = process.env.XAI_API_KEY const gemini3ProApiKey = process.env.GEMINI_3_PRO_API_KEY const awsAccessKeyId = process.env.AWS_ACCESS_KEY_ID const awsSecretAccessKey = process.env.AWS_SECRET_ACCESS_KEY const awsRegion = process.env.AWS_REGION - const bedrockModelId = '' + const bedrockModelId = process.env.BEDROCK_MODEL_ID || 'anthropic.claude-3-5-sonnet-20241022-v2:0' - if (xaiApiKey) { + // If vision is required, skip models that don't support it + if (!requireVision && xaiApiKey) { const xai = createXai({ apiKey: xaiApiKey, baseURL: 'https://api.x.ai/v1', @@ -67,7 +68,7 @@ export function getModel() { return model } - // Default fallback (OpenAI) + // Default fallback (OpenAI gpt-4o supports vision) const openai = createOpenAI({ apiKey: process.env.OPENAI_API_KEY, })