From f23fb6b303030b43ab9b360cf4c88f0a843915d4 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Thu, 23 Oct 2025 07:07:32 +0000 Subject: [PATCH 1/2] feat: Enhance geospatial tool usage with two-stage prompting This commit introduces a two-stage prompting strategy to improve the reliability of geospatial tool usage. The `taskManager` agent now categorizes user queries into 'geospatial', 'web_search', or 'general'. This classification is then passed to the `researcher` agent, which uses it to tailor its system prompt. For geospatial queries, a more directive prompt is used to ensure the `geospatialQueryTool` is prioritized, addressing the issue of underutilization. --- app/actions.tsx | 5 +++-- lib/agents/researcher.tsx | 15 ++++++++++----- lib/agents/task-manager.tsx | 38 ++++++++++++++++++++----------------- lib/schema/next-action.tsx | 3 ++- mapbox_mcp/hooks.ts | 2 +- 5 files changed, 37 insertions(+), 26 deletions(-) diff --git a/app/actions.tsx b/app/actions.tsx index bce44e40..d6e13b11 100644 --- a/app/actions.tsx +++ b/app/actions.tsx @@ -285,7 +285,7 @@ async function submit(formData?: FormData, skip?: boolean) { const currentSystemPrompt = (await getSystemPrompt(userId)) || '' async function processEvents() { - let action: any = { object: { next: 'proceed' } } + let action: any = { object: { next: 'proceed', category: 'general' } } if (!skip) { const taskManagerResult = await taskManager(messages) if (taskManagerResult) { @@ -329,7 +329,8 @@ async function submit(formData?: FormData, skip?: boolean) { uiStream, streamText, messages, - useSpecificAPI + useSpecificAPI, + action.object.category ) answer = fullResponse toolOutputs = toolResponses diff --git a/lib/agents/researcher.tsx b/lib/agents/researcher.tsx index e54b1428..dec4e527 100644 --- a/lib/agents/researcher.tsx +++ b/lib/agents/researcher.tsx @@ -12,12 +12,12 @@ import { getTools } from './tools' import { getModel } from '../utils' export async function researcher( - dynamicSystemPrompt: string, // New parameter + dynamicSystemPrompt: string, uiStream: ReturnType, streamText: ReturnType>, messages: CoreMessage[], - // mcp: any, // Removed mcp parameter - useSpecificModel?: boolean + useSpecificModel?: boolean, + category?: 'geospatial' | 'web_search' | 'general' ) { let fullResponse = '' let hasError = false @@ -65,9 +65,14 @@ Analysis & Planning When you use 'geospatialQueryTool', you don't need to describe how the map will change; simply provide your textual answer based on the query, and trust the map will update appropriately. `; + const geospatial_prompt = `The user's query has been identified as geospatial. +You MUST use the 'geospatialQueryTool' to answer this question. +Do not use any other tools. If the query cannot be answered with the geospatial tool, respond that you are unable to answer.`; - const systemToUse = dynamicSystemPrompt && dynamicSystemPrompt.trim() !== '' ? dynamicSystemPrompt : default_system_prompt; - + let systemToUse = dynamicSystemPrompt && dynamicSystemPrompt.trim() !== '' ? dynamicSystemPrompt : default_system_prompt; + if (category === 'geospatial') { + systemToUse = `${systemToUse}\n\n${geospatial_prompt}`; + } const result = await nonexperimental_streamText({ model: getModel() as LanguageModel, maxTokens: 2500, diff --git a/lib/agents/task-manager.tsx b/lib/agents/task-manager.tsx index 0c21591d..dcbf0ecd 100644 --- a/lib/agents/task-manager.tsx +++ b/lib/agents/task-manager.tsx @@ -17,23 +17,27 @@ export async function taskManager(messages: CoreMessage[]) { const result = await generateObject({ model: getModel() as LanguageModel, - system: `As a planet computer, your primary objective is to fully comprehend the user's query, conduct thorough web searches and use Geospatial tools to gather preview the necessary information, and provide an appropriate response. - To achieve this, you must first analyze the user's input and determine the optimal course of action. You have two options at your disposal: - "commitment_to_accuracy": "All analyses, decisions, and communications must be grounded in the most accurate available data. Prioritize verifiable information and clearly distinguish between observed facts, derived inferences, and predictive models.", - "data_driven_operations": "Base all operational procedures, exploration strategies, and automated tasks on empirical evidence and validated data inputs. Assumptions made due to incomplete data must be explicitly stated.", - "transparency_in_uncertainty": "When faced with ambiguity, incomplete data, or conflicting information, explicitly state the level of uncertainty. Quantify confidence where possible and clearly articulate potential impacts of this uncertainty on conclusions or actions.", - "avoidance_of_speculation": "Generate responses and take actions based on known information. Do not invent, fabricate, or present unsubstantiated claims as facts. If information is unavailable, state so clearly.", - "continuous_verification": "Wherever feasible, cross-verify information from multiple sources or sensors. Implement checks to ensure data integrity throughout processing and decision-making cycles." - 1. "proceed": If the provided information is sufficient to address the query effectively, choose this option to proceed with the research and formulate a response. - 2. "inquire": If you believe that additional information from the user would enhance your ability to provide a comprehensive response, select this option. You may present a form to the user, offering default selections or free-form input fields, to gather the required details.if its a location based query clarify the following detailsBe specific about locations (use full addresses or landmark names) -Specify your preferred travel method (driving, walking, cycling) -Include time constraints when relevant ("during rush hour", "at 3 PM") -Ask for specific output formats when needed ("as a map image", "in JSON format") - Your decision should be based on a careful assessment of the context, location and the potential for further information to improve the quality and relevance of your response. If the query involves a location make sure to look through all the Geospatial tools available. - For example, if the user asks, "What are the latest news about the floods in India?", you may choose to "proceed" as the query is clear and can be answered effectively with web research alone. - However, if the user asks, "What's the warmest temperature in my area?", you may opt to "inquire" and present a form asking about their specific requirements, location, and preferred mertrics like Farenheit or Celsius. - Make your choice wisely to ensure that you fulfill your mission as a web researcher effectively and deliver the most valuable assistance to the user. - `, + system: `As a planet computer, your primary objective is to fully comprehend the user's query, conduct thorough web searches, and use Geospatial tools to gather the necessary information and provide an appropriate response. + +To achieve this, you must first analyze the user's input to determine the optimal course of action. + +First, classify the user's query into one of three categories: +- "geospatial": Select this for any query related to locations, maps, directions, addresses, points of interest, or geographical features. This is for when the user is asking for information about a place, or wants to see something on a map. +- "web_search": Choose this if the query requires current, factual information from the internet (e.g., news, recent events, specific data). +- "general": Use this for conversational questions, creative tasks, or anything that doesn't fit the other two categories. + +After categorizing, decide on the next action: +1. "proceed": If the provided information is sufficient to address the query effectively, choose this option to proceed with the research and formulate a response. +2. "inquire": If you believe that additional information from the user would enhance your ability to provide a comprehensive response, select this option. You may present a form to the user, offering default selections or free-form input fields, to gather the required details. + +Your decision should be based on a careful assessment of the context, location, and the potential for further information to improve the quality and relevance of your response. + +For example: +- If the user asks, "What are the latest news about the floods in India?", you should categorize it as "web_search" and choose to "proceed" as the query is clear and can be answered effectively with web research alone. +- If the user asks, "What's the warmest temperature in my area?", you should categorize it as "geospatial" and opt to "inquire" to ask for their specific location. +- If the user asks, "Directions from New York to Boston", you should categorize it as "geospatial" and choose to "proceed". + +Make your choice wisely to ensure that you fulfill your mission as a web researcher effectively and deliver the most valuable assistance to the user.`, messages, schema: nextActionSchema }) diff --git a/lib/schema/next-action.tsx b/lib/schema/next-action.tsx index efbaae2e..6dff7b9a 100644 --- a/lib/schema/next-action.tsx +++ b/lib/schema/next-action.tsx @@ -2,7 +2,8 @@ import { DeepPartial } from 'ai' import { z } from 'zod' export const nextActionSchema = z.object({ - next: z.enum(['inquire', 'proceed']) // "generate_ui" + next: z.enum(['inquire', 'proceed']), // "generate_ui" + category: z.enum(['geospatial', 'web_search', 'general']).optional() }) export type NextAction = DeepPartial diff --git a/mapbox_mcp/hooks.ts b/mapbox_mcp/hooks.ts index 326056db..06342b3f 100644 --- a/mapbox_mcp/hooks.ts +++ b/mapbox_mcp/hooks.ts @@ -8,7 +8,7 @@ type Tool = { name: string; // Add other properties as needed based on your usage }; -import { getModel } from 'QCX/lib/utils'; +import { getModel } from '@/lib/utils'; // Types for location and mapping data interface LocationResult { From 13dcf0bc70f1403e935ce2ecb7cf7d1cfd4102e5 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 24 Oct 2025 12:22:06 +0000 Subject: [PATCH 2/2] feat: Add image search and refactor agent logic This commit introduces a new image search functionality and refactors the agent logic to improve tool selection. - A new `imageSearchTool` is added, using the Serper API to fetch image results from the web. - A corresponding `ImageSearchSection` component is created to display the image results in a grid format in the chat UI. - The `taskManager` agent is updated to classify user queries into categories ('geospatial', 'web_search', 'general'). - The `researcher` agent now uses this classification to tailor its system prompt, encouraging the use of the new image search tool for visual queries and improving the reliability of geospatial tool usage. --- app/actions.tsx | 7 +++ components/image-search-section.tsx | 29 ++++++++++++ lib/agents/researcher.tsx | 1 + lib/agents/tools/image-search.tsx | 71 +++++++++++++++++++++++++++++ lib/agents/tools/index.tsx | 5 ++ 5 files changed, 113 insertions(+) create mode 100644 components/image-search-section.tsx create mode 100644 lib/agents/tools/image-search.tsx diff --git a/app/actions.tsx b/app/actions.tsx index d6e13b11..74e49de3 100644 --- a/app/actions.tsx +++ b/app/actions.tsx @@ -26,6 +26,7 @@ import { GeoJsonLayer } from '@/components/map/geojson-layer' import { CopilotDisplay } from '@/components/copilot-display' import RetrieveSection from '@/components/retrieve-section' import { VideoSearchSection } from '@/components/video-search-section' +import { ImageSearchSection } from '@/components/image-search-section' import { MapQueryHandler } from '@/components/map/map-query-handler' // Add this import // Define the type for related queries @@ -683,6 +684,12 @@ export const getUIStateFromAIState = (aiState: AIState): UIState => { ), isCollapsed: isCollapsed.value } + case 'imageSearch': + return { + id, + component: , + isCollapsed: isCollapsed.value + } default: console.warn( `Unhandled tool result in getUIStateFromAIState: ${name}` diff --git a/components/image-search-section.tsx b/components/image-search-section.tsx new file mode 100644 index 00000000..e76985d8 --- /dev/null +++ b/components/image-search-section.tsx @@ -0,0 +1,29 @@ + +import { StreamableValue } from 'ai/rsc' +import { Card } from '@/components/ui/card' + +interface Image { + imageUrl: string + link: string + title: string +} + +export const ImageSearchSection = ({ result }: { result: StreamableValue }) => { + const data = JSON.parse(result.value) as { images: Image[] } + + return ( + +
+ {data.images?.map((image, index) => ( + + {image.title} + + ))} +
+
+ ) +} diff --git a/lib/agents/researcher.tsx b/lib/agents/researcher.tsx index dec4e527..54a67ebf 100644 --- a/lib/agents/researcher.tsx +++ b/lib/agents/researcher.tsx @@ -43,6 +43,7 @@ Tool Usage Guide: - For general web searches for factual information: Use the 'search' tool. - For retrieving content from specific URLs provided by the user: Use the 'retrieve' tool. (Do not use this for URLs found in search results). +- For image searches: Use the 'imageSearch' tool. This is especially useful for visual queries. - For any questions involving locations, places, addresses, geographical features, finding businesses or points of interest, distances between locations, or directions: You MUST use the 'geospatialQueryTool'. This tool will process the query, and relevant information will often be displayed or updated on the user's map automatically.** Examples of queries for 'geospatialQueryTool': diff --git a/lib/agents/tools/image-search.tsx b/lib/agents/tools/image-search.tsx new file mode 100644 index 00000000..29030ef7 --- /dev/null +++ b/lib/agents/tools/image-search.tsx @@ -0,0 +1,71 @@ + +import { createStreamableValue } from 'ai/rsc' +import { searchSchema } from '@/lib/schema/search' +import { Card } from '@/components/ui/card' +import { ImageSearchSection } from '@/components/image-search-section' +import { ToolProps } from '.' + +export const imageSearchTool = ({ uiStream, fullResponse }: ToolProps) => ({ + description: 'Search the web for images', + parameters: searchSchema, + execute: async ({ + query, + max_results, + }: { + query: string + max_results: number + }) => { + let hasError = false + const streamResults = createStreamableValue() + uiStream.append() + + const filledQuery = + query.length < 5 ? query + ' '.repeat(5 - query.length) : query + let searchResult + try { + searchResult = await serperImageSearch(filledQuery, max_results) + } catch (error) { + console.error('Image search API error:', error) + hasError = true + } + + if (hasError) { + fullResponse += `\nAn error occurred while searching for images of "${query}".` + uiStream.update( + + {`An error occurred while searching for images of "${query}".`} + + ) + return searchResult + } + + streamResults.done(JSON.stringify(searchResult)) + + return searchResult + } +}) + +async function serperImageSearch( + query: string, + maxResults: number = 10, +): Promise { + const apiKey = process.env.SERPER_API_KEY + const response = await fetch('https://google.serper.dev/images', { + method: 'POST', + headers: { + 'X-API-KEY': apiKey!, + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + q: query, + num: maxResults, + }) + }) + + if (!response.ok) { + throw new Error(`Error: ${response.status}`) + } + + const data = await response.json() + return data +} diff --git a/lib/agents/tools/index.tsx b/lib/agents/tools/index.tsx index 4c08f373..db90b202 100644 --- a/lib/agents/tools/index.tsx +++ b/lib/agents/tools/index.tsx @@ -2,6 +2,7 @@ import { createStreamableUI } from 'ai/rsc' import { retrieveTool } from './retrieve' import { searchTool } from './search' import { videoSearchTool } from './video-search' +import { imageSearchTool } from './image-search' import { geospatialTool } from './geospatial' // Removed useGeospatialToolMcp import export interface ToolProps { @@ -33,6 +34,10 @@ export const getTools = ({ uiStream, fullResponse }: ToolProps) => { uiStream, fullResponse }) + tools.imageSearch = imageSearchTool({ + uiStream, + fullResponse + }) } return tools