Skip to content
2 changes: 2 additions & 0 deletions agent_core/core/embedding_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
- GOOGLE_API_KEY (for provider="gemini")
"""

from __future__ import annotations

import os
from typing import List, Optional

Expand Down
2 changes: 2 additions & 0 deletions agent_core/core/impl/action/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
based on user queries using LLM reasoning.
"""

from __future__ import annotations

import json
import ast
from typing import Optional, List, Dict, Any, Tuple
Expand Down
34 changes: 21 additions & 13 deletions agent_core/core/impl/llm/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -1184,9 +1184,10 @@ def _generate_openai(
# Always enforce JSON output format
request_kwargs["response_format"] = {"type": "json_object"}

# Add prompt_cache_key when call_type is provided for better cache routing
# This helps when alternating between different call types (reasoning, action_selection)
if call_type and system_prompt and len(system_prompt) >= config.min_cache_tokens:
# Add prompt_cache_key for OpenAI/DeepSeek cache routing.
# Grok (xAI) does not support prompt_cache_key — it uses automatic
# prefix caching and ignores this parameter, so skip it for Grok.
if self.provider != "grok" and call_type and system_prompt and len(system_prompt) >= config.min_cache_tokens:
prompt_hash = hashlib.sha256(system_prompt.encode()).hexdigest()[:16]
cache_key = f"{call_type}_{prompt_hash}"
request_kwargs["extra_body"] = {"prompt_cache_key": cache_key}
Expand All @@ -1197,21 +1198,26 @@ def _generate_openai(
token_count_input = response.usage.prompt_tokens
token_count_output = response.usage.completion_tokens

# Extract cached tokens from prompt_tokens_details (OpenAI automatic caching)
# Available for prompts ≥1024 tokens
prompt_tokens_details = getattr(response.usage, "prompt_tokens_details", None)
if prompt_tokens_details:
cached_tokens = getattr(prompt_tokens_details, "cached_tokens", 0) or 0
# Extract cached tokens — field name differs by provider:
# - OpenAI: response.usage.prompt_tokens_details.cached_tokens
# - Grok (xAI): response.usage.prompt_cache_hit_tokens
if self.provider == "grok":
cached_tokens = getattr(response.usage, "prompt_cache_hit_tokens", 0) or 0
else:
prompt_tokens_details = getattr(response.usage, "prompt_tokens_details", None)
if prompt_tokens_details:
cached_tokens = getattr(prompt_tokens_details, "cached_tokens", 0) or 0

# Record cache metrics
provider_label = self.provider # "openai", "grok", "deepseek", etc.
metrics = get_cache_metrics()
if cached_tokens > 0:
logger.info(f"[CACHE] OpenAI {cache_type} cache hit: {cached_tokens}/{token_count_input} tokens from cache")
metrics.record_hit("openai", cache_type, cached_tokens=cached_tokens, total_tokens=token_count_input)
logger.info(f"[CACHE] {provider_label} {cache_type} cache hit: {cached_tokens}/{token_count_input} tokens from cache")
metrics.record_hit(provider_label, cache_type, cached_tokens=cached_tokens, total_tokens=token_count_input)
elif system_prompt and len(system_prompt) >= config.min_cache_tokens:
# Caching should have been attempted (prompt long enough)
# This is a miss - either first call or cache expired
metrics.record_miss("openai", cache_type, total_tokens=token_count_input)
metrics.record_miss(provider_label, cache_type, total_tokens=token_count_input)

status = "success"
except Exception as exc:
Expand Down Expand Up @@ -1262,22 +1268,24 @@ def _generate_ollama(self, system_prompt: str | None, user_prompt: str) -> Dict[
try:
payload = {
"model": self.model,
"system": system_prompt,
"prompt": user_prompt,
"stream": False,
"format": "json",
"options": {
"temperature": self.temperature,
}
}
if system_prompt:
payload["system"] = system_prompt
url: str = f"{self.remote_url.rstrip('/')}/api/generate"
response = requests.post(url, json=payload, timeout=600)
response.raise_for_status()
result = response.json()

content = result.get("response", "").strip()
total_tokens = result.get("usage", {}).get("total_tokens", 0)
token_count_input = result.get("prompt_eval_count", 0)
token_count_output = result.get("eval_count", 0)
total_tokens = token_count_input + token_count_output
status = "success"
except Exception as exc:
exc_obj = exc
Expand Down
50 changes: 31 additions & 19 deletions agent_core/core/impl/vlm/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,9 @@ def describe_image_bytes(
if log_response:
logger.info(f"[LLM SEND] system={system_prompt} | user={user_prompt}")

if self.provider in ("openai", "minimax", "deepseek", "moonshot", "grok"):
if self.provider == "deepseek":
raise RuntimeError("DeepSeek does not support vision/VLM. Use a different provider for image description.")
elif self.provider in ("openai", "minimax", "moonshot", "grok"):
response = self._openai_describe_bytes(image_bytes, system_prompt, user_prompt)
elif self.provider == "remote":
response = self._ollama_describe_bytes(image_bytes, system_prompt, user_prompt)
Expand Down Expand Up @@ -288,6 +290,17 @@ async def generate_response_async(

# ───────────────────── Provider Helpers ─────────────────────

@staticmethod
def _detect_mime_type(image_bytes: bytes) -> str:
    """Sniff the image MIME type from the leading bytes of the data.

    Recognizes the PNG, GIF, and WebP signatures. Anything else,
    including empty or truncated input, is reported as "image/jpeg".

    Args:
        image_bytes: Raw image data; only the first 12 bytes are inspected.

    Returns:
        One of "image/png", "image/gif", "image/webp", or "image/jpeg".
    """
    # Signatures that sit at offset 0, checked in order.
    leading_signatures = (
        (b'\x89PNG\r\n\x1a\n', "image/png"),
        (b'GIF8', "image/gif"),
    )
    for magic, mime in leading_signatures:
        if image_bytes[:len(magic)] == magic:
            return mime

    # WebP check: "RIFF" at bytes 0-3 and "WEBP" at bytes 8-11.
    riff_tag = image_bytes[:4]
    form_type = image_bytes[8:12]
    if riff_tag == b'RIFF' and form_type == b'WEBP':
        return "image/webp"

    # Fallback when no known signature matched.
    return "image/jpeg"

def _report_usage_async(
self,
service_type: str,
Expand Down Expand Up @@ -318,8 +331,9 @@ def _report_usage_async(
logger.warning(f"[VLM] Failed to report usage: {e}")

def _openai_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str) -> Dict[str, Any]:
"""OpenAI vision request with automatic prompt caching metrics."""
"""OpenAI/Grok vision request with automatic prompt caching metrics."""
img_b64 = base64.b64encode(image_bytes).decode()
mime_type = self._detect_mime_type(image_bytes)
messages: list[Dict[str, Any]] = []
if sys:
messages.append({"role": "system", "content": sys})
Expand All @@ -328,7 +342,7 @@ def _openai_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str)
"role": "user",
"content": [
{"type": "text", "text": usr},
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}},
{"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{img_b64}"}},
],
}
)
Expand Down Expand Up @@ -375,9 +389,9 @@ def _openai_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str)
elif sys and len(sys) >= config.min_cache_tokens:
metrics.record_miss("openai", "automatic_vlm", total_tokens=token_count_input)

# Report usage via hook
# Report usage via hook (use actual provider name, e.g. "grok", "minimax")
self._report_usage_async(
"vlm_openai", "openai", self.model,
f"vlm_{self.provider}", self.provider, self.model,
token_count_input, token_count_output, cached_tokens
)

Expand All @@ -393,16 +407,20 @@ def _ollama_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str)
payload = {
"model": self.model,
"prompt": usr,
"system": sys,
"images": [img_b64],
"stream": False,
"temperature": self.temperature,
"options": {"temperature": self.temperature},
}
if sys:
payload["system"] = sys
url: str = f"{self.remote_url.rstrip('/')}/api/generate"
r = requests.post(url, json=payload, timeout=600)
r.raise_for_status()
content = r.json().get("response", "").strip()
total_tokens = r.json().get("usage", {}).get("total_tokens", 0)
result = r.json()
content = result.get("response", "").strip()
token_count_input = result.get("prompt_eval_count", 0)
token_count_output = result.get("eval_count", 0)
total_tokens = token_count_input + token_count_output

return {
"tokens_used": total_tokens or 0,
Expand All @@ -420,7 +438,7 @@ def _gemini_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str)
image_bytes=image_bytes,
system_prompt=sys,
temperature=self.temperature,
json_mode=True,
json_mode=False,
)

# Record cache metrics
Expand All @@ -447,6 +465,7 @@ def _gemini_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str)
def _byteplus_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str) -> Dict[str, Any]:
"""BytePlus vision request."""
img_b64 = base64.b64encode(image_bytes).decode()
mime_type = self._detect_mime_type(image_bytes)
messages: list[Dict[str, Any]] = []
if sys:
messages.append({"role": "system", "content": sys})
Expand All @@ -456,7 +475,7 @@ def _byteplus_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str
"role": "user",
"content": [
{"type": "text", "text": usr},
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}},
{"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{img_b64}"}},
],
}
)
Expand Down Expand Up @@ -501,14 +520,7 @@ def _anthropic_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: st
img_b64 = base64.b64encode(image_bytes).decode()
config = get_cache_config()

# Detect media type from image bytes
media_type = "image/jpeg"
if image_bytes[:8] == b'\x89PNG\r\n\x1a\n':
media_type = "image/png"
elif image_bytes[:4] == b'GIF8':
media_type = "image/gif"
elif image_bytes[:4] == b'RIFF' and image_bytes[8:12] == b'WEBP':
media_type = "image/webp"
media_type = self._detect_mime_type(image_bytes)

message_content = [
{
Expand Down
2 changes: 2 additions & 0 deletions agent_core/core/registry/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
result = await executor.execute_action(action, input_data)
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from agent_core.core.registry.base import ComponentRegistry
Expand Down
2 changes: 2 additions & 0 deletions agent_core/core/registry/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
system_prompt, user_prompt = engine.make_prompt(query="...")
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from agent_core.core.registry.base import ComponentRegistry
Expand Down
2 changes: 2 additions & 0 deletions agent_core/core/registry/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
db.list_actions()
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from agent_core.core.registry.base import ComponentRegistry
Expand Down
2 changes: 2 additions & 0 deletions agent_core/core/registry/event_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
manager.log("INFO", "Something happened")
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from agent_core.core.registry.base import ComponentRegistry
Expand Down
2 changes: 2 additions & 0 deletions agent_core/core/registry/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
response = await llm.generate_response_async(prompt)
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from agent_core.core.registry.base import ComponentRegistry
Expand Down
2 changes: 2 additions & 0 deletions agent_core/core/registry/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
pointers = memory.retrieve("user preferences")
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from agent_core.core.registry.base import ComponentRegistry
Expand Down
2 changes: 2 additions & 0 deletions agent_core/core/registry/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
await manager.start_session()
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from agent_core.core.registry.base import ComponentRegistry
Expand Down
2 changes: 2 additions & 0 deletions agent_core/core/registry/task_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
task_id = manager.create_task("My Task", "Do something")
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from agent_core.core.registry.base import ComponentRegistry
Expand Down
2 changes: 2 additions & 0 deletions agent_core/decorators/log_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
{id}, {name}, {args}, {kwargs}, {result}, {exception}, {duration_ms}
"""

from __future__ import annotations

import logging
import time
import uuid
Expand Down
2 changes: 2 additions & 0 deletions agent_core/decorators/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
Set "auto_save_interval" to N to save after every N loops (0 = only at exit).
"""

from __future__ import annotations

import atexit
import asyncio
import functools
Expand Down
1 change: 1 addition & 0 deletions app/config/skills_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"xlsx"
],
"disabled_skills": [
"cli-anything",
"agentmail",
"ai-news-collector",
"ai-ppt-generator",
Expand Down
2 changes: 2 additions & 0 deletions app/gui/gui_module.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@

from __future__ import annotations
import json
import ast
import tempfile
Expand Down
2 changes: 2 additions & 0 deletions app/internal_action_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
framework internal functions.
"""

from __future__ import annotations

from typing import Dict, Any, Optional, List, TYPE_CHECKING
from app.llm import LLMInterface, LLMCallType
from app.vlm_interface import VLMInterface
Expand Down
19 changes: 11 additions & 8 deletions app/onboarding/interfaces/steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,14 +446,17 @@ def get_options(self) -> List[StepOption]:
return []

def validate(self, value: Any) -> tuple[bool, Optional[str]]:
    """Validate the form data dict. All fields are optional.

    Args:
        value: The submitted form values; must be a dict.

    Returns:
        ``(True, None)`` when the data is acceptable, otherwise
        ``(False, message)`` where ``message`` describes the first
        problem found.
    """
    if not isinstance(value, dict):
        return False, "Expected a dictionary of form values"

    # The name is optional: a missing or empty value skips the length check.
    # str() is applied so a non-string value is measured by its text form.
    user_name = value.get("user_name")
    if user_name and len(str(user_name)) > 20:
        return False, "Name must be 20 characters or fewer"

    # Validate approval is a list if present
    approval = value.get("approval")
    if approval is not None and not isinstance(approval, list):
        return False, "Approval settings must be a list"

    return True, None

def get_default(self) -> Dict[str, Any]:
"""Return defaults for all fields."""
Expand Down
2 changes: 2 additions & 0 deletions app/security/prompt_sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
- Format manipulation attacks
"""

from __future__ import annotations

import re
from typing import Any

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -829,6 +829,7 @@ export function OnboardingPage() {
value={textValue}
onChange={e => setTextValue(e.target.value)}
placeholder={isApiKey ? 'Enter your API key' : 'Enter a name'}
maxLength={isApiKey ? undefined : 20}
autoFocus
onKeyDown={e => { if (e.key === 'Enter' && canSubmit) handleSubmit() }}
/>
Expand Down
2 changes: 2 additions & 0 deletions app/ui_layer/local_llm_setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# -*- coding: utf-8 -*-
"""Local LLM setup utilities for Ollama."""

from __future__ import annotations

import asyncio
import json
import logging
Expand Down
Loading