CraftOS-dev · ahmad-ajmal · Apr 17, 2026 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026
diff --git a/agent_core/core/embedding_interface.py b/agent_core/core/embedding_interface.py
@@ -12,6 +12,8 @@
 - GOOGLE_API_KEY (for provider="gemini")
 """
 
+from __future__ import annotations
+
 import os
 from typing import List, Optional
 

diff --git a/agent_core/core/impl/action/router.py b/agent_core/core/impl/action/router.py
@@ -6,6 +6,8 @@
 based on user queries using LLM reasoning.
 """
 
+from __future__ import annotations
+
 import json
 import ast
 from typing import Optional, List, Dict, Any, Tuple

diff --git a/agent_core/core/impl/llm/interface.py b/agent_core/core/impl/llm/interface.py
@@ -1184,9 +1184,10 @@ def _generate_openai(
             # Always enforce JSON output format
             request_kwargs["response_format"] = {"type": "json_object"}
 
-            # Add prompt_cache_key when call_type is provided for better cache routing
-            # This helps when alternating between different call types (reasoning, action_selection)
-            if call_type and system_prompt and len(system_prompt) >= config.min_cache_tokens:
+            # Add prompt_cache_key for OpenAI/DeepSeek cache routing.
+            # Grok (xAI) does not support prompt_cache_key — it uses automatic
+            # prefix caching and ignores this parameter, so skip it for Grok.
+            if self.provider != "grok" and call_type and system_prompt and len(system_prompt) >= config.min_cache_tokens:
                 prompt_hash = hashlib.sha256(system_prompt.encode()).hexdigest()[:16]
                 cache_key = f"{call_type}_{prompt_hash}"
                 request_kwargs["extra_body"] = {"prompt_cache_key": cache_key}
@@ -1197,21 +1198,26 @@ def _generate_openai(
             token_count_input = response.usage.prompt_tokens
             token_count_output = response.usage.completion_tokens
 
-            # Extract cached tokens from prompt_tokens_details (OpenAI automatic caching)
-            # Available for prompts ≥1024 tokens
-            prompt_tokens_details = getattr(response.usage, "prompt_tokens_details", None)
-            if prompt_tokens_details:
-                cached_tokens = getattr(prompt_tokens_details, "cached_tokens", 0) or 0
+            # Extract cached tokens — field name differs by provider:
+            # - OpenAI:  response.usage.prompt_tokens_details.cached_tokens
+            # - Grok (xAI): response.usage.prompt_cache_hit_tokens
+            if self.provider == "grok":
+                cached_tokens = getattr(response.usage, "prompt_cache_hit_tokens", 0) or 0
+            else:
+                prompt_tokens_details = getattr(response.usage, "prompt_tokens_details", None)
+                if prompt_tokens_details:
+                    cached_tokens = getattr(prompt_tokens_details, "cached_tokens", 0) or 0
 
             # Record cache metrics
+            provider_label = self.provider  # "openai", "grok", "deepseek", etc.
             metrics = get_cache_metrics()
             if cached_tokens > 0:
-                logger.info(f"[CACHE] OpenAI {cache_type} cache hit: {cached_tokens}/{token_count_input} tokens from cache")
-                metrics.record_hit("openai", cache_type, cached_tokens=cached_tokens, total_tokens=token_count_input)
+                logger.info(f"[CACHE] {provider_label} {cache_type} cache hit: {cached_tokens}/{token_count_input} tokens from cache")
+                metrics.record_hit(provider_label, cache_type, cached_tokens=cached_tokens, total_tokens=token_count_input)
             elif system_prompt and len(system_prompt) >= config.min_cache_tokens:
                 # Caching should have been attempted (prompt long enough)
                 # This is a miss - either first call or cache expired
-                metrics.record_miss("openai", cache_type, total_tokens=token_count_input)
+                metrics.record_miss(provider_label, cache_type, total_tokens=token_count_input)
 
             status = "success"
         except Exception as exc:
@@ -1262,22 +1268,24 @@ def _generate_ollama(self, system_prompt: str | None, user_prompt: str) -> Dict[
         try:
             payload = {
                 "model": self.model,
-                "system": system_prompt,
                 "prompt": user_prompt,
                 "stream": False,
+                "format": "json",
                 "options": {
                     "temperature": self.temperature,
                 }
             }
+            if system_prompt:
+                payload["system"] = system_prompt
             url: str = f"{self.remote_url.rstrip('/')}/api/generate"
             response = requests.post(url, json=payload, timeout=600)
             response.raise_for_status()
             result = response.json()
 
             content = result.get("response", "").strip()
-            total_tokens = result.get("usage", {}).get("total_tokens", 0)
             token_count_input = result.get("prompt_eval_count", 0)
             token_count_output = result.get("eval_count", 0)
+            total_tokens = token_count_input + token_count_output
             status = "success"
         except Exception as exc:
             exc_obj = exc

diff --git a/agent_core/core/impl/vlm/interface.py b/agent_core/core/impl/vlm/interface.py
@@ -233,7 +233,9 @@ def describe_image_bytes(
             if log_response:
                 logger.info(f"[LLM SEND] system={system_prompt} | user={user_prompt}")
 
-            if self.provider in ("openai", "minimax", "deepseek", "moonshot", "grok"):
+            if self.provider == "deepseek":
+                raise RuntimeError("DeepSeek does not support vision/VLM. Use a different provider for image description.")
+            elif self.provider in ("openai", "minimax", "moonshot", "grok"):
                 response = self._openai_describe_bytes(image_bytes, system_prompt, user_prompt)
             elif self.provider == "remote":
                 response = self._ollama_describe_bytes(image_bytes, system_prompt, user_prompt)
@@ -288,6 +290,17 @@ async def generate_response_async(
 
     # ───────────────────── Provider Helpers ─────────────────────
 
+    @staticmethod
+    def _detect_mime_type(image_bytes: bytes) -> str:
+        """Return the MIME type implied by the leading signature bytes (PNG, GIF, WebP); anything else is reported as image/jpeg."""
+        if image_bytes[:8] == b'\x89PNG\r\n\x1a\n':  # 8-byte PNG signature
+            return "image/png"
+        if image_bytes[:4] == b'GIF8':  # common prefix of "GIF87a" and "GIF89a"
+            return "image/gif"
+        if image_bytes[:4] == b'RIFF' and image_bytes[8:12] == b'WEBP':  # RIFF container tagged WEBP at offset 8
+            return "image/webp"
+        return "image/jpeg"  # fallback: no JPEG signature is checked, so unrecognized or empty data is labelled JPEG
+
     def _report_usage_async(
         self,
         service_type: str,
@@ -318,8 +331,9 @@ def _report_usage_async(
             logger.warning(f"[VLM] Failed to report usage: {e}")
 
     def _openai_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str) -> Dict[str, Any]:
-        """OpenAI vision request with automatic prompt caching metrics."""
+        """OpenAI/Grok vision request with automatic prompt caching metrics."""
         img_b64 = base64.b64encode(image_bytes).decode()
+        mime_type = self._detect_mime_type(image_bytes)
         messages: list[Dict[str, Any]] = []
         if sys:
             messages.append({"role": "system", "content": sys})
@@ -328,7 +342,7 @@ def _openai_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str)
                 "role": "user",
                 "content": [
                     {"type": "text", "text": usr},
-                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}},
+                    {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{img_b64}"}},
                 ],
             }
         )
@@ -375,9 +389,9 @@ def _openai_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str)
         elif sys and len(sys) >= config.min_cache_tokens:
             metrics.record_miss("openai", "automatic_vlm", total_tokens=token_count_input)
 
-        # Report usage via hook
+        # Report usage via hook (use actual provider name, e.g. "grok", "minimax")
         self._report_usage_async(
-            "vlm_openai", "openai", self.model,
+            f"vlm_{self.provider}", self.provider, self.model,
             token_count_input, token_count_output, cached_tokens
         )
 
@@ -393,16 +407,20 @@ def _ollama_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str)
         payload = {
             "model": self.model,
             "prompt": usr,
-            "system": sys,
             "images": [img_b64],
             "stream": False,
-            "temperature": self.temperature,
+            "options": {"temperature": self.temperature},
         }
+        if sys:
+            payload["system"] = sys
         url: str = f"{self.remote_url.rstrip('/')}/api/generate"
         r = requests.post(url, json=payload, timeout=600)
         r.raise_for_status()
-        content = r.json().get("response", "").strip()
-        total_tokens = r.json().get("usage", {}).get("total_tokens", 0)
+        result = r.json()
+        content = result.get("response", "").strip()
+        token_count_input = result.get("prompt_eval_count", 0)
+        token_count_output = result.get("eval_count", 0)
+        total_tokens = token_count_input + token_count_output
 
         return {
             "tokens_used": total_tokens or 0,
@@ -420,7 +438,7 @@ def _gemini_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str)
             image_bytes=image_bytes,
             system_prompt=sys,
             temperature=self.temperature,
-            json_mode=True,
+            json_mode=False,
         )
 
         # Record cache metrics
@@ -447,6 +465,7 @@ def _gemini_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str)
     def _byteplus_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str) -> Dict[str, Any]:
         """BytePlus vision request."""
         img_b64 = base64.b64encode(image_bytes).decode()
+        mime_type = self._detect_mime_type(image_bytes)
         messages: list[Dict[str, Any]] = []
         if sys:
             messages.append({"role": "system", "content": sys})
@@ -456,7 +475,7 @@ def _byteplus_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str
                 "role": "user",
                 "content": [
                     {"type": "text", "text": usr},
-                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}},
+                    {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{img_b64}"}},
                 ],
             }
         )
@@ -501,14 +520,7 @@ def _anthropic_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: st
         img_b64 = base64.b64encode(image_bytes).decode()
         config = get_cache_config()
 
-        # Detect media type from image bytes
-        media_type = "image/jpeg"
-        if image_bytes[:8] == b'\x89PNG\r\n\x1a\n':
-            media_type = "image/png"
-        elif image_bytes[:4] == b'GIF8':
-            media_type = "image/gif"
-        elif image_bytes[:4] == b'RIFF' and image_bytes[8:12] == b'WEBP':
-            media_type = "image/webp"
+        media_type = self._detect_mime_type(image_bytes)
 
         message_content = [
             {

diff --git a/agent_core/core/registry/action.py b/agent_core/core/registry/action.py
@@ -19,6 +19,8 @@
     result = await executor.execute_action(action, input_data)
 """
 
+from __future__ import annotations
+
 from typing import TYPE_CHECKING
 
 from agent_core.core.registry.base import ComponentRegistry

diff --git a/agent_core/core/registry/context.py b/agent_core/core/registry/context.py
@@ -16,6 +16,8 @@
     system_prompt, user_prompt = engine.make_prompt(query="...")
 """
 
+from __future__ import annotations
+
 from typing import TYPE_CHECKING
 
 from agent_core.core.registry.base import ComponentRegistry

diff --git a/agent_core/core/registry/database.py b/agent_core/core/registry/database.py
@@ -18,6 +18,8 @@
     db.list_actions()
 """
 
+from __future__ import annotations
+
 from typing import TYPE_CHECKING
 
 from agent_core.core.registry.base import ComponentRegistry

diff --git a/agent_core/core/registry/event_stream.py b/agent_core/core/registry/event_stream.py
@@ -16,6 +16,8 @@
     manager.log("INFO", "Something happened")
 """
 
+from __future__ import annotations
+
 from typing import TYPE_CHECKING
 
 from agent_core.core.registry.base import ComponentRegistry

diff --git a/agent_core/core/registry/llm.py b/agent_core/core/registry/llm.py
@@ -18,6 +18,8 @@
     response = await llm.generate_response_async(prompt)
 """
 
+from __future__ import annotations
+
 from typing import TYPE_CHECKING
 
 from agent_core.core.registry.base import ComponentRegistry

diff --git a/agent_core/core/registry/memory.py b/agent_core/core/registry/memory.py
@@ -21,6 +21,8 @@
     pointers = memory.retrieve("user preferences")
 """
 
+from __future__ import annotations
+
 from typing import TYPE_CHECKING
 
 from agent_core.core.registry.base import ComponentRegistry

diff --git a/agent_core/core/registry/state.py b/agent_core/core/registry/state.py
@@ -19,6 +19,8 @@
     await manager.start_session()
 """
 
+from __future__ import annotations
+
 from typing import TYPE_CHECKING
 
 from agent_core.core.registry.base import ComponentRegistry

diff --git a/agent_core/core/registry/task_manager.py b/agent_core/core/registry/task_manager.py
@@ -16,6 +16,8 @@
     task_id = manager.create_task("My Task", "Do something")
 """
 
+from __future__ import annotations
+
 from typing import TYPE_CHECKING
 
 from agent_core.core.registry.base import ComponentRegistry

diff --git a/agent_core/decorators/log_events.py b/agent_core/decorators/log_events.py
@@ -8,6 +8,8 @@
   {id}, {name}, {args}, {kwargs}, {result}, {exception}, {duration_ms}
 """
 
+from __future__ import annotations
+
 import logging
 import time
 import uuid

diff --git a/agent_core/decorators/profiler.py b/agent_core/decorators/profiler.py
@@ -28,6 +28,8 @@
     Set "auto_save_interval" to N to save after every N loops (0 = only at exit).
 """
 
+from __future__ import annotations
+
 import atexit
 import asyncio
 import functools

diff --git a/app/config/skills_config.json b/app/config/skills_config.json
@@ -9,6 +9,7 @@
     "xlsx"
   ],
   "disabled_skills": [
+    "cli-anything",
     "agentmail",
     "ai-news-collector",
     "ai-ppt-generator",

diff --git a/app/gui/gui_module.py b/app/gui/gui_module.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import json
 import ast
 import tempfile

diff --git a/app/internal_action_interface.py b/app/internal_action_interface.py
@@ -5,6 +5,8 @@
 framework internal functions.
 """
 
+from __future__ import annotations
+
 from typing import Dict, Any, Optional, List, TYPE_CHECKING
 from app.llm import LLMInterface, LLMCallType
 from app.vlm_interface import VLMInterface

diff --git a/app/onboarding/interfaces/steps.py b/app/onboarding/interfaces/steps.py
@@ -446,14 +446,17 @@ def get_options(self) -> List[StepOption]:
         return []
 
     def validate(self, value: Any) -> tuple[bool, Optional[str]]:
         """Validate the form data dict. All fields are optional."""
         if not isinstance(value, dict):
             return False, "Expected a dictionary of form values"
+        user_name = value.get("user_name")
+        if user_name and len(str(user_name)) > 20:  # same 20-character cap as maxLength on the onboarding name input
+            return False, "Name must be 20 characters or fewer"
         # Validate approval is a list if present
         approval = value.get("approval")
         if approval is not None and not isinstance(approval, list):
             return False, "Approval settings must be a list"
         return True, None
 
     def get_default(self) -> Dict[str, Any]:
         """Return defaults for all fields."""

diff --git a/app/security/prompt_sanitizer.py b/app/security/prompt_sanitizer.py
@@ -9,6 +9,8 @@
 - Format manipulation attacks
 """
 
+from __future__ import annotations
+
 import re
 from typing import Any
 

diff --git a/app/ui_layer/browser/frontend/src/pages/Onboarding/OnboardingPage.tsx b/app/ui_layer/browser/frontend/src/pages/Onboarding/OnboardingPage.tsx
@@ -829,6 +829,7 @@ export function OnboardingPage() {
           value={textValue}
           onChange={e => setTextValue(e.target.value)}
           placeholder={isApiKey ? 'Enter your API key' : 'Enter a name'}
+          maxLength={isApiKey ? undefined : 20}
           autoFocus
           onKeyDown={e => { if (e.key === 'Enter' && canSubmit) handleSubmit() }}
         />

diff --git a/app/ui_layer/local_llm_setup.py b/app/ui_layer/local_llm_setup.py
@@ -1,6 +1,8 @@
 # -*- coding: utf-8 -*-
 """Local LLM setup utilities for Ollama."""
 
+from __future__ import annotations
+
 import asyncio
 import json
 import logging