From 9c99622d1550cd2cedbd5d27058c0b13d5f85b69 Mon Sep 17 00:00:00 2001 From: Korivi Date: Mon, 13 Apr 2026 16:02:54 +0900 Subject: [PATCH 01/10] CLI Anything added --- skills/cli-anything/SKILL.md | 187 +++++++++++++++++++++++++++++++++ skills/cli-anything/_meta.json | 6 ++ 2 files changed, 193 insertions(+) create mode 100644 skills/cli-anything/SKILL.md create mode 100644 skills/cli-anything/_meta.json diff --git a/skills/cli-anything/SKILL.md b/skills/cli-anything/SKILL.md new file mode 100644 index 00000000..5194429b --- /dev/null +++ b/skills/cli-anything/SKILL.md @@ -0,0 +1,187 @@ +--- +name: cli-anything +description: "Generate agent-native CLI harnesses for any GUI application using the CLI-Anything methodology, or discover and install pre-built CLIs via CLI-Hub." +metadata: {"clawdbot":{"emoji":"⚡","os":["darwin","linux","windows"],"requires":{"bins":["python"]}}} +--- + +# CLI-Anything Skill + +CLI-Anything transforms any GUI application into an agent-native command-line interface. Use this skill when the user asks to: +- Generate a CLI harness for any software (GIMP, Blender, LibreOffice, etc.) +- Install or discover CLIs via CLI-Hub +- Refine or test an existing generated harness + +--- + +## Quick Install (CLI-Hub) + +For software that already has a pre-built harness: + +```bash +pip install cli-anything-hub +cli-hub install +``` + +Browse the full catalog: https://hkuds.github.io/CLI-Anything/ + +--- + +## Generate a New CLI Harness + +Follow the **7-Phase Methodology** below. Work sequentially — each phase depends on the prior. 
+ +### Phase 1 — Codebase Analysis + +Before writing code, study the target application: + +``` +- Identify the backend engine (separate from the GUI presentation layer) +- Map each GUI action to its underlying API or Python call +- Understand the data model and native file formats (e.g., .blend, ODF, SVG) +- Locate any existing CLI entry points or scripting interfaces +- Catalog the undo/redo and session management system +``` + +### Phase 2 — CLI Architecture Design + +Choose one of: +- **Stateful REPL** — for interactive, session-based workflows +- **Subcommand CLI** — for scriptable, one-shot invocations +- **Both** — recommended; REPL wraps the subcommand interface + +Design command groups that mirror the app's logical domains (e.g., `image`, `layer`, `export` for GIMP). Plan dual output: human-readable text and machine-readable `--json`. + +### Phase 3 — Implementation + +Directory layout: +``` +cli_anything/ # Namespace package — NO __init__.py here +└── / # Sub-package — HAS __init__.py + ├── __main__.py + ├── README.md + ├── _cli.py + ├── core/ # Domain modules wrapping the real software + ├── utils/ # Shared utilities + repl_skin.py + └── tests/ + ├── TEST.md + ├── test_core.py + └── test_full_e2e.py +``` + +**Critical rule**: The CLI MUST call the actual software for rendering and export — never reimplement the software's functionality in Python. Generate valid native project files and hand them to the real application backend. + +Required patterns for every command: +- `--json` flag for machine-readable output +- Fail loudly with unambiguous error messages +- Introspection commands (`info`, `list`, `status`) for state inspection + +Use the unified REPL skin (`repl_skin.py` from `cli-anything-plugin/repl_skin.py`) so all generated CLIs share a consistent interface. 
+ +### Phase 4 — Test Planning (write TEST.md Part 1) + +Before any test code, document in `tests/TEST.md`: +- Test inventory and what each test covers +- Unit test plans (synthetic data, no external deps) +- E2E test plans (real software backend invoked) +- Realistic end-to-end workflow scenarios + +### Phase 5 — Test Implementation + +Four layers, all required: +1. **Unit tests** — synthetic data, deterministic, fast +2. **E2E native tests** — verify project file generation and structure +3. **E2E backend tests** — invoke the real software, check output exists with correct format (magic bytes, ZIP structure, pixel analysis, etc.) +4. **CLI subprocess tests** — install the CLI entry point, run full workflows end-to-end + +**Never assume an export is correct because it ran without errors.** Validate outputs programmatically and print artifact paths for manual inspection. + +### Phase 6 — Test Documentation (write TEST.md Part 2) + +Append full `pytest` output and summary statistics to `TEST.md`. + +### Phase 6.5 — SKILL.md Generation + +Create `cli_anything//skills/SKILL.md` with: +- YAML frontmatter for agent discovery (`name`, `description`, `tags`, `requires`) +- All command groups and subcommands +- Usage examples for common workflows +- Agent-specific guidance for `--json` output and error handling + +The REPL should print the absolute path to `SKILL.md` on startup so agents can find it. + +### Phase 7 — Package & Install + +```bash +# setup.py uses PEP 420 namespace packaging +cd cli_anything/ +pip install -e . 
+ +# Verify the CLI is on PATH +which cli-anything- +cli-anything- --help +``` + +Publish to PyPI when ready: +```bash +python -m build +twine upload dist/* +``` + +--- + +## Using a Generated CLI + +```bash +# Interactive REPL (default when no subcommand given) +cli-anything- + +# One-shot subcommand with JSON output for agent consumption +cli-anything- --json [args] + +# Help +cli-anything- --help +cli-anything- --help +``` + +--- + +## Refining an Existing Harness + +After initial generation, run a gap analysis: + +```bash +# Broad refinement +/cli-anything:refine ./ + +# Focused refinement on specific capabilities +/cli-anything:refine ./ "batch processing and filters" +``` + +Then re-run tests: `/cli-anything:test ` + +--- + +## Supported Applications (Pre-built) + +CLI-Anything has verified harnesses for 26+ applications: + +| Category | Applications | +|---|---| +| Creative | GIMP, Blender, Inkscape, Krita, MuseScore | +| Office | LibreOffice, Zotero | +| Media | Audacity, OBS Studio, Kdenlive, Shotcut, VideoCaptioner | +| Diagramming | Draw.io, Mermaid | +| AI/ML | ComfyUI, Ollama, NotebookLM | +| Web/Cloud | Zoom, AdGuard Home, Exa | +| Dev Tools | Godot Engine, RenderDoc | + +--- + +## Architecture Pitfalls + +**The Rendering Gap** — project files may reference filters/effects that simple file readers ignore. Solution priority: +1. Use the app's native renderer +2. Build a translation layer for effect conversion +3. Generate a render script as fallback + +**Testing with missing software** — tests MUST NOT skip or fake results when the target software is missing. They should fail loudly so the absence is visible. 
diff --git a/skills/cli-anything/_meta.json b/skills/cli-anything/_meta.json new file mode 100644 index 00000000..af8c9adc --- /dev/null +++ b/skills/cli-anything/_meta.json @@ -0,0 +1,6 @@ +{ + "ownerId": "kn70pywhg0fyz996kpa8xj89s57yhv26", + "slug": "cli-anything", + "version": "1.0.0", + "publishedAt": 1744574400000 +} From 5d121ea43281b733e609516a8cc4f019a6dcdecf Mon Sep 17 00:00:00 2001 From: Korivi Date: Tue, 14 Apr 2026 08:11:14 +0900 Subject: [PATCH 02/10] Added name character limit to 20 Added name character limit to 20 --- app/onboarding/interfaces/steps.py | 3 ++- .../browser/frontend/src/pages/Onboarding/OnboardingPage.tsx | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/app/onboarding/interfaces/steps.py b/app/onboarding/interfaces/steps.py index e8899440..1e95dc27 100644 --- a/app/onboarding/interfaces/steps.py +++ b/app/onboarding/interfaces/steps.py @@ -209,7 +209,8 @@ def get_options(self) -> List[StepOption]: return [] def validate(self, value: Any) -> tuple[bool, Optional[str]]: - # Optional, any string is valid + if value and len(str(value)) > 20: + return False, "Agent name must be 20 characters or fewer" return True, None def get_default(self) -> str: diff --git a/app/ui_layer/browser/frontend/src/pages/Onboarding/OnboardingPage.tsx b/app/ui_layer/browser/frontend/src/pages/Onboarding/OnboardingPage.tsx index 46bf5e23..e1b55b0d 100644 --- a/app/ui_layer/browser/frontend/src/pages/Onboarding/OnboardingPage.tsx +++ b/app/ui_layer/browser/frontend/src/pages/Onboarding/OnboardingPage.tsx @@ -510,6 +510,7 @@ export function OnboardingPage() { value={textValue} onChange={e => setTextValue(e.target.value)} placeholder={isApiKey ? 'Enter your API key' : 'Enter a name'} + maxLength={isApiKey ? 
undefined : 20} autoFocus onKeyDown={e => { if (e.key === 'Enter' && canSubmit) handleSubmit() }} /> From f85346bebf313934a305e77aaccdbad73b4bef6c Mon Sep 17 00:00:00 2001 From: Korivi Date: Tue, 14 Apr 2026 08:31:16 +0900 Subject: [PATCH 03/10] Add CLI-Anything integration to CraftBot --- app/config/skills_config.json | 1 + 1 file changed, 1 insertion(+) diff --git a/app/config/skills_config.json b/app/config/skills_config.json index 9f6df29a..0975a5d4 100644 --- a/app/config/skills_config.json +++ b/app/config/skills_config.json @@ -1,6 +1,7 @@ { "auto_load": true, "enabled_skills": [ + "cli-anything", "docx", "pdf", "playwright-mcp", From 139eae850b5714caa4e7b0aa47bb592052339d35 Mon Sep 17 00:00:00 2001 From: Korivi Date: Tue, 14 Apr 2026 09:59:47 +0900 Subject: [PATCH 04/10] Grok KV caching issue fixed! MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There were two bugs fixed: Bug 1: `prompt_cache_key` is an OpenAI-specific routing hint in `extra_body`. xAI’s API ignores it, so it doesn’t help with cache routing for Grok. So I skipped it when `self.provider == "grok"`. Bug 2: Wrong field was used for reading cached tokens: * OpenAI → `usage.prompt_tokens_details.cached_tokens` * Grok (xAI) → `usage.prompt_cache_hit_tokens` ---------------- The code was always reading the OpenAI field, so Grok always returned 0 cached tokens, making it look like every call was a full cache miss. I fixed this by branching on `self.provider == "grok"` to read the correct field. Additionally, I updated the cache metrics log to show the actual provider name (grok, openai, etc.). 
---------------- I pushed this fix to the same branch, "feature/CLI" --- agent_core/core/impl/llm/interface.py | 28 ++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/agent_core/core/impl/llm/interface.py b/agent_core/core/impl/llm/interface.py index 94b7923d..5114cfae 100644 --- a/agent_core/core/impl/llm/interface.py +++ b/agent_core/core/impl/llm/interface.py @@ -1155,9 +1155,10 @@ def _generate_openai( # Always enforce JSON output format request_kwargs["response_format"] = {"type": "json_object"} - # Add prompt_cache_key when call_type is provided for better cache routing - # This helps when alternating between different call types (reasoning, action_selection) - if call_type and system_prompt and len(system_prompt) >= config.min_cache_tokens: + # Add prompt_cache_key for OpenAI/DeepSeek cache routing. + # Grok (xAI) does not support prompt_cache_key — it uses automatic + # prefix caching and ignores this parameter, so skip it for Grok. + if self.provider != "grok" and call_type and system_prompt and len(system_prompt) >= config.min_cache_tokens: prompt_hash = hashlib.sha256(system_prompt.encode()).hexdigest()[:16] cache_key = f"{call_type}_{prompt_hash}" request_kwargs["extra_body"] = {"prompt_cache_key": cache_key} @@ -1168,21 +1169,26 @@ def _generate_openai( token_count_input = response.usage.prompt_tokens token_count_output = response.usage.completion_tokens - # Extract cached tokens from prompt_tokens_details (OpenAI automatic caching) - # Available for prompts ≥1024 tokens - prompt_tokens_details = getattr(response.usage, "prompt_tokens_details", None) - if prompt_tokens_details: - cached_tokens = getattr(prompt_tokens_details, "cached_tokens", 0) or 0 + # Extract cached tokens — field name differs by provider: + # - OpenAI: response.usage.prompt_tokens_details.cached_tokens + # - Grok (xAI): response.usage.prompt_cache_hit_tokens + if self.provider == "grok": + cached_tokens = getattr(response.usage, 
"prompt_cache_hit_tokens", 0) or 0 + else: + prompt_tokens_details = getattr(response.usage, "prompt_tokens_details", None) + if prompt_tokens_details: + cached_tokens = getattr(prompt_tokens_details, "cached_tokens", 0) or 0 # Record cache metrics + provider_label = self.provider # "openai", "grok", "deepseek", etc. metrics = get_cache_metrics() if cached_tokens > 0: - logger.info(f"[CACHE] OpenAI {cache_type} cache hit: {cached_tokens}/{token_count_input} tokens from cache") - metrics.record_hit("openai", cache_type, cached_tokens=cached_tokens, total_tokens=token_count_input) + logger.info(f"[CACHE] {provider_label} {cache_type} cache hit: {cached_tokens}/{token_count_input} tokens from cache") + metrics.record_hit(provider_label, cache_type, cached_tokens=cached_tokens, total_tokens=token_count_input) elif system_prompt and len(system_prompt) >= config.min_cache_tokens: # Caching should have been attempted (prompt long enough) # This is a miss - either first call or cache expired - metrics.record_miss("openai", cache_type, total_tokens=token_count_input) + metrics.record_miss(provider_label, cache_type, total_tokens=token_count_input) status = "success" except Exception as exc: From 26da4b9d85ca26ae1651f011889121527cba27ea Mon Sep 17 00:00:00 2001 From: korivi-CraftOS Date: Tue, 14 Apr 2026 17:31:24 +0900 Subject: [PATCH 05/10] Delete craftbot.pid --- craftbot.pid | 1 - 1 file changed, 1 deletion(-) delete mode 100644 craftbot.pid diff --git a/craftbot.pid b/craftbot.pid deleted file mode 100644 index b86a3065..00000000 --- a/craftbot.pid +++ /dev/null @@ -1 +0,0 @@ -10948 \ No newline at end of file From 403398fb4ef39a81150b0397ad64d5a04775bd50 Mon Sep 17 00:00:00 2001 From: korivi-CraftOS Date: Tue, 14 Apr 2026 17:31:34 +0900 Subject: [PATCH 06/10] Delete craftbot.log --- craftbot.log | 299 --------------------------------------------------- 1 file changed, 299 deletions(-) delete mode 100644 craftbot.log diff --git a/craftbot.log b/craftbot.log deleted file 
mode 100644 index fe1ee0ea..00000000 --- a/craftbot.log +++ /dev/null @@ -1,299 +0,0 @@ - -============================================================ -CraftBot service started at 2026-04-08 14:51:22 -Command: C:\Python314\pythonw.exe C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py --no-open-browser -============================================================ -Traceback (most recent call last): - File "C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py", line 1074, in - print_browser_header() - ~~~~~~~~~~~~~~~~~~~~^^ - File "C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py", line 610, in print_browser_header - print("\n\U0001f916 CraftBot") - ~~~~~^^^^^^^^^^^^^^^^^ - File "C:\Python314\Lib\encodings\cp1252.py", line 19, in encode - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - ~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -UnicodeEncodeError: 'charmap' codec can't encode character '\U0001f916' in position 2: character maps to - -============================================================ -CraftBot service started at 2026-04-08 14:59:15 -Command: C:\Python314\pythonw.exe C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py --no-open-browser -============================================================ -Traceback (most recent call last): - File "C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py", line 1074, in - print_browser_header() - ~~~~~~~~~~~~~~~~~~~~^^ - File "C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py", line 610, in print_browser_header - print("\n\U0001f916 CraftBot") - ~~~~~^^^^^^^^^^^^^^^^^ - File "C:\Python314\Lib\encodings\cp1252.py", line 19, in encode - return 
codecs.charmap_encode(input,self.errors,encoding_table)[0] - ~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -UnicodeEncodeError: 'charmap' codec can't encode character '\U0001f916' in position 2: character maps to - -============================================================ -CraftBot service started at 2026-04-08 15:07:33 -Command: C:\Python314\pythonw.exe C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py --no-open-browser -============================================================ -Traceback (most recent call last): - File "C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py", line 1074, in - print_browser_header() - ~~~~~~~~~~~~~~~~~~~~^^ - File "C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py", line 610, in print_browser_header - print("\n\U0001f916 CraftBot") - ~~~~~^^^^^^^^^^^^^^^^^ - File "C:\Python314\Lib\encodings\cp1252.py", line 19, in encode - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - ~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -UnicodeEncodeError: 'charmap' codec can't encode character '\U0001f916' in position 2: character maps to - -============================================================ -CraftBot service started at 2026-04-08 15:18:54 -Command: C:\Python314\pythonw.exe C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py --no-open-browser -============================================================ - -🤖 CraftBot -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Mode: Browser - - [ 1/8] Starting frontend server... ✓ - [ 2/8] Starting agent backend... ✓ - [ 3/8] Initializing agent... ✓ - [ 4/8] Connecting to MCP servers... ✓ - [ 5/8] Loading skills... ✓ - [ 6/8] Loading libraries... ✓ - [ 7/8] Starting scheduler... ✓ - [ 8/8] Starting communications... 
✓ - - ---- Cleanup Initiated (Exit Status: 1073807364) --- -[*] Skipping Docker cleanup (not started in CLI mode). - -============================================================ -CraftBot service started at 2026-04-08 16:27:25 -Command: C:\Python314\pythonw.exe C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py --no-open-browser -============================================================ - -🤖 CraftBot -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Mode: Browser - - [ 1/8] Starting frontend server... ✓ - [ 2/8] Starting agent backend... ✓ - [ 3/8] Initializing agent... ✓ - [ 4/8] Connecting to MCP servers... ✓ - [ 5/8] Loading skills... ✓ - [ 6/8] Loading libraries... ✓ - [ 7/8] Starting scheduler... ✓ - [ 8/8] Starting communications... ✓ - -============================================================ -CraftBot service started at 2026-04-08 16:51:37 -Command: C:\Python314\pythonw.exe C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py --no-open-browser -============================================================ - -🤖 CraftBot -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Mode: Browser - - [ 1/8] Starting frontend server... ✓ - [ 2/8] Starting agent backend... ✓ - [ 3/8] Initializing agent... ✓ - [ 4/8] Connecting to MCP servers... ✓ - [ 5/8] Loading skills... ✓ - [ 6/8] Loading libraries... ✓ - [ 7/8] Starting scheduler... ✓ - [ 8/8] Starting communications... ✓ - -============================================================ -CraftBot service started at 2026-04-08 17:18:37 -Command: C:\Python314\pythonw.exe C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py --no-open-browser -============================================================ - -🤖 CraftBot -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Mode: Browser - - [ 1/8] Starting frontend server... 
✓ - [ 2/8] Starting agent backend... ✓ - [ 3/8] Initializing agent... ✓ - [ 4/8] Connecting to MCP servers... ✓ - [ 5/8] Loading skills... ✓ - [ 6/8] Loading libraries... ✓ - [ 7/8] Starting scheduler... ✓ - [ 8/8] Starting communications... ✓ - -============================================================ -CraftBot service started at 2026-04-08 17:38:37 -Command: C:\Python314\pythonw.exe C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py --no-open-browser -============================================================ - -🤖 CraftBot -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Mode: Browser - - [ 1/8] Starting frontend server... ✓ - [ 2/8] Starting agent backend... ✓ - [ 3/8] Initializing agent... ✓ - [ 4/8] Connecting to MCP servers... ✓ - [ 5/8] Loading skills... ✓ - [ 6/8] Loading libraries... ✓ - [ 7/8] Starting scheduler... ✓ - [ 8/8] Starting communications... ✓ - -============================================================ -CraftBot service started at 2026-04-08 17:52:20 -Command: C:\Python314\pythonw.exe C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py --no-open-browser -============================================================ - -🤖 CraftBot -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Mode: Browser - - [ 1/8] Starting frontend server... ✓ - [ 2/8] Starting agent backend... ✓ - [ 3/8] Initializing agent... ✓ - [ 4/8] Connecting to MCP servers... ✓ - [ 5/8] Loading skills... ✓ - [ 6/8] Loading libraries... ✓ - [ 7/8] Starting scheduler... ✓ - [ 8/8] Starting communications... 
✓ - -============================================================ -CraftBot service started at 2026-04-08 17:59:29 -Command: C:\Python314\pythonw.exe C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py --no-open-browser -============================================================ - -🤖 CraftBot -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Mode: Browser - - [ 1/8] Starting frontend server... ✓ - [ 2/8] Starting agent backend... ✓ - [ 3/8] Initializing agent... ✓ - [ 4/8] Connecting to MCP servers... ✓ - [ 5/8] Loading skills... ✓ - [ 6/8] Loading libraries... ✓ - [ 7/8] Starting scheduler... ✓ - [ 8/8] Starting communications... ✓ - -============================================================ -CraftBot service started at 2026-04-08 18:05:16 -Command: C:\Python314\pythonw.exe C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py --no-open-browser -============================================================ - -🤖 CraftBot -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Mode: Browser - - [ 1/8] Starting frontend server... ✓ - [ 2/8] Starting agent backend... ✓ - [ 3/8] Initializing agent... ✓ - [ 4/8] Connecting to MCP servers... ✓ - [ 5/8] Loading skills... ✓ - [ 6/8] Loading libraries... ✓ - [ 7/8] Starting scheduler... ✓ - [ 8/8] Starting communications... ✓ - -============================================================ -CraftBot service started at 2026-04-08 18:16:07 -Command: C:\Python314\pythonw.exe C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py --no-open-browser -============================================================ - -🤖 CraftBot -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Mode: Browser - - [ 1/8] Starting frontend server... ✓ - [ 2/8] Starting agent backend... ✓ - [ 3/8] Initializing agent... ✓ - [ 4/8] Connecting to MCP servers... ✓ - [ 5/8] Loading skills... 
✓ - [ 6/8] Loading libraries... ✓ - [ 7/8] Starting scheduler... ✓ - [ 8/8] Starting communications... ✓ - -============================================================ -CraftBot service started at 2026-04-08 20:52:14 -Command: C:\Python314\pythonw.exe C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py --no-open-browser -============================================================ - -🤖 CraftBot -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Mode: Browser - - [ 1/8] Starting frontend server... ✓ - [ 2/8] Starting agent backend... ✓ - [ 3/8] Initializing agent... ✓ - [ 4/8] Connecting to MCP servers... ✓ - [ 5/8] Loading skills... ✓ - [ 6/8] Loading libraries... ✓ - [ 7/8] Starting scheduler... ✓ - [ 8/8] Starting communications... ✓ - -============================================================ -CraftBot service started at 2026-04-08 20:57:52 -Command: C:\Python314\pythonw.exe C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py --no-open-browser -============================================================ - -🤖 CraftBot -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Mode: Browser - - [ 1/8] Starting frontend server... ✓ - [ 2/8] Starting agent backend... ✓ - [ 3/8] Initializing agent... ✓ - [ 4/8] Connecting to MCP servers... ✓ - [ 5/8] Loading skills... ✓ - [ 6/8] Loading libraries... ✓ - [ 7/8] Starting scheduler... ✓ - [ 8/8] Starting communications... ✓ - -============================================================ -CraftBot service started at 2026-04-08 21:06:10 -Command: C:\Python314\pythonw.exe C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py --no-open-browser -============================================================ - -🤖 CraftBot -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Mode: Browser - - [ 1/8] Starting frontend server... ✓ - [ 2/8] Starting agent backend... 
✓ - [ 3/8] Initializing agent... ✓ - [ 4/8] Connecting to MCP servers... ✓ - [ 5/8] Loading skills... ✓ - [ 6/8] Loading libraries... ✓ - [ 7/8] Starting scheduler... ✓ - [ 8/8] Starting communications... ✓ - -============================================================ -CraftBot service started at 2026-04-09 00:58:46 -Command: C:\Python314\pythonw.exe C:\Users\ganiy\OneDrive\Desktop\OneDrive\Korivi Important Data\Aether\CraftOS\CraftBot\CraftBot\run.py --no-open-browser -============================================================ - -🤖 CraftBot -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Mode: Browser - - [ 1/8] Starting frontend server... ✓ - [ 2/8] Starting agent backend... ✓ - [ 3/8] Initializing agent... ✓ - [ 4/8] Connecting to MCP servers... ✓ - [ 5/8] Loading skills... ✓ - [ 6/8] Loading libraries... ✓ - [ 7/8] Starting scheduler... ✓ - [ 8/8] Starting communications... ✓ From 5feaa80c1d80118ced849846a50b10a2b50747ad Mon Sep 17 00:00:00 2001 From: Korivi Date: Wed, 15 Apr 2026 10:48:47 +0900 Subject: [PATCH 07/10] CLI SKILLS Improvements --- skills/cli-anything/SKILL.md | 273 +++++++++++++++++------------------ 1 file changed, 131 insertions(+), 142 deletions(-) diff --git a/skills/cli-anything/SKILL.md b/skills/cli-anything/SKILL.md index 5194429b..e0b3b509 100644 --- a/skills/cli-anything/SKILL.md +++ b/skills/cli-anything/SKILL.md @@ -1,187 +1,176 @@ --- name: cli-anything -description: "Generate agent-native CLI harnesses for any GUI application using the CLI-Anything methodology, or discover and install pre-built CLIs via CLI-Hub." -metadata: {"clawdbot":{"emoji":"⚡","os":["darwin","linux","windows"],"requires":{"bins":["python"]}}} +description: "Use any supported GUI application (GIMP, Blender, LibreOffice, Audacity, OBS, etc.) on behalf of the user. Auto-installs the app and CLI harness, then executes the task directly." 
+action-sets: ["shell", "file_operations"] --- # CLI-Anything Skill -CLI-Anything transforms any GUI application into an agent-native command-line interface. Use this skill when the user asks to: -- Generate a CLI harness for any software (GIMP, Blender, LibreOffice, etc.) -- Install or discover CLIs via CLI-Hub -- Refine or test an existing generated harness +**Core rule: Do everything yourself. Never give the user a command to run. Never explain steps. Just execute the task and report the result.** --- -## Quick Install (CLI-Hub) - -For software that already has a pre-built harness: - -```bash -pip install cli-anything-hub -cli-hub install -``` - -Browse the full catalog: https://hkuds.github.io/CLI-Anything/ +## Supported Apps Reference + +Use this table to look up the correct names for every step. + +| App | cli-hub name | Windows (winget) | macOS (brew cask) | Linux (apt) | +|---|---|---|---|---| +| GIMP | `gimp` | `GIMP.GIMP` | `gimp` | `gimp` | +| Blender | `blender` | `BlenderFoundation.Blender` | `blender` | `blender` | +| Inkscape | `inkscape` | `Inkscape.Inkscape` | `inkscape` | `inkscape` | +| Audacity | `audacity` | `Audacity.Audacity` | `audacity` | `audacity` | +| OBS Studio | `obs` | `OBSProject.OBSStudio` | `obs` | `obs-studio` | +| Kdenlive | `kdenlive` | `KDE.Kdenlive` | `kdenlive` | `kdenlive` | +| Shotcut | `shotcut` | `Meltytech.Shotcut` | `shotcut` | `shotcut` | +| Krita | `krita` | `KDE.Krita` | `krita` | `krita` | +| LibreOffice | `libreoffice` | `TheDocumentFoundation.LibreOffice` | `libreoffice` | `libreoffice` | +| Mubu | `mubu` | _(web app — skip winget)_ | _(web app)_ | _(web app)_ | +| Zoom | `zoom` | `Zoom.Zoom` | `zoom` | `zoom` | +| Draw.io | `draw-io` | `JGraph.Draw` | `drawio` | _(AppImage)_ | +| Mermaid | `mermaid` | `OpenJS.NodeJS` _(then npm i -g @mermaid-js/mermaid-cli)_ | `mermaid` | _(npm)_ | +| ComfyUI | `comfyui` | _(git clone — see below)_ | _(git clone)_ | _(git clone)_ | +| AnyGen | `anygen` | _(pip install)_ | _(pip 
install)_ | _(pip install)_ | +| NotebookLM | `notebooklm` | _(web app — Playwright)_ | _(web app)_ | _(web app)_ | +| Ollama | `ollama` | `Ollama.Ollama` | `ollama` | _(curl install)_ | +| AdGuard Home | `adguard-home` | `AdGuard.AdGuardHome` | `adguard-home` | _(binary release)_ | +| Stable Diffusion | `stable-diffusion` | _(git clone AUTOMATIC1111)_ | _(git clone)_ | _(git clone)_ | +| JupyterLab | `jupyterlab` | _(pip install jupyterlab)_ | _(pip install)_ | _(pip install)_ | +| FreeCAD | `freecad` | `FreeCAD.FreeCAD` | `freecad` | `freecad` | +| QGIS | `qgis` | `OSGeo.QGIS` | `qgis` | `qgis` | +| Grafana | `grafana` | `GrafanaLabs.Grafana` | `grafana` | `grafana` | +| Gitea | `gitea` | `Gitea.Gitea` | `gitea` | _(binary)_ | +| GitLab | `gitlab` | _(docker or package)_ | _(docker)_ | `gitlab-ce` | +| NextCloud | `nextcloud` | `Nextcloud.NextcloudDesktop` | `nextcloud` | _(snap/docker)_ | +| Jenkins | `jenkins` | `Jenkins.Jenkins` | `jenkins` | `jenkins` | --- -## Generate a New CLI Harness - -Follow the **7-Phase Methodology** below. Work sequentially — each phase depends on the prior. +## Execution Flow (follow every time — use EXACT timeouts listed) -### Phase 1 — Codebase Analysis +**CRITICAL: Always pass the timeout shown below to run_shell. Never use the default (30s). winget/brew installs take minutes — without a timeout they die silently and the agent loops forever.** -Before writing code, study the target application: - -``` -- Identify the backend engine (separate from the GUI presentation layer) -- Map each GUI action to its underlying API or Python call -- Understand the data model and native file formats (e.g., .blend, ODF, SVG) -- Locate any existing CLI entry points or scripting interfaces -- Catalog the undo/redo and session management system +### Step 1 — Detect OS +Run with `timeout: 10`: +```bash +python -c "import platform; print(platform.system())" ``` +Result: `Windows`, `Darwin`, or `Linux`. 
-### Phase 2 — CLI Architecture Design - -Choose one of: -- **Stateful REPL** — for interactive, session-based workflows -- **Subcommand CLI** — for scriptable, one-shot invocations -- **Both** — recommended; REPL wraps the subcommand interface - -Design command groups that mirror the app's logical domains (e.g., `image`, `layer`, `export` for GIMP). Plan dual output: human-readable text and machine-readable `--json`. - -### Phase 3 — Implementation - -Directory layout: -``` -cli_anything/ # Namespace package — NO __init__.py here -└── / # Sub-package — HAS __init__.py - ├── __main__.py - ├── README.md - ├── _cli.py - ├── core/ # Domain modules wrapping the real software - ├── utils/ # Shared utilities + repl_skin.py - └── tests/ - ├── TEST.md - ├── test_core.py - └── test_full_e2e.py +### Step 2 — Check if the app is installed +Run with `timeout: 10`: +```bash +gimp --version # or blender --version, libreoffice --version, etc. ``` +- Exit 0 → already installed → skip to Step 4 +- Exit non-zero → not installed → go to Step 3 -**Critical rule**: The CLI MUST call the actual software for rendering and export — never reimplement the software's functionality in Python. Generate valid native project files and hand them to the real application backend. - -Required patterns for every command: -- `--json` flag for machine-readable output -- Fail loudly with unambiguous error messages -- Introspection commands (`info`, `list`, `status`) for state inspection - -Use the unified REPL skin (`repl_skin.py` from `cli-anything-plugin/repl_skin.py`) so all generated CLIs share a consistent interface. - -### Phase 4 — Test Planning (write TEST.md Part 1) - -Before any test code, document in `tests/TEST.md`: -- Test inventory and what each test covers -- Unit test plans (synthetic data, no external deps) -- E2E test plans (real software backend invoked) -- Realistic end-to-end workflow scenarios - -### Phase 5 — Test Implementation - -Four layers, all required: -1. 
**Unit tests** — synthetic data, deterministic, fast -2. **E2E native tests** — verify project file generation and structure -3. **E2E backend tests** — invoke the real software, check output exists with correct format (magic bytes, ZIP structure, pixel analysis, etc.) -4. **CLI subprocess tests** — install the CLI entry point, run full workflows end-to-end - -**Never assume an export is correct because it ran without errors.** Validate outputs programmatically and print artifact paths for manual inspection. - -### Phase 6 — Test Documentation (write TEST.md Part 2) - -Append full `pytest` output and summary statistics to `TEST.md`. - -### Phase 6.5 — SKILL.md Generation - -Create `cli_anything//skills/SKILL.md` with: -- YAML frontmatter for agent discovery (`name`, `description`, `tags`, `requires`) -- All command groups and subcommands -- Usage examples for common workflows -- Agent-specific guidance for `--json` output and error handling - -The REPL should print the absolute path to `SKILL.md` on startup so agents can find it. - -### Phase 7 — Package & Install +### Step 3 — Install the app (ONE attempt only — never retry install) +**Windows** — run with `timeout: 600`: ```bash -# setup.py uses PEP 420 namespace packaging -cd cli_anything/ -pip install -e . 
+winget install --id --silent --accept-package-agreements --accept-source-agreements +``` -# Verify the CLI is on PATH -which cli-anything- -cli-anything- --help +**macOS** — run with `timeout: 600`: +```bash +brew install --cask ``` -Publish to PyPI when ready: +**Linux** — run with `timeout: 300`: ```bash -python -m build -twine upload dist/* +sudo apt-get install -y ``` ---- +**Special cases:** +- ComfyUI / Stable Diffusion: `git clone` + `pip install -r requirements.txt` — `timeout: 600` +- Mermaid: `npm install -g @mermaid-js/mermaid-cli` — `timeout: 120` +- JupyterLab / AnyGen: `pip install ` — `timeout: 120` +- Web apps (Mubu, NotebookLM): no install needed — use `playwright-mcp` +- Ollama on Linux: `curl -fsSL https://ollama.com/install.sh | sh` — `timeout: 300` -## Using a Generated CLI +After install, re-run Step 2 check once (`timeout: 10`). If still fails → tell the user, stop completely. +### Step 4 — Check if CLI harness is installed +Run with `timeout: 10`: ```bash -# Interactive REPL (default when no subcommand given) -cli-anything- +cli-anything- --version +``` +- Found → skip to Step 6 +- Not found → go to Step 5 -# One-shot subcommand with JSON output for agent consumption -cli-anything- --json [args] +### Step 5 — Install CLI harness (ONE attempt only) -# Help -cli-anything- --help -cli-anything- --help +**Always try CLI-Hub first** — run with `timeout: 120`: +```bash +pip install cli-anything-hub --quiet && cli-hub install ``` ---- +If CLI-Hub fails → generate a minimal harness with `write_file` (a Click CLI wrapping the app's real scripting API), then run with `timeout: 60`: +```bash +pip install -e cli_anything/ --quiet +``` -## Refining an Existing Harness +If harness install also fails → tell the user, stop completely. 
-After initial generation, run a gap analysis: +### Step 6 — Execute the user's task +Run with `timeout: 300` (or `timeout: 600` for renders/exports): ```bash -# Broad refinement -/cli-anything:refine ./ - -# Focused refinement on specific capabilities -/cli-anything:refine ./ "batch processing and filters" +# Image editing +cli-anything-gimp image resize input.jpg output.jpg 1920 1080 +cli-anything-gimp filter blur input.jpg --radius 3 --output out.jpg +cli-anything-gimp export input.xcf output.png + +# 3D / rendering +cli-anything-blender render scene.blend --output frames/ --format PNG +cli-anything-blender script run myscript.py scene.blend + +# Vector +cli-anything-inkscape export logo.svg logo.png --dpi 300 +cli-anything-inkscape convert input.svg output.pdf + +# Audio +cli-anything-audacity trim audio.mp3 output.mp3 --start 0 --end 30 +cli-anything-audacity export-mp3 project.aup3 output.mp3 + +# Video +cli-anything-kdenlive render project.kdenlive output.mp4 +cli-anything-shotcut render project.mlt output.mp4 + +# Office +cli-anything-libreoffice convert doc.docx output.pdf +cli-anything-libreoffice calc run macro.py spreadsheet.xlsx + +# Diagrams +cli-anything-draw-io export diagram.drawio output.png +cli-anything-mermaid render diagram.mmd output.png + +# AI / ML +cli-anything-comfyui run workflow.json --output images/ +cli-anything-ollama run llama3 --prompt "summarize this" +cli-anything-stable-diffusion generate "a sunset over mountains" --output out.png + +# Dev / Infra +cli-anything-jupyterlab execute notebook.ipynb --output result.ipynb +cli-anything-grafana export-dashboard my-dashboard dashboard.json +cli-anything-gitea create-repo myrepo --private ``` -Then re-run tests: `/cli-anything:test ` - ---- +**Always run the task. Never print commands and ask the user to run them.** -## Supported Applications (Pre-built) +If the task command fails → retry once with adjusted args. If it fails again → report the error and stop. 
-CLI-Anything has verified harnesses for 26+ applications: - -| Category | Applications | -|---|---| -| Creative | GIMP, Blender, Inkscape, Krita, MuseScore | -| Office | LibreOffice, Zotero | -| Media | Audacity, OBS Studio, Kdenlive, Shotcut, VideoCaptioner | -| Diagramming | Draw.io, Mermaid | -| AI/ML | ComfyUI, Ollama, NotebookLM | -| Web/Cloud | Zoom, AdGuard Home, Exa | -| Dev Tools | Godot Engine, RenderDoc | +### Step 7 — Report result +One or two sentences only: +> "Done — rendered `output.mp4` from your Kdenlive project." +> "Converted `report.docx` to PDF at `report.pdf`." --- -## Architecture Pitfalls - -**The Rendering Gap** — project files may reference filters/effects that simple file readers ignore. Solution priority: -1. Use the app's native renderer -2. Build a translation layer for effect conversion -3. Generate a render script as fallback +## Hard Stop Rules (prevents infinite loops) -**Testing with missing software** — tests MUST NOT skip or fake results when the target software is missing. They should fail loudly so the absence is visible. +- **Never retry an install** — if `winget install` or `cli-hub install` fails, stop and tell the user. +- **Never loop on a timeout** — if a command times out once, it will time out again. Stop immediately. +- **Max 1 retry on the task command (Step 6) only** — not on installs. +- **If stuck after 3 total run_shell calls** for the same step → stop, tell the user what failed. 
From c49fe5690426c5c4463e95d1b45265b76e806193 Mon Sep 17 00:00:00 2001 From: Korivi Date: Wed, 15 Apr 2026 11:54:08 +0900 Subject: [PATCH 08/10] cli anything help guide added --- skills/cli-anything/SKILL.md | 78 ++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/skills/cli-anything/SKILL.md b/skills/cli-anything/SKILL.md index e0b3b509..c35e10d0 100644 --- a/skills/cli-anything/SKILL.md +++ b/skills/cli-anything/SKILL.md @@ -10,6 +10,84 @@ action-sets: ["shell", "file_operations"] --- +## Help Response (no tools needed — just reply with text) + +If the user's message matches any of these (case-insensitive, any wording): +- "cli anything help" / "cli-anything help" / "cli help" +- "what apps does cli-anything support" / "what can cli-anything do" +- "show cli apps" / "cli anything guide" / "list cli apps" +- Any variation asking what CLI-Anything can do or which apps are supported + +**Do not run any tools. Reply directly with this message:** + +--- + +**CLI-Anything — What I Can Do** + +Just tell me what you want done in plain English. I'll auto-install the app if it's not on your system and complete the task for you — you never need to run any commands yourself. 
+ +**Creative & Media** +| App | Example prompt | +|---|---| +| GIMP | "Resize photo.jpg to 1920×1080 and save as photo_hd.jpg" | +| Blender | "Render scene.blend to PNG frames in the frames/ folder" | +| Inkscape | "Export logo.svg as a 300 DPI PNG" | +| Krita | "Export painting.kra as PNG" | +| Audacity | "Trim the first 30 seconds from audio.mp3 and save as clip.mp3" | +| OBS Studio | "Record my screen for 60 seconds" | +| Kdenlive | "Render project.kdenlive to MP4" | +| Shotcut | "Render project.mlt to MP4" | + +**Office & Productivity** +| App | Example prompt | +|---|---| +| LibreOffice | "Convert report.docx to PDF" / "Run a macro on spreadsheet.xlsx" | +| Mubu | "Open my outline in Mubu" | + +**Communication** +| App | Example prompt | +|---|---| +| Zoom | "Start a Zoom meeting" | + +**Diagramming** +| App | Example prompt | +|---|---| +| Draw.io | "Export diagram.drawio as PNG" | +| Mermaid | "Render this diagram to PNG: graph TD; A-->B; B-->C" | + +**AI & ML** +| App | Example prompt | +|---|---| +| ComfyUI | "Run workflow.json and save images to output/" | +| AnyGen | "Generate content using AnyGen" | +| NotebookLM | "Summarize this PDF using NotebookLM" | +| Ollama | "Run llama3 and summarize this text: ..." | +| Stable Diffusion | "Generate 'a sunset over mountains' and save as out.png" | + +**Dev & Infrastructure** +| App | Example prompt | +|---|---| +| JupyterLab | "Execute notebook.ipynb and save the output" | +| Grafana | "Export my dashboard as JSON" | +| Gitea | "Create a private repo called myrepo on Gitea" | +| GitLab | "Create a new project on GitLab" | +| NextCloud | "Sync my files to NextCloud" | +| Jenkins | "Trigger my build pipeline" | +| AdGuard Home | "Set up network-wide ad blocking with AdGuard Home" | + +**GIS & Design** +| App | Example prompt | +|---|---| +| FreeCAD | "Export model.fcstd as STL" | +| QGIS | "Export map.qgz as PNG" | + +**Tips:** +- Always give me the full file path (e.g. 
`C:\Users\you\Desktop\photo.jpg`) +- If the app isn't installed, I'll install it automatically — just wait a few minutes +- I never ask you to run commands yourself — I do everything for you + +--- + ## Supported Apps Reference Use this table to look up the correct names for every step. From ec3dc6763468b3b8a583997dacb9143a017bc4ac Mon Sep 17 00:00:00 2001 From: Korivi Date: Thu, 16 Apr 2026 15:18:53 +0900 Subject: [PATCH 09/10] CLI Skill updated --- skills/cli-anything/SKILL.md | 169 ++++++++++++++++++++++------------- 1 file changed, 108 insertions(+), 61 deletions(-) diff --git a/skills/cli-anything/SKILL.md b/skills/cli-anything/SKILL.md index c35e10d0..58309cc0 100644 --- a/skills/cli-anything/SKILL.md +++ b/skills/cli-anything/SKILL.md @@ -10,6 +10,29 @@ action-sets: ["shell", "file_operations"] --- +## FORBIDDEN — Never Do These (causes bugs on all platforms) + +These patterns are strictly banned. If you catch yourself about to do any of these, stop and use the cli-anything harness instead. 
+ +| ❌ FORBIDDEN | ✅ CORRECT | +|---|---| +| `soffice.exe --headless --convert-to pdf ...` | `cli-anything-libreoffice convert doc.docx output.pdf` | +| `cd "C:\Program Files\LibreOffice\program" && soffice.exe ...` | `cli-anything-libreoffice convert doc.docx output.pdf` | +| `gimp --batch-interpreter=script-fu-use-v2 ...` | `cli-anything-gimp image resize input.jpg output.jpg 1920 1080` | +| `blender --background scene.blend --render-output ...` | `cli-anything-blender render scene.blend --output frames/ --format PNG` | +| `inkscape --export-type=png logo.svg` | `cli-anything-inkscape export logo.svg logo.png --dpi 300` | +| Chaining with `&&`: `cmd1 && cmd2` | Two separate `run_shell` calls | +| Any `.exe` extension in a command | No `.exe` — harness is cross-platform | +| Hardcoded paths like `C:\Program Files\...` | Use the harness — it finds the app automatically | + +**Why these are banned:** +- `.exe` only exists on Windows — breaks on macOS and Linux +- `C:\Program Files\...` paths break on macOS and Linux +- `&&` chaining breaks in PowerShell on Windows +- Raw app CLIs require knowing app-specific flags — the harness handles all of that + +--- + ## Help Response (no tools needed — just reply with text) If the user's message matches any of these (case-insensitive, any wording): @@ -27,64 +50,65 @@ If the user's message matches any of these (case-insensitive, any wording): Just tell me what you want done in plain English. I'll auto-install the app if it's not on your system and complete the task for you — you never need to run any commands yourself. 
**Creative & Media** -| App | Example prompt | -|---|---| -| GIMP | "Resize photo.jpg to 1920×1080 and save as photo_hd.jpg" | -| Blender | "Render scene.blend to PNG frames in the frames/ folder" | -| Inkscape | "Export logo.svg as a 300 DPI PNG" | -| Krita | "Export painting.kra as PNG" | -| Audacity | "Trim the first 30 seconds from audio.mp3 and save as clip.mp3" | -| OBS Studio | "Record my screen for 60 seconds" | -| Kdenlive | "Render project.kdenlive to MP4" | -| Shotcut | "Render project.mlt to MP4" | +| App | What I can do | Example prompt | +|---|---|---| +| GIMP | Resize, crop, blur, convert, export images | "Resize photo.jpg to 1920×1080 and save as photo_hd.jpg" | +| Blender | Render 3D scenes, run scripts, export models | "Render scene.blend to PNG frames in the frames/ folder" | +| Inkscape | Export SVG to PNG/PDF, convert vector files | "Export logo.svg as a 300 DPI PNG" | +| Krita | Export paintings, batch convert images | "Export painting.kra as PNG" | +| Audacity | Trim, export, convert audio files | "Trim the first 30 seconds from audio.mp3 and save as clip.mp3" | +| OBS Studio | Record screen, stream | "Record my screen for 60 seconds" | +| Kdenlive | Render video projects to MP4/MKV | "Render project.kdenlive to MP4" | +| Shotcut | Render video projects to MP4 | "Render project.mlt to MP4" | **Office & Productivity** -| App | Example prompt | -|---|---| -| LibreOffice | "Convert report.docx to PDF" / "Run a macro on spreadsheet.xlsx" | -| Mubu | "Open my outline in Mubu" | +| App | What I can do | Example prompt | +|---|---|---| +| LibreOffice | Convert DOCX/XLSX/PPTX to PDF, run macros | "Convert report.docx to PDF" | +| Mubu | Manage knowledge outlines | "Open my outline in Mubu" | **Communication** -| App | Example prompt | -|---|---| -| Zoom | "Start a Zoom meeting" | +| App | What I can do | Example prompt | +|---|---|---| +| Zoom | Start/join meetings | "Start a Zoom meeting" | **Diagramming** -| App | Example prompt | -|---|---| -| 
Draw.io | "Export diagram.drawio as PNG" | -| Mermaid | "Render this diagram to PNG: graph TD; A-->B; B-->C" | +| App | What I can do | Example prompt | +|---|---|---| +| Draw.io | Export diagrams to PNG/SVG/PDF | "Export diagram.drawio as PNG" | +| Mermaid | Render diagram code to PNG | "Render this diagram to PNG: graph TD; A-->B; B-->C" | **AI & ML** -| App | Example prompt | -|---|---| -| ComfyUI | "Run workflow.json and save images to output/" | -| AnyGen | "Generate content using AnyGen" | -| NotebookLM | "Summarize this PDF using NotebookLM" | -| Ollama | "Run llama3 and summarize this text: ..." | -| Stable Diffusion | "Generate 'a sunset over mountains' and save as out.png" | +| App | What I can do | Example prompt | +|---|---|---| +| ComfyUI | Run AI image generation workflows | "Run workflow.json and save images to output/" | +| AnyGen | Generate AI content | "Generate content using AnyGen" | +| NotebookLM | AI research and summarization | "Summarize this PDF using NotebookLM" | +| Ollama | Run local LLM inference | "Run llama3 and summarize this text: ..." 
| +| Stable Diffusion | Generate images from text prompts | "Generate 'a sunset over mountains' and save as out.png" | **Dev & Infrastructure** -| App | Example prompt | -|---|---| -| JupyterLab | "Execute notebook.ipynb and save the output" | -| Grafana | "Export my dashboard as JSON" | -| Gitea | "Create a private repo called myrepo on Gitea" | -| GitLab | "Create a new project on GitLab" | -| NextCloud | "Sync my files to NextCloud" | -| Jenkins | "Trigger my build pipeline" | -| AdGuard Home | "Set up network-wide ad blocking with AdGuard Home" | - -**GIS & Design** -| App | Example prompt | -|---|---| -| FreeCAD | "Export model.fcstd as STL" | -| QGIS | "Export map.qgz as PNG" | +| App | What I can do | Example prompt | +|---|---|---| +| JupyterLab | Execute notebooks, save output | "Execute notebook.ipynb and save the output" | +| Grafana | Export dashboards | "Export my dashboard as JSON" | +| Gitea | Create repos, manage git hosting | "Create a private repo called myrepo on Gitea" | +| GitLab | Create projects, manage CI/CD | "Create a new project on GitLab" | +| NextCloud | Sync files, manage cloud storage | "Sync my files to NextCloud" | +| Jenkins | Trigger build pipelines | "Trigger my build pipeline" | +| AdGuard Home | Set up network-wide ad blocking | "Set up network-wide ad blocking with AdGuard Home" | + +**GIS & 3D Design** +| App | What I can do | Example prompt | +|---|---|---| +| FreeCAD | Export 3D models to STL/STEP | "Export model.fcstd as STL" | +| QGIS | Export maps to PNG/PDF | "Export map.qgz as PNG" | **Tips:** - Always give me the full file path (e.g. `C:\Users\you\Desktop\photo.jpg`) - If the app isn't installed, I'll install it automatically — just wait a few minutes - I never ask you to run commands yourself — I do everything for you +- Works on Windows, macOS, and Linux --- @@ -128,35 +152,39 @@ Use this table to look up the correct names for every step. **CRITICAL: Always pass the timeout shown below to run_shell. 
Never use the default (30s). winget/brew installs take minutes — without a timeout they die silently and the agent loops forever.** +**CRITICAL: Never chain commands with `&&` or `;` in a single run_shell call. Use one separate run_shell call per command.** + ### Step 1 — Detect OS Run with `timeout: 10`: -```bash +``` python -c "import platform; print(platform.system())" ``` Result: `Windows`, `Darwin`, or `Linux`. ### Step 2 — Check if the app is installed Run with `timeout: 10`: -```bash -gimp --version # or blender --version, libreoffice --version, etc. ``` +gimp --version +``` +(replace with the correct app: `blender --version`, `libreoffice --version`, etc.) + - Exit 0 → already installed → skip to Step 4 - Exit non-zero → not installed → go to Step 3 ### Step 3 — Install the app (ONE attempt only — never retry install) **Windows** — run with `timeout: 600`: -```bash +``` winget install --id --silent --accept-package-agreements --accept-source-agreements ``` **macOS** — run with `timeout: 600`: -```bash +``` brew install --cask ``` **Linux** — run with `timeout: 300`: -```bash +``` sudo apt-get install -y ``` @@ -171,7 +199,7 @@ After install, re-run Step 2 check once (`timeout: 10`). 
If still fails → tell ### Step 4 — Check if CLI harness is installed Run with `timeout: 10`: -```bash +``` cli-anything- --version ``` - Found → skip to Step 6 @@ -180,44 +208,56 @@ cli-anything- --version ### Step 5 — Install CLI harness (ONE attempt only) **Always try CLI-Hub first** — run with `timeout: 120`: -```bash -pip install cli-anything-hub --quiet && cli-hub install ``` +pip install cli-anything-hub --quiet +``` +Then run with `timeout: 120`: +``` +cli-hub install +``` +(Two separate run_shell calls — do NOT chain with &&) If CLI-Hub fails → generate a minimal harness with `write_file` (a Click CLI wrapping the app's real scripting API), then run with `timeout: 60`: -```bash +``` pip install -e cli_anything/ --quiet ``` If harness install also fails → tell the user, stop completely. -### Step 6 — Execute the user's task +### Step 6 — Execute the user's task using the CLI harness ONLY + +**MANDATORY: Use ONLY `cli-anything-` commands. Never call soffice, gimp, blender, or any app binary directly.** + Run with `timeout: 300` (or `timeout: 600` for renders/exports): -```bash -# Image editing +``` +# Image editing — GIMP cli-anything-gimp image resize input.jpg output.jpg 1920 1080 cli-anything-gimp filter blur input.jpg --radius 3 --output out.jpg cli-anything-gimp export input.xcf output.png -# 3D / rendering +# 3D / rendering — Blender cli-anything-blender render scene.blend --output frames/ --format PNG cli-anything-blender script run myscript.py scene.blend -# Vector +# Vector — Inkscape cli-anything-inkscape export logo.svg logo.png --dpi 300 cli-anything-inkscape convert input.svg output.pdf -# Audio +# Painting — Krita +cli-anything-krita export painting.kra output.png + +# Audio — Audacity cli-anything-audacity trim audio.mp3 output.mp3 --start 0 --end 30 cli-anything-audacity export-mp3 project.aup3 output.mp3 -# Video +# Video — Kdenlive / Shotcut cli-anything-kdenlive render project.kdenlive output.mp4 cli-anything-shotcut render project.mlt 
output.mp4 -# Office +# Office — LibreOffice (NEVER use soffice.exe directly) cli-anything-libreoffice convert doc.docx output.pdf +cli-anything-libreoffice convert spreadsheet.xlsx output.pdf cli-anything-libreoffice calc run macro.py spreadsheet.xlsx # Diagrams @@ -233,6 +273,10 @@ cli-anything-stable-diffusion generate "a sunset over mountains" --output out.pn cli-anything-jupyterlab execute notebook.ipynb --output result.ipynb cli-anything-grafana export-dashboard my-dashboard dashboard.json cli-anything-gitea create-repo myrepo --private + +# GIS / Design +cli-anything-freecad export model.fcstd output.stl +cli-anything-qgis export map.qgz output.png ``` **Always run the task. Never print commands and ask the user to run them.** @@ -252,3 +296,6 @@ One or two sentences only: - **Never loop on a timeout** — if a command times out once, it will time out again. Stop immediately. - **Max 1 retry on the task command (Step 6) only** — not on installs. - **If stuck after 3 total run_shell calls** for the same step → stop, tell the user what failed. +- **Never use `&&` or `;` to chain commands** — always use separate run_shell calls. +- **Never use `.exe` extensions** — use the cli-anything harness which is cross-platform. +- **Never hardcode app installation paths** — use the harness, it resolves the path automatically. 
\ No newline at end of file From 77d4d878eedb1e89dfd5edba98555edc653a1cc2 Mon Sep 17 00:00:00 2001 From: Korivi Date: Thu, 16 Apr 2026 23:52:54 +0900 Subject: [PATCH 10/10] Major Issues are fixed - Install Issues on Mac fixed - Python compatibility & Syntax Issues fixed - CLI skills updated - Local LLM compatibility Issues fixed - Image action error fixed --- agent_core/core/embedding_interface.py | 2 + agent_core/core/impl/action/router.py | 2 + agent_core/core/impl/llm/interface.py | 6 +- agent_core/core/impl/vlm/interface.py | 52 +++--- agent_core/core/registry/action.py | 2 + agent_core/core/registry/context.py | 2 + agent_core/core/registry/database.py | 2 + agent_core/core/registry/event_stream.py | 2 + agent_core/core/registry/llm.py | 2 + agent_core/core/registry/memory.py | 2 + agent_core/core/registry/state.py | 2 + agent_core/core/registry/task_manager.py | 2 + agent_core/decorators/log_events.py | 2 + agent_core/decorators/profiler.py | 2 + app/config/skills_config.json | 2 +- app/gui/gui_module.py | 2 + app/internal_action_interface.py | 2 + app/security/prompt_sanitizer.py | 2 + app/ui_layer/local_llm_setup.py | 2 + install.py | 151 +++++++++++++++++- skills/cli-anything/SKILL.md | 130 ++++++++++----- skills/docx/scripts/comment.py | 2 + skills/docx/scripts/office/pack.py | 2 + .../nano-banana-pro/scripts/generate_image.py | 2 + skills/ontology/scripts/ontology.py | 2 + skills/pptx/scripts/office/pack.py | 2 + skills/tesla-api/scripts/tesla.py | 2 + skills/xlsx/scripts/office/pack.py | 2 + 28 files changed, 316 insertions(+), 71 deletions(-) diff --git a/agent_core/core/embedding_interface.py b/agent_core/core/embedding_interface.py index b9894cbd..17acfa99 100644 --- a/agent_core/core/embedding_interface.py +++ b/agent_core/core/embedding_interface.py @@ -12,6 +12,8 @@ - GOOGLE_API_KEY (for provider="gemini") """ +from __future__ import annotations + import os from typing import List, Optional diff --git a/agent_core/core/impl/action/router.py 
b/agent_core/core/impl/action/router.py index 12f1fef9..210c2458 100644 --- a/agent_core/core/impl/action/router.py +++ b/agent_core/core/impl/action/router.py @@ -6,6 +6,8 @@ based on user queries using LLM reasoning. """ +from __future__ import annotations + import json import ast from typing import Optional, List, Dict, Any, Tuple diff --git a/agent_core/core/impl/llm/interface.py b/agent_core/core/impl/llm/interface.py index 5114cfae..3f201d9e 100644 --- a/agent_core/core/impl/llm/interface.py +++ b/agent_core/core/impl/llm/interface.py @@ -1239,22 +1239,24 @@ def _generate_ollama(self, system_prompt: str | None, user_prompt: str) -> Dict[ try: payload = { "model": self.model, - "system": system_prompt, "prompt": user_prompt, "stream": False, + "format": "json", "options": { "temperature": self.temperature, } } + if system_prompt: + payload["system"] = system_prompt url: str = f"{self.remote_url.rstrip('/')}/api/generate" response = requests.post(url, json=payload, timeout=600) response.raise_for_status() result = response.json() content = result.get("response", "").strip() - total_tokens = result.get("usage", {}).get("total_tokens", 0) token_count_input = result.get("prompt_eval_count", 0) token_count_output = result.get("eval_count", 0) + total_tokens = token_count_input + token_count_output status = "success" except Exception as exc: exc_obj = exc diff --git a/agent_core/core/impl/vlm/interface.py b/agent_core/core/impl/vlm/interface.py index dce58675..927bd8e6 100644 --- a/agent_core/core/impl/vlm/interface.py +++ b/agent_core/core/impl/vlm/interface.py @@ -233,7 +233,9 @@ def describe_image_bytes( if log_response: logger.info(f"[LLM SEND] system={system_prompt} | user={user_prompt}") - if self.provider in ("openai", "minimax", "deepseek", "moonshot", "grok"): + if self.provider == "deepseek": + raise RuntimeError("DeepSeek does not support vision/VLM. 
Use a different provider for image description.") + elif self.provider in ("openai", "minimax", "moonshot", "grok"): response = self._openai_describe_bytes(image_bytes, system_prompt, user_prompt) elif self.provider == "remote": response = self._ollama_describe_bytes(image_bytes, system_prompt, user_prompt) @@ -288,6 +290,17 @@ async def generate_response_async( # ───────────────────── Provider Helpers ───────────────────── + @staticmethod + def _detect_mime_type(image_bytes: bytes) -> str: + """Detect image MIME type from the first few bytes of image data.""" + if image_bytes[:8] == b'\x89PNG\r\n\x1a\n': + return "image/png" + if image_bytes[:4] == b'GIF8': + return "image/gif" + if image_bytes[:4] == b'RIFF' and image_bytes[8:12] == b'WEBP': + return "image/webp" + return "image/jpeg" + def _report_usage_async( self, service_type: str, @@ -318,8 +331,9 @@ def _report_usage_async( logger.warning(f"[VLM] Failed to report usage: {e}") def _openai_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str) -> Dict[str, Any]: - """OpenAI vision request with automatic prompt caching metrics.""" + """OpenAI/Grok vision request with automatic prompt caching metrics.""" img_b64 = base64.b64encode(image_bytes).decode() + mime_type = self._detect_mime_type(image_bytes) messages: list[Dict[str, Any]] = [] if sys: messages.append({"role": "system", "content": sys}) @@ -328,7 +342,7 @@ def _openai_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str) "role": "user", "content": [ {"type": "text", "text": usr}, - {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}}, + {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{img_b64}"}}, ], } ) @@ -337,7 +351,6 @@ def _openai_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str) messages=messages, temperature=self.temperature, max_tokens=2048, - response_format={"type": "json_object"}, ) content = response.choices[0].message.content.strip() 
token_count_input = response.usage.prompt_tokens @@ -359,9 +372,9 @@ def _openai_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str) elif sys and len(sys) >= config.min_cache_tokens: metrics.record_miss("openai", "automatic_vlm", total_tokens=token_count_input) - # Report usage via hook + # Report usage via hook (use actual provider name, e.g. "grok", "minimax") self._report_usage_async( - "vlm_openai", "openai", self.model, + f"vlm_{self.provider}", self.provider, self.model, token_count_input, token_count_output, cached_tokens ) @@ -377,16 +390,20 @@ def _ollama_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str) payload = { "model": self.model, "prompt": usr, - "system": sys, "images": [img_b64], "stream": False, - "temperature": self.temperature, + "options": {"temperature": self.temperature}, } + if sys: + payload["system"] = sys url: str = f"{self.remote_url.rstrip('/')}/api/generate" r = requests.post(url, json=payload, timeout=600) r.raise_for_status() - content = r.json().get("response", "").strip() - total_tokens = r.json().get("usage", {}).get("total_tokens", 0) + result = r.json() + content = result.get("response", "").strip() + token_count_input = result.get("prompt_eval_count", 0) + token_count_output = result.get("eval_count", 0) + total_tokens = token_count_input + token_count_output return { "tokens_used": total_tokens or 0, @@ -404,7 +421,7 @@ def _gemini_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str) image_bytes=image_bytes, system_prompt=sys, temperature=self.temperature, - json_mode=True, + json_mode=False, ) # Record cache metrics @@ -431,6 +448,7 @@ def _gemini_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str) def _byteplus_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str) -> Dict[str, Any]: """BytePlus vision request.""" img_b64 = base64.b64encode(image_bytes).decode() + mime_type = self._detect_mime_type(image_bytes) messages: list[Dict[str, Any]] = [] 
if sys: messages.append({"role": "system", "content": sys}) @@ -440,7 +458,7 @@ def _byteplus_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str "role": "user", "content": [ {"type": "text", "text": usr}, - {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}}, + {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{img_b64}"}}, ], } ) @@ -451,7 +469,6 @@ def _byteplus_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str "messages": messages, "temperature": self.temperature, "max_tokens": 2048, - "response_format": {"type": "json_object"}, } headers = { "Content-Type": "application/json", @@ -486,14 +503,7 @@ def _anthropic_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: st img_b64 = base64.b64encode(image_bytes).decode() config = get_cache_config() - # Detect media type from image bytes - media_type = "image/jpeg" - if image_bytes[:8] == b'\x89PNG\r\n\x1a\n': - media_type = "image/png" - elif image_bytes[:4] == b'GIF8': - media_type = "image/gif" - elif image_bytes[:4] == b'RIFF' and image_bytes[8:12] == b'WEBP': - media_type = "image/webp" + media_type = self._detect_mime_type(image_bytes) message_content = [ { diff --git a/agent_core/core/registry/action.py b/agent_core/core/registry/action.py index 956c9dba..46478333 100644 --- a/agent_core/core/registry/action.py +++ b/agent_core/core/registry/action.py @@ -19,6 +19,8 @@ result = await executor.execute_action(action, input_data) """ +from __future__ import annotations + from typing import TYPE_CHECKING from agent_core.core.registry.base import ComponentRegistry diff --git a/agent_core/core/registry/context.py b/agent_core/core/registry/context.py index fe3aef47..4ba203d5 100644 --- a/agent_core/core/registry/context.py +++ b/agent_core/core/registry/context.py @@ -16,6 +16,8 @@ system_prompt, user_prompt = engine.make_prompt(query="...") """ +from __future__ import annotations + from typing import TYPE_CHECKING from 
agent_core.core.registry.base import ComponentRegistry diff --git a/agent_core/core/registry/database.py b/agent_core/core/registry/database.py index ab04e20d..cb5a3827 100644 --- a/agent_core/core/registry/database.py +++ b/agent_core/core/registry/database.py @@ -18,6 +18,8 @@ db.list_actions() """ +from __future__ import annotations + from typing import TYPE_CHECKING from agent_core.core.registry.base import ComponentRegistry diff --git a/agent_core/core/registry/event_stream.py b/agent_core/core/registry/event_stream.py index 041ff55d..fec9e3e3 100644 --- a/agent_core/core/registry/event_stream.py +++ b/agent_core/core/registry/event_stream.py @@ -16,6 +16,8 @@ manager.log("INFO", "Something happened") """ +from __future__ import annotations + from typing import TYPE_CHECKING from agent_core.core.registry.base import ComponentRegistry diff --git a/agent_core/core/registry/llm.py b/agent_core/core/registry/llm.py index 4e82fb67..be8d40ab 100644 --- a/agent_core/core/registry/llm.py +++ b/agent_core/core/registry/llm.py @@ -18,6 +18,8 @@ response = await llm.generate_response_async(prompt) """ +from __future__ import annotations + from typing import TYPE_CHECKING from agent_core.core.registry.base import ComponentRegistry diff --git a/agent_core/core/registry/memory.py b/agent_core/core/registry/memory.py index c1586d69..cf774336 100644 --- a/agent_core/core/registry/memory.py +++ b/agent_core/core/registry/memory.py @@ -21,6 +21,8 @@ pointers = memory.retrieve("user preferences") """ +from __future__ import annotations + from typing import TYPE_CHECKING from agent_core.core.registry.base import ComponentRegistry diff --git a/agent_core/core/registry/state.py b/agent_core/core/registry/state.py index 3b869851..45571b50 100644 --- a/agent_core/core/registry/state.py +++ b/agent_core/core/registry/state.py @@ -19,6 +19,8 @@ await manager.start_session() """ +from __future__ import annotations + from typing import TYPE_CHECKING from agent_core.core.registry.base 
import ComponentRegistry diff --git a/agent_core/core/registry/task_manager.py b/agent_core/core/registry/task_manager.py index ce87f4e8..da57db77 100644 --- a/agent_core/core/registry/task_manager.py +++ b/agent_core/core/registry/task_manager.py @@ -16,6 +16,8 @@ task_id = manager.create_task("My Task", "Do something") """ +from __future__ import annotations + from typing import TYPE_CHECKING from agent_core.core.registry.base import ComponentRegistry diff --git a/agent_core/decorators/log_events.py b/agent_core/decorators/log_events.py index ab9a7cfe..41a84547 100644 --- a/agent_core/decorators/log_events.py +++ b/agent_core/decorators/log_events.py @@ -8,6 +8,8 @@ {id}, {name}, {args}, {kwargs}, {result}, {exception}, {duration_ms} """ +from __future__ import annotations + import logging import time import uuid diff --git a/agent_core/decorators/profiler.py b/agent_core/decorators/profiler.py index 38e5e77c..ca35a343 100644 --- a/agent_core/decorators/profiler.py +++ b/agent_core/decorators/profiler.py @@ -28,6 +28,8 @@ Set "auto_save_interval" to N to save after every N loops (0 = only at exit). 
""" +from __future__ import annotations + import atexit import asyncio import functools diff --git a/app/config/skills_config.json b/app/config/skills_config.json index 0975a5d4..8fde8d49 100644 --- a/app/config/skills_config.json +++ b/app/config/skills_config.json @@ -1,7 +1,6 @@ { "auto_load": true, "enabled_skills": [ - "cli-anything", "docx", "pdf", "playwright-mcp", @@ -9,6 +8,7 @@ "xlsx" ], "disabled_skills": [ + "cli-anything", "agentmail", "ai-news-collector", "ai-ppt-generator", diff --git a/app/gui/gui_module.py b/app/gui/gui_module.py index 6d53c583..124bb967 100644 --- a/app/gui/gui_module.py +++ b/app/gui/gui_module.py @@ -1,3 +1,5 @@ + +from __future__ import annotations import json import ast import tempfile diff --git a/app/internal_action_interface.py b/app/internal_action_interface.py index a1486f1b..9fffeadb 100644 --- a/app/internal_action_interface.py +++ b/app/internal_action_interface.py @@ -5,6 +5,8 @@ framework internal functions. """ +from __future__ import annotations + from typing import Dict, Any, Optional, List, TYPE_CHECKING from app.llm import LLMInterface, LLMCallType from app.vlm_interface import VLMInterface diff --git a/app/security/prompt_sanitizer.py b/app/security/prompt_sanitizer.py index 3dba8ced..71ae1ce0 100644 --- a/app/security/prompt_sanitizer.py +++ b/app/security/prompt_sanitizer.py @@ -9,6 +9,8 @@ - Format manipulation attacks """ +from __future__ import annotations + import re from typing import Any diff --git a/app/ui_layer/local_llm_setup.py b/app/ui_layer/local_llm_setup.py index 67437eab..e998c510 100644 --- a/app/ui_layer/local_llm_setup.py +++ b/app/ui_layer/local_llm_setup.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- """Local LLM setup utilities for Ollama.""" +from __future__ import annotations + import asyncio import json import logging diff --git a/install.py b/install.py index bfbfc982..2c346460 100644 --- a/install.py +++ b/install.py @@ -553,19 +553,43 @@ def verify_conda_env(env_name: str) -> bool: def 
install_nodejs_linux(): """ - Automatically install Node.js on Linux systems (including Kali). - Detects the package manager (apt, pacman, yum) and installs accordingly. + Automatically install Node.js on Linux/macOS systems (including Kali). + Detects the package manager (brew, apt, pacman, yum) and installs accordingly. """ if sys.platform == "win32": return True # Windows users should install Node.js manually from nodejs.org - + # Check if node is already installed if shutil.which("node") and shutil.which("npm"): print("✓ Node.js and npm are already installed") return True - + print("\n🔧 Installing Node.js...") - + + # macOS: try Homebrew first, then nvm + if sys.platform == "darwin": + if shutil.which("brew"): + print(" Found Homebrew, installing Node.js...") + try: + result = run_command(["brew", "install", "node"], check=False, capture=True, quiet=True, show_error=False) + if result and hasattr(result, 'returncode') and result.returncode == 0: + print("✓ Node.js installed via Homebrew") + time.sleep(1) + if shutil.which("node") and shutil.which("npm"): + return True + print("⚠ Node.js installed but not yet in PATH. Restart your terminal.") + return False + except Exception as e: + print(f" ⚠ brew install node failed: {str(e)[:100]}") + print("\n⚠ Could not automatically install Node.js on macOS") + print("\nOptions:") + print(" 1. Install Homebrew (https://brew.sh), then run: brew install node") + print(" 2. Download Node.js from: https://nodejs.org/ (LTS version)") + print(" 3. 
Use nvm: curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | bash") + print(" then: nvm install --lts") + print("\n After installation, restart your terminal and run: python3 install.py") + return False + # Detect package manager and prepare install commands # Format: (package_manager, update_cmd, install_cmd) package_managers = [ @@ -1148,10 +1172,127 @@ def show_api_setup_instructions(): print("="*50 + "\n") +# ========================================== +# LINUX PYTHON COMPATIBILITY CHECK +# ========================================== +def _check_linux_python() -> None: + """ + Warn Linux users who are running an old or system-managed Python. + + Common problem scenarios: + - Python < 3.9 (Ubuntu 20.04 default is 3.8) + - System Python used directly without a venv, which triggers PEP 668 + "externally-managed-environment" errors on newer distros + """ + ver = sys.version_info + + # Already gated to >= 3.9 above, but warn hard about 3.9 since + # it's the bare minimum — 3.11+ is much more reliable. + if ver < (3, 10): + print("\n" + "=" * 62) + print(f" ⚠ Python {ver.major}.{ver.minor} detected — upgrade recommended") + print("=" * 62) + print(f"\n You are running Python {ver.major}.{ver.minor}.{ver.micro}.") + print(" CraftBot works on 3.9+ but runs best on Python 3.11 or newer.") + print("\n To install Python 3.11 on Ubuntu/Debian/Kali:") + print(" sudo apt update") + print(" sudo apt install -y software-properties-common") + print(" sudo add-apt-repository ppa:deadsnakes/ppa") + print(" sudo apt install -y python3.11 python3.11-venv python3.11-pip") + print(" python3.11 install.py") + print() + print(" Or use pyenv (works on any distro):") + print(" curl https://pyenv.run | bash") + print(" pyenv install 3.11.9") + print(" pyenv local 3.11.9") + print(" python install.py") + print("=" * 62) + choice = input("\n Continue with Python 3.9 anyway? (y/n): ").strip().lower() + if choice != "y": + print("\n Installation cancelled. 
Please upgrade Python and try again.\n") + sys.exit(1) + print() + + +# ========================================== +# MAC PYTHON COMPATIBILITY CHECK +# ========================================== +def _check_mac_python() -> None: + """ + Warn Mac users who are running a problematic Python interpreter. + + Common bad interpreters on macOS: + - Xcode bundled Python (/Applications/Xcode.app/...) + - macOS system Python (/usr/bin/python3) + + Both are difficult to install packages into and are intended as OS + tooling, not for running user applications. Homebrew or python.org + Python is recommended instead. + """ + exe = sys.executable or "" + is_xcode = "Xcode.app" in exe or "Python3.framework" in exe + is_system = exe.startswith("/usr/bin/python") + + if not (is_xcode or is_system): + return # Running a proper Python — nothing to warn about + + ver = sys.version_info + label = "Xcode's built-in Python" if is_xcode else "macOS system Python" + + print("\n" + "=" * 62) + print(" ⚠ WARNING: Wrong Python interpreter detected") + print("=" * 62) + print(f"\n You are using {label}:") + print(f" {exe}") + print(f"\n This Python ({ver.major}.{ver.minor}.{ver.micro}) is reserved for macOS") + print(" system tools. Installing packages into it can be unreliable") + print(" and may break system components.") + print("\n Recommended fix — install Python via Homebrew:") + print() + print(" # 1. Install Homebrew (if not already installed):") + print(' /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"') + print() + print(" # 2. Install Python 3.11 (or newer):") + print(" brew install python@3.11") + print() + print(" # 3. 
Re-run the installer with Homebrew Python:") + print(" /opt/homebrew/bin/python3.11 install.py # Apple Silicon") + print(" /usr/local/bin/python3.11 install.py # Intel Mac") + print() + print(" Alternative: download Python from https://www.python.org/downloads/") + print("=" * 62) + + choice = input("\n Continue with the current interpreter anyway? (y/n): ").strip().lower() + if choice != "y": + print("\n Installation cancelled. Please use a Homebrew or python.org Python.\n") + sys.exit(1) + print() + + # ========================================== # MAIN # ========================================== if __name__ == "__main__": + # ── Python version gate ──────────────────────────────────────────────── + _ver = sys.version_info + if _ver < (3, 9): + print(f"\n❌ Python {_ver.major}.{_ver.minor} is not supported.") + print(" CraftBot requires Python 3.9 or newer.") + if sys.platform == "darwin": + print("\n Recommended fix:") + print(" 1. Install Homebrew: https://brew.sh") + print(" 2. Run: brew install python@3.11") + print(" 3. Re-run: /opt/homebrew/bin/python3.11 install.py") + else: + print("\n Please install Python 3.9+ from https://www.python.org/downloads/") + sys.exit(1) + + # ── platform-specific interpreter checks ────────────────────────────── + if sys.platform == "darwin": + _check_mac_python() + elif sys.platform == "linux": + _check_linux_python() + args = set(sys.argv[1:]) # Parse flags diff --git a/skills/cli-anything/SKILL.md b/skills/cli-anything/SKILL.md index 58309cc0..5dbff223 100644 --- a/skills/cli-anything/SKILL.md +++ b/skills/cli-anything/SKILL.md @@ -1,6 +1,6 @@ --- name: cli-anything -description: "Use any supported GUI application (GIMP, Blender, LibreOffice, Audacity, OBS, etc.) on behalf of the user. Auto-installs the app and CLI harness, then executes the task directly." 
+description: "Automatically handles image editing, document conversion, audio/video editing, 3D rendering, diagrams, AI image generation, and more — using GIMP, LibreOffice, Blender, Audacity, Inkscape, Krita, Kdenlive, Shotcut, OBS, Draw.io, Mermaid, Ollama, Stable Diffusion, ComfyUI, JupyterLab, FreeCAD, QGIS, Grafana, Gitea, GitLab, NextCloud, Jenkins, AdGuard Home, Zoom, Mubu. User does NOT need to mention CLI-Anything — agent auto-selects the right app for the task. Auto-installs if not present." action-sets: ["shell", "file_operations"] --- @@ -8,6 +8,56 @@ action-sets: ["shell", "file_operations"] **Core rule: Do everything yourself. Never give the user a command to run. Never explain steps. Just execute the task and report the result.** +**Activation rule: The user does NOT need to say "CLI-Anything". If their task matches a supported app below, use it automatically — no prompting needed.** + +--- + +## Task Routing — Auto-select the right app (check this before every task) + +| If the user asks about... | Use this app | Command prefix | +|---|---|---| +| Resize / crop / filter / edit an image | **GIMP** | `cli-anything-gimp` | +| Convert image format (JPG→PNG, PNG→WEBP, etc.) 
| **GIMP** | `cli-anything-gimp` | +| SVG, vector graphics, logos | **Inkscape** | `cli-anything-inkscape` | +| Digital painting, .kra files | **Krita** | `cli-anything-krita` | +| Convert DOCX / XLSX / PPTX → PDF | **LibreOffice** | `cli-anything-libreoffice` | +| Writer / Calc / Impress / spreadsheet macros | **LibreOffice** | `cli-anything-libreoffice` | +| Trim / convert / export audio (MP3, WAV, FLAC) | **Audacity** | `cli-anything-audacity` | +| Render / edit video | **Kdenlive** or **Shotcut** | `cli-anything-kdenlive` | +| Record screen or live stream | **OBS Studio** | `cli-anything-obs` | +| 3D modeling / rendering / .blend files | **Blender** | `cli-anything-blender` | +| Create or export diagrams (.drawio) | **Draw.io** | `cli-anything-draw-io` | +| Render Mermaid diagram code | **Mermaid** | `cli-anything-mermaid` | +| Generate image from text prompt (AI) | **Stable Diffusion** or **ComfyUI** | `cli-anything-stable-diffusion` | +| Run a local LLM | **Ollama** | `cli-anything-ollama` | +| AI content generation | **AnyGen** | `cli-anything-anygen` | +| AI research / summarize PDF | **NotebookLM** | `cli-anything-notebooklm` | +| Execute a Jupyter notebook | **JupyterLab** | `cli-anything-jupyterlab` | +| CAD / 3D design, .fcstd files | **FreeCAD** | `cli-anything-freecad` | +| GIS / maps, .qgz files | **QGIS** | `cli-anything-qgis` | +| Monitoring dashboards | **Grafana** | `cli-anything-grafana` | +| Git hosting, create repos | **Gitea** or **GitLab** | `cli-anything-gitea` | +| CI/CD pipelines | **Jenkins** | `cli-anything-jenkins` | +| Cloud file sync | **NextCloud** | `cli-anything-nextcloud` | +| Network-wide ad blocking | **AdGuard Home** | `cli-anything-adguard-home` | +| Video conferencing | **Zoom** | `cli-anything-zoom` | +| Knowledge outlines | **Mubu** | `cli-anything-mubu` | + +--- + +## Smart Fallback — When CLI-Anything fails + +CLI-Anything is the first choice, but if it fails the agent must still complete the task: + +1. 
**Try CLI-Anything first** — always attempt the harness (`cli-anything-<app>`) +2. **If harness fails after 1 retry** — fall back to Python (PIL, python-docx, pydub, moviepy, etc.) and complete the task anyway +3. **Always tell the user** what was actually used and suggest installing the app for better results + +Example: +> "Done — resized using Python PIL as a fallback (GIMP harness failed). Install GIMP for higher quality results next time." + +Never leave the user with no result. Always complete the task one way or another. + --- ## FORBIDDEN — Never Do These (causes bugs on all platforms) @@ -47,67 +97,63 @@ If the user's message matches any of these (case-insensitive, any wording): **CLI-Anything — What I Can Do** -Just tell me what you want done in plain English. I'll auto-install the app if it's not on your system and complete the task for you — you never need to run any commands yourself. +Just describe your task in plain English — you don't need to mention CLI-Anything. I'll pick the right app, install it if needed, and complete the task. Works on Windows, macOS, and Linux.
**Creative & Media** -| App | What I can do | Example prompt | +| App | What I do | Example | |---|---|---| -| GIMP | Resize, crop, blur, convert, export images | "Resize photo.jpg to 1920×1080 and save as photo_hd.jpg" | -| Blender | Render 3D scenes, run scripts, export models | "Render scene.blend to PNG frames in the frames/ folder" | -| Inkscape | Export SVG to PNG/PDF, convert vector files | "Export logo.svg as a 300 DPI PNG" | -| Krita | Export paintings, batch convert images | "Export painting.kra as PNG" | -| Audacity | Trim, export, convert audio files | "Trim the first 30 seconds from audio.mp3 and save as clip.mp3" | -| OBS Studio | Record screen, stream | "Record my screen for 60 seconds" | -| Kdenlive | Render video projects to MP4/MKV | "Render project.kdenlive to MP4" | -| Shotcut | Render video projects to MP4 | "Render project.mlt to MP4" | +| GIMP _(image editing)_ | Resize, crop, filter, convert, export images | "Resize photo.jpg to 1920×1080" | +| Blender _(3D modeling & rendering)_ | Render scenes, export models, run scripts | "Render scene.blend to PNG frames" | +| Inkscape _(vector graphics)_ | Export SVG to PNG/PDF, convert vectors | "Export logo.svg as 300 DPI PNG" | +| Audacity _(audio production)_ | Trim, convert, export audio | "Trim first 30s from audio.mp3" | +| OBS Studio _(live streaming & recording)_ | Record screen, capture video, stream | "Record my screen for 60 seconds" | +| Kdenlive _(video editing)_ | Render video projects to MP4/MKV | "Render project.kdenlive to MP4" | +| Shotcut _(video editing)_ | Render video projects to MP4 | "Render project.mlt to MP4" | +| Krita _(digital painting)_ | Export paintings, batch convert .kra files | "Export painting.kra as PNG" | **Office & Productivity** -| App | What I can do | Example prompt | +| App | What I do | Example | |---|---|---| -| LibreOffice | Convert DOCX/XLSX/PPTX to PDF, run macros | "Convert report.docx to PDF" | -| Mubu | Manage knowledge outlines | "Open my outline in 
Mubu" | +| LibreOffice _(Writer, Calc, Impress)_ | Convert DOCX/XLSX/PPTX to PDF, run macros | "Convert report.docx to PDF" | +| Mubu _(knowledge management & outlining)_ | Manage outlines and knowledge bases | "Open my outline in Mubu" | **Communication** -| App | What I can do | Example prompt | +| App | What I do | Example | |---|---|---| -| Zoom | Start/join meetings | "Start a Zoom meeting" | +| Zoom _(video conferencing)_ | Start or join meetings | "Start a Zoom meeting" | **Diagramming** -| App | What I can do | Example prompt | +| App | What I do | Example | |---|---|---| -| Draw.io | Export diagrams to PNG/SVG/PDF | "Export diagram.drawio as PNG" | -| Mermaid | Render diagram code to PNG | "Render this diagram to PNG: graph TD; A-->B; B-->C" | +| Draw.io _(diagrams)_ | Export diagrams to PNG/SVG/PDF | "Export diagram.drawio as PNG" | +| Mermaid Live Editor _(diagrams)_ | Render diagram code to image | "Render: graph TD; A-->B; B-->C" | **AI & ML** -| App | What I can do | Example prompt | -|---|---|---| -| ComfyUI | Run AI image generation workflows | "Run workflow.json and save images to output/" | -| AnyGen | Generate AI content | "Generate content using AnyGen" | -| NotebookLM | AI research and summarization | "Summarize this PDF using NotebookLM" | -| Ollama | Run local LLM inference | "Run llama3 and summarize this text: ..." 
| -| Stable Diffusion | Generate images from text prompts | "Generate 'a sunset over mountains' and save as out.png" | - -**Dev & Infrastructure** -| App | What I can do | Example prompt | +| App | What I do | Example | |---|---|---| -| JupyterLab | Execute notebooks, save output | "Execute notebook.ipynb and save the output" | -| Grafana | Export dashboards | "Export my dashboard as JSON" | -| Gitea | Create repos, manage git hosting | "Create a private repo called myrepo on Gitea" | -| GitLab | Create projects, manage CI/CD | "Create a new project on GitLab" | -| NextCloud | Sync files, manage cloud storage | "Sync my files to NextCloud" | -| Jenkins | Trigger build pipelines | "Trigger my build pipeline" | -| AdGuard Home | Set up network-wide ad blocking | "Set up network-wide ad blocking with AdGuard Home" | - -**GIS & 3D Design** -| App | What I can do | Example prompt | +| ComfyUI _(AI image generation)_ | Run AI image workflows | "Run workflow.json, save to output/" | +| AnyGen _(AI content generation)_ | Generate AI content | "Generate content using AnyGen" | +| NotebookLM _(AI research assistant)_ | Research, summarize documents | "Summarize this PDF in NotebookLM" | +| Ollama _(local LLM inference)_ | Run local AI models | "Run llama3: summarize this text" | +| Stable Diffusion WebUI | Generate images from text prompts | "Generate 'sunset over mountains'" | + +**Dev, Infrastructure, GIS & 3D Design** +| App | What I do | Example | |---|---|---| +| AdGuard Home _(network-wide ad blocking)_ | Set up DNS-level ad blocking | "Set up AdGuard Home ad blocking" | +| JupyterLab | Execute notebooks, save output | "Run notebook.ipynb and save output" | +| Jenkins | Trigger CI/CD pipelines | "Trigger my build pipeline" | +| Gitea | Git hosting, create/manage repos | "Create private repo called myrepo" | +| NextCloud | Cloud file sync | "Sync my folder to NextCloud" | +| GitLab | Projects, CI/CD pipelines | "Create a new GitLab project" | +| Grafana | Export monitoring
dashboards | "Export my dashboard as JSON" | | FreeCAD | Export 3D models to STL/STEP | "Export model.fcstd as STL" | | QGIS | Export maps to PNG/PDF | "Export map.qgz as PNG" | **Tips:** -- Always give me the full file path (e.g. `C:\Users\you\Desktop\photo.jpg`) -- If the app isn't installed, I'll install it automatically — just wait a few minutes -- I never ask you to run commands yourself — I do everything for you +- Give me the full file path (e.g. `C:\Users\you\Desktop\photo.jpg` or `/home/user/photo.jpg`) +- If the app isn't installed, I install it automatically — no action needed from you +- If the app fails, I fall back to a Python alternative and tell you - Works on Windows, macOS, and Linux --- diff --git a/skills/docx/scripts/comment.py b/skills/docx/scripts/comment.py index 36e1c935..35600710 100644 --- a/skills/docx/scripts/comment.py +++ b/skills/docx/scripts/comment.py @@ -13,6 +13,8 @@ """ +from __future__ import annotations + import argparse import random import shutil diff --git a/skills/docx/scripts/office/pack.py b/skills/docx/scripts/office/pack.py index db29ed8b..55b53343 100644 --- a/skills/docx/scripts/office/pack.py +++ b/skills/docx/scripts/office/pack.py @@ -10,6 +10,8 @@ python pack.py unpacked/ output.pptx --validate false """ +from __future__ import annotations + import argparse import sys import shutil diff --git a/skills/nano-banana-pro/scripts/generate_image.py b/skills/nano-banana-pro/scripts/generate_image.py index 0ceed2c2..0672c22e 100644 --- a/skills/nano-banana-pro/scripts/generate_image.py +++ b/skills/nano-banana-pro/scripts/generate_image.py @@ -1,3 +1,5 @@ + +from __future__ import annotations #!/usr/bin/env python3 # /// script # requires-python = ">=3.10" diff --git a/skills/ontology/scripts/ontology.py b/skills/ontology/scripts/ontology.py index 040b4354..2c8f8e07 100644 --- a/skills/ontology/scripts/ontology.py +++ b/skills/ontology/scripts/ontology.py @@ -1,3 +1,5 @@ + +from __future__ import annotations 
#!/usr/bin/env python3 """ Ontology graph operations: create, query, relate, validate. diff --git a/skills/pptx/scripts/office/pack.py b/skills/pptx/scripts/office/pack.py index db29ed8b..55b53343 100644 --- a/skills/pptx/scripts/office/pack.py +++ b/skills/pptx/scripts/office/pack.py @@ -10,6 +10,8 @@ python pack.py unpacked/ output.pptx --validate false """ +from __future__ import annotations + import argparse import sys import shutil diff --git a/skills/tesla-api/scripts/tesla.py b/skills/tesla-api/scripts/tesla.py index 3577107b..b5c10fd5 100644 --- a/skills/tesla-api/scripts/tesla.py +++ b/skills/tesla-api/scripts/tesla.py @@ -1,3 +1,5 @@ + +from __future__ import annotations #!/usr/bin/env python3 # /// script # requires-python = ">=3.10" diff --git a/skills/xlsx/scripts/office/pack.py b/skills/xlsx/scripts/office/pack.py index db29ed8b..55b53343 100644 --- a/skills/xlsx/scripts/office/pack.py +++ b/skills/xlsx/scripts/office/pack.py @@ -10,6 +10,8 @@ python pack.py unpacked/ output.pptx --validate false """ +from __future__ import annotations + import argparse import sys import shutil