Skip to content

Commit 4fef393

Browse files
committed
Minor improvements in Ollama code; ensure the dir-busting process is cleaned up; add run_unix_command instructions.
1 parent cd8cc58 commit 4fef393

File tree

4 files changed

+41
-43
lines changed

4 files changed

+41
-43
lines changed

requirements.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@ chroma-haystack~=3.3.0
33
numpy<2
44
ollama-haystack~=5.1.0
55
google-genai-haystack~=2.1.1
6-
mcp-haystack~=0.5.0
6+
mcp-haystack~=0.6.0
77
chardet~=5.2.0
88
sentence-transformers~=5.1.0
99
prompt_toolkit~=3.0.51
10-
mcp[cli]~=1.13.1
10+
mcp[cli]~=1.14.1
1111
httpx~=0.28.1
12-
uv~=0.8.6
12+
uv~=0.8.18
1313
tldextract~=5.3.0
1414
validators~=0.35.0
1515
more-itertools~=10.8.0
@@ -26,4 +26,4 @@ html5lib~=1.1
2626
optimum~=1.27.0
2727
ddgs~=9.5.2
2828
pycryptodome~=3.23.0
29-
psutil~=7.0.0
29+
psutil~=7.1.0

shyhurricane/generator_config.py

Lines changed: 34 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
from typing import Optional, Dict, Any, Union, List
55

6+
import requests
67
from google.genai import Client
78
from google.genai.types import HttpOptions, HttpRetryOptions
89
from haystack.components.generators import OpenAIGenerator
@@ -36,6 +37,7 @@ def add_generator_args(ap: argparse.ArgumentParser):
3637

3738

3839
TEMPERATURE_DEFAULT: float = 0.2
40+
OLLAMA_HOST_DEFAULT = "localhost:11434"
3941

4042

4143
class GoogleGenAIChatGeneratorWithRetry(GoogleGenAIChatGenerator):
@@ -173,6 +175,18 @@ def _thinking_convert_ollama_response_to_chatmessage(ollama_response: ChatRespon
173175
ollama_cg._convert_ollama_response_to_chatmessage = _thinking_convert_ollama_response_to_chatmessage
174176

175177

178+
def ollama_model_supports_thinking(ollama_host: str, ollama_model: str) -> bool:
179+
r = requests.post(f"http://{ollama_host}/api/chat", json={
180+
"model": ollama_model,
181+
"messages": [{"role": "user", "content": "ping"}],
182+
"think": True,
183+
"stream": False
184+
})
185+
data = r.json()
186+
supports_thinking = bool(data.get("message", {}).get("thinking"))
187+
return supports_thinking
188+
189+
176190
class GeneratorConfig(BaseModel):
177191
ollama_host: Optional[str] = Field(description="The location of the Ollama server", default=None)
178192
ollama_model: Optional[str] = Field(description="The name of the Ollama model", default=None)
@@ -203,6 +217,7 @@ def from_env():
203217
return generator_config
204218

205219
def apply_reasoning_default(self):
220+
self.ollama_host = self.ollama_host or OLLAMA_HOST_DEFAULT
206221
if self.ollama_model or self.gemini_model or self.openai_model:
207222
return self
208223
if os.environ.get("GEMINI_API_KEY", None) or os.environ.get("GOOGLE_API_KEY", None):
@@ -214,6 +229,7 @@ def apply_reasoning_default(self):
214229
return self
215230

216231
def apply_summarizing_default(self):
232+
self.ollama_host = self.ollama_host or OLLAMA_HOST_DEFAULT
217233
if self.ollama_model or self.gemini_model or self.openai_model:
218234
return self
219235
self.ollama_model = "llama3.2:3b"
@@ -235,10 +251,7 @@ def describe(self) -> str:
235251
elif self.gemini_model:
236252
return f"Gemini {self.gemini_model}"
237253
else:
238-
if self.ollama_host:
239-
return f"Ollama {self.ollama_model} at {self.ollama_host}"
240-
else:
241-
return f"Ollama {self.ollama_model}"
254+
return f"Ollama {self.ollama_model} at {self.ollama_host}"
242255

243256
def create_chat_generator(self,
244257
temperature: Optional[float] = None,
@@ -272,29 +285,20 @@ def create_chat_generator(self,
272285
"temperature": temperature or self.temperature,
273286
}
274287
ollama_timeout = int(os.environ.get("OLLAMA_TIMEOUT", "300"))
275-
if self.ollama_model.startswith("gpt-oss"):
288+
ollama_think = ollama_model_supports_thinking(self.ollama_host, self.ollama_model)
289+
if ollama_think:
276290
# OllamaChatGenerator docs say the think parameter can be a bool or "low", "medium", "high", but the client only supports bool
277291
# https://huggingface.co/docs/inference-providers/guides/gpt-oss
278292
_generation_kwargs["effort"] = "high"
279-
if self.ollama_host:
280-
logger.info("Using Ollama chat with model %s at %s", self.ollama_model, self.ollama_host)
281-
return OllamaChatGenerator(
282-
url="http://" + self.ollama_host,
283-
model=self.ollama_model,
284-
timeout=ollama_timeout,
285-
generation_kwargs=_generation_kwargs | (generation_kwargs or {}),
286-
tools=tools,
287-
think=True,
288-
)
289-
else:
290-
logger.info("Using Ollama chat with model %s", self.ollama_model)
291-
return OllamaChatGenerator(
292-
model=self.ollama_model,
293-
timeout=ollama_timeout,
294-
generation_kwargs=_generation_kwargs | (generation_kwargs or {}),
295-
tools=tools,
296-
think=True,
297-
)
293+
logger.info("Using Ollama chat with model %s at %s", self.ollama_model, self.ollama_host)
294+
return OllamaChatGenerator(
295+
url="http://" + (self.ollama_host or OLLAMA_HOST_DEFAULT),
296+
model=self.ollama_model,
297+
timeout=ollama_timeout,
298+
generation_kwargs=_generation_kwargs | (generation_kwargs or {}),
299+
tools=tools,
300+
think=ollama_think,
301+
)
298302
else:
299303
raise NotImplementedError
300304

@@ -326,19 +330,12 @@ def create_generator(self,
326330
_generation_kwargs = {
327331
"temperature": temperature or self.temperature,
328332
}
329-
if self.ollama_host:
330-
logger.info("Using Ollama generator with model %s at %s", self.ollama_model, self.ollama_host)
331-
return OllamaGenerator(
332-
url="http://" + self.ollama_host,
333-
model=self.ollama_model,
334-
generation_kwargs=_generation_kwargs | (generation_kwargs or {}),
335-
)
336-
else:
337-
logger.info("Using Ollama generator with model %s", self.ollama_model)
338-
return OllamaGenerator(
339-
model=self.ollama_model,
340-
generation_kwargs=_generation_kwargs | (generation_kwargs or {}),
341-
)
333+
logger.info("Using Ollama generator with model %s at %s", self.ollama_model, self.ollama_host)
334+
return OllamaGenerator(
335+
url="http://" + (self.ollama_host or OLLAMA_HOST_DEFAULT),
336+
model=self.ollama_model,
337+
generation_kwargs=_generation_kwargs | (generation_kwargs or {}),
338+
)
342339
else:
343340
raise NotImplementedError
344341

shyhurricane/mcp_server/tools/run_unix_command.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ async def run_unix_command(
7474
- Always set a timeout for potentially blocking commands (e.g., timeout 10s nmap ...). Use a timeout value appropriate for the command. For example, directory busting with a large word list may take 10 minutes, whereas a short wordlist may be 2 minutes.
7575
- Ensure commands can complete without user interaction before execution.
7676
- The directly accessible filesystem is part of the containerized environment, not the target. Commands such as find, cat, etc. are not enumerating the target unless they are part of a command that connects to the target, such as ssh.
77-
- Files in the current working directory will persist across calls. Do not write to /tmp or /var/tmp. Do not save output to files outside of the current working directory.
77+
- Files in the current working directory will persist across calls. Prefer writing files to the current working directory.
7878
"""
7979
await log_tool_history(ctx, title="run_unix_command", command=command, additional_hosts=additional_hosts, env=env)
8080
server_ctx = await get_server_context()

shyhurricane/task_queue/dir_busting_worker.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,9 +168,10 @@ def process_stdout(data: str):
168168
# logger.error("Dir busting errors %s", buster_proc.stderr.read())
169169
return None
170170
finally:
171-
mitmdump_proc.terminate()
172171
if result_queue:
173172
result_queue.put_nowait(None)
173+
subprocess.Popen(["docker", "rm", "-f", container_name], stdout=subprocess.DEVNULL,
174+
stderr=subprocess.DEVNULL)
174175

175176

176177
def _build_feroxbuster_command(

0 commit comments

Comments
 (0)