Skip to content

Commit f4ca96f

Browse files
committed
Pull Ollama models. Try to make fastembed more efficient.
1 parent 53e2c61 commit f4ca96f

File tree

3 files changed

+46
-15
lines changed

3 files changed

+46
-15
lines changed

requirements.txt

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
1-
haystack-ai~=2.21.0
2-
qdrant-haystack~=9.4.0
3-
fastembed-haystack~=1.5.0
1+
haystack-ai~=2.22.0
2+
qdrant-haystack~=10.0.0
3+
fastembed-haystack~=2.0.0
44
numpy<2
5-
ollama-haystack~=5.3.0
6-
amazon-bedrock-haystack~=5.3.1
7-
openai~=2.14.0
8-
google-genai-haystack~=2.3.0
5+
ollama-haystack~=6.0.0
6+
amazon-bedrock-haystack~=6.2.0
7+
openai~=2.15.0
8+
google-genai-haystack~=3.1.0
9+
haystack-experimental~=0.16.0
910
mcp-haystack~=1.1.0
1011
chardet~=5.2.0
1112
sentence-transformers~=5.2.0
1213
prompt_toolkit~=3.0.51
13-
mcp[cli]~=1.23.1
14+
mcp[cli]~=1.25.0
1415
httpx~=0.28.1
1516
uv~=0.9.15
1617
tldextract~=5.3.0
@@ -26,7 +27,7 @@ lxml~=6.0.0
2627
tinycss2~=1.5.1
2728
json5~=0.13.0
2829
html5lib~=1.1
29-
optimum~=2.1.0
30+
optimum[onnxruntime]~=2.1.0
3031
ddgs~=9.10.0
3132
pycryptodome~=3.23.0
3233
psutil~=7.2.1

shyhurricane/generator_config.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import argparse
22
import logging
33
import os
4+
from math import ceil
45
from typing import Optional, Dict, Any, Union, List
56

67
import requests
@@ -30,6 +31,7 @@
3031
from pydantic import BaseModel, Field
3132

3233
from shyhurricane.doc_type_model_map import ModelConfig
34+
from shyhurricane.utils import process_cpu_count
3335

3436
logger = logging.getLogger(__name__)
3537

@@ -235,6 +237,14 @@ def from_env():
235237
)
236238
return generator_config
237239

240+
def ollama_url(self) -> str:
    """Return the base HTTP URL of the Ollama server.

    Falls back to OLLAMA_HOST_DEFAULT when no host is configured.
    """
    host = self.ollama_host or OLLAMA_HOST_DEFAULT
    return f"http://{host}"
242+
243+
def ollama_pull(self, model_id: str):
    """Ask the Ollama server to pull *model_id* (e.g. ``"llama3:8b"``).

    The ``/api/pull`` endpoint expects the full ``name[:tag]`` string in
    the ``"model"`` field — there is no separate ``"tag"`` (or ``"force"``)
    parameter, so splitting the id would silently pull ``:latest`` instead
    of the requested tag.  ``"stream": False`` makes the server respond
    with a single final status object rather than a stream of progress
    lines.  Pulling an already-present model is a cheap no-op server-side.

    :param model_id: model name, optionally suffixed with ``:tag``.
    :raises requests.HTTPError: if the server reports a failure.
    """
    r = requests.post(
        f"{self.ollama_url()}/api/pull",
        json={"model": model_id, "stream": False},
    )
    r.raise_for_status()
247+
238248
def apply_reasoning_default(self):
239249
self.ollama_host = self.ollama_host or OLLAMA_HOST_DEFAULT
240250
if self.ollama_model or self.gemini_model or self.openai_model or self.bedrock_model:
@@ -325,8 +335,9 @@ def create_chat_generator(self,
325335
# https://huggingface.co/docs/inference-providers/guides/gpt-oss
326336
_generation_kwargs["effort"] = "high"
327337
logger.info("Using Ollama chat with model %s at %s", self.ollama_model, self.ollama_host)
338+
self.ollama_pull(self.ollama_model)
328339
return OllamaChatGenerator(
329-
url="http://" + (self.ollama_host or OLLAMA_HOST_DEFAULT),
340+
url=self.ollama_url(),
330341
model=self.ollama_model,
331342
timeout=ollama_timeout,
332343
generation_kwargs=_generation_kwargs | (generation_kwargs or {}),
@@ -374,8 +385,9 @@ def create_generator(self,
374385
"temperature": temperature or self.temperature,
375386
}
376387
logger.info("Using Ollama generator with model %s at %s", self.ollama_model, self.ollama_host)
388+
self.ollama_pull(self.ollama_model)
377389
return OllamaGenerator(
378-
url="http://" + (self.ollama_host or OLLAMA_HOST_DEFAULT),
390+
url=self.ollama_url(),
379391
model=self.ollama_model,
380392
generation_kwargs=_generation_kwargs | (generation_kwargs or {}),
381393
)
@@ -397,7 +409,7 @@ def _embedder_enable_ollama(self) -> bool:
397409
# v0.12.11, v0.13.0 - macos has use after free failures
398410
# v0.14.0 - macos embedding is working
399411
try:
400-
resp_version = requests.get("http://" + (self.ollama_host or OLLAMA_HOST_DEFAULT) + "/api/version")
412+
resp_version = requests.get(self.ollama_url() + "/api/version")
401413
resp_version.raise_for_status()
402414
version = float(".".join(resp_version.json()["version"].split(".")[0:2]))
403415
return version >= 0.14
@@ -466,9 +478,10 @@ def create_document_embedder(self, model_config: ModelConfig):
466478
)
467479
elif self.ollama_model and self._embedder_enable_ollama():
468480
logger.info("Using Ollama document embedder with model %s at %s", model_path, self.ollama_host)
481+
self.ollama_pull(model_path)
469482
return OllamaDocumentEmbedder(
470483
model=model_path,
471-
url="http://" + (self.ollama_host or OLLAMA_HOST_DEFAULT),
484+
url=self.ollama_url(),
472485
progress_bar=False,
473486
)
474487

@@ -501,9 +514,10 @@ def create_text_embedder(self, model_config: ModelConfig):
501514
)
502515
elif self.ollama_model and self._embedder_enable_ollama():
503516
logger.info("Using Ollama text embedder with model %s at %s", model_path, self.ollama_host)
517+
self.ollama_pull(model_path)
504518
return OllamaTextEmbedder(
505519
model=model_path,
506-
url="http://" + (self.ollama_host or OLLAMA_HOST_DEFAULT),
520+
url=self.ollama_url(),
507521
)
508522

509523
logger.info("Using local text embedder with model %s", model_path)
@@ -528,14 +542,18 @@ def create_sparse_document_embedder(self, model_config: ModelConfig):
528542
return FastembedSparseDocumentEmbedder(
529543
model=model_config.model_name,
530544
cache_dir=self._fastembed_cache_dir(),
531-
batch_size=1,
545+
threads=max(1, ceil(process_cpu_count() / 2)),
546+
batch_size=32,
547+
parallel=0,
532548
progress_bar=False,
533549
)
534550

535551
def create_sparse_text_embedder(self, model_config: ModelConfig):
536552
return FastembedSparseTextEmbedder(
537553
model=model_config.model_name,
538554
cache_dir=self._fastembed_cache_dir(),
555+
threads=max(1, ceil(process_cpu_count() / 2)),
556+
parallel=0,
539557
progress_bar=False,
540558
)
541559

shyhurricane/utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -721,3 +721,15 @@ def coerce_to_dict(value: Any, kv_sep: str = None, element_sep: str = None) -> D
721721
it = iter(value)
722722
return dict(zip_longest(it, it, fillvalue=None))
723723
return {str(value): ""}
724+
725+
726+
def process_cpu_count() -> int:
    """Return the number of CPUs usable by this process (always >= 1).

    Preference order:
      1. ``os.process_cpu_count()`` (Python 3.13+; honors ``-X cpu_count``)
      2. the scheduler affinity mask (Linux-only ``os.sched_getaffinity``)
      3. ``os.cpu_count()``

    Each of these may be unavailable on a given platform, and both
    ``os.cpu_count()`` and ``os.process_cpu_count()`` can return ``None``
    when the count is undeterminable; the final ``or 1`` fallback
    guarantees an ``int`` so callers can safely divide by the result
    (e.g. ``ceil(process_cpu_count() / 2)``).
    """
    getter = getattr(os, "process_cpu_count", None)
    if getter is not None:
        count = getter()
        if count:
            return count
    try:
        # Linux: CPUs this process is actually allowed to run on.
        return len(os.sched_getaffinity(0))
    except (AttributeError, OSError):
        pass
    return os.cpu_count() or 1

0 commit comments

Comments
 (0)