diff --git a/README.md b/README.md index 4faa61f..ef93e9f 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ pip install "git+https://github.com/TakoData/QueryFilter.git#egg=tako-query-filt ### Prerequisites - Get access to Tako Hugging Face repositories +- Install and init `git-lfs` - Log into Hugging Face using `huggingface-cli login` ### Examples diff --git a/pyproject.toml b/pyproject.toml index b77dda0..7954a54 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ joblib = "1.4.2" jupyter = "1.1.1" nbstripout = "0.7.1" numpy = "1.26.4" -python = ">=3.10.14,<4.0" +python = ">=3.10.14,<3.13" scikit-learn = "1.4.1.post1" sentence-transformers = {version = "3.0.1", optional = true} spacy = {version = "3.7.5"} diff --git a/src/tako_query_filter/filter.py b/src/tako_query_filter/filter.py index 42d0d47..e23a1b4 100644 --- a/src/tako_query_filter/filter.py +++ b/src/tako_query_filter/filter.py @@ -6,7 +6,7 @@ import numpy as np from typing import Iterable, List, Optional, Set from sklearn.linear_model import LogisticRegressionCV -from huggingface_hub import hf_hub_download +from huggingface_hub import hf_hub_download, snapshot_download import spacy from spacy.language import Language @@ -40,13 +40,11 @@ def load_from_hf( force_download=force_download, ) ) - spacy_model_path = hf_hub_download( + spacy_model_dir = snapshot_download( repo_id="TakoData/ner-model-best", - filename="config.cfg", revision=spacy_revision, force_download=force_download, ) - spacy_model_dir = str(Path(spacy_model_path).parent) spacy_model = spacy.load(spacy_model_dir) keywords_file = hf_hub_download( repo_id=scikit_path,