From f416e429e9711d32a38aaebadeec3bdd9b29423f Mon Sep 17 00:00:00 2001 From: noahjax Date: Wed, 18 Dec 2024 14:11:04 -0800 Subject: [PATCH] Update how keywords are packaged --- README.md | 2 +- demo.ipynb | 11 ++-------- pyproject.toml | 2 +- src/tako_query_filter/filter.py | 21 +++++++------------ .../tako_query_filter/keywords.py | 2 +- 5 files changed, 12 insertions(+), 26 deletions(-) rename keywords.json => src/tako_query_filter/keywords.py (99%) diff --git a/README.md b/README.md index 302ead4..df95a74 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ See the [demo notebook](demo.ipynb) for a more interactive example. ``` from tako_query_filter.filter import TakoQueryFilter -query_filter = TakoQueryFilter.load_with_keywords() +query_filter = TakoQueryFilter() queries = [ "aapl vs nvda", diff --git a/demo.ipynb b/demo.ipynb index b81812f..62fb8ec 100644 --- a/demo.ipynb +++ b/demo.ipynb @@ -13,13 +13,13 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from tako_query_filter.filter import TakoQueryFilter\n", "\n", - "query_filter = TakoQueryFilter.load_with_keywords()\n" + "query_filter = TakoQueryFilter()" ] }, { @@ -48,13 +48,6 @@ "for query, pred in zip(queries, preds):\n", " print(f\"{query} -> {pred}\")\n" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/pyproject.toml b/pyproject.toml index 34b9d10..6499f78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ include = ["keywords.json"] name = "tako-query-filter" packages = [{include = "tako_query_filter", from = "src"}] readme = "README.md" -version = "0.1.4" +version = "0.2.0" [tool.poetry.dependencies] en-tako-query-filter = {url = "https://huggingface.co/TakoData/en_tako_query_filter/resolve/main/en_tako_query_filter-any-py3-none-any.whl"} diff --git a/src/tako_query_filter/filter.py b/src/tako_query_filter/filter.py index 8bcca7e..9c70346 100644 --- a/src/tako_query_filter/filter.py +++ b/src/tako_query_filter/filter.py @@ -1,24 +1,24 @@ import json import re -from typing import List, Optional, Set +from typing import Iterable, List import spacy import hashlib -from importlib import resources +from tako_query_filter.keywords import keywords class TakoQueryFilter: def __init__( self, - keyword_hashes: Set[str], + keyword_hashes: Iterable[str] = keywords, ): self.nlp = spacy.load("en_tako_query_filter") - self.keywords_hashes = keyword_hashes + self.keywords_hashes = set(keyword_hashes) self.keyword_match_score = 0.9 @classmethod def load_with_keywords( cls, - keywords_path: Optional[str] = None, + keywords_path: str, ): """Load TakoQueryFilter with a set of whitelist keywords. @@ -28,15 +28,8 @@ def load_with_keywords( Returns: TakoQueryFilter: Initialized filter with models loaded from local paths """ - - if not keywords_path: - with resources.files("tako_query_filter").joinpath("keywords.json").open( - "r" - ) as f: - keyword_hashes = set(json.load(f)) - else: - with open(keywords_path, "r") as f: - keyword_hashes = set(json.load(f)) + with open(keywords_path, "r") as f: + keyword_hashes = json.load(f) return cls(keyword_hashes) diff --git a/keywords.json b/src/tako_query_filter/keywords.py similarity index 99% rename from keywords.json rename to src/tako_query_filter/keywords.py index a0469f7..61ec712 100644 --- a/keywords.json +++ b/src/tako_query_filter/keywords.py @@ -1,4 +1,4 @@ -[ +keywords = [ "f674a8c2fb0cae1fa204bf7e6ba9e9ef", "f3d7e7f16862091754e23c303870f259", "4ca30d0053831e6f6d8104c312ab8017",