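Bug fix: download koziev resources into the package directory, skip non-file entries when fetching models, pass `device` to model loaders; relicense to MIT (v1.5.6.2)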

Den4ikAI · web-flow · commit 0eeafde744fb · 2024-03-17T00:42:03.000+08:00
diff --git a/LICENSE b/LICENSE
@@ -1,13 +1,21 @@
-Copyright 2023 Denis Petrov
+The MIT License (MIT)
 
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
+Copyright (c) 2023 Denis Petrov
 
-    http://www.apache.org/licenses/LICENSE-2.0
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
 
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,23 @@
+[build-system]
+requires = ["flit_core >=3.2,<4"]
+build-backend = "flit_core.buildapi"
+
+[project]
+name = "ruaccent"
+authors = [{name = "Denis Petrov", email = "arduino4b@gmail.com"}]
+readme = "README.md"
+license = {file = "LICENSE"}
+classifiers = ["License :: OSI Approved :: MIT License"]
+dynamic = ["version", "description"]
+
+dependencies = [
+    "huggingface_hub",
+    "onnxruntime",
+    "transformers",
+    "sentencepiece",
+    "numpy",
+    "python-crfsuite",
+]
+
+[project.urls]
+Home = "https://github.com/Den4ikAI/ruaccent"
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,6 @@
+huggingface_hub
+onnxruntime
+transformers
+sentencepiece
+numpy
+python-crfsuite
diff --git a/ruaccent/__init__.py b/ruaccent/__init__.py
@@ -1,6 +1,6 @@
 """Russian accentizer"""
 
-__version__ = "1.5.6.1"
+__version__ = "1.5.6.2"
 
 
 from .ruaccent import RUAccent
diff --git a/ruaccent/ruaccent.py b/ruaccent/ruaccent.py
@@ -29,9 +29,9 @@ def __init__(self):
                                       'small_poetry': '/nn/nn_omograph/small_poetry',
                                       'turbo': '/nn/nn_omograph/turbo'}
     
-        self.accentuator_paths = ['/nn/nn_accent', '/nn/nn_stress_usage_predictor','/nn/nn_yo_homograph_resolver', '/dictionary', '/dictionary/rule_engine', "/koziev/rulemma", "/koziev/rupostagger", "/koziev/rupostagger/database"]
+        self.accentuator_paths = ['/nn/nn_accent', '/nn/nn_stress_usage_predictor','/nn/nn_yo_homograph_resolver', '/dictionary', '/dictionary/rule_engine']
         self.letters_accent = {'о': '+о', 'О': '+О'}
-
+        self.koziev_paths = ["/koziev/rulemma", "/koziev/rupostagger", "/koziev/rupostagger/database"]
     def load(
         self,
         omograph_model_size="big_poetry",
@@ -46,7 +46,7 @@ def load(
             self.workdir = workdir
         else:
             self.workdir = str(pathlib.Path(__file__).resolve().parent)
-
+        self.module_path = str(pathlib.Path(__file__).resolve().parent)
         self.custom_dict = custom_dict
         self.accents = {}
         if not os.path.exists(
@@ -66,10 +66,16 @@ def load(
             if model_path:
                 files = self.fs.ls(repo + model_path)
                 for file in files:
-                    hf_hub_download(repo_id=repo, local_dir_use_symlinks=False, local_dir=self.workdir, filename=file['name'].replace(repo+'/', ''))
+                    if file["type"] == "file":
+                        hf_hub_download(repo_id=repo, local_dir_use_symlinks=False, local_dir=self.workdir, filename=file['name'].replace(repo+'/', ''))
             else:
                 raise FileNotFoundError
-                
+        if not os.path.exists(join_path(self.module_path, "koziev")):
+          for path in self.koziev_paths:
+               files = self.fs.ls(repo + path)
+               for file in files:
+                   if file["type"] == "file":
+                       hf_hub_download(repo_id=repo, local_dir_use_symlinks=False, local_dir=self.module_path, filename=file['name'].replace(repo+'/', ''))
         from .rule_accent_engine import RuleEngine
         self.rule_accent = RuleEngine()
         self.omographs = json.load(
@@ -91,17 +97,14 @@ def load(
             ))
         self.accents.update(self.custom_dict)
         self.accents.update(self.letters_accent)
-        self.omograph_model.load(
-        join_path(self.workdir, f"nn/nn_omograph/{omograph_model_size}/")
-
-        #"../../../pretrain_ruaccent_turbo/onnx_deberta"        
-	)
+        self.omograph_model.load(join_path(self.workdir, f"nn/nn_omograph/{omograph_model_size}/"), device=device)
+        
         self.yo_homographs = json.load(
             gzip.open(join_path(self.workdir, "dictionary","yo_homographs.json.gz"))
         )
-        self.accent_model.load(join_path(self.workdir, "nn","nn_accent/"))
-        self.stress_usage_predictor.load(join_path(self.workdir, "nn","nn_stress_usage_predictor/"))
-        self.yo_homograph_model.load(join_path(self.workdir, "nn","nn_yo_homograph_resolver"))
+        self.accent_model.load(join_path(self.workdir, "nn","nn_accent/"), device=device)
+        self.stress_usage_predictor.load(join_path(self.workdir, "nn","nn_stress_usage_predictor/"), device=device)
+        self.yo_homograph_model.load(join_path(self.workdir, "nn","nn_yo_homograph_resolver"), device=device)
         self.rule_accent.load(join_path(self.workdir, "dictionary","rule_engine"))
 
     def split_by_words(self, string):
diff --git a/setup.py b/setup.py
@@ -6,7 +6,7 @@
 
 setup(
     name='ruaccent',
-    version='1.5.6.1',
+    version='1.5.6.2',
     author='Denis Petrov',
     author_email='arduino4b@gmail.com',
     description='A Russian text accentuation tool',