Skip to content

Commit 0eeafde

Browse files
authored
Bug fix
1 parent ec1b4d6 commit 0eeafde

File tree

6 files changed

+65
-25
lines changed

6 files changed

+65
-25
lines changed

LICENSE

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,21 @@
1-
Copyright 2023 Denis Petrov
1+
The MIT License (MIT)
22

3-
Licensed under the Apache License, Version 2.0 (the "License");
4-
you may not use this file except in compliance with the License.
5-
You may obtain a copy of the License at
3+
Copyright (c) 2023 Denis Petrov
64

7-
http://www.apache.org/licenses/LICENSE-2.0
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
811

9-
Unless required by applicable law or agreed to in writing, software
10-
distributed under the License is distributed on an "AS IS" BASIS,
11-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12-
See the License for the specific language governing permissions and
13-
limitations under the License.
12+
The above copyright notice and this permission notice shall be included in
13+
all copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21+
THE SOFTWARE.

pyproject.toml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
[build-system]
2+
requires = ["flit_core >=3.2,<4"]
3+
build-backend = "flit_core.buildapi"
4+
5+
[project]
6+
name = "ruaccent"
7+
authors = [{name = "Denis Petrov", email = "arduino4b@gmail.com"}]
8+
readme = "README.md"
9+
license = {file = "LICENSE"}
10+
classifiers = ["License :: OSI Approved :: MIT License"]
11+
dynamic = ["version", "description"]
12+
13+
dependencies = [
14+
"huggingface_hub",
15+
"onnxruntime",
16+
"transformers",
17+
"sentencepiece",
18+
"numpy",
19+
"python-crfsuite",
20+
]
21+
22+
[project.urls]
23+
Home = "https://github.com/Den4ikAI/ruaccent"

requirements.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
huggingface_hub
2+
onnxruntime
3+
transformers
4+
sentencepiece
5+
numpy
6+
python-crfsuite

ruaccent/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Russian accentizer"""
22

3-
__version__ = "1.5.6.1"
3+
__version__ = "1.5.6.2"
44

55

66
from .ruaccent import RUAccent

ruaccent/ruaccent.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ def __init__(self):
2929
'small_poetry': '/nn/nn_omograph/small_poetry',
3030
'turbo': '/nn/nn_omograph/turbo'}
3131

32-
self.accentuator_paths = ['/nn/nn_accent', '/nn/nn_stress_usage_predictor','/nn/nn_yo_homograph_resolver', '/dictionary', '/dictionary/rule_engine', "/koziev/rulemma", "/koziev/rupostagger", "/koziev/rupostagger/database"]
32+
self.accentuator_paths = ['/nn/nn_accent', '/nn/nn_stress_usage_predictor','/nn/nn_yo_homograph_resolver', '/dictionary', '/dictionary/rule_engine']
3333
self.letters_accent = {'о': '+о', 'О': '+О'}
34-
34+
self.koziev_paths = ["/koziev/rulemma", "/koziev/rupostagger", "/koziev/rupostagger/database"]
3535
def load(
3636
self,
3737
omograph_model_size="big_poetry",
@@ -46,7 +46,7 @@ def load(
4646
self.workdir = workdir
4747
else:
4848
self.workdir = str(pathlib.Path(__file__).resolve().parent)
49-
49+
self.module_path = str(pathlib.Path(__file__).resolve().parent)
5050
self.custom_dict = custom_dict
5151
self.accents = {}
5252
if not os.path.exists(
@@ -66,10 +66,16 @@ def load(
6666
if model_path:
6767
files = self.fs.ls(repo + model_path)
6868
for file in files:
69-
hf_hub_download(repo_id=repo, local_dir_use_symlinks=False, local_dir=self.workdir, filename=file['name'].replace(repo+'/', ''))
69+
if file["type"] == "file":
70+
hf_hub_download(repo_id=repo, local_dir_use_symlinks=False, local_dir=self.workdir, filename=file['name'].replace(repo+'/', ''))
7071
else:
7172
raise FileNotFoundError
72-
73+
if not os.path.exists(join_path(self.module_path, "koziev")):
74+
for path in self.koziev_paths:
75+
files = self.fs.ls(repo + path)
76+
for file in files:
77+
if file["type"] == "file":
78+
hf_hub_download(repo_id=repo, local_dir_use_symlinks=False, local_dir=self.module_path, filename=file['name'].replace(repo+'/', ''))
7379
from .rule_accent_engine import RuleEngine
7480
self.rule_accent = RuleEngine()
7581
self.omographs = json.load(
@@ -91,17 +97,14 @@ def load(
9197
))
9298
self.accents.update(self.custom_dict)
9399
self.accents.update(self.letters_accent)
94-
self.omograph_model.load(
95-
join_path(self.workdir, f"nn/nn_omograph/{omograph_model_size}/")
96-
97-
#"../../../pretrain_ruaccent_turbo/onnx_deberta"
98-
)
100+
self.omograph_model.load(join_path(self.workdir, f"nn/nn_omograph/{omograph_model_size}/"), device=device)
101+
99102
self.yo_homographs = json.load(
100103
gzip.open(join_path(self.workdir, "dictionary","yo_homographs.json.gz"))
101104
)
102-
self.accent_model.load(join_path(self.workdir, "nn","nn_accent/"))
103-
self.stress_usage_predictor.load(join_path(self.workdir, "nn","nn_stress_usage_predictor/"))
104-
self.yo_homograph_model.load(join_path(self.workdir, "nn","nn_yo_homograph_resolver"))
105+
self.accent_model.load(join_path(self.workdir, "nn","nn_accent/"), device=device)
106+
self.stress_usage_predictor.load(join_path(self.workdir, "nn","nn_stress_usage_predictor/"), device=device)
107+
self.yo_homograph_model.load(join_path(self.workdir, "nn","nn_yo_homograph_resolver"), device=device)
105108
self.rule_accent.load(join_path(self.workdir, "dictionary","rule_engine"))
106109

107110
def split_by_words(self, string):

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
setup(
88
name='ruaccent',
9-
version='1.5.6.1',
9+
version='1.5.6.2',
1010
author='Denis Petrov',
1111
author_email='arduino4b@gmail.com',
1212
description='A Russian text accentuation tool',

0 commit comments

Comments
 (0)