Skip to content

Commit 70c7251

Browse files
authored
Fix g2p_model_path and add tests (#939)
1 parent bc4b621 commit 70c7251

File tree

12 files changed

+300
-151
lines changed

12 files changed

+300
-151
lines changed

.github/workflows/main.yml

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,33 +14,24 @@ concurrency:
1414

1515
jobs:
1616
build:
17-
strategy:
18-
matrix:
19-
include:
20-
- os: ubuntu-latest
21-
label: linux-64
22-
23-
#- os: macos-latest
24-
# label: osx-64
25-
# prefix: /Users/runner/miniconda3/envs/my-env
26-
27-
#- os: windows-latest
28-
# label: win-64
29-
# prefix: C:\Miniconda3\envs\my-env
30-
31-
name: ${{ matrix.label }}
32-
runs-on: ${{ matrix.os }}
17+
runs-on: ubuntu-latest
3318
steps:
3419
- uses: actions/checkout@main
3520
with:
3621
fetch-depth: 0
3722

23+
- name: Check disk usage
24+
run: df -h
25+
3826
- name: Install Conda environment with Micromamba
3927
uses: conda-incubator/setup-miniconda@v3
4028
with:
4129
environment-file: github_environment.yml
4230
miniforge-version: latest
4331

32+
- name: Check disk usage after environment install
33+
run: df -h
34+
4435
- name: Configure mfa
4536
shell: bash -l {0}
4637
run: python -m montreal_forced_aligner configure --disable_auto_server

github_environment.yml

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
channels:
22
- conda-forge
33
dependencies:
4-
- python=3.11
5-
- numpy
4+
- python=3.12
5+
- numpy<2.1.0
66
- librosa
77
- pysoundfile
88
- tqdm
@@ -42,15 +42,28 @@ dependencies:
4242
- kalpy>=0.8
4343
- pip
4444
- huggingface_hub
45+
- spacy
46+
- sudachipy
47+
- sudachidict-core
48+
- jamo
49+
- pythainlp
50+
- python-build
51+
- twine
52+
# speechbrain dependencies
53+
- transformers>=4.48.0
54+
- pandas<2.3.0
55+
- sentencepiece
56+
- pytorch=2.8.0
57+
- torchaudio=2.8.0
58+
# whisperx dependencies
59+
- triton
60+
- nltk
61+
- av<16.0.0
62+
- onnxruntime<1.20.0
4563
- pip:
46-
- build
47-
- twine
64+
- speechbrain
65+
- whisperx
4866
# Tokenization dependencies
4967
- python-mecab-ko
50-
- jamo
51-
- pythainlp
5268
- hanziconv
5369
- dragonmapper
54-
- speechbrain
55-
- transformers
56-
- whisperx

montreal_forced_aligner/command_line/align_one.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,10 +139,7 @@ def align_one_cli(context, **kwargs) -> None:
139139
lexicon_compiler.phone_table = pywrapfst.SymbolTable.read_text(phones_path)
140140
else:
141141
lexicon_compiler.load_pronunciations(dictionary_path)
142-
lexicon_compiler.fst.write(str(l_fst_path))
143-
lexicon_compiler.align_fst.write(str(l_align_fst_path))
144-
lexicon_compiler.word_table.write_text(words_path)
145-
lexicon_compiler.phone_table.write_text(phones_path)
142+
lexicon_compiler.create_fsts()
146143
lexicon_compiler.clear()
147144

148145
if no_tokenization or acoustic_model.language is Language.unknown:
@@ -193,6 +190,11 @@ def align_one_cli(context, **kwargs) -> None:
193190
"boost_silence",
194191
]
195192
}
193+
if g2p_model is not None or not (l_fst_path.exists() and not config.CLEAN):
194+
lexicon_compiler.fst.write(str(l_fst_path))
195+
lexicon_compiler.align_fst.write(str(l_align_fst_path))
196+
lexicon_compiler.word_table.write_text(words_path)
197+
lexicon_compiler.phone_table.write_text(phones_path)
196198
kalpy_aligner = KalpyAligner(acoustic_model, lexicon_compiler, **align_options)
197199
for utt in utterances:
198200
utt.apply_cmvn(cmvn)

montreal_forced_aligner/data.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1573,6 +1573,24 @@ class WordData:
15731573
pronunciations: typing.Set[typing.Tuple[str, ...]]
15741574

15751575

1576+
# noinspection PyUnresolvedReferences
1577+
@dataclassy.dataclass(slots=True)
1578+
class GeneratedPronunciation:
1579+
"""
1580+
Data class for information about a pronunciation generated by G2P models
1581+
1582+
Parameters
1583+
----------
1584+
pronunciation: str
1585+
String of phones separated by spaces
1586+
score: float
1587+
Log-likelihood score for the pronunciation
1588+
"""
1589+
1590+
pronunciation: str
1591+
score: float
1592+
1593+
15761594
# noinspection PyUnresolvedReferences
15771595
@dataclassy.dataclass(slots=True)
15781596
class NgramHistoryState:

0 commit comments

Comments
 (0)