Skip to content

Commit 69aa372

Browse files
committed
init
1 parent 8718172 commit 69aa372

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+15466
-0
lines changed

.gitignore

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
__pycache__/
2+
*.py[cod]
3+
*$py.class
4+
*.egg-info
5+
.pytest_cache
6+
.ipynb_checkpoints
7+
8+
thumbs.db
9+
.DS_Store
10+
.idea
11+
*.log
12+
*rtx*
13+
*.pdf
14+
*.mkv
15+
*.mp4
16+
*a40*
17+
*durip*
18+
*.png
19+
sim_lr.ipynb
20+
*.mp3
21+
*.gz
22+
*.flac
23+
*.th
24+
*.pth
25+
*.pt
26+
local_*
27+
hub/
28+
per_sample_res/
29+
src/
30+
res/seed_tts_eval/
31+
res/lspc_eval
32+
file_log.txt
33+
file_log_debug*.txt
34+
bad_files/
35+
amt/
36+
sam1.wav
37+
sam2.wav
38+
sam3.wav
39+
demo/generated_tts

README.md

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# VoiceStar: Robust, Duration-controllable TTS that can Extrapolate
2+
3+
## 1. Env setup
4+
### Download model
5+
```bash
6+
# under VoiceStar root dir
7+
wget -O ./pretrained/encodec_6f79c6a8.th https://huggingface.co/pyp1/VoiceCraft/resolve/main/encodec_4cb2048_giga.th?download=true
8+
wget -O ./pretrained/VoiceStar_840M_30s.pth https://huggingface.co/pyp1/VoiceStar/resolve/main/VoiceStar_840M_30s.pth?download=true
9+
wget -O ./pretrained/VoiceStar_840M_40s.pth https://huggingface.co/pyp1/VoiceStar/resolve/main/VoiceStar_840M_40s.pth?download=true
10+
```
11+
### Inference only:
12+
```bash
13+
conda create -n voicestar python=3.10
14+
conda activate voicestar # this seems to lead to much worse results in terms of wer and spksim (comparing e9_rerun and e9_rerun_newba_upgraded)
15+
pip install torch==2.5.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu124
16+
pip install numpy tqdm fire
17+
pip install phonemizer==3.2.1
18+
apt-get install espeak-ng # backend for the phonemizer
19+
pip install torchmetrics
20+
pip install einops
21+
pip install omegaconf==2.3.0
22+
pip install openai-whisper
23+
```
24+
25+
* To avoid warnings like
26+
[WARNING] words_mismatch.py:88 || words count mismatch on 200.0% of the lines (2/1)
27+
```python
28+
# go to ~/miniconda3/envs/voicestar/lib/python3.10/site-packages/phonemizer/backend/espeak/words_mismatch.py
29+
# pass the warning like this
30+
def _resume(self, nmismatch: int, nlines: int):
31+
"""Logs a high level undetailed warning"""
32+
pass
33+
# if nmismatch:
34+
# self._logger.warning(
35+
# 'words count mismatch on %s%% of the lines (%s/%s)',
36+
# round(nmismatch / nlines, 2) * 100, nmismatch, nlines)
37+
```
38+
39+
### Training and data processing
40+
*additional packages*:
41+
```bash
42+
pip install huggingface_hub
43+
pip install datasets
44+
pip install tensorboard
45+
pip install wandb
46+
pip install matplotlib
47+
pip install ffmpeg-python
48+
pip install scipy
49+
pip install soundfile
50+
```
51+
52+
## 2. example
53+
### download pretrained models
54+
```bash
55+
56+
```
57+
58+
59+
### command line example
60+
Check the signature of the `run_inference` function in `inference_commandline.py` for the adjustable hyperparameters.
61+
```bash
62+
# under root dir
63+
python inference_commandline.py \
64+
--reference_speech "./demo/5895_34622_000026_000002.wav" \
65+
--target_text "I cannot believe that the same model can also do text to speech synthesis too! And you know what? this audio is 8 seconds long." \
66+
--target_duration 8
67+
```
68+
69+
### Gradio

config.py

Lines changed: 254 additions & 0 deletions
Large diffs are not rendered by default.

copy_codebase.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
2+
import os
3+
import shutil
4+
import fnmatch
5+
6+
def parse_gitignore(gitignore_path):
    """Read a .gitignore-style file and collect its pattern lines.

    Every line is stripped of surrounding whitespace. Blank lines and lines
    that start with "#" (comments) are dropped; everything else is returned
    verbatim, in file order. No further interpretation of the patterns
    (negation, anchoring, ...) happens here.
    """
    with open(gitignore_path, "r") as handle:
        stripped_lines = [raw.strip() for raw in handle]
    return [
        entry
        for entry in stripped_lines
        if entry and not entry.startswith("#")
    ]
18+
19+
def file_matches_patterns(file_path, patterns):
    """Check whether a file path matches any .gitignore-style pattern.

    A pattern matches when either of the following holds:

    * ``fnmatch`` accepts the whole ``file_path`` (e.g. ``*.pth``); or
    * the pattern, with leading/trailing slashes removed, matches a run of
      consecutive path components (e.g. ``.idea`` or ``res/lspc_eval``
      anywhere in the path).

    A pattern ending in "/" denotes a directory, so it is only compared
    against the ancestors of the file, never against the file name itself.

    Args:
        file_path: Path of a file (not a directory); relative or absolute.
        patterns: Iterable of pattern strings, e.g. from ``parse_gitignore``.

    Returns:
        True if at least one pattern matches, otherwise False.

    Note:
        Negation patterns ("!pattern") are not interpreted; they are treated
        as ordinary patterns and will normally match nothing.
    """
    # Patterns are written with "/", so normalise the platform separator.
    normalized = file_path.replace(os.sep, "/")
    components = [part for part in normalized.split("/") if part not in ("", ".")]

    for pattern in patterns:
        # Whole-path match: "*" in fnmatch also spans "/" separators.
        if fnmatch.fnmatch(file_path, pattern):
            return True

        core = pattern.strip("/")
        if not core:
            continue

        # Directory-only patterns may match ancestors but not the file name.
        candidates = components[:-1] if pattern.endswith("/") else components
        width = core.count("/") + 1
        for start in range(len(candidates) - width + 1):
            window = "/".join(candidates[start:start + width])
            if fnmatch.fnmatch(window, core):
                return True
    return False
25+
26+
def copy_codebase(src, dst, max_size_mb=5, gitignore_path=None):
    """Copy the files under ``src`` into ``dst``, mirroring the directory layout.

    A file is skipped when it lives inside a ``.git`` directory, when its path
    relative to ``src`` matches one of the patterns read from
    ``gitignore_path``, or when it is larger than ``max_size_mb`` megabytes.

    Args:
        src: Root directory to copy from.
        dst: Destination root directory; created if it does not exist.
        max_size_mb: Size limit in megabytes; larger files are skipped.
        gitignore_path: Optional path to a .gitignore-style file. When it is
            ``None`` or the file does not exist, no pattern filtering is done.
    """
    if gitignore_path and os.path.exists(gitignore_path):
        patterns = parse_gitignore(gitignore_path)
    else:
        patterns = []
    print("patterns to ignore: ", patterns)

    max_size_bytes = max_size_mb * 1024 * 1024
    dst_abs = os.path.abspath(dst)
    os.makedirs(dst, exist_ok=True)

    for root, dirs, files in os.walk(src):
        # Prune in place so os.walk never descends into:
        #   * .git  - ignored because of permission issues
        #   * dst   - when dst is nested in src, copying it would recurse
        dirs[:] = [
            name
            for name in dirs
            if name != ".git"
            and os.path.abspath(os.path.join(root, name)) != dst_abs
        ]

        for file in files:
            file_path = os.path.join(root, file)
            relative_path = os.path.relpath(file_path, src)

            # Match on the path relative to src so that the location of the
            # checkout itself can never trigger a pattern such as "*a40*".
            if file_matches_patterns(relative_path, patterns):
                continue

            # Check file size
            if os.path.getsize(file_path) > max_size_bytes:
                print(f"Skipping {file_path} because it's larger than {max_size_mb}MB")
                continue

            # Make sure the destination directory exists
            dst_path = os.path.join(dst, relative_path)
            os.makedirs(os.path.dirname(dst_path), exist_ok=True)
            shutil.copy(file_path, dst_path)

data/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)