herimor
diff --git a/‎.gitignore‎
Lines changed: 39 additions & 0 deletions b/‎.gitignore‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 69 additions & 0 deletions b/‎README.md‎
Lines changed: 69 additions & 0 deletions
diff --git a/‎config.py‎
Lines changed: 254 additions & 0 deletions b/‎config.py‎
Lines changed: 254 additions & 0 deletions
diff --git a/‎copy_codebase.py‎
Lines changed: 56 additions & 0 deletions b/‎copy_codebase.py‎
Lines changed: 56 additions & 0 deletions
diff --git a/‎data/__init__.py‎ b/‎data/__init__.py‎
@@ -0,0 +1,39 @@
+__pycache__/
+*.py[cod]
+*$py.class
+*.egg-info
+.pytest_cache
+.ipynb_checkpoints
+
+thumbs.db
+.DS_Store
+.idea
+*.log
+*rtx*
+*.pdf
+*.mkv
+*.mp4
+*a40*
+*durip*
+*.png
+sim_lr.ipynb
+*.mp3
+*.gz
+*.flac
+*.th
+*.pth
+*.pt
+local_*
+hub/
+per_sample_res/
+src/
+res/seed_tts_eval/
+res/lspc_eval
+file_log.txt
+file_log_debug*.txt
+bad_files/
+amt/
+sam1.wav
+sam2.wav
+sam3.wav
+demo/generated_tts
@@ -0,0 +1,69 @@
+# VoiceStar: Robust, Duration-controllable TTS that can Extrapolate
+
+## 1. Env setup
+### Download model
+```bash
+# under VoiceStar root dir
+wget -O ./pretrained/encodec_6f79c6a8.th https://huggingface.co/pyp1/VoiceCraft/resolve/main/encodec_4cb2048_giga.th?download=true
+wget -O ./pretrained/VoiceStar_840M_30s.pth https://huggingface.co/pyp1/VoiceStar/resolve/main/VoiceStar_840M_30s.pth?download=true
+wget -O ./pretrained/VoiceStar_840M_40s.pth https://huggingface.co/pyp1/VoiceStar/resolve/main/VoiceStar_840M_40s.pth?download=true
+```
+### Inference only:
+```bash
+conda create -n voicestar python=3.10
+conda activate voicestar # this seems to lead to much worse results in terms of wer and spksim (comparing e9_rerun and e9_rerun_newba_upgraded)
+pip install torch==2.5.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu124 
+pip install numpy tqdm fire
+pip install phonemizer==3.2.1
+apt-get install espeak-ng # backend for the phonemizer
+pip install torchmetrics
+pip install einops
+pip install omegaconf==2.3.0
+pip install openai-whisper
+```
+
+* To avoid warnings like the following:
+[WARNING] words_mismatch.py:88 || words count mismatch on 200.0% of the lines (2/1)
+```python
+# go to ~/miniconda3/envs/voicestar/lib/python3.10/site-packages/phonemizer/backend/espeak/words_mismatch.py
+# silence the warning by turning the method into a no-op, like this
+    def _resume(self, nmismatch: int, nlines: int):
+        """Logs a high level undetailed warning"""
+        pass
+        # if nmismatch:
+        #     self._logger.warning(
+        #         'words count mismatch on %s%% of the lines (%s/%s)',
+        #         round(nmismatch / nlines, 2) * 100, nmismatch, nlines)
+```
+
+### Training and data processing
+*additional packages*:
+```bash
+pip install huggingface_hub
+pip install datasets
+pip install tensorboard
+pip install wandb
+pip install matplotlib
+pip install ffmpeg-python
+pip install scipy
+pip install soundfile
+```
+
+## 2. Example
+### download pretrained models
+```bash
+
+```
+
+
+### command line example
+Check the signature of the `run_inference` function in `inference_commandline.py` for the adjustable hyperparameters.
+```bash
+# under root dir
+python inference_commandline.py \
+  --reference_speech "./demo/5895_34622_000026_000002.wav" \
+  --target_text "I cannot believe that the same model can also do text to speech synthesis too! And you know what? this audio is 8 seconds long." \
+  --target_duration 8
+```
+
+### Gradio
@@ -0,0 +1,56 @@
+
+import os
+import shutil
+import fnmatch
+
+def parse_gitignore(gitignore_path):
+    """Read a .gitignore file and collect its pattern lines.
+
+    Every line is stripped of surrounding whitespace; blank lines and
+    lines that start with "#" are dropped. The remaining lines are
+    returned as-is (no further interpretation), in file order.
+    """
+    with open(gitignore_path, "r") as handle:
+        stripped_lines = (raw.strip() for raw in handle)
+        # The list is built while the file is still open.
+        return [
+            entry
+            for entry in stripped_lines
+            if entry and not entry.startswith("#")
+        ]
+
+def file_matches_patterns(file_path, patterns):
+    """Return True when file_path matches at least one of the patterns.
+
+    Each pattern is compared against the whole of file_path using
+    fnmatch.fnmatch. Returns False when nothing matches, including when
+    patterns is empty.
+    """
+    return any(fnmatch.fnmatch(file_path, candidate) for candidate in patterns)
+
+def _is_ignored(relative_path, patterns, is_dir=False):
+    """Return True if a path relative to the copy root matches an ignore pattern."""
+    normalized = relative_path.replace(os.sep, "/")
+    basename = normalized.rsplit("/", 1)[-1]
+    # Patterns without a slash ("*.log", "local_*") are meant to hit the bare
+    # name at any depth; patterns with one ("res/lspc_eval") are compared
+    # against the path from the copy root.
+    candidates = [normalized, basename]
+    if is_dir:
+        # Directory-only patterns are written with a trailing slash ("hub/").
+        candidates += [normalized + "/", basename + "/"]
+    return any(file_matches_patterns(candidate, patterns) for candidate in candidates)
+
+
+def copy_codebase(src, dst, max_size_mb=5, gitignore_path=None):
+    """Copy files from src to dst, skipping large and ignored files.
+
+    Args:
+        src: Root directory to copy from.
+        dst: Directory to copy into; created if missing. The directory
+            layout below src is reproduced below dst.
+        max_size_mb: Files strictly larger than this many megabytes are
+            skipped (a message is printed for each).
+        gitignore_path: Optional path to a .gitignore-style file. When it
+            is falsy or does not exist, no patterns are applied.
+
+    Patterns are matched with fnmatch against paths relative to src (and
+    against bare names), so the location of src on disk cannot trigger a
+    pattern. This is an approximation of gitignore rules: negation ("!")
+    and root-anchoring ("/foo") are not interpreted.
+
+    Directories named ".git", ignored directories, and dst itself (when
+    it lies inside src) are pruned from the walk rather than visited.
+    """
+    if gitignore_path and os.path.exists(gitignore_path):
+        patterns = parse_gitignore(gitignore_path)
+    else:
+        patterns = []
+    print("patterns to ignore: ", patterns)
+    os.makedirs(dst, exist_ok=True)
+    max_size_bytes = max_size_mb * 1024 * 1024
+    dst_abs = os.path.abspath(dst)
+    for root, dirs, files in os.walk(src):
+        relative_root = os.path.relpath(root, src)
+        if relative_root == os.curdir:
+            relative_root = ""
+
+        # Prune in place so os.walk never descends into these directories:
+        #  - .git (permission issues, and never part of the code snapshot)
+        #  - the destination itself, which would otherwise be re-copied
+        #  - anything matching an ignore pattern
+        dirs[:] = [
+            name
+            for name in dirs
+            if name != ".git"
+            and os.path.abspath(os.path.join(root, name)) != dst_abs
+            and not _is_ignored(os.path.join(relative_root, name), patterns, is_dir=True)
+        ]
+
+        for file in files:
+            file_path = os.path.join(root, file)
+            relative_path = os.path.join(relative_root, file)
+            dst_path = os.path.join(dst, relative_path)
+
+            # Check .gitignore patterns
+            if _is_ignored(relative_path, patterns):
+                continue
+
+            # Check file size
+            if os.path.getsize(file_path) > max_size_bytes:
+                print(f"Skipping {file_path} because it's larger than {max_size_mb}MB")
+                continue
+
+            # Make sure the destination directory exists
+            os.makedirs(os.path.dirname(dst_path), exist_ok=True)
+            shutil.copy(file_path, dst_path)