|
5 | 5 | import xml.etree.ElementTree as ET |
6 | 6 | from glob import glob |
7 | 7 | from pathlib import Path |
| 8 | +from typing import Any, Protocol |
8 | 9 |
|
9 | 10 | from tqdm import tqdm |
10 | 11 |
|
11 | 12 | logger = logging.getLogger(__name__) |
12 | 13 |
|
| 14 | + |
class Formatter(Protocol):
    """Structural type for the dataset formatter callables kept in the registry.

    Any callable whose call signature is compatible with ``__call__`` below
    satisfies this protocol; no inheritance is required.
    """

    def __call__(
        self,
        root_path: str | os.PathLike[Any],
        meta_file: str | os.PathLike[Any],
        ignored_speakers: list[str] | None,
        **kwargs,
    ) -> list[dict[str, Any]]:
        """Build the list of sample dictionaries for one dataset.

        Args:
            root_path: Dataset root, given as a string or path-like object.
            meta_file: Metadata file, given as a string or path-like object.
            ignored_speakers: Speaker names to leave out, or ``None``.
                NOTE(review): the exact filtering semantics are defined by
                each formatter - confirm against the implementations.
            **kwargs: Extra formatter-specific options.

        Returns:
            One dictionary per sample, keyed by strings.
        """
        ...
| 23 | + |
| 24 | + |
# Maps lower-cased formatter names to their registered formatter callables.
_FORMATTER_REGISTRY: dict[str, Formatter] = {}


def register_formatter(name: str, formatter: Formatter) -> None:
    """Register a formatter function under a case-insensitive name.

    Args:
        name: Name of the formatter. It is lower-cased before being stored,
            so names differing only in case refer to the same entry.
        formatter: Formatter function.

    Raises:
        ValueError: If a formatter is already registered under the
            lower-cased form of ``name``.
    """
    key = name.lower()
    if key not in _FORMATTER_REGISTRY:
        _FORMATTER_REGISTRY[key] = formatter
        return
    # The message reports the name as given by the caller, not the lower-cased key.
    msg = f"Formatter {name} already exists."
    raise ValueError(msg)
| 39 | + |
| 40 | + |
13 | 41 | ######################## |
14 | 42 | # DATASETS |
15 | 43 | ######################## |
@@ -659,3 +687,35 @@ def bel_tts_formatter(root_path, meta_file, **kwargs): # pylint: disable=unused |
659 | 687 | text = cols[1] |
660 | 688 | items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "root_path": root_path}) |
661 | 689 | return items |
| 690 | + |
| 691 | + |
### Registrations
# Every formatter defined in this module is added to the registry at import
# time, in the order listed, under the name given next to it.
for _name, _formatter in (
    ("cml_tts", cml_tts),
    ("coqui", coqui),
    ("tweb", tweb),
    ("mozilla", mozilla),
    ("mozilla_de", mozilla_de),
    ("mailabs", mailabs),
    ("ljspeech", ljspeech),
    ("ljspeech_test", ljspeech_test),
    ("thorsten", thorsten),
    ("sam_accenture", sam_accenture),
    ("ruslan", ruslan),
    ("css10", css10),
    ("nancy", nancy),
    ("common_voice", common_voice),
    ("libri_tts", libri_tts),
    ("custom_turkish", custom_turkish),
    ("brspeech", brspeech),
    ("vctk", vctk),
    ("vctk_old", vctk_old),
    ("synpaflex", synpaflex),
    ("open_bible", open_bible),
    ("mls", mls),
    ("voxceleb2", voxceleb2),
    ("voxceleb1", voxceleb1),
    ("emotion", emotion),
    ("baker", baker),
    ("kokoro", kokoro),
    ("kss", kss),
    ("bel_tts_formatter", bel_tts_formatter),
):
    register_formatter(_name, _formatter)
# Drop the loop temporaries so the module namespace gains no extra names.
del _name, _formatter
0 commit comments