This repository was archived by the owner on Oct 19, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 24
Expand file tree
/
Copy pathpreprocess_flist_config.py
More file actions
117 lines (110 loc) · 3.24 KB
/
preprocess_flist_config.py
File metadata and controls
117 lines (110 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import os
import argparse
from tqdm import tqdm
from random import shuffle
import json
config_template = {
"train": {
"log_interval": 200,
"eval_interval": 1000,
"seed": 1234,
"epochs": 10000,
"learning_rate": 1e-4,
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 12,
"fp16_run": False,
"lr_decay": 0.999875,
"segment_size": 17920,
"init_lr_ratio": 1,
"warmup_epochs": 0,
"c_mel": 45,
"c_kl": 1.0,
"use_sr": True,
"max_speclen": 384,
"port": "8001"
},
"data": {
"training_files":"filelists/train.txt",
"validation_files":"filelists/val.txt",
"max_wav_value": 32768.0,
"sampling_rate": 48000,
"filter_length": 1280,
"hop_length": 320,
"win_length": 1280,
"n_mel_channels": 80,
"mel_fmin": 0.0,
"mel_fmax": None
},
"model": {
"inter_channels": 192,
"hidden_channels": 192,
"filter_channels": 768,
"n_heads": 2,
"n_layers": 6,
"kernel_size": 3,
"p_dropout": 0.1,
"resblock": "1",
"resblock_kernel_sizes": [3,7,11],
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
"upsample_rates": [10,8,2,2],
"upsample_initial_channel": 512,
"upsample_kernel_sizes": [16,16,4,4],
"n_layers_q": 3,
"use_spectral_norm": False,
"gin_channels": 256,
"ssl_dim": 256,
"n_speakers": 0,
},
"spk":{
"nen": 0,
"paimon": 1,
"yunhao": 2
}
}
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--train_list", type=str, default="./filelists/train.txt", help="path to train list")
parser.add_argument("--val_list", type=str, default="./filelists/val.txt", help="path to val list")
parser.add_argument("--test_list", type=str, default="./filelists/test.txt", help="path to test list")
parser.add_argument("--source_dir", type=str, default="./dataset/48k", help="path to source dir")
args = parser.parse_args()
train = []
val = []
test = []
idx = 0
spk_dict = {}
spk_id = 0
for speaker in tqdm(os.listdir(args.source_dir)):
spk_dict[speaker] = spk_id
spk_id += 1
wavs = [os.path.join(args.source_dir, speaker, i)for i in os.listdir(os.path.join(args.source_dir, speaker))]
wavs = [i for i in wavs if i.endswith("wav")]
shuffle(wavs)
train += wavs[2:-10]
val += wavs[:2]
test += wavs[-10:]
n_speakers = len(spk_dict.keys())*2
shuffle(train)
shuffle(val)
shuffle(test)
print("Writing", args.train_list)
with open(args.train_list, "w") as f:
for fname in tqdm(train):
wavpath = fname
f.write(wavpath + "\n")
print("Writing", args.val_list)
with open(args.val_list, "w") as f:
for fname in tqdm(val):
wavpath = fname
f.write(wavpath + "\n")
print("Writing", args.test_list)
with open(args.test_list, "w") as f:
for fname in tqdm(test):
wavpath = fname
f.write(wavpath + "\n")
config_template["model"]["n_speakers"] = n_speakers
config_template["spk"] = spk_dict
print("Writing configs/config.json")
with open("configs/config.json", "w") as f:
json.dump(config_template, f, indent=2)