Skip to content
This repository was archived by the owner on Oct 19, 2024. It is now read-only.

Commit f9497ae

Browse files
committed
sovits 3.0
1 parent fefb14a commit f9497ae

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+2031
-2798
lines changed

.gitignore

Lines changed: 0 additions & 3 deletions
This file was deleted.

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
MIT License
22

3-
Copyright (c) 2021 Jaehyeon Kim
3+
Copyright (c) 2021 Jingyi Li
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

app.py

Lines changed: 0 additions & 103 deletions
This file was deleted.

commons.py

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,23 @@
44
from torch import nn
55
from torch.nn import functional as F
66

7+
def slice_pitch_segments(x, ids_str, segment_size=4):
    """Cut one fixed-length window out of every row of ``x``.

    Args:
        x: Tensor indexed as ``x[i, start:end]``; dimension 0 is iterated
            as the batch dimension.
        ids_str: Per-row start offsets, indexable by batch position.
        segment_size: Number of elements taken along dimension 1.

    Returns:
        A tensor shaped like ``x[:, :segment_size]`` whose row ``i`` holds
        ``x[i, ids_str[i]:ids_str[i] + segment_size]``.
    """
    # Allocate the result with the same dtype/device as x; every row is
    # overwritten in the loop below.
    ret = torch.zeros_like(x[:, :segment_size])
    for i in range(x.size(0)):
        idx_str = ids_str[i]
        idx_end = idx_str + segment_size
        ret[i] = x[i, idx_str:idx_end]
    return ret
14+
15+
def rand_slice_segments_with_pitch(x, pitch, x_lengths=None, segment_size=4):
    """Draw one random window per batch item from ``x`` and ``pitch`` alike.

    The same start offsets are used for both inputs, so the two returned
    windows cover the same positions.

    Args:
        x: Tensor whose ``size()`` unpacks into three dimensions; the first
            is the batch size and the last is the length being sliced.
        pitch: Tensor sliced by ``slice_pitch_segments`` with the same
            offsets as ``x``.
        x_lengths: Usable length per item (or a scalar); defaults to the
            full last-dimension size of ``x``.
        segment_size: Window length.

    Returns:
        Tuple ``(ret, ret_pitch, ids_str)``: the window from ``x``, the
        window from ``pitch``, and the start offsets as a long tensor.
    """
    b, _, t = x.size()
    if x_lengths is None:
        x_lengths = t
    # torch.rand is in [0, 1), so truncating rand * (len - seg + 1) yields
    # start offsets in [0, len - seg].
    ids_str_max = x_lengths - segment_size + 1
    ids_str = (torch.rand([b]).to(device=x.device) * ids_str_max).to(dtype=torch.long)
    ret = slice_segments(x, ids_str, segment_size)
    ret_pitch = slice_pitch_segments(pitch, ids_str, segment_size)
    return ret, ret_pitch, ids_str
724

825
def init_weights(m, mean=0.0, std=0.01):
926
classname = m.__class__.__name__
@@ -64,6 +81,16 @@ def rand_slice_segments(x, x_lengths=None, segment_size=4):
6481
return ret, ids_str
6582

6683

84+
def rand_spec_segments(x, x_lengths=None, segment_size=4):
    """Draw one random window of ``segment_size`` per batch item from ``x``.

    Args:
        x: Tensor whose ``size()`` unpacks into three dimensions; the first
            is the batch size and the last is the length being sliced.
        x_lengths: Usable length per item (or a scalar); defaults to the
            full last-dimension size of ``x``.
        segment_size: Window length.

    Returns:
        Tuple ``(ret, ids_str)``: the result of ``slice_segments`` and the
        start offsets as a long tensor.
    """
    b, _, t = x.size()
    if x_lengths is None:
        x_lengths = t
    # NOTE(review): unlike rand_slice_segments_with_pitch, there is no "+ 1"
    # here, so the last valid start (len - seg) appears never to be drawn.
    # Confirm this is intentional before changing it.
    ids_str_max = x_lengths - segment_size
    ids_str = (torch.rand([b]).to(device=x.device) * ids_str_max).to(dtype=torch.long)
    ret = slice_segments(x, ids_str, segment_size)
    return ret, ids_str
92+
93+
6794
def get_timing_signal_1d(
6895
length, channels, min_timescale=1.0, max_timescale=1.0e4):
6996
position = torch.arange(length, dtype=torch.float)
Lines changed: 23 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,40 +1,39 @@
11
{
22
"train": {
33
"log_interval": 200,
4-
"eval_interval": 2000,
4+
"eval_interval": 200,
55
"seed": 1234,
66
"epochs": 10000,
77
"learning_rate": 2e-4,
88
"betas": [0.8, 0.99],
99
"eps": 1e-9,
1010
"batch_size": 16,
11-
"fp16_run": true,
11+
"fp16_run": false,
1212
"lr_decay": 0.999875,
13-
"segment_size": 8192,
13+
"segment_size": 17920,
1414
"init_lr_ratio": 1,
1515
"warmup_epochs": 0,
1616
"c_mel": 45,
17-
"c_kl": 1.0
17+
"c_kl": 1.0,
18+
"use_sr": true,
19+
"max_speclen": 384,
20+
"port": "8001"
1821
},
1922
"data": {
20-
"training_files":"/content/drive/MyDrive/SingingVC/trainmul.txt",
21-
"validation_files":"/content/drive/MyDrive/SingingVC/valmul.txt",
22-
"text_cleaners":["english_cleaners2"],
23+
"training_files":"filelists/train.txt",
24+
"validation_files":"filelists/val.txt",
2325
"max_wav_value": 32768.0,
24-
"sampling_rate": 22050,
25-
"filter_length": 1024,
26-
"hop_length": 256,
27-
"win_length": 1024,
26+
"sampling_rate": 48000,
27+
"filter_length": 1280,
28+
"hop_length": 320,
29+
"win_length": 1280,
2830
"n_mel_channels": 80,
2931
"mel_fmin": 0.0,
30-
"mel_fmax": null,
31-
"add_blank": true,
32-
"n_speakers": 3,
33-
"cleaned_text": true
32+
"mel_fmax": null
3433
},
3534
"model": {
3635
"inter_channels": 192,
37-
"hidden_channels": 256,
36+
"hidden_channels": 192,
3837
"filter_channels": 768,
3938
"n_heads": 2,
4039
"n_layers": 6,
@@ -43,11 +42,17 @@
4342
"resblock": "1",
4443
"resblock_kernel_sizes": [3,7,11],
4544
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
46-
"upsample_rates": [8,8,2,2],
45+
"upsample_rates": [10,8,2,2],
4746
"upsample_initial_channel": 512,
4847
"upsample_kernel_sizes": [16,16,4,4],
4948
"n_layers_q": 3,
5049
"use_spectral_norm": false,
51-
"gin_channels": 256
50+
"gin_channels": 256,
51+
"ssl_dim": 256
52+
},
53+
"spk":{
54+
"nen": 0,
55+
"paimon": 1,
56+
"yunhao": 2
5257
}
5358
}

configs/nyarusing.json

Lines changed: 0 additions & 52 deletions
This file was deleted.

0 commit comments

Comments
 (0)