# main.py
# Forked from 332plim/sovits_f0_infer.
import io
import logging
from pathlib import Path
import soundfile
from sovits import infer_tool
from sovits import slicer
from sovits.infer_tool import Svc
# Batch voice-conversion script: for every input song in ./raw and every
# requested speaker id, run the model segment by segment, report a per-segment
# pitch-error estimate, and write the joined result to ./results.

# Only show warnings and above from the 'numba' logger.
logging.getLogger('numba').setLevel(logging.WARNING)

# --- Model configuration -----------------------------------------------------
model_name = "354_epochs.pth"  # model checkpoint name (inside the ./pth folder)
config_name = "config.json"  # config file name (inside the ./configs folder)
svc_model = Svc(f"./pth/{model_name}", f"./configs/{config_name}")
infer_tool.mkdir(["./raw", "./pth", "./results"])

# --- Inference settings ------------------------------------------------------
# Multiple wav files are supported; put them in the ./raw folder and list their
# names here without the ".wav" extension.
clean_names = ["十年"]
trans = [-3]  # pitch adjustment in semitones, positive or negative
id_list = [1]  # speaker ids; every input is synthesized once per id
cut_time = 30  # NOTE(review): not referenced anywhere in the visible script

# presumably pads `trans` so it pairs up with `clean_names` - verify in infer_tool
infer_tool.fill_a_to_b(trans, clean_names)
# Printed hint: when "mis" stays above 10% for consecutive segments, consider
# shifting the key up or down by a semitone.
print("mis连续超过10%时,考虑升降半音\n")

for clean_name, tran in zip(clean_names, trans):
    raw_audio_path = f"./raw/{clean_name}.wav"
    svc_model.format_wav(raw_audio_path)
    audio_data, audio_sr = slicer.cut(Path(raw_audio_path).with_suffix('.wav'))
    segment_total = len(audio_data)  # loop-invariant, used for the progress percentage

    for spk_id in id_list:
        # Start a fresh output buffer for every speaker. Sharing one buffer
        # across speakers would prepend the previous speakers' audio to each
        # later speaker's result file.
        audio = []
        var_list = []
        mis_list = []

        for count, data in enumerate(audio_data, start=1):
            # Hand the segment to the model as an in-memory wav file.
            raw_path = io.BytesIO()
            soundfile.write(raw_path, data, audio_sr, format="wav")
            raw_path.seek(0)
            out_audio, out_sr = svc_model.infer(spk_id, tran, raw_path)
            # Alternative "svc" mode: `svc_model.vc(2, spk_id, raw_path)`.
            # It only supports conversion between voices inside the model and
            # is not recommended.
            _audio = out_audio.cpu().numpy()
            # assumes `_audio` is a 1-D sample array - TODO confirm
            audio.extend(_audio)

            # Encode the converted segment as wav in memory so it can be
            # compared against the input segment.
            out_path = io.BytesIO()
            soundfile.write(out_path, _audio, svc_model.target_sample, format="wav")
            # Rewind both buffers before they are read again.
            raw_path.seek(0)
            out_path.seek(0)
            mistake, var = svc_model.calc_error(raw_path, out_path, tran)
            mis_list.append(mistake)
            var_list.append(var)
            print(f"{clean_name}: {round(100 * count / segment_total, 2)}% mis:{mistake} var:{var}")

        # Printed summary: reference for per-segment error - 0.3 is excellent,
        # around 0.5 is reasonable, a few values of 0.8-1 are acceptable. If
        # the deviation is too large, adjust the semitone shift; if it stays
        # too large after several adjustments, the song is outside the
        # singer's range. Then lists the semitone deviations and variances.
        print(
            f"分段误差参考:0.3优秀,0.5左右合理,少量0.8-1可以接受\n若偏差过大,请调整升降半音数;多次调整均过大、说明超出歌手音域\n半音偏差:{mis_list}\n半音方差:{var_list}")
        res_path = f'./results/{clean_name}_{tran}key_{svc_model.speakers[spk_id]}.wav'
        soundfile.write(res_path, audio, svc_model.target_sample)