Add files via upload

This commit is contained in:
RVC-Boss
2023-05-28 22:58:33 +08:00
committed by GitHub
parent 7789c46ded
commit f1730d42d4
4 changed files with 455 additions and 56 deletions

View File

@@ -2,7 +2,7 @@ import numpy as np, parselmouth, torch, pdb
from time import time as ttime
import torch.nn.functional as F
import scipy.signal as signal
import pyworld, os, traceback, faiss, librosa
import pyworld, os, traceback, faiss, librosa,torchcrepe
from scipy import signal
from functools import lru_cache
@@ -103,6 +103,27 @@ class VC(object):
f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
if filter_radius > 2:
f0 = signal.medfilt(f0, 3)
elif f0_method == "crepe":
model = "full"
# Pick a batch size that doesn't cause memory errors on your gpu
batch_size = 512
# Compute pitch on the configured device (self.device)
audio = torch.tensor(np.copy(x))[None].float()
f0, pd = torchcrepe.predict(
audio,
self.sr,
self.window,
f0_min,
f0_max,
model,
batch_size=batch_size,
device=self.device,
return_periodicity=True,
)
pd = torchcrepe.filter.median(pd, 3)
f0 = torchcrepe.filter.mean(f0, 3)
f0[pd < 0.1] = 0
f0 = f0[0].cpu().numpy()
f0 *= pow(2, f0_up_key / 12)
# with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
tf0 = self.sr // self.window # number of f0 points per second
@@ -141,6 +162,7 @@ class VC(object):
big_npy,
index_rate,
version,
protect
): # ,file_index,file_big_npy
feats = torch.from_numpy(audio0)
if self.is_half:
@@ -162,7 +184,8 @@ class VC(object):
with torch.no_grad():
logits = model.extract_features(**inputs)
feats = model.final_proj(logits[0]) if version == "v1" else logits[0]
if(protect<0.5):
feats0=feats.clone()
if (
isinstance(index, type(None)) == False
and isinstance(big_npy, type(None)) == False
@@ -188,6 +211,8 @@ class VC(object):
)
feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
if(protect<0.5):
feats0 = F.interpolate(feats0.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
t1 = ttime()
p_len = audio0.shape[0] // self.window
if feats.shape[1] < p_len:
@@ -195,6 +220,14 @@ class VC(object):
if pitch != None and pitchf != None:
pitch = pitch[:, :p_len]
pitchf = pitchf[:, :p_len]
if(protect<0.5):
pitchff = pitchf.clone()
pitchff[pitchf > 0] = 1
pitchff[pitchf < 1] = protect
pitchff = pitchff.unsqueeze(-1)
feats = feats * pitchff + feats0 * (1 - pitchff)
feats=feats.to(feats0.dtype)
p_len = torch.tensor([p_len], device=self.device).long()
with torch.no_grad():
if pitch != None and pitchf != None:
@@ -235,6 +268,7 @@ class VC(object):
resample_sr,
rms_mix_rate,
version,
protect,
f0_file=None,
):
if (
@@ -322,6 +356,7 @@ class VC(object):
big_npy,
index_rate,
version,
protect
)[self.t_pad_tgt : -self.t_pad_tgt]
)
else:
@@ -338,6 +373,7 @@ class VC(object):
big_npy,
index_rate,
version,
protect
)[self.t_pad_tgt : -self.t_pad_tgt]
)
s = t
@@ -355,6 +391,7 @@ class VC(object):
big_npy,
index_rate,
version,
protect
)[self.t_pad_tgt : -self.t_pad_tgt]
)
else:
@@ -371,6 +408,7 @@ class VC(object):
big_npy,
index_rate,
version,
protect
)[self.t_pad_tgt : -self.t_pad_tgt]
)
audio_opt = np.concatenate(audio_opt)