Reformat and rewrite _get_name_params (#57)

* Reformat

* rewrite _get_name_params

* Add workflow for automatic formatting

* Revert "Add workflow for automatic formatting"

This reverts commit 9111c5dbc1.

* revert Retrieval_based_Voice_Conversion_WebUI.ipynb

---------

Co-authored-by: 源文雨 <41315874+fumiama@users.noreply.github.com>
This commit is contained in:
Ftps
2023-04-15 20:44:24 +09:00
committed by GitHub
parent aaa893c4b1
commit c8261b2ccc
45 changed files with 4878 additions and 2456 deletions

View File

@@ -1,8 +1,9 @@
import os,librosa
import numpy as np
import soundfile as sf
import os, librosa
import numpy as np
import soundfile as sf
from tqdm import tqdm
import json,math ,hashlib
import json, math, hashlib
def crop_center(h1, h2):
h1_shape = h1.size()
@@ -11,7 +12,7 @@ def crop_center(h1, h2):
if h1_shape[3] == h2_shape[3]:
return h1
elif h1_shape[3] < h2_shape[3]:
raise ValueError('h1_shape[3] must be greater than h2_shape[3]')
raise ValueError("h1_shape[3] must be greater than h2_shape[3]")
# s_freq = (h2_shape[2] - h1_shape[2]) // 2
# e_freq = s_freq + h1_shape[2]
@@ -22,7 +23,9 @@ def crop_center(h1, h2):
return h1
def wave_to_spectrogram(wave, hop_length, n_fft, mid_side=False, mid_side_b2=False, reverse=False):
def wave_to_spectrogram(
wave, hop_length, n_fft, mid_side=False, mid_side_b2=False, reverse=False
):
if reverse:
wave_left = np.flip(np.asfortranarray(wave[0]))
wave_right = np.flip(np.asfortranarray(wave[1]))
@@ -30,21 +33,23 @@ def wave_to_spectrogram(wave, hop_length, n_fft, mid_side=False, mid_side_b2=Fal
wave_left = np.asfortranarray(np.add(wave[0], wave[1]) / 2)
wave_right = np.asfortranarray(np.subtract(wave[0], wave[1]))
elif mid_side_b2:
wave_left = np.asfortranarray(np.add(wave[1], wave[0] * .5))
wave_right = np.asfortranarray(np.subtract(wave[0], wave[1] * .5))
wave_left = np.asfortranarray(np.add(wave[1], wave[0] * 0.5))
wave_right = np.asfortranarray(np.subtract(wave[0], wave[1] * 0.5))
else:
wave_left = np.asfortranarray(wave[0])
wave_right = np.asfortranarray(wave[1])
spec_left = librosa.stft(wave_left, n_fft, hop_length=hop_length)
spec_right = librosa.stft(wave_right, n_fft, hop_length=hop_length)
spec = np.asfortranarray([spec_left, spec_right])
return spec
def wave_to_spectrogram_mt(wave, hop_length, n_fft, mid_side=False, mid_side_b2=False, reverse=False):
def wave_to_spectrogram_mt(
wave, hop_length, n_fft, mid_side=False, mid_side_b2=False, reverse=False
):
import threading
if reverse:
@@ -54,62 +59,75 @@ def wave_to_spectrogram_mt(wave, hop_length, n_fft, mid_side=False, mid_side_b2=
wave_left = np.asfortranarray(np.add(wave[0], wave[1]) / 2)
wave_right = np.asfortranarray(np.subtract(wave[0], wave[1]))
elif mid_side_b2:
wave_left = np.asfortranarray(np.add(wave[1], wave[0] * .5))
wave_right = np.asfortranarray(np.subtract(wave[0], wave[1] * .5))
wave_left = np.asfortranarray(np.add(wave[1], wave[0] * 0.5))
wave_right = np.asfortranarray(np.subtract(wave[0], wave[1] * 0.5))
else:
wave_left = np.asfortranarray(wave[0])
wave_right = np.asfortranarray(wave[1])
def run_thread(**kwargs):
global spec_left
spec_left = librosa.stft(**kwargs)
thread = threading.Thread(target=run_thread, kwargs={'y': wave_left, 'n_fft': n_fft, 'hop_length': hop_length})
thread = threading.Thread(
target=run_thread,
kwargs={"y": wave_left, "n_fft": n_fft, "hop_length": hop_length},
)
thread.start()
spec_right = librosa.stft(wave_right, n_fft, hop_length=hop_length)
thread.join()
thread.join()
spec = np.asfortranarray([spec_left, spec_right])
return spec
def combine_spectrograms(specs, mp):
l = min([specs[i].shape[2] for i in specs])
spec_c = np.zeros(shape=(2, mp.param['bins'] + 1, l), dtype=np.complex64)
l = min([specs[i].shape[2] for i in specs])
spec_c = np.zeros(shape=(2, mp.param["bins"] + 1, l), dtype=np.complex64)
offset = 0
bands_n = len(mp.param['band'])
bands_n = len(mp.param["band"])
for d in range(1, bands_n + 1):
h = mp.param['band'][d]['crop_stop'] - mp.param['band'][d]['crop_start']
spec_c[:, offset:offset+h, :l] = specs[d][:, mp.param['band'][d]['crop_start']:mp.param['band'][d]['crop_stop'], :l]
h = mp.param["band"][d]["crop_stop"] - mp.param["band"][d]["crop_start"]
spec_c[:, offset : offset + h, :l] = specs[d][
:, mp.param["band"][d]["crop_start"] : mp.param["band"][d]["crop_stop"], :l
]
offset += h
if offset > mp.param['bins']:
raise ValueError('Too much bins')
if offset > mp.param["bins"]:
raise ValueError("Too much bins")
# lowpass fiter
if mp.param['pre_filter_start'] > 0: # and mp.param['band'][bands_n]['res_type'] in ['scipy', 'polyphase']:
if (
mp.param["pre_filter_start"] > 0
): # and mp.param['band'][bands_n]['res_type'] in ['scipy', 'polyphase']:
if bands_n == 1:
spec_c = fft_lp_filter(spec_c, mp.param['pre_filter_start'], mp.param['pre_filter_stop'])
spec_c = fft_lp_filter(
spec_c, mp.param["pre_filter_start"], mp.param["pre_filter_stop"]
)
else:
gp = 1
for b in range(mp.param['pre_filter_start'] + 1, mp.param['pre_filter_stop']):
g = math.pow(10, -(b - mp.param['pre_filter_start']) * (3.5 - gp) / 20.0)
gp = 1
for b in range(
mp.param["pre_filter_start"] + 1, mp.param["pre_filter_stop"]
):
g = math.pow(
10, -(b - mp.param["pre_filter_start"]) * (3.5 - gp) / 20.0
)
gp = g
spec_c[:, b, :] *= g
return np.asfortranarray(spec_c)
def spectrogram_to_image(spec, mode='magnitude'):
if mode == 'magnitude':
return np.asfortranarray(spec_c)
def spectrogram_to_image(spec, mode="magnitude"):
if mode == "magnitude":
if np.iscomplexobj(spec):
y = np.abs(spec)
else:
y = spec
y = np.log10(y ** 2 + 1e-8)
elif mode == 'phase':
y = np.log10(y**2 + 1e-8)
elif mode == "phase":
if np.iscomplexobj(spec):
y = np.angle(spec)
else:
@@ -121,9 +139,7 @@ def spectrogram_to_image(spec, mode='magnitude'):
if y.ndim == 3:
img = img.transpose(1, 2, 0)
img = np.concatenate([
np.max(img, axis=2, keepdims=True), img
], axis=2)
img = np.concatenate([np.max(img, axis=2, keepdims=True), img], axis=2)
return img
@@ -136,12 +152,12 @@ def reduce_vocal_aggressively(X, y, softmask):
v_mask = v_mag_tmp > y_mag_tmp
y_mag = np.clip(y_mag_tmp - v_mag_tmp * v_mask * softmask, 0, np.inf)
return y_mag * np.exp(1.j * np.angle(y))
return y_mag * np.exp(1.0j * np.angle(y))
def mask_silence(mag, ref, thres=0.2, min_range=64, fade_size=32):
if min_range < fade_size * 2:
raise ValueError('min_range must be >= fade_area * 2')
raise ValueError("min_range must be >= fade_area * 2")
mag = mag.copy()
@@ -159,72 +175,106 @@ def mask_silence(mag, ref, thres=0.2, min_range=64, fade_size=32):
if s != 0:
weight = np.linspace(0, 1, fade_size)
mag[:, :, s:s + fade_size] += weight * ref[:, :, s:s + fade_size]
mag[:, :, s : s + fade_size] += weight * ref[:, :, s : s + fade_size]
else:
s -= fade_size
if e != mag.shape[2]:
weight = np.linspace(1, 0, fade_size)
mag[:, :, e - fade_size:e] += weight * ref[:, :, e - fade_size:e]
mag[:, :, e - fade_size : e] += weight * ref[:, :, e - fade_size : e]
else:
e += fade_size
mag[:, :, s + fade_size:e - fade_size] += ref[:, :, s + fade_size:e - fade_size]
mag[:, :, s + fade_size : e - fade_size] += ref[
:, :, s + fade_size : e - fade_size
]
old_e = e
return mag
def align_wave_head_and_tail(a, b):
l = min([a[0].size, b[0].size])
return a[:l,:l], b[:l,:l]
l = min([a[0].size, b[0].size])
return a[:l, :l], b[:l, :l]
def cache_or_load(mix_path, inst_path, mp):
mix_basename = os.path.splitext(os.path.basename(mix_path))[0]
inst_basename = os.path.splitext(os.path.basename(inst_path))[0]
cache_dir = 'mph{}'.format(hashlib.sha1(json.dumps(mp.param, sort_keys=True).encode('utf-8')).hexdigest())
mix_cache_dir = os.path.join('cache', cache_dir)
inst_cache_dir = os.path.join('cache', cache_dir)
cache_dir = "mph{}".format(
hashlib.sha1(json.dumps(mp.param, sort_keys=True).encode("utf-8")).hexdigest()
)
mix_cache_dir = os.path.join("cache", cache_dir)
inst_cache_dir = os.path.join("cache", cache_dir)
os.makedirs(mix_cache_dir, exist_ok=True)
os.makedirs(inst_cache_dir, exist_ok=True)
mix_cache_path = os.path.join(mix_cache_dir, mix_basename + '.npy')
inst_cache_path = os.path.join(inst_cache_dir, inst_basename + '.npy')
mix_cache_path = os.path.join(mix_cache_dir, mix_basename + ".npy")
inst_cache_path = os.path.join(inst_cache_dir, inst_basename + ".npy")
if os.path.exists(mix_cache_path) and os.path.exists(inst_cache_path):
X_spec_m = np.load(mix_cache_path)
y_spec_m = np.load(inst_cache_path)
else:
X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {}
for d in range(len(mp.param['band']), 0, -1):
bp = mp.param['band'][d]
if d == len(mp.param['band']): # high-end band
for d in range(len(mp.param["band"]), 0, -1):
bp = mp.param["band"][d]
if d == len(mp.param["band"]): # high-end band
X_wave[d], _ = librosa.load(
mix_path, bp['sr'], False, dtype=np.float32, res_type=bp['res_type'])
mix_path, bp["sr"], False, dtype=np.float32, res_type=bp["res_type"]
)
y_wave[d], _ = librosa.load(
inst_path, bp['sr'], False, dtype=np.float32, res_type=bp['res_type'])
else: # lower bands
X_wave[d] = librosa.resample(X_wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=bp['res_type'])
y_wave[d] = librosa.resample(y_wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=bp['res_type'])
inst_path,
bp["sr"],
False,
dtype=np.float32,
res_type=bp["res_type"],
)
else: # lower bands
X_wave[d] = librosa.resample(
X_wave[d + 1],
mp.param["band"][d + 1]["sr"],
bp["sr"],
res_type=bp["res_type"],
)
y_wave[d] = librosa.resample(
y_wave[d + 1],
mp.param["band"][d + 1]["sr"],
bp["sr"],
res_type=bp["res_type"],
)
X_wave[d], y_wave[d] = align_wave_head_and_tail(X_wave[d], y_wave[d])
X_spec_s[d] = wave_to_spectrogram(X_wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'], mp.param['mid_side_b2'], mp.param['reverse'])
y_spec_s[d] = wave_to_spectrogram(y_wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'], mp.param['mid_side_b2'], mp.param['reverse'])
X_spec_s[d] = wave_to_spectrogram(
X_wave[d],
bp["hl"],
bp["n_fft"],
mp.param["mid_side"],
mp.param["mid_side_b2"],
mp.param["reverse"],
)
y_spec_s[d] = wave_to_spectrogram(
y_wave[d],
bp["hl"],
bp["n_fft"],
mp.param["mid_side"],
mp.param["mid_side_b2"],
mp.param["reverse"],
)
del X_wave, y_wave
X_spec_m = combine_spectrograms(X_spec_s, mp)
y_spec_m = combine_spectrograms(y_spec_s, mp)
if X_spec_m.shape != y_spec_m.shape:
raise ValueError('The combined spectrograms are different: ' + mix_path)
raise ValueError("The combined spectrograms are different: " + mix_path)
_, ext = os.path.splitext(mix_path)
@@ -244,72 +294,129 @@ def spectrogram_to_wave(spec, hop_length, mid_side, mid_side_b2, reverse):
if reverse:
return np.asfortranarray([np.flip(wave_left), np.flip(wave_right)])
elif mid_side:
return np.asfortranarray([np.add(wave_left, wave_right / 2), np.subtract(wave_left, wave_right / 2)])
return np.asfortranarray(
[np.add(wave_left, wave_right / 2), np.subtract(wave_left, wave_right / 2)]
)
elif mid_side_b2:
return np.asfortranarray([np.add(wave_right / 1.25, .4 * wave_left), np.subtract(wave_left / 1.25, .4 * wave_right)])
return np.asfortranarray(
[
np.add(wave_right / 1.25, 0.4 * wave_left),
np.subtract(wave_left / 1.25, 0.4 * wave_right),
]
)
else:
return np.asfortranarray([wave_left, wave_right])
def spectrogram_to_wave_mt(spec, hop_length, mid_side, reverse, mid_side_b2):
import threading
spec_left = np.asfortranarray(spec[0])
spec_right = np.asfortranarray(spec[1])
def run_thread(**kwargs):
global wave_left
wave_left = librosa.istft(**kwargs)
thread = threading.Thread(target=run_thread, kwargs={'stft_matrix': spec_left, 'hop_length': hop_length})
thread = threading.Thread(
target=run_thread, kwargs={"stft_matrix": spec_left, "hop_length": hop_length}
)
thread.start()
wave_right = librosa.istft(spec_right, hop_length=hop_length)
thread.join()
thread.join()
if reverse:
return np.asfortranarray([np.flip(wave_left), np.flip(wave_right)])
elif mid_side:
return np.asfortranarray([np.add(wave_left, wave_right / 2), np.subtract(wave_left, wave_right / 2)])
return np.asfortranarray(
[np.add(wave_left, wave_right / 2), np.subtract(wave_left, wave_right / 2)]
)
elif mid_side_b2:
return np.asfortranarray([np.add(wave_right / 1.25, .4 * wave_left), np.subtract(wave_left / 1.25, .4 * wave_right)])
return np.asfortranarray(
[
np.add(wave_right / 1.25, 0.4 * wave_left),
np.subtract(wave_left / 1.25, 0.4 * wave_right),
]
)
else:
return np.asfortranarray([wave_left, wave_right])
def cmb_spectrogram_to_wave(spec_m, mp, extra_bins_h=None, extra_bins=None):
wave_band = {}
bands_n = len(mp.param['band'])
bands_n = len(mp.param["band"])
offset = 0
for d in range(1, bands_n + 1):
bp = mp.param['band'][d]
spec_s = np.ndarray(shape=(2, bp['n_fft'] // 2 + 1, spec_m.shape[2]), dtype=complex)
h = bp['crop_stop'] - bp['crop_start']
spec_s[:, bp['crop_start']:bp['crop_stop'], :] = spec_m[:, offset:offset+h, :]
bp = mp.param["band"][d]
spec_s = np.ndarray(
shape=(2, bp["n_fft"] // 2 + 1, spec_m.shape[2]), dtype=complex
)
h = bp["crop_stop"] - bp["crop_start"]
spec_s[:, bp["crop_start"] : bp["crop_stop"], :] = spec_m[
:, offset : offset + h, :
]
offset += h
if d == bands_n: # higher
if extra_bins_h: # if --high_end_process bypass
max_bin = bp['n_fft'] // 2
spec_s[:, max_bin-extra_bins_h:max_bin, :] = extra_bins[:, :extra_bins_h, :]
if bp['hpf_start'] > 0:
spec_s = fft_hp_filter(spec_s, bp['hpf_start'], bp['hpf_stop'] - 1)
if d == bands_n: # higher
if extra_bins_h: # if --high_end_process bypass
max_bin = bp["n_fft"] // 2
spec_s[:, max_bin - extra_bins_h : max_bin, :] = extra_bins[
:, :extra_bins_h, :
]
if bp["hpf_start"] > 0:
spec_s = fft_hp_filter(spec_s, bp["hpf_start"], bp["hpf_stop"] - 1)
if bands_n == 1:
wave = spectrogram_to_wave(spec_s, bp['hl'], mp.param['mid_side'], mp.param['mid_side_b2'], mp.param['reverse'])
wave = spectrogram_to_wave(
spec_s,
bp["hl"],
mp.param["mid_side"],
mp.param["mid_side_b2"],
mp.param["reverse"],
)
else:
wave = np.add(wave, spectrogram_to_wave(spec_s, bp['hl'], mp.param['mid_side'], mp.param['mid_side_b2'], mp.param['reverse']))
wave = np.add(
wave,
spectrogram_to_wave(
spec_s,
bp["hl"],
mp.param["mid_side"],
mp.param["mid_side_b2"],
mp.param["reverse"],
),
)
else:
sr = mp.param['band'][d+1]['sr']
if d == 1: # lower
spec_s = fft_lp_filter(spec_s, bp['lpf_start'], bp['lpf_stop'])
wave = librosa.resample(spectrogram_to_wave(spec_s, bp['hl'], mp.param['mid_side'], mp.param['mid_side_b2'], mp.param['reverse']), bp['sr'], sr, res_type="sinc_fastest")
else: # mid
spec_s = fft_hp_filter(spec_s, bp['hpf_start'], bp['hpf_stop'] - 1)
spec_s = fft_lp_filter(spec_s, bp['lpf_start'], bp['lpf_stop'])
wave2 = np.add(wave, spectrogram_to_wave(spec_s, bp['hl'], mp.param['mid_side'], mp.param['mid_side_b2'], mp.param['reverse']))
sr = mp.param["band"][d + 1]["sr"]
if d == 1: # lower
spec_s = fft_lp_filter(spec_s, bp["lpf_start"], bp["lpf_stop"])
wave = librosa.resample(
spectrogram_to_wave(
spec_s,
bp["hl"],
mp.param["mid_side"],
mp.param["mid_side_b2"],
mp.param["reverse"],
),
bp["sr"],
sr,
res_type="sinc_fastest",
)
else: # mid
spec_s = fft_hp_filter(spec_s, bp["hpf_start"], bp["hpf_stop"] - 1)
spec_s = fft_lp_filter(spec_s, bp["lpf_start"], bp["lpf_stop"])
wave2 = np.add(
wave,
spectrogram_to_wave(
spec_s,
bp["hl"],
mp.param["mid_side"],
mp.param["mid_side_b2"],
mp.param["reverse"],
),
)
# wave = librosa.core.resample(wave2, bp['sr'], sr, res_type="sinc_fastest")
wave = librosa.core.resample(wave2, bp['sr'], sr,res_type='scipy')
wave = librosa.core.resample(wave2, bp["sr"], sr, res_type="scipy")
return wave.T
@@ -318,7 +425,7 @@ def fft_lp_filter(spec, bin_start, bin_stop):
for b in range(bin_start, bin_stop):
g -= 1 / (bin_stop - bin_start)
spec[:, b, :] = g * spec[:, b, :]
spec[:, bin_stop:, :] *= 0
return spec
@@ -329,42 +436,69 @@ def fft_hp_filter(spec, bin_start, bin_stop):
for b in range(bin_start, bin_stop, -1):
g -= 1 / (bin_start - bin_stop)
spec[:, b, :] = g * spec[:, b, :]
spec[:, 0:bin_stop+1, :] *= 0
spec[:, 0 : bin_stop + 1, :] *= 0
return spec
def mirroring(a, spec_m, input_high_end, mp):
if 'mirroring' == a:
mirror = np.flip(np.abs(spec_m[:, mp.param['pre_filter_start']-10-input_high_end.shape[1]:mp.param['pre_filter_start']-10, :]), 1)
mirror = mirror * np.exp(1.j * np.angle(input_high_end))
return np.where(np.abs(input_high_end) <= np.abs(mirror), input_high_end, mirror)
if 'mirroring2' == a:
mirror = np.flip(np.abs(spec_m[:, mp.param['pre_filter_start']-10-input_high_end.shape[1]:mp.param['pre_filter_start']-10, :]), 1)
if "mirroring" == a:
mirror = np.flip(
np.abs(
spec_m[
:,
mp.param["pre_filter_start"]
- 10
- input_high_end.shape[1] : mp.param["pre_filter_start"]
- 10,
:,
]
),
1,
)
mirror = mirror * np.exp(1.0j * np.angle(input_high_end))
return np.where(
np.abs(input_high_end) <= np.abs(mirror), input_high_end, mirror
)
if "mirroring2" == a:
mirror = np.flip(
np.abs(
spec_m[
:,
mp.param["pre_filter_start"]
- 10
- input_high_end.shape[1] : mp.param["pre_filter_start"]
- 10,
:,
]
),
1,
)
mi = np.multiply(mirror, input_high_end * 1.7)
return np.where(np.abs(input_high_end) <= np.abs(mi), input_high_end, mi)
def ensembling(a, specs):
def ensembling(a, specs):
for i in range(1, len(specs)):
if i == 1:
spec = specs[0]
ln = min([spec.shape[2], specs[i].shape[2]])
spec = spec[:,:,:ln]
specs[i] = specs[i][:,:,:ln]
spec = spec[:, :, :ln]
specs[i] = specs[i][:, :, :ln]
if 'min_mag' == a:
if "min_mag" == a:
spec = np.where(np.abs(specs[i]) <= np.abs(spec), specs[i], spec)
if 'max_mag' == a:
spec = np.where(np.abs(specs[i]) >= np.abs(spec), specs[i], spec)
if "max_mag" == a:
spec = np.where(np.abs(specs[i]) >= np.abs(spec), specs[i], spec)
return spec
def stft(wave, nfft, hl):
wave_left = np.asfortranarray(wave[0])
wave_right = np.asfortranarray(wave[1])
@@ -374,6 +508,7 @@ def stft(wave, nfft, hl):
return spec
def istft(spec, hl):
spec_left = np.asfortranarray(spec[0])
spec_right = np.asfortranarray(spec[1])
@@ -389,62 +524,94 @@ if __name__ == "__main__":
import time
import argparse
from model_param_init import ModelParameters
p = argparse.ArgumentParser()
p.add_argument('--algorithm', '-a', type=str, choices=['invert', 'invert_p', 'min_mag', 'max_mag', 'deep', 'align'], default='min_mag')
p.add_argument('--model_params', '-m', type=str, default=os.path.join('modelparams', '1band_sr44100_hl512.json'))
p.add_argument('--output_name', '-o', type=str, default='output')
p.add_argument('--vocals_only', '-v', action='store_true')
p.add_argument('input', nargs='+')
p.add_argument(
"--algorithm",
"-a",
type=str,
choices=["invert", "invert_p", "min_mag", "max_mag", "deep", "align"],
default="min_mag",
)
p.add_argument(
"--model_params",
"-m",
type=str,
default=os.path.join("modelparams", "1band_sr44100_hl512.json"),
)
p.add_argument("--output_name", "-o", type=str, default="output")
p.add_argument("--vocals_only", "-v", action="store_true")
p.add_argument("input", nargs="+")
args = p.parse_args()
start_time = time.time()
if args.algorithm.startswith('invert') and len(args.input) != 2:
raise ValueError('There should be two input files.')
if not args.algorithm.startswith('invert') and len(args.input) < 2:
raise ValueError('There must be at least two input files.')
if args.algorithm.startswith("invert") and len(args.input) != 2:
raise ValueError("There should be two input files.")
if not args.algorithm.startswith("invert") and len(args.input) < 2:
raise ValueError("There must be at least two input files.")
wave, specs = {}, {}
mp = ModelParameters(args.model_params)
for i in range(len(args.input)):
for i in range(len(args.input)):
spec = {}
for d in range(len(mp.param['band']), 0, -1):
bp = mp.param['band'][d]
if d == len(mp.param['band']): # high-end band
for d in range(len(mp.param["band"]), 0, -1):
bp = mp.param["band"][d]
if d == len(mp.param["band"]): # high-end band
wave[d], _ = librosa.load(
args.input[i], bp['sr'], False, dtype=np.float32, res_type=bp['res_type'])
if len(wave[d].shape) == 1: # mono to stereo
args.input[i],
bp["sr"],
False,
dtype=np.float32,
res_type=bp["res_type"],
)
if len(wave[d].shape) == 1: # mono to stereo
wave[d] = np.array([wave[d], wave[d]])
else: # lower bands
wave[d] = librosa.resample(wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=bp['res_type'])
spec[d] = wave_to_spectrogram(wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'], mp.param['mid_side_b2'], mp.param['reverse'])
else: # lower bands
wave[d] = librosa.resample(
wave[d + 1],
mp.param["band"][d + 1]["sr"],
bp["sr"],
res_type=bp["res_type"],
)
spec[d] = wave_to_spectrogram(
wave[d],
bp["hl"],
bp["n_fft"],
mp.param["mid_side"],
mp.param["mid_side_b2"],
mp.param["reverse"],
)
specs[i] = combine_spectrograms(spec, mp)
del wave
if args.algorithm == 'deep':
if args.algorithm == "deep":
d_spec = np.where(np.abs(specs[0]) <= np.abs(spec[1]), specs[0], spec[1])
v_spec = d_spec - specs[1]
sf.write(os.path.join('{}.wav'.format(args.output_name)), cmb_spectrogram_to_wave(v_spec, mp), mp.param['sr'])
if args.algorithm.startswith('invert'):
sf.write(
os.path.join("{}.wav".format(args.output_name)),
cmb_spectrogram_to_wave(v_spec, mp),
mp.param["sr"],
)
if args.algorithm.startswith("invert"):
ln = min([specs[0].shape[2], specs[1].shape[2]])
specs[0] = specs[0][:,:,:ln]
specs[1] = specs[1][:,:,:ln]
if 'invert_p' == args.algorithm:
specs[0] = specs[0][:, :, :ln]
specs[1] = specs[1][:, :, :ln]
if "invert_p" == args.algorithm:
X_mag = np.abs(specs[0])
y_mag = np.abs(specs[1])
max_mag = np.where(X_mag >= y_mag, X_mag, y_mag)
v_spec = specs[1] - max_mag * np.exp(1.j * np.angle(specs[0]))
y_mag = np.abs(specs[1])
max_mag = np.where(X_mag >= y_mag, X_mag, y_mag)
v_spec = specs[1] - max_mag * np.exp(1.0j * np.angle(specs[0]))
else:
specs[1] = reduce_vocal_aggressively(specs[0], specs[1], 0.2)
v_spec = specs[0] - specs[1]
@@ -458,28 +625,43 @@ if __name__ == "__main__":
y_image = spectrogram_to_image(y_mag)
v_image = spectrogram_to_image(v_mag)
cv2.imwrite('{}_X.png'.format(args.output_name), X_image)
cv2.imwrite('{}_y.png'.format(args.output_name), y_image)
cv2.imwrite('{}_v.png'.format(args.output_name), v_image)
sf.write('{}_X.wav'.format(args.output_name), cmb_spectrogram_to_wave(specs[0], mp), mp.param['sr'])
sf.write('{}_y.wav'.format(args.output_name), cmb_spectrogram_to_wave(specs[1], mp), mp.param['sr'])
sf.write('{}_v.wav'.format(args.output_name), cmb_spectrogram_to_wave(v_spec, mp), mp.param['sr'])
else:
if not args.algorithm == 'deep':
sf.write(os.path.join('ensembled','{}.wav'.format(args.output_name)), cmb_spectrogram_to_wave(ensembling(args.algorithm, specs), mp), mp.param['sr'])
cv2.imwrite("{}_X.png".format(args.output_name), X_image)
cv2.imwrite("{}_y.png".format(args.output_name), y_image)
cv2.imwrite("{}_v.png".format(args.output_name), v_image)
if args.algorithm == 'align':
sf.write(
"{}_X.wav".format(args.output_name),
cmb_spectrogram_to_wave(specs[0], mp),
mp.param["sr"],
)
sf.write(
"{}_y.wav".format(args.output_name),
cmb_spectrogram_to_wave(specs[1], mp),
mp.param["sr"],
)
sf.write(
"{}_v.wav".format(args.output_name),
cmb_spectrogram_to_wave(v_spec, mp),
mp.param["sr"],
)
else:
if not args.algorithm == "deep":
sf.write(
os.path.join("ensembled", "{}.wav".format(args.output_name)),
cmb_spectrogram_to_wave(ensembling(args.algorithm, specs), mp),
mp.param["sr"],
)
if args.algorithm == "align":
trackalignment = [
{
'file1':'"{}"'.format(args.input[0]),
'file2':'"{}"'.format(args.input[1])
"file1": '"{}"'.format(args.input[0]),
"file2": '"{}"'.format(args.input[1]),
}
]
for i,e in tqdm(enumerate(trackalignment), desc="Performing Alignment..."):
for i, e in tqdm(enumerate(trackalignment), desc="Performing Alignment..."):
os.system(f"python lib/align_tracks.py {e['file1']} {e['file2']}")
#print('Total time: {0:.{1}f}s'.format(time.time() - start_time, 1))
# print('Total time: {0:.{1}f}s'.format(time.time() - start_time, 1))