diff --git a/lib/spec_utils.py b/lib/spec_utils.py index af34f70d..7d7a0432 100644 --- a/lib/spec_utils.py +++ b/lib/spec_utils.py @@ -27,8 +27,8 @@ def wave_to_spectrogram(wave, hop_length, n_fft): wave_left = np.asfortranarray(wave[0]) wave_right = np.asfortranarray(wave[1]) - spec_left = librosa.stft(wave_left, n_fft, hop_length=hop_length) - spec_right = librosa.stft(wave_right, n_fft, hop_length=hop_length) + spec_left = librosa.stft(wave_left, n_fft=n_fft, hop_length=hop_length) + spec_right = librosa.stft(wave_right, n_fft=n_fft, hop_length=hop_length) spec = np.asfortranarray([spec_left, spec_right]) return spec @@ -152,9 +152,9 @@ def cache_or_load(mix_path, inst_path, sr, hop_length, n_fft): y = np.load(inst_cache_path) else: X, _ = librosa.load( - mix_path, sr, False, dtype=np.float32, res_type='kaiser_fast') + mix_path, sr=sr, mono=False, dtype=np.float32, res_type='kaiser_fast') y, _ = librosa.load( - inst_path, sr, False, dtype=np.float32, res_type='kaiser_fast') + inst_path, sr=sr, mono=False, dtype=np.float32, res_type='kaiser_fast') X, y = align_wave_head_and_tail(X, y, sr) @@ -196,9 +196,9 @@ def spectrogram_to_wave(spec, hop_length=1024): ], axis=0) * 0.2 X, _ = librosa.load( - sys.argv[1], 44100, False, dtype=np.float32, res_type='kaiser_fast') + sys.argv[1], sr=44100, mono=False, dtype=np.float32, res_type='kaiser_fast') y, _ = librosa.load( - sys.argv[2], 44100, False, dtype=np.float32, res_type='kaiser_fast') + sys.argv[2], sr=44100, mono=False, dtype=np.float32, res_type='kaiser_fast') X, y = align_wave_head_and_tail(X, y, 44100) X_spec = wave_to_spectrogram(X, 1024, 2048) diff --git a/requirements.txt b/requirements.txt index b57ed4c6..ca7bf39c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,6 @@ pillow qrcode pygame; sys_platform != 'darwin' pygame==2.1.3; sys_platform == 'darwin' -futures psutil unidecode requests @@ -18,7 +17,7 @@ python-bidi flask-paginate==2021.10.29 torch>=1.5.1 tqdm>=4.30 -librosa>=0.6.3,<0.9 +librosa>=0.10 pip soundfile spec_utils diff --git a/vocal_splitter.py b/vocal_splitter.py index a8b68368..5054105a 100644 --- a/vocal_splitter.py +++ b/vocal_splitter.py @@ -120,7 +120,7 @@ def ffm_video2wav(input_fn, output_fn): def split_vocal_by_stereo(in_wav, out_wav_nonvocal, out_wav_vocal): try: # Create temporary filenames if not done yet - X, sr = librosa.load(in_wav, 44100, False, dtype = np.float32, res_type = 'kaiser_fast') + X, sr = librosa.load(in_wav, sr = 44100, mono = False, dtype = np.float32, res_type = 'kaiser_fast') if X.shape[0] < 2: return False if out_wav_nonvocal: @@ -134,7 +134,7 @@ def split_vocal_by_stereo(in_wav, out_wav_nonvocal, out_wav_vocal): def split_vocal_by_dnn(in_wav, out_wav_nonvocal, out_wav_vocal, args): print('Loading wave source ...', end = ' ', flush = True) - X, sr = librosa.load(in_wav, args.sr, False, dtype = np.float32, res_type = 'kaiser_fast') + X, sr = librosa.load(in_wav, sr = args.sr, mono = False, dtype = np.float32, res_type = 'kaiser_fast') print('done', flush = True) if X.ndim == 1: