From 30dba18425ed44ec5082e88cda2badae1b03baaa Mon Sep 17 00:00:00 2001 From: mrbraden56 Date: Sat, 25 Jun 2022 08:21:52 -0500 Subject: [PATCH] Replace sf.read with librosa.load to load correct sr based on config file sr specified --- audiolib.py | 9 +++++---- noisyspeech_synthesizer.py | 10 +++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/audiolib.py b/audiolib.py index cd999efe..3b526bc4 100644 --- a/audiolib.py +++ b/audiolib.py @@ -7,14 +7,15 @@ import soundfile as sf import os import numpy as np +import librosa # Function to read audio -def audioread(path, norm = True, start=0, stop=None): +def audioread(path, norm = True, sr=16000): path = os.path.abspath(path) if not os.path.exists(path): raise ValueError("[{}] does not exist!".format(path)) try: - x, sr = sf.read(path, start=start, stop=stop) + x, sr = librosa.load(path, sr=sr) except RuntimeError: # fix for sph pcm-embedded shortened v2 print('WARNING: Audio type not supported') @@ -47,8 +48,8 @@ def audiowrite(data, fs, destpath, norm=False): if not os.path.exists(destdir): os.makedirs(destdir) - - sf.write(destpath, data, fs) + + sf.write(destpath, data, int(fs)) return # Function to mix clean speech and noise at various SNR levels diff --git a/noisyspeech_synthesizer.py b/noisyspeech_synthesizer.py index 86c136e1..f36c42a9 100644 --- a/noisyspeech_synthesizer.py +++ b/noisyspeech_synthesizer.py @@ -19,7 +19,7 @@ def main(cfg): clean_dir = cfg["speech_dir"] if not os.path.exists(clean_dir): assert False, ("Clean speech data is required") - + noise_dir = os.path.join(os.path.dirname(__file__), 'noise_train') if cfg["noise_dir"]!='None': noise_dir = cfg["noise_dir"] @@ -59,7 +59,7 @@ def main(cfg): while num_samples < total_samples: idx_s = np.random.randint(0, np.size(cleanfilenames)) - clean, fs = audioread(cleanfilenames[idx_s]) + clean, fs = audioread(cleanfilenames[idx_s], sr=fs) if len(clean)>audio_length: clean = clean @@ -70,12 +70,12 @@ def main(cfg): idx_s = idx_s + 1 if idx_s >= np.size(cleanfilenames)-1: idx_s = np.random.randint(0, np.size(cleanfilenames)) - newclean, fs = audioread(cleanfilenames[idx_s]) + newclean, fs = audioread(cleanfilenames[idx_s], sr=fs) cleanconcat = np.append(clean, np.zeros(int(fs*silence_length))) clean = np.append(cleanconcat, newclean) idx_n = np.random.randint(0, np.size(noisefilenames)) - noise, fs = audioread(noisefilenames[idx_n]) + noise, fs = audioread(noisefilenames[idx_n], sr=fs) if len(noise)>=len(clean): noise = noise[0:len(clean)] @@ -86,7 +86,7 @@ def main(cfg): idx_n = idx_n + 1 if idx_n >= np.size(noisefilenames)-1: idx_n = np.random.randint(0, np.size(noisefilenames)) - newnoise, fs = audioread(noisefilenames[idx_n]) + newnoise, fs = audioread(noisefilenames[idx_n], sr=fs) noiseconcat = np.append(noise, np.zeros(int(fs*silence_length))) noise = np.append(noiseconcat, newnoise) noise = noise[0:len(clean)]