From 30dba18425ed44ec5082e88cda2badae1b03baaa Mon Sep 17 00:00:00 2001
From: mrbraden56 <bradenlock83@gmail.com>
Date: Sat, 25 Jun 2022 08:21:52 -0500
Subject: [PATCH] Replace sf.read with librosa.load so audio is resampled to
 the sample rate specified in the config file

---
 audiolib.py                |  9 +++++----
 noisyspeech_synthesizer.py | 10 +++++-----
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/audiolib.py b/audiolib.py
index cd999efe..3b526bc4 100644
--- a/audiolib.py
+++ b/audiolib.py
@@ -7,14 +7,15 @@
 import soundfile as sf
 import os
 import numpy as np
+import librosa
 
 # Function to read audio
-def audioread(path, norm = True, start=0, stop=None):
+def audioread(path, norm = True, sr=16000):
     path = os.path.abspath(path)
     if not os.path.exists(path):
         raise ValueError("[{}] does not exist!".format(path))
     try:
-        x, sr = sf.read(path, start=start, stop=stop)
+        x, sr = librosa.load(path, sr=sr)
     except RuntimeError:  # fix for sph pcm-embedded shortened v2
         print('WARNING: Audio type not supported')
 
@@ -47,8 +48,8 @@ def audiowrite(data, fs, destpath, norm=False):
     
     if not os.path.exists(destdir):
         os.makedirs(destdir)
-    
-    sf.write(destpath, data, fs)
+
+    sf.write(destpath, data, int(fs))
     return
 
 # Function to mix clean speech and noise at various SNR levels
diff --git a/noisyspeech_synthesizer.py b/noisyspeech_synthesizer.py
index 86c136e1..f36c42a9 100644
--- a/noisyspeech_synthesizer.py
+++ b/noisyspeech_synthesizer.py
@@ -19,7 +19,7 @@ def main(cfg):
         clean_dir = cfg["speech_dir"]
     if not os.path.exists(clean_dir):
         assert False, ("Clean speech data is required")
-    
+
     noise_dir = os.path.join(os.path.dirname(__file__), 'noise_train')
     if cfg["noise_dir"]!='None':
         noise_dir = cfg["noise_dir"]
@@ -59,7 +59,7 @@ def main(cfg):
     
     while num_samples < total_samples:
         idx_s = np.random.randint(0, np.size(cleanfilenames))
-        clean, fs = audioread(cleanfilenames[idx_s])
+        clean, fs = audioread(cleanfilenames[idx_s], sr=fs)
         
         if len(clean)>audio_length:
             clean = clean
@@ -70,12 +70,12 @@ def main(cfg):
                 idx_s = idx_s + 1
                 if idx_s >= np.size(cleanfilenames)-1:
                     idx_s = np.random.randint(0, np.size(cleanfilenames)) 
-                newclean, fs = audioread(cleanfilenames[idx_s])
+                newclean, fs = audioread(cleanfilenames[idx_s], sr=fs)
                 cleanconcat = np.append(clean, np.zeros(int(fs*silence_length)))
                 clean = np.append(cleanconcat, newclean)
     
         idx_n = np.random.randint(0, np.size(noisefilenames))
-        noise, fs = audioread(noisefilenames[idx_n])
+        noise, fs = audioread(noisefilenames[idx_n], sr=fs)
         
         if len(noise)>=len(clean):
             noise = noise[0:len(clean)]
@@ -86,7 +86,7 @@ def main(cfg):
                 idx_n = idx_n + 1
                 if idx_n >= np.size(noisefilenames)-1:
                     idx_n = np.random.randint(0, np.size(noisefilenames))
-                newnoise, fs = audioread(noisefilenames[idx_n])
+                newnoise, fs = audioread(noisefilenames[idx_n], sr=fs)
                 noiseconcat = np.append(noise, np.zeros(int(fs*silence_length)))
                 noise = np.append(noiseconcat, newnoise)
         noise = noise[0:len(clean)]