Merge pull request #37 from pythonlessons/develop

fixing dependency with librosa library
pythonlessons · Oct 30, 2023 · 5465413
2 parents 870aedc + 8686f95
commit 5465413
Show file tree

Hide file tree

Showing 5 changed files with 31 additions and 15 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## [1.1.6] - 2023-10-30
+### Changed
+- Fixed dependencies with `librosa` library
+
 ## [1.1.5] - 2022-10-17
 ### Changed
 - Fixed dependencies with `librosa` library

diff --git a/mltu/__init__.py b/mltu/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "1.1.5"
+__version__ = "1.1.6"
 
 from .annotations.images import Image
 from .annotations.images import CVImage

diff --git a/mltu/augmentors.py b/mltu/augmentors.py
@@ -6,6 +6,11 @@
 from . import Image
 from mltu.annotations.audio import Audio
 
+try:
+    import librosa
+except:
+    print("librosa not found. Please install it with `pip install librosa` if you plan to use it.")
+
 """ 
 Implemented image augmentors:
 - RandomBrightness
@@ -592,15 +597,14 @@ def __init__(
         self.max_n_steps = max_n_steps
 
         try:
-            import librosa
-            # samplerate
+            librosa.__version__
         except ImportError:
             raise ImportError("librosa is required to augment Audio. Please install it with `pip install librosa`.")
 
     def augment(self, audio: Audio) -> Audio:
         random_n_steps = np.random.randint(-self.max_n_steps, self.max_n_steps)
         # changing default res_type "kaiser_best" to "linear" for speed and memory efficiency
-        shift_audio = self.librosa.effects.pitch_shift(
+        shift_audio = librosa.effects.pitch_shift(
             audio.numpy(), sr=audio.sample_rate, n_steps=random_n_steps, res_type="linear"
             )
         audio.audio = shift_audio
@@ -631,13 +635,13 @@ def __init__(
         self.max_rate = max_rate
 
         try:
-            import librosa
+            librosa.__version__
         except ImportError:
             raise ImportError("librosa is required to augment Audio. Please install it with `pip install librosa`.")
 
     def augment(self, audio: Audio) -> Audio:
         random_rate = np.random.uniform(self.min_rate, self.max_rate)
-        stretch_audio = self.librosa.effects.time_stretch(audio.numpy(), rate=random_rate)
+        stretch_audio = librosa.effects.time_stretch(audio.numpy(), rate=random_rate)
         audio.audio = stretch_audio
 
         return audio
diff --git a/mltu/preprocessors.py b/mltu/preprocessors.py
@@ -5,6 +5,11 @@
 import matplotlib
 import logging
 
+try:
+    import librosa
+except:
+    print("librosa not found. Please install it with `pip install librosa` if you plan to use it.")
+
 from . import Image
 from mltu.annotations.audio import Audio
 
@@ -67,7 +72,6 @@ def __init__(
         self.logger.setLevel(log_level)
 
         try:
-            import librosa
             librosa.__version__
         except AttributeError:
             raise ImportError("librosa is required to read WAV files. Please install it with `pip install librosa`.")
@@ -89,7 +93,7 @@ def __call__(self, audio_path: str, label: typing.Any) -> typing.Tuple[np.ndarra
         else:
             raise TypeError(f"Audio {audio_path} is not a string.")
 
-        audio = Audio(audio_path, sample_rate=self.sample_rate, library=self.librosa)
+        audio = Audio(audio_path, sample_rate=self.sample_rate, library=librosa)
 
         if not audio.init_successful:
             audio = None
@@ -120,7 +124,6 @@ def __init__(
         matplotlib.interactive(False)
         # Check if librosa is installed
         try:
-            import librosa
             librosa.__version__
         except AttributeError:
             raise ImportError("librosa is required to read WAV files. Please install it with `pip install librosa`.")
@@ -139,12 +142,12 @@ def get_spectrogram(wav_path: str, frame_length: int, frame_step: int, fft_lengt
             np.ndarray: Spectrogram of the WAV file.
         """
         # Load the wav file and store the audio data in the variable 'audio' and the sample rate in 'orig_sr'
-        audio, orig_sr = WavReader.librosa.load(wav_path) 
+        audio, orig_sr = librosa.load(wav_path) 
 
         # Compute the Short Time Fourier Transform (STFT) of the audio data and store it in the variable 'spectrogram'
         # The STFT is computed with a hop length of 'frame_step' samples, a window length of 'frame_length' samples, and 'fft_length' FFT components.
         # The resulting spectrogram is also transposed for convenience
-        spectrogram = WavReader.librosa.stft(audio, hop_length=frame_step, win_length=frame_length, n_fft=fft_length).T
+        spectrogram = librosa.stft(audio, hop_length=frame_step, win_length=frame_length, n_fft=fft_length).T
 
         # Take the absolute value of the spectrogram to obtain the magnitude spectrum
         spectrogram = np.abs(spectrogram)
@@ -168,7 +171,7 @@ def plot_raw_audio(wav_path: str, title: str = None, sr: int = 16000) -> None:
             title (str, optional): Title
         """
         # Load the wav file and store the audio data in the variable 'audio' and the sample rate in 'orig_sr'
-        audio, orig_sr = WavReader.librosa.load(wav_path, sr=sr)
+        audio, orig_sr = librosa.load(wav_path, sr=sr)
 
         duration = len(audio) / orig_sr
 

diff --git a/mltu/transformers.py b/mltu/transformers.py
@@ -3,6 +3,11 @@
 import logging
 import numpy as np
 
+try:
+    import librosa
+except:
+    print("librosa not found. Please install it with `pip install librosa` if you plan to use it.")
+
 from . import Image
 from mltu.annotations.audio import Audio
 
@@ -231,7 +236,7 @@ def __call__(self, audio: Audio, label: typing.Any):
             if self.limit:
                 padded_audios = padded_audios[:, :self.max_audio_length]
 
-            return padded_audios, np.array(label)
+            return padded_audios, label
 
         audio_numpy = audio.numpy()
         # limit audio if it exceeds max_audio_length
@@ -265,7 +270,7 @@ def __init__(
         self.fft_length = fft_length
 
         try:
-            import librosa
+            librosa.__version__
         except ImportError:
             raise ImportError("librosa is required to transform Audio. Please install it with `pip install librosa`.")
 
@@ -284,7 +289,7 @@ def __call__(self, audio: Audio, label: typing.Any):
         # Compute the Short Time Fourier Transform (STFT) of the audio data and store it in the variable 'spectrogram'
         # The STFT is computed with a hop length of 'frame_step' samples, a window length of 'frame_length' samples, and 'fft_length' FFT components.
         # The resulting spectrogram is also transposed for convenience
-        spectrogram = self.librosa.stft(audio.numpy(), hop_length=self.frame_step, win_length=self.frame_length, n_fft=self.fft_length).T
+        spectrogram = librosa.stft(audio.numpy(), hop_length=self.frame_step, win_length=self.frame_length, n_fft=self.fft_length).T
 
         # Take the absolute value of the spectrogram to obtain the magnitude spectrum
         spectrogram = np.abs(spectrogram)