Skip to content

Commit

Permalink
Merge pull request #37 from pythonlessons/develop
Browse files Browse the repository at this point in the history
fixing dependency with librosa library
  • Loading branch information
pythonlessons authored Oct 30, 2023
2 parents 870aedc + 8686f95 commit 5465413
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 15 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## [1.1.6] - 2023-10-30
### Changed
- Fixed dependencies with `librosa` library

## [1.1.5] - 2022-10-17
### Changed
- Fixed dependencies with `librosa` library
Expand Down
2 changes: 1 addition & 1 deletion mltu/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "1.1.5"
__version__ = "1.1.6"

from .annotations.images import Image
from .annotations.images import CVImage
Expand Down
14 changes: 9 additions & 5 deletions mltu/augmentors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
from . import Image
from mltu.annotations.audio import Audio

# librosa is an optional dependency: this module must stay importable without it.
try:
    import librosa
except Exception:
    # Catch Exception instead of using a bare ``except:`` so KeyboardInterrupt
    # and SystemExit still propagate, while any failure raised during the
    # import itself remains non-fatal, exactly as before.
    librosa = None  # bind the name so later references do not raise NameError
    print("librosa not found. Please install it with `pip install librosa` if you plan to use it.")

"""
Implemented image augmentors:
- RandomBrightness
Expand Down Expand Up @@ -592,15 +597,14 @@ def __init__(
self.max_n_steps = max_n_steps

try:
import librosa
# samplerate
librosa.__version__
except ImportError:
raise ImportError("librosa is required to augment Audio. Please install it with `pip install librosa`.")

def augment(self, audio: Audio) -> Audio:
random_n_steps = np.random.randint(-self.max_n_steps, self.max_n_steps)
# changing default res_type "kaiser_best" to "linear" for speed and memory efficiency
shift_audio = self.librosa.effects.pitch_shift(
shift_audio = librosa.effects.pitch_shift(
audio.numpy(), sr=audio.sample_rate, n_steps=random_n_steps, res_type="linear"
)
audio.audio = shift_audio
Expand Down Expand Up @@ -631,13 +635,13 @@ def __init__(
self.max_rate = max_rate

try:
import librosa
librosa.__version__
except ImportError:
raise ImportError("librosa is required to augment Audio. Please install it with `pip install librosa`.")

def augment(self, audio: Audio) -> Audio:
random_rate = np.random.uniform(self.min_rate, self.max_rate)
stretch_audio = self.librosa.effects.time_stretch(audio.numpy(), rate=random_rate)
stretch_audio = librosa.effects.time_stretch(audio.numpy(), rate=random_rate)
audio.audio = stretch_audio

return audio
15 changes: 9 additions & 6 deletions mltu/preprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
import matplotlib
import logging

# librosa is an optional dependency: this module must stay importable without it.
try:
    import librosa
except Exception:
    # Catch Exception instead of using a bare ``except:`` so KeyboardInterrupt
    # and SystemExit still propagate, while any failure raised during the
    # import itself remains non-fatal, exactly as before.
    # Binding the name to None means a later ``librosa.__version__`` probe
    # raises AttributeError instead of NameError.
    librosa = None
    print("librosa not found. Please install it with `pip install librosa` if you plan to use it.")

from . import Image
from mltu.annotations.audio import Audio

Expand Down Expand Up @@ -67,7 +72,6 @@ def __init__(
self.logger.setLevel(log_level)

try:
import librosa
librosa.__version__
except AttributeError:
raise ImportError("librosa is required to read WAV files. Please install it with `pip install librosa`.")
Expand All @@ -89,7 +93,7 @@ def __call__(self, audio_path: str, label: typing.Any) -> typing.Tuple[np.ndarra
else:
raise TypeError(f"Audio {audio_path} is not a string.")

audio = Audio(audio_path, sample_rate=self.sample_rate, library=self.librosa)
audio = Audio(audio_path, sample_rate=self.sample_rate, library=librosa)

if not audio.init_successful:
audio = None
Expand Down Expand Up @@ -120,7 +124,6 @@ def __init__(
matplotlib.interactive(False)
# Check if librosa is installed
try:
import librosa
librosa.__version__
except AttributeError:
raise ImportError("librosa is required to read WAV files. Please install it with `pip install librosa`.")
Expand All @@ -139,12 +142,12 @@ def get_spectrogram(wav_path: str, frame_length: int, frame_step: int, fft_lengt
np.ndarray: Spectrogram of the WAV file.
"""
# Load the wav file and store the audio data in the variable 'audio' and the sample rate in 'orig_sr'
audio, orig_sr = WavReader.librosa.load(wav_path)
audio, orig_sr = librosa.load(wav_path)

# Compute the Short Time Fourier Transform (STFT) of the audio data and store it in the variable 'spectrogram'
# The STFT is computed with a hop length of 'frame_step' samples, a window length of 'frame_length' samples, and 'fft_length' FFT components.
# The resulting spectrogram is also transposed for convenience
spectrogram = WavReader.librosa.stft(audio, hop_length=frame_step, win_length=frame_length, n_fft=fft_length).T
spectrogram = librosa.stft(audio, hop_length=frame_step, win_length=frame_length, n_fft=fft_length).T

# Take the absolute value of the spectrogram to obtain the magnitude spectrum
spectrogram = np.abs(spectrogram)
Expand All @@ -168,7 +171,7 @@ def plot_raw_audio(wav_path: str, title: str = None, sr: int = 16000) -> None:
title (str, optional): Title
"""
# Load the wav file and store the audio data in the variable 'audio' and the sample rate in 'orig_sr'
audio, orig_sr = WavReader.librosa.load(wav_path, sr=sr)
audio, orig_sr = librosa.load(wav_path, sr=sr)

duration = len(audio) / orig_sr

Expand Down
11 changes: 8 additions & 3 deletions mltu/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
import logging
import numpy as np

# librosa is an optional dependency: this module must stay importable without it.
try:
    import librosa
except Exception:
    # Catch Exception instead of using a bare ``except:`` so KeyboardInterrupt
    # and SystemExit still propagate, while any failure raised during the
    # import itself remains non-fatal, exactly as before.
    librosa = None  # bind the name so later references do not raise NameError
    print("librosa not found. Please install it with `pip install librosa` if you plan to use it.")

from . import Image
from mltu.annotations.audio import Audio

Expand Down Expand Up @@ -231,7 +236,7 @@ def __call__(self, audio: Audio, label: typing.Any):
if self.limit:
padded_audios = padded_audios[:, :self.max_audio_length]

return padded_audios, np.array(label)
return padded_audios, label

audio_numpy = audio.numpy()
# limit audio if it exceeds max_audio_length
Expand Down Expand Up @@ -265,7 +270,7 @@ def __init__(
self.fft_length = fft_length

try:
import librosa
librosa.__version__
except ImportError:
raise ImportError("librosa is required to transform Audio. Please install it with `pip install librosa`.")

Expand All @@ -284,7 +289,7 @@ def __call__(self, audio: Audio, label: typing.Any):
# Compute the Short Time Fourier Transform (STFT) of the audio data and store it in the variable 'spectrogram'
# The STFT is computed with a hop length of 'frame_step' samples, a window length of 'frame_length' samples, and 'fft_length' FFT components.
# The resulting spectrogram is also transposed for convenience
spectrogram = self.librosa.stft(audio.numpy(), hop_length=self.frame_step, win_length=self.frame_length, n_fft=self.fft_length).T
spectrogram = librosa.stft(audio.numpy(), hop_length=self.frame_step, win_length=self.frame_length, n_fft=self.fft_length).T

# Take the absolute value of the spectrogram to obtain the magnitude spectrum
spectrogram = np.abs(spectrogram)
Expand Down

0 comments on commit 5465413

Please sign in to comment.