diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c42c39..fc402d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## [1.1.6] - 2022-10-30 +### Changed +- Fixed dependencies with `librosa` library + ## [1.1.5] - 2022-10-17 ### Changed - Fixed dependencies with `librosa` library diff --git a/mltu/__init__.py b/mltu/__init__.py index 5cb2f26..dd0875f 100644 --- a/mltu/__init__.py +++ b/mltu/__init__.py @@ -1,4 +1,4 @@ -__version__ = "1.1.5" +__version__ = "1.1.6" from .annotations.images import Image from .annotations.images import CVImage diff --git a/mltu/augmentors.py b/mltu/augmentors.py index bc7396b..f23a37b 100644 --- a/mltu/augmentors.py +++ b/mltu/augmentors.py @@ -6,6 +6,11 @@ from . import Image from mltu.annotations.audio import Audio +try: + import librosa +except: + print("librosa not found. Please install it with `pip install librosa` if you plan to use it.") + """ Implemented image augmentors: - RandomBrightness @@ -592,15 +597,14 @@ def __init__( self.max_n_steps = max_n_steps try: - import librosa - # samplerate + librosa.__version__ except ImportError: raise ImportError("librosa is required to augment Audio. Please install it with `pip install librosa`.") def augment(self, audio: Audio) -> Audio: random_n_steps = np.random.randint(-self.max_n_steps, self.max_n_steps) # changing default res_type "kaiser_best" to "linear" for speed and memory efficiency - shift_audio = self.librosa.effects.pitch_shift( + shift_audio = librosa.effects.pitch_shift( audio.numpy(), sr=audio.sample_rate, n_steps=random_n_steps, res_type="linear" ) audio.audio = shift_audio @@ -631,13 +635,13 @@ def __init__( self.max_rate = max_rate try: - import librosa + librosa.__version__ except ImportError: raise ImportError("librosa is required to augment Audio. Please install it with `pip install librosa`.") def augment(self, audio: Audio) -> Audio: random_rate = np.random.uniform(self.min_rate, self.max_rate) - stretch_audio = self.librosa.effects.time_stretch(audio.numpy(), rate=random_rate) + stretch_audio = librosa.effects.time_stretch(audio.numpy(), rate=random_rate) audio.audio = stretch_audio return audio \ No newline at end of file diff --git a/mltu/preprocessors.py b/mltu/preprocessors.py index ead6bb8..d290e25 100644 --- a/mltu/preprocessors.py +++ b/mltu/preprocessors.py @@ -5,6 +5,11 @@ import matplotlib import logging +try: + import librosa +except: + print("librosa not found. Please install it with `pip install librosa` if you plan to use it.") + from . import Image from mltu.annotations.audio import Audio @@ -67,7 +72,6 @@ def __init__( self.logger.setLevel(log_level) try: - import librosa librosa.__version__ except AttributeError: raise ImportError("librosa is required to read WAV files. Please install it with `pip install librosa`.") @@ -89,7 +93,7 @@ def __call__(self, audio_path: str, label: typing.Any) -> typing.Tuple[np.ndarra else: raise TypeError(f"Audio {audio_path} is not a string.") - audio = Audio(audio_path, sample_rate=self.sample_rate, library=self.librosa) + audio = Audio(audio_path, sample_rate=self.sample_rate, library=librosa) if not audio.init_successful: audio = None @@ -120,7 +124,6 @@ def __init__( matplotlib.interactive(False) # Check if librosa is installed try: - import librosa librosa.__version__ except AttributeError: raise ImportError("librosa is required to read WAV files. Please install it with `pip install librosa`.") @@ -139,12 +142,12 @@ def get_spectrogram(wav_path: str, frame_length: int, frame_step: int, fft_lengt np.ndarray: Spectrogram of the WAV file. """ # Load the wav file and store the audio data in the variable 'audio' and the sample rate in 'orig_sr' - audio, orig_sr = WavReader.librosa.load(wav_path) + audio, orig_sr = librosa.load(wav_path) # Compute the Short Time Fourier Transform (STFT) of the audio data and store it in the variable 'spectrogram' # The STFT is computed with a hop length of 'frame_step' samples, a window length of 'frame_length' samples, and 'fft_length' FFT components. # The resulting spectrogram is also transposed for convenience - spectrogram = WavReader.librosa.stft(audio, hop_length=frame_step, win_length=frame_length, n_fft=fft_length).T + spectrogram = librosa.stft(audio, hop_length=frame_step, win_length=frame_length, n_fft=fft_length).T # Take the absolute value of the spectrogram to obtain the magnitude spectrum spectrogram = np.abs(spectrogram) @@ -168,7 +171,7 @@ def plot_raw_audio(wav_path: str, title: str = None, sr: int = 16000) -> None: title (str, optional): Title """ # Load the wav file and store the audio data in the variable 'audio' and the sample rate in 'orig_sr' - audio, orig_sr = WavReader.librosa.load(wav_path, sr=sr) + audio, orig_sr = librosa.load(wav_path, sr=sr) duration = len(audio) / orig_sr diff --git a/mltu/transformers.py b/mltu/transformers.py index 48d597b..f026899 100644 --- a/mltu/transformers.py +++ b/mltu/transformers.py @@ -3,6 +3,11 @@ import logging import numpy as np +try: + import librosa +except: + print("librosa not found. Please install it with `pip install librosa` if you plan to use it.") + from . import Image from mltu.annotations.audio import Audio @@ -231,7 +236,7 @@ def __call__(self, audio: Audio, label: typing.Any): if self.limit: padded_audios = padded_audios[:, :self.max_audio_length] - return padded_audios, np.array(label) + return padded_audios, label audio_numpy = audio.numpy() # limit audio if it exceed max_audio_length @@ -265,7 +270,7 @@ def __init__( self.fft_length = fft_length try: - import librosa + librosa.__version__ except ImportError: raise ImportError("librosa is required to transform Audio. Please install it with `pip install librosa`.") @@ -284,7 +289,7 @@ def __call__(self, audio: Audio, label: typing.Any): # Compute the Short Time Fourier Transform (STFT) of the audio data and store it in the variable 'spectrogram' # The STFT is computed with a hop length of 'frame_step' samples, a window length of 'frame_length' samples, and 'fft_length' FFT components. # The resulting spectrogram is also transposed for convenience - spectrogram = self.librosa.stft(audio.numpy(), hop_length=self.frame_step, win_length=self.frame_length, n_fft=self.fft_length).T + spectrogram = librosa.stft(audio.numpy(), hop_length=self.frame_step, win_length=self.frame_length, n_fft=self.fft_length).T # Take the absolute value of the spectrogram to obtain the magnitude spectrum spectrogram = np.abs(spectrogram)