- Mel & MFCC
- CWT & Synchrosqueezing
- CQT & Chroma
- Different Wavelet Type
- Spectral Features
- Pitch Estimate
- Onset Detection
- Harmonic Percussive Source Separation
Mel spectrogram and Mel-frequency cepstral coefficients
import numpy as np
import audioflux as af
import matplotlib.pyplot as plt
from audioflux.display import fill_spec
# Load the bundled '220' sample: returns the samples and the sample rate
audio_arr, sr = af.read(af.utils.sample_path('220'))

# Mel spectrogram with 128 bands; the second return value holds the
# frequency of each mel band and is reused below for the y axis
spec_arr, mel_fre_band_arr = af.mel_spectrogram(
    audio_arr, num=128, radix2_exp=12, samplate=sr)
spec_arr = np.abs(spec_arr)

# MFCC: 13 coefficients computed with the same mel/FFT settings
mfcc_arr, _ = af.mfcc(
    audio_arr, cc_num=13, mel_num=128, radix2_exp=12, samplate=sr)

# Axis coordinates: one more x coordinate than there are frames, and a
# leading 0 prepended to the mel band frequencies
duration = audio_arr.shape[-1] / sr
frame_count = spec_arr.shape[-1]
x_coords = np.linspace(0, duration, frame_count + 1)
y_coords = np.insert(mel_fre_band_arr, 0, 0)

# Figure 1: mel spectrogram on a log frequency axis
mel_fig, mel_ax = plt.subplots()
mel_img = fill_spec(spec_arr, axes=mel_ax,
                    x_coords=x_coords, y_coords=y_coords,
                    x_axis='time', y_axis='log',
                    title='Mel Spectrogram')
mel_fig.colorbar(mel_img, ax=mel_ax)

# Figure 2: MFCC over the same time axis
mfcc_fig, mfcc_ax = plt.subplots()
mfcc_img = fill_spec(mfcc_arr, axes=mfcc_ax,
                     x_coords=x_coords, x_axis='time',
                     title='MFCC')
mfcc_fig.colorbar(mfcc_img, ax=mfcc_ax)

plt.show()
Continuous Wavelet Transform spectrogram and its corresponding synchrosqueezing reassignment spectrogram
import numpy as np
import audioflux as af
from audioflux.type import SpectralFilterBankScaleType, WaveletContinueType
from audioflux.utils import note_to_hz
import matplotlib.pyplot as plt
from audioflux.display import fill_spec
# Load the bundled '220' sample and keep only its first 4096 samples
audio_arr, sr = af.read(af.utils.sample_path('220'))
audio_arr = audio_arr[..., :4096]

# Continuous wavelet transform: 84 octave-scale bands starting at C1,
# 12 bins per octave, Morse wavelet
cwt_obj = af.CWT(num=84, radix2_exp=12, samplate=sr,
                 low_fre=note_to_hz('C1'), bin_per_octave=12,
                 wavelet_type=WaveletContinueType.MORSE,
                 scale_type=SpectralFilterBankScaleType.OCTAVE)
cwt_spec_arr = cwt_obj.cwt(audio_arr)

# Synchrosqueezing is configured from the CWT object so both transforms
# share the same band count, FFT size, sample rate and band frequencies
synsq_obj = af.Synsq(num=cwt_obj.num,
                     radix2_exp=cwt_obj.radix2_exp,
                     samplate=cwt_obj.samplate)
synsq_arr = synsq_obj.synsq(cwt_spec_arr,
                            filter_bank_type=cwt_obj.scale_type,
                            fre_arr=cwt_obj.get_fre_band_arr())

# One figure per transform; both are drawn on the CWT's coordinates
for plot_title, complex_spec in (('CWT', cwt_spec_arr), ('Synsq', synsq_arr)):
    fig, ax = plt.subplots(figsize=(7, 4))
    img = fill_spec(np.abs(complex_spec), axes=ax,
                    x_coords=cwt_obj.x_coords(),
                    y_coords=cwt_obj.y_coords(),
                    x_axis='time', y_axis='log',
                    title=plot_title)
    fig.colorbar(img, ax=ax)

plt.show()
Constant-Q Transform (CQT) spectrogram and the chroma features derived from it
import numpy as np
import audioflux as af
import matplotlib.pyplot as plt
from audioflux.display import fill_spec
# Load the bundled 'guitar_chord2' sample: samples and sample rate
guitar_path = af.utils.sample_path('guitar_chord2')
audio_arr, sr = af.read(guitar_path)

# A single CQT object (84 bins) produces both the CQT and the chroma,
# the latter being derived from the CQT result
cqt_obj = af.CQT(num=84, samplate=sr)
cqt_arr = cqt_obj.cqt(audio_arr)
chroma_cqt_arr = cqt_obj.chroma(cqt_arr)

# The time axis of both plots depends on the number of audio samples
audio_len = audio_arr.shape[-1]

# Figure 1: CQT magnitude on a log frequency axis
cqt_fig, cqt_ax = plt.subplots()
cqt_img = fill_spec(np.abs(cqt_arr), axes=cqt_ax,
                    x_coords=cqt_obj.x_coords(audio_len),
                    y_coords=cqt_obj.y_coords(),
                    x_axis='time', y_axis='log',
                    title='CQT')
cqt_fig.colorbar(cqt_img, ax=cqt_ax)

# Figure 2: chroma on a chroma axis
chroma_fig, chroma_ax = plt.subplots()
chroma_img = fill_spec(chroma_cqt_arr, axes=chroma_ax,
                       x_coords=cqt_obj.x_coords(audio_len),
                       x_axis='time', y_axis='chroma',
                       title='Chroma-CQT')
chroma_fig.colorbar(chroma_img, ax=chroma_ax)

plt.show()
Compare the spectrograms produced by the Morlet, Morse, Paul and Bump wavelet types
import numpy as np
import audioflux as af
from audioflux.type import SpectralFilterBankScaleType, WaveletContinueType
from audioflux.utils import note_to_hz
import matplotlib.pyplot as plt
from audioflux.display import fill_spec, fill_wave
# Load the bundled '220' sample and keep only its first 4096 samples
audio_arr, sr = af.read(af.utils.sample_path('220'))
audio_arr = audio_arr[..., :4096]

# Wavelet types to compare, paired with the name shown in each plot title
wavelets = (
    (WaveletContinueType.MORLET, 'MORLET'),
    (WaveletContinueType.MORSE, 'MORSE'),
    (WaveletContinueType.PAUL, 'PAUL'),
    (WaveletContinueType.BUMP, 'BUMP'),
)

# Build one octave-scale CWT per wavelet type; every other setting is
# identical. Each magnitude spectrogram is stored with the coordinates
# reported by the object that produced it.
results = []
for wavelet_type, wavelet_name in wavelets:
    cwt_obj = af.CWT(num=84, radix2_exp=12, samplate=sr,
                     low_fre=note_to_hz('C1'), bin_per_octave=12,
                     wavelet_type=wavelet_type,
                     scale_type=SpectralFilterBankScaleType.OCTAVE)
    spec_arr = np.abs(cwt_obj.cwt(audio_arr))
    results.append((wavelet_name, spec_arr,
                    cwt_obj.x_coords(), cwt_obj.y_coords()))

# Display: waveform in the first row, then one spectrogram per wavelet
fig, ax = plt.subplots(nrows=len(results) + 1, figsize=(8, 10), sharex=True)
fill_wave(audio_arr, samplate=sr, axes=ax[0])

for row, (wavelet_name, spec_arr, x_coords, y_coords) in enumerate(results, start=1):
    axis_kwargs = {'y_axis': 'log'}
    # The x axis is shared, so only the bottom plot requests the time axis
    if row == len(results):
        axis_kwargs['x_axis'] = 'time'
    fill_spec(spec_arr, axes=ax[row],
              x_coords=x_coords, y_coords=y_coords,
              title=f'CWT-Octave {wavelet_name} Spectrogram',
              **axis_kwargs)

plt.show()
Flatness, Novelty, Entropy, RMS and Slope features
import numpy as np
import audioflux as af
from audioflux.type import SpectralDataType, SpectralFilterBankScaleType
import matplotlib.pyplot as plt
from audioflux.display import fill_wave, fill_plot, fill_spec
# Load the bundled 'guitar_chord2' sample: samples and sample rate
audio_arr, sr = af.read(af.utils.sample_path('guitar_chord2'))
audio_len = audio_arr.shape[-1]

# Linear-scale magnitude spectrogram (256 bands, hop of 1024 samples)
bft_obj = af.BFT(num=256, samplate=sr, radix2_exp=12, slide_length=1024,
                 data_type=SpectralDataType.MAG,
                 scale_type=SpectralFilterBankScaleType.LINEAR)
spec_arr = np.abs(bft_obj.bft(audio_arr))

# The Spectral object is configured from the BFT's band layout and must
# be told the number of frames before features are extracted
spectral_obj = af.Spectral(num=bft_obj.num,
                           fre_band_arr=bft_obj.get_fre_band_arr())
spectral_obj.set_time_length(spec_arr.shape[-1])

# (legend label, per-frame feature values), in plotting order
features = (
    ('flatness', spectral_obj.flatness(spec_arr)),
    ('novelty', spectral_obj.novelty(spec_arr)),
    ('entropy', spectral_obj.entropy(spec_arr)),
    ('rms_arr', spectral_obj.rms(spec_arr)),
    ('slope_arr', spectral_obj.slope(spec_arr)),
)

# Display: waveform, spectrogram, then one row per feature
fig, ax = plt.subplots(nrows=7, figsize=(8, 10), sharex=True)

# Time of each frame in seconds: frame index times hop duration
hop_seconds = bft_obj.slide_length / bft_obj.samplate
times = np.arange(0, features[0][1].shape[-1]) * hop_seconds

fill_wave(audio_arr, samplate=sr, axes=ax[0])
fill_spec(spec_arr, axes=ax[1],
          x_coords=bft_obj.x_coords(audio_len),
          y_coords=bft_obj.y_coords(),
          y_axis='log')
for row, (feature_label, feature_arr) in enumerate(features, start=2):
    fill_plot(times, feature_arr, axes=ax[row], label=feature_label)

plt.show()
Fundamental frequency (F0) estimation using the YIN algorithm.
import numpy as np
import audioflux as af
from audioflux.type import PitchType
import matplotlib.pyplot as plt
from audioflux.display import fill_wave
# Load the bundled 'voice' sample: samples and sample rate
audio_arr, sr = af.read(af.utils.sample_path('voice'))

# Estimate pitch with YIN. pitch() returns three arrays; only the
# frequency estimates are used here.
obj = af.Pitch(pitch_type=PitchType.YIN)
fre_arr, _, _ = obj.pitch(audio_arr)

# Replace estimates below 1 Hz with NaN so those frames are left blank
# in the plot (presumably frames without a usable pitch -- confirm)
fre_arr[fre_arr < 1] = np.nan

# Display: waveform on top, pitch curves below
fig, ax = plt.subplots(nrows=2, figsize=(8, 6), sharex=True)
# Time of each frame in seconds: frame index times hop duration
times = np.arange(0, fre_arr.shape[-1]) * (obj.slide_length / obj.samplate)

fill_wave(audio_arr, samplate=sr, axes=ax[0])

ax[1].xaxis.set_label_text("Time(s)")
ax[1].yaxis.set_label_text("Frequency(Hz)")
ax[1].plot(times, fre_arr, label='fre', linewidth=3)

# Hard-coded reference curve: (first frame, end frame, frequency in Hz).
# Frames outside every segment stay NaN and are therefore not drawn.
reference_segments = (
    (25, 48, 261.6),
    (56, 78, 293.7),
    (87, 107, 329.6),
    (118, 135, 349.2),
    (150, 169, 392.0),
    (179, 200, 440.0),
    (212, 243, 493.9),
)
real_fre_arr = np.full_like(fre_arr, np.nan)
for first_frame, end_frame, hz in reference_segments:
    real_fre_arr[first_frame:end_frame] = hz

# Label the reference curve separately and draw a legend so the
# estimated and reference lines can be told apart
ax[1].plot(times, real_fre_arr, color='red', label='real fre', linewidth=2)
ax[1].legend()

plt.show()
Locate note onset events by picking peaks in an onset strength envelope.
import numpy as np
import audioflux as af
from audioflux.type import SpectralFilterBankScaleType, SpectralDataType, NoveltyType
import matplotlib.pyplot as plt
from audioflux.display import fill_wave, fill_plot, fill_spec
# Load the bundled 'guitar_chord2' sample: samples and sample rate
audio_arr, sr = af.read(af.utils.sample_path('guitar_chord2'))

# Mel-scale power spectrogram (128 bands, hop of 2048 samples), in dB
bft_obj = af.BFT(num=128, samplate=sr, radix2_exp=11, slide_length=2048,
                 scale_type=SpectralFilterBankScaleType.MEL,
                 data_type=SpectralDataType.POWER)
power_spec = np.abs(bft_obj.bft(audio_arr))
spec_dB_arr = af.utils.power_to_db(power_spec)

# The onset detector is sized from the spectrogram's (frequency, time)
# shape and uses the FLUX novelty type
n_fre, n_time = spec_dB_arr.shape
onset_obj = af.Onset(time_length=n_time, fre_length=n_fre,
                     slide_length=bft_obj.slide_length,
                     samplate=bft_obj.samplate,
                     novelty_type=NoveltyType.FLUX)
# onset() returns four arrays; the envelope and onset times are plotted
_, evn_arr, time_arr, _ = onset_obj.onset(spec_dB_arr)

audio_len = audio_arr.shape[-1]
# Styling shared by the onset markers drawn on both lower plots
onset_style = dict(color='r', alpha=0.9, linestyle='--', label='Onsets')

fig, axes = plt.subplots(nrows=3, sharex=True)

# Row 1: the dB spectrogram
fill_spec(spec_dB_arr, axes=axes[0],
          x_coords=bft_obj.x_coords(audio_len),
          y_coords=bft_obj.y_coords(),
          x_axis='time', y_axis='log',
          title='Onset')

# Row 2: waveform with a vertical line from -1 to 1 at each onset time
wave_ax = fill_wave(audio_arr, samplate=sr, axes=axes[1])
wave_ax.vlines(time_arr, -1, 1, **onset_style)

# Row 3: onset strength envelope with the same onset markers, spanning
# the envelope's own value range
hop_seconds = bft_obj.slide_length / sr
times = np.arange(0, evn_arr.shape[-1]) * hop_seconds
env_ax = fill_plot(times, evn_arr, axes=axes[2], label='Onset strength')
env_ax.vlines(time_arr, evn_arr.min(), evn_arr.max(), **onset_style)

plt.show()
Decompose audio into harmonic and percussive components
import audioflux as af
from audioflux.type import SpectralDataType, WindowType, SpectralFilterBankScaleType
import matplotlib.pyplot as plt
from audioflux.display import fill_wave, fill_spec
# Load the bundled 'chord_metronome2' sample: samples and sample rate
audio_arr, sr = af.read(af.utils.sample_path('chord_metronome2'))

# Harmonic/percussive separation; the hop is a quarter of 2**radix2_exp
radix2_exp = 11
slide_length = 2 ** radix2_exp // 4
hpss_obj = af.HPSS(radix2_exp=radix2_exp, window_type=WindowType.HAMM,
                   slide_length=slide_length, h_order=21, p_order=31)
h_arr, p_arr = hpss_obj.hpss(audio_arr)

# Linear-scale power spectrogram extractor shared by all three signals
bft_obj = af.BFT(num=2049, radix2_exp=12, samplate=sr,
                 data_type=SpectralDataType.POWER,
                 scale_type=SpectralFilterBankScaleType.LINEAR)

# Trim the original to the length of the separated harmonic signal
audio_arr = audio_arr[..., :h_arr.shape[-1]]

# Original, harmonic and percussive signals with their plot titles
titles = ('origin spec', 'h spec', 'p spec')
signals = (audio_arr, h_arr, p_arr)

# Spectrograms first, then conversion to dB, in the same signal order
raw_specs = [bft_obj.bft(signal, result_type=1) for signal in signals]
db_specs = [af.utils.power_to_abs_db(raw_spec) for raw_spec in raw_specs]

# Figure 1: the three waveforms on shared axes
wave_fig, wave_axes = plt.subplots(nrows=3, sharex=True, sharey=True)
for wave_ax, signal in zip(wave_axes, signals):
    fill_wave(signal, samplate=sr, axes=wave_ax)

# Figure 2: the three dB spectrograms on shared axes
audio_len = audio_arr.shape[-1]
spec_fig, spec_axes = plt.subplots(nrows=3, sharex=True, sharey=True)
for spec_ax, db_spec, spec_title in zip(spec_axes, db_specs, titles):
    fill_spec(db_spec, axes=spec_ax,
              x_coords=bft_obj.x_coords(audio_len),
              y_coords=bft_obj.y_coords(),
              y_axis='log',
              title=spec_title)

plt.show()