# td_utils.py
# Forked from Tony607/Keras-Trigger-Word.
import matplotlib.pyplot as plt
from scipy.io import wavfile
import os
from pydub import AudioSegment
# Calculate and plot spectrogram for a wav audio file
def graph_spectrogram(wav_file):
    """Compute and plot the spectrogram of ``wav_file`` via ``plt.specgram``.

    The samples are loaded with ``get_wav_info``. One-dimensional data is
    used as is; for two-dimensional data only column 0 is used (presumably
    the first channel -- confirm against the input files).

    Args:
        wav_file: WAV file to read; passed straight to ``get_wav_info``.

    Returns:
        The first element of the tuple returned by ``plt.specgram``.
    """
    # The rate stored in the file is read but never used: the spectrogram is
    # always computed with the fixed settings below.
    _, samples = get_wav_info(wav_file)

    window_len = 200    # Length of each window segment
    sample_freq = 8000  # Sampling frequency handed to specgram
    overlap = 120       # Overlap between windows

    dims = samples.ndim
    if dims == 1:
        signal = samples
    elif dims == 2:
        signal = samples[:, 0]
    # Any other rank leaves ``signal`` unbound, so the call below fails with
    # an unbound-local error, just as the return statement did before.
    spectrum, _freqs, _bins, _image = plt.specgram(
        signal, NFFT=window_len, Fs=sample_freq, noverlap=overlap
    )
    return spectrum
# Load a wav file
def get_wav_info(wav_file):
    """Read ``wav_file`` with ``scipy.io.wavfile.read``.

    Args:
        wav_file: Handed unchanged to ``wavfile.read``.

    Returns:
        The ``(rate, data)`` pair produced by ``wavfile.read``.
    """
    return wavfile.read(wav_file)
# Used to standardize volume of audio clip
def match_target_amplitude(sound, target_dBFS):
    """Apply the gain that moves ``sound`` from its current level to ``target_dBFS``.

    Args:
        sound: Object exposing a ``dBFS`` attribute and an ``apply_gain``
            method (presumably a pydub ``AudioSegment`` -- confirm against
            callers).
        target_dBFS: Desired level, in the same units as ``sound.dBFS``.

    Returns:
        Whatever ``sound.apply_gain`` returns for the computed gain.
    """
    # Required gain is the distance from the current level to the target.
    return sound.apply_gain(target_dBFS - sound.dBFS)
def _load_wav_clips(directory):
    """Load each file in ``directory`` whose name ends in "wav" as an AudioSegment."""
    # os.listdir yields names in arbitrary order; sorting keeps the clip order
    # (and anything that indexes into these lists) reproducible across runs
    # and machines. The suffix test is deliberately unchanged so the same set
    # of files is selected as before.
    return [
        AudioSegment.from_wav(os.path.join(directory, filename))
        for filename in sorted(os.listdir(directory))
        if filename.endswith("wav")
    ]


# Load raw audio files for speech synthesis
def load_raw_audio():
    """Load the clips stored under ``./raw_data`` (relative to the working directory).

    Reads ``./raw_data/activates``, ``./raw_data/backgrounds`` and
    ``./raw_data/negatives``, in that order. Within each directory the clips
    are ordered by file name.

    Returns:
        A tuple ``(activates, negatives, backgrounds)`` of lists of
        ``AudioSegment`` objects. Note that negatives come before
        backgrounds in the returned tuple.

    Raises:
        Errors from ``os.listdir`` (for example a missing directory) and from
        ``AudioSegment.from_wav`` propagate unchanged.
    """
    activates = _load_wav_clips("./raw_data/activates")
    backgrounds = _load_wav_clips("./raw_data/backgrounds")
    negatives = _load_wav_clips("./raw_data/negatives")
    return activates, negatives, backgrounds