forked from rdz-oss/BattyBirdNET-Analyzer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaudio.py
118 lines (84 loc) · 2.94 KB
/
audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
"""Module containing audio helper functions.
"""
import numpy as np
import config as cfg
# Module-wide random number generator, seeded from the configuration value
# cfg.RANDOM_SEED; noise() draws its random values from this generator.
RANDOM = np.random.RandomState(cfg.RANDOM_SEED)
def openAudioFile(path: str, sample_rate=cfg.SAMPLE_RATE, offset=0.0, duration=None):
    """Load an audio file as a mono signal.

    The file is read through librosa, down-mixed to a single channel and
    resampled to ``sample_rate`` using the "kaiser_fast" resampler.

    Args:
        path: Path to the audio file.
        sample_rate: The sample rate at which the file should be processed.
        offset: The starting offset passed on to librosa.
        duration: Maximum duration of the loaded content (None loads everything
            after the offset).

    Returns:
        Whatever ``librosa.load`` returns: the audio time series and the
        sampling rate.
    """
    # librosa is imported on first use rather than at module import time.
    import librosa

    load_options = {
        "sr": sample_rate,
        "offset": offset,
        "duration": duration,
        "mono": True,
        "res_type": "kaiser_fast",
    }
    signal, sampling_rate = librosa.load(path, **load_options)

    return signal, sampling_rate
def saveSignal(sig, fname: str):
    """Write a signal to an audio file.

    The signal is written with the soundfile package at the configured sample
    rate (``cfg.SAMPLE_RATE``) using the "PCM_16" subtype.

    Args:
        sig: The signal to be saved.
        fname: The file path to write to.
    """
    # soundfile is imported on first use rather than at module import time.
    import soundfile as sf

    sf.write(fname, sig, samplerate=cfg.SAMPLE_RATE, subtype="PCM_16")
def noise(sig, shape, amount=None):
    """Create Gaussian noise scaled to a signal.

    The noise is drawn from the module-level ``RANDOM`` generator as a normal
    distribution whose mean is ``min(sig) * amount`` and whose standard
    deviation is ``max(sig) * amount``.

    Args:
        sig: The original audio signal the noise is scaled to.
        shape: Shape of the noise array to create.
        amount: The noise intensity. If None, a random intensity is drawn
            uniformly from [0.1, 0.5).

    Returns:
        A float32 numpy array of noise with the given shape. If the noise
        parameters cannot be derived from ``sig`` (for example because the
        signal is empty) or are rejected by the generator, an array of zeros
        is returned instead.
    """
    # Random noise intensity
    if amount is None:
        amount = RANDOM.uniform(0.1, 0.5)

    # Create Gaussian noise. The statistics are computed before the generator
    # is touched, so an unusable signal falls back to silence without
    # consuming any random state.
    try:
        loc = np.min(sig) * amount
        scale = np.max(sig) * amount
        noise_sig = RANDOM.normal(loc, scale, shape)
    except (ValueError, TypeError):
        # Best-effort fallback: empty or non-numeric signal, or parameters the
        # generator refuses. Pad with silence instead of failing.
        noise_sig = np.zeros(shape)

    return noise_sig.astype("float32")
def splitSignal(sig, rate, seconds, overlap, minlen):
    """Split a signal into fixed-length, optionally overlapping segments.

    Segments start every ``(seconds - overlap) * rate`` samples. Splitting
    stops at the first segment shorter than ``minlen`` seconds; a trailing
    segment that is long enough but shorter than ``seconds`` is padded to full
    length with noise.

    Args:
        sig: The original signal to be split.
        rate: The sampling rate.
        seconds: The duration of a segment.
        overlap: The overlapping seconds of segments.
        minlen: Minimum length of a split.

    Returns:
        A list of splits.

    Raises:
        ValueError: If the step between segments is not at least one sample,
            i.e. ``overlap`` is not smaller than ``seconds`` at this rate.
    """
    # Sample counts are loop invariants; compute them once.
    chunk_size = int(seconds * rate)
    min_size = int(minlen * rate)
    step_size = int((seconds - overlap) * rate)

    # A zero step would make range() fail with an unrelated message and a
    # negative step would silently produce no segments at all.
    if step_size <= 0:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than the segment duration ({seconds}) "
            "by at least one sample"
        )

    sig_splits = []

    for start in range(0, len(sig), step_size):
        split = sig[start : start + chunk_size]

        # End of signal?
        if len(split) < min_size:
            break

        # Signal chunk too short? Pad it with noise up to the full length.
        if len(split) < chunk_size:
            split = np.hstack((split, noise(split, chunk_size - len(split), 0.5)))

        sig_splits.append(split)

    return sig_splits
def cropCenter(sig, rate, seconds):
    """Bring a signal to a fixed length around its center.

    A signal longer than ``seconds`` is cut down to its central part; a
    shorter one has noise appended to its end until it reaches the target
    length. A signal that already has the target length is returned as is.

    Args:
        sig: The original signal.
        rate: The sampling rate.
        seconds: The length of the signal.

    Returns:
        The cropped or padded signal.
    """
    target_len = int(seconds * rate)
    current_len = len(sig)

    # Too long: keep the middle section.
    if current_len > target_len:
        first = int((current_len - target_len) / 2)
        return sig[first : first + target_len]

    # Too short: pad with noise
    if current_len < target_len:
        padding = noise(sig, (target_len - current_len), 0.5)
        return np.hstack((sig, padding))

    return sig