forked from clovaai/aasist
-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_utils.py
99 lines (80 loc) · 2.86 KB
/
data_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import numpy as np
import soundfile as sf
import torch
from torch import Tensor
from torch.utils.data import Dataset
# Module authorship metadata.
# ``__author__`` is the conventional dunder name; the original spelling with
# three leading underscores is kept as an alias so that any existing reference
# to it keeps working.
__author__ = "Hemlata Tak, Jee-weon Jung"
___author__ = __author__
__email__ = "[email protected], [email protected]"
def genSpoof_list(dir_meta, is_train=False, is_eval=False):
    """Parse a protocol/metadata text file into utterance keys and labels.

    Every non-blank line must consist of exactly five fields separated by
    single spaces. The second field is used as the utterance key and the
    fifth as the label; the label ``"bonafide"`` maps to 1 and any other
    label maps to 0. Blank lines (such as a trailing empty line at the end
    of the file) are skipped.

    Args:
        dir_meta: Path of the metadata file to read.
        is_train: Request the labelled result. Takes precedence over
            ``is_eval`` when both flags are true.
        is_eval: When true (and ``is_train`` is false), labels are ignored
            and only the list of keys is returned.

    Returns:
        ``file_list`` (list of keys in file order) in eval mode; otherwise
        the tuple ``(d_meta, file_list)`` where ``d_meta`` maps each key to
        its integer label.

    Raises:
        ValueError: If a non-blank line does not split into five fields.
    """
    # The train mode and the default (neither flag) mode return the same
    # labelled structure; only pure eval mode drops the labels.
    want_labels = is_train or not is_eval

    d_meta = {}
    file_list = []
    with open(dir_meta, "r") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                # Nothing to parse on an empty line; unpacking it would fail.
                continue
            _, key, _, _, label = line.split(" ")
            file_list.append(key)
            if want_labels:
                d_meta[key] = 1 if label == "bonafide" else 0

    if want_labels:
        return d_meta, file_list
    return file_list
def pad(x, max_len=64600):
    """Truncate or repeat-pad ``x`` to ``max_len`` entries.

    If the first dimension of ``x`` already has at least ``max_len``
    entries, the leading ``max_len`` entries are returned. Otherwise ``x``
    is tiled end-to-end and the first ``max_len`` values of the tiled
    sequence are returned.

    Args:
        x: Array exposing ``.shape``; the padding path assumes a 1-D signal
            (TODO confirm callers never pass multi-channel audio).
        max_len: Target number of samples.

    Returns:
        The cropped or repeat-padded array.
    """
    n_samples = x.shape[0]
    if n_samples < max_len:
        # Too short: repeat the signal enough times to cover max_len, then cut.
        reps = int(max_len / n_samples) + 1
        tiled = np.tile(x, (1, reps))
        return tiled[0, :max_len]
    return x[:max_len]
def pad_random(x: np.ndarray, max_len: int = 64600):
    """Return a ``max_len``-sample excerpt of ``x``, cropping at a random offset.

    When ``x`` has at least ``max_len`` entries along its first axis, a
    contiguous window of ``max_len`` entries is taken at a start offset
    drawn uniformly from every valid position (``0`` up to and including
    ``x_len - max_len``). When ``x`` is shorter, it is repeated end-to-end
    and cut to ``max_len``.

    Args:
        x: Input signal; the padding path assumes a 1-D array
            (TODO confirm callers never pass multi-channel audio).
        max_len: Target number of samples.

    Returns:
        The cropped or repeat-padded array.

    Raises:
        ZeroDivisionError: If ``x`` is empty and ``max_len`` is positive.
    """
    x_len = x.shape[0]
    # if duration is already long enough
    if x_len >= max_len:
        # np.random.randint's upper bound is exclusive, so add 1: this makes
        # the last valid start reachable and avoids randint(0), which raises
        # ValueError when the signal is exactly max_len long.
        stt = np.random.randint(x_len - max_len + 1)
        return x[stt:stt + max_len]

    # if too short
    num_repeats = int(max_len / x_len) + 1
    return np.tile(x, num_repeats)[:max_len]
class Dataset_ASVspoof2019_train(Dataset):
    """Dataset that yields ``(waveform tensor, label)`` pairs.

    Each item is read from ``<base_dir>/flac/<key>.flac``, brought to
    ``self.cut`` samples with ``pad_random`` and wrapped in a ``Tensor``.
    """

    def __init__(self, list_IDs, labels, base_dir):
        """Keep references to the utterance list, label table and data root.

        Args:
            list_IDs: list of strings (each string: utt key).
            labels: dictionary (key: utt key, value: label integer).
            base_dir: root directory of the audio; it is joined with the
                ``/`` operator, so it must support that (e.g. a
                ``pathlib.Path``).
        """
        self.cut = 64600  # take ~4 sec audio (64600 samples)
        self.base_dir = base_dir
        self.labels = labels
        self.list_IDs = list_IDs

    def __len__(self):
        """Return the number of utterance keys held by the dataset."""
        return len(self.list_IDs)

    def __getitem__(self, index):
        """Load, length-normalise and label the utterance at ``index``.

        Returns:
            Tuple ``(x_inp, y)``: the padded/cropped waveform as a
            ``Tensor`` and the label looked up in ``self.labels``.
        """
        utt_key = self.list_IDs[index]
        flac_path = str(self.base_dir / f"flac/{utt_key}.flac")
        # sf.read also returns the sample rate, which is not needed here.
        waveform, _ = sf.read(flac_path)
        fixed_len = pad_random(waveform, self.cut)
        return Tensor(fixed_len), self.labels[utt_key]
class Dataset_ASVspoof2019_devNeval(Dataset):
    """Dataset that yields ``(waveform tensor, utterance key)`` pairs.

    Each item is read from ``<base_dir>/flac/<key>.flac``, brought to
    ``self.cut`` samples with the deterministic ``pad`` helper and wrapped
    in a ``Tensor``.
    """

    def __init__(self, list_IDs, base_dir):
        """Keep references to the utterance list and data root.

        Args:
            list_IDs: list of strings (each string: utt key).
            base_dir: root directory of the audio; it is joined with the
                ``/`` operator, so it must support that (e.g. a
                ``pathlib.Path``).
        """
        self.cut = 64600  # take ~4 sec audio (64600 samples)
        self.base_dir = base_dir
        self.list_IDs = list_IDs

    def __len__(self):
        """Return the number of utterance keys held by the dataset."""
        return len(self.list_IDs)

    def __getitem__(self, index):
        """Load and length-normalise the utterance at ``index``.

        Returns:
            Tuple ``(x_inp, key)``: the padded/cropped waveform as a
            ``Tensor`` and the utterance key it was loaded from.
        """
        utt_key = self.list_IDs[index]
        flac_path = str(self.base_dir / f"flac/{utt_key}.flac")
        # sf.read also returns the sample rate, which is not needed here.
        waveform, _ = sf.read(flac_path)
        fixed_len = pad(waveform, self.cut)
        return Tensor(fixed_len), utt_key