-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathanalyze_split.py
49 lines (39 loc) · 1.07 KB
/
analyze_split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import os
# Location of the identification split list. Each non-blank line is expected
# to look like "<subset> <iden>/<vid>/<aud>", where <subset> is '1', '2' or '3'.
txt_dir = '/media/data/bbekci/voxceleb/iden_split.txt'


def parse_split_lines(lines):
    """Partition split-file lines into train, validation and test path lists.

    Args:
        lines: Iterable of text lines (an open file works), each holding a
            subset code and a path separated by whitespace.

    Returns:
        A ``(train_paths, val_paths, test_paths)`` tuple of lists. Subset
        code '1' goes to train, '2' to validation and '3' to test; paths
        keep their order of appearance. Lines carrying any other subset
        code are ignored.

    Raises:
        ValueError: If a non-blank line does not consist of exactly two
            whitespace-separated fields.
    """
    train_paths = []
    val_paths = []
    test_paths = []
    by_subset = {'1': train_paths, '2': val_paths, '3': test_paths}

    for lineno, line in enumerate(lines, start=1):
        fields = line.split()
        if not fields:
            # Blank lines (typically a trailing newline at end of file)
            # carry no entry, so skip them instead of failing to unpack.
            continue
        if len(fields) != 2:
            raise ValueError(
                f"line {lineno}: expected '<subset> <path>', got {line!r}"
            )
        subset, path = fields
        bucket = by_subset.get(subset)
        if bucket is not None:
            bucket.append(path)

    return train_paths, val_paths, test_paths


def group_videos_by_identity(paths):
    """Map each identity to the distinct video ids found under it.

    Args:
        paths: Iterable of strings shaped ``iden/vid/aud``.

    Returns:
        A dict from identity to a list of its video ids, without
        duplicates, in order of first appearance.

    Raises:
        ValueError: If a path does not split into exactly three
            '/'-separated components.
    """
    idens = {}
    seen = set()  # (iden, vid) pairs already recorded; O(1) duplicate check
    for p in paths:
        iden, vid, _aud = p.split('/')
        vids = idens.setdefault(iden, [])
        if (iden, vid) not in seen:
            seen.add((iden, vid))
            vids.append(vid)
    return idens


if __name__ == "__main__":
    # Stream the file line by line rather than loading it whole.
    with open(txt_dir, 'r', encoding='utf-8') as identxt:
        train_paths, val_paths, test_paths = parse_split_lines(identxt)

    print(test_paths[:20])

    tr_idens = group_videos_by_identity(train_paths)
    te_idens = group_videos_by_identity(test_paths)
    val_idens = group_videos_by_identity(val_paths)

    # The value printed after " aud: " is the number of distinct video ids
    # recorded for the identity in the test subset.
    for k in te_idens:
        print("ID: ", k, " aud: ", len(te_idens[k]))