forked from YerevaNN/Spoken-language-identification
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmake_submission.py
39 lines (32 loc) · 1.02 KB
/
make_submission.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
""" Usage: python make_submission.py csvpath model_name
csvpath - CSV file that must contain 12320 rows and 176 columns: the predictions for the test set
"""
import sys
import numpy as np
# Build the class list: collect the distinct language labels found in the
# second column of trainingData.csv (header row skipped). Sorting fixes the
# order, so a label's position in `langs` is the column index it is looked up
# by when the prediction rows are decoded below.
with open('trainingData.csv') as train_file:
    train_rows = train_file.readlines()[1:]
langs = set()
for row in train_rows:
    # Each row must have exactly two comma-separated fields; the first
    # (the file path) is not needed here.
    _, language = row.split(',')
    langs.add(language.strip())
langs = sorted(langs)

path = sys.argv[1]  # CSV with one row of per-class scores per test file
name = sys.argv[2]  # model name, used only to build the output file name
cnt = 12320         # number of rows consumed from each input file

# Context managers guarantee all three files are closed (and the output is
# flushed) even if a malformed row raises part-way through.
with open(path, 'r') as pred_file, \
        open('testingData.csv') as test_file, \
        open('predictions/test_' + name + '.csv', 'w') as out_file:
    for row_idx in range(cnt):
        # Drop the last four characters of the listed file name
        # (presumably its extension -- verify against testingData.csv);
        # '.mp3' is appended again when the row is written.
        stem = test_file.readline().strip()[:-4]
        scores = [float(x) for x in pred_file.readline().split(',')]

        # Rank classes by descending score. Sorting (score, index) pairs
        # breaks ties in favour of the higher class index.
        ranked = sorted(
            ((score, idx) for idx, score in enumerate(scores)),
            reverse=True)

        # Emit the three best guesses as "<file>.mp3,<language>,<rank>".
        for rank in range(3):
            lang = langs[ranked[rank][1]]
            out_file.write(stem + '.mp3,' + lang + ',' + str(rank + 1) + '\n')

        if row_idx % 100 == 0:
            # sys.stderr.write behaves the same on Python 2 and 3, unlike
            # the Python 2-only `print >> sys.stderr` statement.
            sys.stderr.write(
                "processed %d / %d images\n" % (row_idx + 1, cnt))