-
Notifications
You must be signed in to change notification settings - Fork 2
/
rawcom.py
45 lines (27 loc) · 1.13 KB
/
rawcom.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import pickle
# Stage 1: load the pickled inputs and keep only the contexts whose function
# id (fid) appears in the QA source set.

# Directory holding the input pickles; the output files go under
# "<databox>/output" later in the script.
databox = "../qadatasetKstudy"

# NOTE(review): pickle.load can execute arbitrary code while unpickling.
# These files must come from a trusted location.
# Each file is opened in a `with` block so the handle is closed as soon as
# the load finishes instead of being left to the garbage collector.
with open(databox + "/qatypeA.pkl", "rb") as fh:
    source = pickle.load(fh)  # iterated below to obtain the fids to keep
with open(databox + "/context.pkl", "rb") as fh:
    raw = pickle.load(fh)  # indexed by fid below; presumably fid -> context

# Pickle with the train/val/test fid lists (keys 'trainfid', 'valfid',
# 'testfid' are read below).
splitfile = "/nfs/projects/funcom/data/java/output/trainvaltest_ids.pkl"
with open(splitfile, "rb") as fh:
    spliter = pickle.load(fh)

# First filter out the good fids: keep only entries of `raw` whose fid is in
# `source`. A fid in `source` that is missing from `raw` raises KeyError.
mainfile = {fid: raw[fid] for fid in source}
raw.clear()  # drop the unfiltered data; only `mainfile` is used from here on

trainfid = spliter['trainfid']
valfid = spliter['valfid']
testfid = spliter['testfid']
# Stage 2: split the filtered contexts into train/val/test and write each
# split to "<databox>/output/context.<split>", one line per fid in the form
# "<fid>, <s> <context> </s>".

# Split train-val-test. Fids listed in a split but absent from `mainfile`
# are skipped. The splits are built before any output file is opened so a
# failure here does not leave truncated output files behind.
train = {fid: mainfile[fid] for fid in trainfid if fid in mainfile}
val = {fid: mainfile[fid] for fid in valfid if fid in mainfile}
test = {fid: mainfile[fid] for fid in testfid if fid in mainfile}
mainfile.clear()  # the three split dicts now hold everything that is needed

# The `with` statement guarantees all three files are flushed and closed even
# if one of the writes raises.
with open(databox + "/output/context.train", 'w') as ftrain, \
        open(databox + "/output/context.val", 'w') as fval, \
        open(databox + "/output/context.test", 'w') as ftest:
    for f, context in train.items():
        ftrain.write('{}, <s> {} </s>\n'.format(f, context))
    for f, context in val.items():
        fval.write('{}, <s> {} </s>\n'.format(f, context))
    for f, context in test.items():
        ftest.write('{}, <s> {} </s>\n'.format(f, context))