-
Notifications
You must be signed in to change notification settings - Fork 21
/
Training_zip_with_context.py
96 lines (82 loc) · 2.81 KB
/
Training_zip_with_context.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
__author__ = 'andreadsouza'
import sqlite3
import json
import sys
import multiprocessing as mp
from context import extract_types,process_tokens,process_obj_name
# Number of folds. main() creates one TRAINSET_<n> table per fold
# (n = 1..FOLDS) and assigns projects to folds round-robin.
FOLDS=10
# Database argument passed to sqlite3.connect() by every function here.
DB_CONN="pyty.db"
def run_query_for_prj(fold_no, query_text):
    """Insert one JSON-encoded query into every training table but its own fold.

    The query is parsed, its fields are flattened into comma-separated
    strings, and one row is written to TRAINSET_<n> for every fold n in
    1..FOLDS other than ``fold_no``.

    Args:
        fold_no: Fold number whose table must NOT receive this row.
        query_text: JSON object with the keys "folder", "file", "calls"
            (a list of objects each carrying a "tgt" entry), "other_calls",
            "context", "obj" and "type".

    Raises:
        ValueError: if ``query_text`` is not valid JSON.
        KeyError: if one of the required keys is missing.

    A failing INSERT (sqlite3.OperationalError) is printed and skipped so
    that the remaining folds still receive the row.
    """
    query_info = json.loads(query_text)
    print("Folder-name:" + query_info["folder"] + " File:" + query_info["file"])

    # Flatten every list-valued field into one comma-separated string.
    calls = ','.join(call["tgt"] for call in query_info["calls"])
    other_calls = ','.join(query_info["other_calls"])
    context = query_info['context']
    arg_types = ','.join(extract_types(context))
    arg_values = ','.join(process_tokens(context))
    obj_name = ','.join(process_obj_name(query_info['obj']))
    row = (query_info['type'], obj_name, calls, arg_types, arg_values, other_calls)

    # Only the table name is formatted in; all values are bound parameters.
    insert_sql = ("INSERT INTO TRAINSET_{fold} "
                  "(obj_type, obj_name, calls, arg_types, arg_values, other_calls) "
                  "VALUES (?, ?, ?, ?, ?, ?)")

    # The connection is opened only once the row is ready, and is always
    # closed, even when an unexpected error interrupts the inserts.
    conn = sqlite3.connect(DB_CONN)
    try:
        cursor = conn.cursor()
        for fold in range(1, FOLDS + 1):
            if fold == fold_no:
                continue
            try:
                cursor.execute(insert_sql.format(fold=str(fold)), row)
            except sqlite3.OperationalError as err:
                print(err)
        conn.commit()
    finally:
        conn.close()
def run_queries(fold_no, prj_query):
    """Process every query contained in one project's query text.

    ``prj_query`` holds the project's queries separated by a line made of
    twenty dashes. Each non-blank segment is passed to run_query_for_prj().

    Args:
        fold_no: Fold number forwarded unchanged to run_query_for_prj().
        prj_query: Text of all queries that belong to one project.
    """
    delim = '-' * 20 + '\n'
    for query in prj_query.split(delim):
        # split() yields a blank piece after the final delimiter (and for
        # empty input); it is not a query and json.loads() would reject it,
        # aborting the rest of the job.
        if not query.strip():
            continue
        run_query_for_prj(fold_no, query)
def main():
    """Rebuild the TRAINSET_<n> tables and fill them from the query files.

    Steps:
      1. Drop and recreate one TRAINSET_<n> table per fold in DB_CONN.
      2. Read the library names (text before the first ':' of each line)
         from Top100.txt.
      3. For each library read queries/query-<lib>.txt. Every chunk of lines
         terminated by a line of twenty '$' characters is submitted to a
         process pool as one run_queries() job; fold numbers are assigned
         round-robin (1..FOLDS) per submitted chunk.
      4. Wait for all jobs; a failed job is reported and does not stop the
         others.
    """
    conn = sqlite3.connect(DB_CONN)
    try:
        cursor = conn.cursor()
        for fold in range(1, FOLDS + 1):
            cursor.execute(
                '''DROP TABLE IF EXISTS TRAINSET_{fold_num}'''.format(fold_num=str(fold)))
            cursor.execute(
                '''CREATE TABLE TRAINSET_{fold_num} '''
                '''(obj_type text, obj_name text, calls text, '''
                '''arg_types text, arg_values text, other_calls text)'''.format(
                    fold_num=str(fold)))
        conn.commit()
    finally:
        conn.close()

    q_libs = []
    with open('Top100.txt', 'r') as lib_listing:
        for line in lib_listing:
            # Strip so that a line without ':' does not leave its newline in
            # the name; blank lines carry no library at all.
            lib = line.split(':')[0].strip()
            if not lib:
                continue
            print("LIB: " + lib)
            q_libs.append(lib)

    marker = '$' * 20
    pool = mp.Pool(mp.cpu_count())
    try:
        jobs = []
        count = 0
        for lib in q_libs:
            query_file = 'queries/query-' + lib + '.txt'
            with open(query_file, 'r') as query_fh:
                chunk = []
                # NOTE: lines after the last marker line are never submitted.
                for line in query_fh:
                    if line.strip() != marker:
                        chunk.append(line)
                        continue
                    try:
                        job = pool.apply_async(
                            run_queries, (count % FOLDS + 1, ''.join(chunk)))
                    except Exception:
                        # The chunk is kept, so it is re-sent with the next one.
                        print("Unexpected error in worker: %s" % (sys.exc_info()[0],))
                    else:
                        jobs.append(job)
                        chunk = []
                        count += 1

        for job in jobs:
            try:
                job.get()
            except Exception as err:
                # Best effort: report the failure and keep collecting the rest.
                print("Worker job failed: %r" % (err,))
    finally:
        # Always release the worker processes, even if submission failed.
        pool.close()
        pool.join()
# Run the pipeline only when executed as a script, not when imported
# (worker processes may re-import this module).
if '__main__' == __name__:
    main()