-
Notifications
You must be signed in to change notification settings - Fork 0
/
SearchCodePython27.py
57 lines (38 loc) · 1.51 KB
/
SearchCodePython27.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import os
import sys
import subprocess
import collections
import pandas as pd
import string
# Usage: SearchCodePython27.py <rootdir> <keyword> <output_csv>
#
# Walks <rootdir>, scans every file whose name contains one of
# FILENAME_MARKERS, and writes a CSV listing each distinct line that contains
# <keyword> (case-insensitively) together with how often it occurs in that
# file. Written to run unchanged on both Python 2.7 and Python 3.

# Substrings that select a file for scanning. This is a plain substring test
# on the file name (not an extension check), so e.g. "mysql_notes.txt" is
# scanned too.
FILENAME_MARKERS = (".ext", "sql")

# Column order of the resulting table / CSV.
OUTPUT_COLUMNS = ['cnt', 'dataname', 'filesource']

# Matching lines are cut to this many characters before being counted, so two
# long lines sharing the same first MAX_LINE_CHARS characters count as one.
MAX_LINE_CHARS = 1000


def count_matching_lines(path, keyword):
    """Count the lines of one file that contain *keyword*.

    The comparison is case-insensitive and is made against the full line;
    the line is truncated to MAX_LINE_CHARS characters only afterwards.

    Args:
        path: Path of the text file to scan.
        keyword: Text to look for. An empty keyword matches every line.

    Returns:
        A ``collections.Counter`` mapping each (truncated) matching line to
        the number of times it occurs in the file.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    needle = keyword.upper()
    # The context manager closes the handle even if reading fails.
    with open(path, "r") as handle:
        # Split on '\n' only, so a trailing '\r' stays part of the line.
        rows = handle.read().split('\n')
    return collections.Counter(
        row[:MAX_LINE_CHARS] for row in rows if needle in row.upper()
    )


def search_tree(rootdir, keyword, name_markers=FILENAME_MARKERS):
    """Scan every selected file below *rootdir* for *keyword*.

    Args:
        rootdir: Directory that is walked recursively.
        keyword: Text to look for (case-insensitive).
        name_markers: Substrings; a file is scanned when its name contains
            at least one of them.

    Returns:
        A ``pandas.DataFrame`` with the columns ``cnt`` (occurrences),
        ``dataname`` (the matching line) and ``filesource`` (path of the file
        the line was found in). It has no rows when nothing matched.
    """
    records = []
    for folder, _subs, files in os.walk(rootdir):
        for filename in files:
            if not any(marker in filename for marker in name_markers):
                continue
            path = os.path.join(folder, filename)
            counts = count_matching_lines(path, keyword)
            records.extend((cnt, line, path) for line, cnt in counts.items())
    # Build the frame once instead of appending per file, which re-copies
    # the accumulated data on every iteration.
    return pd.DataFrame(records, columns=OUTPUT_COLUMNS)


def main(argv=None):
    """Run the search from the command line and write the result as CSV.

    Args:
        argv: Argument vector laid out like ``sys.argv`` (program name, root
            directory, keyword, output CSV path). Defaults to ``sys.argv``.

    Raises:
        SystemExit: If fewer than three arguments follow the program name.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.exit("usage: %s <rootdir> <keyword> <output_csv>" % argv[0])

    rootdir, singlekeyword, extract_path = argv[1], argv[2], argv[3]
    print(rootdir)
    search_tree(rootdir, singlekeyword).to_csv(extract_path, index=False)


if __name__ == "__main__":
    main()