-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
58 lines (46 loc) · 1.54 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import csv, sys, config, sys
from src import log, scrapper, file
# --- Start of run ---
# Alias the two helper classes exposed by the `src` modules, then rebind
# `config` from the imported module to its `config` attribute (the settings
# object that is indexed by key further down).
Scrapper = scrapper.Scrapper
File = file.File
config = config.config
# Minimal command-line handling. Recognised options:
#   --thread=<n|max>   overrides config['number_of_thread']; the literal
#                      "max" is kept as a string, anything else must parse
#                      as an int (int() raises ValueError otherwise).
#   --writeto=<name>   overrides config['write_to'].
# Arguments that match neither option are ignored.
# Slice instead of pop(0) so sys.argv itself is left untouched and an empty
# argv does not raise IndexError.
args = sys.argv[1:]
thread_count = config['number_of_thread']
write_to = config['write_to']
for arg in args:
    # Match the option only as a prefix; a substring test would also fire on
    # values that merely contain the option text.
    if arg.startswith("--thread="):
        thread_input = arg[len("--thread="):]
        thread_count = thread_input if thread_input == "max" else int(thread_input)
    elif arg.startswith("--writeto="):
        write_to = arg[len("--writeto="):]
# Create the output file object for the chosen destination name.
file = File(name=write_to)

# Append columns: one appendColumn call per entry of
# config['column_to_append'].
extra_columns = config['column_to_append']
for column_key, column_value in extra_columns.items():
    file.appendColumn(column_key, column_value)

# Load the urls from the input file named in config['read_from'].
source_file = File(name=config['read_from']['name'])
open_options = {
    # Name of the column that holds the urls.
    "column_name": config['read_from']['column'],
}
urls = source_file.open(open_options)
# Run the scrapper over every loaded url.
print("\nScrapping all urls... ( This may take awhile )")
request_timeout = config["timeout"]
scrapper = Scrapper(urls, request_timeout)
scrapper.setThreadCount(thread_count)  # an int, or the string "max" from the CLI
scrapper.work()

# Hand the scrapped items to the output file as its contents.
scrapped_items = scrapper.scrappedItems
file.setContents(scrapped_items)
# Write the collected contents to the output file.
print("\nWriting to file", write_to , "... ( This may take awhile )")
file.write()

# Append the textual form of the log dump to the configured log file.
print("Dumping Log to file ...")
# NOTE(review): path and name are joined by plain concatenation, so
# config['logs']['path'] presumably ends with a path separator - confirm.
fullPath = config['logs']['path'] + config['logs']['name']
# Append-only mode is enough (nothing is read back), and the `with` block
# closes the file, so no explicit close() is needed.
with open(fullPath, 'a') as f:
    # write() emits the string in one call; writelines() on a str would
    # iterate it character by character.
    f.write(str(log.dump()))
print("Operation finished")