-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
44 lines (33 loc) · 840 Bytes
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import threading
from queue import Queue
from crawler import Crawler
from fiand import *
# --- Per-project configuration --------------------------------------------
# Project doubles as the on-disk folder name holding the crawl state files.
Project = 'wiki_ped'
startpage = 'https://www.wikipedia.org/'
domain = domain_name(startpage)  # restricts the crawl to the start page's domain
queued_f = Project + '/queue.txt'  # links discovered but not yet crawled
crawled_f = Project +'/crawled.txt'  # links already processed
threads = 6  # size of the worker thread pool
queue = Queue()  # in-memory work queue shared by all worker threads
# NOTE(review): instantiating Crawler presumably bootstraps the project
# folder and seed files from startpage — confirm against crawler.py.
Crawler(Project, startpage, domain)
def create_crawlers():
    """Spawn the pool of crawler worker threads.

    Threads are daemonic so they die with the main thread instead of
    keeping the process alive once the crawl is finished.
    """
    for _ in range(threads):
        worker = threading.Thread(target=task, daemon=True)
        worker.start()
def task():
    """Worker loop: pull a link off the shared queue, crawl it, mark it done.

    Runs forever; the thread is expected to be daemonic so it terminates
    with the main thread.
    """
    while True:
        link = queue.get()
        worker_name = threading.current_thread().name
        Crawler.crawlpage(worker_name, link)
        queue.task_done()
def create_task():
    """Feed queued links to the worker threads until the queue file drains.

    Repeatedly loads the on-disk queue file, hands every link to the
    worker pool, and blocks until the whole batch has been crawled.
    Crawling a batch appends newly discovered links to the queue file,
    so the loop keeps going until a pass leaves the file empty.

    Fix: the original recursed through do_work() after every batch
    (create_task -> do_work -> create_task), growing the call stack one
    frame per batch and risking RecursionError on a long crawl.  An
    explicit loop preserves the same net behavior with a flat stack.
    """
    while True:
        # Hand the current batch of queued links to the workers.
        for link in ftoset(queued_f):
            queue.put(link)
        # Block until every link in this batch has been processed.
        queue.join()
        # Stop once crawling produced no new queued links.
        if len(ftoset(queued_f)) == 0:
            break
def do_work():
    """Kick off crawling when the on-disk queue file still holds links."""
    pending = ftoset(queued_f)
    if len(pending) > 0:
        create_task()
# Module entry point: start the worker pool, then drain the queue file.
# Runs on import as well — there is no __main__ guard.
create_crawlers()
do_work()