-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
57 lines (42 loc) · 1.38 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import logging
import time
from typing import Optional
from internet import is_connected
from remarkable.uploader import Uploader
from scraping.scraper.spiders.economist import Economist
from pdf.creator import create_latest
from pathlib import Path
import os, sys
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
def scrape():
    """Crawl with the ``Economist`` spider inside a Scrapy ``CrawlerProcess``.

    The Scrapy environment is prepared first so that the project settings
    are looked up only after ``SCRAPY_SETTINGS_MODULE`` has been exported.
    """
    logging.info("Scraping...")

    # Must happen before get_project_settings(): that helper sets the
    # environment variable naming the settings module.
    configure_python_path_for_scraping()
    settings = get_project_settings()

    crawler = CrawlerProcess(settings)
    crawler.crawl(Economist)
    # NOTE(review): start() is expected to block until the crawl finishes
    # (see the Scrapy CrawlerProcess documentation).
    crawler.start()

    logging.info("Scraping complete")
def configure_python_path_for_scraping():
    """Prepare the interpreter so the Scrapy project can be located.

    Appends ``<current working directory>/scraping`` to ``sys.path`` and
    sets ``SCRAPY_SETTINGS_MODULE`` to ``scraper.settings``.
    """
    # Resolved against the working directory at call time, not this file.
    scraping_dir = Path.cwd() / "scraping"
    sys.path.append(str(scraping_dir))

    os.environ["SCRAPY_SETTINGS_MODULE"] = "scraper.settings"
def upload(file: Path) -> None:
    """Send *file* to the "Economist" folder using a fresh ``Uploader``.

    Args:
        file: Path of the document to upload; it is handed to the uploader
            as a plain string.
    """
    logging.info("Uploading...")
    Uploader().upload_file_to_folder(str(file), "Economist")
    logging.info("Upload complete")
def setup_logging():
    """Send log records of level INFO and above to standard output.

    Attaches a ``StreamHandler`` writing to ``sys.stdout`` to the root logger
    and makes sure the root logger lets INFO records through. Calling this
    function more than once does not attach duplicate handlers.
    """
    root = logging.getLogger()

    # The root logger defaults to WARNING, which would silently discard the
    # logging.info(...) progress messages this script relies on. Only lower
    # the threshold; a more verbose level that is already set is kept.
    if root.level > logging.INFO:
        root.setLevel(logging.INFO)

    # Avoid stacking a second stdout handler (and duplicated output) if the
    # function is called again.
    already_attached = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is sys.stdout
        for handler in root.handlers
    )
    if not already_attached:
        root.addHandler(logging.StreamHandler(sys.stdout))
def try_upload(latest_pdf: Optional[Path]):
    """Upload *latest_pdf* if one was produced; otherwise do nothing.

    Args:
        latest_pdf: Path of the newly generated PDF, or a falsy value
            (such as ``None``) when there is nothing to upload.
    """
    if not latest_pdf:
        return

    logging.info("New PDF generated")
    upload(latest_pdf)
if __name__ == '__main__':
    # Wait for connectivity, re-checking every 300 seconds, before doing
    # anything else.
    while True:
        if is_connected():
            break
        time.sleep(300)

    setup_logging()
    logging.info("Begin economist2rm")

    # Pipeline: scrape, build the latest PDF, then upload it if one exists.
    scrape()
    new_pdf = create_latest()
    try_upload(new_pdf)

    logging.info("End economist2rm")