-
Notifications
You must be signed in to change notification settings - Fork 0
/
Main.py
24 lines (20 loc) · 934 Bytes
/
Main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import PageScraper
import ReportGenerator
import sqlbackend
import KeywordAnalysis
from collections import OrderedDict
def sciscraper(topic, queries, nResults, sortby):
    """Run a search and collect summaries, images and keywords for its hits.

    The four arguments are forwarded unchanged to ``PageScraper.esearch``;
    its result is unpacked into exactly two values, a collection of ids
    (``pmc_ids``) and a ``query`` value.

    For every id, ``PageScraper.esummary`` supplies the summary stored in an
    ``OrderedDict`` keyed by that id, and the result of
    ``PageScraper.image_scraper`` is then stored under the summary's
    ``'Images'`` key.  The texts returned by ``KeywordAnalysis.text_grab`` for
    each id are joined with single spaces and handed to
    ``KeywordAnalysis.get_continuous_chunks``; chunks equal to ``topic`` are
    left out of the keyword list.

    Returns:
        A tuple ``(main_dict, query, keywords)``.
    """
    search_result = list(PageScraper.esearch(topic, queries, nResults, sortby))
    pmc_ids, query = search_result

    # First pass: one summary per id, kept in the order the ids arrive.
    main_dict = OrderedDict()
    for pmc in pmc_ids:
        main_dict[pmc] = PageScraper.esummary(pmc)

    # Second pass: attach the scraped images to each stored summary.
    for pmc, summary in main_dict.items():
        summary['Images'] = PageScraper.image_scraper(pmc)

    combined_text = " ".join([KeywordAnalysis.text_grab(pmc) for pmc in pmc_ids])

    keywords = []
    for chunk in KeywordAnalysis.get_continuous_chunks(combined_text):
        # The search topic itself is not reported as a keyword.
        if chunk != topic:
            keywords.append(chunk)

    return main_dict, query, keywords
def sql_insert(main_dict, query):
    """Pass every entry of ``main_dict`` to ``sqlbackend.insert``.

    ``sqlbackend.create_table()`` is called once up front.  Then, for each
    key/record pair in ``main_dict``, one ``sqlbackend.insert`` call is made
    with ``query``, the key, and the record's ``'DOI'``, ``'Title'``,
    ``'Authors'``, ``'Date'``, ``'Abstract'`` and ``'Images'`` values.  The
    ``'Authors'`` and ``'Images'`` values are each joined into a single
    ``', '``-separated string before being passed on.

    Returns:
        None.
    """
    sqlbackend.create_table()
    for pmc, record in main_dict.items():
        row = (
            query,
            pmc,
            record['DOI'],
            record['Title'],
            ', '.join(record['Authors']),
            record['Date'],
            record['Abstract'],
            ', '.join(record['Images']),
        )
        sqlbackend.insert(*row)