forked from neurolibre/neurolibre.com
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathautogen_articles.py
48 lines (42 loc) · 1.71 KB
/
autogen_articles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import requests
import yaml
from os import walk, listdir
import sys
# Base directory, taken from the first command-line argument; the
# data/research_edit and data/research_autogen folders are looked up under it.
gh_dr = sys.argv[1]
def get_yml(inp):
    """Load a YAML file and return its top-level mapping as a new dict.

    Args:
        inp: Path of the YAML file to read.

    Returns:
        A dict holding the top-level keys and values of the document, or an
        empty dict when the file contains no YAML document at all.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
        AttributeError: If the top-level node is not a mapping.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default locale encoding.
    with open(inp, encoding="utf-8") as file:
        # NOTE(review): full_load resolves more tags than safe_load. If these
        # files can ever come from an untrusted source, switch to
        # yaml.safe_load.
        documents = yaml.full_load(file)
    if documents is None:
        # An empty file parses to None, which has no .items().
        return {}
    return dict(documents.items())
def get_list(path):
    """Return the names of the entries in *path* that end with ``.yml``.

    Only the bare entry names are returned (they are not joined with
    *path*), in the order reported by ``listdir``.
    """
    return [entry for entry in listdir(path) if entry.endswith(".yml")]
# Work out which user-edited YAML files have no generated counterpart yet.
yaml_user = get_list(gh_dr + '/data/research_edit')
yaml_exists = get_list(gh_dr + '/data/research_autogen')
# Sorted so that the processing (and log) order is deterministic.
to_proc = sorted(set(yaml_user) - set(yaml_exists))
if not to_proc:
    print('MRpub >>>>> Already up-to-date, nothing to process.')

for cur_yml in to_proc:
    print('MRpub >>>>> Creating website data for: ' + cur_yml)
    pub = get_yml(gh_dr + '/data/research_edit/' + cur_yml)

    # Without a DOI neither lookup can be made; skip this file instead of
    # aborting the whole run with a KeyError.
    doi = pub.get('doi')
    if not doi:
        print('MRpub >>>>> No doi field found, skipping: ' + cur_yml)
        continue

    try:
        # Semantic Scholar supplies the title and abstract.
        response = requests.get(
            'https://api.semanticscholar.org/v1/paper/' + doi, timeout=30)
        # Crossref supplies the author list with affiliations.
        response2 = requests.get(
            'https://api.crossref.org/works/' + doi, timeout=30)
    except requests.RequestException as err:
        # A network failure for one article should not stop the others; the
        # file stays unprocessed and is picked up again on the next run.
        print('MRpub >>>>> Request failed for: ' + doi
              + ' (' + cur_yml + '): ' + str(err))
        continue

    if response.status_code != 200 or response2.status_code != 200:
        print('MRpub >>>>> Cannot find semanticscholar/crossref entry for: '
              + doi + ' (' + cur_yml + ')')
        continue

    paper = response.json()
    work = response2.json()['message']

    pub['authors'] = work.get('author', [])
    for author in pub['authors']:
        # Flatten the affiliation list to the name of its first entry; use
        # an empty string when the author has no affiliation recorded.
        affiliations = author.get('affiliation') or []
        author['affiliation'] = (
            affiliations[0].get('name', '') if affiliations else '')

    pub['article_url'] = 'https://doi.org/' + doi
    pub['title'] = paper['title']
    pub['abstract'] = paper['abstract']

    # Write under gh_dr, the same directory the "already generated" check
    # above reads from, instead of relative to the current directory.
    with open(gh_dr + '/data/research_autogen/' + cur_yml, 'w') as outfile:
        yaml.dump(pub, outfile, default_flow_style=False)