This repository has been archived by the owner on May 16, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 15
/
zotero.py
51 lines (43 loc) · 1.97 KB
/
zotero.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from pyzotero import zotero
import pandas as pd
import re
def list_strip(lst: list) -> list:
    """Normalize the whitespace of every string in *lst*, in place.

    Each element loses its leading and trailing whitespace, and every
    internal run of whitespace is collapsed to a single space.

    Args:
        lst: Mutable, integer-indexable sequence of strings.

    Returns:
        The very same object that was passed in, after modification.
    """
    for i, text in enumerate(lst):
        # str.split() without arguments trims both ends and splits on runs
        # of whitespace of any length, so joining with one space collapses
        # them all in a single pass.
        lst[i] = ' '.join(text.split())
    return lst
def creators_to_names(names_lst: list) -> list:
    """Convert a ``creators`` value into a list of display names.

    Every creator is expected to be a mapping that may carry any of the keys
    ``name``, ``firstName``, ``middleName`` and ``lastName``. The parts that
    are present are joined in that order, separated by single spaces.

    Args:
        names_lst: List of creator mappings. A float (pandas stores NaN in
            cells with no value), ``None`` or an empty list all mean
            "no creators".

    Returns:
        One name string per creator, or ``['Anonymous']`` when there are no
        creators at all.
    """
    if names_lst is None or isinstance(names_lst, float) or len(names_lst) == 0:
        return ['Anonymous']

    name_fields = ('name', 'firstName', 'middleName', 'lastName')
    authors = []
    for creator in names_lst:
        parts = []
        for field in name_fields:
            value = creator.get(field)
            # Anything that is not a string (absent key, None, NaN) is
            # treated as a missing name part.
            if isinstance(value, str):
                # split() drops surrounding whitespace and breaks up inner
                # runs, so the final join never yields doubled spaces.
                parts.extend(value.split())
        authors.append(' '.join(parts))
    return authors
def extrtact_data(df: pd.DataFrame) -> dict:
    """Build citation-style records for every non-attachment row of *df*.

    Args:
        df: Frame with at least the columns ``itemType``, ``creators``,
            ``date``, ``title``, ``key`` and ``version``. The frame itself
            is left unmodified.

    Returns:
        A dict keyed by row index label. Each value is a dict with the keys
        ``author_year``, ``title``, ``authors``, ``year``, ``itemType``,
        ``key`` and ``version``.
    """
    # Filter first and work on a copy: attachments never reach the output,
    # and the caller's frame must not grow helper columns as a side effect.
    items = df[df['itemType'] != 'attachment'].copy()

    four_digits = re.compile(r'([0-9]{4})')

    def first_year(raw) -> str:
        # First run of four digits in the date text, or '*' when none exists.
        found = four_digits.search(str(raw))
        return found.group(1) if found else '*'

    def citation_label(names: list, year: str) -> str:
        # At most two names are shown; longer lists are cut with ' ...'.
        shown = ', '.join(names) if len(names) <= 2 else ', '.join(names[:2]) + ' ...'
        return '(' + shown + ', ' + year + ')'

    # Plain lists (rather than Series.apply + string arithmetic) keep the
    # columns object-typed even when no rows are left after filtering.
    items['authors'] = [creators_to_names(entry) for entry in items['creators']]
    items['year'] = [first_year(raw) for raw in items['date']]
    items['author_year'] = [
        citation_label(names, year)
        for names, year in zip(items['authors'], items['year'])
    ]

    columns = ['author_year', 'title', 'authors', 'year', 'itemType', 'key', 'version']
    return items[columns].T.to_dict()
def fetch_zotero_items(LIBRARY_ID: int, LIBRARY_TYPE: str, API_KEY: str) -> pd.DataFrame:
    """Download the items of a Zotero library into a DataFrame.

    Args:
        LIBRARY_ID: Identifier of the library, passed to ``zotero.Zotero``.
        LIBRARY_TYPE: Library type string, passed to ``zotero.Zotero``.
        API_KEY: API key, passed to ``zotero.Zotero``.

    Returns:
        A frame with one row per item, built from each item's ``'data'``
        mapping. The frame is empty when the library returns no items.

    Raises:
        KeyError: If a returned item has no ``'data'`` entry.
        Exception: Errors raised by the pyzotero client propagate unchanged.
    """
    lib = zotero.Zotero(LIBRARY_ID, LIBRARY_TYPE, API_KEY)
    # Per the pyzotero documentation, items() on its own yields only the
    # first page of results; everything() follows the pagination so large
    # libraries are not silently truncated.
    items = lib.everything(lib.items())
    # Building the frame from a list keeps the empty-library case valid (it
    # gives an empty frame). The previous version hit a KeyError there,
    # printed it, and then failed with UnboundLocalError on ``return df``.
    return pd.DataFrame([item['data'] for item in items])
# if __name__ == '__main__':
# try:
# print('Start running...')
# df = fetch_zotero_items(LIBRARY_ID, LIBRARY_TYPE, API_KEY)
# data = extrtact_data(df)
# for item in data.items():
# print(item[1]['title'])
# except KeyboardInterrupt as k:
# print('\nKey pressed to interrupt...')