forked from MarkEEaton/open-journal-matcher
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcsvdata.py
30 lines (26 loc) · 929 Bytes
/
csvdata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
""" extract issns from journallist.csv """
import csv
import json
import regex
# Shape every accepted ISSN must have: four digits, a hyphen, three digits,
# then a final digit or an upper/lower-case X. Compiled once because the same
# check is applied to whichever ISSN column is used for a row.
_ISSN_PATTERN = regex.compile(r'^[0-9]{4}-[0-9]{3}[0-9xX]$')

# ISSNs collected from rows that pass the language filter and the format check.
output = []

with open("journallist-June2020.csv", newline="") as csvfile:
    data = csv.reader(csvfile)
    for row in data:
        try:
            language = row[30]
        except IndexError:
            # Rows with fewer than 31 columns have no language field to test;
            # skip them quietly, but let any other error surface instead of
            # being swallowed.
            continue

        # Only keep rows whose language column mentions English
        # (case-insensitive substring match).
        if "english" not in language.lower():
            continue

        # Column 4 is preferred; column 3 is consulted only when column 4 is
        # empty. A malformed value in column 4 does NOT fall back to column 3.
        issn = row[4] or row[3]
        if not issn:
            print("no issn")
        elif _ISSN_PATTERN.match(issn):
            output.append(issn)
        else:
            print(issn, 'regex does not match')

# Persist the collected ISSNs as a single JSON array.
with open("issnlist-multilingual.txt", "w") as issnfile:
    issnfile.write(json.dumps(output))