-
Notifications
You must be signed in to change notification settings - Fork 80
/
update_mathscinet.py
executable file
·37 lines (30 loc) · 1.43 KB
/
update_mathscinet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/env python3
import pandas as pd
import csv
import requests
from io import StringIO
# Remote CSV to download and the local CSV that is merged with it and
# then overwritten in place.
file_in = "https://mathscinet.ams.org/msnhtml/annser.csv"
file_out = "journals/journal_abbreviations_mathematics.csv"

# Seconds to wait on the server before giving up. Without a timeout,
# requests.get() can block indefinitely on a stalled connection.
request_timeout = 30

# Set headers to mimic a browser request
headers = {
    'sec-ch-ua': '"Google Chrome";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
}

# Download the remote CSV; anything other than HTTP 200 aborts the run
# before the local file is touched.
response = requests.get(file_in, headers=headers, timeout=request_timeout)
if response.status_code != 200:
    raise RuntimeError(f"Failed to fetch the file. Status code: {response.status_code}")

# Read only the first two columns, drop rows with a missing value, and
# select the columns by name so an unexpected header raises a KeyError
# instead of silently writing the wrong data.
# NOTE(review): response.text is decoded with the encoding requests infers
# from the response -- confirm it matches the encoding of the served CSV.
df_new = pd.read_csv(StringIO(response.text), usecols=[0, 1]).dropna()[["Full Title", "Abbrev"]]

# Get our last mathematics data file (it has no header row, so the column
# names are supplied here to line up with the downloaded data)
df_old = pd.read_csv(file_out, sep=",", escapechar="\\",
                     header=None, names=["Full Title", "Abbrev"])

# Concatenate, remove duplicates and sort by journal name
df = pd.concat([df_new, df_old], axis=0).drop_duplicates(
).sort_values(by=["Full Title", "Abbrev"])

# Remove values where journal name is equal to abbreviation
# (compared case-insensitively)
df = df[df["Full Title"].str.lower() != df["Abbrev"].str.lower()]

# Save the end file in the same path as the old one, without header or
# index and with every field quoted
df.to_csv(file_out, sep=",", escapechar="\\", index=False, header=False, quoting=csv.QUOTE_ALL)