scripts/update_mathscinet.py

#!/usr/bin/env python3

import pandas as pd
import csv
import requests
from io import StringIO

file_in = "https://mathscinet.ams.org/msnhtml/annser.csv"
file_out = "journals/journal_abbreviations_mathematics.csv" 
# set headers to mimic browser request
headers = {
    'sec-ch-ua': '"Google Chrome";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
}
response = requests.get(file_in, headers=headers)

if response.status_code == 200:
    df_new = pd.read_csv(StringIO(response.text), usecols=[0, 1]).dropna()[["Full Title", "Abbrev"]]
else:
    raise Exception(f"Failed to fetch the file. Status code: {response.status_code}")

# Get our last mathematics data file
df_old = pd.read_csv(file_out, sep=",", escapechar="\\",
                     header=None, names=["Full Title", "Abbrev"])

# Concatenate, remove duplicates and sort by journal name
df = pd.concat([df_new, df_old], axis=0).drop_duplicates(
).sort_values(by=["Full Title", "Abbrev"])

# Remove values where journal name is equal to abbreviation
df = df[df["Full Title"].str.lower() != df["Abbrev"].str.lower()]

# Save the end file in the same path as the old one
df.to_csv(file_out, sep=",", escapechar="\\", index=False, header=False, quoting=csv.QUOTE_ALL)