-
Notifications
You must be signed in to change notification settings - Fork 1
/
getcitation.js
341 lines (283 loc) · 13.7 KB
/
getcitation.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
/* global chse */
// Citation style produced by chse.citeDOI:
//   1 = short citation (journal reference only, wrapped in a link to the DOI)
//   any other value (by convention 2) = long citation (authors, title, journal reference, DOI link)
chse.CITATION_TYPE = 1;
/**
 * Builds a bare citation for an arbitrary website and passes it to `callback`.
 * The author/title of a page cannot be retrieved without an external server,
 * so the citation consists only of the URL and the date it was accessed.
 *
 * @param {String} URL address of the website being cited
 * @param {Function} callback called with the citation string; `this` is forwarded from this call
 */
chse.citeWebsite = function (URL, callback) {
    const MONTH_NAMES = [
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    ];
    const now = new Date();
    const accessDate = `${MONTH_NAMES[now.getMonth()]} ${now.getDate()}, ${now.getFullYear()}`;
    // the parenthesis opened before "accessed" must be closed before the final period
    const citation = `${URL} (accessed ${accessDate}).`;
    callback.call(this, citation);
};
/**
 * Obtains the CrossRef metadata of a DOI and forwards it to chse.citeDOI.
 * A cached copy (stored in localStorage under chse.LS_KEY) is used when present;
 * otherwise the CrossRef REST API is queried and the result is cached via chse.cacheDOI.
 * On any failure chse.citeDOI is called with `false` as the metadata.
 *
 * @param {String} doi the DOI to look up
 * @param {Function} callback passed through unchanged to chse.citeDOI
 */
function getDOIMetaData(doi, callback) {
    let cachedObject = null;
    try {
        cachedObject = JSON.parse(localStorage.getItem(chse.LS_KEY));
    } catch (error) {
        // an unreadable cache must not prevent a fresh lookup
        console.log("cached DOI metadata unreadable", error);
    }
    const cachedMetadata = cachedObject ? cachedObject[doi] : undefined;
    if (cachedMetadata) {
        chse.citeDOI(doi, callback, cachedMetadata);
        return;
    }

    // DOI suffixes may contain characters such as "<", ">", "#" or "?" which would
    // corrupt the request URL; escape each path segment but keep the "/" separators
    const encodedDOI = doi.split("/").map(part => encodeURIComponent(part)).join("/");

    // ISSUE: I'm supposed to send my email along with this API call, how'd I do that?
    // NOTE(review): CrossRef's etiquette guidelines describe a `mailto` query parameter
    // for this purpose - confirm against the current API documentation before adding it
    const xhr = new XMLHttpRequest();
    xhr.open("GET", `https://api.crossref.org/works/${encodedDOI}`, true);
    xhr.setRequestHeader("Content-type", "text/plain");

    // handlers are registered before send() so that no event can be missed
    xhr.onload = function (e) {
        let response;
        try {
            // read from the request object itself; Event.srcElement is deprecated
            response = JSON.parse(xhr.responseText);
        } catch (error) {
            console.log("JSON.parse failed", this, e);
            chse.citeDOI(doi, callback, false);
            return;
        }
        // `!response` guards against a body that parses to null
        if (!response || response.status !== "ok") {
            console.log("response.status not ok", this, e);
            chse.citeDOI(doi, callback, false);
            return;
        }
        const metadata = response.message;
        chse.cacheDOI(doi, metadata);
        chse.citeDOI(doi, callback, metadata);
    };
    xhr.onerror = function (error) {
        console.log("xhr.onerror", this, error);
        chse.citeDOI(doi, callback, false);
    };
    xhr.send();
}
// Short citation format: Journal name Year, Volume (Issue), Pages
// e.g. *J. Am. Chem. Soc.* **2018,** *140,* 1855
// or   *J. Am. Chem. Soc.* **2018,** *140* (2), 1855
/**
 * Produces the short citation of a paper as a Markdown link pointing to its DOI.
 *
 * @param {String} doi the DOI of the paper
 * @param {Object} metadata metadata object handed to getTitleYearIssuePagesForCitation
 * @returns {String} `[journal reference](https://doi.org/<doi>)`
 */
function shortCiteDOI(doi, metadata) {
    const linkText = getTitleYearIssuePagesForCitation(metadata);
    const linkTarget = `https://doi.org/${doi}`;
    return `[${linkText}](${linkTarget})`;
}
// text passed to the callback by chse.citeDOI when no citation could be produced
// (an HTML comment, so it stays invisible in rendered Markdown)
chse.ERROR_MSG = "<!-- Citation lookup unsuccessful -->";
// doi must be the doi (10(.\d+)+) and nothing else
/**
 * Generates the citation for a DOI and passes it to `callback`.
 *
 * - `metadata === undefined`: the metadata is fetched first (getDOIMetaData calls
 *   back into this function once it has a result), nothing else happens now.
 * - `metadata === false`: the lookup failed; the callback receives chse.ERROR_MSG.
 * - otherwise: a short (chse.CITATION_TYPE === 1) or long citation is built;
 *   if building it throws, the callback receives chse.ERROR_MSG.
 *
 * The callback is invoked exactly once, outside the try block, so an exception
 * thrown by the callback itself is neither misreported as a citing problem nor
 * followed by a second invocation.
 *
 * @param {String} doi the DOI to cite
 * @param {Function} callback called with the citation string; `this` is forwarded from this call
 * @param {Object|Boolean} [metadata] metadata of the DOI, `false` on lookup failure
 */
chse.citeDOI = function (doi, callback, metadata) {
    if (metadata === undefined) {
        getDOIMetaData(doi, callback);
        return;
    }

    let output;
    if (metadata === false) {
        output = chse.ERROR_MSG;
    } else {
        try {
            if (chse.CITATION_TYPE === 1) {
                output = shortCiteDOI(doi, metadata);
            } else {
                output = citeAuthors(metadata.author);
                output += `${citeTitle(metadata.title[0])} `;
                output += getTitleYearIssuePagesForCitation(metadata);
                output += `. [DOI: ${doi}](https://doi.org/${doi}).`;
            }
        } catch (error) {
            console.log("citing problem", error);
            output = chse.ERROR_MSG;
        }
    }

    callback.call(this, output);
};
/**
 * Builds the journal part of a citation in Markdown:
 *   *Journal* **Year,** *Volume* (Issue), Pages
 * Every element after the year is optional; an article number ("No. x") is used
 * when there are no page numbers.
 *
 * @param {Object} metadata metadata object; reads `issue`, `volume`, `page` and
 *  `article-number`, and is passed to getShortJournalTitle / getPublishedYear
 * @returns {String} the formatted journal reference, without trailing punctuation
 */
function getTitleYearIssuePagesForCitation(metadata) {
    const { issue, volume, page } = metadata;
    const article = metadata["article-number"];
    // only issues that start with a (non-zero) number are printed
    const hasIssue = Boolean(parseInt(issue, 10));

    // issue: 10.1021/ci00024a006 gives back 2005, though it was published in 1995 (legacy archives)
    let output = `*${getShortJournalTitle(metadata)}* **${getPublishedYear(metadata)}`;

    // page numbers are absent in ACS Article ASAP service or some other papers (10.1371/journal.pone.0068486)
    let locator = "";
    if (page) {
        locator = getPageRange(page);
    } else if (article) {
        locator = `No. ${article}`;
    }

    // books may not have volumes (10.1007/0-306-48639-3_12)
    if (volume) {
        output += `,** *${volume}`;
        if (hasIssue) {
            output += `* (${issue})`;
            if (locator) { output += ","; }
        } else {
            // the comma belongs inside the italics, but only when something follows it
            output += locator ? ",*" : "*";
        }
        if (locator) { output += ` ${locator}`; }
    } else if (locator) {
        // ",** " closes the bold year; no further closing marker is needed
        output += `,** ${locator}`;
    } else {
        // nothing follows the year: just close the bold
        output += "**";
    }
    return output;
}
/**
 * Picks the publication year out of the metadata.
 * The date fields are tried in order of preference: "published-print",
 * "published-online", then "created"; the latter two are not always accurate
 * (github.com/CrossRef/rest-api-doc/issues/381).
 *
 * @param {Object} metadata metadata object holding at least one of the date fields
 * @returns {Number} first element of the first entry of the chosen field's "date-parts"
 */
function getPublishedYear(metadata) {
    const fieldsByPreference = ["published-print", "published-online", "created"];
    let chosenDate;
    for (const field of fieldsByPreference) {
        chosenDate = metadata[field];
        if (chosenDate) { break; }
    }
    const firstDate = chosenDate["date-parts"][0];
    return firstDate[0];
}
/**
 * Determines the abbreviated journal title to print in a citation.
 * Sources, in order of preference:
 *  1. chse.journalList (user needs to install this via a GitHub Gist), keyed by the full title
 *  2. the "short-container-title" of the metadata - sometimes inaccurate or missing
 *     (eg: missing for 10.1023/A:1008989800098; incorrect short form
 *     "Tetrahedron Letters" instead of "Tetrahedron Lett.")
 *  3. the unabbreviated "container-title", in case neither source has an abbreviation
 *     or in case it's a book (not a journal - 10.1007/0-306-48639-3_12)
 * The result is finally corrected through a small table of known-bad values.
 *
 * @param {Object} metadata metadata object; reads "container-title" and "short-container-title"
 *  (each either an array of strings or a plain string)
 * @returns {String} the journal title to print; empty string if the metadata has none
 */
function getShortJournalTitle(metadata) {
    const containerTitle = metadata["container-title"];
    const shortContainerTitle = metadata["short-container-title"];
    // normalise both fields to plain strings; either may be absent or an empty array
    const fullTitle = (Array.isArray(containerTitle) ? containerTitle[0] : containerTitle) || "";
    const crossrefShortTitle = (Array.isArray(shortContainerTitle) ? shortContainerTitle[0] : shortContainerTitle) || "";

    // corrections for titles that come out unabbreviated or wrongly abbreviated
    const journalAbbrevs = {
        "Proceedings of the National Academy of Sciences": "Proc. Natl. Acad. Sci. U. S. A.",
        "The Journal of Chemical Physics": "J. Chem. Phys.",
        "Phys. Rev. Appl": "Phys. Rev. Appl.",
    };

    const res = (chse.journalList && chse.journalList[fullTitle]) || crossrefShortTitle || fullTitle;
    // own-property check so that titles like "constructor" cannot hit Object.prototype
    return Object.prototype.hasOwnProperty.call(journalAbbrevs, res) ? journalAbbrevs[res] : res;
}
/**
 * Fixes the capitalisation of multi-part family names:
 * every letter following a space or a hyphen is upcased, after which
 * the particle "von" (in any letter case) is written in lowercase again.
 * Example: "William von leu" becomes "William von Leu"
 * @param {String} name family name of an author
 * @returns {String} the family name with corrected capitalisation
 */
function capitalizeSpecialAuthorFamilyNames(name) {
    const upcaseAfterSeparator = (whole, separator, letter) => separator + letter.toUpperCase();
    const lowercaseVonParticle = (whole, letter) => `von ${letter.toUpperCase()}`;

    const partsCapitalized = name.replace(/([- ])([a-z])/gi, upcaseAfterSeparator);
    return partsCapitalized.replace(/von ([a-z])/gi, lowercaseVonParticle);
}
/**
 * Formats the author list of a citation as "Family, I. J.; Family, K. "
 * (entries separated by "; ", followed by a single trailing space).
 *
 * @param {Object[]} [authors] author entries; reads `family` and `given` of each
 * @returns {String} the formatted list, or "" when there is no author to print
 */
function citeAuthors(authors) {
    // there needn't be authors all the time; (10.1007/0-306-48639-3_12)
    if (!authors || authors.length === 0) { return ""; }

    const formattedAuthors = [];
    for (const author of authors) {
        const familyName = author.family;
        // ex: https://journals.aps.org/prx/abstract/10.1103/PhysRevX.7.031059
        // last entry is not an author actually
        if (!familyName) { continue; }

        // 10.1248/cpb.49.1102 has all its author names in ALL CAPS; capitalize its only first letter
        const family = capitalizeSpecialAuthorFamilyNames(chse.capitalizeFirstLetter(familyName));
        // some entries carry a family name only; don't let them break the whole citation
        const initials = author.given ? getInitials(author.given) : "";
        formattedAuthors.push(initials ? `${family},${initials}` : family);
    }

    // every entry may have been skipped above
    if (formattedAuthors.length === 0) { return ""; }
    return `${formattedAuthors.join("; ")} `;
}
/**
 * Converts the given name(s) of an author into initials.
 * Each whitespace-separated part contributes " X." to the result,
 * so "John Ronald" becomes " J. R." (note the leading space).
 *
 * @param {String} [givenName] given name(s) of an author
 * @returns {String} the initials, or "" when there is no given name
 */
function getInitials(givenName) {
    if (!givenName) { return ""; }
    return String(givenName)
        // split on any run of whitespace and drop the empty parts that leading,
        // trailing or repeated spaces would otherwise produce (" undefined.")
        .split(/\s+/)
        .filter(Boolean)
        // Array.from keeps a character outside the BMP in one piece
        .map(part => ` ${Array.from(part)[0]}.`)
        .join("");
}
/**
 * Cleans up the title of a paper for use in a citation:
 *  - ALL CAPS titles are converted to title case
 *  - Greek letters written as ".alpha." become HTML entities ("&alpha;")
 *  - a period is appended when the title doesn't end with punctuation
 *  - lowercase roman numerals in parentheses, like "(iii)", are upcased
 * @param {String} title the title to cite properly
 * @returns {String} the cleaned-up title
 */
function citeTitle(title) {
    // some titles are received in ALL CAPS (10.1021/ja01532a066)
    // fix them to titlecase
    let cited = chse.isAllUpcase(title) ? chse.toTitleCase(title) : title;

    // issues#19
    // convert to html entity; replace() leaves the title untouched when nothing matches
    cited = cited.replace(
        /\.[a-z]+\./gi,
        letter => `&${letter.toLowerCase().substring(1, letter.length - 1)};`
    );

    // some paper titles don't end with punctuation
    // example: 10.1021/ci00024a006, 10.1021/ja00178a014
    if (!chse.endsWithPunctuation(cited)) { cited += "."; }

    // some paper titles have a phrase like (iii)
    // correct it to (III) (ex: 10.1039/C5SC03429A)
    // global flag: a title may name several oxidation states
    cited = cited.replace(/\(([ivxl]+)\)/g, (whole, numeral) => `(${numeral.toUpperCase()})`);

    return cited;
}
/**
 * Typesets a page range: the first hyphen is replaced by an en dash.
 * Text without a hyphen is returned unchanged.
 *
 * @param {String} pages page number or range, like "1855-1860"
 * @returns {String} the page range, like "1855–1860"
 */
function getPageRange(pages) {
    const hyphenIndex = pages.indexOf("-");
    if (hyphenIndex === -1) { return pages; }
    const firstPage = pages.slice(0, hyphenIndex);
    const remainder = pages.slice(hyphenIndex + 1);
    return `${firstPage}–${remainder}`;
}
// general DOI format: http://www.doi.org/doi_handbook/2_Numbering.html#2.2.2
/*
The following web URLs are being checked: (they follow the format specified here https://webhome.weizmann.ac.il/home/comartin/doi.html)
- Wiley - https://onlinelibrary.wiley.com/doi/pdf/10.1002/9780470682531.pat0081
- Springer Verlag - https://link.springer.com/article/10.1007%2Fs002140050256
- Elsevier (ScienceDirect) - https://www.sciencedirect.com/science/article/pii/S0013468602000476
- ACS - https://pubs.acs.org/doi/abs/10.1021/ed029p167
- Nature - https://www.nature.com/articles/s41586-018-0058-6
- RSC - http://pubs.rsc.org/en/content/articlelanding/2011/dt/c0dt01244k/unauth#!divAbstract
The following websites are unsupported:
- Academic Press IDEAL (10.1006) (absorbed by Elsevier; web URLs no longer exist; DOIs still work)
- Cambridge UP - https://www.cambridge.org/core/journals/ageing-and-society/article/social-engagement-from-childhood-to-middle-age-and-the-effect-of-childhood-socioeconomic-status-on-middle-age-social-engagement-results-from-the-national-child-development-study/2512DF65E696B95F028BF9209A9FD2DA#
can anyone tell how to extract DOI from this URL?! --^
- i can work on these later
10.1046 Blackwell Publishers. Details to follow.
10.1055 G. Thieme Verlag (Synthesis).Structure: s-YEAR-XXXXX, where XXXXX is a 5-digit article code.
10.1063 American Institute of Physics. The DOI of recent articles (example: http://dx.doi.org/10.1063/1.1385363) can be found on the online abstract of the paper. However, AIP offers a very convenient alternative (a kind-of "OpenURL avant-la-lettre"): http://link.aip.org/link/?jou/vol/firstpage, where "jou" is the three-letter journal abbreviation (e.g. jcp for Journal of Chemical Physics), "vol" is the volume number and "firstpage" the first page number. Example: http://link.aip.org/link/?jcp/115/2051 will link to J. Chem. Phys. 115, 2051 (2001).
10.1073 PNAS (Proceedings of the National Academy of Sciences [USA]). pnas.XXXXXXXXX, where XXXXXXXXX is a 9-digit manuscript number.
10.1074 Journal of Biological Chemistry. Structure: jbc.MXXXXXXXXX, where XXXXXXXXX is a 9-digit manuscript number.
10.1080 Taylor and Francis. This publisher uses 15-character PIIs like Elsevier; again, the PII can generally be found on the online abstract of the journal paper or on the first page of the printed paper. An example DOI URL is http://dx.doi.org/10.1080/002689799163172
10.1083 Rockefeller University Press (e.g. Journal of Cell Biology)
10.1092 Laser Pages Publishing (=Israel Journal of [fill in subject]). Structure: e.g. V0Q8-T3XM-N68W-D8NL.
10.1093 Oxford University Press, EMBO (European Molecular Biology Organisation). Structure: emboj/aaaXXX, where emboj stands for EMBO Journal and aaaXXX is a 3-letter, 3-digit article code.
10.1103 American Physical Society. Example: http://dx.doi.org/10.1103/PhysRevA.68.021801. As you see, JournalName.Volume.ArticleNumber. Like for AIP, an alternative URL is given as follows: http://link.aps.org/abstract/PRA/v68/e021801.
10.1107 International Union of Crystallography (e.g. Acta Crystallographica series). Uses PIIs in same way as Elsevier (see above).
10.1126 SCIENCE magazine. Structure: science.XXXXXXX, where XXXXXXX is a 7-digit article code.
10.1161 American Heart Association.
10.1182 American Society of Hematology (e.g. the journal Blood)
*/
/**
 * Tries to derive the DOI of a paper from the URL of its page on a publisher's website.
 * The publisher is recognised by a keyword in the URL; publishers are tried in a fixed
 * order and the first one that yields a DOI wins. A URL that merely contains a
 * publisher keyword (e.g. the publisher's home page) yields no DOI instead of throwing.
 *
 * @param {String} originalURL URL of the paper's web page
 * @returns {?String} the DOI, or null when none could be derived
 */
function getDOIFromPaperWebURL(originalURL) {
    // remove query parameters and fragment
    const queryMatch = originalURL.match(/[?#]/);
    const strippedURL = queryMatch ? originalURL.substring(0, queryMatch.index) : originalURL;

    // the DOI is literally part of the path
    const extractLiteralDOI = () => {
        const match = strippedURL.match(/10\.\d+(\.\d+)?\/.+/);
        return match ? match[0] : null;
    };

    // Springer separates prefix and suffix by either "/" or its escaped form "%2F"
    const extractSpringerDOI = () => {
        const match = strippedURL.match(/(10\.\d+(?:\.\d+)*)(?:%2F|\/)(.+)/i);
        if (!match) { return null; }
        let suffix = match[2];
        try {
            suffix = decodeURIComponent(suffix);
        } catch (error) {
            // malformed percent-escape: deliberately keep the suffix as it appears in the URL
        }
        return `${match[1]}/${suffix}`;
    };

    // PII has to be split like "Sxxxx-xxxx(yy)zzzzz-c" (S can be upcase/lowercase/absent altogether)
    const extractScienceDirectDOI = () => {
        const match = strippedURL.match(/pii\/(s?)(\d{4})(\d{3}[\dx])(\d{2})(\d{5})([\dx])/i);
        if (!match) { return null; }
        const [, sPresent, issnHead, issnTail, year, item, checkChar] = match;
        return `10.1016/${sPresent ? "S" : ""}${issnHead}-${issnTail}(${year})${item}-${checkChar}`;
    };

    const extractNatureDOI = () => {
        const match = strippedURL.match(/articles\/([^/]+)/);
        return match ? `10.1038/${match[1]}` : null;
    };

    const extractRSCDOI = () => {
        // rsc url may not always have unauth at its end
        const match = strippedURL.match(/\/([\w]+)\/unAuth$/i) || strippedURL.match(/\/([\w]+)$/);
        return match ? `10.1039/${match[1]}` : null;
    };

    const extractPLOSDOI = () => {
        // http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0068486
        // the id? query parameter gets stripped away in the URL, so search the original one
        const anyJournal = originalURL.match(/journal\.([a-z]+)\.(\d+)/i);
        if (anyJournal) { return `10.1371/journal.${anyJournal[1]}.${anyJournal[2]}`; }
        const plosOne = originalURL.match(/pone\.(\d+)/);
        return plosOne ? `10.1371/journal.pone.${plosOne[1]}` : null;
    };

    // [keyword identifying the publisher, extractor], in order of precedence
    const publishers = [
        [/wiley|journals\.aps\.org|annualreviews\.org|aip\.scitation\.org/, extractLiteralDOI],
        [/springer/, extractSpringerDOI],
        [/sciencedirect/, extractScienceDirectDOI],
        [/acs/, extractLiteralDOI],
        [/nature\./, extractNatureDOI],
        [/rsc/, extractRSCDOI],
        [/plos/, extractPLOSDOI],
    ];

    for (const [publisherRegex, extractDOI] of publishers) {
        if (publisherRegex.test(strippedURL)) {
            const doi = extractDOI();
            if (doi) { return doi; }
        }
    }
    return null;
}
/**
 * Classifies a piece of text as one of the citable sources.
 *
 * @param {String} URI the text to classify (surrounding whitespace is ignored)
 * @returns {Array} a `[type, value]` pair:
 *  - `["doi", doi]` for a DOI, given bare, as "DOI: ..." or as a doi.org URL
 *  - `["paperWeb", doi]` for a wiley/elsevier/nature/etc. URL from which a DOI could be derived
 *  - `["web", URI]` for any other website like HyperPhysics
 *  - `[null, null]` when the text is none of the above
 */
chse.getURISource = function (URI) {
    const trimmedURI = URI.trim();

    // the DOI is always the last capture group
    // (?:\.\d+)* admits sub-divided registrant codes such as 10.1000.10
    const validDOIRegexes = [
        /^DOI: ?(10\.\d+(?:\.\d+)*\/.+)$/i,
        /^(10\.\d+(?:\.\d+)*\/.+)$/i,
        /^(https?:?\/?\/?)?(www\.|dx\.)?doi\.org\/(10\.\d+(?:\.\d+)*\/.+)$/i,
    ];
    for (const doiRegex of validDOIRegexes) {
        const match = trimmedURI.match(doiRegex);
        if (match) {
            return ["doi", match[match.length - 1]];
        }
    }

    const DOI = getDOIFromPaperWebURL(trimmedURI);
    if (DOI) { return ["paperWeb", DOI]; }

    // protocol://www.completeDomain/path
    // hyphens are legal in host names (hyperphysics.phy-astr.gsu.edu) and, like
    // query strings and fragments, common in paths
    const validWebsiteRegexes = [/^(https?:\/?\/?)?(www\.)?[\w.-]+?\.[a-z]+[\w.\/\-~%?#=&+:@!,;()]*$/i];
    for (const webRegex of validWebsiteRegexes) {
        if (webRegex.test(trimmedURI)) { return ["web", trimmedURI]; }
    }

    return [null, null];
};
// last statement of the file: reports through chse.debugLog that evaluation reached the end
chse.debugLog("Get citation loaded");