From 89daaeec789282f79da7ff860c8cc3440b7a03ac Mon Sep 17 00:00:00 2001 From: carnetdethese Date: Wed, 17 Apr 2024 15:04:47 +0200 Subject: [PATCH 01/15] Adding Dalloz translator. --- Dalloz.js | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 Dalloz.js diff --git a/Dalloz.js b/Dalloz.js new file mode 100644 index 00000000000..8c9c2b23632 --- /dev/null +++ b/Dalloz.js @@ -0,0 +1,191 @@ +{ + "translatorID": "a59e99a6-42b0-4be6-bb0c-1ff688c3a8b3", + "label": "Dalloz", + "creator": "Alexandre Mimms", + "target": "https?://(?:www[.-])?dalloz(?:[.-]fr)?", + "minVersion": "5.0", + "maxVersion": "", + "priority": 100, + "inRepository": true, + "translatorType": 4, + "browserSupport": "gcsibv", + "lastUpdated": "2024-04-17 12:42:44" +} + +/* + ***** BEGIN LICENSE BLOCK ***** + + Copyright © 2024 Alexandre Mimms + + This file is part of Zotero. + + Zotero is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Zotero is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with Zotero. If not, see . + + ***** END LICENSE BLOCK ***** +*/ +// TODO +// - Make sure that the case report are correctly saved. +// - PDF import : needs reverse engineering the internal api of the service. Seems like a quite complex one. + +const citationAvecNumero = new RegExp(/^([\D]+)\s*(\d{4}),?\s?(n°\s?\d+) *,?\s*(p\.\s?\d+)*/); +const citationSansNumero = new RegExp(/^([\D]+)\s*(\d{4}),?\s*(p\.\d+)?/); +const regAnnee = new RegExp(/\d{4}/); +const docTypeId = new RegExp(/id=([^%]+)(?:%2F)?/); + +const codeDocument = new Map([ + ["ENCY", "dictionary-entry"], + ["JA", "journalArticle"], + ["AJ", "journalArticle"], + ["ACTU", "blogPost"], + ["RFDA", "journalArticle"], + ["CONS", "journalArticle"], + ["DIPI", "journalArticle"], + ["DS", "journalArticle"], + ["JA", "journalArticle"], + ["JT", "journalArticle"], + ["JS", "journalArticle"], + ["JCAS", "journalArticle"], + ["LEGI", "journalArticle"], + ["CAHJ", "journalArticle"], + ["RDI", "journalArticle"], + ["RDSS", "journalArticle"], + ["RECU", "journalArticle"], + ["LEBO", "case"], + ["REV", "journalArticle"], + ["RMC", "journalArticle"], + ["RSC", "journalArticle"], + ["RTD", "journalArticle"], + ["RPR", "journalArticle"], + ["RCJ", "journalArticle"] +]); + +// The following function checks if the ID passed as argument has an associated key (some IDs start with the same letters - easier than filing all available IDs). +function idStartsWithKey(string) { + for (let key of codeDocument.keys()) { + if (key.startsWith(string.substring(0, 2))) { + return true; + } + } + return false; +} + +function detectWeb(doc, url) { + if (url.includes('/documentation/Document')) { // Checks if the page is a document. + let id = url.match(docTypeId); + id = id[1].substring(0, 4); + Z.debug(id); + if (idStartsWithKey(id)) { + if (codeDocument.get(id)) { // If there is a corresponding ID. + return codeDocument.get(id); + } + return codeDocument.get(id.substring(0, 2)); // Gets the value of the key if it is a shorthand. + // Returns the type of the document according to the ID - refer to the const Map declared. + } + } + else if (url.includes('/documentation/Liste')) { // Checks if the page is a list of results. + return 'multiple'; + } + return false; +} + +// This function is basically as it was set by the template. I modified it so it is specific to Dalloz. +function getSearchResults(doc, checkOnly) { + var items = {}; + var found = false; + var rows = doc.querySelectorAll('.result-content'); + for (let row of rows) { + let href = attr(row, "a", "href", 0); + let title = ZU.trimInternal(text(row, "a", 0)); + if (!href || !title) continue; + if (checkOnly) return true; + found = true; + items[href] = title; + } + return found ? items : false; +} + +// Nothing changed here neither. +async function doWeb(doc, url) { + if (detectWeb(doc, url) == 'multiple') { + let items = await Zotero.selectItems(getSearchResults(doc, false)); + if (!items) return; + for (let url of Object.keys(items)) { + await scrape(await requestDocument(url)); + } + } + else { + await scrape(doc, url); + } +} + + +// Since searches trigger a "< >" markup around the searched words, we have to edit that away before storing the values. +async function scrape(doc, url = doc.location.href) { + const titre = ZU.trimInternal(text(doc, ".chronTITRE", 0)).replace(/[<>]/g, ""); // gets the title of the document + const abstract = ZU.trimInternal(text(doc, "#RESUFRAN")).replace(/[<>]/g, ""); // gets the abstract + let refDoc = ZU.trimInternal(text(doc, ".refDoc", 0).replace(/[<>]/g, "")); // gets the reference + + let page, revue, numRevue, date; + const signatures = doc.querySelectorAll(".chronSIGNATURE"); + let auteurs = []; + + // Loop over the "signatures" of the document, and store the author in the list. + for (let signature of signatures) { + auteurs.push(signature.innerText.replace(/[<>]/g, "").split(',')[0]); + } + + if (citationAvecNumero.test(refDoc)) { + refDoc = refDoc.split(citationAvecNumero); + } + else if (citationSansNumero.test(refDoc)) { + refDoc = refDoc.split(citationSansNumero); + } + + for (let item of refDoc) { + if (item.startsWith("p")) { + page = item.replace("p.", ""); + } + else if (item.startsWith("n")) { + numRevue = item.replace("n°", ""); + } + else if (regAnnee.test(item)) { + date = item; + } + else if (item !== "") { + revue = item; + } + } + + let newItem = new Z.Item("journalArticle"); + + newItem.title = titre; + for (let auth of auteurs) { // loop over the list of authors and set them as authors. + let authNames = auth.split(" "); + newItem.creators.push({ + firstName: authNames[0], + lastName: authNames[1], + creatorType: "author", + fieldMode: true + }); + } + + newItem.publicationTitle = revue; + newItem.abstractNote = abstract; + if (numRevue !== "") newItem.issue = numRevue; + newItem.pages = page; + newItem.date = date; + newItem.url = url; + newItem.complete(); +} + From 1dd087702a53fa286d8b401ec100873924cae8fb Mon Sep 17 00:00:00 2001 From: carnetdethese Date: Thu, 18 Apr 2024 11:24:28 +0200 Subject: [PATCH 02/15] added support for cases in Recueil Lebon. --- Dalloz.js | 157 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 102 insertions(+), 55 deletions(-) diff --git a/Dalloz.js b/Dalloz.js index 8c9c2b23632..7bf8fb36755 100644 --- a/Dalloz.js +++ b/Dalloz.js @@ -9,7 +9,7 @@ "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2024-04-17 12:42:44" + "lastUpdated": "2024-04-18 09:22:40" } /* @@ -76,62 +76,12 @@ function idStartsWithKey(string) { if (key.startsWith(string.substring(0, 2))) { return true; } - } - return false; -} - -function detectWeb(doc, url) { - if (url.includes('/documentation/Document')) { // Checks if the page is a document. - let id = url.match(docTypeId); - id = id[1].substring(0, 4); - Z.debug(id); - if (idStartsWithKey(id)) { - if (codeDocument.get(id)) { // If there is a corresponding ID. - return codeDocument.get(id); - } - return codeDocument.get(id.substring(0, 2)); // Gets the value of the key if it is a shorthand. - // Returns the type of the document according to the ID - refer to the const Map declared. - } - } - else if (url.includes('/documentation/Liste')) { // Checks if the page is a list of results. - return 'multiple'; } return false; } -// This function is basically as it was set by the template. I modified it so it is specific to Dalloz. -function getSearchResults(doc, checkOnly) { - var items = {}; - var found = false; - var rows = doc.querySelectorAll('.result-content'); - for (let row of rows) { - let href = attr(row, "a", "href", 0); - let title = ZU.trimInternal(text(row, "a", 0)); - if (!href || !title) continue; - if (checkOnly) return true; - found = true; - items[href] = title; - } - return found ? items : false; -} - -// Nothing changed here neither. -async function doWeb(doc, url) { - if (detectWeb(doc, url) == 'multiple') { - let items = await Zotero.selectItems(getSearchResults(doc, false)); - if (!items) return; - for (let url of Object.keys(items)) { - await scrape(await requestDocument(url)); - } - } - else { - await scrape(doc, url); - } -} - - -// Since searches trigger a "< >" markup around the searched words, we have to edit that away before storing the values. -async function scrape(doc, url = doc.location.href) { +function scrapeJournalArticle(doc, url = doc.location.href) { + // Since searches trigger a "< >" markup around the searched words, we have to edit that away before storing the values. const titre = ZU.trimInternal(text(doc, ".chronTITRE", 0)).replace(/[<>]/g, ""); // gets the title of the document const abstract = ZU.trimInternal(text(doc, "#RESUFRAN")).replace(/[<>]/g, ""); // gets the abstract let refDoc = ZU.trimInternal(text(doc, ".refDoc", 0).replace(/[<>]/g, "")); // gets the reference @@ -155,7 +105,7 @@ async function scrape(doc, url = doc.location.href) { for (let item of refDoc) { if (item.startsWith("p")) { page = item.replace("p.", ""); - } + } else if (item.startsWith("n")) { numRevue = item.replace("n°", ""); } @@ -181,11 +131,108 @@ async function scrape(doc, url = doc.location.href) { } newItem.publicationTitle = revue; - newItem.abstractNote = abstract; + newItem.abstractNote = abstract; if (numRevue !== "") newItem.issue = numRevue; newItem.pages = page; newItem.date = date; newItem.url = url; + newItem.language = "french"; + newItem.complete(); +} + +function scrapeCase(doc, url = doc.location.href) { + let juridiction; + + if (url.includes("LEBON")) { + juridiction = "Conseil d'État"; + } + + // Since searches trigger a "< >" markup around the searched words, we have to edit that away before storing the values. + const titre = ZU.trimInternal(text(doc, ".jurisJURI", 0)).replace(/[<>]/g, ""); // gets the title of the document + const abstract = ZU.trimInternal(text(doc, ".jurisSOMMAIRE")).replace(/[<>]/g, ""); // gets the abstract + const formation = ZU.trimInternal(text(doc, ".jurisCHAM", 0).replace(/[<>]/g, "")); // gets the reference + const date = ZU.trimInternal(text(doc, ".jurisDATE", 0).replace(/[<>]/g, "")); + const volume = date.split("-")[2]; + const mentionPublication = ZU.trimInternal(text(doc, ".commentPopupNDC b", 0).replace(/[<>]/g, "")); + const numeroAffaire = ZU.trimInternal(text(doc, ".jurisNAAF", 0).replace(/[<>]/g, "").replace("n° ", "")); + + let newItem = new Z.Item("case"); + newItem.caseName = titre; + newItem.reporter = mentionPublication; + newItem.abstractNote = abstract.replace("Sommaire : ", ""); + newItem.court = juridiction; + newItem.dateDecided = date; + newItem.reporterVolume = volume; + newItem.docketNumber = numeroAffaire; + newItem.language = "french"; + newItem.url = url; + newItem.extra = formation; newItem.complete(); } +// function scrapeBlog(doc, url = doc.location.href) { + +// } + +function detectWeb(doc, url) { + if (url.includes('/documentation/Document')) { // Checks if the page is a document. + let id = url.match(docTypeId); + id = id[1].substring(0, 4); + if (idStartsWithKey(id)) { + if (codeDocument.get(id)) { // If there is a corresponding ID. + return codeDocument.get(id); + } + return codeDocument.get(id.substring(0, 2)); // Gets the value of the key if it is a shorthand. + // Returns the type of the document according to the ID - refer to the const Map declared. + } + } + else if (url.includes('/documentation/Liste')) { // Checks if the page is a list of results. + return 'multiple'; + } + return false; +} + +// This function is basically as it was set by the template. I modified it so it is specific to Dalloz. +function getSearchResults(doc, checkOnly) { + var items = {}; + var found = false; + var rows = doc.querySelectorAll('.result-content'); + for (let row of rows) { + let href = attr(row, "a", "href", 0); + let title = ZU.trimInternal(text(row, "a", 0)); + if (!href || !title) continue; + if (checkOnly) return true; + found = true; + items[href] = title; + } + return found ? items : false; +} + +// Nothing changed here neither. +async function doWeb(doc, url) { + const docType = detectWeb(doc, url); // calling detectWeb once and passing it to scrape function, + // so we don't have to call it multiple times to check in the scrape function what type of document it is. + + if (docType == 'multiple') { + let items = await Zotero.selectItems(getSearchResults(doc, false)); + if (!items) return; + for (let url of Object.keys(items)) { + await scrape(await requestDocument(url)); + } + } + else { + await scrape(doc, url, docType); + } +} + +async function scrape(doc, url = doc.location.href, docType) { + if (docType == "journalArticle") { + scrapeJournalArticle(doc, url); + } + else if (docType == "case") { + scrapeCase(doc, url); + } + // else if (docType == "blogPost") { + // scrapeBlog(doc, url); + // } +} From 0d2bc10087e3f4f6736b259732544d712f410320 Mon Sep 17 00:00:00 2001 From: carnetdethese Date: Thu, 18 Apr 2024 14:47:40 +0200 Subject: [PATCH 03/15] =?UTF-8?q?Updating=20Dalloz=20and=20adding=20Dalloz?= =?UTF-8?q?=20Biblioth=C3=A8que?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dalloz Bibliotheque.js | 117 +++++++++++++++++++++++++++++++++++++++++ Dalloz.js | 6 ++- 2 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 Dalloz Bibliotheque.js diff --git a/Dalloz Bibliotheque.js b/Dalloz Bibliotheque.js new file mode 100644 index 00000000000..f733df0ff04 --- /dev/null +++ b/Dalloz Bibliotheque.js @@ -0,0 +1,117 @@ +{ + "translatorID": "2ea86ad9-71ca-410c-9126-9d7d98722acf", + "label": "Dalloz Bibliothèque", + "creator": "Alexandre Mimms", + "target": "https?://(?:www[.-])?bibliotheque[.-]lefebvre[.-]dalloz(?:[.-]fr)?", + "minVersion": "5.0", + "maxVersion": "", + "priority": 100, + "inRepository": true, + "translatorType": 4, + "browserSupport": "gcsibv", + "lastUpdated": "2024-04-18 12:43:54" +} + +/* + ***** BEGIN LICENSE BLOCK ***** + + Copyright © 2024 YOUR_NAME <- TODO + + This file is part of Zotero. + + Zotero is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Zotero is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with Zotero. If not, see . + + ***** END LICENSE BLOCK ***** +*/ + + +function detectWeb(doc, url) { + if (url.includes('/ouvrage/')) { + return 'book'; + } + else if (url.includes('/recherche')) { + return 'multiple' + } + return false; +} + +function getSearchResults(doc, checkOnly) { + var items = {}; + var found = false; + var rows = doc.querySelectorAll('.result-list-grid-item'); + + for (let row of rows) { + let href = row.querySelectorAll("a")[0].href; + let title = ZU.trimInternal(row.querySelectorAll(".detail-title")[0].innerText); + if (!href || !title) continue; + if (checkOnly) return true; + found = true; + items[href] = title; + } + return found ? items : false; +} + +async function doWeb(doc, url) { + if (detectWeb(doc, url) == 'multiple') { + let items = await Zotero.selectItems(getSearchResults(doc, false)); + if (!items) return; + for (let url of Object.keys(items)) { + await scrape(await requestDocument(url)); + } + } + else { + await scrape(doc, url); + } +} + +async function scrape(doc, url = doc.location.href) { + const edition = ZU.trimInternal(text(doc, ".editions-edition.css-p7sjbi", 0)).split(" ")[0]; + const date = ZU.trimInternal(text(doc, ".editions-date.css-p7sjbi", 0)).replace(/Edition\s?:\s?/, ""); + const collection = ZU.trimInternal(text(doc, ".notice-header-grid-item.css-1o256gd.e4d31s30:not(.first-item) .notice-header-link", 0)); + const isbn = ZU.trimInternal(text(doc, ".notice-header-grid-item.css-leol38.e4d31s30 .notice-header-link", 0)); + let marque = ZU.trimInternal(text(doc, ".notice-header-grid-item.css-xc5jw0.e4d31s30 .notice-header-link", 0)); + marque = marque.substring(0,1) + marque.substring(1).toLowerCase(); + const auteurs = ZU.trimInternal(text(doc, ".notice-header-grid-item.css-2bwjgy.e4d31s30 .notice-header-link", 0)).split(" • "); + const titre = ZU.trimInternal(text(doc, ".title", 0)); + const abstract = ZU.trimInternal(text(doc, ".description", 0)).replace("Description", ""); + + let newItem = new Z.Item("book"); + + for (let auteur of auteurs) { + auteurNames = auteur.split(" "); + newItem.creators.push({ + firstName: auteurNames[0], + lastName: auteurNames[1], + creatorType: "author", + fieldMode: true + }); + } + + newItem.title = titre; + newItem.date = date; + newItem.abstractNote = abstract; + newItem.ISBN = isbn; + newItem.edition = edition; + newItem.publisher = marque; + newItem.language = "french"; + newItem.series = collection; + newItem.url = url; + + newItem.complete(); +} + +/** BEGIN TEST CASES **/ +var testCases = [ +] +/** END TEST CASES **/ diff --git a/Dalloz.js b/Dalloz.js index 7bf8fb36755..97142745424 100644 --- a/Dalloz.js +++ b/Dalloz.js @@ -9,7 +9,7 @@ "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2024-04-18 09:22:40" + "lastUpdated": "2024-04-18 12:12:00" } /* @@ -236,3 +236,7 @@ async function scrape(doc, url = doc.location.href, docType) { // scrapeBlog(doc, url); // } } +/** BEGIN TEST CASES **/ +var testCases = [ +] +/** END TEST CASES **/ From 83629beffa61f89c2f22c6e4b77e006e8eec4caf Mon Sep 17 00:00:00 2001 From: carnetdethese Date: Thu, 18 Apr 2024 14:56:11 +0200 Subject: [PATCH 04/15] fixing dalloz bib.js --- Dalloz Bibliotheque.js | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/Dalloz Bibliotheque.js b/Dalloz Bibliotheque.js index f733df0ff04..debab3bd23c 100644 --- a/Dalloz Bibliotheque.js +++ b/Dalloz Bibliotheque.js @@ -41,7 +41,7 @@ function detectWeb(doc, url) { return 'book'; } else if (url.includes('/recherche')) { - return 'multiple' + return 'multiple'; } return false; } @@ -81,7 +81,7 @@ async function scrape(doc, url = doc.location.href) { const collection = ZU.trimInternal(text(doc, ".notice-header-grid-item.css-1o256gd.e4d31s30:not(.first-item) .notice-header-link", 0)); const isbn = ZU.trimInternal(text(doc, ".notice-header-grid-item.css-leol38.e4d31s30 .notice-header-link", 0)); let marque = ZU.trimInternal(text(doc, ".notice-header-grid-item.css-xc5jw0.e4d31s30 .notice-header-link", 0)); - marque = marque.substring(0,1) + marque.substring(1).toLowerCase(); + marque = marque.substring(0, 1) + marque.substring(1).toLowerCase(); const auteurs = ZU.trimInternal(text(doc, ".notice-header-grid-item.css-2bwjgy.e4d31s30 .notice-header-link", 0)).split(" • "); const titre = ZU.trimInternal(text(doc, ".title", 0)); const abstract = ZU.trimInternal(text(doc, ".description", 0)).replace("Description", ""); @@ -89,13 +89,13 @@ async function scrape(doc, url = doc.location.href) { let newItem = new Z.Item("book"); for (let auteur of auteurs) { - auteurNames = auteur.split(" "); + const auteurNames = auteur.split(" "); newItem.creators.push({ firstName: auteurNames[0], lastName: auteurNames[1], creatorType: "author", fieldMode: true - }); + }); } newItem.title = titre; @@ -111,7 +111,3 @@ async function scrape(doc, url = doc.location.href) { newItem.complete(); } -/** BEGIN TEST CASES **/ -var testCases = [ -] -/** END TEST CASES **/ From 1e465311bb58df4496ecfa08a45b6726f2c89c15 Mon Sep 17 00:00:00 2001 From: carnetdethese Date: Thu, 18 Apr 2024 18:42:07 +0200 Subject: [PATCH 05/15] Added Jus Politicum and Lextenso support. --- Jus Politicum.js | 112 +++++++++++++++++++++++++++++ Lextenso.js | 183 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 295 insertions(+) create mode 100644 Jus Politicum.js create mode 100644 Lextenso.js diff --git a/Jus Politicum.js b/Jus Politicum.js new file mode 100644 index 00000000000..15090adbbaf --- /dev/null +++ b/Jus Politicum.js @@ -0,0 +1,112 @@ +{ + "translatorID": "aeb7f19b-0907-4117-bef4-08e36af4d31f", + "label": "Jus Politicum", + "creator": "Alexandre Mimms", + "target": "https?://(?:www[.-])?juspoliticum.com", + "minVersion": "5.0", + "maxVersion": "", + "priority": 100, + "inRepository": true, + "translatorType": 4, + "browserSupport": "gcsibv", + "lastUpdated": "2024-04-18 16:40:24" +} + +/* + ***** BEGIN LICENSE BLOCK ***** + + Copyright © 2024 Alexandre Mimms + + This file is part of Zotero. + + Zotero is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Zotero is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with Zotero. If not, see . + + ***** END LICENSE BLOCK ***** +*/ + + +function detectWeb(doc, url) { + if (url.includes('/article/')) { + return 'journalArticle'; + } + else if (url.includes('/searches')) { + return 'multiple'; + } + return false; +} + +function getSearchResults(doc, checkOnly) { + var items = {}; + var found = false; + var rows = doc.querySelectorAll('#search-section h2 a'); + for (let row of rows) { + let href = row.href; + let title = ZU.trimInternal(row.textContent); + if (!href || !title) continue; + if (checkOnly) return true; + found = true; + items[href] = title; + } + return found ? items : false; +} + +async function doWeb(doc, url) { + if (detectWeb(doc, url) == 'multiple') { + let items = await Zotero.selectItems(getSearchResults(doc, false)); + if (!items) return; + for (let url of Object.keys(items)) { + await scrape(await requestDocument(url)); + } + } + else { + await scrape(doc, url); + } +} + +async function scrape(doc, url = doc.location.href) { + const abstract = ZU.trimInternal(text(doc, "#content")); + const titre = ZU.trimInternal(text(doc, "h2")) + const numero = text(doc, ".release-title .num").replace("N°", ""); + const linkURL = doc.querySelectorAll(".documentsAssocies a")[0].href; + const auteurs = text(doc, ".article-author").split(", "); + + let newItem = new Zotero.Item("journalArticle"); + + for (let auteur of auteurs) { + const auteurNames = auteur.split(" "); + newItem.creators.push({ + firstName: auteurNames[0], + lastName: auteurNames[1], + creatorType: "author", + fieldMode: true + }) + } + + newItem.title = titre; + newItem.issue = numero; + newItem.abstractNote = abstract; + + newItem.attachments = [{ + url: linkURL, + title: "Full text PDF", + mimeType: "application/pdf", + }]; + + newItem.complete(); +} + +/** BEGIN TEST CASES **/ +var testCases = [ +] +/** END TEST CASES **/ diff --git a/Lextenso.js b/Lextenso.js new file mode 100644 index 00000000000..3c0333cc19d --- /dev/null +++ b/Lextenso.js @@ -0,0 +1,183 @@ +{ + "translatorID": "3243d081-22c0-452c-8298-9d8a9fb5de2f", + "label": "Lextenso", + "creator": "Alexandre Mimms", + "target": "https?://(?:www[.-])?labase[.-]lextenso[.-](?:[.-]fr)?", + "minVersion": "5.0", + "maxVersion": "", + "priority": 100, + "inRepository": true, + "translatorType": 4, + "browserSupport": "gcsibv", + "lastUpdated": "2024-04-18 16:17:04" +} + +/* + ***** BEGIN LICENSE BLOCK ***** + + Copyright © 2022 Alexandre Mimms + + This file is part of Zotero. + + Zotero is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Zotero is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with Zotero. If not, see . + + ***** END LICENSE BLOCK ***** +*/ + + +async function scrapeJournalArticle(doc, url) { + const references = ZU.trimInternal(text(doc, ".document-metadata-origin").replace("Issu de ", "")).split(" - "); + const revue = references[0]; + const numeroRevue = references[1]; + const page = references[2]; + const titre = ZU.trimInternal(text(doc, "#page-title")); + const auteurs = doc.querySelectorAll(".document-metadata-authors-name"); + const abstract = ZU.trimInternal(text(doc, ".cChapeau", 0)); + const date = ZU.trimInternal(text(doc, ".document-metadata-date", 0).replace("Date de parution : ", "")); + + + let newItem = new Z.Item("journalArticle"); + newItem.title = titre; + + for (let auteur of auteurs) { + auteurNames = auteur.innerText.split(" "); + newItem.creators.push({ + firstName: auteurNames[0], + lastName: auteurNames[1], + creatorType: "author", + fieldMode: true, + }); + } + + newItem.date = date; + newItem.abstractNote = abstract; + newItem.publicationTitle = revue; + newItem.issue = numeroRevue.replace(/n°[0]?/, ""); + newItem.pages = page.replace(/page\s?/, ""); + newItem.url = url; + newItem.language = "french"; + newItem.complete(); +} + +async function scrapeBook(doc, url) { + // weirdly enough no real information is displayed on the book summary page, but + // some info, like ISBN, is shown on individual pages. + // So, we get the first url to one of those individual pages, then request it so we + // can fetch the information. + // I did not yet find a way to fetch the number of page or edition. + // I tried accessing the link of the shop, where those are displayed, but the request + // fails. + const firstItemUrl = doc.querySelectorAll(".book-summary-list li a")[0].href; + Z.debug(firstItemUrl); + const indivPage = await requestDocument(firstItemUrl); + + // Accessing the metadata - reversing the list, since there can be multiple authors + // the end of the list will always be the same, so easier and surer to do it like that. + const ref = text(indivPage, ".document-metadata-ref .value", 0).split(", ").reverse(); + const date = ref[2]; + const publisher = ref[1]; + const isbn = ref[0]; + + const auteurs = indivPage.querySelectorAll(".document-metadata-authors-name"); + + const titre = text(doc, "#page-title"); + + let newItem = new Z.Item("book"); + newItem.title = titre; + + for (let auteur of auteurs) { + auteurNames = auteur.innerText.split(" "); + newItem.creators.push({ + firstName: auteurNames[0], + lastName: auteurNames[1], + creatorType: "author", + fieldMode: true, + }); + } + + newItem.date = date; + newItem.publisher = publisher; + newItem.ISBN = isbn; + // newItem.pages = page.replace(/page\s?/, ""); + newItem.url = url; + newItem.language = "french"; + newItem.complete(); +} + +function detectWeb(doc, url) { + // TODO: adjust the logic here + if (url.includes('/lextenso/rechercher')) { + return 'multiple'; + } + else if (doc.querySelectorAll(".node-type-ouvrage").length > 0) { + return 'book'; + } + else if (doc.querySelectorAll(".lextenso-document-article").length > 0) { + return 'journalArticle'; + } + return false; +} + +function getSearchResults(doc, checkOnly) { + var items = {}; + var found = false; + // TODO: adjust the CSS selector + var rows = doc.querySelectorAll('h2 > a.title[href*="/article/"]'); + for (let row of rows) { + // TODO: check and maybe adjust + let href = row.href; + // TODO: check and maybe adjust + let title = ZU.trimInternal(row.textContent); + if (!href || !title) continue; + if (checkOnly) return true; + found = true; + items[href] = title; + } + return found ? items : false; +} + +async function doWeb(doc, url) { + const docType = detectWeb(doc, url); + if (docType == 'multiple') { + let items = await Zotero.selectItems(getSearchResults(doc, false)); + if (!items) return; + for (let url of Object.keys(items)) { + await scrape(await requestDocument(url), docType); + } + } + else { + await scrape(doc, url, docType); + } +} + +async function scrape(doc, url = doc.location.href, docType) { + if (docType == "journalArticle") { + scrapeJournalArticle(doc, url); + } + else if (docType == "book") { + scrapeBook(doc, url); + } + +} + +/** BEGIN TEST CASES **/ +var testCases = [ + { + "type": "web", + "url": "https://www-labase-lextenso-fr.docelec-u-paris2.idm.oclc.org/", + "detectedItemType": false, + "items": [] + } +] +/** END TEST CASES **/ From 3caf78309d1bea9899d544a81002fdf1b2da676b Mon Sep 17 00:00:00 2001 From: carnetdethese Date: Thu, 18 Apr 2024 18:45:42 +0200 Subject: [PATCH 06/15] eslint --- Dalloz Bibliotheque.js | 2 +- Jus Politicum.js | 10 +++------- Lextenso.js | 15 +++++++-------- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/Dalloz Bibliotheque.js b/Dalloz Bibliotheque.js index debab3bd23c..02db7160853 100644 --- a/Dalloz Bibliotheque.js +++ b/Dalloz Bibliotheque.js @@ -15,7 +15,7 @@ /* ***** BEGIN LICENSE BLOCK ***** - Copyright © 2024 YOUR_NAME <- TODO + Copyright © 2024 Alexandre Mimms This file is part of Zotero. diff --git a/Jus Politicum.js b/Jus Politicum.js index 15090adbbaf..c519326dc58 100644 --- a/Jus Politicum.js +++ b/Jus Politicum.js @@ -76,7 +76,7 @@ async function doWeb(doc, url) { async function scrape(doc, url = doc.location.href) { const abstract = ZU.trimInternal(text(doc, "#content")); - const titre = ZU.trimInternal(text(doc, "h2")) + const titre = ZU.trimInternal(text(doc, "h2")); const numero = text(doc, ".release-title .num").replace("N°", ""); const linkURL = doc.querySelectorAll(".documentsAssocies a")[0].href; const auteurs = text(doc, ".article-author").split(", "); @@ -90,12 +90,13 @@ async function scrape(doc, url = doc.location.href) { lastName: auteurNames[1], creatorType: "author", fieldMode: true - }) + }); } newItem.title = titre; newItem.issue = numero; newItem.abstractNote = abstract; + newItem.url = url; newItem.attachments = [{ url: linkURL, @@ -105,8 +106,3 @@ async function scrape(doc, url = doc.location.href) { newItem.complete(); } - -/** BEGIN TEST CASES **/ -var testCases = [ -] -/** END TEST CASES **/ diff --git a/Lextenso.js b/Lextenso.js index 3c0333cc19d..9032dca655b 100644 --- a/Lextenso.js +++ b/Lextenso.js @@ -15,7 +15,7 @@ /* ***** BEGIN LICENSE BLOCK ***** - Copyright © 2022 Alexandre Mimms + Copyright © 2024 Alexandre Mimms This file is part of Zotero. @@ -51,7 +51,7 @@ async function scrapeJournalArticle(doc, url) { newItem.title = titre; for (let auteur of auteurs) { - auteurNames = auteur.innerText.split(" "); + const auteurNames = auteur.innerText.split(" "); newItem.creators.push({ firstName: auteurNames[0], lastName: auteurNames[1], @@ -72,7 +72,7 @@ async function scrapeJournalArticle(doc, url) { async function scrapeBook(doc, url) { // weirdly enough no real information is displayed on the book summary page, but - // some info, like ISBN, is shown on individual pages. + // some info, like ISBN, is shown on individual pages. // So, we get the first url to one of those individual pages, then request it so we // can fetch the information. // I did not yet find a way to fetch the number of page or edition. @@ -97,7 +97,7 @@ async function scrapeBook(doc, url) { newItem.title = titre; for (let auteur of auteurs) { - auteurNames = auteur.innerText.split(" "); + const auteurNames = auteur.innerText.split(" "); newItem.creators.push({ firstName: auteurNames[0], lastName: auteurNames[1], @@ -132,12 +132,12 @@ function detectWeb(doc, url) { function getSearchResults(doc, checkOnly) { var items = {}; var found = false; - // TODO: adjust the CSS selector + var rows = doc.querySelectorAll('h2 > a.title[href*="/article/"]'); for (let row of rows) { - // TODO: check and maybe adjust + let href = row.href; - // TODO: check and maybe adjust + let title = ZU.trimInternal(row.textContent); if (!href || !title) continue; if (checkOnly) return true; @@ -168,7 +168,6 @@ async function scrape(doc, url = doc.location.href, docType) { else if (docType == "book") { scrapeBook(doc, url); } - } /** BEGIN TEST CASES **/ From 368bff7b9c623d9d52d21f2c212b6e75b00546f6 Mon Sep 17 00:00:00 2001 From: carnetdethese Date: Fri, 19 Apr 2024 07:54:09 +0200 Subject: [PATCH 07/15] Added Vie Publique support (public reports and speeches) --- Vie Publique.js | 175 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 Vie Publique.js diff --git a/Vie Publique.js b/Vie Publique.js new file mode 100644 index 00000000000..1640d1ad356 --- /dev/null +++ b/Vie Publique.js @@ -0,0 +1,175 @@ +{ + "translatorID": "858fa86d-82e2-43ca-9fc7-cf75b98101cb", + "label": "Vie Publique", + "creator": "Alexandre Mimms", + "target": "https?://(?:www.)?vie-publique(?:.fr)?", + "minVersion": "5.0", + "maxVersion": "", + "priority": 100, + "inRepository": true, + "translatorType": 4, + "browserSupport": "gcsibv", + "lastUpdated": "2024-04-19 05:51:17" +} + +/* + ***** BEGIN LICENSE BLOCK ***** + + Copyright © 2024 Alexandre Mimms + + This file is part of Zotero. + + Zotero is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Zotero is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with Zotero. If not, see . + + ***** END LICENSE BLOCK ***** +*/ + + +function scrapeRapport(doc, url) { + const titre = text(doc, "h1", 0); + const auteursString = doc.querySelectorAll(".book--author a"); + const auteursMorauxString = doc.querySelectorAll(".book--author-moral a"); + const abstract = text(doc, "#fiche-item-présentation"); + const information = doc.querySelectorAll(".tabpanel--technique--details li"); + const date = text(doc, ".field--name-field-date-remise", 0); + const page = information[1].innerText.replace("Pagination : ", "").replace(" pages", ""); + const reportType = information[0].innerText.replace("Type de document : ", ""); + const pdfLink = doc.querySelectorAll(".book--actionsBox a")[0].href; + const tags = doc.querySelectorAll(".vp-item-tag"); + + let newItem = new Z.Item('report'); + newItem.title = titre || ""; + newItem.date = date; + newItem.institution = auteursMorauxString[0].innerText; + newItem.abstractNote = abstract; + newItem.pages = page; + newItem.reportType = reportType; + newItem.url = url; + + newItem.attachments = [{ + url: pdfLink, + title: "Full Text PDF", + mimeType: "application/pdf", + snapshot: false + }]; + + for (let aut of auteursString) { + const autNames = aut.innerText.split(" "); + + newItem.creators.push({ + firstName: autNames[0], + lastName: autNames[1], + creatorType: "author", + fieldMode: true, + }); + } + + for (let tag of tags) { + newItem.tags.push(tag.innerText); + } + + + if (auteursMorauxString.length > 1) { + for (let autMoral of auteursMorauxString) { + newItem.institution += ", " + autMoral.innerText; + } + } + + newItem.complete(); +} + +function scrapeSpeech(doc, url) { + const titre = text(doc, "h1", 0); + const auteursString = doc.querySelectorAll(".line-intervenant a"); + const date = text(doc, ".datetime", 0); + const tags = doc.querySelectorAll(".vp-item-tag"); + + let newItem = new Z.Item('presentation'); + newItem.title = titre || ""; + newItem.date = date; + newItem.url = url; + + for (let aut of auteursString) { + const autNames = aut.innerText.split(" "); + + newItem.creators.push({ + firstName: autNames[0], + lastName: autNames[1], + creatorType: "author", + fieldMode: true, + }); + } + + for (let tag of tags) { + newItem.tags.push(tag.innerText); + } + + newItem.complete(); +} + +function detectWeb(doc, url) { + // TODO: adjust the logic here + if (url.includes('/rapport')) { + return 'report'; + } + else if (url.includes('/discours')) { + return 'presentation'; + } + else if (url.includes('/recherche')) { + return 'multiple'; + } + return false; +} + +function getSearchResults(doc, checkOnly) { + var items = {}; + var found = false; + // TODO: adjust the CSS selector + var rows = doc.querySelectorAll('h2 > a.title[href*="/article/"]'); + for (let row of rows) { + // TODO: check and maybe adjust + let href = row.href; + // TODO: check and maybe adjust + let title = ZU.trimInternal(row.textContent); + if (!href || !title) continue; + if (checkOnly) return true; + found = true; + items[href] = title; + } + return found ? items : false; +} + +async function doWeb(doc, url) { + const docType = detectWeb(doc, url); + if (docType == 'multiple') { + let items = await Zotero.selectItems(getSearchResults(doc, false)); + if (!items) return; + for (let url of Object.keys(items)) { + await scrape(await requestDocument(url)); + } + } + else { + await scrape(doc, url, docType); + } +} + +async function scrape(doc, url = doc.location.href, docType) { + if (docType == "report") { + scrapeRapport(doc, url); + } + else if (docType == "presentation") { + scrapeSpeech(doc, url); + } +} + From 47360f54a0f2ea7a8258950e8040f3ceabf227e9 Mon Sep 17 00:00:00 2001 From: carnetdethese Date: Fri, 19 Apr 2024 07:57:04 +0200 Subject: [PATCH 08/15] . --- Vie Publique.js | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/Vie Publique.js b/Vie Publique.js index 1640d1ad356..9181b2cbd67 100644 --- a/Vie Publique.js +++ b/Vie Publique.js @@ -9,30 +9,30 @@ "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2024-04-19 05:51:17" + "lastUpdated": "2024-04-19 05:56:53" } /* - ***** BEGIN LICENSE BLOCK ***** + ***** BEGIN LICENSE BLOCK ***** - Copyright © 2024 Alexandre Mimms + Copyright © 2024 Alexandre Mimms - This file is part of Zotero. + This file is part of Zotero. - Zotero is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + Zotero is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Zotero is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. + Zotero is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. - You should have received a copy of the GNU Affero General Public License - along with Zotero. If not, see . + You should have received a copy of the GNU Affero General Public License + along with Zotero. If not, see . - ***** END LICENSE BLOCK ***** + ***** END LICENSE BLOCK ***** */ @@ -136,7 +136,7 @@ function getSearchResults(doc, checkOnly) { var items = {}; var found = false; // TODO: adjust the CSS selector - var rows = doc.querySelectorAll('h2 > a.title[href*="/article/"]'); + var rows = doc.querySelectorAll('h3 > a'); for (let row of rows) { // TODO: check and maybe adjust let href = row.href; @@ -173,3 +173,7 @@ async function scrape(doc, url = doc.location.href, docType) { } } +/** BEGIN TEST CASES **/ +var testCases = [ +] +/** END TEST CASES **/ From a18a6d98bf40b377d07c9bdbeeefda6d7bc2a1a4 Mon Sep 17 00:00:00 2001 From: carnetdethese Date: Fri, 19 Apr 2024 07:57:27 +0200 Subject: [PATCH 09/15] . --- Vie Publique.js | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Vie Publique.js b/Vie Publique.js index 9181b2cbd67..2587ce1e623 100644 --- a/Vie Publique.js +++ b/Vie Publique.js @@ -135,12 +135,9 @@ function detectWeb(doc, url) { function getSearchResults(doc, checkOnly) { var items = {}; var found = false; - // TODO: adjust the CSS selector var rows = doc.querySelectorAll('h3 > a'); for (let row of rows) { - // TODO: check and maybe adjust let href = row.href; - // TODO: check and maybe adjust let title = ZU.trimInternal(row.textContent); if (!href || !title) continue; if (checkOnly) return true; @@ -173,7 +170,3 @@ async function scrape(doc, url = doc.location.href, docType) { } } -/** BEGIN TEST CASES **/ -var testCases = [ -] -/** END TEST CASES **/ From 6c93b85a4e0efb746b02b23cc46f23dc08e43fbb Mon Sep 17 00:00:00 2001 From: carnetdethese <113130642+carnetdethese@users.noreply.github.com> Date: Tue, 23 Apr 2024 10:15:05 +0200 Subject: [PATCH 10/15] Update Dalloz Bibliotheque.js Co-authored-by: Abe Jellinek --- Dalloz Bibliotheque.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dalloz Bibliotheque.js b/Dalloz Bibliotheque.js index 02db7160853..f64767538a8 100644 --- a/Dalloz Bibliotheque.js +++ b/Dalloz Bibliotheque.js @@ -2,7 +2,7 @@ "translatorID": "2ea86ad9-71ca-410c-9126-9d7d98722acf", "label": "Dalloz Bibliothèque", "creator": "Alexandre Mimms", - "target": "https?://(?:www[.-])?bibliotheque[.-]lefebvre[.-]dalloz(?:[.-]fr)?", + "target": "https?://(www\\.)?bibliotheque\\.lefebvre\\.dalloz\\.fr", "minVersion": "5.0", "maxVersion": "", "priority": 100, From ea65544ba1ebd228b9ba525ee9d71dee0fda18da Mon Sep 17 00:00:00 2001 From: carnetdethese <113130642+carnetdethese@users.noreply.github.com> Date: Tue, 23 Apr 2024 10:15:40 +0200 Subject: [PATCH 11/15] Update Dalloz Bibliotheque.js Co-authored-by: Abe Jellinek --- Dalloz Bibliotheque.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dalloz Bibliotheque.js b/Dalloz Bibliotheque.js index f64767538a8..c62b97350d4 100644 --- a/Dalloz Bibliotheque.js +++ b/Dalloz Bibliotheque.js @@ -40,7 +40,7 @@ function detectWeb(doc, url) { if (url.includes('/ouvrage/')) { return 'book'; } - else if (url.includes('/recherche')) { + else if (url.includes('/recherche') && getSearchResults(doc, true)) { return 'multiple'; } return false; From ee9bb49ad86a60333010f97de1903ff286581809 Mon Sep 17 00:00:00 2001 From: carnetdethese Date: Tue, 23 Apr 2024 10:18:38 +0200 Subject: [PATCH 12/15] . --- Dalloz Bibliotheque.js | 6 +++++- Dalloz.js | 33 +++++++++++++++++++++++++-------- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/Dalloz Bibliotheque.js b/Dalloz Bibliotheque.js index 02db7160853..5c44535e7d7 100644 --- a/Dalloz Bibliotheque.js +++ b/Dalloz Bibliotheque.js @@ -9,7 +9,7 @@ "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2024-04-18 12:43:54" + "lastUpdated": "2024-04-19 13:07:42" } /* @@ -111,3 +111,7 @@ async function scrape(doc, url = doc.location.href) { newItem.complete(); } +/** BEGIN TEST CASES **/ +var testCases = [ +] +/** END TEST CASES **/ diff --git a/Dalloz.js b/Dalloz.js index 97142745424..37acdb390d5 100644 --- a/Dalloz.js +++ b/Dalloz.js @@ -9,7 +9,7 @@ "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2024-04-18 12:12:00" + "lastUpdated": "2024-04-19 13:42:32" } /* @@ -82,16 +82,33 @@ function idStartsWithKey(string) { function scrapeJournalArticle(doc, url = doc.location.href) { // Since searches trigger a "< >" markup around the searched words, we have to edit that away before storing the values. - const titre = ZU.trimInternal(text(doc, ".chronTITRE", 0)).replace(/[<>]/g, ""); // gets the title of the document - const abstract = ZU.trimInternal(text(doc, "#RESUFRAN")).replace(/[<>]/g, ""); // gets the abstract + + const xhr = new XMLHttpRequest(); + xhr.open("POST", "https://www-dalloz-fr.docelec-u-paris2.idm.oclc.org/api/toolsAction/Document.html"); + xhr.setRequestHeader("Content-Type", "application/json;charset=utf-8"); + const body = JSON.stringify({ + title: "Hello World", + body: "My POST request", + userId: 900, + }); + xhr.onload = () => { + if (xhr.readyState == 4 && xhr.status == 201) { + Z.debug(JSON.parse(xhr.responseText)); + } else { + Z.debug(`Error: ${xhr.status}`); + } + }; + xhr.send(body); + + + + let refDoc = ZU.trimInternal(text(doc, ".refDoc", 0).replace(/[<>]/g, "")); // gets the reference - let page, revue, numRevue, date; - const signatures = doc.querySelectorAll(".chronSIGNATURE"); let auteurs = []; // Loop over the "signatures" of the document, and store the author in the list. - for (let signature of signatures) { + for (let signature of doc.querySelectorAll(".chronSIGNATURE")) { auteurs.push(signature.innerText.replace(/[<>]/g, "").split(',')[0]); } @@ -119,7 +136,7 @@ function scrapeJournalArticle(doc, url = doc.location.href) { let newItem = new Z.Item("journalArticle"); - newItem.title = titre; + newItem.title = ZU.trimInternal(text(doc, ".chronTITRE", 0)).replace(/[<>]/g, ""); for (let auth of auteurs) { // loop over the list of authors and set them as authors. let authNames = auth.split(" "); newItem.creators.push({ @@ -131,7 +148,7 @@ function scrapeJournalArticle(doc, url = doc.location.href) { } newItem.publicationTitle = revue; - newItem.abstractNote = abstract; + newItem.abstractNote = ZU.trimInternal(text(doc, "#RESUFRAN")).replace(/[<>]/g, ""); if (numRevue !== "") newItem.issue = numRevue; newItem.pages = page; newItem.date = date; From 70abf520fc05f28db268f3e77f81b77f1d09b6dc Mon Sep 17 00:00:00 2001 From: carnetdethese Date: Tue, 23 Apr 2024 14:19:14 +0200 Subject: [PATCH 13/15] Added requested changes. --- Dalloz Bibliotheque.js | 96 +++++++++++++++++----- Dalloz.js | 163 +++++++++++++++++++------------------ Jus Politicum.js | 82 +++++++++++++------ Lextenso.js | 97 +++++++++++----------- Vie Publique.js | 177 +++++++++++++++++++++++++++++------------ 5 files changed, 397 insertions(+), 218 deletions(-) diff --git a/Dalloz Bibliotheque.js b/Dalloz Bibliotheque.js index b8742aaff8e..3c89a8b9656 100644 --- a/Dalloz Bibliotheque.js +++ b/Dalloz Bibliotheque.js @@ -2,14 +2,14 @@ "translatorID": "2ea86ad9-71ca-410c-9126-9d7d98722acf", "label": "Dalloz Bibliothèque", "creator": "Alexandre Mimms", - "target": "https?://(www\\.)?bibliotheque\\.lefebvre\\.dalloz\\.fr", + "target": "https?://(www\\.)?bibliotheque\\.lefebvre-dalloz\\.fr", "minVersion": "5.0", "maxVersion": "", "priority": 100, "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2024-04-19 13:07:42" + "lastUpdated": "2024-04-23 10:41:20" } /* @@ -76,42 +76,96 @@ async function doWeb(doc, url) { } async function scrape(doc, url = doc.location.href) { - const edition = ZU.trimInternal(text(doc, ".editions-edition.css-p7sjbi", 0)).split(" ")[0]; - const date = ZU.trimInternal(text(doc, ".editions-date.css-p7sjbi", 0)).replace(/Edition\s?:\s?/, ""); - const collection = ZU.trimInternal(text(doc, ".notice-header-grid-item.css-1o256gd.e4d31s30:not(.first-item) .notice-header-link", 0)); - const isbn = ZU.trimInternal(text(doc, ".notice-header-grid-item.css-leol38.e4d31s30 .notice-header-link", 0)); - let marque = ZU.trimInternal(text(doc, ".notice-header-grid-item.css-xc5jw0.e4d31s30 .notice-header-link", 0)); - marque = marque.substring(0, 1) + marque.substring(1).toLowerCase(); - const auteurs = ZU.trimInternal(text(doc, ".notice-header-grid-item.css-2bwjgy.e4d31s30 .notice-header-link", 0)).split(" • "); - const titre = ZU.trimInternal(text(doc, ".title", 0)); - const abstract = ZU.trimInternal(text(doc, ".description", 0)).replace("Description", ""); + let edition, date, marque, collection, isbn, auteurs; + const editions = doc.querySelectorAll(".editions-box"); + for (let ed of editions) { + if (ed.querySelectorAll("a")[0].href == url) { + edition = text(ed, ".editions-edition"); + date = text(ed, ".editions-date"); + } + } + + const infoGen = doc.querySelectorAll(".notice-header-grid-item"); + for (let infoBox of infoGen) { + + const value = ZU.trimInternal(infoBox.innerText); + + if (infoBox.querySelector(".auteurs")) { + auteurs = ZU.trimInternal(infoBox.querySelector(".auteurs").innerText).split(" • "); + Z.debug(auteurs); + } + + if (value.startsWith("Collection")) { collection = value.split(" : ")[1]; } + else if (value.startsWith("Marque")) { marque = value.split(" : ")[1]; } + else if (value.startsWith("ISBN")) { isbn = value.split(" : ")[1]; } + } + + const titre = ZU.trimInternal(text(doc, ".title")); + const abstract = ZU.trimInternal(text(doc, ".description")).replace("Description", ""); let newItem = new Z.Item("book"); for (let auteur of auteurs) { - const auteurNames = auteur.split(" "); - newItem.creators.push({ - firstName: auteurNames[0], - lastName: auteurNames[1], - creatorType: "author", - fieldMode: true - }); + newItem.creators.push(ZU.cleanAuthor(auteur, "author")); } newItem.title = titre; newItem.date = date; newItem.abstractNote = abstract; - newItem.ISBN = isbn; + newItem.ISBN = ZU.cleanISBN(isbn); newItem.edition = edition; newItem.publisher = marque; - newItem.language = "french"; + newItem.language = "fr"; newItem.series = collection; - newItem.url = url; newItem.complete(); } /** BEGIN TEST CASES **/ var testCases = [ + { + "type": "web", + "url": "https://bibliotheque.lefebvre-dalloz.fr/recherche?query=livre", + "items": "multiple" + }, + { + "type": "web", + "url": "https://bibliotheque.lefebvre-dalloz.fr/ouvrage/grands-arrets/grands-arrets-jurisprudence-civile-t1_9782247154579", + "items": [ + { + "itemType": "book", + "title": "Les grands arrêts de la jurisprudence civile T1", + "creators": [ + { + "firstName": "Henri", + "lastName": "Capitant", + "creatorType": "author" + }, + { + "firstName": "Yves", + "lastName": "Lequette", + "creatorType": "author" + }, + { + "firstName": "François", + "lastName": "Terré", + "creatorType": "author" + } + ], + "date": "Avril 2015", + "ISBN": "9782247154579", + "abstractNote": "La 13e édition des Grands arrêts de la jurisprudence civile coïncide avec le quatre-vingtième anniversaire de leur parution sous la signature de Henri Capitant. C'est dire que cet ouvrage est le précurseur de tous les recueils de Grands arrêts actuellement existants. Jamais démenti, son succès vient de ce qu'il offre un accès direct aux grandes décisions qui ont permis au Code civil de s'adapter à la réalité sociale contemporaine.L'ouvrage est scindé en deux tomes.Le premier volume réunit la totalité des matières étudiées, d'une université à l'autre, en licence 1 : Introduction, mais aussi droit des personnes, droit de la famille et droit des biens.S'y ajoutent le droit des régimes matrimoniaux et celui des successions et des libéralités qui, situés au confluent du droit de la famille et du droit du patrimoine, sont le prolongement naturel des disciplines précédentes.Le second volume rassemble la théorie générale des obligations (acte juridique, responsabilité, quasi-contrats, régime général) ainsi que les disciplines qui évoluent dans son orbite : contrats spéciaux, sûretés. Il correspond aux matières généralement enseignées en licence 2 et en licence 3.À l'occasion de cette 13e édition, les auteurs ont procédé à une importante mise à jour : nombre de commentaires ont été partiellement ou totalement réécrits pour prendre en compte les évolutions survenues depuis la précédente édition, il y a huit ans.", + "edition": "13e édition", + "language": "fr", + "libraryCatalog": "Dalloz Bibliothèque", + "publisher": "DALLOZ", + "series": "Grands arrêts", + "attachments": [], + "tags": [], + "notes": [], + "seeAlso": [] + } + ] + } ] /** END TEST CASES **/ diff --git a/Dalloz.js b/Dalloz.js index 37acdb390d5..84864ad14bb 100644 --- a/Dalloz.js +++ b/Dalloz.js @@ -2,14 +2,14 @@ "translatorID": "a59e99a6-42b0-4be6-bb0c-1ff688c3a8b3", "label": "Dalloz", "creator": "Alexandre Mimms", - "target": "https?://(?:www[.-])?dalloz(?:[.-]fr)?", + "target": "https?://(www\\.)?dalloz\\.fr", "minVersion": "5.0", "maxVersion": "", "priority": 100, "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2024-04-19 13:42:32" + "lastUpdated": "2024-04-23 09:45:03" } /* @@ -38,10 +38,10 @@ // - Make sure that the case report are correctly saved. // - PDF import : needs reverse engineering the internal api of the service. Seems like a quite complex one. -const citationAvecNumero = new RegExp(/^([\D]+)\s*(\d{4}),?\s?(n°\s?\d+) *,?\s*(p\.\s?\d+)*/); -const citationSansNumero = new RegExp(/^([\D]+)\s*(\d{4}),?\s*(p\.\d+)?/); -const regAnnee = new RegExp(/\d{4}/); -const docTypeId = new RegExp(/id=([^%]+)(?:%2F)?/); +const citationAvecNumero = /^([\D]+)\s*(\d{4}),?\s?(n°\s?\d+) *,?\s*(p\.\s?\d+)*/; +const citationSansNumero = /^([\D]+)\s*(\d{4}),?\s*(p\.\d+)?/; +const regAnnee = /\d{4}/; +const docTypeId = /id=([^%_]+)(?:%2F|_)?/; const codeDocument = new Map([ ["ENCY", "dictionary-entry"], @@ -49,7 +49,8 @@ const codeDocument = new Map([ ["AJ", "journalArticle"], ["ACTU", "blogPost"], ["RFDA", "journalArticle"], - ["CONS", "journalArticle"], + ["CONSCONST", "case"], + ["CONSTIT", "journalArticle"], ["DIPI", "journalArticle"], ["DS", "journalArticle"], ["JA", "journalArticle"], @@ -67,9 +68,26 @@ const codeDocument = new Map([ ["RSC", "journalArticle"], ["RTD", "journalArticle"], ["RPR", "journalArticle"], - ["RCJ", "journalArticle"] + ["RCJ", "journalArticle"], + ]); + + +function detectWeb(doc, url) { + if (url.includes('/documentation/Document')) { // Checks if the page is a document. + let id = url.match(docTypeId)[1]; + Z.debug(id); + if (codeDocument.get(id)) { return codeDocument.get(id) } + else if (idStartsWithKey(id)) { return codeDocument.get(id.substring(0, 2)); } // Gets the value of the key if it is a shorthand. + // Returns the type of the document according to the ID - refer to the const Map declared. + } + else if (url.includes('/documentation/Liste') && getSearchResults(doc, true)) { // Checks if the page is a list of results. + return 'multiple'; + } + return false; +} + // The following function checks if the ID passed as argument has an associated key (some IDs start with the same letters - easier than filing all available IDs). function idStartsWithKey(string) { for (let key of codeDocument.keys()) { @@ -82,28 +100,7 @@ function idStartsWithKey(string) { function scrapeJournalArticle(doc, url = doc.location.href) { // Since searches trigger a "< >" markup around the searched words, we have to edit that away before storing the values. - - const xhr = new XMLHttpRequest(); - xhr.open("POST", "https://www-dalloz-fr.docelec-u-paris2.idm.oclc.org/api/toolsAction/Document.html"); - xhr.setRequestHeader("Content-Type", "application/json;charset=utf-8"); - const body = JSON.stringify({ - title: "Hello World", - body: "My POST request", - userId: 900, - }); - xhr.onload = () => { - if (xhr.readyState == 4 && xhr.status == 201) { - Z.debug(JSON.parse(xhr.responseText)); - } else { - Z.debug(`Error: ${xhr.status}`); - } - }; - xhr.send(body); - - - - - let refDoc = ZU.trimInternal(text(doc, ".refDoc", 0).replace(/[<>]/g, "")); // gets the reference + let refDoc = ZU.trimInternal(text(doc, ".refDoc").replace(/[<>]/g, "")); // gets the reference let page, revue, numRevue, date; let auteurs = []; @@ -136,15 +133,9 @@ function scrapeJournalArticle(doc, url = doc.location.href) { let newItem = new Z.Item("journalArticle"); - newItem.title = ZU.trimInternal(text(doc, ".chronTITRE", 0)).replace(/[<>]/g, ""); + newItem.title = ZU.trimInternal(text(doc, ".chronTITRE")).replace(/[<>]/g, ""); for (let auth of auteurs) { // loop over the list of authors and set them as authors. - let authNames = auth.split(" "); - newItem.creators.push({ - firstName: authNames[0], - lastName: authNames[1], - creatorType: "author", - fieldMode: true - }); + newItem.creators.push(ZU.cleanAuthor(auth, "author")); } newItem.publicationTitle = revue; @@ -153,61 +144,51 @@ function scrapeJournalArticle(doc, url = doc.location.href) { newItem.pages = page; newItem.date = date; newItem.url = url; - newItem.language = "french"; + newItem.language = "fr"; newItem.complete(); } function scrapeCase(doc, url = doc.location.href) { - let juridiction; + let juridiction, titre, abstract, formation, date, volume, mentionPublication, numeroAffaire; if (url.includes("LEBON")) { juridiction = "Conseil d'État"; + // Since searches trigger a "< >" markup around the searched words, we have to edit that away before storing the values. + titre = ZU.trimInternal(text(doc, ".jurisJURI")).replace(/[<>]/g, ""); // gets the title of the document + abstract = ZU.trimInternal(text(doc, ".jurisSOMMAIRE")).replace(/[<>]/g, ""); // gets the abstract + formation = ZU.trimInternal(text(doc, ".jurisCHAM").replace(/[<>]/g, "")); // gets the reference + date = ZU.trimInternal(text(doc, ".jurisDATE").replace(/[<>]/g, "")); + volume = date.split("-")[2]; + mentionPublication = ZU.trimInternal(text(doc, ".commentPopupNDC b").replace(/[<>]/g, "")); + numeroAffaire = ZU.trimInternal(text(doc, ".jurisNAAF").replace(/[<>]/g, "").replace("n° ", "")); } + else { + juridiction = ZU.trimInternal(text(doc, ".book-header-title-caselaw__juridiction")); + date = ZU.trimInternal(text(doc, ".book-header-title-caselaw__date")); + numeroAffaire = ZU.trimInternal(text(doc, ".book-header-title-caselaw__references")); + abstract = ""; - // Since searches trigger a "< >" markup around the searched words, we have to edit that away before storing the values. - const titre = ZU.trimInternal(text(doc, ".jurisJURI", 0)).replace(/[<>]/g, ""); // gets the title of the document - const abstract = ZU.trimInternal(text(doc, ".jurisSOMMAIRE")).replace(/[<>]/g, ""); // gets the abstract - const formation = ZU.trimInternal(text(doc, ".jurisCHAM", 0).replace(/[<>]/g, "")); // gets the reference - const date = ZU.trimInternal(text(doc, ".jurisDATE", 0).replace(/[<>]/g, "")); - const volume = date.split("-")[2]; - const mentionPublication = ZU.trimInternal(text(doc, ".commentPopupNDC b", 0).replace(/[<>]/g, "")); - const numeroAffaire = ZU.trimInternal(text(doc, ".jurisNAAF", 0).replace(/[<>]/g, "").replace("n° ", "")); + if (juridiction == "Conseil constitutionnel") { titre = `Cons. constit., ${numeroAffaire}, ${date}`; } + else { titre = `${juridiction}, ${date}, ${numeroAffaire}`; } + + } let newItem = new Z.Item("case"); + newItem.caseName = titre; + newItem.court = juridiction; newItem.reporter = mentionPublication; newItem.abstractNote = abstract.replace("Sommaire : ", ""); newItem.court = juridiction; newItem.dateDecided = date; - newItem.reporterVolume = volume; + newItem.reporterVolume = volume || ""; newItem.docketNumber = numeroAffaire; - newItem.language = "french"; + newItem.language = "fr"; newItem.url = url; newItem.extra = formation; newItem.complete(); } -// function scrapeBlog(doc, url = doc.location.href) { - -// } - -function detectWeb(doc, url) { - if (url.includes('/documentation/Document')) { // Checks if the page is a document. - let id = url.match(docTypeId); - id = id[1].substring(0, 4); - if (idStartsWithKey(id)) { - if (codeDocument.get(id)) { // If there is a corresponding ID. - return codeDocument.get(id); - } - return codeDocument.get(id.substring(0, 2)); // Gets the value of the key if it is a shorthand. - // Returns the type of the document according to the ID - refer to the const Map declared. - } - } - else if (url.includes('/documentation/Liste')) { // Checks if the page is a list of results. - return 'multiple'; - } - return false; -} // This function is basically as it was set by the template. I modified it so it is specific to Dalloz. function getSearchResults(doc, checkOnly) { @@ -215,8 +196,8 @@ function getSearchResults(doc, checkOnly) { var found = false; var rows = doc.querySelectorAll('.result-content'); for (let row of rows) { - let href = attr(row, "a", "href", 0); - let title = ZU.trimInternal(text(row, "a", 0)); + let href = attr(row, "a", "href"); + let title = ZU.trimInternal(text(row, "a")); if (!href || !title) continue; if (checkOnly) return true; found = true; @@ -249,11 +230,41 @@ async function scrape(doc, url = doc.location.href, docType) { else if (docType == "case") { scrapeCase(doc, url); } - // else if (docType == "blogPost") { - // scrapeBlog(doc, url); - // } } + + +/** BEGIN TEST CASES **/ +var testCases = [ + +] +/** END TEST CASES **/ /** BEGIN TEST CASES **/ var testCases = [ + { + "type": "web", + "url": "https://www.dalloz.fr/dalloz", + "detectedItemType": false, + "items": [] + }, + { + "type": "web", + "url": "https://www.dalloz.fr/documentation/Document?ctxt=0_YSR0MD1jb25zdGl0dXRpb27Cp3gkc2Y9c2ltcGxlLXNlYXJjaA%3D%3D&ctxtl=0_cyRwYWdlTnVtPTHCp3MkdHJpZGF0ZT1GYWxzZcKncyRzb3J0PSNkZWZhdWx0X0Rlc2PCp3Mkc2xOYlBhZz0yMMKncyRpc2Fibz1UcnVlwqdzJHBhZ2luZz1UcnVlwqdzJG9uZ2xldD3Cp3MkZnJlZXNjb3BlPVRydWXCp3Mkd29JUz1GYWxzZcKncyR3b1NQQ0g9RmFsc2XCp3MkZmxvd01vZGU9RmFsc2XCp3MkYnE9wqdzJHNlYXJjaExhYmVsPcKncyRzZWFyY2hDbGFzcz3Cp3Mkej0wREJGQzhEQi8xOEUwNjY0Mw%3D%3D&id=CONSCONST_LIEUVIDE_2024-01-18_20231076QPC", + "items": [ + { + "itemType": "case", + "caseName": "Cons. constit., n° 2023-1076 QPC, 18 janvier 2024", + "creators": [], + "dateDecided": "18 janvier 2024", + "court": "Conseil constitutionnel", + "docketNumber": "n° 2023-1076 QPC", + "language": "fr", + "url": "https://www.dalloz.fr/documentation/Document?ctxt=0_YSR0MD1jb25zdGl0dXRpb27Cp3gkc2Y9c2ltcGxlLXNlYXJjaA%3D%3D&ctxtl=0_cyRwYWdlTnVtPTHCp3MkdHJpZGF0ZT1GYWxzZcKncyRzb3J0PSNkZWZhdWx0X0Rlc2PCp3Mkc2xOYlBhZz0yMMKncyRpc2Fibz1UcnVlwqdzJHBhZ2luZz1UcnVlwqdzJG9uZ2xldD3Cp3MkZnJlZXNjb3BlPVRydWXCp3Mkd29JUz1GYWxzZcKncyR3b1NQQ0g9RmFsc2XCp3MkZmxvd01vZGU9RmFsc2XCp3MkYnE9wqdzJHNlYXJjaExhYmVsPcKncyRzZWFyY2hDbGFzcz3Cp3Mkej0wREJGQzhEQi8xOEUwNjY0Mw%3D%3D&id=CONSCONST_LIEUVIDE_2024-01-18_20231076QPC", + "attachments": [], + "tags": [], + "notes": [], + "seeAlso": [] + } + ] + } ] /** END TEST CASES **/ diff --git a/Jus Politicum.js b/Jus Politicum.js index c519326dc58..5272c914376 100644 --- a/Jus Politicum.js +++ b/Jus Politicum.js @@ -2,37 +2,37 @@ "translatorID": "aeb7f19b-0907-4117-bef4-08e36af4d31f", "label": "Jus Politicum", "creator": "Alexandre Mimms", - "target": "https?://(?:www[.-])?juspoliticum.com", + "target": "https?://(www\\.)?juspoliticum\\.com", "minVersion": "5.0", "maxVersion": "", "priority": 100, "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2024-04-18 16:40:24" + "lastUpdated": "2024-04-23 10:45:20" } /* - ***** BEGIN LICENSE BLOCK ***** + ***** BEGIN LICENSE BLOCK ***** - Copyright © 2024 Alexandre Mimms + Copyright © 2024 Alexandre Mimms - This file is part of Zotero. + This file is part of Zotero. - Zotero is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + Zotero is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Zotero is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. + Zotero is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. - You should have received a copy of the GNU Affero General Public License - along with Zotero. If not, see . + You should have received a copy of the GNU Affero General Public License + along with Zotero. If not, see . - ***** END LICENSE BLOCK ***** + ***** END LICENSE BLOCK ***** */ @@ -40,7 +40,7 @@ function detectWeb(doc, url) { if (url.includes('/article/')) { return 'journalArticle'; } - else if (url.includes('/searches')) { + else if (url.includes('/searches') && getSearchResults(doc, true)) { return 'multiple'; } return false; @@ -84,13 +84,7 @@ async function scrape(doc, url = doc.location.href) { let newItem = new Zotero.Item("journalArticle"); for (let auteur of auteurs) { - const auteurNames = auteur.split(" "); - newItem.creators.push({ - firstName: auteurNames[0], - lastName: auteurNames[1], - creatorType: "author", - fieldMode: true - }); + newItem.creators.push(ZU.cleanAuthor(auteur, "author")); } newItem.title = titre; @@ -106,3 +100,43 @@ async function scrape(doc, url = doc.location.href) { newItem.complete(); } +/** BEGIN TEST CASES **/ +var testCases = [ + { + "type": "web", + "url": "https://juspoliticum.com/searches?expression=constitution&release=&author=&theme=", + "items": "multiple" + }, + { + "type": "web", + "url": "https://juspoliticum.com/article/Situation-presente-du-constitutionnalisme-Quelques-reflexions-sur-l-idee-de-democratie-par-le-droit-25.html", + "items": [ + { + "itemType": "journalArticle", + "title": "Situation présente du constitutionnalisme. Quelques réflexions sur l’idée de démocratie par le droit", + "creators": [ + { + "firstName": "Jean-Marie", + "lastName": "Denquin", + "creatorType": "author", + "fieldMode": true + } + ], + "abstractNote": "Le constitutionnalisme est aujourd’hui identifié, à tort ou à raison, à l’idée d’une « démocratie par le droit », laquelle recouvre souvent un projet d’accomplissement des droits fondamentaux par des moyens juridiques. L’article analyse le déplacement que cela implique du point de vue du sens du mot « démocratie », mais aussi l’effet de survalorisation du droit qui en résulte. Ce phénomène de sacralisation du droit explique aussi comment on en est venu à confondre constitutionnalisme et droit constitutionnel. Plusieurs idées centrales du premier sont devenues, dans le second, des techniques dont on présume le caractère non problématique et la neutralité. Cela est démontré par une étude de l’évolution récente des notions de séparation des pouvoirs et de hiérarchie des normes, notamment dans la jurisprudence du Conseil constitutionnel. The present Situation of Constitutionalism. Some Thoughts about the Idea of Democracy by LawConstitutionalism is, more often than not, equated with a notion of “achieving democracy through the law”. This entails the notion that, in a democracy, the law is expected to ensure the development of fundamental rights. The purpose of the article is to analyse the shift thus involved in the concept of democracy, as well as the high expectations law has to meet in order to achieve these goals. It is suggested that this involves a collapse of “constitutionalism” into “constitutional law”. Several important aspects of constitutionalism, such as the separation of powers or the existence of a hierarchy of norms, are transformed into technical words of art which courts use as if they were neutral and uncontroversial. Die aktuelle Lage des Konstitutionalismus’. Ansichten über die Frage der Demokratie durch RechtDer Konstitutionalismus (Verfassungsstaat) ist heute oft mit der Idee einer „Demokratie durch Recht“ identifiziert. In diesem Sinne wird meistens Demokratie als Verwirklichung der Grundrechte durch juristische Mitteln angesehen. Dadurch erfährt der Begriff Demokratie eine Verschiebung. Zudem erfährt der Begriff des Rechts eine Aufwertung ja sogar eine Überbewertung, die zu einer Verwechslung von Konstitutionalismus und Verfassungsrecht führt. Mehrere zentrale Elemente des Konstitutionalismus sind im Verfassungsrecht zu blossen Techniken geworden, die man als unproblematisch und neutral postuliert. Dies wird im vorliegenden Aufsatz am Beispiel der neuesten Entwicklungen der Gewaltenteilung und der Normenhierarchie verdeutlicht.", + "issue": "1", + "libraryCatalog": "Jus Politicum", + "url": "https://juspoliticum.com/article/Situation-presente-du-constitutionnalisme-Quelques-reflexions-sur-l-idee-de-democratie-par-le-droit-25.html", + "attachments": [ + { + "title": "Full text PDF", + "mimeType": "application/pdf" + } + ], + "tags": [], + "notes": [], + "seeAlso": [] + } + ] + } +] +/** END TEST CASES **/ diff --git a/Lextenso.js b/Lextenso.js index 9032dca655b..65ba475be51 100644 --- a/Lextenso.js +++ b/Lextenso.js @@ -2,14 +2,14 @@ "translatorID": "3243d081-22c0-452c-8298-9d8a9fb5de2f", "label": "Lextenso", "creator": "Alexandre Mimms", - "target": "https?://(?:www[.-])?labase[.-]lextenso[.-](?:[.-]fr)?", + "target": "https?://(www\\.)?labase-lextenso\\.fr/", "minVersion": "5.0", "maxVersion": "", "priority": 100, "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2024-04-18 16:17:04" + "lastUpdated": "2024-04-23 11:30:31" } /* @@ -36,6 +36,20 @@ */ +function detectWeb(doc, url) { + if (url.includes('/lextenso/rechercher') && getSearchResults(doc, true)) { + return 'multiple'; + } + else if (doc.querySelector(".node-type-ouvrage")) { + return 'book'; + } + else if (doc.querySelector(".lextenso-document-article")) { + return 'journalArticle'; + } + return false; +} + + async function scrapeJournalArticle(doc, url) { const references = ZU.trimInternal(text(doc, ".document-metadata-origin").replace("Issu de ", "")).split(" - "); const revue = references[0]; @@ -43,30 +57,21 @@ async function scrapeJournalArticle(doc, url) { const page = references[2]; const titre = ZU.trimInternal(text(doc, "#page-title")); const auteurs = doc.querySelectorAll(".document-metadata-authors-name"); - const abstract = ZU.trimInternal(text(doc, ".cChapeau", 0)); - const date = ZU.trimInternal(text(doc, ".document-metadata-date", 0).replace("Date de parution : ", "")); - + const abstract = ZU.trimInternal(text(doc, ".cChapeau")); + const date = ZU.trimInternal(text(doc, ".document-metadata-date").replace("Date de parution : ", "")); let newItem = new Z.Item("journalArticle"); newItem.title = titre; - + for (let auteur of auteurs) { - const auteurNames = auteur.innerText.split(" "); - newItem.creators.push({ - firstName: auteurNames[0], - lastName: auteurNames[1], - creatorType: "author", - fieldMode: true, - }); + newItem.creators.push(ZU.cleanAuthor(auteur.innerText, "author")); } newItem.date = date; newItem.abstractNote = abstract; newItem.publicationTitle = revue; - newItem.issue = numeroRevue.replace(/n°[0]?/, ""); - newItem.pages = page.replace(/page\s?/, ""); - newItem.url = url; - newItem.language = "french"; + if (numeroRevue) { newItem.issue = numeroRevue.replace(/n°[0]?/, ""); } + if (page) { newItem.pages = page.replace(/page\s?/, ""); } newItem.complete(); } @@ -97,48 +102,27 @@ async function scrapeBook(doc, url) { newItem.title = titre; for (let auteur of auteurs) { - const auteurNames = auteur.innerText.split(" "); - newItem.creators.push({ - firstName: auteurNames[0], - lastName: auteurNames[1], - creatorType: "author", - fieldMode: true, - }); + newItem.creators.push(ZU.cleanAuthor(auteur.innerText, "author")); } newItem.date = date; newItem.publisher = publisher; newItem.ISBN = isbn; - // newItem.pages = page.replace(/page\s?/, ""); + newItem.pages = page.replace(/page\s?/, ""); newItem.url = url; - newItem.language = "french"; + newItem.language = "fr"; newItem.complete(); } -function detectWeb(doc, url) { - // TODO: adjust the logic here - if (url.includes('/lextenso/rechercher')) { - return 'multiple'; - } - else if (doc.querySelectorAll(".node-type-ouvrage").length > 0) { - return 'book'; - } - else if (doc.querySelectorAll(".lextenso-document-article").length > 0) { - return 'journalArticle'; - } - return false; -} function getSearchResults(doc, checkOnly) { var items = {}; var found = false; - var rows = doc.querySelectorAll('h2 > a.title[href*="/article/"]'); + var rows = doc.querySelectorAll('.hit'); for (let row of rows) { - - let href = row.href; - - let title = ZU.trimInternal(row.textContent); + let href = row.querySelectorAll("a")[0].href; + let title = ZU.trimInternal(row.querySelectorAll("h3")[0].innerText); if (!href || !title) continue; if (checkOnly) return true; found = true; @@ -174,9 +158,32 @@ async function scrape(doc, url = doc.location.href, docType) { var testCases = [ { "type": "web", - "url": "https://www-labase-lextenso-fr.docelec-u-paris2.idm.oclc.org/", + "url": "https://www.labase-lextenso.fr/", "detectedItemType": false, "items": [] + }, + { + "type": "web", + "url": "https://www.labase-lextenso.fr/revue-generale-du-droit-des-assurances/RGA201v5", + "items": [ + { + "itemType": "journalArticle", + "title": "La priorité du tiers lésé sur l'indemnité d'assurance", + "creators": [ + { + "firstName": "James", + "lastName": "Landel", + "creatorType": "author" + } + ], + "abstractNote": "Action directe ; C. assur., art. L. 124-3 ; Somme versée par l’assureur du responsable à la personne indiquée comme « preneur d'assurance / assuré » et « conducteur » sur le constat amiable ; Condamnation de l’assureur envers le tiers lésé, propriétaire du véhicule ; Montant ; Cour d’appel : soustraction des sommes payées au tiers lésé une somme versée à un tiers ; Cassation", + "libraryCatalog": "Lextenso", + "attachments": [], + "tags": [], + "notes": [], + "seeAlso": [] + } + ] } ] /** END TEST CASES **/ diff --git a/Vie Publique.js b/Vie Publique.js index 2587ce1e623..7544b7859f2 100644 --- a/Vie Publique.js +++ b/Vie Publique.js @@ -2,14 +2,14 @@ "translatorID": "858fa86d-82e2-43ca-9fc7-cf75b98101cb", "label": "Vie Publique", "creator": "Alexandre Mimms", - "target": "https?://(?:www.)?vie-publique(?:.fr)?", + "target": "https?://(www\\.)?vie-publique\\.fr/", "minVersion": "5.0", "maxVersion": "", "priority": 100, "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2024-04-19 05:56:53" + "lastUpdated": "2024-04-23 12:18:01" } /* @@ -35,23 +35,61 @@ ***** END LICENSE BLOCK ***** */ +function detectWeb(doc, url) { + if (url.includes('/rapport/')) { + return 'report'; + } + else if (url.includes('/discours')) { + return 'presentation'; + } + else if (url.includes('/recherche') && getSearchResults(doc, true)) { + return 'multiple'; + } + return false; +} + function scrapeRapport(doc, url) { - const titre = text(doc, "h1", 0); + let page, reportType; + const titre = text(doc, "h1"); const auteursString = doc.querySelectorAll(".book--author a"); - const auteursMorauxString = doc.querySelectorAll(".book--author-moral a"); + const auteursMoraux = doc.querySelectorAll(".book--author-moral a"); const abstract = text(doc, "#fiche-item-présentation"); const information = doc.querySelectorAll(".tabpanel--technique--details li"); - const date = text(doc, ".field--name-field-date-remise", 0); - const page = information[1].innerText.replace("Pagination : ", "").replace(" pages", ""); - const reportType = information[0].innerText.replace("Type de document : ", ""); - const pdfLink = doc.querySelectorAll(".book--actionsBox a")[0].href; + + if (information.length > 0) { + for (let info of information) { + let value = info.innerText + if (value.startsWith('Pagination')) { + page = info.innerText.replace("Pagination : ", "").replace(" pages", ""); + } + else if (value.startsWith('Type de document')) { + reportType = information[0].innerText.replace("Type de document : ", ""); + } + } + } + + const date = text(doc, ".field--name-field-date-remise"); + + const pdfLink = doc.querySelectorAll(".book--actionsBox a")[0].href; + const tags = doc.querySelectorAll(".vp-item-tag"); let newItem = new Z.Item('report'); newItem.title = titre || ""; newItem.date = date; - newItem.institution = auteursMorauxString[0].innerText; + + for (let aut of auteursString) { + newItem.creators.push(ZU.cleanAuthor(aut.innerText, "author")) + } + + if (auteursMoraux.length > 1) { + for (let autMoral of auteursMoraux) { + newItem.institution += ", " + autMoral.innerText; + } + } + else { newItem.institution = auteursMoraux[0].innerText; } + newItem.abstractNote = abstract; newItem.pages = page; newItem.reportType = reportType; @@ -64,35 +102,17 @@ function scrapeRapport(doc, url) { snapshot: false }]; - for (let aut of auteursString) { - const autNames = aut.innerText.split(" "); - - newItem.creators.push({ - firstName: autNames[0], - lastName: autNames[1], - creatorType: "author", - fieldMode: true, - }); - } - for (let tag of tags) { newItem.tags.push(tag.innerText); } - - if (auteursMorauxString.length > 1) { - for (let autMoral of auteursMorauxString) { - newItem.institution += ", " + autMoral.innerText; - } - } - newItem.complete(); } function scrapeSpeech(doc, url) { - const titre = text(doc, "h1", 0); + const titre = text(doc, "h1"); const auteursString = doc.querySelectorAll(".line-intervenant a"); - const date = text(doc, ".datetime", 0); + const date = text(doc, ".datetime"); const tags = doc.querySelectorAll(".vp-item-tag"); let newItem = new Z.Item('presentation'); @@ -101,36 +121,17 @@ function scrapeSpeech(doc, url) { newItem.url = url; for (let aut of auteursString) { - const autNames = aut.innerText.split(" "); - - newItem.creators.push({ - firstName: autNames[0], - lastName: autNames[1], - creatorType: "author", - fieldMode: true, - }); + newItem.creators.push(ZU.cleanAuthor(aut, "author")); } for (let tag of tags) { - newItem.tags.push(tag.innerText); + newItem.tags.push({ tag: tag.innerText }); } newItem.complete(); } -function detectWeb(doc, url) { - // TODO: adjust the logic here - if (url.includes('/rapport')) { - return 'report'; - } - else if (url.includes('/discours')) { - return 'presentation'; - } - else if (url.includes('/recherche')) { - return 'multiple'; - } - return false; -} + function getSearchResults(doc, checkOnly) { var items = {}; @@ -161,7 +162,8 @@ async function doWeb(doc, url) { } } -async function scrape(doc, url = doc.location.href, docType) { +async function scrape(doc, url = doc.location.href) { + const docType = detectWeb(doc, url); if (docType == "report") { scrapeRapport(doc, url); } @@ -170,3 +172,74 @@ async function scrape(doc, url = doc.location.href, docType) { } } +/** BEGIN TEST CASES **/ +var testCases = [ + { + "type": "web", + "url": "https://www.vie-publique.fr/", + "detectedItemType": false, + "items": [] + }, + { + "type": "web", + "url": "https://www.vie-publique.fr/rapport/286137-les-outre-mer-dans-la-constitution", + "items": [ + { + "itemType": "report", + "title": "Rapport d'information (...) sur les outre-mer dans la Constitution", + "creators": [ + { + "firstName": "Stéphane", + "lastName": "Artano", + "creatorType": "author" + } + ], + "date": "18 juillet 2022", + "abstractNote": "Le 29 juin 2022, les membres de la Délégation sénatoriale aux outre-mer et ceux de l'Association des juristes en droit des outre-mer (AJDOM) ont échangé au Sénat sur la situation des outre-mer dans la Constitution et débattu des trajectoires d'avenir pour les territoires concernés.\nCette réunion conjointe s'est déroulée autour de deux tables rondes.\nLa première, consacrée à la Nouvelle-Calédonie, a permis de pointer plusieurs interrogations. Il ressort notamment que la question du corps électoral est sans doute la plus sensible, à la fois politiquement et juridiquement, et qu'il sera très difficile de faire l'impasse sur une révision de la Constitution.\nLa seconde, axée sur les collectivités régies par les articles 73 et 74 de la Constitution, a mis en évidence les défauts et le caractère artificiel de cette dichotomie affichée.", + "institution": "Sénat. Délégation aux outre-mer", + "libraryCatalog": "Vie Publique", + "pages": "67", + "reportType": "Rapport parlementaire", + "url": "https://www.vie-publique.fr/rapport/286137-les-outre-mer-dans-la-constitution", + "attachments": [ + { + "title": "Full Text PDF", + "mimeType": "application/pdf", + "snapshot": false + } + ], + "tags": [ + { + "tag": "Collectivité d'outre mer" + }, + { + "tag": "Collectivités territoriales" + }, + { + "tag": "Constitution" + }, + { + "tag": "Institutions" + }, + { + "tag": "Institutions de l'Etat" + }, + { + "tag": "Outre-mer" + }, + { + "tag": "Statut juridique" + } + ], + "notes": [], + "seeAlso": [] + } + ] + }, + { + "type": "web", + "url": "https://www.vie-publique.fr/recherche?search_api_fulltext=constitution&f%5B0%5D=categories%3Arapport", + "items": "multiple" + } +] +/** END TEST CASES **/ From 1ae8e9a645df2f63c985b34df201d059756d8ea7 Mon Sep 17 00:00:00 2001 From: carnetdethese Date: Tue, 23 Apr 2024 14:21:06 +0200 Subject: [PATCH 14/15] . --- Vie Publique.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Vie Publique.js b/Vie Publique.js index 7544b7859f2..03801672e51 100644 --- a/Vie Publique.js +++ b/Vie Publique.js @@ -9,7 +9,7 @@ "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2024-04-23 12:18:01" + "lastUpdated": "2024-04-23 12:20:52" } /* @@ -103,7 +103,7 @@ function scrapeRapport(doc, url) { }]; for (let tag of tags) { - newItem.tags.push(tag.innerText); + newItem.tags.push({ tag: tag.innerText }); } newItem.complete(); From 2da8dfb521ddfdf7892128b7cb146758b8d2acc8 Mon Sep 17 00:00:00 2001 From: carnetdethese <113130642+carnetdethese@users.noreply.github.com> Date: Wed, 2 Oct 2024 12:15:52 +0200 Subject: [PATCH 15/15] Update Dalloz Bibliotheque.js --- Dalloz Bibliotheque.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dalloz Bibliotheque.js b/Dalloz Bibliotheque.js index 3c89a8b9656..174ca5d657b 100644 --- a/Dalloz Bibliotheque.js +++ b/Dalloz Bibliotheque.js @@ -2,7 +2,7 @@ "translatorID": "2ea86ad9-71ca-410c-9126-9d7d98722acf", "label": "Dalloz Bibliothèque", "creator": "Alexandre Mimms", - "target": "https?://(www\\.)?bibliotheque\\.lefebvre-dalloz\\.fr", + "target": "https?://(www\\.)?bibliotheque\\.lefebvre\\.dalloz\\.fr", "minVersion": "5.0", "maxVersion": "", "priority": 100,