Skip to content

Commit

Permalink
rename project to GlamScrap to also handle museums
Browse files Browse the repository at this point in the history
  • Loading branch information
don-vip committed Jan 19, 2024
1 parent df8a6e4 commit 5273d5a
Show file tree
Hide file tree
Showing 18 changed files with 84 additions and 84 deletions.
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@
<modelVersion>4.0.0</modelVersion>

<groupId>com.github.donvip</groupId>
<artifactId>archscrap</artifactId>
<artifactId>glamscrap</artifactId>
<version>0.2.0-SNAPSHOT</version>
<packaging>jar</packaging>

<name>archscrap</name>
<name>glamscrap</name>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>21</maven.compiler.source>
<maven.compiler.target>21</maven.compiler.target>
<sonar.projectKey>don-vip_ArchScrap</sonar.projectKey>
<sonar.projectKey>don-vip_GlamScrap</sonar.projectKey>
<sonar.organization>don-vip</sonar.organization>
<sonar.host.url>https://sonarcloud.io</sonar.host.url>
</properties>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
/**
* This file is part of ArchScrap.
* This file is part of GlamScrap.
*
* ArchScrap is free software: you can redistribute it and/or modify
* GlamScrap is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ArchScrap is distributed in the hope that it will be useful,
* GlamScrap is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ArchScrap. If not, see <http://www.gnu.org/licenses/>.
* along with GlamScrap. If not, see <http://www.gnu.org/licenses/>.
*/
package com.github.donvip.archscrap;
package com.github.donvip.glamscrap;

import java.io.File;
import java.io.FileOutputStream;
Expand Down Expand Up @@ -44,15 +44,15 @@
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.github.donvip.archscrap.archives.paris.ParisArchScrap;
import com.github.donvip.archscrap.archives.toulouse.ToulouseArchScrap;
import com.github.donvip.archscrap.domain.Fonds;
import com.github.donvip.archscrap.domain.Notice;
import com.github.donvip.archscrap.uploadtools.Pattypan;
import com.github.donvip.archscrap.uploadtools.UploadTool;
import com.github.donvip.archscrap.wikidata.Author;
import com.github.donvip.glamscrap.domain.Fonds;
import com.github.donvip.glamscrap.domain.Notice;
import com.github.donvip.glamscrap.institutions.paris.ParisArchivesGlamScrap;
import com.github.donvip.glamscrap.institutions.toulouse.ToulouseArchivesGlamScrap;
import com.github.donvip.glamscrap.uploadtools.Pattypan;
import com.github.donvip.glamscrap.uploadtools.UploadTool;
import com.github.donvip.glamscrap.wikidata.Author;

public abstract class ArchScrap implements AutoCloseable {
public abstract class GlamScrap implements AutoCloseable {

private static final Logger LOGGER = LogManager.getLogger();

Expand Down Expand Up @@ -97,7 +97,7 @@ public boolean contains(int i) {

protected final Set<String> missedNotices = new TreeSet<>();

protected ArchScrap(String city) {
protected GlamScrap(String city) {
LOGGER.debug("Initializing Hibernate...");
System.setProperty("city", city);
this.city = city;
Expand All @@ -124,15 +124,15 @@ public final void close() throws IOException {
}

public static void usage() {
LOGGER.info("Usage: ArchScrap [paris|toulouse] scrap [<fonds>[,<fonds>]*] | check [<fonds>[,<fonds>]*] | download [<fonds>[,<fonds>]*] | pattypan [<fonds>] | gui");
LOGGER.info("Usage: GlamScrap [paris|toulouse] scrap [<fonds>[,<fonds>]*] | check [<fonds>[,<fonds>]*] | download [<fonds>[,<fonds>]*] | pattypan [<fonds>] | gui");
}

public static void main(String[] args) {
if (args.length < 2) {
usage();
return;
}
try (ArchScrap app = buildApp(args[0])) {
try (GlamScrap app = buildApp(args[0])) {
switch (args[1]) {
case "scrap":
app.doScrap(args);
Expand Down Expand Up @@ -173,10 +173,10 @@ private void doUploadTool(String[] args, UploadTool tool) throws IOException {

public abstract String getInstitution();

private static ArchScrap buildApp(String city) {
private static GlamScrap buildApp(String city) {
switch (city) {
case "paris": return new ParisArchScrap();
case "toulouse": return new ToulouseArchScrap();
case "paris": return new ParisArchivesGlamScrap();
case "toulouse": return new ToulouseArchivesGlamScrap();
default: throw new IllegalArgumentException("Unsupported city: " + city);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
/**
* This file is part of ArchScrap.
* This file is part of GlamScrap.
*
* ArchScrap is free software: you can redistribute it and/or modify
* GlamScrap is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ArchScrap is distributed in the hope that it will be useful,
* GlamScrap is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ArchScrap. If not, see <http://www.gnu.org/licenses/>.
* along with GlamScrap. If not, see <http://www.gnu.org/licenses/>.
*/
package com.github.donvip.archscrap;
package com.github.donvip.glamscrap;

import java.time.LocalDate;
import java.time.Year;
Expand All @@ -24,7 +24,7 @@
import org.apache.logging.log4j.Logger;
import org.apache.uima.cas.FSIterator;

import com.github.donvip.archscrap.domain.Notice;
import com.github.donvip.glamscrap.domain.Notice;

import de.unihd.dbs.heideltime.standalone.DocumentType;
import de.unihd.dbs.heideltime.standalone.HeidelTimeStandalone;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
/**
* This file is part of ArchScrap.
* This file is part of GlamScrap.
*
* ArchScrap is free software: you can redistribute it and/or modify
* GlamScrap is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ArchScrap is distributed in the hope that it will be useful,
* GlamScrap is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ArchScrap. If not, see <http://www.gnu.org/licenses/>.
* along with GlamScrap. If not, see <http://www.gnu.org/licenses/>.
*/
package com.github.donvip.archscrap.domain;
package com.github.donvip.glamscrap.domain;

import java.math.BigInteger;
import java.util.ArrayList;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
/**
* This file is part of ArchScrap.
* This file is part of GlamScrap.
*
* ArchScrap is free software: you can redistribute it and/or modify
* GlamScrap is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ArchScrap is distributed in the hope that it will be useful,
* GlamScrap is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ArchScrap. If not, see <http://www.gnu.org/licenses/>.
* along with GlamScrap. If not, see <http://www.gnu.org/licenses/>.
*/
package com.github.donvip.archscrap.domain;
package com.github.donvip.glamscrap.domain;

import java.net.URL;
import java.time.LocalDate;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.github.donvip.archscrap.archives.paris;
package com.github.donvip.glamscrap.institutions.paris;

import static java.util.stream.Collectors.toMap;

Expand All @@ -19,15 +19,15 @@
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.github.donvip.archscrap.ArchScrap;
import com.github.donvip.archscrap.domain.Fonds;
import com.github.donvip.archscrap.domain.Notice;
import com.github.donvip.archscrap.wikidata.Author;
import com.github.donvip.glamscrap.GlamScrap;
import com.github.donvip.glamscrap.domain.Fonds;
import com.github.donvip.glamscrap.domain.Notice;
import com.github.donvip.glamscrap.wikidata.Author;

/**
* https://archives.paris.fr/a/234/catalogues-des-documents-figures/
*/
public class ParisArchScrap extends ArchScrap {
public class ParisArchivesGlamScrap extends GlamScrap {

private static final Logger LOGGER = LogManager.getLogger();

Expand All @@ -54,7 +54,7 @@ public class ParisArchScrap extends ArchScrap {
PREDEFINED_AUTHORS.put("Harand, F. (photographe)", new Author("Harand", "François", "photographe"));
}

public ParisArchScrap() {
public ParisArchivesGlamScrap() {
super("paris");
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.github.donvip.archscrap.archives.paris;
package com.github.donvip.glamscrap.institutions.paris;

import java.net.MalformedURLException;
import java.net.URL;
Expand All @@ -8,8 +8,8 @@
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import com.github.donvip.archscrap.Parser;
import com.github.donvip.archscrap.domain.Notice;
import com.github.donvip.glamscrap.Parser;
import com.github.donvip.glamscrap.domain.Notice;

class ParisParser extends Parser {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.github.donvip.archscrap.archives.toulouse;
package com.github.donvip.glamscrap.institutions.toulouse;

import java.io.IOException;
import java.util.ArrayList;
Expand All @@ -15,12 +15,12 @@
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.github.donvip.archscrap.ArchScrap;
import com.github.donvip.archscrap.domain.Fonds;
import com.github.donvip.archscrap.domain.Notice;
import com.github.donvip.archscrap.wikidata.Author;
import com.github.donvip.glamscrap.GlamScrap;
import com.github.donvip.glamscrap.domain.Fonds;
import com.github.donvip.glamscrap.domain.Notice;
import com.github.donvip.glamscrap.wikidata.Author;

public class ToulouseArchScrap extends ArchScrap {
public class ToulouseArchivesGlamScrap extends GlamScrap {

private static final Logger LOGGER = LogManager.getLogger();

Expand All @@ -39,7 +39,7 @@ public class ToulouseArchScrap extends ArchScrap {
ALLOWED_GAPS.put("24Fi", new Range(215, 99));
}

public ToulouseArchScrap() {
public ToulouseArchivesGlamScrap() {
super("toulouse");
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.github.donvip.archscrap.archives.toulouse;
package com.github.donvip.glamscrap.institutions.toulouse;

import java.time.Year;
import java.time.format.DateTimeParseException;
Expand All @@ -17,9 +17,9 @@
import org.jsoup.select.Elements;
import org.jsoup.select.Selector.SelectorParseException;

import com.github.donvip.archscrap.Parser;
import com.github.donvip.archscrap.domain.Fonds;
import com.github.donvip.archscrap.domain.Notice;
import com.github.donvip.glamscrap.Parser;
import com.github.donvip.glamscrap.domain.Fonds;
import com.github.donvip.glamscrap.domain.Notice;

class ToulouseParser extends Parser {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.github.donvip.archscrap.uploadtools;
package com.github.donvip.glamscrap.uploadtools;

import static java.util.stream.Collectors.joining;

Expand All @@ -17,11 +17,11 @@
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;

import com.github.donvip.archscrap.ArchScrap;
import com.github.donvip.archscrap.domain.Fonds;
import com.github.donvip.archscrap.domain.Notice;
import com.github.donvip.archscrap.wikidata.Author;
import com.github.donvip.archscrap.wikidata.WikidataUtils;
import com.github.donvip.glamscrap.GlamScrap;
import com.github.donvip.glamscrap.domain.Fonds;
import com.github.donvip.glamscrap.domain.Notice;
import com.github.donvip.glamscrap.wikidata.Author;
import com.github.donvip.glamscrap.wikidata.WikidataUtils;

public class Pattypan extends UploadTool {

Expand All @@ -37,7 +37,7 @@ protected String getFileExtension() {
}

@Override
protected void writeContents(Fonds f, ArchScrap cityScrap, OutputStream out) throws IOException {
protected void writeContents(Fonds f, GlamScrap cityScrap, OutputStream out) throws IOException {
try (Workbook wb = new HSSFWorkbook()) {
Path downloadDir = cityScrap.getDownloadDir(f);
Sheet data = wb.createSheet("Data");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.github.donvip.archscrap.uploadtools;
package com.github.donvip.glamscrap.uploadtools;

import java.io.IOException;
import java.io.InputStream;
Expand All @@ -11,22 +11,22 @@
import java.util.Set;
import java.util.function.Consumer;

import com.github.donvip.archscrap.ArchScrap;
import com.github.donvip.archscrap.domain.Fonds;
import com.github.donvip.glamscrap.GlamScrap;
import com.github.donvip.glamscrap.domain.Fonds;

public abstract class UploadTool {

private static final Set<String> LOGGED_MESSAGES = new HashSet<>();

public void writeUploadFile(Fonds f, ArchScrap cityScrap) throws IOException {
public void writeUploadFile(Fonds f, GlamScrap cityScrap) throws IOException {
Path dir = Files.createDirectories(cityScrap.getDownloadDir(f).resolve("upload"));
String filename = getClass().getSimpleName().toLowerCase(Locale.ENGLISH) + '.' + getFileExtension();
try (OutputStream out = Files.newOutputStream(dir.resolve(filename))) {
writeContents(f, cityScrap, out);
}
}

protected abstract void writeContents(Fonds f, ArchScrap cityScrap, OutputStream out) throws IOException;
protected abstract void writeContents(Fonds f, GlamScrap cityScrap, OutputStream out) throws IOException;

protected abstract String getFileExtension();

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.github.donvip.archscrap.wikidata;
package com.github.donvip.glamscrap.wikidata;

import java.time.LocalDate;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.github.donvip.archscrap.wikidata;
package com.github.donvip.glamscrap.wikidata;

import static java.util.stream.Collectors.joining;

Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/config.github.props
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ considerTemponym = false
# Path to TreeTagger home directory
###################################
# Ensure there is no whitespace in the path (escape any spaces that cannot be avoided)
treeTaggerHome = /home/runner/work/ArchScrap/ArchScrap/TreeTagger/linux
treeTaggerHome = /home/runner/work/GlamScrap/GlamScrap/TreeTagger/linux
# This one is only necessary if you want to process Chinese documents.
chineseTokenizerPath = SET ME IN CONFIG.PROPS! (e.g., /home/jannik/treetagger/chinese-tokenizer)

Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/config.windows.props
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ considerTemponym = false
# Path to TreeTagger home directory
###################################
# Ensure there is no whitespace in the path (escape any spaces that cannot be avoided)
treeTaggerHome = C:\\GIT\\ArchScrap\\TreeTagger\\windows
treeTaggerHome = C:\\GIT\\GlamScrap\\TreeTagger\\windows
# This one is only necessary if you want to process Chinese documents.
chineseTokenizerPath = SET ME IN CONFIG.PROPS! (e.g., /home/jannik/treetagger/chinese-tokenizer)

Expand Down
Loading

0 comments on commit 5273d5a

Please sign in to comment.