Skip to content

Commit

Permalink
WIP - initial support of Keepeek/GWT
Browse files Browse the repository at this point in the history
  • Loading branch information
don-vip committed Jan 21, 2024
1 parent 5273d5a commit ab270de
Show file tree
Hide file tree
Showing 11 changed files with 373 additions and 17 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
<dependency>
<groupId>org.apache.uima</groupId>
<artifactId>uimaj-core</artifactId>
<version>3.5.0</version>
<version>2.11.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
Expand Down
32 changes: 26 additions & 6 deletions src/main/java/com/github/donvip/glamscrap/GlamScrap.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpRequest.BodyPublishers;
import java.net.http.HttpResponse;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.file.Files;
Expand Down Expand Up @@ -48,6 +53,7 @@
import com.github.donvip.glamscrap.domain.Notice;
import com.github.donvip.glamscrap.institutions.paris.ParisArchivesGlamScrap;
import com.github.donvip.glamscrap.institutions.toulouse.ToulouseArchivesGlamScrap;
import com.github.donvip.glamscrap.institutions.toulouse.ToulousePhotothequeGlamScrap;
import com.github.donvip.glamscrap.uploadtools.Pattypan;
import com.github.donvip.glamscrap.uploadtools.UploadTool;
import com.github.donvip.glamscrap.wikidata.Author;
Expand Down Expand Up @@ -124,7 +130,7 @@ public final void close() throws IOException {
}

public static void usage() {
LOGGER.info("Usage: GlamScrap [paris|toulouse] scrap [<fonds>[,<fonds>]*] | check [<fonds>[,<fonds>]*] | download [<fonds>[,<fonds>]*] | pattypan [<fonds>] | gui");
LOGGER.info("Usage: GlamScrap [paris_archives|toulouse_archives|toulouse_photos] scrap [<fonds>[,<fonds>]*] | check [<fonds>[,<fonds>]*] | download [<fonds>[,<fonds>]*] | pattypan [<fonds>] | gui");
}

public static void main(String[] args) {
Expand Down Expand Up @@ -173,11 +179,12 @@ private void doUploadTool(String[] args, UploadTool tool) throws IOException {

public abstract String getInstitution();

private static GlamScrap buildApp(String city) {
switch (city) {
case "paris": return new ParisArchivesGlamScrap();
case "toulouse": return new ToulouseArchivesGlamScrap();
default: throw new IllegalArgumentException("Unsupported city: " + city);
/**
 * Instantiates the scraper registered for the given institution key.
 *
 * @param institution one of {@code paris_archives}, {@code toulouse_archives} or {@code toulouse_photos}
 * @return a new scraper instance for that institution
 * @throws IllegalArgumentException if the key is none of the supported values
 */
private static GlamScrap buildApp(String institution) {
    return switch (institution) {
        case "paris_archives" -> new ParisArchivesGlamScrap();
        case "toulouse_archives" -> new ToulouseArchivesGlamScrap();
        case "toulouse_photos" -> new ToulousePhotothequeGlamScrap();
        default -> throw new IllegalArgumentException("Unsupported institution: " + institution);
    };
}

Expand Down Expand Up @@ -375,6 +382,19 @@ protected final Document fetch(String doc) throws IOException {
return Jsoup.connect(getBaseUrl() + doc).get();
}

/**
 * Sends {@code body} with an HTTP POST to {@code getBaseUrl() + doc} and returns the response body as text.
 *
 * @param doc path appended to the base URL
 * @param body request body, sent as a string
 * @param headers alternating header names and values, handed as-is to {@link HttpRequest.Builder#headers(String...)}
 * @return the response body
 * @throws IOException if sending or receiving fails
 * @throws InterruptedException if the calling thread is interrupted while waiting for the response
 */
protected final String fetchPost(String doc, String body, String...headers) throws IOException, InterruptedException {
    // Same call order as a fluent chain: headers, then method/body, then target URI.
    final HttpRequest.Builder post = HttpRequest.newBuilder();
    post.headers(headers);
    post.method("POST", BodyPublishers.ofString(body));
    post.uri(URI.create(getBaseUrl() + doc));
    return fetchPost(post.build());
}

/**
 * Sends an already built request with a fresh default {@link HttpClient} and returns the response body as text.
 * The client is closed before this method returns.
 *
 * @param request the request to send
 * @return the response body, whatever the HTTP status code is
 * @throws IOException if sending or receiving fails
 * @throws InterruptedException if the calling thread is interrupted while waiting for the response
 */
protected final String fetchPost(HttpRequest request) throws IOException, InterruptedException {
    try (HttpClient http = HttpClient.newHttpClient()) {
        final HttpResponse<String> response = http.send(request, HttpResponse.BodyHandlers.ofString());
        return response.body();
    }
}

public abstract String getOtherFields(Notice n);

public abstract List<String> getCategories(Notice n);
Expand Down
101 changes: 101 additions & 0 deletions src/main/java/com/github/donvip/glamscrap/Gwt.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package com.github.donvip.glamscrap;

import static java.util.Objects.requireNonNull;

import java.net.URI;
import java.net.http.HttpRequest;
import java.net.http.HttpRequest.BodyPublishers;
import java.net.http.HttpRequest.Builder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

import org.apache.commons.lang3.ArrayUtils;

/**
 * Helpers to hand-craft GWT-RPC requests without the GWT client runtime.
 * <p>
 * The payload produced here has the shape
 * {@code version|flags|stringCount|string1|...|stringN|int1|...|intM|}: a table of strings followed by a
 * sequence of integers, most of which are 1-based references into that table ({@code 0} meaning "no string").
 */
public class Gwt {

    /** Type signature sent for {@code java.lang.Boolean} values. */
    public static final String BOOLEAN = "java.lang.Boolean/476441737";
    /** Type signature sent for {@code java.lang.Integer} values. */
    public static final String INTEGER = "java.lang.Integer/3438268394";
    /** Type signature sent for {@code java.lang.String} values. */
    public static final String STRING = "java.lang.String/2004016611";

    /** Protocol version written at the start of every payload. */
    private static final int PROTOCOL_VERSION = 7;
    /** Flags written after the protocol version. */
    private static final int NO_FLAGS = 0;
    /** Separator between payload tokens. */
    private static final String SEPARATOR = "|";

    /**
     * A value to serialize in a request, possibly made of nested fields.
     *
     * @param declaredType type signature registered in the string table and, for top-level arguments,
     *        referenced in the argument type list; may be {@code null}
     * @param runtimeType type signature registered in the string table and referenced right before the value;
     *        may be {@code null}
     * @param stringValue string content, used when the declared type is a string and there are no fields;
     *        may be {@code null}
     * @param intValue raw integer written when the runtime type is an integer or a boolean and there are no fields
     * @param fieldsInAlphabeticalOrder nested values, serialized in list order; must not be {@code null}
     */
    public record TypedValue(String declaredType, String runtimeType, String stringValue, int intValue, List<TypedValue> fieldsInAlphabeticalOrder) {
    }

    /**
     * Builds a ready-to-send GWT-RPC POST request.
     *
     * @param uri URL the request is posted to
     * @param baseUrl module base URL, sent both in the payload and in the {@code X-GWT-Module-Base} header
     * @param strongNamePolicyFile second entry of the string table
     * @param permutation value of the {@code X-GWT-Permutation} header
     * @param service third entry of the string table
     * @param method fourth entry of the string table
     * @param argTypes arguments of the call
     * @return the built request
     */
    public static HttpRequest request(String uri, String baseUrl, String strongNamePolicyFile, String permutation, String service, String method, List<TypedValue> argTypes) {
        return requestBuilder(uri, baseUrl, strongNamePolicyFile, permutation, service, method, argTypes).build();
    }

    /**
     * Same as {@link #request} but returns the builder, so that callers can still customize the request.
     *
     * @param uri URL the request is posted to
     * @param baseUrl module base URL, sent both in the payload and in the {@code X-GWT-Module-Base} header
     * @param strongNamePolicyFile second entry of the string table
     * @param permutation value of the {@code X-GWT-Permutation} header
     * @param service third entry of the string table
     * @param method fourth entry of the string table
     * @param argTypes arguments of the call
     * @return a builder preconfigured with headers, body and target URI
     */
    public static Builder requestBuilder(String uri, String baseUrl, String strongNamePolicyFile, String permutation, String service, String method, List<TypedValue> argTypes) {
        return HttpRequest.newBuilder()
                .headers("Content-Type", "text/x-gwt-rpc; charset=utf-8", "X-GWT-Module-Base", baseUrl, "X-GWT-Permutation", permutation)
                .method("POST", BodyPublishers.ofString(requestPayload(baseUrl, strongNamePolicyFile, service, method, argTypes)))
                .uri(URI.create(uri));
    }

    /**
     * Serializes a call into the textual payload.
     *
     * @param baseUrl first entry of the string table; must not be {@code null}
     * @param strongNamePolicyFile second entry of the string table; must not be {@code null}
     * @param service third entry of the string table; must not be {@code null}
     * @param method fourth entry of the string table; must not be {@code null}
     * @param typedValues arguments of the call
     * @return the payload, terminated by a separator
     * @throws NullPointerException if one of the four leading strings, or a nested field, is {@code null}
     */
    public static String requestPayload(String baseUrl, String strongNamePolicyFile, String service, String method, List<TypedValue> typedValues) {
        // String table: the four fixed entries first, then every distinct string reachable from the arguments.
        List<String> strings = new ArrayList<>();
        strings.add(requireNonNull(baseUrl));
        strings.add(requireNonNull(strongNamePolicyFile));
        strings.add(requireNonNull(service));
        strings.add(requireNonNull(method));
        for (TypedValue typedVal : typedValues) {
            addStrings(strings, typedVal);
        }

        List<Integer> ints = new ArrayList<>();
        ints.add(1); // baseUrl
        ints.add(2); // strongNamePolicyFile
        ints.add(3); // service
        ints.add(4); // method
        ints.add(typedValues.size()); // number of arguments
        for (TypedValue typedVal : typedValues) {
            // indexOf yields -1 for a null/unknown declared type, hence a 0 reference.
            ints.add(strings.indexOf(typedVal.declaredType) + 1);
        }
        for (TypedValue typedVal : typedValues) {
            addValueIndice(strings, ints, typedVal);
        }

        // Plain JDK unboxing: no third-party helper needed for this conversion.
        return requestPayload(strings.toArray(new String[0]), ints.stream().mapToInt(Integer::intValue).toArray());
    }

    /**
     * Registers, depth first, the declared type, runtime type and string value of a value and of its fields
     * in the string table, skipping nulls and strings already present.
     */
    private static void addStrings(List<String> strings, TypedValue typedVal) {
        if (typedVal.declaredType != null && !strings.contains(typedVal.declaredType)) {
            strings.add(typedVal.declaredType);
        }
        if (typedVal.runtimeType != null && !strings.contains(typedVal.runtimeType)) {
            strings.add(typedVal.runtimeType);
        }
        if (typedVal.stringValue != null && !strings.contains(typedVal.stringValue)) {
            strings.add(typedVal.stringValue);
        }
        for (TypedValue field : typedVal.fieldsInAlphabeticalOrder) {
            addStrings(strings, requireNonNull(field));
        }
    }

    /**
     * Appends, depth first, the integers describing a value: the reference to its runtime type (when set), then,
     * for a value without fields, either the reference to its string content (declared strings, {@code 0} when
     * the content is {@code null}) or its raw integer (runtime integers and booleans).
     */
    private static void addValueIndice(List<String> strings, List<Integer> ints, TypedValue typedVal) {
        if (typedVal.runtimeType != null) {
            ints.add(strings.indexOf(typedVal.runtimeType) + 1);
        }
        if (typedVal.fieldsInAlphabeticalOrder.isEmpty()) {
            if (typedVal.declaredType != null && typedVal.declaredType.startsWith("java.lang.String")) {
                ints.add(typedVal.stringValue != null ? strings.indexOf(typedVal.stringValue) + 1 : 0);
            } else if (typedVal.runtimeType != null && (typedVal.runtimeType.startsWith("java.lang.Integer") || typedVal.runtimeType.startsWith("java.lang.Boolean"))) {
                ints.add(typedVal.intValue);
            }
        }
        for (TypedValue field : typedVal.fieldsInAlphabeticalOrder) {
            addValueIndice(strings, ints, field);
        }
    }

    /** Formats a payload with the default protocol version and flags. */
    private static String requestPayload(String[] strings, int[] ints) {
        return requestPayload(PROTOCOL_VERSION, NO_FLAGS, strings, ints);
    }

    /**
     * Formats the final payload. Strings are escaped here, at the very end, so that deduplication and index
     * lookups performed earlier keep working on the raw values.
     */
    private static String requestPayload(int protocolVersion, int flags, String[] strings, int[] ints) {
        return String.format("%d|%d|%d|%s|%s|", protocolVersion, flags, strings.length,
                Arrays.stream(strings).map(Gwt::escape).collect(Collectors.joining(SEPARATOR)),
                Arrays.stream(ints).mapToObj(Integer::toString).collect(Collectors.joining(SEPARATOR)));
    }

    /**
     * Escapes the characters that would otherwise break tokenization of the payload, using the same
     * replacements as the GWT client serializer: NUL becomes {@code \0}, backslash becomes {@code \\} and
     * the separator becomes {@code \!}.
     */
    private static String escape(String raw) {
        StringBuilder escaped = null; // allocated lazily: most strings need no escaping
        for (int i = 0; i < raw.length(); i++) {
            final char c = raw.charAt(i);
            final String replacement;
            switch (c) {
                case '\u0000':
                    replacement = "\\0";
                    break;
                case '\\':
                    replacement = "\\\\";
                    break;
                case '|':
                    replacement = "\\!";
                    break;
                default:
                    replacement = null;
                    break;
            }
            if (replacement != null) {
                if (escaped == null) {
                    escaped = new StringBuilder(raw.length() + 8).append(raw, 0, i);
                }
                escaped.append(replacement);
            } else if (escaped != null) {
                escaped.append(c);
            }
        }
        return escaped == null ? raw : escaped.toString();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public class ParisArchivesGlamScrap extends GlamScrap {
}

public ParisArchivesGlamScrap() {
super("paris");
super("paris_archives");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public class ToulouseArchivesGlamScrap extends GlamScrap {
}

public ToulouseArchivesGlamScrap() {
super("toulouse");
super("toulouse_archives");
}

@Override
Expand Down Expand Up @@ -105,7 +105,7 @@ protected Notice searchNotice(Fonds f, int i, int j, boolean fetch) {
try {
Document desc = fetch(String.format("Web_VoirLaNotice/34_01/%s/ILUMP21411", cote.replace("/", "xzx")));
if (desc != null) {
n = ToulouseParser.parseNotice(desc, cote);
n = ToulouseArchivesParser.parseNotice(desc, cote);
if (n != null) {
session.beginTransaction();
f.getNotices().add(n);
Expand All @@ -129,11 +129,7 @@ protected Notice searchNotice(Fonds f, int i, int j, boolean fetch) {
@Override
protected Fonds createNewFonds(String cote) throws IOException {
Document doc = fetch(String.format("Web_FondsCClass%s/ILUMP31929", cote));
if (doc != null) {
return ToulouseParser.parseFonds(doc, cote);
} else {
return null;
}
return doc != null ? ToulouseArchivesParser.parseFonds(doc, cote) : null;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import com.github.donvip.glamscrap.domain.Fonds;
import com.github.donvip.glamscrap.domain.Notice;

class ToulouseParser extends Parser {
class ToulouseArchivesParser extends Parser {

private static final Logger LOGGER = LogManager.getLogger();

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package com.github.donvip.glamscrap.institutions.toulouse;

import java.util.List;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import com.github.donvip.glamscrap.Parser;
import com.github.donvip.glamscrap.domain.Fonds;

/**
 * Parses the raw textual responses returned by the Toulouse photo library for a shared basket.
 */
public class ToulousePhotosParser extends Parser {

    private static final Logger LOGGER = LogManager.getLogger();

    /** Prefix expected at the start of every usable response. */
    private static final String OK_PREFIX = "//OK";

    /** Quoted type name the title is located from: the title is the quoted string right before it. */
    private static final String USER_TRANSPORT_MARKER = "\"com.keepeek.kpk360.shared.transport.UserLightTransport/";

    /**
     * Builds a {@link Fonds} from a basket response and its media list responses.
     *
     * @param basket raw basket response
     * @param medias raw media list responses
     * @param cote identifier of the fonds to create
     * @return the parsed fonds, or {@code null} if a response is missing, is not prefixed by {@code //OK},
     *         or if the title cannot be located in the basket response
     */
    public static Fonds parseFonds(String basket, List<String> medias, String cote) {
        LOGGER.info("Basket: {}", basket);
        LOGGER.info("Medias: {}", medias);
        if (!isOk(basket) || medias == null || !medias.stream().allMatch(ToulousePhotosParser::isOk)) {
            LOGGER.warn("Couldn't parse fonds for: {}", cote);
            return null;
        }
        // 0. Search for title
        final String title = extractTitle(basket);
        if (title == null) {
            // Without the marker the offsets are meaningless: fail like any other unparseable response
            // rather than storing an arbitrary slice of the payload as title.
            LOGGER.warn("Couldn't parse fonds for: {}", cote);
            return null;
        }
        final Fonds f = new Fonds(cote);
        f.setTitle(title);
        // 1. Search for expected number of notices (information always displayed)
        try {
            f.setExpectedNotices(Integer.valueOf("617")); // TODO
        } catch (RuntimeException e) {
            LOGGER.warn("Unable to fetch number of notices for {}", cote);
        }
        return f;
    }

    /** Tells whether a response is present and starts with the success prefix. */
    private static boolean isOk(String response) {
        return response != null && response.startsWith(OK_PREFIX);
    }

    /**
     * Extracts the quoted string that precedes the user transport marker.
     *
     * @return the title, or {@code null} if the marker or the opening quote of the title is not found
     */
    private static String extractTitle(String basket) {
        final int marker = basket.indexOf(USER_TRANSPORT_MARKER);
        if (marker < 0) {
            return null;
        }
        // Skip the closing quote and the separator located just before the marker.
        final int open = basket.lastIndexOf("\"", marker - 3);
        if (open < 0) {
            return null;
        }
        // A closing quote always exists from here on: the marker itself starts with one.
        return basket.substring(open + 1, basket.indexOf("\"", open + 1));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
package com.github.donvip.glamscrap.institutions.toulouse;

import java.io.IOException;
import java.net.http.HttpRequest;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.github.donvip.glamscrap.GlamScrap;
import com.github.donvip.glamscrap.Gwt;
import com.github.donvip.glamscrap.Gwt.TypedValue;
import com.github.donvip.glamscrap.domain.Fonds;
import com.github.donvip.glamscrap.domain.Notice;
import com.github.donvip.glamscrap.wikidata.Author;

// https://phototheque.toulouse.fr/api/doc/
/**
 * Scraper for the Toulouse photo library, queried through GWT-RPC "dispatch" actions.
 * Work in progress: notices are not searched yet and paging relies on hard-coded values.
 */
public class ToulousePhotothequeGlamScrap extends GlamScrap {

    private static final String BASE_URL = "https://phototheque.toulouse.fr/";

    /** Module base URL sent with every GWT-RPC request. */
    private static final String MODULE_BASE_URL = "https://phototheque.toulouse.fr/keepeek360/";
    /** Value passed as strong name policy file of every request. */
    private static final String POLICY_FILE = "2ED7D09E9D2EBA03EA03E51C1582B244";
    /** Value passed as permutation of every request. */
    private static final String PERMUTATION = "AE4175F0BB42AE91733577263A0417ED";

    /** Increment applied to the media index between two media list requests. FIXME: hard-coded. */
    private static final int MEDIAS_INDEX_STEP = 60;
    /** Media list requests stop once the index exceeds this value. FIXME: hard-coded. */
    private static final int MEDIAS_INDEX_LIMIT = 617;

    /** Known albums, by shared basket identifier. */
    private static final Map<String, Album> ALBUMS = new HashMap<>();
    static {
        ALBUMS.put("bBxZc08Cb", new Album(1, false));
    }

    public ToulousePhotothequeGlamScrap() {
        super("toulouse_photos");
    }

    @Override
    protected Album getAlbum(String cote) {
        return ALBUMS.get(cote);
    }

    @Override
    protected Range getAllowedGap(String cote) {
        return null;
    }

    @Override
    protected String getBaseUrl() {
        return BASE_URL;
    }

    @Override
    public String getInstitution() {
        return "Ville de Toulouse";
    }

    @Override
    protected List<Fonds> fetchAllFonds() throws IOException {
        return List.of();
    }

    @Override
    protected void postScrapFonds(Fonds f) throws IOException {
        // Do nothing
    }

    @Override
    protected Notice searchNotice(Fonds f, int i, int j, boolean fetch) {
        // TODO Auto-generated method stub
        return null;
    }

    /**
     * Fetches the shared basket identified by {@code cote} together with its media lists, then parses them.
     *
     * @throws IOException if a request fails, or wrapping the interruption if the thread is interrupted
     */
    @Override
    protected Fonds createNewFonds(String cote) throws IOException {
        try {
            final String basketResponse = fetchPost(shareGetBasketAction(cote));
            final List<String> mediaResponses = new ArrayList<>();
            int offset = 0;
            // The first list is always requested, the following ones until the limit is exceeded.
            do {
                mediaResponses.add(fetchPost(shareGetBasketMediasListAction(cote, offset)));
                offset += MEDIAS_INDEX_STEP;
            } while (offset <= MEDIAS_INDEX_LIMIT);
            return ToulousePhotosParser.parseFonds(basketResponse, mediaResponses, cote);
        } catch (InterruptedException e) {
            // Restore the interruption flag before reporting the failure.
            Thread.currentThread().interrupt();
            throw new IOException(e);
        }
    }

    /** Builds the request of the {@code ShareGetBasketAction} for the given shared basket. */
    protected static HttpRequest shareGetBasketAction(String sharedBasketIdentifier) {
        final List<TypedValue> actionFields = List.of(
                stringValue(null),
                shareParameters(sharedBasketIdentifier));
        return keepeekAction("ShareGetBasketAction",
                "com.keepeek.kpk360.shared.dispatch.share.basket.ShareGetBasketAction/3056539714", actionFields);
    }

    /** Builds the request of the {@code ShareGetBasketMediasListAction} for the given shared basket and index. */
    protected static HttpRequest shareGetBasketMediasListAction(String sharedBasketIdentifier, int index) {
        final List<TypedValue> actionFields = List.of(
                integerValue(-1),
                integerValue(index),
                new TypedValue(null, Gwt.BOOLEAN, null, 0, List.of()),
                stringValue("DESC"),
                stringValue(""),
                stringValue(null),
                shareParameters(sharedBasketIdentifier));
        return keepeekAction("ShareGetBasketMediasListAction",
                "com.keepeek.kpk360.shared.dispatch.share.basket.ShareGetBasketMediasListAction/840037237", actionFields);
    }

    /**
     * Wraps an action into a call to the {@code execute} method of the dispatch service.
     *
     * @param urlAction last segment of the dispatch URL
     * @param actionQualifiedClass runtime type signature of the action
     * @param arguments fields of the action
     */
    protected static HttpRequest keepeekAction(String urlAction, String actionQualifiedClass, List<TypedValue> arguments) {
        final TypedValue action = new TypedValue(
                "com.gwtplatform.dispatch.rpc.shared.Action", actionQualifiedClass, null, -1, arguments);
        return Gwt.request(BASE_URL + "dispatch/" + urlAction,
                MODULE_BASE_URL, POLICY_FILE, PERMUTATION,
                "com.gwtplatform.dispatch.rpc.shared.DispatchService", "execute",
                List.of(stringValue(null), action));
    }

    /** Builds the share parameters common to the basket actions. */
    private static TypedValue shareParameters(String sharedBasketIdentifier) {
        final List<TypedValue> fields = List.of(
                stringValue("fr"),
                stringValue(sharedBasketIdentifier),
                stringValue("https://phototheque.toulouse.fr/"));
        return new TypedValue(null, "com.keepeek.kpk360.shared.dispatch.common.share.ShareParameters/511658636", null, -1, fields);
    }

    /** Creates a field-less value declared as a string; {@code content} may be {@code null}. */
    private static TypedValue stringValue(String content) {
        return new TypedValue(Gwt.STRING, null, content, -1, List.of());
    }

    /** Creates a field-less value whose runtime type is an integer. */
    private static TypedValue integerValue(int number) {
        return new TypedValue(null, Gwt.INTEGER, null, number, List.of());
    }

    @Override
    public String getOtherFields(Notice n) {
        return "";
    }

    @Override
    public List<String> getCategories(Notice n) {
        return List.of();
    }

    @Override
    public Map<String, Author> getPredefinedAuthors() {
        return Map.of();
    }
}
Loading

0 comments on commit ab270de

Please sign in to comment.