Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Version 2.0.0, update for Solr 8 NOT FOR MERGING #71

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# Ubuntu-based Solr container
FROM solr:6.6
FROM solr:8.11

ENV SOLR_HOME=/opt/solr/solr-config/ehri

# NB: Solr config should be mounted at solr-config/ehri
COPY solr-config/target/solr-config-*-solr-core.tar.gz /tmp/
USER root
RUN mkdir -p /opt/solr/solr-config/ehri/portal && \
echo "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n\n<solr></solr>" > /opt/solr/solr-config/ehri/solr.xml && \
tar -C /opt/solr/solr-config/ehri/portal --extract --file /tmp/solr-config-*-solr-core.tar.gz && \
ln -s /opt/solr/solr-config/ehri/portal/lib-* /opt/solr/solr-config/ehri/portal/lib
ln -s /opt/solr/solr-config/ehri/portal/lib-* /opt/solr/solr-config/ehri/portal/lib && \
chown solr.solr --recursive /opt/solr/solr-config
USER solr
2 changes: 1 addition & 1 deletion build-docker.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash

mvn package
mvn package -DskipTests
sudo docker build -t ehri/ehri-search-tools .
10 changes: 5 additions & 5 deletions index-data-converter/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<parent>
<artifactId>search-tools</artifactId>
<groupId>eu.ehri-project</groupId>
<version>1.1.13</version>
<version>2.0.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down Expand Up @@ -51,17 +51,17 @@
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-core</artifactId>
<version>1.19.1</version>
<version>1.19.4</version>
</dependency>
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-client</artifactId>
<version>1.19.1</version>
<version>1.19.4</version>
</dependency>
<dependency>
<groupId>com.sun.jersey.contribs</groupId>
<artifactId>jersey-apache-client</artifactId>
<version>1.19.1</version>
<version>1.19.4</version>
</dependency>
<dependency>
<groupId>joda-time</groupId>
Expand All @@ -71,7 +71,7 @@
<dependency>
<groupId>com.jayway.jsonpath</groupId>
<artifactId>json-path</artifactId>
<version>1.2.0</version>
<version>2.4.0</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public class Pipeline<S, E> {
public static class Builder<S, E> {
private final List<Source<? extends S>> sources = Lists.newArrayList();
private final List<Converter< S, ? extends E>> converters = Lists.newArrayList();
private final List<Sink<? super E>> writers = Lists.newArrayList();
private final List<Sink<E>> writers = Lists.newArrayList();

public Builder<S, E> addSink(Sink<E> writer) {
writers.add(writer);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.jayway.jsonpath.*;
import com.jayway.jsonpath.internal.spi.json.JacksonJsonProvider;
import com.jayway.jsonpath.spi.json.JacksonJsonProvider;
import eu.ehri.project.indexing.converter.Converter;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormatter;
Expand All @@ -25,6 +25,7 @@ public class JsonConverter implements Converter<JsonNode, JsonNode> {

private static final Logger logger = LoggerFactory.getLogger(JsonConverter.class);


/**
* Set of key -> JsonPath extractors
*/
Expand Down Expand Up @@ -84,7 +85,8 @@ public class JsonConverter implements Converter<JsonNode, JsonNode> {
}

public JsonConverter() {
parseContext = JsonPath.using(new JacksonJsonProvider());
Configuration config = Configuration.defaultConfiguration().jsonProvider(new JacksonJsonProvider());
parseContext = JsonPath.using(config);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
/**
* Aggregate several sinks together.
*/
public class MultiSink<T, W extends Sink<? super T>> implements Sink<T> {
public class MultiSink<T, W extends Sink<T>> implements Sink<T> {

private final List<W> writers;

Expand Down
4 changes: 3 additions & 1 deletion index-data-converter/src/main/resources/paths.properties
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ repositoryName=relationships.heldBy[0].relationships.describes[0].data.name
parentId=relationships.childOf[0].id,\
relationships.broader[*].id,\
relationships.isPartOf[*].id
ancestorIds=..relationships.childOf[*].id
# The leading '.' combines with the automatically prepended '$.' to
# form a '..' deep scan, which fetches all ancestor IDs.
ancestorIds=.relationships.childOf[*].id
## new added for multilingual indexing
addresses=relationships.hasAddress[0].data.*
archivalHistory=data.archivalHistory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public void finish() {

assertEquals(2, stats.getCount());
assertEquals(2, outText.size());
assertEquals("<test>\n<foo>bar</foo>\n</test>\n", outText.get(0));
assertEquals("<test>\n <foo>bar</foo>\n</test>\n", outText.get(0));
}

@Test(expected = IllegalStateException.class)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package eu.ehri.project.indexing.converter.impl;

import com.jayway.jsonpath.JsonPath;
import org.junit.Test;

import java.util.List;
import java.util.Map;

import static org.junit.Assert.*;

/**
 * Smoke test for {@link Utils}.
 */
public class UtilsTest {

    /**
     * Verifies that {@code Utils.loadPaths()} returns a map of keys to
     * {@link JsonPath} lists that contains at least one entry.
     */
    @Test
    public void loadPaths() {
        final Map<String, List<JsonPath>> loadedPaths = Utils.loadPaths();
        assertFalse(loadedPaths.isEmpty());
    }
}
17 changes: 0 additions & 17 deletions index-data-converter/src/test/resources/log4j.xml

This file was deleted.

13 changes: 13 additions & 0 deletions index-data-converter/src/test/resources/log4j2.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<Configuration>
<Appenders>
<Console name="STDOUT" target="SYSTEM_OUT">
<PatternLayout pattern="%d %-5p [%t] %C{2} (%F:%L) - %m%n"/>
</Console>
</Appenders>
<Loggers>
<Logger name="org.apache.log4j.xml" level="info"/>
<Root level="info">
<AppenderRef ref="STDOUT"/>
</Root>
</Loggers>
</Configuration>
40 changes: 24 additions & 16 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>eu.ehri-project</groupId>
<artifactId>search-tools</artifactId>
<packaging>pom</packaging>
<version>1.1.13</version>
<version>2.0.0</version>
<modules>
<module>solr-config</module>
<module>index-data-converter</module>
</modules>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<solr.version>6.6.6</solr.version>
<solr.version>8.11.2</solr.version>
</properties>

<issueManagement>
Expand All @@ -26,7 +26,7 @@
<site>
<id>ehri-search-tools</id>
<name>EHRI Search Tools Site</name>
<url>http://github.com/EHRI/ehri-search-tools</url>
<url>https://github.com/EHRI/ehri-search-tools</url>
</site>
<snapshotRepository>
<id>ehridev</id>
Expand All @@ -41,36 +41,44 @@
</distributionManagement>

<dependencies>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.7</version>
</dependency>

<!-- Test only dependencies -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<version>4.13.1</version>
<scope>test</scope>
</dependency>

<!-- Test only dependencies -->
<!-- json-smart backs json-path's default JSON provider, so it needs to be declared explicitly to be on the classpath -->
<dependency>
<groupId>com.jayway.jsonpath</groupId>
<artifactId>json-path-assert</artifactId>
<version>1.2.0</version>
<groupId>net.minidev</groupId>
<artifactId>json-smart</artifactId>
<version>2.3</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.7</version>
<groupId>com.jayway.jsonpath</groupId>
<artifactId>json-path-assert</artifactId>
<version>2.4.0</version>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.3</version>
<version>3.11.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<source>11</source>
<target>11</target>
</configuration>
</plugin>
</plugins>
Expand Down
51 changes: 45 additions & 6 deletions solr-config/core/conf/schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@
<field name="txt_pl" type="text_pl" indexed="true" stored="false" termVectors="true" multiValued="true"/>
<field name="txt_de" type="text_de" indexed="true" stored="false" termVectors="true" multiValued="true"/>
<field name="txt_nl" type="text_nl" indexed="true" stored="false" termVectors="true" multiValued="true"/>
<field name="txt_bg" type="text_bg" indexed="true" stored="false" termVectors="true" multiValued="true"/>
<field name="txt_cs" type="text_general" indexed="true" stored="false" termVectors="true" multiValued="true"/>
<field name="txt_it" type="text_it" indexed="true" stored="false" termVectors="true" multiValued="true"/>
<field name="txt_fr" type="text_fr" indexed="true" stored="false" termVectors="true" multiValued="true"/>
Expand Down Expand Up @@ -499,6 +500,17 @@
<field name="biographicalHistory_nl" type="text_nl" indexed="true" stored="false" termVectors="true" multiValued="false"/>
<field name="accessPoints_nl" type="text_nl" indexed="true" stored="false" termVectors="true" multiValued="true"/>

<field name="title_bg" type="text_bg" indexed="true" stored="false" termVectors="true" multiValued="false"/>
<field name="name_bg" type="text_bg" indexed="true" stored="false" termVectors="true" multiValued="false"/>
<field name="otherFormsOfName_bg" type="text_bg" indexed="true" stored="false" termVectors="true" multiValued="false"/>
<field name="parallelFormsOfName_bg" type="text_bg" indexed="true" stored="false" termVectors="true" multiValued="false"/>
<field name="scopeAndContent_bg" type="text_bg" indexed="true" stored="false" termVectors="true" multiValued="false"/>
<field name="archivalHistory_bg" type="text_bg" indexed="true" stored="false" termVectors="true" multiValued="false"/>
<field name="archivistNote_bg" type="text_bg" indexed="true" stored="false" termVectors="true" multiValued="false"/>
<field name="abstract_bg" type="text_bg" indexed="true" stored="false" termVectors="true" multiValued="false"/>
<field name="biographicalHistory_bg" type="text_bg" indexed="true" stored="false" termVectors="true" multiValued="false"/>
<field name="accessPoints_bg" type="text_bg" indexed="true" stored="false" termVectors="true" multiValued="true"/>

<field name="title_cs" type="text_cs" indexed="true" stored="false" termVectors="true" multiValued="false"/>
<field name="name_cs" type="text_cs" indexed="true" stored="false" termVectors="true" multiValued="false"/>
<field name="otherFormsOfName_cs" type="text_cs" indexed="true" stored="false" termVectors="true" multiValued="false"/>
Expand Down Expand Up @@ -855,6 +867,39 @@
<copyField source="title_nl" dest="text"/>
<copyField source="title_nl" dest="txt_nl"/>

<copyField source="abstract_bg" dest="text"/>
<copyField source="abstract_bg" dest="txt_bg"/>
<copyField source="accessPoints_bg" dest="accessPointNames"/>
<copyField source="accessPoints_bg" dest="accessPoints"/>
<copyField source="accessPoints_bg" dest="accessPoints_facet"/>
<copyField source="accessPoints_bg" dest="text"/>
<copyField source="accessPoints_bg" dest="txt_bg"/>
<copyField source="archivalHistory_bg" dest="archivalHistory"/>
<copyField source="archivalHistory_bg" dest="text"/>
<copyField source="archivalHistory_bg" dest="txt_bg"/>
<copyField source="archivistNote_bg" dest="text"/>
<copyField source="archivistNote_bg" dest="txt_bg"/>
<copyField source="biographicalHistory_bg" dest="biographicalHistory"/>
<copyField source="biographicalHistory_bg" dest="text"/>
<copyField source="biographicalHistory_bg" dest="txt_bg"/>
<copyField source="name_bg" dest="name"/>
<copyField source="name_bg" dest="name_ngram"/>
<copyField source="name_bg" dest="name_sort"/>
<copyField source="name_bg" dest="text"/>
<copyField source="name_bg" dest="title"/>
<copyField source="name_bg" dest="txt_bg"/>
<copyField source="otherFormsOfName_bg" dest="otherFormsOfName"/>
<copyField source="otherFormsOfName_bg" dest="text"/>
<copyField source="otherFormsOfName_bg" dest="txt_bg"/>
<copyField source="parallelFormsOfName_bg" dest="parallelFormsOfName"/>
<copyField source="parallelFormsOfName_bg" dest="text"/>
<copyField source="parallelFormsOfName_bg" dest="txt_bg"/>
<copyField source="scopeAndContent_bg" dest="scopeAndContent"/>
<copyField source="scopeAndContent_bg" dest="text"/>
<copyField source="scopeAndContent_bg" dest="txt_bg"/>
<copyField source="title_bg" dest="text"/>
<copyField source="title_bg" dest="txt_bg"/>

<copyField source="abstract_cs" dest="text"/>
<copyField source="abstract_cs" dest="txt_cs"/>
<copyField source="accessPoints_cs" dest="accessPointNames"/>
Expand Down Expand Up @@ -1188,12 +1233,6 @@
<copyField source="scopeAndContent_generic" dest="text"/>
<copyField source="title_generic" dest="text"/>

<!-- field for the QueryParser to use when an explicit fieldname is absent -->
<defaultSearchField>text</defaultSearchField>

<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
<solrQueryParser defaultOperator="OR"/>

<!-- change ranking model to BM25/Okapi. It works better than the default Vector Space Model! -->
<similarity class="solr.BM25SimilarityFactory"/>
</schema>
2 changes: 1 addition & 1 deletion solr-config/core/conf/solrconfig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
-->

<config>
<luceneMatchVersion>LUCENE_6_1_0</luceneMatchVersion>
<luceneMatchVersion>LUCENE_7_0_0</luceneMatchVersion>

<!-- Use the classic schema for the time being, since managed is the default since 2.6 -->
<schemaFactory class="ClassicIndexSchemaFactory"/>
Expand Down
Loading