diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 58f53a4b7..5731dea98 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,6 +13,16 @@ jobs: - name: Checkout uses: actions/checkout@v1 + # Download data file used for config testing + - uses: keithweaver/aws-s3-github-action@v1.0.0 + with: + command: cp + source: s3://ehri-data/solr_test_data/searchdata.json + destination: ./test/resources/searchdata.json + aws_access_key_id: ${{ secrets.AWS_S3_ACCESS_KEY }} + aws_secret_access_key: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }} + aws_region: us-west-1 + - name: Setup Node uses: actions/setup-node@v2 with: diff --git a/.gitignore b/.gitignore index 480f76a7d..453df4d10 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ miniodata/* # stupid Mac stuff .DS_Store +# Confidential things conf/oauth2.conf* conf/parse.conf* conf/aws.conf* @@ -39,4 +40,5 @@ conf/external_pages.conf conf/dos.conf* conf/minio.conf* conf/api-keys.conf* -solr* +test/resources/searchdata.json +solr? 
diff --git a/docker-compose.yml b/docker-compose.yml index ecb3175e7..2004c86ab 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -28,6 +28,11 @@ services: ports: - 8982:8983 + solr: + image: ehri/ehri-search-tools + ports: + - 8982:8983 + # This simply allows us to send mails from CI environments smtp: image: python:3.9.7-slim diff --git a/modules/admin/app/guice/AdminModule.scala b/modules/admin/app/guice/AdminModule.scala index cf9188a23..8fc8642fb 100644 --- a/modules/admin/app/guice/AdminModule.scala +++ b/modules/admin/app/guice/AdminModule.scala @@ -3,7 +3,7 @@ package guice import com.google.inject.AbstractModule import eu.ehri.project.xml.{BaseXXQueryXmlTransformer, SaxonXsltXmlTransformer, XQueryXmlTransformer, XsltXmlTransformer} import services.harvesting._ -import services.ingest.{CoreferenceService, EadValidator, IngestService, RelaxNGEadValidator, SqlCoreferenceService, WSIngestService} +import services.ingest._ import javax.inject.Provider diff --git a/modules/portal/app/controllers/AppComponents.scala b/modules/portal/app/controllers/AppComponents.scala index cd547504b..d603113da 100644 --- a/modules/portal/app/controllers/AppComponents.scala +++ b/modules/portal/app/controllers/AppComponents.scala @@ -35,18 +35,18 @@ trait AppComponents { def itemLifecycle: ItemLifecycle } -case class DefaultAppComponents @Inject ()( - accounts: AccountManager, - authHandler: AuthHandler, - cacheApi: SyncCacheApi, - config: Configuration, - dataApi: DataServiceBuilder, - conf: AppConfig, - markdown: MarkdownRenderer, - materializer: Materializer, - pageRelocator: MovedPageLookup, - searchEngine: SearchEngine, - searchResolver: SearchItemResolver, - itemLifecycle: ItemLifecycle, +case class DefaultAppComponents @Inject()( + accounts: AccountManager, + authHandler: AuthHandler, + cacheApi: SyncCacheApi, + config: Configuration, + dataApi: DataServiceBuilder, + conf: AppConfig, + markdown: MarkdownRenderer, + materializer: Materializer, + pageRelocator: 
MovedPageLookup, + searchEngine: SearchEngine, + searchResolver: SearchItemResolver, + itemLifecycle: ItemLifecycle, ) extends AppComponents diff --git a/test/helpers/TestConfiguration.scala b/test/helpers/TestConfiguration.scala index 701b8a063..9ae8cf71f 100644 --- a/test/helpers/TestConfiguration.scala +++ b/test/helpers/TestConfiguration.scala @@ -118,6 +118,8 @@ trait TestConfiguration { bind[CypherQueryService].toInstance(mockCypherQueries), bind[EventHandler].toInstance(testEventHandler), + bind[DataServiceBuilder].to[WsDataServiceBuilder], +// bind[SearchIndexMediator].toInstance(mockIndexer), bind[HtmlPages].toInstance(mockHtmlPages), bind[GeocodingService].to[NoopGeocodingService], bind[EadValidator].to[MockEadValidatorService], diff --git a/test/integration/admin/IndexingSpec.scala b/test/integration/admin/IndexingSpec.scala index 01ce19c2f..7ba566bde 100644 --- a/test/integration/admin/IndexingSpec.scala +++ b/test/integration/admin/IndexingSpec.scala @@ -49,14 +49,26 @@ class IndexingSpec extends SearchTestRunner { val outFlow: Flow[Message, Message, (Future[Seq[Message]], Promise[Option[Message]])] = Flow.fromSinkAndSourceMat(Sink.seq[Message], src.concatMat(Source.maybe[Message])(Keep.right))(Keep.both) + // NB: using the technique mentioned for "half-closed" websockets here to get output + // even when we are not putting any items in. + // https://doc.akka.io/docs/akka-http/current/client-side/websocket-support.html#half-closed-websockets + // val (_, (out, promise)) = Http().singleWebSocketRequest(WebSocketRequest(wsUrl, extraHeaders = headers), outFlow) + // Clear the index so we know we're testing against a clean start + await(mediator.handle.clearAll()) + await(engine.search(query)).page.size must_== 0 + // Here we can't read any messages till we've signalled the end of the input stream, but in // reality the indexer is working behind-the-scenes. So we need to wait for some time. - // Wait up to ten seconds until a search query is non-empty. 
Since Solr won't show anything till - // it commits the request this means we're done: - await(engine.search(query)).page.headOption must beSome.eventually(100, 100.millis) + // Currently we'd expect X number of items to be returned by a fully-indexed search engine: + // change this if the fixtures change and you're getting unexpected results! + // NB: this is the number of items, not descriptions which is what the + // search engine indexes - there are something like 46 descriptions. + // Wait up to ten seconds until we get the expected number of items: + val EXPECTED = 39 + await(engine.search(query)).page.size must be_==(EXPECTED).eventually(100, 100.millis) // close the connection... promise.success(None) diff --git a/test/integration/search/SolrSearchSpec.scala b/test/integration/search/SolrSearchSpec.scala new file mode 100644 index 000000000..5de734251 --- /dev/null +++ b/test/integration/search/SolrSearchSpec.scala @@ -0,0 +1,74 @@ +package integration.search + +import akka.actor.ActorSystem +import akka.http.scaladsl.Http +import akka.http.scaladsl.model._ +import akka.stream.Materializer +import akka.util.ByteString +import config.ServiceConfig +import helpers.SearchTestRunner +import play.api.{Application, Configuration, Environment, Logger} +import services.search._ +import utils.PageParams + +import java.nio.file.Paths +import scala.concurrent.{ExecutionContext, Future} + + +/** + * Spec to test searching against a Solr instance loaded with fixture data. 
+ */
+class SolrSearchSpec extends SearchTestRunner {
+
+  val logger = Logger(classOf[SolrSearchSpec])
+
+  // Replace the contents of the configured Solr index with /searchdata.json (runs at construction).
+  private def initSolr(): Unit = {
+    val config = Configuration.load(Environment.simple())
+    val port = config.get[Int]("services.solr.port")
+    // The index is wiped below, so refuse to run against the default port.
+    if (port == 8983)
+      throw new RuntimeException(s"Solr port is set to default value: $port, bailing out...")
+
+    implicit val as: ActorSystem = ActorSystem()
+    val mat = Materializer(as)
+    implicit val ec: ExecutionContext = mat.executionContext
+    // POST to Solr's update handler; a non-2xx status fails the returned future.
+    def req(payload: UniversalEntity): Future[Unit] = {
+      val url = ServiceConfig("solr", config).baseUrl + "/update?commit=true"
+      Http().singleRequest(HttpRequest(HttpMethods.POST, url).withEntity(payload)).map { rs =>
+        rs.discardEntityBytes(mat) // drain the body so the pooled connection is released
+        if (rs.status.isFailure()) throw new RuntimeException(s"Solr update failed: ${rs.status}")
+      }
+    }
+    try {
+      logger.debug("Clearing Solr data...")
+      val json = ByteString.fromString("""{"delete": {"query": "*:*"}}""")
+      await(req(HttpEntity.apply(ContentTypes.`application/json`, json)))
+      logger.debug("Loading Solr data...")
+      val resource = Option(getClass.getResource("/searchdata.json"))
+        .getOrElse(throw new RuntimeException("Test resource /searchdata.json not found"))
+      await(req(HttpEntity.fromPath(ContentTypes.`application/json`, Paths.get(resource.toURI))))
+    } finally await(as.terminate()) // stop the actor system even when setup fails
+  }
+  initSolr()
+
+  def engine(implicit app: Application) = app.injector.instanceOf[SearchEngine]
+
+  def simpleSearch(engine: SearchEngine, q: String): Future[SearchResult[SearchHit]] =
+    engine.search(SearchQuery(
+      params = SearchParams(query = Some(q)),
+      paging = PageParams.empty.withoutLimit))
+
+  "Solr search engine" should {
+    "find things" in new ITestApp {
+      val r = await(simpleSearch(engine, "USHMM"))
+      r.page.size must be_>(0)
+    }
+
+    "find other things" in new ITestApp {
+      val r = await(simpleSearch(engine, "Wiener Library"))
+      r.page.size must be_>(0)
+    }
+  }
+}