From 1212abff50b607f82484f1833079baf5690e7637 Mon Sep 17 00:00:00 2001 From: andrewresearch Date: Thu, 2 Nov 2017 16:25:25 +1000 Subject: [PATCH] Minor updates to separate NER and Parse from other pipelines --- .travis.yml | 4 ++-- build.sbt | 2 +- .../factorie_nlp_api/AnnotatorPipelines.scala | 15 +++++++++------ 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index a222ea6..6908934 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,10 +3,10 @@ language: scala jdk: oraclejdk8 scala: - - 2.12.4 + - 2.12.3 script: - - sbt ++$TRAVIS_SCALA_VERSION -J-Xmx6G test + - sbt ++$TRAVIS_SCALA_VERSION -J-Xmx2000m test # Use container-based infrastructure sudo: false diff --git a/build.sbt b/build.sbt index 17ec1e1..755566e 100644 --- a/build.sbt +++ b/build.sbt @@ -1,6 +1,6 @@ name := "factorie-nlp-api" -version := "0.5.0" +version := "0.5.1" scalaVersion := "2.12.3" diff --git a/src/main/scala/io/nlytx/factorie_nlp_api/AnnotatorPipelines.scala b/src/main/scala/io/nlytx/factorie_nlp_api/AnnotatorPipelines.scala index e3fe7e4..d0e4c85 100644 --- a/src/main/scala/io/nlytx/factorie_nlp_api/AnnotatorPipelines.scala +++ b/src/main/scala/io/nlytx/factorie_nlp_api/AnnotatorPipelines.scala @@ -29,6 +29,7 @@ object AnnotatorPipelines { val logger = Logging(system.eventStream, "factorie-nlp-api") type Pipeline = String => RunnableGraph[Future[Document]] + type DocPipeline = Document => RunnableGraph[Future[Document]] //Make Document private lazy val doc = Flow[String].map(new Document(_)) @@ -74,14 +75,14 @@ object AnnotatorPipelines { val defaultPipeline = lemmaPipeline - val parsePipeline = (s:String) => - Source.single(s) - .via(doc.map(tokeniser).map(segmenter).map(normaliser).map(postagger).map(lemmatiser).map(parser)) + val parsePipeline = (d:Document) => + Source.single(d) + .map(parser) .toMat(Sink.head[Document])(Keep.right) - val nerPipeline = (s:String) => - Source.single(s) - .via(doc.map(tokeniser).map(segmenter).map(normaliser).map(postagger).map(lemmatiser).map(parser).mapAsync(2)(nerTagger)) + val nerPipeline = (d:Document) => + Source.single(d) + .mapAsync(2)(nerTagger) .toMat(Sink.head[Document])(Keep.right) val completePipeline = nerPipeline @@ -89,6 +90,8 @@ object AnnotatorPipelines { /* The main method for running a pipeline */ def process(text:String,pipeline:Pipeline=defaultPipeline):Future[Document] = pipeline(text).run + def processDoc(doc:Document,pipeline:DocPipeline):Future[Document] = pipeline(doc).run + def profile(text:String,pipeline:Pipeline=defaultPipeline,wait:Int=180):Document = { logger.info(s"Profiling pipeline...") val start = System.currentTimeMillis()