Skip to content

Commit

Permalink
fathomnet/fathomnet-py#25 - first cut. Needs refinement
Browse files Browse the repository at this point in the history
  • Loading branch information
hohonuuli committed Feb 29, 2024
1 parent 30dd62f commit f649abc
Show file tree
Hide file tree
Showing 14 changed files with 71 additions and 20 deletions.
10 changes: 5 additions & 5 deletions project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,18 @@ object Dependencies {

lazy val jansi = "org.fusesource.jansi" % "jansi" % "2.4.1"

lazy val logback = "ch.qos.logback" % "logback-classic" % "1.4.14"
lazy val logback = "ch.qos.logback" % "logback-classic" % "1.5.1"
lazy val methanol = "com.github.mizosoft.methanol" % "methanol" % "1.7.0"
lazy val munit = "org.scalameta" %% "munit" % "1.0.0-M10"
lazy val munit = "org.scalameta" %% "munit" % "1.0.0-M11"
lazy val picocli = "info.picocli" % "picocli" % "4.7.5"

lazy val slf4jJdk = "org.slf4j" % "slf4j-jdk-platform-logging" % "2.0.10"
lazy val slf4jJdk = "org.slf4j" % "slf4j-jdk-platform-logging" % "2.0.12"

private val tapirVersion = "1.9.6"
private val tapirVersion = "1.9.10"
lazy val tapirStubServer = "com.softwaremill.sttp.tapir" %% "tapir-sttp-stub-server" % tapirVersion
lazy val tapirSwagger = "com.softwaremill.sttp.tapir" %% "tapir-swagger-ui-bundle" % tapirVersion
lazy val tapirCirce = "com.softwaremill.sttp.tapir" %% "tapir-json-circe" % tapirVersion
lazy val tapirCirceClient = "com.softwaremill.sttp.client3" %% "circe" % "3.9.1"
lazy val tapirCirceClient = "com.softwaremill.sttp.client3" %% "circe" % "3.9.3"
lazy val tapirNetty = "com.softwaremill.sttp.tapir" %% "tapir-netty-server" % tapirVersion
lazy val tapirVertx = "com.softwaremill.sttp.tapir" %% "tapir-vertx-server" % tapirVersion

Expand Down
2 changes: 1 addition & 1 deletion project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version=1.9.7
sbt.version=1.9.9
2 changes: 1 addition & 1 deletion src/main/scala/org/fathomnet/worms/Main.scala
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ object Main:
WormsLoader.load(wormsDir).map { root =>
if (treeFiles.nonEmpty)
// Our new base. We use 0 as aphiaId so that the real aphiaIds are not incremented when the trees are combined
val newRoot = WormsNode("object", "", 0L, Nil, Nil)
val newRoot = WormsNode("object", "", 0L, 0L, Nil, Nil)
val newBranches = treeFiles.flatMap(ExtendedLoader.load(_)).toSeq
val trees = root +: newBranches
val combinedRoot = CombineTrees.combine(newRoot, trees, root.maxAphiaId)
Expand Down
20 changes: 20 additions & 0 deletions src/main/scala/org/fathomnet/worms/StateController.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ package org.fathomnet.worms
import scala.util.control.NonFatal
import org.fathomnet.worms.etc.jdk.Logging.given
import scala.collection.mutable.ListMap
import cats.conversions.all

object StateController:

Expand Down Expand Up @@ -62,6 +63,25 @@ object StateController:
data.names.filter(_.toLowerCase.contains(glob.toLowerCase)).toList
runSearch(search)

/**
 * Looks up a node by its aphiaId and reports its name, the name of its accepted node and the
 * alternate names.
 *
 * If the node is its own accepted node, or if no node with the accepted aphiaId can be found,
 * the node's own name is reported as the accepted name.
 *
 * @param aphiaId
 *   The aphiaId to search for
 * @return
 *   The names for the matching node. NOTE(review): presumably `runNodeSearch` yields an ErrorMsg
 *   built from the supplied message when the search returns None -- confirm against its definition.
 */
def findNamesByAphiaId(aphiaId: Long): Either[ErrorMsg, Names] =
  def search(data: Data): Option[Names] =
    val allNodes = data.namesMap.values
    allNodes.find(_.aphiaId == aphiaId).map { node =>
      // Only look for a *different* node when this one is not already the accepted one.
      // Treating a self-accepted node as its own "accepted" match would append its
      // alternate names to themselves and report every one of them twice.
      val accepted =
        if (node.aphiaId == node.acceptedAphiaId) None
        else allNodes.find(_.aphiaId == node.acceptedAphiaId)
      accepted match
        case None        =>
          Names(node.aphiaId, node.name, node.name, node.alternateNames)
        case Some(value) =>
          // distinct: the two nodes may share an alternate name
          val alternates = (node.alternateNames ++ value.alternateNames).distinct
          Names(node.aphiaId, node.name, value.name, alternates)
    }

  runNodeSearch(search, s"Unable to find a name with aphiaId: $aphiaId")

def descendantNames(name: String): Either[ErrorMsg, List[String]] =
def search(data: Data): List[String] =
data.findNodeByName(name) match
Expand Down
5 changes: 5 additions & 0 deletions src/main/scala/org/fathomnet/worms/WormsNode.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
package org.fathomnet.worms

import org.fathomnet.worms.io.MutableWormsNode
import org.fathomnet.worms.io.extended.CombineTrees.add

/**
* Immutable tree node
Expand All @@ -19,6 +20,7 @@ final case class WormsNode(
name: String,
rank: String,
aphiaId: Long,
acceptedAphiaId: Long,
alternateNames: Seq[String],
children: Seq[WormsNode]
):
Expand Down Expand Up @@ -58,6 +60,7 @@ final case class WormsNode(
name = this.name,
rank = this.rank,
aphiaId = this.aphiaId,
acceptedAphiaId = this.acceptedAphiaId,
alternateNames = this.alternateNames
)

Expand Down Expand Up @@ -87,6 +90,7 @@ object WormsNodeBuilder:
name,
node.concept.rank,
node.concept.id,
node.concept.acceptedId,
alternateNames,
node.children.map(from).toSeq
)
Expand All @@ -95,5 +99,6 @@ final case class SimpleWormsNode(
name: String,
rank: String,
aphiaId: Long,
acceptedAphiaId: Long,
alternateNames: Seq[String]
)
13 changes: 12 additions & 1 deletion src/main/scala/org/fathomnet/worms/api/NameEndpoints.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import sttp.tapir._
import sttp.tapir.json.circe._
import sttp.tapir.generic.auto._
import io.circe.generic.auto._
import org.fathomnet.worms.{Data, NotFound, Page, ServerError, State}
import org.fathomnet.worms.{Data, Names, NotFound, Page, ServerError, State}
import sttp.tapir.server.ServerEndpoint
import scala.concurrent.Future
import org.fathomnet.worms.ErrorMsg
Expand Down Expand Up @@ -54,6 +54,16 @@ class NameEndpoints(using ec: ExecutionContext) extends Endpoints:
// Server logic for namesCountEndpoint. The endpoint's input is ignored, so the lambda
// parameter is a wildcard rather than an identifier that shadows the `Unit` type name.
val namesCountServerEndpoint: ServerEndpoint[Any, Future] =
  namesCountEndpoint.serverLogic(_ => Future.successful(StateController.countAllNames()))

// --/names/aphiaid/:aphiaid
val namesByAphiaId: PublicEndpoint[Long, ErrorMsg, Names, Any] =
  baseEndpoint
    .get
    .in("names" / "aphiaid" / path[Long]("aphiaid"))
    .out(jsonBody[Names])
    .description("Returns the data for a name given its aphiaid.")

// Runs the aphiaId lookup inside a Future and attaches it to the namesByAphiaId endpoint.
val namesByAphiaIdServerEndpoint: ServerEndpoint[Any, Future] =
  namesByAphiaId.serverLogic { id =>
    Future(StateController.findNamesByAphiaId(id))
  }

// --/query/startswith/:prefix
val queryStartswithEndpoint: PublicEndpoint[String, ErrorMsg, List[String], Any] = baseEndpoint
.get
Expand Down Expand Up @@ -142,6 +152,7 @@ class NameEndpoints(using ec: ExecutionContext) extends Endpoints:
override val all: List[ServerEndpoint[Any, Future]] = List(
namesServerEndpoint,
namesCountServerEndpoint,
namesByAphiaIdServerEndpoint,
queryStartswithServerEndpoint,
queryContainsServerEndpoint,
descendantsServerEndpoint,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ case class SwaggerEndpoints(nameEndpoints: NameEndpoints, taxaEndpoints: TaxaEnd
List(
nameEndpoints.namesCountEndpoint,
nameEndpoints.namesEndpoint,
nameEndpoints.namesByAphiaId,
nameEndpoints.ancestorsEndpoint,
nameEndpoints.childrenEndpoint,
nameEndpoints.descendantsEndpoint,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import org.fathomnet.worms.util.HexUtil
import org.fathomnet.worms.{ErrorMsg, SimpleWormsNode, WormsNode}
import org.fathomnet.worms.Page
import java.net.URI
import org.fathomnet.worms.Names

/**
* JSON codecs for use with Circe. Usage:
Expand Down Expand Up @@ -48,6 +49,9 @@ object CirceCodecs:
given Decoder[SimpleWormsNode] = deriveDecoder
given Encoder[SimpleWormsNode] = deriveEncoder

// Derived Circe codecs for Names
given Decoder[Names] = deriveDecoder
given Encoder[Names] = deriveEncoder

given Decoder[ErrorMsg] = deriveDecoder
given Encoder[ErrorMsg] = deriveEncoder

Expand Down
6 changes: 4 additions & 2 deletions src/main/scala/org/fathomnet/worms/io/WormsConcept.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@ final case class WormsConceptName(name: String, isPrimary: Boolean = true)

/**
 * A single concept together with its names.
 *
 * @param id
 *   The id of this concept
 * @param acceptedId
 *   The id of the accepted concept for this one. Equals `id` when the concept has no separate
 *   accepted concept.
 * @param parentId
 *   The id of the parent concept, if any
 * @param names
 *   The names of this concept
 * @param rank
 *   The rank of this concept
 * @param isMarine
 *   Defaults to false
 * @param isExtinct
 *   Defaults to false
 */
final case class WormsConcept(
    id: Long,
    acceptedId: Long,
    parentId: Option[Long],
    names: Seq[WormsConceptName],
    rank: String,
    isMarine: Boolean = false,
    isExtinct: Boolean = false
)

object WormsConcept:
Expand All @@ -36,7 +37,8 @@ object WormsConcept:
): Seq[WormsConcept] =
val concepts = mutable.Map[Long, WormsConcept]()
for t <- taxons do
val wc = WormsConcept(t.id, t.parentId, Seq(WormsConceptName(t.scientificName)), t.rank)
val acceptedId = t.acceptedId.getOrElse(t.id)
val wc = WormsConcept(t.id, acceptedId, t.parentId, Seq(WormsConceptName(t.scientificName)), t.rank)
concepts(t.id) = wc

for v <- vernacularNames do
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@ object CombineTrees:
rootNode.copy(children = children)

/**
 * Shifts the aphiaId of a node, and of every node below it, by a fixed offset.
 *
 * @param node
 *   The root of the (sub)tree to renumber
 * @param maxAphiaId
 *   The offset added to every aphiaId
 * @return
 *   A copy of the tree with shifted ids
 */
def incrementAphiaId(node: WormsNode, maxAphiaId: Long): WormsNode =
  node.copy(
    aphiaId = node.aphiaId + maxAphiaId,
    // Shift the accepted id by the same offset so that a self-accepted node stays
    // self-accepted after renumbering.
    // NOTE(review): assumes the accepted id refers to a node of the same tree -- confirm.
    acceptedAphiaId = node.acceptedAphiaId + maxAphiaId,
    children = node.children.map(incrementAphiaId(_, maxAphiaId))
  )

Expand All @@ -55,7 +56,10 @@ object CombineTrees:
* A new tree with fake aphiaId's flipped to negative
*/
def flipFakeAphiaId(node: WormsNode, maxAphiaId: Long): WormsNode =
  // Ids above maxAphiaId are treated as fake and negated; every other id is kept.
  val newAphiaId         = if (node.aphiaId > maxAphiaId) -node.aphiaId else node.aphiaId
  // A node that ends up with a negative id becomes its own accepted node.
  val newAcceptedAphiaId = if (newAphiaId < 0) newAphiaId else node.acceptedAphiaId
  node.copy(
    aphiaId = newAphiaId,
    acceptedAphiaId = newAcceptedAphiaId,
    children = node.children.map(flipFakeAphiaId(_, maxAphiaId))
  )
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ object ExtendedLoader:
.map(_.trim)
val conceptNames =
WormsConceptName(names.head, true) +: names.tail.map(WormsConceptName(_, false))
Some(WormsConcept(cols(0).toInt, parentId, conceptNames.toIndexedSeq, ""))
val aphiaId = cols(0).toLong
Some(WormsConcept(aphiaId, aphiaId, parentId, conceptNames.toIndexedSeq, ""))
catch
case NonFatal(e) =>
log.atWarn.withCause(e).log(s"Failed to parse row: $row")
Expand Down
7 changes: 5 additions & 2 deletions src/main/scala/org/fathomnet/worms/io/model.scala
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,20 @@ final case class Taxon(
taxonID: String,
parentNameUsageID: Option[String],
scientificName: String,
rank: String
rank: String,
acceptedNameUsageID: Option[String]
):
val id = taxonIDToKey(taxonID)
val parentId = parentNameUsageID.map(taxonIDToKey)
val acceptedId = acceptedNameUsageID.map(taxonIDToKey)

object Taxon:
/**
 * Parses one tab-separated row into a Taxon.
 *
 * Column 0 is used as the taxonID, column 2 as the acceptedNameUsageID, column 3 as the
 * parentNameUsageID, column 5 as the scientificName and column 19 as the rank. Blank
 * accepted/parent cells become None.
 *
 * @param row
 *   A single tab-separated line
 * @return
 *   The parsed Taxon, or None if anything fails while reading the row (e.g. too few columns)
 */
def from(row: String): Option[Taxon] =
  Try {
    val cols = row.split("\t")
    // A blank cell means "no value"
    def optionalCol(idx: Int): Option[String] = Option(cols(idx)).filterNot(_.isBlank)
    Taxon(cols(0), optionalCol(3), cols(5), cols(19), optionalCol(2))
  }.toOption

// Reads Taxon values from `file` by handing Taxon.from to readFile.
// NOTE(review): presumably readFile applies the parser line by line and keeps the rows that parse -- confirm.
def read(file: String): List[Taxon] = readFile(file, Taxon.from)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ import java.nio.file.Paths
class CombinedTreesSuite extends munit.FunSuite:

test("incrementAphiaId"):
  // A self-accepted node: aphiaId and acceptedAphiaId are both 1000
  val node = WormsNode("foo", "", 1000L, 1000L, Nil, Nil)
  val n    = CombineTrees.incrementAphiaId(node, 1000L)
  assertEquals(n.aphiaId, 2000L)

test("add"):
val node1 = WormsNode("foo", "", 1000, Nil, Nil)
val node2 = WormsNode("bar", "", 2000, Nil, Nil)
val node1 = WormsNode("foo", "", 1000, 1000, Nil, Nil)
val node2 = WormsNode("bar", "", 2000, 2000, Nil, Nil)
val n = CombineTrees.add(node1, node2, 1000)
assertEquals(n.aphiaId, 1000L)
assertEquals(n.children.size, 1)
Expand All @@ -27,7 +27,7 @@ class CombinedTreesSuite extends munit.FunSuite:

test("combine") {

val root = WormsNode("object", "", 1L, Nil, Nil)
val root = WormsNode("object", "", 1L, 1L, Nil, Nil)

val sample1 = getClass.getResource("/extended_tree_sample1.csv").getPath
val opt1 = ExtendedLoader.load(Paths.get(sample1))
Expand Down
2 changes: 1 addition & 1 deletion src/universal/conf/application.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Setting -X directly (-J is stripped)
# -J-X
-J-Xmx1g
-J-Xmx2g

# Add additional jvm parameters
-Duser.timezone=UTC
Expand Down

0 comments on commit f649abc

Please sign in to comment.