Skip to content

Commit

Permalink
prohibit overlap of locus times corresponding to regional times in th…
Browse files Browse the repository at this point in the history
…e same merge group

close #384

also add test that any non-regional timepoint declared as a participant in a merge for tracing is illegal; close #393
  • Loading branch information
vreuter committed Dec 3, 2024
1 parent 82f7948 commit 0c8177a
Show file tree
Hide file tree
Showing 11 changed files with 267 additions and 20 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ RUN apt-get update -y && \
RUN mkdir /looptrace
WORKDIR /looptrace
COPY . /looptrace
RUN mv /looptrace/target/scala-3.5.2/looptrace-assembly-0.11.3.jar /looptrace/looptrace
RUN mv /looptrace/target/scala-3.5.2/looptrace-assembly-0.12.0.jar /looptrace/looptrace

# Install new-ish R and necessary packages.
RUN echo "Installing R..." && \
Expand Down
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ val primaryJavaVersion = "11"
val primaryOs = "ubuntu-latest"
val isPrimaryOsAndPrimaryJavaTest = s"runner.os == '$primaryOs' && runner.java-version == '$primaryJavaVersion'"
ThisBuild / scalaVersion := "3.5.2"
ThisBuild / version := "0.11.3"
ThisBuild / version := "0.12.0"
ThisBuild / organization := orgName
ThisBuild / organizationName := "Gerlich Group, IMBA, OEAW"

Expand Down
2 changes: 1 addition & 1 deletion looptrace/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@


# This is put into place by the docker build, as declared in the Dockerfile.
LOOPTRACE_JAR_PATH = importlib.resources.files(__name__).joinpath("looptrace-assembly-0.11.3.jar")
LOOPTRACE_JAR_PATH = importlib.resources.files(__name__).joinpath("looptrace-assembly-0.12.0.jar")
LOOPTRACE_JAVA_PACKAGE = "at.ac.oeaw.imba.gerlich.looptrace"

FIELD_OF_VIEW_COLUMN = "fieldOfView"
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "looptrace"
version = "0.11.3"
version = "0.12.0"
description = "Library and programs for tracing chromatin loops from microscopy images"
authors = [
"Kai Sandvold Beckwith",
Expand Down
82 changes: 67 additions & 15 deletions src/main/scala/ImagingRoundsConfiguration.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package at.ac.oeaw.imba.gerlich.looptrace
import scala.collection.immutable.SortedSet
import scala.language.adhocExtensions // to extend ujson.Value.InvalidData
import scala.util.Try
import scala.util.chaining.*
import cats.*
import cats.data.{ EitherNel, NonEmptyList, NonEmptyMap, NonEmptySet, Validated, ValidatedNel }
import cats.data.Validated.{ Invalid, Valid }
Expand Down Expand Up @@ -166,6 +167,8 @@ object ImagingRoundsConfiguration extends LazyLogging:
else s"${nonRegional.size} timepoint(s) as keys in locus grouping that aren't regional.".invalidNel
}
val uniqueRegionalTimes = sequence.regionRounds.map(_.time).toList.toSet

/* Every timepoint in a proximity grouping must be a regional (rather than locus-specific) timepoint. */
val (proximityGroupingSubsetNel, proximityGroupingSupersetNel) = proximityFilterStrategy match {
case (UniversalProximityPermission | UniversalProximityProhibition(_)) =>
// In the trivial case, we have no more validation work to do.
Expand All @@ -177,20 +180,69 @@ object ImagingRoundsConfiguration extends LazyLogging:
val supersetNel = checkTimesSubset(uniqueGroupedTimes)(uniqueRegionalTimes, "regionals in imaging sequence (rel. to proximity filter strategy)")
(subsetNel, supersetNel)
}
// Validation: every timepoint named in a merge rule must be one of the regional timepoints.
// With no merge rules declared there is nothing to check, so the result is trivially valid.
// Otherwise, the members of all merge groups are pooled and the regional times are removed;
// an empty remainder means success, while a nonempty remainder is reported (sorted) as the error.
val idsToMergeAreAllRegionalNel = maybeMergeRules match {
case None => ().validNel
case Some((rules, _)) =>
import at.ac.oeaw.imba.gerlich.gerlib.collections.AtLeast2.syntax.toSet
(rules.reduceMap(_.mergeGroup.members.toSet) -- uniqueRegionalTimes)
.toList
.sorted
.toNel
.toLeft(())
.leftMap(nonRegionalTimesInRules =>
s"${nonRegionalTimesInRules.size} non-regional times in merge rules: ${nonRegionalTimesInRules.toList.sorted.map(_.show_).mkString(", ")}"
)
.toValidatedNel
}

/* Every timepoint ID to merge for tracing must be a regional (rather than locus-specific) timepoint. */
// Two independent validations of the (optional) merge rules for tracing, produced as a pair:
//   1. every timepoint named in a merge group is a regional timepoint;
//   2. within a single merge group, no locus timepoint belongs to more than one of the group's regional timepoints.
// When no merge rules are declared, both validations trivially succeed.
val (idsToMergeAreAllRegionalNel, noRepeatsInLocusTimeSetsForRegionalTimesToMerge) =
import at.ac.oeaw.imba.gerlich.gerlib.collections.given // SemigroupK[AtLeast2[Set, *]]
maybeMergeRules match {
case None => (().validNel, ().validNel)
case Some((rules, _)) =>
// Pool the members of all merge groups (reduceK, via the SemigroupK instance imported above; presumably set union),
// then remove the regional times. Whatever remains is non-regional and gets reported, sorted, in the error message.
val allAreRegional: ValidatedNel[String, Unit] =
import at.ac.oeaw.imba.gerlich.gerlib.collections.AtLeast2.syntax.toSet
(rules.map(_.mergeGroup.members).reduceK.toSet -- uniqueRegionalTimes)
.toList
.sorted
.toNel
.toLeft(())
.leftMap(nonRegionalTimesInRules =>
s"${nonRegionalTimesInRules.size} non-regional time(s) in merge rules: ${nonRegionalTimesInRules.toList.sorted.map(_.show_).mkString(", ")}"
)
.toValidatedNel

val noRepeatsInLocusTimesOfRegionalsToMerge: ValidatedNel[String, Unit] =
import at.ac.oeaw.imba.gerlich.gerlib.collections.AtLeast2.syntax.toList

// First, build the lookup of locus times by regional time.
// If a regional time occurs in more than one locus group, its locus time sets are combined.
val locusTimesByRegional = locusGrouping
.foldLeft(Map.empty[ImagingTimepoint, NonEmptySet[ImagingTimepoint]]){ (acc, g) =>
val rt = g.regionalTimepoint
val lts = g.locusTimepoints
acc + (rt -> acc.get(rt).fold(lts)(lts ++ _))
}

// Then, find repeated locus timepoints WITHIN each merge group.
// The result is keyed by the zero-based position of the rule among the rules (from zipWithIndex);
// only rules with at least one repeated locus timepoint get an entry.
type GroupId = Int
val repeatsByGroup: Map[GroupId, NonEmptyMap[ImagingTimepoint, AtLeast2[Set, ImagingTimepoint]]] =
// For one rule, invert the lookup: map each locus timepoint to the regional timepoint(s) of this
// merge group which claim it. A group member with no entry in the lookup contributes nothing.
def processOneRule = (r: TraceIdDefinitionAndFiltrationRule) =>
r.mergeGroup.members.toList.foldRight(Map.empty[ImagingTimepoint, NonEmptySet[ImagingTimepoint]]){
(rt, acc) => locusTimesByRegional
.get(rt)
.fold(acc)(_
.toList
.foldRight(acc){ (lt, m) =>
m + (lt -> m.get(lt).fold(NonEmptySet.one(rt))(_.add(rt)))
}
)
}

// Determine if a single rule's inverse mapping from locus timepoints to regional timepoints is problematic.
// Only locus timepoints whose regional set passes AtLeast2.either are kept (presumably those with two
// or more regional timepoints -- confirm against AtLeast2); the result is None when no entry is kept.
def getBadResult: Map[ImagingTimepoint, NonEmptySet[ImagingTimepoint]] => Option[NonEmptyMap[ImagingTimepoint, AtLeast2[Set, ImagingTimepoint]]] = _
.view
.mapValues(_.toSortedSet.toSet.pipe(AtLeast2.either).pipe(_.toOption))
.flatMap{ (locTime, maybeRegTimes) => maybeRegTimes.map(locTime -> _) }
.pipe(scala.collection.immutable.SortedMap.from)
.pipe(NonEmptyMap.fromMap)

rules.zipWithIndex.toList.flatMap{ (r, i) => getBadResult(processOneRule(r)).map(i -> _) }.toMap

// If no repeats, we're all good; otherwise, make an error message.
if repeatsByGroup.isEmpty
then ().validNel
else s"Regionals timepoints to merge for tracing map to overlapping locus timepoint sets; here are the repeat(s): $repeatsByGroup".invalidNel

(allAreRegional, noRepeatsInLocusTimesOfRegionalsToMerge)
}

(
tracingSubsetNel,
locusTimeSubsetNel,
Expand All @@ -199,6 +251,7 @@ object ImagingRoundsConfiguration extends LazyLogging:
proximityGroupingSubsetNel,
proximityGroupingSupersetNel,
idsToMergeAreAllRegionalNel,
noRepeatsInLocusTimeSetsForRegionalTimesToMerge,
)
.tupled
// We ignore the actual values (Unit) because this was just to accumulate errors.
Expand All @@ -207,7 +260,6 @@ object ImagingRoundsConfiguration extends LazyLogging:
}

private def mkStringTimepoints = (_: List[ImagingTimepoint]).sorted.map(_.show_).mkString(", ")


/**
* Read the configuration of imaging rounds for the experiment, including regional grouping and
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"proximityFilterStrategy": {
"semantic": "UniversalProximityPermission"
},
"mergeRulesForTracing": {
"discardRoisNotInGroupsOfInterest": true,
"distanceThreshold": 2000,
"requirementType": "Conjunctive",
"groups": [[0, 7], [8, 9]]
},
"imagingRounds": [
{"time": 0, "name": "pre_image", "isBlank": true},
{"time": 1, "probe": "Dp001"},
{"time": 2, "probe": "Dp002"},
{"time": 3, "probe": "Dp003"},
{"time": 4, "probe": "Dp006"},
{"time": 5, "probe": "Dp007"},
{"time": 6, "probe": "Dp101", "isRegional": true},
{"time": 7, "probe": "Dp102", "isRegional": true},
{"time": 8, "probe": "Dp103", "isRegional": true},
{"time": 9, "probe": "Dp104", "isRegional": true}
],
"locusGrouping": {
"7": [1, 3],
"8": [2, 4]
},
"tracingExclusions": [0, 6, 7, 8, 9]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"proximityFilterStrategy": {
"semantic": "UniversalProximityPermission"
},
"mergeRulesForTracing": {
"discardRoisNotInGroupsOfInterest": true,
"distanceThreshold": 2000,
"requirementType": "Conjunctive",
"groups": [[5, 7], [8, 9]]
},
"imagingRounds": [
{"time": 0, "name": "pre_image", "isBlank": true},
{"time": 1, "probe": "Dp001"},
{"time": 2, "probe": "Dp002"},
{"time": 3, "probe": "Dp003"},
{"time": 4, "probe": "Dp006"},
{"time": 5, "probe": "Dp007"},
{"time": 6, "probe": "Dp101", "isRegional": true},
{"time": 7, "probe": "Dp102", "isRegional": true},
{"time": 8, "probe": "Dp103", "isRegional": true},
{"time": 9, "probe": "Dp104", "isRegional": true}
],
"locusGrouping": {
"7": [1, 3],
"8": [2, 4]
},
"tracingExclusions": [0, 6, 7, 8, 9]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"proximityFilterStrategy": {
"semantic": "UniversalProximityPermission"
},
"mergeRulesForTracing": {
"discardRoisNotInGroupsOfInterest": true,
"distanceThreshold": 2000,
"requirementType": "Conjunctive",
"groups": [[6, 7], [8, 9]]
},
"imagingRounds": [
{"time": 0, "name": "pre_image", "isBlank": true},
{"time": 1, "probe": "Dp001"},
{"time": 2, "probe": "Dp002"},
{"time": 3, "probe": "Dp003"},
{"time": 4, "probe": "Dp006"},
{"time": 5, "probe": "Dp007"},
{"time": 6, "probe": "Dp101", "isRegional": true},
{"time": 7, "probe": "Dp102", "isRegional": true},
{"time": 8, "probe": "Dp103", "isRegional": true},
{"time": 9, "probe": "Dp104", "isRegional": true}
],
"locusGrouping": {
"6": [1, 3],
"7": [3, 5],
"8": [2],
"9": [4]
},
"tracingExclusions": [0, 6, 7, 8, 9]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"proximityFilterStrategy": {
"semantic": "UniversalProximityPermission"
},
"mergeRulesForTracing": {
"discardRoisNotInGroupsOfInterest": true,
"distanceThreshold": 2000,
"requirementType": "Conjunctive",
"groups": [[6, 7], [8, 9]]
},
"imagingRounds": [
{"time": 0, "name": "pre_image", "isBlank": true},
{"time": 1, "probe": "Dp001"},
{"time": 2, "probe": "Dp002"},
{"time": 3, "probe": "Dp003"},
{"time": 4, "probe": "Dp006"},
{"time": 5, "probe": "Dp007"},
{"time": 6, "probe": "Dp101", "isRegional": true},
{"time": 7, "probe": "Dp102", "isRegional": true},
{"time": 8, "probe": "Dp103", "isRegional": true},
{"time": 9, "probe": "Dp104", "isRegional": true}
],
"locusGrouping": {
"7": [1, 3, 5],
"8": [2, 4]
},
"tracingExclusions": [0, 6, 7, 8, 9]
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,34 @@ package at.ac.oeaw.imba.gerlich.looptrace

import scala.collection.SortedSet
import scala.util.{ Failure, Success, Try }
import scala.util.chaining.*
import cats.Eq
import cats.data.{ NonEmptyList, NonEmptySet }
import cats.syntax.all.*
import mouse.boolean.*
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.*
import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks

import at.ac.oeaw.imba.gerlich.gerlib.collections.AtLeast2
import at.ac.oeaw.imba.gerlich.gerlib.imaging.ImagingTimepoint
import at.ac.oeaw.imba.gerlich.gerlib.numeric.*

import at.ac.oeaw.imba.gerlich.looptrace.ImagingRoundsConfiguration.{
BuildError,
LocusGroup,
RoiPartnersRequirementType,
SelectiveProximityPermission,
SelectiveProximityProhibition,
TraceIdDefinitionAndFiltrationRule,
UniversalProximityPermission,
UniversalProximityProhibition,
}
import at.ac.oeaw.imba.gerlich.looptrace.syntax.all.*
import at.ac.oeaw.imba.gerlich.gerlib.geometry.EuclideanDistance
import at.ac.oeaw.imba.gerlich.looptrace.ImagingRoundsConfiguration.ProximityGroup
import cats.data.Validated.Invalid
import cats.data.Validated.Valid

/** Tests of examples of imaging rounds config files */
class TestImagingRoundsConfigurationExamplesParsability extends AnyFunSuite with ScalaCheckPropertyChecks with should.Matchers:
Expand Down Expand Up @@ -195,6 +205,77 @@ class TestImagingRoundsConfigurationExamplesParsability extends AnyFunSuite with
}
}

test("Simple canonical example of merge groups for tracing parses as expected."):
  // What the example file should yield: two conjunctive merge groups, {6, 7} and {8, 9},
  // each using the same distance threshold of 2000.
  val expectedRules =
    val threshold =
      import io.github.iltotore.iron.autoRefine
      EuclideanDistance.Threshold(NonnegativeReal(2000))
    NonEmptyList.of(Set(6, 7), Set(8, 9)).map{ rawTimes =>
      val members = rawTimes.map(ImagingTimepoint.unsafe).pipe(AtLeast2.unsafe)
      TraceIdDefinitionAndFiltrationRule(ProximityGroup(threshold, members), RoiPartnersRequirementType.Conjunctive)
    }

  val configFile = getResourcePath(
    subfolder = "DifferentTimepointRegionalMergeForTracing",
    filename = "good_example__legitimate_tracing_merge_groups.json",
  )

  ImagingRoundsConfiguration.fromJsonFile(configFile) match {
    case Right(conf) =>
      // Check 1: the flag to discard ungrouped ROIs must have been parsed as true.
      val discardFlagCheck = Either
        .cond(
          conf.discardRoisNotInGroupsOfInterest,
          (),
          "Expected the discard of ungrouped regional timepoints to be true, but it's false",
        )
        .toValidatedNel
      // Check 2: the merge rules must be present and equal to the expectation.
      val rulesCheck = conf.mergeRules
        .toRight("No merge rules section")
        .flatMap{ observedRules =>
          given Eq[TraceIdDefinitionAndFiltrationRule] = Eq.fromUniversalEquals
          Either.cond(
            observedRules === expectedRules,
            (),
            f"Observed rules ($observedRules) don't match expected ($expectedRules)",
          )
        }
        .toValidatedNel
      // Accumulate, rather than short-circuit, so that every failed check is reported.
      (discardFlagCheck, rulesCheck).tupled match {
        case Invalid(messages) => fail(f"${messages.length} failure(s): ${messages.mkString_("; ")}")
        case Valid(_) => succeed
      }
    case Left(errorMessages) => fail(
      s"${errorMessages.length} error message(s) parsing config file $configFile: ${errorMessages.mkString_("; ")}"
    )
  }

test("Any non-regional timepoint listed for tracing merger is an error."):
  // Each row pairs a faulty config file with the exact error message which it must trigger.
  val badConfigs = Table(
    ("configFileName", "expectedMessage"),
    ("bad_example__locus_time_in_tracing_merge_groups.json", "1 non-regional time(s) in merge rules: 5"),
    ("bad_example__blank_time_in_tracing_merge_groups.json", "1 non-regional time(s) in merge rules: 0"),
  )
  forAll (badConfigs) { (fileName, message) =>
    // No custom check is passed, so the message must match the expectation exactly.
    checkParseError(
      getResourcePath(subfolder = "DifferentTimepointRegionalMergeForTracing", filename = fileName),
      message,
    )
  }


test("Any overlap of locus timepoint sets for regional timepoints to merge is an error. #384"):
  val expectedPrefix = "Regionals timepoints to merge for tracing map to overlapping locus timepoint sets"
  val overlapConfig = getResourcePath(
    subfolder = "DifferentTimepointRegionalMergeForTracing",
    filename = "bad_example__overlap_locus_times_for_regional_times_to_merge__384.json",
  )
  // Here we just do a prefix check, rather than requiring the whole message to match.
  checkParseError(
    overlapConfig,
    expectedPrefix,
    check = (message, prefix) => message.startsWith(prefix),
  )

/**
 * Assert that parsing the given config file fails, and that exactly one of the resulting
 * error messages matches the expectation.
 *
 * @param configFile Path to the imaging rounds configuration file to parse
 * @param expectedMessage The text against which each parse error message is checked
 * @param check How to decide whether an observed message (first argument) matches the
 *   expectation (second argument); by default, exact equality
 * @return Success if the parse fails with exactly one matching message; otherwise the test fails
 */
def checkParseError(configFile: os.Path, expectedMessage: String, check: (String, String) => Boolean = cats.Eq[String].eqv): org.scalatest.Assertion =
  ImagingRoundsConfiguration.fromJsonFile(configFile) match {
    case Left(messages) =>
      val numMatches = messages.count(check(_, expectedMessage))
      // Exactly one match is required, so report "none matched" and "several matched" differently.
      if numMatches === 1
      then succeed
      else if numMatches === 0
      then fail(s"No parse failure message matched query ($expectedMessage); messages: ${messages.mkString_("; ")}")
      else fail(s"$numMatches parse failure messages matched query ($expectedMessage), but exactly 1 was expected; messages: ${messages.mkString_("; ")}")
    case Right(_) => fail(s"Expected config parse to fail, but it succeeded on file $configFile")
  }

/**
 * Locate a test resource file for this suite on the classpath.
 *
 * @param subfolder Name of the folder, within this suite's resources folder, which contains the file
 * @param filename Name of the resource file
 * @return Filesystem path to the resource
 * @throws java.io.FileNotFoundException if no such resource is on the classpath
 */
private def getResourcePath(subfolder: String, filename: String): os.Path =
  val resourceName = s"/TestImagingRoundsConfiguration/$subfolder/$filename"
  // getResource gives null for a missing resource; fail with a clear message instead of a bare NullPointerException.
  Option(getClass.getResource(resourceName))
    // Go through a URI so that percent-encoded characters (e.g., spaces) in the location are decoded.
    .map(url => os.Path(java.nio.file.Paths.get(url.toURI)))
    .getOrElse{ throw new java.io.FileNotFoundException(s"Test resource not found on classpath: $resourceName") }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ class TestParseDifferentTimepointsRoiMergeSectionOfImagingRoundsConfig extends
messages.length shouldEqual 1
case Right(_) => fail("Expected parse failure but got success")
}

test("With no groups and no threhsold, no parse"):
pending

Expand Down

0 comments on commit 0c8177a

Please sign in to comment.