From 0ba7feeee0b70c0e1938bd3a8c401c1cd6972e95 Mon Sep 17 00:00:00 2001 From: Holger Brandl Date: Tue, 7 Nov 2017 09:53:48 +0100 Subject: [PATCH] fixed negative selection; inc version to 1.2.3 --- build.gradle | 2 +- src/main/kotlin/kscript/KscriptUtil.kt | 2 +- src/main/kotlin/kscript/text/Tables.kt | 39 ++++++++++++------- .../kotlin/kscript/test/SupportApiTest.kt | 36 ++++++++++++----- 4 files changed, 53 insertions(+), 26 deletions(-) diff --git a/build.gradle b/build.gradle index d4892ba..286210d 100644 --- a/build.gradle +++ b/build.gradle @@ -51,7 +51,7 @@ task javadocJar(type: Jar, dependsOn: javadoc) { //group = 'de.mpicbg.scicomp' group = 'com.github.holgerbrandl' //version = '1.1.9' -version = '1.2.3-SNAPSHOT' +version = '1.2.3' artifacts { diff --git a/src/main/kotlin/kscript/KscriptUtil.kt b/src/main/kotlin/kscript/KscriptUtil.kt index c53a2d5..2e5f5cd 100644 --- a/src/main/kotlin/kscript/KscriptUtil.kt +++ b/src/main/kotlin/kscript/KscriptUtil.kt @@ -7,7 +7,7 @@ import kotlin.system.exitProcess */ /** - * Just used interally to prevent [stopIfNot] to quit the process when running in unit-test mode. + * Just used internally to prevent [stopIfNot] to quit the process when running in unit-test mode. * It throw an IllegalArgumentException instead. */ internal var isTestMode = false diff --git a/src/main/kotlin/kscript/text/Tables.kt b/src/main/kotlin/kscript/text/Tables.kt index da942b8..321ca19 100644 --- a/src/main/kotlin/kscript/text/Tables.kt +++ b/src/main/kotlin/kscript/text/Tables.kt @@ -83,18 +83,33 @@ fun List.print(separator: String = "\t") = asSequence().print(separator) /** Internal representations for column selection indices. Usually not use directly but rather via [with] and [without]. */ abstract class ColSelect(val indices: Array = emptyArray()) { + + // irrespective of selection mode (positive or negative) indices must be positive in here + init { + stopIfNot(indices.all { it > 0 }) { + "kscript.text.* is using 1-based arrays to ease awk transition, so indices must be strictly positive" + } + } + abstract fun and(column: Int): ColSelect abstract fun and(range: IntRange): ColSelect + abstract fun process(lines: Sequence): Sequence } -class PosSelect(arrayOf: Array) : ColSelect(arrayOf) { +class PosSelect(columnIndices: Array) : ColSelect(columnIndices) { override fun and(column: Int) = PosSelect(arrayOf(*indices, column)) override fun and(range: IntRange) = PosSelect(arrayOf(*indices, *range.toList().toTypedArray())) + + override fun process(lines: Sequence): Sequence = lines.map { row -> Row(indices.map { row[it] }) } } -class NegSelect(arrayOf: Array) : ColSelect(arrayOf) { +class NegSelect(columnIndices: Array) : ColSelect(columnIndices) { override fun and(column: Int) = NegSelect(arrayOf(*indices, column)) override fun and(range: IntRange) = NegSelect(arrayOf(*indices, *range.toList().toTypedArray())) + + override fun process(lines: Sequence): Sequence = lines.map { + Row(it.filterIndexed { index, _ -> !indices.contains(index+1) }) + } } /** Starts building a column selection index. Both positive and negative indices are supported. */ @@ -116,24 +131,20 @@ fun Sequence.select(vararg colIndices: Int): Sequence { "Can not mix positive and negative selections" } - val selector = if (isPositive) PosSelect(arrayOf(*colIndices.toTypedArray())) else NegSelect(arrayOf(*colIndices.toTypedArray())) + val selector = if (isPositive) { + PosSelect(arrayOf(*colIndices.toTypedArray())) + } else { + NegSelect(arrayOf(*colIndices.map { -it }.toTypedArray())) + } return select(selector) } -fun Sequence.select(columns: ColSelect): Sequence { +fun Sequence.select(columnSelector: ColSelect): Sequence { // more efficient but does not allow to change the order - // return map { it.filterIndexed { index, _ -> retainColumn(columns, index + 1) } } - - stopIfNot(columns.indices.all { it != 0 }) { "kscript.text.* is using 1-based arrays to ease awk transition" } + // return map { it.filterIndexed { index, _ -> retainColumn(columnSelector, index + 1) } } - return if (columns is PosSelect) { - // positive selection - map { row -> Row(columns.indices.map { row[it] }) } - } else { - // negative selection - map { Row(it.filterIndexed { index, _ -> !columns.indices.contains(index) }) } - } + return columnSelector.process(this) } diff --git a/src/test/kotlin/kscript/test/SupportApiTest.kt b/src/test/kotlin/kscript/test/SupportApiTest.kt index 42012be..8394699 100644 --- a/src/test/kotlin/kscript/test/SupportApiTest.kt +++ b/src/test/kotlin/kscript/test/SupportApiTest.kt @@ -1,9 +1,11 @@ package kscript.test import io.kotlintest.matchers.* -import io.kotlintest.specs.StringSpec -import kscript.isTestMode -import kscript.text.* +import kscript.text.resolveArgFile +import kscript.text.select +import kscript.text.split +import kscript.text.with +import org.junit.Test /** * @author Holger Brandl @@ -16,12 +18,15 @@ fun flightsZipped() = resolveArgFile(arrayOf("src/test/resources/flights.tsv.gz" fun flights() = resolveArgFile(arrayOf("src/test/resources/flights.txt")) -class SupportApiTest : StringSpec() { init { +class SupportApiTest { - isTestMode = true + init { + kscript.isTestMode = true + } - "extract field with column filter" { + @Test + fun `extract field with column filter`() { someFlights().split(). filter { it[12] == "N14228" }. map { it[13] }. @@ -33,23 +38,34 @@ class SupportApiTest : StringSpec() { init { } - "allow to select columsn" { + @Test + fun `allow to select column`() { someFlights().split() .select(with(3).and(11..13).and(1)) .first().data shouldBe listOf("day", "flight", "tailnum", "origin", "year") } - "rejeced mixed select" { + @Test + fun `is should perform a negative selection`() { + someFlights().split() + .select(1, 2, 3) + .select(-2) + .first().data shouldBe listOf("year", "day") + } + + + @Test + fun `rejeced mixed select`() { shouldThrow { someFlights().split().select(1, -2) }.message shouldBe "[ERROR] Can not mix positive and negative selections" } - "compressed lines should be unzipped on the fly"{ + @Test + fun `compressed lines should be unzipped on the fly`() { resolveArgFile(arrayOf("src/test/resources/flights.tsv.gz")). drop(1).first() should startWith("2013") } -} } \ No newline at end of file