Skip to content

Commit

Permalink
fixed negative selection; inc version to 1.2.3
Browse files Browse the repository at this point in the history
  • Loading branch information
holgerbrandl committed Nov 7, 2017
1 parent a7de31e commit 0ba7fee
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 26 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ task javadocJar(type: Jar, dependsOn: javadoc) {
//group = 'de.mpicbg.scicomp'
group = 'com.github.holgerbrandl'
//version = '1.1.9'
version = '1.2.3-SNAPSHOT'
version = '1.2.3'


artifacts {
Expand Down
2 changes: 1 addition & 1 deletion src/main/kotlin/kscript/KscriptUtil.kt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import kotlin.system.exitProcess
*/

/**
* Just used interally to prevent [stopIfNot] to quit the process when running in unit-test mode.
* Just used internally to prevent [stopIfNot] to quit the process when running in unit-test mode.
* It throws an IllegalArgumentException instead.
*/
internal var isTestMode = false
Expand Down
39 changes: 25 additions & 14 deletions src/main/kotlin/kscript/text/Tables.kt
Original file line number Diff line number Diff line change
Expand Up @@ -83,18 +83,33 @@ fun List<Row>.print(separator: String = "\t") = asSequence().print(separator)
/**
 * Internal representation of column selection indices. Usually not used directly but rather via [with] and
 * [without].
 *
 * @property indices the column indices of this selection; every entry must be strictly positive (1-based),
 * regardless of whether the concrete selector keeps or drops the listed columns.
 */
abstract class ColSelect(val indices: Array<Int> = emptyArray()) {

    init {
        // Selection mode (positive or negative) is encoded by the subclass, never by the sign of an index,
        // so a zero or negative entry is always rejected here.
        val allStrictlyPositive = indices.none { it <= 0 }

        stopIfNot(allStrictlyPositive) {
            "kscript.text.* is using 1-based arrays to ease awk transition, so indices must be strictly positive"
        }
    }

    /** Returns a selector of the same kind that additionally covers [column]. */
    abstract fun and(column: Int): ColSelect

    /** Returns a selector of the same kind that additionally covers every index in [range]. */
    abstract fun and(range: IntRange): ColSelect

    /** Applies this selection to each row of [lines] and returns the resulting rows. */
    abstract fun process(lines: Sequence<Row>): Sequence<Row>
}

class PosSelect(arrayOf: Array<Int>) : ColSelect(arrayOf) {
/**
 * Positive column selection: each output row is built from the values found at [indices], in the order the
 * indices were given.
 */
class PosSelect(columnIndices: Array<Int>) : ColSelect(columnIndices) {

    /** Returns a new positive selection with [column] appended to the current indices. */
    override fun and(column: Int): PosSelect = PosSelect(indices + column)

    /** Returns a new positive selection with every index of [range] appended to the current indices. */
    override fun and(range: IntRange): PosSelect = PosSelect(indices + range.toList())

    /** Maps each row of [lines] to a row holding only the selected columns, looked up via `row[index]`. */
    override fun process(lines: Sequence<Row>): Sequence<Row> =
        lines.map { row ->
            val picked = indices.map { columnIndex -> row[columnIndex] }
            Row(picked)
        }
}

class NegSelect(arrayOf: Array<Int>) : ColSelect(arrayOf) {
/**
 * Negative column selection: each output row keeps every column except those whose 1-based position is
 * listed in [indices].
 */
class NegSelect(columnIndices: Array<Int>) : ColSelect(columnIndices) {

    /** Returns a new negative selection that additionally excludes [column]. */
    override fun and(column: Int): NegSelect = NegSelect(indices + column)

    /** Returns a new negative selection that additionally excludes every index of [range]. */
    override fun and(range: IntRange): NegSelect = NegSelect(indices + range.toList())

    /** Maps each row of [lines] to a row without the excluded columns. */
    override fun process(lines: Sequence<Row>): Sequence<Row> =
        lines.map { row ->
            // filterIndexed counts from 0 while the stored indices are 1-based, hence the +1 shift.
            val kept = row.filterIndexed { position, _ -> (position + 1) !in indices }
            Row(kept)
        }
}

/** Starts building a column selection index. Both positive and negative indices are supported. */
Expand All @@ -116,24 +131,20 @@ fun Sequence<Row>.select(vararg colIndices: Int): Sequence<Row> {
"Can not mix positive and negative selections"
}

val selector = if (isPositive) PosSelect(arrayOf(*colIndices.toTypedArray())) else NegSelect(arrayOf(*colIndices.toTypedArray()))
val selector = if (isPositive) {
PosSelect(arrayOf(*colIndices.toTypedArray()))
} else {
NegSelect(arrayOf(*colIndices.map { -it }.toTypedArray()))
}

return select(selector)
}

fun Sequence<Row>.select(columns: ColSelect): Sequence<Row> {
fun Sequence<Row>.select(columnSelector: ColSelect): Sequence<Row> {
// more efficient but does not allow to change the order
// return map { it.filterIndexed { index, _ -> retainColumn(columns, index + 1) } }

stopIfNot(columns.indices.all { it != 0 }) { "kscript.text.* is using 1-based arrays to ease awk transition" }
// return map { it.filterIndexed { index, _ -> retainColumn(columnSelector, index + 1) } }

return if (columns is PosSelect) {
// positive selection
map { row -> Row(columns.indices.map { row[it] }) }
} else {
// negative selection
map { Row(it.filterIndexed { index, _ -> !columns.indices.contains(index) }) }
}
return columnSelector.process(this)
}


Expand Down
36 changes: 26 additions & 10 deletions src/test/kotlin/kscript/test/SupportApiTest.kt
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package kscript.test

import io.kotlintest.matchers.*
import io.kotlintest.specs.StringSpec
import kscript.isTestMode
import kscript.text.*
import kscript.text.resolveArgFile
import kscript.text.select
import kscript.text.split
import kscript.text.with
import org.junit.Test

/**
* @author Holger Brandl
Expand All @@ -16,12 +18,15 @@ fun flightsZipped() = resolveArgFile(arrayOf("src/test/resources/flights.tsv.gz"
fun flights() = resolveArgFile(arrayOf("src/test/resources/flights.txt"))


class SupportApiTest : StringSpec() { init {
class SupportApiTest {

isTestMode = true
init {
kscript.isTestMode = true
}


"extract field with column filter" {
@Test
fun `extract field with column filter`() {
someFlights().split().
filter { it[12] == "N14228" }.
map { it[13] }.
Expand All @@ -33,23 +38,34 @@ class SupportApiTest : StringSpec() { init {
}


"allow to select columsn" {
@Test
fun `allow to select column`() {
someFlights().split()
.select(with(3).and(11..13).and(1))
.first().data shouldBe listOf("day", "flight", "tailnum", "origin", "year")
}


"rejeced mixed select" {
@Test
fun `is should perform a negative selection`() {
someFlights().split()
.select(1, 2, 3)
.select(-2)
.first().data shouldBe listOf("year", "day")
}


@Test
fun `rejeced mixed select`() {
shouldThrow<IllegalArgumentException> {
someFlights().split().select(1, -2)
}.message shouldBe "[ERROR] Can not mix positive and negative selections"
}


"compressed lines should be unzipped on the fly"{
@Test
fun `compressed lines should be unzipped on the fly`() {
resolveArgFile(arrayOf("src/test/resources/flights.tsv.gz")).
drop(1).first() should startWith("2013")
}
}
}

0 comments on commit 0ba7fee

Please sign in to comment.