Skip to content

Commit

Permalink
Support for encoding/decoding a sequence of values (#20)
Browse files Browse the repository at this point in the history
* Support for streaming via Reader and Appendable
* Handle Microsoft Excel's insistence on using a byte order mark (BOM)
* Cleaning up new unit tests for FetchSourceTest
* Removed commented-out debugging `println` calls for FetchSource
* Sequence encoding and decoding
* Streaming serialization bug fix
* Cleanup formatting
* Refactor asynchronous API to `CsvRecordReader` and `CsvRecordWriter`
* Fix blocking initialization of readers

---------

Co-authored-by: Sven Obser <[email protected]>
  • Loading branch information
UnknownJoe796 and Sven Obser authored Dec 13, 2024
1 parent 5a358d8 commit bf89d3c
Show file tree
Hide file tree
Showing 8 changed files with 304 additions and 80 deletions.
2 changes: 2 additions & 0 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
dokka = "1.9.20"
junit-jupiter = "5.11.0"
kotlin = "2.1.0"
kotlinx-coroutines = "1.9.0"
kotlinx-serialization-core = "1.7.3"
nexus-publish = "0.4.0"
nexus-staging = "0.30.0"
researchgate-release = "3.0.2"

[libraries]
junit-jupiter = { module = "org.junit.jupiter:junit-jupiter", version.ref = "junit-jupiter" }
kotlinx-coroutines-test = { module = "org.jetbrains.kotlinx:kotlinx-coroutines-test", version.ref = "kotlinx-coroutines" }
kotlinx-serialization-core = { module = "org.jetbrains.kotlinx:kotlinx-serialization-core", version.ref = "kotlinx-serialization-core" }

[plugins]
Expand Down
1 change: 1 addition & 0 deletions library/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies {

testImplementation(kotlin("test-junit5"))
testImplementation(libs.junit.jupiter)
testImplementation(libs.kotlinx.coroutines.test)
}

kotlin {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package kotlinx.serialization.csv

import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.KSerializer
import kotlinx.serialization.builtins.ListSerializer
import kotlinx.serialization.csv.decode.CsvReader
import kotlinx.serialization.csv.decode.FetchSource
import kotlinx.serialization.csv.decode.RecordListCsvDecoder
import kotlinx.serialization.encoding.CompositeDecoder.Companion.DECODE_DONE
import java.io.Reader

/**
 * Iterator over CSV records that additionally offers a null-returning [read] accessor.
 *
 * @param T The type each record is exposed as.
 */
interface CsvRecordReader<T : Any> : Iterator<T> {
    /**
     * Fetch the next record without throwing at the end of the input.
     *
     * @return The result of [next] when [hasNext] reports `true`, otherwise `null`.
     */
    fun read(): T? {
        // Guard first so that next() is only ever invoked when a record is available.
        if (!hasNext()) {
            return null
        }
        return next()
    }
}

/**
 * Create a [CsvRecordReader] that decodes CSV records one at a time from the given [input].
 *
 * @param deserializer The deserializer used to decode each record.
 * @param input The reader supplying the CSV text. This function *does not close the reader*.
 * @return A reader whose [Iterator.next] throws [NoSuchElementException] once the decoder
 * reports that no further records are available.
 */
@ExperimentalSerializationApi
fun <T : Any> Csv.recordReader(deserializer: KSerializer<T>, input: Reader): CsvRecordReader<T> {
    val decoder = RecordListCsvDecoder(
        csv = this,
        reader = CsvReader(FetchSource(input), config)
    )
    // The records are decoded as the elements of a list of T, hence the list descriptor.
    val listDescriptor = ListSerializer(deserializer).descriptor
    // Most recently decoded record; handed to the decoder as the "previous value" argument.
    var previousValue: T? = null

    return object : CsvRecordReader<T> {
        override fun hasNext(): Boolean =
            decoder.decodeElementIndex(listDescriptor) != DECODE_DONE

        override fun next(): T {
            val index = decoder.decodeElementIndex(listDescriptor)
            // Honor the Iterator contract instead of passing DECODE_DONE on as an element index.
            if (index == DECODE_DONE) {
                throw NoSuchElementException("No more CSV records to read.")
            }
            return decoder
                .decodeSerializableElement(listDescriptor, index, deserializer, previousValue)
                .also { previousValue = it }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package kotlinx.serialization.csv

import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.KSerializer
import kotlinx.serialization.csv.encode.CsvWriter
import kotlinx.serialization.csv.encode.RecordListCsvEncoder

/**
 * Record writer that allows writing CSV line by line.
 *
 * Declared as a `fun interface`, so an implementation can be supplied as a single lambda
 * that receives the record to write.
 *
 * @param T The type of the records accepted by [write].
 */
fun interface CsvRecordWriter<T : Any> {
/**
 * Write next record.
 *
 * @param record The record to write.
 */
fun write(record: T)
}

/**
 * Build a [CsvRecordWriter] that serializes one record per [CsvRecordWriter.write] call.
 *
 * @param serializer The serializer applied to every record handed to the returned writer.
 * @param output The destination the CSV output is appended to.
 * @return A writer that passes each record to a single encoder created when this function is called.
 */
@ExperimentalSerializationApi
fun <T : Any> Csv.recordWriter(serializer: KSerializer<T>, output: Appendable): CsvRecordWriter<T> {
    val csvWriter = CsvWriter(output, config)
    // One encoder is shared by all write() calls made on the returned writer.
    val recordEncoder = RecordListCsvEncoder(csv = this, writer = csvWriter)

    return object : CsvRecordWriter<T> {
        override fun write(record: T) {
            recordEncoder.encodeSerializableValue(serializer, record)
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,17 @@ import kotlinx.serialization.csv.config.CsvConfig
/**
* Reader that parses CSV input.
*/
internal class CsvReader(private val source: Source, private val config: CsvConfig) {
internal class CsvReader(source: Source, private val config: CsvConfig) {

private val source: Source by lazy {
source.also {
// Skip Microsoft Excel's byte order marker, should it appear.
// This has to happen lazily to avoid blocking read calls during the initialization of the CsvReader.
if (source.peek() == '\uFEFF') {
source.read()
}
}
}

val offset
get() = source.offset
Expand All @@ -21,11 +31,6 @@ internal class CsvReader(private val source: Source, private val config: CsvConf

private var marks = arrayListOf<Int>()

init {
// Skip Microsoft Excel's byte order marker, should it appear:
read("\uFEFF")
}

/**
* Read value in the next column.
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package kotlinx.serialization.csv.decode

import java.io.EOFException
import java.io.Reader

internal class FetchSource(
Expand All @@ -20,18 +21,27 @@ internal class FetchSource(
override var offset: Int = 0
private set

private var next: Char? = getChar()
private var queue = ArrayList<Char>(2048)
private var marks = ArrayList<Int>(2048)
private var queueOffset = 0

private var next: Char? = null
get() {
if (field == null && nextPosition == 0) {
// Reading first char has to happen lazily to avoid blocking read calls
// during the initialization of the FetchSource.
field = getChar()
}
return field
}

private fun nextChar(): Char {
val n = next ?: throw IllegalStateException("Out of characters")
val nextChar = next ?: throw EOFException("No more characters to read.")
next = getChar()
nextPosition++
return n
return nextChar
}

private var queue = ArrayList<Char>(2048)
private var marks = ArrayList<Int>(2048)
private var queueOffset = 0

override fun canRead(): Boolean = offset <= nextPosition

override fun read(): Char? {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import kotlinx.serialization.csv.config.QuoteMode
* To write one CSV record, call [beginRecord], followed by multiple calls to [printColumn] and
* finally call [endRecord] to finish the record.
*/
internal class CsvWriter(private val sb: Appendable, private val config: CsvConfig) {
internal class CsvWriter(private val output: Appendable, private val config: CsvConfig) {

var isFirstRecord = true
private var isFirstColumn = true
Expand All @@ -20,7 +20,7 @@ internal class CsvWriter(private val sb: Appendable, private val config: CsvConf
*/
fun beginRecord() {
if (!isFirstRecord) {
sb.append(config.recordSeparator)
output.append(config.recordSeparator)
}
}

Expand Down Expand Up @@ -64,19 +64,19 @@ internal class CsvWriter(private val sb: Appendable, private val config: CsvConf
escapeCharacters = "$escapeChar$delimiter$quoteChar$recordSeparator",
escapeChar = escapeChar
)
sb.append(escapedValue)
output.append(escapedValue)
} else if (mode == WriteMode.QUOTED || mode == WriteMode.ESCAPED) {
val escapedValue = value.replace("$quoteChar", "$quoteChar$quoteChar")
sb.append(quoteChar).append(escapedValue).append(quoteChar)
output.append(quoteChar).append(escapedValue).append(quoteChar)
} else {
sb.append(value)
output.append(value)
}
}

/** End the current column (which writes the column delimiter). */
private fun nextColumn() {
if (!isFirstColumn) {
sb.append(config.delimiter)
output.append(config.delimiter)
}
isFirstColumn = false
}
Expand Down
Loading

0 comments on commit bf89d3c

Please sign in to comment.