From b5c614cc3942109ba517b7ec125703b5331f9c2e Mon Sep 17 00:00:00 2001
From: Laurent Perez
Date: Thu, 17 Oct 2024 20:53:26 +0200
Subject: [PATCH] Implement TimeStampMicroTZVector for parquet isAdjustedToUTC timestamp columns #926

---
 .../kotlinx/dataframe/io/arrowReadingImpl.kt | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt
index f7c7eb940..ea835530c 100644
--- a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt
+++ b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt
@@ -1,11 +1,14 @@
 package org.jetbrains.kotlinx.dataframe.io
 
+import kotlinx.datetime.Instant
 import kotlinx.datetime.LocalDate
 import kotlinx.datetime.LocalDateTime
 import kotlinx.datetime.LocalTime
+import kotlinx.datetime.TimeZone
 import kotlinx.datetime.toKotlinLocalDate
 import kotlinx.datetime.toKotlinLocalDateTime
 import kotlinx.datetime.toKotlinLocalTime
+import kotlinx.datetime.toLocalDateTime
 import org.apache.arrow.memory.RootAllocator
 import org.apache.arrow.vector.BigIntVector
 import org.apache.arrow.vector.BitVector
@@ -21,6 +24,7 @@ import org.apache.arrow.vector.LargeVarBinaryVector
 import org.apache.arrow.vector.LargeVarCharVector
 import org.apache.arrow.vector.NullVector
 import org.apache.arrow.vector.SmallIntVector
+import org.apache.arrow.vector.TimeStampMicroTZVector
 import org.apache.arrow.vector.TimeMicroVector
 import org.apache.arrow.vector.TimeMilliVector
 import org.apache.arrow.vector.TimeNanoVector
@@ -179,6 +183,26 @@ private fun TimeStampMicroVector.values(range: IntRange): List<LocalDateTime?> =
         }
     }
 
+/**
+ * Reads the timestamps at the row indices in [range] as wall-clock [LocalDateTime]s in the vector's own time zone.
+ *
+ * The vector stores microseconds since the Unix epoch. Null slots become `null`.
+ */
+private fun TimeStampMicroTZVector.values(range: IntRange): List<LocalDateTime?> {
+    // Resolve the zone once instead of once per row.
+    val zone = TimeZone.of(timeZone)
+    return range.map { row ->
+        if (isNull(row)) {
+            null
+        } else {
+            val micros = getObject(row)
+            // floorDiv/floorMod keep the sub-millisecond digits and stay correct for pre-epoch (negative) values.
+            Instant.fromEpochSeconds(Math.floorDiv(micros, 1_000_000L), Math.floorMod(micros, 1_000_000L) * 1_000L)
+                .toLocalDateTime(zone)
+        }
+    }
+}
+
 private fun TimeStampMilliVector.values(range: IntRange): List<LocalDateTime?> =
     range.mapIndexed { i, it ->
         if (isNull(i)) {
@@ -345,6 +369,8 @@ private fun readField(root: VectorSchemaRoot, field: Field, nullability: Nullabi
 
             is TimeStampMicroVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
 
+            is TimeStampMicroTZVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
+
             is TimeStampMilliVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
 
             is TimeStampSecVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)