From acafb6d6fd47471f761a42e5116b3428ce3c1929 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 1 Aug 2024 10:39:29 +0000 Subject: [PATCH] Automated commit of generated code --- .../jetbrains/kotlinx/dataframe/impl/Utils.kt | 10 ++- .../kotlinx/dataframe/impl/api/convertTo.kt | 62 +++++++++++++------ .../kotlinx/dataframe/impl/schema/Utils.kt | 29 ++++++--- 3 files changed, 70 insertions(+), 31 deletions(-) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt index 08490f973..21f8bb639 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt @@ -121,7 +121,7 @@ internal fun Iterable.anyNull(): Boolean = any { it == null } internal fun emptyPath(): ColumnPath = ColumnPath(emptyList()) @PublishedApi -internal fun KClass.zero(): T = +internal fun KClass.zeroOrNull(): T? = when (this) { Int::class -> 0 as T Byte::class -> 0.toByte() as T @@ -131,10 +131,14 @@ internal fun KClass.zero(): T = Float::class -> 0.toFloat() as T BigDecimal::class -> BigDecimal.ZERO as T BigInteger::class -> BigInteger.ZERO as T - Number::class -> 0 as T - else -> TODO() + Number::class -> 0 as? T + else -> null } +@PublishedApi +internal fun KClass.zero(): T = + zeroOrNull() ?: throw NotImplementedError("Zero value for $this is not supported") + internal fun catchSilent(body: () -> T): T? = try { body() diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt index 7e4c1a31f..b958fa050 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt @@ -1,5 +1,6 @@ package org.jetbrains.kotlinx.dataframe.impl.api +import io.github.oshai.kotlinlogging.KotlinLogging import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.AnyRow import org.jetbrains.kotlinx.dataframe.ColumnsSelector @@ -11,13 +12,13 @@ import org.jetbrains.kotlinx.dataframe.api.ConvertSchemaDsl import org.jetbrains.kotlinx.dataframe.api.ConverterScope import org.jetbrains.kotlinx.dataframe.api.ExcessiveColumns import org.jetbrains.kotlinx.dataframe.api.Infer +import org.jetbrains.kotlinx.dataframe.api.add import org.jetbrains.kotlinx.dataframe.api.all import org.jetbrains.kotlinx.dataframe.api.allNulls import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.concat import org.jetbrains.kotlinx.dataframe.api.convertTo import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame -import org.jetbrains.kotlinx.dataframe.api.getColumnPaths import org.jetbrains.kotlinx.dataframe.api.isEmpty import org.jetbrains.kotlinx.dataframe.api.map import org.jetbrains.kotlinx.dataframe.api.name @@ -29,12 +30,14 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.columns.ColumnPath import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.exceptions.ExcessiveColumnsException import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException import org.jetbrains.kotlinx.dataframe.impl.emptyPath -import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyColumn +import org.jetbrains.kotlinx.dataframe.impl.getColumnPaths import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrame +import org.jetbrains.kotlinx.dataframe.impl.schema.createNullFilledColumn import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema import org.jetbrains.kotlinx.dataframe.impl.schema.render import org.jetbrains.kotlinx.dataframe.kind @@ -45,6 +48,8 @@ import kotlin.reflect.KType import kotlin.reflect.full.withNullability import kotlin.reflect.jvm.jvmErasure +private val logger = KotlinLogging.logger {} + private open class Converter(val transform: ConverterScope.(Any?) -> Any?, val skipNulls: Boolean) private class Filler(val columns: ColumnsSelector<*, *>, val expr: RowExpression<*, *>) @@ -252,22 +257,16 @@ internal fun AnyFrame.convertToImpl( } }.toMutableList() - // when the target is nullable but the source does not contain a column, fill it in with nulls / empty dataframes + // when the target is nullable but the source does not contain a column, + // fill it in with nulls / empty dataframes val size = this.size.nrow schema.columns.forEach { (name, targetColumn) -> - val isNullable = - // like value column of type Int? - targetColumn.nullable || - // like value column of type Int? (backup check) - targetColumn.type.isMarkedNullable || - // like DataRow for a group column (all columns in the group will be nullable) - targetColumn.contentType?.isMarkedNullable == true || - // frame column can be filled with empty dataframes - targetColumn.kind == ColumnKind.Frame - if (name !in visited) { - newColumns += targetColumn.createEmptyColumn(name, size) - if (!isNullable) { + try { + newColumns += targetColumn.createNullFilledColumn(name, size) + } catch (e: IllegalStateException) { + logger.debug(e) { "" } + // if this could not be done automatically, they need to be filled manually missingPaths.add(path + name) } } @@ -279,14 +278,39 @@ internal fun AnyFrame.convertToImpl( val marker = MarkersExtractor.get(clazz) var result = convertToSchema(marker.schema, emptyPath()) + /* + * Here we handle all registered fillers of the user. + * Fillers are registered in the DSL like: + * ```kt + * df.convertTo { + * fill { col1 and col2 }.with { something } + * fill { col3 }.with { somethingElse } + * } + * ``` + * Users can use this to fill up any column that was missing during the conversion. + * They can also fill up and thus overwrite any existing column here. + */ dsl.fillers.forEach { filler -> - val paths = result.getColumnPaths(filler.columns) - missingPaths.removeAll(paths.toSet()) - result = result.update { paths.toColumnSet() }.with { - filler.expr(this, this) + // get all paths from the `fill { col1 and col2 }` part + val paths = result.getColumnPaths(UnresolvedColumnsPolicy.Create, filler.columns).toSet() + + // split the paths into those that are already in the df and those that are missing + val (newPaths, existingPaths) = paths.partition { it in missingPaths } + + // first fill cols that are already in the df using the `with {}` part of the dsl + result = result.update { existingPaths.toColumnSet() }.with { filler.expr(this, this) } + + // then create any missing ones by filling using the `with {}` part of the dsl + result = newPaths.fold(result) { df, newPath -> + df.add(newPath, Infer.Type) { filler.expr(this, this) } } + + // remove the paths that are now filled + missingPaths -= paths } + // Inform the user which target columns could not be created in the conversion + // The user will need to supply extra information for these, like `fill {}` them. if (missingPaths.isNotEmpty()) { throw IllegalArgumentException( "The following columns were not found in DataFrame: ${ diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt index d8c806953..48ff46f46 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt @@ -102,6 +102,9 @@ internal fun AnyCol.extractSchema(): ColumnSchema = @PublishedApi internal fun getSchema(kClass: KClass<*>): DataFrameSchema = MarkersExtractor.get(kClass).schema +/** + * Create "empty" column based on the toplevel of [this] [ColumnSchema]. + */ internal fun ColumnSchema.createEmptyColumn(name: String): AnyCol = when (this) { is ColumnSchema.Value -> DataColumn.createValueColumn(name, emptyList(), type) @@ -110,14 +113,22 @@ internal fun ColumnSchema.createEmptyColumn(name: String): AnyCol = else -> error("Unexpected ColumnSchema: $this") } -/** Create "empty" column, filled with either null or empty dataframes. */ -internal fun ColumnSchema.createEmptyColumn(name: String, numberOfRows: Int): AnyCol = +/** + * Creates a column based on [this] [ColumnSchema] filled with `null` or empty dataframes. + * @throws IllegalStateException if the column is not nullable and [numberOfRows]` > 0`. + */ +internal fun ColumnSchema.createNullFilledColumn(name: String, numberOfRows: Int): AnyCol = when (this) { - is ColumnSchema.Value -> DataColumn.createValueColumn( - name = name, - values = List(numberOfRows) { null }, - type = type, - ) + is ColumnSchema.Value -> { + if (!type.isMarkedNullable && numberOfRows > 0) { + error("Cannot create a null-filled value column of type $type as it's not nullable.") + } + DataColumn.createValueColumn( + name = name, + values = List(numberOfRows) { null }, + type = type, + ) + } is ColumnSchema.Group -> DataColumn.createColumnGroup( name = name, @@ -130,7 +141,7 @@ internal fun ColumnSchema.createEmptyColumn(name: String, numberOfRows: Int): An schema = lazyOf(schema), ) - else -> error("Unexpected ColumnSchema: $this") + else -> error("Cannot create null-filled column of unexpected ColumnSchema: $this") } internal fun DataFrameSchema.createEmptyDataFrame(): AnyFrame = @@ -143,7 +154,7 @@ internal fun DataFrameSchema.createEmptyDataFrame(numberOfRows: Int): AnyFrame = DataFrame.empty(numberOfRows) } else { columns.map { (name, schema) -> - schema.createEmptyColumn(name, numberOfRows) + schema.createNullFilledColumn(name, numberOfRows) }.toDataFrame() }