From 41577dfd2950490f21c45bfe63833f93708501cc Mon Sep 17 00:00:00 2001 From: Aleksei Zinovev Date: Fri, 19 Apr 2024 18:40:22 +0200 Subject: [PATCH] Add separator parameter to DataFrame.flatten (#667) Added a 'separator' parameter to the DataFrame.flatten function to customize the separator used in column names when 'keepParentNameForColumns' is true. This allows greater flexibility in formatting column names. Note that the generated names now put the parent name first (for example 'mean.age' instead of the previous 'age.mean'), so the output of existing calls with 'keepParentNameForColumns = true' changes. Tests have been updated accordingly to check for proper functionality. --- .../kotlinx/dataframe/api/flatten.kt | 39 +++++++++++++------ .../kotlinx/dataframe/impl/api/flatten.kt | 5 ++- .../kotlinx/dataframe/api/flatten.kt | 12 +++++- .../kotlinx/dataframe/api/flatten.kt | 39 +++++++++++++------ .../kotlinx/dataframe/impl/api/flatten.kt | 5 ++- .../kotlinx/dataframe/api/flatten.kt | 12 +++++- 6 files changed, 84 insertions(+), 28 deletions(-) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt index e18411d72..ecbd09a3b 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt @@ -9,16 +9,33 @@ import kotlin.reflect.KProperty // region DataFrame -public fun DataFrame.flatten(keepParentNameForColumns: Boolean = false): DataFrame = flatten(keepParentNameForColumns) { all() } - -public fun DataFrame.flatten(keepParentNameForColumns: Boolean = false, columns: ColumnsSelector): DataFrame = flattenImpl(columns, keepParentNameForColumns) - -public fun DataFrame.flatten(vararg columns: String, keepParentNameForColumns: Boolean = false): DataFrame = flatten(keepParentNameForColumns) { columns.toColumnSet() } - -public fun DataFrame.flatten(vararg columns: ColumnReference, keepParentNameForColumns: Boolean = false): DataFrame = - flatten(keepParentNameForColumns) { columns.toColumnSet() } - -public fun 
DataFrame.flatten(vararg columns: KProperty, keepParentNameForColumns: Boolean = false): DataFrame = - flatten(keepParentNameForColumns) { columns.toColumnSet() } +public fun DataFrame.flatten(keepParentNameForColumns: Boolean = false, separator: String = "."): DataFrame = + flatten(keepParentNameForColumns, separator) { all() } + +public fun DataFrame.flatten( + keepParentNameForColumns: Boolean = false, + separator: String = ".", + columns: ColumnsSelector +): DataFrame = flattenImpl(columns, keepParentNameForColumns, separator) + +public fun DataFrame.flatten( + vararg columns: String, + keepParentNameForColumns: Boolean = false, + separator: String = "." +): DataFrame = flatten(keepParentNameForColumns, separator) { columns.toColumnSet() } + +public fun DataFrame.flatten( + vararg columns: ColumnReference, + keepParentNameForColumns: Boolean = false, + separator: String = "." +): DataFrame = + flatten(keepParentNameForColumns, separator) { columns.toColumnSet() } + +public fun DataFrame.flatten( + vararg columns: KProperty, + keepParentNameForColumns: Boolean = false, + separator: String = "." 
+): DataFrame = + flatten(keepParentNameForColumns, separator) { columns.toColumnSet() } // endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt index 0608fae03..706e8d95c 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt @@ -14,7 +14,8 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet internal fun DataFrame.flattenImpl( columns: ColumnsSelector, - keepParentNameForColumns: Boolean = false + keepParentNameForColumns: Boolean = false, + separator: String = ".", ): DataFrame { val rootColumns = getColumnsWithPaths { columns.toColumnSet().filter { it.isColumnGroup() }.simplify() @@ -32,7 +33,7 @@ internal fun DataFrame.flattenImpl( .into { val targetPath = getRootPrefix(it.path).dropLast(1) val nameGen = nameGenerators[targetPath]!! 
- val preferredName = if (keepParentNameForColumns) "${it.name()}.${it.parentName}" else it.name() + val preferredName = if (keepParentNameForColumns) "${it.parentName}${separator}${it.name()}" else it.name() val name = nameGen.addUnique(preferredName) targetPath + name } diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt index 9a2bc7d0e..3fbb0abab 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt @@ -95,7 +95,17 @@ class FlattenTests { aggregate .flatten(keepParentNameForColumns = true) - .columnNames() shouldBe listOf("city", "age.mean", "weight.mean", "age.std", "weight.std") + .columnNames() shouldBe listOf("city", "mean.age", "mean.weight", "std.age", "std.weight") + + aggregate + .flatten(keepParentNameForColumns = true, separator = "_happy_separator_") + .columnNames() shouldBe listOf( + "city", + "mean_happy_separator_age", + "mean_happy_separator_weight", + "std_happy_separator_age", + "std_happy_separator_weight" + ) } @DataSchema diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt index e18411d72..ecbd09a3b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt @@ -9,16 +9,33 @@ import kotlin.reflect.KProperty // region DataFrame -public fun DataFrame.flatten(keepParentNameForColumns: Boolean = false): DataFrame = flatten(keepParentNameForColumns) { all() } - -public fun DataFrame.flatten(keepParentNameForColumns: Boolean = false, columns: ColumnsSelector): DataFrame = flattenImpl(columns, keepParentNameForColumns) - -public fun DataFrame.flatten(vararg columns: String, 
keepParentNameForColumns: Boolean = false): DataFrame = flatten(keepParentNameForColumns) { columns.toColumnSet() } - -public fun DataFrame.flatten(vararg columns: ColumnReference, keepParentNameForColumns: Boolean = false): DataFrame = - flatten(keepParentNameForColumns) { columns.toColumnSet() } - -public fun DataFrame.flatten(vararg columns: KProperty, keepParentNameForColumns: Boolean = false): DataFrame = - flatten(keepParentNameForColumns) { columns.toColumnSet() } +public fun DataFrame.flatten(keepParentNameForColumns: Boolean = false, separator: String = "."): DataFrame = + flatten(keepParentNameForColumns, separator) { all() } + +public fun DataFrame.flatten( + keepParentNameForColumns: Boolean = false, + separator: String = ".", + columns: ColumnsSelector +): DataFrame = flattenImpl(columns, keepParentNameForColumns, separator) + +public fun DataFrame.flatten( + vararg columns: String, + keepParentNameForColumns: Boolean = false, + separator: String = "." +): DataFrame = flatten(keepParentNameForColumns, separator) { columns.toColumnSet() } + +public fun DataFrame.flatten( + vararg columns: ColumnReference, + keepParentNameForColumns: Boolean = false, + separator: String = "." +): DataFrame = + flatten(keepParentNameForColumns, separator) { columns.toColumnSet() } + +public fun DataFrame.flatten( + vararg columns: KProperty, + keepParentNameForColumns: Boolean = false, + separator: String = "." 
+): DataFrame = + flatten(keepParentNameForColumns, separator) { columns.toColumnSet() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt index 0608fae03..706e8d95c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt @@ -14,7 +14,8 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet internal fun DataFrame.flattenImpl( columns: ColumnsSelector, - keepParentNameForColumns: Boolean = false + keepParentNameForColumns: Boolean = false, + separator: String = ".", ): DataFrame { val rootColumns = getColumnsWithPaths { columns.toColumnSet().filter { it.isColumnGroup() }.simplify() @@ -32,7 +33,7 @@ internal fun DataFrame.flattenImpl( .into { val targetPath = getRootPrefix(it.path).dropLast(1) val nameGen = nameGenerators[targetPath]!! - val preferredName = if (keepParentNameForColumns) "${it.name()}.${it.parentName}" else it.name() + val preferredName = if (keepParentNameForColumns) "${it.parentName}${separator}${it.name()}" else it.name() val name = nameGen.addUnique(preferredName) targetPath + name } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt index 9a2bc7d0e..3fbb0abab 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt @@ -95,7 +95,17 @@ class FlattenTests { aggregate .flatten(keepParentNameForColumns = true) - .columnNames() shouldBe listOf("city", "age.mean", "weight.mean", "age.std", "weight.std") + .columnNames() shouldBe listOf("city", "mean.age", "mean.weight", "std.age", "std.weight") + + aggregate + .flatten(keepParentNameForColumns = true, separator = "_happy_separator_") + .columnNames() shouldBe listOf( 
+ "city", + "mean_happy_separator_age", + "mean_happy_separator_weight", + "std_happy_separator_age", + "std_happy_separator_weight" + ) } @DataSchema