-
Notifications
You must be signed in to change notification settings - Fork 61
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
some implementation refactoring of csv
- Loading branch information
1 parent
c811d70
commit ae8ce9c
Showing
466 changed files
with
93,899 additions
and
84 deletions.
There are no files selected for viewing
86 changes: 86 additions & 0 deletions
86
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/ColumnsContainer.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
package org.jetbrains.kotlinx.dataframe | ||
|
||
import org.jetbrains.kotlinx.dataframe.api.ColumnSelectionDsl | ||
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup | ||
import org.jetbrains.kotlinx.dataframe.api.cast | ||
import org.jetbrains.kotlinx.dataframe.api.castFrameColumn | ||
import org.jetbrains.kotlinx.dataframe.api.getColumn | ||
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup | ||
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath | ||
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference | ||
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn | ||
import org.jetbrains.kotlinx.dataframe.impl.columnName | ||
import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn | ||
import kotlin.reflect.KProperty | ||
|
||
/** | ||
* Provides access to [columns][DataColumn]. | ||
* | ||
* Base interface for [DataFrame] and [ColumnSelectionDsl]. | ||
* | ||
* @param T Schema marker. Used to generate extension properties for typed column access. | ||
*/ | ||
public interface ColumnsContainer<out T> { | ||
|
||
// region columns | ||
|
||
/** Returns the columns of this container as a list. */
public fun columns(): List<AnyCol> | ||
|
||
/** Returns the number of columns in this container. */
public fun columnsCount(): Int | ||
|
||
/** Reports whether this container has a column with the given [name]. */
public fun containsColumn(name: String): Boolean | ||
|
||
/** Reports whether this container has a column at the given [path]. */
public fun containsColumn(path: ColumnPath): Boolean | ||
|
||
/**
* Returns the index of the column called [name].
*
* NOTE(review): the result for an unknown [name] is not visible from this interface; confirm in implementations.
*/
public fun getColumnIndex(name: String): Int | ||
|
||
// endregion | ||
|
||
// region getColumnOrNull | ||
// Nullable lookups. Judging by the `OrNull` suffix and the nullable return types, these presumably
// yield `null` when no matching column exists; confirm in implementations.
|
||
/** Looks a column up by its [name]. */
public fun getColumnOrNull(name: String): AnyCol? | ||
|
||
/** Looks a column up by its positional [index]. */
public fun getColumnOrNull(index: Int): AnyCol? | ||
|
||
/** Looks a column up by a typed [column] reference. */
public fun <R> getColumnOrNull(column: ColumnReference<R>): DataColumn<R>? | ||
|
||
/** Looks a column up by a property reference. */
public fun <R> getColumnOrNull(column: KProperty<R>): DataColumn<R>? | ||
|
||
/** Looks a column up by its [path]. */
public fun getColumnOrNull(path: ColumnPath): AnyCol? | ||
|
||
/** Looks a column up through a single-column selector. */
public fun <R> getColumnOrNull(column: ColumnSelector<T, R>): DataColumn<R>? | ||
|
||
// endregion | ||
|
||
// region get | ||
|
||
/** Index-operator access by name; delegates to `getColumn(columnName)`. */
public operator fun get(columnName: String): AnyCol = getColumn(columnName) | ||
|
||
/** Index-operator access by path; delegates to `getColumn(columnPath)`. */
public operator fun get(columnPath: ColumnPath): AnyCol = getColumn(columnPath) | ||
|
||
/** Fetches the column of this container that has the same name as [column] and casts it to `DataColumn<R>`. */
public operator fun <R> get(column: DataColumn<R>): DataColumn<R> = getColumn(column.name()).cast() | ||
|
||
/** Overload for columns of rows: delegates to `getColumn(column)` and returns a [ColumnGroup]. */
public operator fun <R> get(column: DataColumn<DataRow<R>>): ColumnGroup<R> = getColumn(column) | ||
|
||
/** Overload for columns of frames: delegates to `getColumn(column)` and returns a [FrameColumn]. */
public operator fun <R> get(column: DataColumn<DataFrame<R>>): FrameColumn<R> = getColumn(column) | ||
|
||
/** Index-operator access by typed reference; delegates to `getColumn(column)`. */
public operator fun <R> get(column: ColumnReference<R>): DataColumn<R> = getColumn(column) | ||
|
||
/** Overload for references to columns of rows; returns a [ColumnGroup]. */
public operator fun <R> get(column: ColumnReference<DataRow<R>>): ColumnGroup<R> = getColumn(column) | ||
|
||
/** Overload for references to columns of frames; returns a [FrameColumn]. */
public operator fun <R> get(column: ColumnReference<DataFrame<R>>): FrameColumn<R> = getColumn(column) | ||
|
||
/** Resolves the property to a column name via `columnName`, fetches by that name and casts to `DataColumn<R>`. */
public operator fun <R> get(column: KProperty<R>): DataColumn<R> = get(column.columnName).cast() | ||
|
||
/** Same name-based lookup as above, then converted with `asColumnGroup()` and cast to `ColumnGroup<R>`. */
public operator fun <R> get(column: KProperty<DataRow<R>>): ColumnGroup<R> = | ||
get(column.columnName).asColumnGroup().cast() | ||
|
||
/** Same name-based lookup as above, then converted with `asAnyFrameColumn()` and `castFrameColumn()`. */
public operator fun <R> get(column: KProperty<DataFrame<R>>): FrameColumn<R> = | ||
get(column.columnName).asAnyFrameColumn().castFrameColumn() | ||
|
||
/** Returns the columns chosen by the [columns] selector. Implemented by subtypes. */
public fun <C> get(columns: ColumnsSelector<T, C>): List<DataColumn<C>> | ||
|
||
/**
* Returns the one column chosen by [column].
*
* The selector is cast to a [ColumnsSelector] and run through the multi-column `get`; the resulting list
* is then reduced with `single()`, which throws unless the list holds exactly one element.
*/
public fun <C> get(column: ColumnSelector<T, C>): DataColumn<C> = get(column as ColumnsSelector<T, C>).single() | ||
|
||
// endregion | ||
} |
153 changes: 153 additions & 0 deletions
153
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
package org.jetbrains.kotlinx.dataframe | ||
|
||
import org.jetbrains.kotlinx.dataframe.api.Infer | ||
import org.jetbrains.kotlinx.dataframe.api.asDataColumn | ||
import org.jetbrains.kotlinx.dataframe.api.cast | ||
import org.jetbrains.kotlinx.dataframe.api.concat | ||
import org.jetbrains.kotlinx.dataframe.api.filter | ||
import org.jetbrains.kotlinx.dataframe.api.map | ||
import org.jetbrains.kotlinx.dataframe.api.schema | ||
import org.jetbrains.kotlinx.dataframe.api.take | ||
import org.jetbrains.kotlinx.dataframe.columns.BaseColumn | ||
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup | ||
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind | ||
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath | ||
import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext | ||
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath | ||
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn | ||
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn | ||
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl | ||
import org.jetbrains.kotlinx.dataframe.impl.columns.FrameColumnImpl | ||
import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl | ||
import org.jetbrains.kotlinx.dataframe.impl.columns.addPath | ||
import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType | ||
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind | ||
import org.jetbrains.kotlinx.dataframe.impl.getValuesType | ||
import org.jetbrains.kotlinx.dataframe.impl.splitByIndices | ||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema | ||
import kotlin.reflect.KClass | ||
import kotlin.reflect.KProperty | ||
import kotlin.reflect.KType | ||
import kotlin.reflect.typeOf | ||
|
||
/** | ||
* Column with [name] and [values] of specific [type]. | ||
* | ||
* Base interface for [ValueColumn] and [FrameColumn], but not for [ColumnGroup]. However, implementations for all three [column kinds][ColumnKind] derive from DataColumn and can cast to it safely. | ||
* Column operations that have signature clash with [DataFrame] API ([filter], [take], [map] etc.) are defined for [DataColumn] and not for [BaseColumn]. | ||
* | ||
* @param T type of values in the column. | ||
*/ | ||
public interface DataColumn<out T> : BaseColumn<T> { | ||
|
||
// Factory functions for the three column kinds.
public companion object { | ||
|
||
/** | ||
* Creates [ValueColumn] using given [name], [values] and [type]. | ||
* | ||
* @param name name of the column | ||
* @param values list of column values | ||
* @param type type of the column | ||
* @param infer column type inference mode | ||
* @param defaultValue passed through unchanged to the [ValueColumnImpl] constructor. NOTE(review): its effect is defined there and is not visible here. | ||
*/ | ||
public fun <T> createValueColumn( | ||
name: String, | ||
values: List<T>, | ||
type: KType, | ||
infer: Infer = Infer.None, | ||
defaultValue: T? = null, | ||
): ValueColumn<T> = ValueColumnImpl(values, name, getValuesType(values, type, infer), defaultValue) | ||
|
||
/** | ||
* Creates [ValueColumn] using given [name], [values] and reified column [type]. | ||
* | ||
* Note, that column [type] will be defined at compile-time using [T] argument | ||
* | ||
* @param T type of the column | ||
* @param name name of the column | ||
* @param values list of column values | ||
* @param infer column type inference mode | ||
*/ | ||
public inline fun <reified T> createValueColumn( | ||
name: String, | ||
values: List<T>, | ||
infer: Infer = Infer.None, | ||
): ValueColumn<T> = | ||
// The type is resolved here with the requested [infer] mode; the non-reified overload is then
// called without `infer`, so it runs with its default `Infer.None`.
createValueColumn( | ||
name, | ||
values, | ||
getValuesType( | ||
values, | ||
typeOf<T>(), | ||
infer, | ||
), | ||
) | ||
|
||
/** Wraps [df] in a [ColumnGroupImpl] under the given [name]. */
public fun <T> createColumnGroup(name: String, df: DataFrame<T>): ColumnGroup<T> = ColumnGroupImpl(name, df) | ||
|
||
/**
* Builds a [FrameColumn] by splitting [df] with `splitByIndices(startIndices)`.
*
* The schema handed to [FrameColumnImpl] is computed lazily from the unsplit [df].
* NOTE(review): presumably each entry of [startIndices] marks the first row of a group; confirm against `splitByIndices`.
*/
public fun <T> createFrameColumn(name: String, df: DataFrame<T>, startIndices: Iterable<Int>): FrameColumn<T> = | ||
FrameColumnImpl(name, df.splitByIndices(startIndices.asSequence()).toList(), lazy { df.schema() }) | ||
|
||
/**
* Builds a [FrameColumn] directly from the given [groups].
*
* @param schema optional lazily evaluated schema, forwarded as-is to [FrameColumnImpl]; `null` by default.
*/
public fun <T> createFrameColumn( | ||
name: String, | ||
groups: List<DataFrame<T>>, | ||
schema: Lazy<DataFrameSchema>? = null, | ||
): FrameColumn<T> = FrameColumnImpl(name, groups, schema) | ||
|
||
/** Creates a column by delegating to `guessColumnType(name, values, nullable = nullable)`. */
public fun <T> createWithTypeInference( | ||
name: String, | ||
values: List<T>, | ||
nullable: Boolean? = null, | ||
): DataColumn<T> = guessColumnType(name, values, nullable = nullable) | ||
|
||
/**
* Creates a column whose kind is chosen from `type.toColumnKind()`.
*
* - `Value`: delegates to [createValueColumn] with [type] and [infer].
* - `Group`: treats [values] as rows (unchecked cast), concatenates them into one frame and wraps it as a column group.
* - `Frame`: treats [values] as frames (unchecked cast) and wraps them as a frame column.
*
* Note that [infer] is only used in the `Value` branch.
*/
public fun <T> create( | ||
name: String, | ||
values: List<T>, | ||
type: KType, | ||
infer: Infer = Infer.None, | ||
): DataColumn<T> = | ||
when (type.toColumnKind()) { | ||
ColumnKind.Value -> createValueColumn(name, values, type, infer) | ||
ColumnKind.Group -> createColumnGroup(name, (values as List<AnyRow?>).concat()).asDataColumn().cast() | ||
ColumnKind.Frame -> createFrameColumn(name, values as List<AnyFrame>).asDataColumn().cast() | ||
} | ||
|
||
/** Reified convenience overload: forwards to [create] with `typeOf<T>()` as the column type. */
public inline fun <reified T> create(name: String, values: List<T>, infer: Infer = Infer.None): DataColumn<T> = | ||
create(name, values, typeOf<T>(), infer) | ||
|
||
/** Creates a value column with no values and element type [Unit]; [name] defaults to the empty string. */
public fun empty(name: String = ""): AnyCol = createValueColumn(name, emptyList<Unit>(), typeOf<Unit>()) | ||
} | ||
|
||
/** Returns whether the column's declared type is marked nullable; the values themselves are not inspected here. */
public fun hasNulls(): Boolean = type().isMarkedNullable | ||
|
||
// The overrides below narrow the return types inherited from [BaseColumn] to [DataColumn].
override fun distinct(): DataColumn<T> | ||
|
||
override fun get(indices: Iterable<Int>): DataColumn<T> | ||
|
||
override fun rename(newName: String): DataColumn<T> | ||
|
||
/** Resolves to this column itself, wrapped by `addPath()`; [context] is not consulted. */
override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath<T>? = this.addPath() | ||
|
||
/** Property-delegate support: forwards to the super implementation and casts the result to `DataColumn<T>`. */
override operator fun getValue(thisRef: Any?, property: KProperty<*>): DataColumn<T> = | ||
super.getValue(thisRef, property) as DataColumn<T> | ||
|
||
/** Iterates over the column's values, enabling `for (v in column)`. */
public operator fun iterator(): Iterator<T> = values().iterator() | ||
|
||
public override operator fun get(range: IntRange): DataColumn<T> | ||
} | ||
|
||
// Property-style accessors: each one forwards to the function of the same name.
public val AnyCol.name: String get() = name() | ||
public val AnyCol.path: ColumnPath get() = path() | ||
|
||
public val <T> DataColumn<T>.values: Iterable<T> get() = values() | ||
public val AnyCol.hasNulls: Boolean get() = hasNulls() | ||
public val AnyCol.size: Int get() = size() | ||
public val AnyCol.indices: IntRange get() = indices() | ||
|
||
public val AnyCol.type: KType get() = type() | ||
public val AnyCol.kind: ColumnKind get() = kind() | ||

/**
* The classifier of the column [type] as a [KClass].
*
* Fails through `error(...)`, which throws [IllegalStateException], when the classifier is not a [KClass].
*/
public val AnyCol.typeClass: KClass<*> | ||
get() = type.classifier as? KClass<*> | ||
?: error("Cannot cast ${type.classifier?.javaClass} to a ${KClass::class}. Column $name: $type") | ||
|
||
/** Returns the valid index range of the column: `0 until size()`, which is empty when the size is 0. */
public fun AnyBaseCol.indices(): IntRange = 0 until size() |
135 changes: 135 additions & 0 deletions
135
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
package org.jetbrains.kotlinx.dataframe | ||
|
||
import org.jetbrains.kotlinx.dataframe.aggregation.Aggregatable | ||
import org.jetbrains.kotlinx.dataframe.aggregation.AggregateGroupedBody | ||
import org.jetbrains.kotlinx.dataframe.annotations.HasSchema | ||
import org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl | ||
import org.jetbrains.kotlinx.dataframe.api.add | ||
import org.jetbrains.kotlinx.dataframe.api.cast | ||
import org.jetbrains.kotlinx.dataframe.api.getRows | ||
import org.jetbrains.kotlinx.dataframe.api.indices | ||
import org.jetbrains.kotlinx.dataframe.api.rows | ||
import org.jetbrains.kotlinx.dataframe.api.select | ||
import org.jetbrains.kotlinx.dataframe.api.toDataFrame | ||
import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy | ||
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet | ||
import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl | ||
import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize | ||
import org.jetbrains.kotlinx.dataframe.impl.getColumnsImpl | ||
import org.jetbrains.kotlinx.dataframe.impl.headPlusArray | ||
import org.jetbrains.kotlinx.dataframe.impl.headPlusIterable | ||
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrame | ||
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrameOf | ||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema | ||
import kotlin.reflect.KType | ||
|
||
/** | ||
* Readonly interface for an ordered list of [columns][DataColumn]. | ||
* | ||
* Columns in `DataFrame` have distinct non-empty [names][DataColumn.name] and equal [sizes][DataColumn.size]. | ||
* | ||
* @param T Schema marker. It identifies column schema and is used to generate schema-specific extension properties for typed data access. It is covariant, so `DataFrame<A>` is assignable to variable of type `DataFrame<B>` if `A` is a subtype of `B`. | ||
*/ | ||
@HasSchema(schemaArg = 0) | ||
public interface DataFrame<out T> : | ||
Aggregatable<T>, | ||
ColumnsContainer<T> { | ||
|
||
public companion object { | ||
/** Shared instance built from an empty column list and a row count of 0. */
public val Empty: AnyFrame = DataFrameImpl<Unit>(emptyList(), 0) | ||
|
||
/** Returns the shared [Empty] instance when [nrow] is 0; otherwise a new column-less frame created with [nrow] rows. */
public fun empty(nrow: Int = 0): AnyFrame = if (nrow == 0) Empty else DataFrameImpl<Unit>(emptyList(), nrow) | ||
|
||
/** | ||
* Creates a DataFrame with empty columns (rows = 0). | ||
* Can be used as a "null object" in aggregation operations, operations that work on columns (select, reorder, ...) | ||
* | ||
*/ | ||
public inline fun <reified T> emptyOf(): DataFrame<T> = createEmptyDataFrameOf(T::class).cast() | ||
|
||
/** | ||
* Creates a DataFrame with empty columns (rows = 0). | ||
* Can be used as a "null object" in aggregation operations, operations that work on columns (select, reorder, ...) | ||
*/ | ||
public fun empty(schema: DataFrameSchema): AnyFrame = schema.createEmptyDataFrame() | ||
} | ||
|
||
// region columns | ||
|
||
/** Returns the column names as a list. */
public fun columnNames(): List<String> | ||
|
||
/** Returns the column types as a list. */
public fun columnTypes(): List<KType> | ||
|
||
// endregion | ||
|
||
// region rows | ||
|
||
/** Returns the number of rows. */
public fun rowsCount(): Int | ||
|
||
/** Iterates over `rows()`, enabling `for (row in df)`. */
public operator fun iterator(): Iterator<DataRow<T>> = rows().iterator() | ||
|
||
// endregion | ||
|
||
/**
* Runs the aggregation [body] and returns a single row.
*
* NOTE(review): the exact contract is defined by implementations and is not visible here.
*/
public fun <R> aggregate(body: AggregateGroupedBody<T, R>): DataRow<T> | ||
|
||
// region get columns | ||
|
||
/** | ||
* Returns a list of columns selected by [columns], a [ColumnsSelectionDsl]. | ||
* | ||
* NOTE: This doesn't work in [ColumnsSelectionDsl], use [ColumnsSelectionDsl.cols] to select columns by predicate. | ||
*/ | ||
override fun <C> get(columns: ColumnsSelector<T, C>): List<DataColumn<C>> = | ||
getColumnsImpl(UnresolvedColumnsPolicy.Fail, columns) | ||
|
||
// endregion | ||
|
||
// region get rows | ||
|
||
/** Returns the row at [index]. NOTE(review): out-of-range behavior is not visible here. */
public operator fun get(index: Int): DataRow<T> | ||
|
||
/** Returns the rows at the given [indices]; delegates to `getRows(indices)`. */
public operator fun get(indices: Iterable<Int>): DataFrame<T> = getRows(indices) | ||
|
||
/** Returns the rows in [range]; delegates to `getRows(range)`. */
public operator fun get(range: IntRange): DataFrame<T> = getRows(range) | ||
|
||
/**
* Returns the rows covered by several ranges.
*
* [first] and [ranges] are combined by `headPlusArray`, each range is expanded lazily to its indices,
* and the flattened index sequence is passed to `getRows`.
*/
public operator fun get(first: IntRange, vararg ranges: IntRange): DataFrame<T> = | ||
getRows(headPlusArray(first, ranges).asSequence().flatMap { it.asSequence() }.asIterable()) | ||
|
||
/** Returns the rows at several indices: combines [firstIndex] and [otherIndices] with `headPlusIterable` and delegates to the `Iterable<Int>` overload. */
public operator fun get(firstIndex: Int, vararg otherIndices: Int): DataFrame<T> = | ||
get(headPlusIterable(firstIndex, otherIndices.asIterable())) | ||
|
||
// endregion | ||
|
||
// region plus columns | ||
|
||
/** `df + col`: delegates to `add(col)`. */
public operator fun plus(col: AnyBaseCol): DataFrame<T> = add(col) | ||
|
||
/** `df + cols`: builds a new frame from the existing columns followed by [cols], then casts it back to `DataFrame<T>`. */
public operator fun plus(cols: Iterable<AnyBaseCol>): DataFrame<T> = (columns() + cols).toDataFrame().cast() | ||
|
||
// endregion | ||
} | ||
|
||
// region get columns | ||
|
||
/** | ||
* Returns a list of columns selected by [columns], a [ColumnsSelectionDsl]. | ||
* | ||
* Operator-form entry point: the body simply forwards to `this.get(columns)`. | ||
*/ | ||
public operator fun <T, C> DataFrame<T>.get(columns: ColumnsSelector<T, C>): List<DataColumn<C>> = this.get(columns) | ||
|
||
/** Returns a frame with the referenced columns, selected via `select` over `first` followed by `other`. */
public operator fun <T> DataFrame<T>.get(first: AnyColumnReference, vararg other: AnyColumnReference): DataFrame<T> = | ||
select { (listOf(first) + other).toColumnSet() } | ||
|
||
/** Returns a frame with the named columns, selected via `select` over `first` followed by `other`. */
public operator fun <T> DataFrame<T>.get(first: String, vararg other: String): DataFrame<T> = | ||
select { (listOf(first) + other).toColumnSet() } | ||
|
||
/** Returns a frame with the columns selected by `start..endInclusive` of [columnRange] inside the `select` DSL. */
public operator fun <T> DataFrame<T>.get(columnRange: ClosedRange<String>): DataFrame<T> = | ||
select { columnRange.start..columnRange.endInclusive } | ||
|
||
// endregion | ||
|
||
// Internal property shortcuts for the corresponding function-style accessors.
internal val ColumnsContainer<*>.ncol get() = columnsCount() | ||
internal val AnyFrame.nrow get() = rowsCount() | ||
internal val AnyFrame.indices get() = indices() | ||
internal val AnyFrame.size: DataFrameSize get() = size() | ||
|
||
/** Returns the frame dimensions as a [DataFrameSize] built from the column count and the row count, in that order. */
public fun AnyFrame.size(): DataFrameSize = DataFrameSize(ncol, nrow) |
Oops, something went wrong.