forked from opensearch-project/opensearch-spark
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add materialized view in Flint Spark API (opensearch-project#71)
* Implement MV metadata on refactored Flint metadata Signed-off-by: Chen Dai <[email protected]> * Split build API and add IT for MV Signed-off-by: Chen Dai <[email protected]> * Add IT for incremental refresh Signed-off-by: Chen Dai <[email protected]> * Refactor build API with optional StreamingRefresh interface Signed-off-by: Chen Dai <[email protected]> * Add javadoc and remove useless BatchRefresh interface Signed-off-by: Chen Dai <[email protected]> * Fluent data frame API chain Signed-off-by: Chen Dai <[email protected]> * Add more javadoc Signed-off-by: Chen Dai <[email protected]> * Add UT for build function Signed-off-by: Chen Dai <[email protected]> * Add UT for build stream function Signed-off-by: Chen Dai <[email protected]> * More readability by implicit class Signed-off-by: Chen Dai <[email protected]> * Add more IT Signed-off-by: Chen Dai <[email protected]> * Refactor MV build stream Signed-off-by: Chen Dai <[email protected]> * Add more javadoc and comment Signed-off-by: Chen Dai <[email protected]> * Move remaining deserialize logic to new Factory class Signed-off-by: Chen Dai <[email protected]> * Add implicit class for options Signed-off-by: Chen Dai <[email protected]> * Qualify MV name Signed-off-by: Chen Dai <[email protected]> * Fix qualified mv name check Signed-off-by: Chen Dai <[email protected]> --------- Signed-off-by: Chen Dai <[email protected]>
- Loading branch information
Showing
11 changed files
with
841 additions
and
95 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
83 changes: 83 additions & 0 deletions
83
...-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSparkIndexFactory.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.flint.spark | ||
|
||
import scala.collection.JavaConverters.mapAsScalaMapConverter | ||
|
||
import org.opensearch.flint.core.metadata.FlintMetadata | ||
import org.opensearch.flint.spark.covering.FlintSparkCoveringIndex | ||
import org.opensearch.flint.spark.covering.FlintSparkCoveringIndex.COVERING_INDEX_TYPE | ||
import org.opensearch.flint.spark.mv.FlintSparkMaterializedView | ||
import org.opensearch.flint.spark.mv.FlintSparkMaterializedView.MV_INDEX_TYPE | ||
import org.opensearch.flint.spark.skipping.FlintSparkSkippingIndex | ||
import org.opensearch.flint.spark.skipping.FlintSparkSkippingIndex.SKIPPING_INDEX_TYPE | ||
import org.opensearch.flint.spark.skipping.FlintSparkSkippingStrategy.SkippingKind | ||
import org.opensearch.flint.spark.skipping.FlintSparkSkippingStrategy.SkippingKind.{MIN_MAX, PARTITION, VALUE_SET} | ||
import org.opensearch.flint.spark.skipping.minmax.MinMaxSkippingStrategy | ||
import org.opensearch.flint.spark.skipping.partition.PartitionSkippingStrategy | ||
import org.opensearch.flint.spark.skipping.valueset.ValueSetSkippingStrategy | ||
|
||
/**
 * Flint Spark index factory that encapsulates specific Flint index instance creation. This is
 * meant for internal code use rather than as a user facing API.
 */
object FlintSparkIndexFactory {

  /**
   * Creates Flint index from generic Flint metadata.
   *
   * @param metadata
   *   Flint metadata
   * @return
   *   Flint index
   * @throws IllegalStateException
   *   if the index kind, or the kind of any skipping strategy, in the metadata is not recognized
   */
  def create(metadata: FlintMetadata): FlintSparkIndex = {
    // Option values are cast (not converted) to String, so a non-String value fails the cast
    val indexOptions = FlintSparkIndexOptions(
      metadata.options.asScala.mapValues(_.asInstanceOf[String]).toMap)

    // Convert generic Map[String,AnyRef] in metadata to specific data structure in Flint index
    metadata.kind match {
      case SKIPPING_INDEX_TYPE =>
        val strategies = metadata.indexedColumns.map { colInfo =>
          val skippingKind = SkippingKind.withName(getString(colInfo, "kind"))
          val columnName = getString(colInfo, "columnName")
          val columnType = getString(colInfo, "columnType")

          skippingKind match {
            case PARTITION =>
              PartitionSkippingStrategy(columnName = columnName, columnType = columnType)
            case VALUE_SET =>
              ValueSetSkippingStrategy(columnName = columnName, columnType = columnType)
            case MIN_MAX =>
              MinMaxSkippingStrategy(columnName = columnName, columnType = columnType)
            case other =>
              throw new IllegalStateException(s"Unknown skipping strategy: $other")
          }
        }
        FlintSparkSkippingIndex(metadata.source, strategies, indexOptions)
      case COVERING_INDEX_TYPE =>
        FlintSparkCoveringIndex(
          metadata.name,
          metadata.source,
          columnNamesAndTypes(metadata),
          indexOptions)
      case MV_INDEX_TYPE =>
        FlintSparkMaterializedView(
          metadata.name,
          metadata.source,
          columnNamesAndTypes(metadata),
          indexOptions)
      case other =>
        // Fail with a descriptive error instead of a bare scala.MatchError,
        // consistent with the unknown skipping strategy handling above
        throw new IllegalStateException(s"Unknown Flint index type: $other")
    }
  }

  /**
   * Collects the "columnName" -> "columnType" pair of every indexed column in the metadata.
   *
   * @param metadata
   *   Flint metadata
   * @return
   *   map from column name to column type
   */
  private def columnNamesAndTypes(metadata: FlintMetadata): Map[String, String] = {
    metadata.indexedColumns.map { colInfo =>
      getString(colInfo, "columnName") -> getString(colInfo, "columnType")
    }.toMap
  }

  /**
   * Reads a value from a generic column info map and casts it to String.
   *
   * @param map
   *   generic key-value map
   * @param key
   *   key to look up
   * @return
   *   the value cast to String, or null if the map returns null for the key
   */
  private def getString(map: java.util.Map[String, AnyRef], key: String): String = {
    map.get(key).asInstanceOf[String]
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.