forked from opensearch-project/opensearch-spark
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
support shard level split on read path (opensearch-project#402)
Signed-off-by: Peng Huo <[email protected]>
- Loading branch information
Showing
15 changed files
with
326 additions
and
63 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
62 changes: 62 additions & 0 deletions
62
...-spark-integration/src/main/scala/org/apache/spark/opensearch/table/OpenSearchTable.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.apache.spark.opensearch.table | ||
|
||
import scala.collection.JavaConverters._ | ||
|
||
import org.opensearch.flint.core.{FlintClientBuilder, FlintOptions} | ||
import org.opensearch.flint.core.metadata.FlintMetadata | ||
|
||
import org.apache.spark.sql.flint.datatype.FlintDataType | ||
import org.apache.spark.sql.types.StructType | ||
|
||
/**
 * Represents an OpenSearch table. A table may be backed by one or more OpenSearch indices; each
 * entry of `metadata` becomes one partition of the table.
 *
 * @param tableName
 *   The name of the table.
 * @param metadata
 *   Metadata of the indices backing the table, keyed by index name.
 */
case class OpenSearchTable(tableName: String, metadata: Map[String, FlintMetadata]) {

  /**
   * Schema of the table. Falls back to an empty [[StructType]] when `metadata` is empty.
   *
   * FIXME: when the table spans multiple indices only the schema of the first index is used. The
   * per-index StructTypes should be merged so that field types are widened.
   */
  lazy val schema: StructType = {
    metadata.values.headOption
      .map(m => FlintDataType.deserialize(m.getContent))
      .getOrElse(StructType(Nil))
  }

  /**
   * One [[PartitionInfo]] per entry of `metadata`, built from that index's settings.
   *
   * @throws java.util.NoSuchElementException
   *   if the metadata of an index carries no index settings.
   */
  lazy val partitions: Array[PartitionInfo] = {
    metadata.map { case (partitionName, indexMetadata) =>
      // Same exception type as a bare Option.get, but the message names the offending index
      // instead of the context-free "None.get".
      val settings = indexMetadata.indexSettings.getOrElse(
        throw new NoSuchElementException(
          s"Index settings are missing for index [$partitionName] of table [$tableName]"))
      PartitionInfo.apply(partitionName, settings)
    }.toArray
  }
}
|
||
object OpenSearchTable {

  /**
   * Builds an OpenSearchTable by fetching the metadata of every index the table name refers to.
   *
   * @param tableName
   *   One of: (1) a single index name, (2) a wildcard index name, or (3) a comma-separated list
   *   of index names.
   * @param options
   *   The Flint options used to build the client.
   * @return
   *   An OpenSearchTable holding the metadata returned by the client.
   */
  def apply(tableName: String, options: FlintOptions): OpenSearchTable = {
    val client = FlintClientBuilder.build(options)
    // The table name is split on commas; each piece is passed to the client exactly as written.
    val indexNames = tableName.split(",")
    val indexMetadata: Map[String, FlintMetadata] =
      client.getAllIndexMetadata(indexNames: _*).asScala.toMap
    OpenSearchTable(tableName, indexMetadata)
  }
}
54 changes: 54 additions & 0 deletions
54
flint-spark-integration/src/main/scala/org/apache/spark/opensearch/table/PartitionInfo.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.apache.spark.opensearch.table | ||
|
||
import org.json4s.{Formats, NoTypeHints} | ||
import org.json4s.jackson.JsonMethods | ||
import org.json4s.native.Serialization | ||
|
||
/**
 * Describes one partition of an OpenSearch table. A partition is backed by an OpenSearch index,
 * and each partition contains a list of shards.
 *
 * @param partitionName
 *   The name of the partition.
 * @param shards
 *   The shards that make up the partition.
 */
case class PartitionInfo(partitionName: String, shards: Array[ShardInfo])
|
||
object PartitionInfo {
  implicit val formats: Formats = Serialization.formats(NoTypeHints)

  /**
   * Builds a PartitionInfo whose shards are numbered from 0 up to (but excluding) the shard
   * count found in the settings.
   *
   * @param partitionName
   *   The name of the partition; also used as the index name of every shard.
   * @param settings
   *   The settings of the partition, as a JSON string.
   * @return
   *   A PartitionInfo with one ShardInfo per shard.
   */
  def apply(partitionName: String, settings: String): PartitionInfo = {
    val shardCount = numberOfShards(settings)
    val shards = (0 until shardCount).map(ShardInfo(partitionName, _)).toArray
    PartitionInfo(partitionName, shards)
  }

  /**
   * Reads the shard count from a JSON settings string.
   *
   * @param settingStr
   *   The settings as a JSON string; the count is read from the `index.number_of_shards` field,
   *   which is extracted as a string and then converted to an Int.
   * @return
   *   The number of shards.
   */
  def numberOfShards(settingStr: String): Int = {
    val shardCountField = JsonMethods.parse(settingStr) \ "index.number_of_shards"
    shardCountField.extract[String].toInt
  }
}
16 changes: 16 additions & 0 deletions
16
flint-spark-integration/src/main/scala/org/apache/spark/opensearch/table/ShardInfo.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.apache.spark.opensearch.table | ||
|
||
/**
 * Identifies a single shard of an OpenSearch index.
 *
 * @param indexName
 *   The name of the index the shard belongs to.
 * @param id
 *   The ID of the shard.
 */
case class ShardInfo(
    indexName: String,
    id: Int)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.