Commit
Signed-off-by: Chen Dai <[email protected]>
Showing 8 changed files with 156 additions and 69 deletions.
...n/src/main/scala/org/opensearch/flint/spark/source/FlintSparkSourceRelationProvider.scala
77 changes: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */

package org.opensearch.flint.spark.source

import org.opensearch.flint.spark.source.file.FileSourceRelationProvider
import org.opensearch.flint.spark.source.iceberg.IcebergSourceRelationProvider

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

/**
 * A provider that defines which kinds of logical plan are supported by the Flint Spark
 * integration. It serves a similar purpose to a Scala extractor used in a match expression.
 * However, we want to avoid a hard dependency on data source code such as Iceberg, so we
 * maintain a list of providers and run each one only if the third-party library is available
 * in the current Spark session.
 */
trait FlintSparkSourceRelationProvider {

  /**
   * @return
   *   the name of the source relation provider
   */
  def name(): String

  /**
   * Determines whether the given logical plan is supported by this provider.
   *
   * @param plan
   *   the logical plan to evaluate
   * @return
   *   true if the plan is supported, false otherwise
   */
  def isSupported(plan: LogicalPlan): Boolean

  /**
   * Creates a source relation based on the provided logical plan.
   *
   * @param plan
   *   the logical plan to wrap in a source relation
   * @return
   *   an instance of source relation
   */
  def getRelation(plan: LogicalPlan): FlintSparkSourceRelation
}

/**
 * Companion object that provides utility methods.
 */
object FlintSparkSourceRelationProvider {

  /**
   * Retrieves all supported source relation providers for the given Spark session.
   *
   * @param spark
   *   the Spark session
   * @return
   *   a sequence of source relation providers
   */
  def getProviders(spark: SparkSession): Seq[FlintSparkSourceRelationProvider] = {
    var relations = Seq[FlintSparkSourceRelationProvider]()

    // File source is supported out of the box
    relations = relations :+ new FileSourceRelationProvider

    // Add Iceberg provider if it's enabled in Spark conf
    if (spark.conf
        .getOption("spark.sql.catalog.spark_catalog")
        .contains("org.apache.iceberg.spark.SparkSessionCatalog")) {
      relations = relations :+ new IcebergSourceRelationProvider
    }
    relations
  }
}
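A minimal sketch (not part of this commit) of how these providers could be consumed: walk an analyzed plan, find the first node that any registered provider supports, and wrap it in a FlintSparkSourceRelation. The helper name findSourceRelation and the object name are made up for illustration.

import org.opensearch.flint.spark.source.{FlintSparkSourceRelation, FlintSparkSourceRelationProvider}

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

object SourceRelationLookupSketch {

  // Hypothetical helper: return the first sub-plan that any provider claims to
  // support, wrapped in that provider's source relation.
  def findSourceRelation(
      spark: SparkSession,
      plan: LogicalPlan): Option[FlintSparkSourceRelation] = {
    val providers = FlintSparkSourceRelationProvider.getProviders(spark)
    plan.collectFirst {
      case subPlan if providers.exists(_.isSupported(subPlan)) =>
        providers.find(_.isSupported(subPlan)).get.getRelation(subPlan)
    }
  }
}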
...on/src/main/scala/org/opensearch/flint/spark/source/file/FileSourceRelationProvider.scala
30 changes: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */

package org.opensearch.flint.spark.source.file

import org.opensearch.flint.spark.source.{FlintSparkSourceRelation, FlintSparkSourceRelationProvider}

import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.datasources.LogicalRelation

/**
 * Source relation provider for Spark's built-in file-based sources.
 *
 * @param name
 *   the name of the file source provider
 */
class FileSourceRelationProvider(override val name: String = "file")
    extends FlintSparkSourceRelationProvider {

  override def isSupported(plan: LogicalPlan): Boolean = plan match {
    case LogicalRelation(_, _, Some(_), false) => true
    case _ => false
  }

  override def getRelation(plan: LogicalPlan): FlintSparkSourceRelation = {
    FileSourceRelation(plan.asInstanceOf[LogicalRelation])
  }
}
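A hedged illustration (not in the diff) of what this provider matches: the LogicalRelation node of a catalog-backed, non-streaming table. The table name my_parquet_table is invented for the example and assumed to already exist in the session catalog.

import org.opensearch.flint.spark.source.file.FileSourceRelationProvider

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.datasources.LogicalRelation

object FileProviderSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    val provider = new FileSourceRelationProvider

    // spark.table() on a catalog table usually wraps the LogicalRelation in a
    // SubqueryAlias, so search the analyzed plan tree for the relation node.
    val analyzed = spark.table("my_parquet_table").queryExecution.analyzed
    analyzed.collect {
      case r: LogicalRelation if provider.isSupported(r) =>
        println(s"'${provider.name}' provider matched: ${provider.getRelation(r)}")
    }
  }
}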
.../main/scala/org/opensearch/flint/spark/source/iceberg/IcebergSourceRelationProvider.scala
40 changes: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */

package org.opensearch.flint.spark.source.iceberg

import org.apache.iceberg.spark.source.SparkTable
import org.opensearch.flint.spark.source.{FlintSparkSourceRelation, FlintSparkSourceRelationProvider}

import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, DataSourceV2ScanRelation}

/**
 * Source relation provider for the Apache Iceberg-based source.
 *
 * @param name
 *   the name of the Iceberg source provider
 */
class IcebergSourceRelationProvider(override val name: String = "iceberg")
    extends FlintSparkSourceRelationProvider {

  override def isSupported(plan: LogicalPlan): Boolean = plan match {
    case DataSourceV2Relation(_: SparkTable, _, _, _, _) => true
    case DataSourceV2ScanRelation(DataSourceV2Relation(_: SparkTable, _, _, _, _), _, _, _) =>
      true
    case _ => false
  }

  override def getRelation(plan: LogicalPlan): FlintSparkSourceRelation = plan match {
    case relation @ DataSourceV2Relation(_: SparkTable, _, _, _, _) =>
      IcebergSourceRelation(relation)
    case DataSourceV2ScanRelation(
          relation @ DataSourceV2Relation(_: SparkTable, _, _, _, _),
          _,
          _,
          _) =>
      IcebergSourceRelation(relation)
  }
}
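A brief sketch (also not part of the commit) of the Spark session configuration under which getProviders would register this Iceberg provider, assuming the Iceberg Spark runtime is on the classpath; the catalog type and warehouse settings needed for a real setup are omitted here.

import org.opensearch.flint.spark.source.FlintSparkSourceRelationProvider

import org.apache.spark.sql.SparkSession

object IcebergProviderSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .master("local[*]")
      // The only setting getProviders inspects before adding the Iceberg provider
      .config("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog")
      .getOrCreate()

    val providers = FlintSparkSourceRelationProvider.getProviders(spark)
    println(providers.map(_.name).mkString(", ")) // expected output: file, iceberg
  }
}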