forked from opensearch-project/opensearch-spark
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into improve-error-handling
- Loading branch information
Showing
6 changed files
with
230 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
35 changes: 35 additions & 0 deletions
35
...tegration/src/main/scala/org/opensearch/flint/spark/source/FlintSparkSourceRelation.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.flint.spark.source | ||
|
||
import org.apache.spark.sql.catalyst.expressions.AttributeReference | ||
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan | ||
|
||
/**
 * Common abstraction over the source relation of a table. It lets Flint work with different
 * source data formats (such as Spark built-in file sources, Delta tables or Iceberg) through a
 * single interface, keeping the details of each data source implementation out of the calling
 * code.
 */
trait FlintSparkSourceRelation {

  /**
   * @return
   *   the concrete logical plan wrapped by this relation
   */
  def plan: LogicalPlan

  /**
   * @return
   *   the fully qualified name of the table that this relation represents
   */
  def tableName: String

  /**
   * @return
   *   the list of output columns of this relation
   */
  def output: Seq[AttributeReference]
}
73 changes: 73 additions & 0 deletions
73
...n/src/main/scala/org/opensearch/flint/spark/source/FlintSparkSourceRelationProvider.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.flint.spark.source | ||
|
||
import org.opensearch.flint.spark.source.file.FileSourceRelationProvider | ||
|
||
import org.apache.spark.internal.Logging | ||
import org.apache.spark.sql.SparkSession | ||
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan | ||
|
||
/**
 * A provider declares which kind of logical plan the Flint Spark integration can support and
 * knows how to wrap such a plan in a [[FlintSparkSourceRelation]].
 *
 * It serves a purpose similar to a Scala extractor used in a `match` expression. An extractor,
 * however, would force a hard dependency on the code of every data source (for example
 * Iceberg). To avoid that, a list of providers is maintained instead, and a provider is meant to
 * be run only if its third-party library is available in the current Spark session.
 */
trait FlintSparkSourceRelationProvider {

  /**
   * @return
   *   the name identifying this source relation provider
   */
  def name(): String

  /**
   * Checks whether this provider is able to handle the given logical plan.
   *
   * @param plan
   *   the logical plan to inspect
   * @return
   *   true if this provider supports the plan, false otherwise
   */
  def isSupported(plan: LogicalPlan): Boolean

  /**
   * Wraps the given logical plan in a source relation.
   *
   * @param plan
   *   the logical plan to wrap
   * @return
   *   the source relation built from the plan
   */
  def getRelation(plan: LogicalPlan): FlintSparkSourceRelation
}
|
||
/**
 * Companion object holding utility methods for source relation providers.
 */
object FlintSparkSourceRelationProvider extends Logging {

  /**
   * Collects every source relation provider supported for the given Spark session and logs
   * their names.
   *
   * @param spark
   *   the Spark session (not consulted by the current implementation, which only registers the
   *   built-in file source provider)
   * @return
   *   the sequence of supported source relation providers
   */
  def getAllProviders(spark: SparkSession): Seq[FlintSparkSourceRelationProvider] = {
    // The file source is supported out of the box, so it is always registered
    val builtInProviders: Seq[FlintSparkSourceRelationProvider] =
      Seq(new FileSourceRelationProvider)

    val providerNames = builtInProviders.map(_.name()).mkString(",")
    logInfo(s"Loaded source relation providers [$providerNames]")
    builtInProviders
  }
}
27 changes: 27 additions & 0 deletions
27
...ntegration/src/main/scala/org/opensearch/flint/spark/source/file/FileSourceRelation.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.flint.spark.source.file | ||
|
||
import org.opensearch.flint.spark.source.FlintSparkSourceRelation | ||
|
||
import org.apache.spark.sql.catalyst.expressions.AttributeReference | ||
import org.apache.spark.sql.execution.datasources.LogicalRelation | ||
|
||
/**
 * Concrete source relation implementation for Spark built-in file-based data sources.
 *
 * @param plan
 *   the `LogicalRelation` that represents the plan associated with the file-based table
 */
case class FileSourceRelation(override val plan: LogicalRelation)
    extends FlintSparkSourceRelation {

  /**
   * Resolves the table name from the catalog table attached to the wrapped plan.
   *
   * The catalog table is expected to be present: `FileSourceRelationProvider.isSupported` only
   * accepts a `LogicalRelation` whose catalog table is defined. If the relation was constructed
   * without that check, the failure carries an explicit message instead of a bare "None.get".
   *
   * @return
   *   the qualified name of the catalog table
   * @throws NoSuchElementException
   *   if the wrapped plan has no catalog table
   */
  override def tableName: String =
    plan.catalogTable
      .map(_.qualifiedName)
      .getOrElse(
        throw new NoSuchElementException(
          "Cannot resolve table name: file source relation has no catalog table"))

  /**
   * @return
   *   the output attributes of the wrapped `LogicalRelation`
   */
  override def output: Seq[AttributeReference] = plan.output
}
30 changes: 30 additions & 0 deletions
30
...on/src/main/scala/org/opensearch/flint/spark/source/file/FileSourceRelationProvider.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.flint.spark.source.file | ||
|
||
import org.opensearch.flint.spark.source.{FlintSparkSourceRelation, FlintSparkSourceRelationProvider} | ||
|
||
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan | ||
import org.apache.spark.sql.execution.datasources.LogicalRelation | ||
|
||
/**
 * Source relation provider for Spark built-in file-based source.
 *
 * @param name
 *   the name of the file source provider, "file" by default
 */
class FileSourceRelationProvider(override val name: String = "file")
    extends FlintSparkSourceRelationProvider {

  /**
   * A plan is supported when it is a non-streaming `LogicalRelation` that has a catalog table
   * attached.
   *
   * @param plan
   *   the logical plan to evaluate
   * @return
   *   true if the plan is supported, false otherwise
   */
  override def isSupported(plan: LogicalPlan): Boolean = plan match {
    // Match on the type and read the fields by name rather than by constructor position
    case relation: LogicalRelation =>
      relation.catalogTable.isDefined && !relation.isStreaming
    case _ => false
  }

  /**
   * Wraps the given plan in a [[FileSourceRelation]]. Callers are expected to have checked the
   * plan with `isSupported` beforehand; only the plan type is verified here.
   *
   * @param plan
   *   the logical plan to wrap, which must be a `LogicalRelation`
   * @return
   *   the file source relation wrapping the plan
   * @throws IllegalArgumentException
   *   if the plan is not a `LogicalRelation`
   */
  override def getRelation(plan: LogicalPlan): FlintSparkSourceRelation = plan match {
    case relation: LogicalRelation => FileSourceRelation(relation)
    case other =>
      // Report the offending plan type explicitly instead of failing with an unchecked cast
      val planType = Option(other).map(_.getClass.getName).getOrElse("null")
      throw new IllegalArgumentException(
        s"File source relation provider cannot wrap logical plan of type $planType")
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters