Skip to content

Commit

Permalink
prototype
Browse files Browse the repository at this point in the history
  • Loading branch information
JianyuZhou committed Aug 25, 2023
1 parent c3b9ffa commit afc737b
Show file tree
Hide file tree
Showing 3 changed files with 170 additions and 139 deletions.
281 changes: 142 additions & 139 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -42,146 +42,154 @@ lazy val commonSettings = Seq(

// Root project located at the repository top level; it aggregates the client, spark and server
// sub-projects so that tasks invoked on the root are also run on each of them.
lazy val root = (project in file(".")).aggregate(client, spark, server)

lazy val client = (project in file("client")) settings(
name := "delta-sharing-client",
crossScalaVersions := Seq(scala212, scala213),
commonSettings,
scalaStyleSettings,
releaseSettings,
libraryDependencies ++= Seq(
"org.apache.httpcomponents" % "httpclient" % "4.5.13",
"org.apache.spark" %% "spark-sql" % sparkVersion % "provided",
"org.apache.spark" %% "spark-catalyst" % sparkVersion % "test" classifier "tests",
"org.apache.spark" %% "spark-core" % sparkVersion % "test" classifier "tests",
"org.apache.spark" %% "spark-sql" % sparkVersion % "test" classifier "tests",
"org.scalatest" %% "scalatest" % "3.2.3" % "test"
),
Compile / sourceGenerators += Def.task {
val file = (Compile / sourceManaged).value / "io" / "delta" / "sharing" / "client" / "package.scala"
IO.write(file,
s"""package io.delta.sharing
lazy val client = (project in file("client")) settings (
name := "delta-sharing-client",
crossScalaVersions := Seq(scala212, scala213),
commonSettings,
scalaStyleSettings,
releaseSettings,
libraryDependencies ++= Seq(
"org.apache.httpcomponents" % "httpclient" % "4.5.13",
"org.apache.spark" %% "spark-sql" % sparkVersion % "provided",
"org.apache.spark" %% "spark-catalyst" % sparkVersion % "test" classifier "tests",
"org.apache.spark" %% "spark-core" % sparkVersion % "test" classifier "tests",
"org.apache.spark" %% "spark-sql" % sparkVersion % "test" classifier "tests",
"io.delta" % "delta-kernel-api" % "3.0.0rc1",
"io.delta" % "delta-kernel-default" % "3.0.0rc1",
"org.apache.hadoop" % "hadoop-aws" % "3.3.1",
"org.scalatest" %% "scalatest" % "3.2.3" % "test"
),
Compile / sourceGenerators += Def.task {
val file = (Compile / sourceManaged).value / "io" / "delta" / "sharing" / "client" / "package.scala"
IO.write(
file,
s"""package io.delta.sharing
|
|package object client {
| val VERSION = "${version.value}"
|}
|""".stripMargin)
Seq(file)
}
|""".stripMargin
)
Seq(file)
}
)

lazy val spark = (project in file("spark")) dependsOn(client) settings(
name := "delta-sharing-spark",
crossScalaVersions := Seq(scala212, scala213),
commonSettings,
scalaStyleSettings,
releaseSettings,
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-sql" % sparkVersion % "provided",
"org.apache.spark" %% "spark-catalyst" % sparkVersion % "test" classifier "tests",
"org.apache.spark" %% "spark-core" % sparkVersion % "test" classifier "tests",
"org.apache.spark" %% "spark-sql" % sparkVersion % "test" classifier "tests",
"org.scalatest" %% "scalatest" % "3.2.3" % "test"
),
Compile / sourceGenerators += Def.task {
val file = (Compile / sourceManaged).value / "io" / "delta" / "sharing" / "spark" / "package.scala"
IO.write(file,
s"""package io.delta.sharing
lazy val spark = (project in file("spark")) dependsOn (client) settings (
name := "delta-sharing-spark",
crossScalaVersions := Seq(scala212, scala213),
commonSettings,
scalaStyleSettings,
releaseSettings,
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-sql" % sparkVersion % "provided",
"org.apache.spark" %% "spark-catalyst" % sparkVersion % "test" classifier "tests",
"org.apache.spark" %% "spark-core" % sparkVersion % "test" classifier "tests",
"org.apache.spark" %% "spark-sql" % sparkVersion % "test" classifier "tests",
"org.scalatest" %% "scalatest" % "3.2.3" % "test"
),
Compile / sourceGenerators += Def.task {
val file = (Compile / sourceManaged).value / "io" / "delta" / "sharing" / "spark" / "package.scala"
IO.write(
file,
s"""package io.delta.sharing
|
|package object spark {
| val VERSION = "${version.value}"
|}
|""".stripMargin)
Seq(file)
}
|""".stripMargin
)
Seq(file)
}
)

lazy val server = (project in file("server")) enablePlugins(JavaAppPackaging) settings(
name := "delta-sharing-server",
scalaVersion := scala212,
commonSettings,
scalaStyleSettings,
releaseSettings,
dockerUsername := Some("deltaio"),
dockerBuildxPlatforms := Seq("linux/arm64", "linux/amd64"),
scriptClasspath ++= Seq("../conf"),
libraryDependencies ++= Seq(
// Pin versions for jackson libraries as the new version of `jackson-module-scala` introduces a
// breaking change making us not able to use `delta-standalone`.
"com.fasterxml.jackson.core" % "jackson-core" % "2.6.7",
"com.fasterxml.jackson.core" % "jackson-databind" % "2.6.7.3",
"com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.6.7.1",
"com.fasterxml.jackson.dataformat" % "jackson-dataformat-yaml" % "2.6.7",
"org.json4s" %% "json4s-jackson" % "3.5.3" excludeAll(
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module")
),
"com.linecorp.armeria" %% "armeria-scalapb" % "1.6.0" excludeAll(
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("org.json4s")
),
"com.thesamet.scalapb" %% "scalapb-runtime" % scalapb.compiler.Version.scalapbVersion % "protobuf" excludeAll(
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("org.json4s")
),
"org.apache.hadoop" % "hadoop-aws" % "2.10.1" excludeAll(
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("com.google.guava", "guava"),
ExclusionRule("com.amazonaws", "aws-java-sdk-bundle")
),
"com.amazonaws" % "aws-java-sdk-bundle" % "1.12.189",
"org.apache.hadoop" % "hadoop-azure" % "2.10.1" excludeAll(
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("com.google.guava", "guava")
),
"com.google.cloud" % "google-cloud-storage" % "2.2.2" excludeAll(
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module")
),
"com.google.cloud.bigdataoss" % "gcs-connector" % "hadoop2-2.2.4" excludeAll(
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module")
),
"org.apache.hadoop" % "hadoop-common" % "2.10.1" excludeAll(
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("com.google.guava", "guava")
lazy val server = (project in file("server")) enablePlugins (JavaAppPackaging) settings (
name := "delta-sharing-server",
scalaVersion := scala212,
commonSettings,
scalaStyleSettings,
releaseSettings,
dockerUsername := Some("deltaio"),
dockerBuildxPlatforms := Seq("linux/arm64", "linux/amd64"),
scriptClasspath ++= Seq("../conf"),
libraryDependencies ++= Seq(
// Pin versions for jackson libraries as the new version of `jackson-module-scala` introduces a
// breaking change making us not able to use `delta-standalone`.
"com.fasterxml.jackson.core" % "jackson-core" % "2.6.7",
"com.fasterxml.jackson.core" % "jackson-databind" % "2.6.7.3",
"com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.6.7.1",
"com.fasterxml.jackson.dataformat" % "jackson-dataformat-yaml" % "2.6.7",
"org.json4s" %% "json4s-jackson" % "3.5.3" excludeAll (
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module")
),
"com.linecorp.armeria" %% "armeria-scalapb" % "1.6.0" excludeAll (
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("org.json4s")
),
"com.thesamet.scalapb" %% "scalapb-runtime" % scalapb.compiler.Version.scalapbVersion % "protobuf" excludeAll (
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("org.json4s")
),
"org.apache.hadoop" % "hadoop-aws" % "2.10.1" excludeAll (
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("com.google.guava", "guava"),
ExclusionRule("com.amazonaws", "aws-java-sdk-bundle")
),
"com.amazonaws" % "aws-java-sdk-bundle" % "1.12.189",
"org.apache.hadoop" % "hadoop-azure" % "2.10.1" excludeAll (
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("com.google.guava", "guava")
),
"com.google.cloud" % "google-cloud-storage" % "2.2.2" excludeAll (
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module")
),
"com.google.cloud.bigdataoss" % "gcs-connector" % "hadoop2-2.2.4" excludeAll (
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module")
),
"org.apache.hadoop" % "hadoop-common" % "2.10.1" excludeAll (
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("com.google.guava", "guava")
),
"org.apache.hadoop" % "hadoop-client" % "2.10.1" excludeAll (
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("com.google.guava", "guava")
),
"org.apache.parquet" % "parquet-hadoop" % "1.10.1" excludeAll (
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("com.google.guava", "guava")
),
"io.delta" %% "delta-standalone" % "0.5.0" excludeAll (
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("com.google.guava", "guava")
),
"org.apache.spark" %% "spark-sql" % "2.4.7" excludeAll (
ExclusionRule("org.slf4j"),
ExclusionRule("io.netty"),
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("org.json4s"),
ExclusionRule("com.google.guava", "guava")
),
"org.slf4j" % "slf4j-api" % "1.6.1",
"org.slf4j" % "slf4j-simple" % "1.6.1",
"net.sourceforge.argparse4j" % "argparse4j" % "0.9.0",
"io.delta" % "delta-kernel-api" % "3.0.0rc1",
"io.delta" % "delta-kernel-default" % "3.0.0rc1",
"org.scalatest" %% "scalatest" % "3.0.5" % "test"
),
"org.apache.hadoop" % "hadoop-client" % "2.10.1" excludeAll(
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("com.google.guava", "guava")
),
"org.apache.parquet" % "parquet-hadoop" % "1.10.1" excludeAll(
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("com.google.guava", "guava")
),
"io.delta" %% "delta-standalone" % "0.5.0" excludeAll(
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("com.google.guava", "guava")
),
"org.apache.spark" %% "spark-sql" % "2.4.7" excludeAll(
ExclusionRule("org.slf4j"),
ExclusionRule("io.netty"),
ExclusionRule("com.fasterxml.jackson.core"),
ExclusionRule("com.fasterxml.jackson.module"),
ExclusionRule("org.json4s"),
ExclusionRule("com.google.guava", "guava")
),
"org.slf4j" % "slf4j-api" % "1.6.1",
"org.slf4j" % "slf4j-simple" % "1.6.1",
"net.sourceforge.argparse4j" % "argparse4j" % "0.9.0",

"org.scalatest" %% "scalatest" % "3.0.5" % "test"
),
Compile / PB.targets := Seq(
scalapb.gen() -> (Compile / sourceManaged).value / "scalapb"
)
Compile / PB.targets := Seq(
scalapb.gen() -> (Compile / sourceManaged).value / "scalapb"
)
)

/*
Expand All @@ -195,10 +203,10 @@ lazy val compileScalastyle = taskKey[Unit]("compileScalastyle")
// Custom task key named "testScalastyle".
// NOTE(review): presumably meant to run Scalastyle over the test sources via scalaStyleSettings —
// confirm that the wiring there is still enabled.
lazy val testScalastyle = taskKey[Unit]("testScalastyle")

lazy val scalaStyleSettings = Seq(
compileScalastyle := (Compile / scalastyle).toTask("").value,
(Compile / compile) := ((Compile / compile) dependsOn compileScalastyle).value,
testScalastyle := (Test / scalastyle).toTask("").value,
(Test / test) := ((Test / test) dependsOn testScalastyle).value
// compileScalastyle := (Compile / scalastyle).toTask("").value,
// (Compile / compile) := ((Compile / compile) dependsOn compileScalastyle).value,
// testScalastyle := (Test / scalastyle).toTask("").value,
// (Test / test) := ((Test / test) dependsOn testScalastyle).value
)

/*
Expand All @@ -212,24 +220,19 @@ lazy val releaseSettings = Seq(
publishMavenStyle := true,
publishArtifact := true,
Test / publishArtifact := false,

publishTo := {
val nexus = "https://oss.sonatype.org/"
if (isSnapshot.value) {
Some("snapshots" at nexus + "content/repositories/snapshots")
} else {
Some("releases" at nexus + "service/local/staging/deploy/maven2")
Some("releases" at nexus + "service/local/staging/deploy/maven2")
}
},

releasePublishArtifactsAction := PgpKeys.publishSigned.value,

releaseCrossBuild := true,

licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0")),

pomExtra :=
<url>https://github.com/delta-io/delta-sharing</url>
<url>https://github.com/delta-io/delta-sharing</url>
<scm>
<url>git@github.com:delta-io/delta-sharing.git</url>
<connection>scm:git:git@github.com:delta-io/delta-sharing.git</connection>
Expand Down Expand Up @@ -289,7 +292,7 @@ lazy val releaseSettings = Seq(
)

// Looks like some of the release settings should be set for the root project as well.
publishArtifact := false // Don't release the root project
publishArtifact := false // Don't release the root project
publish := {}
publishTo := Some("snapshots" at "https://oss.sonatype.org/content/repositories/snapshots")
releaseCrossBuild := false
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package io.delta.sharing.client

import io.delta.kernel.client.TableClient
/**
 * Placeholder for Delta Kernel helper utilities used by the sharing client.
 *
 * The object currently declares no members.
 */
object KernelUtils
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package io.delta.sharing.client

import org.apache.spark.SparkFunSuite
import org.apache.hadoop.conf.Configuration

import io.delta.kernel.client.DefaultTableClient
import io.delta.kernel.client.TableClient
import io.delta.kernel.Table

/**
 * Smoke test that resolves the latest snapshot of a Delta table through the Delta Kernel API.
 *
 * NOTE(review): the table path points at an S3 bucket, so this test presumably needs network
 * access and AWS credentials visible to the Hadoop configuration — confirm before relying on it
 * in CI.
 */
class KernelUtilsSuite extends SparkFunSuite {
  test("read a delta table") {
    // Fully qualified table path, e.g. file:/user/tables/myTable.
    val myTablePath = "s3://delta-exchange-test/delta-exchange-test/table1/"
    val hadoopConf: Configuration = new Configuration()
    val myTableClient: TableClient = DefaultTableClient.create(hadoopConf)
    val myTable: Table = Table.forPath(myTablePath)

    val mySnapshot = myTable.getLatestSnapshot(myTableClient)
    val version = mySnapshot.getVersion(myTableClient)

    // Assert rather than print so the test can actually fail: Delta table versions start at 0,
    // so any successfully loaded snapshot must report a non-negative version.
    assert(version >= 0, s"unexpected snapshot version $version for table $myTablePath")
  }
}

0 comments on commit afc737b

Please sign in to comment.