Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve correctness for some datatypes and add some QOL changes #127

Merged
merged 5 commits into from
Jul 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,11 @@ jobs:

- name: Make target directories
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v'))
run: mkdir -p testing/.jvm/target target .js/target site/target prometheus/.jvm/target core/.jvm/target .jvm/target .native/target project/target
run: mkdir -p target .js/target site/target prometheus/.jvm/target testing/.jvm/target .jvm/target .native/target core/.jvm/target project/target

- name: Compress target directories
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v'))
run: tar cf targets.tar testing/.jvm/target target .js/target site/target prometheus/.jvm/target core/.jvm/target .jvm/target .native/target project/target
run: tar cf targets.tar target .js/target site/target prometheus/.jvm/target testing/.jvm/target .jvm/target .native/target core/.jvm/target project/target

- name: Upload target directories
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v'))
Expand Down
6 changes: 5 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import com.typesafe.tools.mima.core._

// https://typelevel.org/sbt-typelevel/faq.html#what-is-a-base-version-anyway
ThisBuild / tlBaseVersion := "0.8" // your current series x.y
ThisBuild / tlBaseVersion := "0.9" // your current series x.y

ThisBuild / organization := "no.nrk.bigquery"
ThisBuild / organizationName := "NRK"
Expand Down Expand Up @@ -98,6 +98,7 @@ lazy val root = tlCrossRootProject
.disablePlugins(TypelevelCiSigningPlugin, Sonatype, SbtGpg)

lazy val core = crossProject(JVMPlatform)
.withoutSuffixFor(JVMPlatform)
.crossType(CrossType.Pure)
.in(file("core"))
.settings(commonSettings)
Expand All @@ -110,6 +111,7 @@ lazy val core = crossProject(JVMPlatform)
"org.typelevel" %% "cats-effect" % "3.5.1",
"org.typelevel" %% "literally" % "1.1.0",
"org.scalameta" %% "munit" % "0.7.29" % Test,
"org.scalameta" %% "munit-scalacheck" % "0.7.29" % Test,
"org.typelevel" %% "munit-cats-effect-3" % "1.0.7" % Test,
"com.google.cloud" % "google-cloud-bigquery" % "2.29.0",
"com.google.cloud" % "google-cloud-bigquerystorage" % "2.39.1",
Expand Down Expand Up @@ -141,6 +143,7 @@ lazy val core = crossProject(JVMPlatform)
.disablePlugins(TypelevelCiSigningPlugin, Sonatype, SbtGpg)

lazy val prometheus = crossProject(JVMPlatform)
.withoutSuffixFor(JVMPlatform)
.crossType(CrossType.Pure)
.in(file("prometheus"))
.settings(commonSettings)
Expand All @@ -154,6 +157,7 @@ lazy val prometheus = crossProject(JVMPlatform)
.disablePlugins(TypelevelCiSigningPlugin, Sonatype, SbtGpg)

lazy val testing = crossProject(JVMPlatform)
.withoutSuffixFor(JVMPlatform)
.crossType(CrossType.Pure)
.in(file("testing"))
.dependsOn(core)
Expand Down
34 changes: 34 additions & 0 deletions core/src/main/scala/no/nrk/bigquery/BQDataset.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package no.nrk.bigquery

import com.google.cloud.bigquery.DatasetId

import java.util.regex.Pattern

/** A BigQuery dataset, identified by the project that owns it and the dataset id.
  *
  * Dataset names must be unique within a project and may contain:
  *
  *   - up to 1,024 characters
  *   - letters (uppercase or lowercase), numbers, and underscores
  *
  * Source: https://cloud.google.com/bigquery/docs/datasets#dataset-naming
  *
  * @param project
  *   the project the dataset belongs to
  * @param id
  *   the dataset id
  * @param location
  *   the location of the dataset, if one has been set
  */
final case class BQDataset private[bigquery] (
    project: ProjectId,
    id: String,
    location: Option[LocationId]
) {

  /** The Google client `DatasetId` built from the project value and the dataset id. */
  def underlying: DatasetId = {
    val projectName = project.value
    DatasetId.of(projectName, id)
  }

  /** Returns a copy of this dataset whose location is set to `locationId`. */
  def withLocation(locationId: LocationId): BQDataset = {
    val updatedLocation = Some(locationId)
    copy(location = updatedLocation)
  }
}

object BQDataset {
  // 1 to 1,024 letters, digits or underscores. `matches()` is used below, so the whole input must match.
  private val regex: Pattern = "^[a-zA-Z0-9_]{1,1024}".r.pattern

  /** Validates `dataset` against the dataset naming pattern and builds a [[BQDataset]] without a location.
    *
    * @param project
    *   the project that owns the dataset
    * @param dataset
    *   the candidate dataset id
    * @return
    *   `Right` with the dataset when the id matches the pattern, otherwise `Left` with a message describing the
    *   rejected id
    */
  def of(project: ProjectId, dataset: String): Either[String, BQDataset] =
    if (regex.matcher(dataset).matches()) Right(BQDataset(project, dataset, None))
    else Left(s"invalid dataset ID '$dataset' - must match ${regex.pattern()}")

  /** Same as [[of]], but throws instead of returning a `Left`.
    *
    * @throws java.lang.IllegalArgumentException
    *   when `dataset` does not match the dataset naming pattern
    */
  def unsafeOf(project: ProjectId, dataset: String): BQDataset =
    of(project, dataset).fold(err => throw new IllegalArgumentException(err), identity)
}
9 changes: 9 additions & 0 deletions core/src/main/scala/no/nrk/bigquery/BQField.scala
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,16 @@ case class BQField(
def isRequired: Boolean = mode == Field.Mode.REQUIRED

/** Returns a copy of this field named `newName`. */
def withName(newName: String): BQField = copy(name = newName)

/** Returns a copy of this field whose name is the result of applying `f` to the current name. */
def mapName(f: String => String): BQField = withName(f(name))

/** Returns a copy of this field with the description set to `desc`. */
def withDescription(desc: String): BQField = copy(description = Some(desc))

/** Returns a copy of this field with the description removed. */
def withoutDescription: BQField = copy(description = None)

@deprecated("use withRequired instead", "0.9.0")
def required: BQField = withRequired

/** Returns a copy of this field with mode `REQUIRED`. */
def withRequired: BQField = copy(mode = Field.Mode.REQUIRED)

/** Returns a copy of this field with mode `REPEATED`. */
def withRepeated: BQField = copy(mode = Field.Mode.REPEATED)

/** Returns a copy of this field with its type replaced by `newType`. */
def withType(newType: StandardSQLTypeName): BQField = copy(tpe = newType)

Expand Down
54 changes: 34 additions & 20 deletions core/src/main/scala/no/nrk/bigquery/BQTableId.scala
hamnis marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,28 +1,22 @@
package no.nrk.bigquery

import cats.Show
import com.google.cloud.bigquery.{DatasetId, TableId}
import com.google.cloud.bigquery.TableId

final case class LocationId(value: String) extends AnyVal
import java.util.regex.Pattern

final case class ProjectId(value: String) extends AnyVal

final case class BQDataset(
project: ProjectId,
id: String,
location: Option[LocationId]
) {
def underlying: DatasetId = DatasetId.of(project.value, id)
}

object BQDataset {
def of(project: ProjectId, dataset: String) =
BQDataset(project, dataset, None)
}
final case class BQTableId(dataset: BQDataset, tableName: String) {
/** When you create a table in BigQuery, the table name must be unique per dataset. The table name can:
*
* - Contain up to 1,024 characters.
* - Contain Unicode characters in category L (letter), M (mark), N (number), Pc (connector, including underscore),
* Pd (dash), Zs (space).
*
* FROM https://cloud.google.com/bigquery/docs/tables#table_naming
*/
final case class BQTableId private[bigquery] (dataset: BQDataset, tableName: String) {

def modifyTableName(f: String => String): BQTableId =
copy(tableName = f(tableName))
BQTableId.unsafeOf(dataset, f(tableName))
def underlying: TableId =
TableId.of(dataset.project.value, dataset.id, tableName)

Expand All @@ -34,8 +28,15 @@ final case class BQTableId(dataset: BQDataset, tableName: String) {
}

object BQTableId {
def of(project: ProjectId, dataset: String, tableName: String) =
BQTableId(BQDataset.of(project, dataset), tableName)

// `(?U)` switches on Unicode-aware predefined character classes, so `\w` is not limited to ASCII.
// One leading word character followed by 0 to 1,023 further characters accepts names of 1 to 1,024
// characters; the previous `{1,1023}` quantifier wrongly rejected one-character table names.
private val regex: Pattern = "(?U)^\\w[\\w_ -]{0,1023}".r.pattern

/** Validates `tableName` against the table naming pattern and builds a [[BQTableId]] in `dataset`.
  *
  * @param dataset
  *   the dataset that contains the table
  * @param tableName
  *   the candidate table name
  * @return
  *   `Right` with the table id when the whole name matches the pattern, otherwise `Left` with a message naming the
  *   rejected input and the pattern
  */
def of(dataset: BQDataset, tableName: String): Either[String, BQTableId] =
  if (regex.matcher(tableName).matches()) Right(BQTableId(dataset, tableName))
  else Left(s"Expected '$tableName' to match regex (${regex.pattern()})")

/** Same as [[of]], but throws an `IllegalArgumentException` carrying the validation message when `tableName` is
  * rejected.
  */
def unsafeOf(dataset: BQDataset, tableName: String): BQTableId =
  of(dataset, tableName) match {
    case Right(tableId) => tableId
    case Left(message)  => throw new IllegalArgumentException(message)
  }

def unsafeFromGoogle(dataset: BQDataset, tableId: TableId): BQTableId = {
require(
Expand All @@ -45,6 +46,19 @@ object BQTableId {
BQTableId(dataset, tableId.getTable)
}

/** Same as [[fromString]], but throws an `IllegalArgumentException` carrying the error message when `id` cannot be
  * parsed.
  */
def unsafeFromString(id: String): BQTableId =
  fromString(id) match {
    case Right(tableId) => tableId
    case Left(message)  => throw new IllegalArgumentException(message)
  }

/** Parses a fully qualified table id of the form `[projectId].[datasetId].[tableName]`.
  *
  * The input is split on `.` into at most three parts, so any further dots stay in the table-name part. Each
  * part is then validated in turn by `ProjectId.fromString`, `BQDataset.of` and [[of]]; the first failure is
  * returned as a `Left`.
  */
def fromString(id: String): Either[String, BQTableId] =
  id.split("\\.", 3) match {
    case Array(projectPart, datasetPart, tablePart) =>
      for {
        projectId <- ProjectId.fromString(projectPart)
        bqDataset <- BQDataset.of(projectId, datasetPart)
        tableId <- of(bqDataset, tablePart)
      } yield tableId
    case _ =>
      Left(s"Expected [projectId].[datasetId].[tableName] but got ${id}")
  }

implicit val show: Show[BQTableId] =
Show.show(_.asFragment.asString)

Expand Down
67 changes: 67 additions & 0 deletions core/src/main/scala/no/nrk/bigquery/LocationId.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package no.nrk.bigquery

/** Identifier of a BigQuery location, wrapped in a value class around the raw location string (for example `"eu"`
  * or `"europe-north1"`).
  *
  * The constructor is `private[bigquery]`; the companion object provides constants for known locations.
  */
final case class LocationId private[bigquery] (value: String) extends AnyVal

/** Predefined [[LocationId]] constants, grouped by geography. */
object LocationId {

  // ---- Multi-regions ----
  val US: LocationId = LocationId("us")
  val EU: LocationId = LocationId("eu")

  // ---- Americas ----
  val NorthAmericaNorthEast1: LocationId = LocationId("northamerica-northeast1")
  val NorthAmericaNorthEast2: LocationId = LocationId("northamerica-northeast2")

  val SouthAmericaEast1: LocationId = LocationId("southamerica-east1")
  val SouthAmericaWest1: LocationId = LocationId("southamerica-west1")

  val USSouth1: LocationId = LocationId("us-south1")

  val USCentral1: LocationId = LocationId("us-central1")

  val USEast1: LocationId = LocationId("us-east1")
  val USEast4: LocationId = LocationId("us-east4")
  val USEast5: LocationId = LocationId("us-east5")

  val USWest1: LocationId = LocationId("us-west1")
  val USWest2: LocationId = LocationId("us-west2")
  val USWest3: LocationId = LocationId("us-west3")
  val USWest4: LocationId = LocationId("us-west4")

  // ---- Europe ----
  val EuropeNorth1: LocationId = LocationId("europe-north1")

  val EuropeCentral2: LocationId = LocationId("europe-central2")

  val EuropeWest1: LocationId = LocationId("europe-west1")
  val EuropeWest2: LocationId = LocationId("europe-west2")
  val EuropeWest3: LocationId = LocationId("europe-west3")
  val EuropeWest4: LocationId = LocationId("europe-west4")
  val EuropeWest6: LocationId = LocationId("europe-west6")
  val EuropeWest8: LocationId = LocationId("europe-west8")
  val EuropeWest9: LocationId = LocationId("europe-west9")
  val EuropeWest12: LocationId = LocationId("europe-west12")

  val EuropeSouthWest1: LocationId = LocationId("europe-southwest1")

  // ---- Middle East ----
  val MiddleEastWest1: LocationId = LocationId("me-west1")

  val MiddleEastCentral1: LocationId = LocationId("me-central1")

  // ---- Asia and Oceania ----
  val AsiaEast1: LocationId = LocationId("asia-east1")
  val AsiaEast2: LocationId = LocationId("asia-east2")

  val AsiaSouth1: LocationId = LocationId("asia-south1")
  val AsiaSouth2: LocationId = LocationId("asia-south2")

  val AsiaNorthEast1: LocationId = LocationId("asia-northeast1")
  val AsiaNorthEast2: LocationId = LocationId("asia-northeast2")
  val AsiaNorthEast3: LocationId = LocationId("asia-northeast3")

  val AsiaSouthEast1: LocationId = LocationId("asia-southeast1")
  val AsiaSouthEast2: LocationId = LocationId("asia-southeast2")

  val AustraliaSouthEast1: LocationId = LocationId("australia-southeast1")
  val AustraliaSouthEast2: LocationId = LocationId("australia-southeast2")
}
20 changes: 20 additions & 0 deletions core/src/main/scala/no/nrk/bigquery/ProjectId.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package no.nrk.bigquery

import java.util.regex.Pattern

/** Identifier of a Google Cloud project, wrapped in a value class around the raw project id string.
  *
  * From https://cloud.google.com/resource-manager/docs/creating-managing-projects#before_you_begin:
  *
  * The project ID must be a unique string of 6 to 30 lowercase letters, digits, or hyphens. It must start with a
  * letter, and cannot have a trailing hyphen.
  *
  * The constructor is `private[bigquery]`; `ProjectId.fromString` and `ProjectId.unsafeFromString` in the
  * companion object check these rules before building an instance.
  */
final case class ProjectId private[bigquery] (value: String) extends AnyVal

object ProjectId {
  // A lowercase letter, then 5 to 29 lowercase letters, digits or hyphens (6 to 30 characters in total).
  // The trailing negative lookbehind `(?<!-)` rejects ids that end in a hyphen.
  private val regex: Pattern = "^[a-z][a-z0-9-]{5,29}(?<!-)".r.pattern

  /** Validates `input` against the project id pattern.
    *
    * @return
    *   `Right` with the project id when the whole input matches, otherwise `Left` with a message naming the
    *   rejected input and the pattern
    */
  def fromString(input: String): Either[String, ProjectId] =
    Either.cond(
      regex.matcher(input).matches(),
      new ProjectId(input),
      s"invalid project ID '$input' - must match ${regex.pattern()}"
    )

  /** Same as [[fromString]], but throws an `IllegalArgumentException` carrying the validation message when `input`
    * is rejected.
    */
  def unsafeFromString(input: String): ProjectId =
    fromString(input) match {
      case Right(projectId) => projectId
      case Left(message)    => throw new IllegalArgumentException(message)
    }
}
26 changes: 26 additions & 0 deletions core/src/test/scala/no/nrk/bigquery/BQDatasetTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package no.nrk.bigquery

import org.scalacheck._

/** Property tests for the validation done by `BQDataset.of`. */
class BQDatasetTest extends munit.ScalaCheckSuite {
  val project = ProjectId.unsafeFromString("test-123")

  property("valid dataset") {
    Prop.forAll(Generators.validDatasetIdGen) { (datasetId: String) =>
      val expected = Right(BQDataset(project, datasetId, None))
      assertEquals(BQDataset.of(project, datasetId), expected)
    }
  }

  property("invalid dataset") {
    // Either a purely alphanumeric id that is one character too long (1,025 characters), or a non-empty
    // alphanumeric id with a character outside the allowed set appended.
    val invalidIdGen: Gen[String] =
      Gen.oneOf("", "-", "$", "@", ".").flatMap { suffix =>
        if (suffix.isEmpty) Gen.stringOfN(1025, Gen.alphaNumChar)
        else Generators.shorterThanAlphaNum(1024).filterNot(_.isEmpty).map(_ + suffix)
      }

    Prop.forAll(invalidIdGen) { (datasetId: String) =>
      BQDataset.of(project, datasetId).isLeft
    }
  }
}
2 changes: 1 addition & 1 deletion core/src/test/scala/no/nrk/bigquery/BQSqlFragTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ class BQSqlFragTest extends FunSuite {
val partitionField = BQField("partitionDate", StandardSQLTypeName.DATE, Mode.REQUIRED)

BQTableDef.Table(
BQTableId.of(ProjectId("foo"), "bar", name),
BQTableId.unsafeOf(BQDataset.unsafeOf(ProjectId("foo"), "bar"), name),
BQSchema.of(partitionField, BQField("num", StandardSQLTypeName.FLOAT64, Mode.REQUIRED)),
BQPartitionType.DatePartitioned(partitionField.ident)
)
Expand Down
23 changes: 23 additions & 0 deletions core/src/test/scala/no/nrk/bigquery/BQTableIdTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package no.nrk.bigquery

import org.scalacheck.Prop

/** Property tests for `BQTableId.of` and `BQTableId.fromString`. */
class BQTableIdTest extends munit.ScalaCheckSuite {

  val dataset = BQDataset.unsafeOf(ProjectId.unsafeFromString("com-example"), "test")

  property("valid tableId") {
    Prop.forAll(Generators.validTableIdGen) { (tableName: String) =>
      val expected = Right(BQTableId(dataset, tableName))
      assertEquals(BQTableId.of(dataset, tableName), expected)
    }
  }

  property("fromString") {
    Prop.forAll(Generators.validProjectIdGen, Generators.validDatasetIdGen, Generators.validTableIdGen) {
      (projectName: String, datasetName: String, tableName: String) =>
        // Build the expected value through the constructors directly, bypassing validation.
        val expectedDataset = BQDataset(ProjectId(projectName), datasetName, None)
        val expected = Right(BQTableId(expectedDataset, tableName))
        val parsed = BQTableId.fromString(s"${projectName}.${datasetName}.${tableName}")
        assertEquals(parsed, expected)
    }
  }
}
39 changes: 39 additions & 0 deletions core/src/test/scala/no/nrk/bigquery/Generators.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package no.nrk.bigquery

import org.scalacheck.{Arbitrary, Gen}

/** ScalaCheck generators shared by the identifier tests. */
object Generators {

  /** Caps the strings produced by `gen` at `n` characters: strings shorter than `n` pass through unchanged, longer
    * ones are cut down to their first `n` characters.
    */
  def shorterThan(n: Int, gen: Gen[String]): Gen[String] =
    gen.map { generated =>
      if (generated.length >= n) generated.substring(0, n) else generated
    }

  /** Alphanumeric strings capped at `n` characters (see [[shorterThan]]). */
  def shorterThanAlphaNum(n: Int): Gen[String] = shorterThan(n, Gen.alphaNumStr)

  /** Characters that are a Unicode identifier part and are not whitespace. */
  val unicodeIdentifierPart = Arbitrary.arbChar.arbitrary
    .filter(ch => Character.isUnicodeIdentifierPart(ch) && !Character.isWhitespace(ch))

  // Drops every character that the Unicode-aware `\W` class matches.
  private def wordCharsOnly(raw: String): String = raw.replaceAll("(?U)\\W", "")

  /** Project ids: a lowercase letter, an optional hyphen, then lowercase letters and digits. */
  val validProjectIdGen = for {
    leadingLetter <- Gen.stringOfN(1, Gen.alphaLowerChar)
    optionalDash <- Gen.oneOf("", "-")
    lowerAlphaNum = Gen.oneOf(Gen.alphaLowerChar, Gen.numChar)
    body <- Gen.stringOfN(6 - optionalDash.length, lowerAlphaNum)
    tail <- Generators
      .shorterThan(29 - (1 + body.length + optionalDash.length), Gen.stringOf(lowerAlphaNum))
  } yield leadingLetter + optionalDash + body + tail

  /** Dataset ids: non-empty alphanumeric text, an optional underscore, then one or two alphanumeric characters. */
  val validDatasetIdGen = for {
    prefix <- Generators.shorterThanAlphaNum(1021).filterNot(_.isEmpty)
    separator <- Gen.oneOf("", "_")
    suffix <- Generators.shorterThanAlphaNum(2).filterNot(_.isEmpty)
  } yield prefix + separator + suffix

  /** Table ids: one word character, an optional separator, then a non-empty run of word characters. */
  val validTableIdGen = for {
    leading <- Gen
      .stringOfN(1, unicodeIdentifierPart)
      .map { raw =>
        val cleaned = wordCharsOnly(raw)
        // Fall back to a fixed letter when the generated character was stripped.
        if (cleaned.isEmpty) "a" else cleaned
      }
    separator <- Gen.oneOf("", " ", "-", "_")
    rest <- Generators.shorterThan(
      1022,
      Gen.stringOf(unicodeIdentifierPart).map(wordCharsOnly).filter(_.nonEmpty))
  } yield leading + separator + rest
}
21 changes: 21 additions & 0 deletions core/src/test/scala/no/nrk/bigquery/ProjectIdTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package no.nrk.bigquery

import org.scalacheck.Prop

/** Tests for the validation done by `ProjectId.fromString`. */
class ProjectIdTest extends munit.ScalaCheckSuite {

  // Takes the implicit location so a failure is still reported at the calling line.
  private def assertRejected(candidate: String)(implicit loc: munit.Location): Unit =
    assert(ProjectId.fromString(candidate).isLeft)

  test("invalid projectId") {
    assertRejected("abc-def-") // trailing hyphen
    assertRejected("1abcdef_") // leading digit, underscore
    assertRejected("1fffffff") // leading digit
    assertRejected("zxyvx") // only 5 characters
    assertRejected("1zxyvxx") // leading digit
    assertRejected("zxyvxxZ") // uppercase letter
    assertRejected("abcdefghijklmnopqrstuvwxyz12345") // 31 characters
  }

  property("valid projectId") {
    Prop.forAll(Generators.validProjectIdGen) { (candidate: String) =>
      val expected = Right(ProjectId(candidate))
      assertEquals(ProjectId.fromString(candidate), expected)
    }
  }
}
Loading