Skip to content

Commit

Permalink
fix: check fields by name when we can
Browse files Browse the repository at this point in the history
In some cases we do not care about the order of the fields
as long as the name and type matches.
  • Loading branch information
ingarabr committed Aug 24, 2023
1 parent 5dc2775 commit 7a4318f
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 13 deletions.
48 changes: 44 additions & 4 deletions core/src/main/scala/no/nrk/bigquery/conforms.scala
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ object conforms {
case other => BQSchema.of(other)
}

apply(
onlyTypes(
actualSchema.copy(fields = actualSchema.fields.map(anonymize)),
givenSchema
)
Expand Down Expand Up @@ -60,11 +60,10 @@ object conforms {
case other => BQSchema.of(other)
}

apply(actualSchema, givenSchema)
typesAndName(actualSchema, givenSchema)
}

/* this is a stronger comparison than `onlyTypes`, because it takes into column names as well */
def apply(
def onlyTypes(
actualSchema: BQSchema,
givenSchema: BQSchema
): Option[List[String]] = {
Expand Down Expand Up @@ -105,4 +104,45 @@ object conforms {
case reasons => Some(reasons)
}
}

/** this is a stronger comparison than `onlyTypes`, because it takes into column names as well */
def typesAndName(
actualSchema: BQSchema,
givenSchema: BQSchema
): Option[List[String]] = {
val reasonsBuilder = List.newBuilder[String]

def go(
path: List[BQField],
actualFields: Seq[BQField],
givenFields: Seq[BQField]
): Unit =
actualFields.foreach { actualField =>
val givenFieldOpt = givenFields.find(_.name == actualField.name)

// if we're inside structs, render the full path
def render(f: BQField) =
s"field `${(f :: path).reverse.map(_.name).mkString(".")}`"

givenFieldOpt match {
case Some(givenField) if givenField.tpe != actualField.tpe =>
reasonsBuilder += s"Expected ${render(actualField)} to have type ${actualField.tpe}, got ${givenField.tpe}"
case Some(givenField) if (givenField.mode == Mode.REPEATED) != (actualField.mode == Mode.REPEATED) =>
reasonsBuilder += s"Expected ${render(actualField)} to have mode ${actualField.mode}, got ${givenField.mode}"
case Some(givenField) if givenField.subFields.nonEmpty =>
go(givenField :: path, actualField.subFields, givenField.subFields)
case Some(ok @ _) =>
()
case None =>
reasonsBuilder += s"Expected ${render(actualField)} to part of [${givenFields.map(_.name).mkString(", ")}]"
}
}

go(Nil, actualSchema.fields, givenSchema.fields)

reasonsBuilder.result() match {
case Nil => None
case reasons => Some(reasons)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,17 @@ object TableUpdateOperation {
)

val illegalSchemaExtension: Option[UpdateOperation.IllegalSchemaExtension] =
conforms(
actualSchema = remoteAsTableDef.schema,
givenSchema = localTableDef.schema
).map { reasons =>
UpdateOperation.IllegalSchemaExtension(
TableDefOperationMeta(existingRemoteTable, tableDef),
reasons.mkString(", ")
conforms
.onlyTypes(
actualSchema = remoteAsTableDef.schema,
givenSchema = localTableDef.schema
)
}
.map { reasons =>
UpdateOperation.IllegalSchemaExtension(
TableDefOperationMeta(existingRemoteTable, tableDef),
reasons.mkString(", ")
)
}

if (localTableDef == remoteAsTableDef)
UpdateOperation.Noop(TableDefOperationMeta(existingRemoteTable, localTableDef))
Expand Down
63 changes: 63 additions & 0 deletions core/src/test/scala/no/nrk/bigquery/conformsTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package no.nrk.bigquery

import com.google.cloud.bigquery.{Field, StandardSQLTypeName}
import munit.FunSuite

class conformsTest extends FunSuite {
test("failing conform by type position") {
val violations = conforms.onlyTypes(
BQSchema.of(
BQField("one", StandardSQLTypeName.STRING, Field.Mode.NULLABLE),
BQField("two", StandardSQLTypeName.INT64, Field.Mode.NULLABLE)
),
BQSchema.of(
BQField("one", StandardSQLTypeName.INT64, Field.Mode.NULLABLE),
BQField("two", StandardSQLTypeName.STRING, Field.Mode.NULLABLE)
)
)
assertEquals(violations.map(_.length), Some(2), clue(violations.map(_.mkString(", "))))
}

test("passing conform by type position") {
val violations = conforms.onlyTypes(
BQSchema.of(
BQField("one", StandardSQLTypeName.STRING, Field.Mode.NULLABLE),
BQField("two", StandardSQLTypeName.INT64, Field.Mode.NULLABLE)
),
BQSchema.of(
BQField("one", StandardSQLTypeName.STRING, Field.Mode.NULLABLE),
BQField("two", StandardSQLTypeName.INT64, Field.Mode.NULLABLE)
)
)
assertEquals(violations.map(_.length), None, clue(violations.map(_.mkString(", "))))
}

test("passing conform by name and type") {
val violations = conforms.typesAndName(
BQSchema.of(
BQField("one", StandardSQLTypeName.STRING, Field.Mode.NULLABLE),
BQField("two", StandardSQLTypeName.INT64, Field.Mode.NULLABLE)
),
BQSchema.of(
BQField("two", StandardSQLTypeName.INT64, Field.Mode.NULLABLE),
BQField("one", StandardSQLTypeName.STRING, Field.Mode.NULLABLE)
)
)
assertEquals(violations.map(_.length), None, clue(violations.map(_.mkString(", "))))
}

test("failing conform by name and type") {
val violations = conforms.typesAndName(
BQSchema.of(
BQField("one", StandardSQLTypeName.STRING, Field.Mode.NULLABLE),
BQField("two", StandardSQLTypeName.INT64, Field.Mode.NULLABLE)
),
BQSchema.of(
BQField("two", StandardSQLTypeName.STRING, Field.Mode.NULLABLE),
BQField("one", StandardSQLTypeName.INT64, Field.Mode.NULLABLE)
)
)
assertEquals(violations.map(_.length), Some(2), clue(violations.map(_.mkString(", "))))
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ object BQSmokeTest {
def checkSchema(actualSchema: BQSchema): Unit =
this match {
case CheckType.Schema(expectedSchema) =>
conforms(actualSchema, expectedSchema) match {
conforms.onlyTypes(actualSchema, expectedSchema) match {
case Some(reasons) =>
fail(s"Failed because ${reasons.mkString(", ")}", TypeClue(expectedSchema, actualSchema))
case None => assert(true)
Expand Down

0 comments on commit 7a4318f

Please sign in to comment.