From 7a33cb48793d42a330fc2810836adf6603119a60 Mon Sep 17 00:00:00 2001 From: Ingar Abrahamsen Date: Wed, 23 Aug 2023 15:14:50 +0200 Subject: [PATCH] fix: check fields by name when we can In some cases we do not care about the order of the fields as long as the name and type matches. --- .../main/scala/no/nrk/bigquery/conforms.scala | 43 ++++++++++-- .../internal/TableUpdateOperation.scala | 3 +- .../scala/no/nrk/bigquery/conformsTest.scala | 67 +++++++++++++++++++ .../no/nrk/bigquery/testing/BQSmokeTest.scala | 2 +- 4 files changed, 106 insertions(+), 9 deletions(-) create mode 100644 core/src/test/scala/no/nrk/bigquery/conformsTest.scala diff --git a/core/src/main/scala/no/nrk/bigquery/conforms.scala b/core/src/main/scala/no/nrk/bigquery/conforms.scala index b314c486..60dda2ff 100644 --- a/core/src/main/scala/no/nrk/bigquery/conforms.scala +++ b/core/src/main/scala/no/nrk/bigquery/conforms.scala @@ -34,7 +34,8 @@ object conforms { apply( actualSchema.copy(fields = actualSchema.fields.map(anonymize)), - givenSchema + givenSchema, + checkIndex = true ) } @@ -60,17 +61,18 @@ object conforms { case other => BQSchema.of(other) } - apply(actualSchema, givenSchema) + apply(actualSchema, givenSchema, checkIndex = false) } - /* this is a stronger comparison than `onlyTypes`, because it takes into column names as well */ + /** this is a stronger comparison than `onlyTypes`, because it takes into column names as well */ def apply( actualSchema: BQSchema, - givenSchema: BQSchema + givenSchema: BQSchema, + checkIndex: Boolean ): Option[List[String]] = { val reasonsBuilder = List.newBuilder[String] - def go( + def goByIndex( path: List[BQField], actualFields: Seq[BQField], givenFields: Seq[BQField] @@ -90,7 +92,7 @@ object conforms { case Some(givenField) if (givenField.mode == Mode.REPEATED) != (actualField.mode == Mode.REPEATED) => reasonsBuilder += s"Expected ${render(actualField)} to have mode ${actualField.mode}, got ${givenField.mode}" case Some(givenField) if givenField.subFields.nonEmpty => - go(givenField :: path, actualField.subFields, givenField.subFields) + goByIndex(givenField :: path, actualField.subFields, givenField.subFields) case Some(ok @ _) => () case None => @@ -98,7 +100,34 @@ object conforms { } } - go(Nil, actualSchema.fields, givenSchema.fields) + def goByName( + path: List[BQField], + actualFields: Seq[BQField], + givenFields: Seq[BQField] + ): Unit = + actualFields.foreach { actualField => + val givenFieldOpt = givenFields.find(_.name == actualField.name) + + // if we're inside structs, render the full path + def render(f: BQField) = + s"field `${(f :: path).reverse.map(_.name).mkString(".")}`" + + givenFieldOpt match { + case Some(givenField) if givenField.tpe != actualField.tpe => + reasonsBuilder += s"Expected ${render(actualField)} to have type ${actualField.tpe}, got ${givenField.tpe}" + case Some(givenField) if (givenField.mode == Mode.REPEATED) != (actualField.mode == Mode.REPEATED) => + reasonsBuilder += s"Expected ${render(actualField)} to have mode ${actualField.mode}, got ${givenField.mode}" + case Some(givenField) if givenField.subFields.nonEmpty => + goByName(givenField :: path, actualField.subFields, givenField.subFields) + case Some(ok @ _) => + () + case None => + reasonsBuilder += s"Expected ${render(actualField)} to part of [${givenFields.map(_.name).mkString(", ")}]" + } + } + + if (checkIndex) goByIndex(Nil, actualSchema.fields, givenSchema.fields) + else goByName(Nil, actualSchema.fields, givenSchema.fields) reasonsBuilder.result() match { case Nil => None diff --git a/core/src/main/scala/no/nrk/bigquery/internal/TableUpdateOperation.scala b/core/src/main/scala/no/nrk/bigquery/internal/TableUpdateOperation.scala index 07186b86..10cd89b7 100644 --- a/core/src/main/scala/no/nrk/bigquery/internal/TableUpdateOperation.scala +++ b/core/src/main/scala/no/nrk/bigquery/internal/TableUpdateOperation.scala @@ -41,7 +41,8 @@ object TableUpdateOperation { val illegalSchemaExtension: Option[UpdateOperation.IllegalSchemaExtension] = conforms( actualSchema = remoteAsTableDef.schema, - givenSchema = localTableDef.schema + givenSchema = localTableDef.schema, + checkIndex = true ).map { reasons => UpdateOperation.IllegalSchemaExtension( TableDefOperationMeta(existingRemoteTable, tableDef), diff --git a/core/src/test/scala/no/nrk/bigquery/conformsTest.scala b/core/src/test/scala/no/nrk/bigquery/conformsTest.scala new file mode 100644 index 00000000..2a9f2911 --- /dev/null +++ b/core/src/test/scala/no/nrk/bigquery/conformsTest.scala @@ -0,0 +1,67 @@ +package no.nrk.bigquery + +import com.google.cloud.bigquery.{Field, StandardSQLTypeName} +import munit.FunSuite + +class conformsTest extends FunSuite { + test("failing check by index") { + val res = conforms( + BQSchema.of( + BQField("one", StandardSQLTypeName.STRING, Field.Mode.NULLABLE), + BQField("two", StandardSQLTypeName.INT64, Field.Mode.NULLABLE) + ), + BQSchema.of( + BQField("one", StandardSQLTypeName.INT64, Field.Mode.NULLABLE), + BQField("two", StandardSQLTypeName.STRING, Field.Mode.NULLABLE) + ), + checkIndex = true + ) + assertEquals(res.map(_.length), Some(2), clue(res.map(_.mkString(", ")))) + } + + test("passing check by index") { + val res = conforms( + BQSchema.of( + BQField("one", StandardSQLTypeName.STRING, Field.Mode.NULLABLE), + BQField("two", StandardSQLTypeName.INT64, Field.Mode.NULLABLE) + ), + BQSchema.of( + BQField("one", StandardSQLTypeName.STRING, Field.Mode.NULLABLE), + BQField("two", StandardSQLTypeName.INT64, Field.Mode.NULLABLE) + ), + checkIndex = true + ) + assertEquals(res.map(_.length), None, clue(res.map(_.mkString(", ")))) + } + + test("passing check by name") { + val res = conforms( + BQSchema.of( + BQField("one", StandardSQLTypeName.STRING, Field.Mode.NULLABLE), + BQField("two", StandardSQLTypeName.INT64, Field.Mode.NULLABLE) + ), + BQSchema.of( + BQField("two", StandardSQLTypeName.INT64, Field.Mode.NULLABLE), + BQField("one", StandardSQLTypeName.STRING, Field.Mode.NULLABLE) + ), + checkIndex = false + ) + assertEquals(res.map(_.length), None, clue(res.map(_.mkString(", ")))) + } + + test("failing check by name") { + val res = conforms( + BQSchema.of( + BQField("one", StandardSQLTypeName.STRING, Field.Mode.NULLABLE), + BQField("two", StandardSQLTypeName.INT64, Field.Mode.NULLABLE) + ), + BQSchema.of( + BQField("two", StandardSQLTypeName.STRING, Field.Mode.NULLABLE), + BQField("one", StandardSQLTypeName.INT64, Field.Mode.NULLABLE) + ), + checkIndex = false + ) + assertEquals(res.map(_.length), Some(2), clue(res.map(_.mkString(", ")))) + } + +} diff --git a/testing/src/main/scala/no/nrk/bigquery/testing/BQSmokeTest.scala b/testing/src/main/scala/no/nrk/bigquery/testing/BQSmokeTest.scala index f2ea84ee..902e393e 100644 --- a/testing/src/main/scala/no/nrk/bigquery/testing/BQSmokeTest.scala +++ b/testing/src/main/scala/no/nrk/bigquery/testing/BQSmokeTest.scala @@ -291,7 +291,7 @@ object BQSmokeTest { def checkSchema(actualSchema: BQSchema): Unit = this match { case CheckType.Schema(expectedSchema) => - conforms(actualSchema, expectedSchema) match { + conforms(actualSchema, expectedSchema, checkIndex = true) match { case Some(reasons) => fail(s"Failed because ${reasons.mkString(", ")}", TypeClue(expectedSchema, actualSchema)) case None => assert(true)