From 463cc1ae256a5e22b9726ae58ea8b651e81e3882 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 18 Oct 2023 09:11:31 -0700 Subject: [PATCH] Make dbinit.sql slightly less order-dependent (#4288) This PR does two things: 1. It avoids checking for the order-dependent NOT NULL constraint in information schema, opting to parse the output of `SHOW CONSTRAINTS` and [information_schema.columna.is_nullable](https://www.cockroachlabs.com/docs/v23.1/information-schema#columns) instead. 2. It re-orders the db_metadata creation to the end of dbinit.sql (mostly for clarity) Fixes https://github.com/oxidecomputer/omicron/issues/4286 --- nexus/tests/integration_tests/schema.rs | 175 ++++++++++++++++-------- schema/crdb/README.adoc | 41 ++---- schema/crdb/dbinit.sql | 60 ++++---- 3 files changed, 153 insertions(+), 123 deletions(-) diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index 6d2595b561..f7d6c1da6a 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -253,7 +253,35 @@ impl<'a> From<&'a [&'static str]> for ColumnSelector<'a> { } } -async fn query_crdb_for_rows_of_strings( +async fn crdb_show_constraints( + crdb: &CockroachInstance, + table: &str, +) -> Vec { + let client = crdb.connect().await.expect("failed to connect"); + + let sql = format!("SHOW CONSTRAINTS FROM {table}"); + let rows = client + .query(&sql, &[]) + .await + .unwrap_or_else(|_| panic!("failed to query {table}")); + client.cleanup().await.expect("cleaning up after wipe"); + + let mut result = vec![]; + for row in rows { + let mut row_result = Row::new(); + for i in 0..row.len() { + let column_name = row.columns()[i].name(); + row_result.values.push(NamedSqlValue { + column: column_name.to_string(), + value: row.get(i), + }); + } + result.push(row_result); + } + result +} + +async fn crdb_select( crdb: &CockroachInstance, columns: ColumnSelector<'_>, table: &str, @@ -453,22 +481,16 @@ async fn versions_have_idempotent_up() { logctx.cleanup_successful(); } -const COLUMNS: [&'static str; 6] = [ +const COLUMNS: [&'static str; 7] = [ "table_catalog", "table_schema", "table_name", "column_name", "column_default", + "is_nullable", "data_type", ]; -const CHECK_CONSTRAINTS: [&'static str; 4] = [ - "constraint_catalog", - "constraint_schema", - "constraint_name", - "check_clause", -]; - const CONSTRAINT_COLUMN_USAGE: [&'static str; 7] = [ "table_catalog", "table_schema", @@ -538,22 +560,9 @@ const PG_INDEXES: [&'static str; 5] = const TABLES: [&'static str; 4] = ["table_catalog", "table_schema", "table_name", "table_type"]; -const TABLE_CONSTRAINTS: [&'static str; 9] = [ - "constraint_catalog", - "constraint_schema", - "constraint_name", - "table_catalog", - "table_schema", - "table_name", - "constraint_type", - "is_deferrable", - "initially_deferred", -]; - #[derive(Eq, PartialEq, Debug)] struct InformationSchema { columns: Vec, - check_constraints: Vec, constraint_column_usage: Vec, key_column_usage: Vec, referential_constraints: Vec, @@ -562,7 +571,7 @@ struct InformationSchema { sequences: Vec, pg_indexes: Vec, tables: Vec, - table_constraints: Vec, + table_constraints: BTreeMap>, } impl InformationSchema { @@ -576,10 +585,6 @@ impl InformationSchema { self.table_constraints, other.table_constraints ); - similar_asserts::assert_eq!( - self.check_constraints, - other.check_constraints - ); similar_asserts::assert_eq!( self.constraint_column_usage, other.constraint_column_usage @@ -602,7 +607,7 @@ impl InformationSchema { // https://www.cockroachlabs.com/docs/v23.1/information-schema // // For details on each of these tables. - let columns = query_crdb_for_rows_of_strings( + let columns = crdb_select( crdb, COLUMNS.as_slice().into(), "information_schema.columns", @@ -610,15 +615,7 @@ impl InformationSchema { ) .await; - let check_constraints = query_crdb_for_rows_of_strings( - crdb, - CHECK_CONSTRAINTS.as_slice().into(), - "information_schema.check_constraints", - None, - ) - .await; - - let constraint_column_usage = query_crdb_for_rows_of_strings( + let constraint_column_usage = crdb_select( crdb, CONSTRAINT_COLUMN_USAGE.as_slice().into(), "information_schema.constraint_column_usage", @@ -626,7 +623,7 @@ impl InformationSchema { ) .await; - let key_column_usage = query_crdb_for_rows_of_strings( + let key_column_usage = crdb_select( crdb, KEY_COLUMN_USAGE.as_slice().into(), "information_schema.key_column_usage", @@ -634,7 +631,7 @@ impl InformationSchema { ) .await; - let referential_constraints = query_crdb_for_rows_of_strings( + let referential_constraints = crdb_select( crdb, REFERENTIAL_CONSTRAINTS.as_slice().into(), "information_schema.referential_constraints", @@ -642,7 +639,7 @@ impl InformationSchema { ) .await; - let views = query_crdb_for_rows_of_strings( + let views = crdb_select( crdb, VIEWS.as_slice().into(), "information_schema.views", @@ -650,7 +647,7 @@ impl InformationSchema { ) .await; - let statistics = query_crdb_for_rows_of_strings( + let statistics = crdb_select( crdb, STATISTICS.as_slice().into(), "information_schema.statistics", @@ -658,7 +655,7 @@ impl InformationSchema { ) .await; - let sequences = query_crdb_for_rows_of_strings( + let sequences = crdb_select( crdb, SEQUENCES.as_slice().into(), "information_schema.sequences", @@ -666,7 +663,7 @@ impl InformationSchema { ) .await; - let pg_indexes = query_crdb_for_rows_of_strings( + let pg_indexes = crdb_select( crdb, PG_INDEXES.as_slice().into(), "pg_indexes", @@ -674,7 +671,7 @@ impl InformationSchema { ) .await; - let tables = query_crdb_for_rows_of_strings( + let tables = crdb_select( crdb, TABLES.as_slice().into(), "information_schema.tables", @@ -682,17 +679,11 @@ impl InformationSchema { ) .await; - let table_constraints = query_crdb_for_rows_of_strings( - crdb, - TABLE_CONSTRAINTS.as_slice().into(), - "information_schema.table_constraints", - Some("table_schema = 'public'"), - ) - .await; + let table_constraints = + Self::show_constraints_all_tables(&tables, crdb).await; Self { columns, - check_constraints, constraint_column_usage, key_column_usage, referential_constraints, @@ -705,6 +696,33 @@ impl InformationSchema { } } + async fn show_constraints_all_tables( + tables: &Vec, + crdb: &CockroachInstance, + ) -> BTreeMap> { + let mut map = BTreeMap::new(); + + for table in tables { + let table = &table.values; + let table_catalog = + table[0].expect("table_catalog").unwrap().as_str(); + let table_schema = + table[1].expect("table_schema").unwrap().as_str(); + let table_name = table[2].expect("table_name").unwrap().as_str(); + let table_type = table[3].expect("table_type").unwrap().as_str(); + + if table_type != "BASE TABLE" { + continue; + } + + let table_name = + format!("{}.{}.{}", table_catalog, table_schema, table_name); + let rows = crdb_show_constraints(crdb, &table_name).await; + map.insert(table_name, rows); + } + map + } + // This would normally be quite an expensive operation, but we expect it'll // at least be slightly cheaper for the freshly populated DB, which // shouldn't have that many records yet. @@ -731,13 +749,9 @@ impl InformationSchema { let table_name = format!("{}.{}.{}", table_catalog, table_schema, table_name); info!(log, "Querying table: {table_name}"); - let rows = query_crdb_for_rows_of_strings( - crdb, - ColumnSelector::Star, - &table_name, - None, - ) - .await; + let rows = + crdb_select(crdb, ColumnSelector::Star, &table_name, None) + .await; info!(log, "Saw data: {rows:?}"); map.insert(table_name, rows); } @@ -1012,6 +1026,47 @@ async fn compare_table_differing_constraint() { ) .await; - assert_ne!(schema1.check_constraints, schema2.check_constraints); + assert_ne!(schema1.table_constraints, schema2.table_constraints); + logctx.cleanup_successful(); +} + +#[tokio::test] +async fn compare_table_differing_not_null_order() { + let config = load_test_config(); + let logctx = LogContext::new( + "compare_table_differing_not_null_order", + &config.pkg.log, + ); + let log = &logctx.log; + + let schema1 = get_information_schema( + log, + " + CREATE DATABASE omicron; + CREATE TABLE omicron.public.pet ( id UUID PRIMARY KEY ); + CREATE TABLE omicron.public.employee ( + id UUID PRIMARY KEY, + name TEXT NOT NULL, + hobbies TEXT + ); + ", + ) + .await; + + let schema2 = get_information_schema( + log, + " + CREATE DATABASE omicron; + CREATE TABLE omicron.public.employee ( + id UUID PRIMARY KEY, + name TEXT NOT NULL, + hobbies TEXT + ); + CREATE TABLE omicron.public.pet ( id UUID PRIMARY KEY ); + ", + ) + .await; + + schema1.pretty_assert_eq(&schema2); logctx.cleanup_successful(); } diff --git a/schema/crdb/README.adoc b/schema/crdb/README.adoc index c15b51e374..f92748f101 100644 --- a/schema/crdb/README.adoc +++ b/schema/crdb/README.adoc @@ -73,38 +73,15 @@ SQL Validation, via Automated Tests: ==== Handling common schema changes -CockroachDB's schema includes a description of all of the database's CHECK -constraints. If a CHECK constraint is anonymous (i.e. it is written simply as -`CHECK ` and not `CONSTRAINT CHECK expression`), CRDB -assigns it a name based on the table and column to which the constraint applies. -The challenge is that CRDB identifies tables and columns using opaque -identifiers whose values depend on the order in which tables and views were -defined in the current database. This means that adding, removing, or renaming -objects needs to be done carefully to preserve the relative ordering of objects -in new databases created by `dbinit.sql` and upgraded databases created by -applying `up.sql` transformations. - -===== Adding new columns with constraints - -Strongly consider naming new constraints (`CONSTRAINT `) to -avoid the problems with anonymous constraints described above. - -===== Adding tables and views - -New tables and views must be added to the end of `dbinit.sql` so that the order -of preceding `CREATE` statements is left unchanged. If your changes fail the -`CHECK` constraints test and you get a constraint name diff like this... - -``` -NamedSqlValue { - column: "constraint_name", - value: Some( - String( -< "4101115737_149_10_not_null", -> "4101115737_148_10_not_null", -``` - -...then you've probably inadvertently added a table or view in the wrong place. +Although CockroachDB's schema includes some opaque internally-generated fields +that are order dependent - such as the names of anonymous CHECK constraints - +our schema comparison tools intentionally ignore these values. As a result, +when performing schema changes, the order of new tables and constraints should +generally not be important. + +As convention, however, we recommend keeping the `db_metadata` file at the end of +`dbinit.sql`, so that the database does not contain a version until it is fully +populated. ==== Adding new source tables to an existing view diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 9f5f78326c..4d0589b3a0 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2513,37 +2513,6 @@ BEGIN; * nothing to ensure it gets bumped when it should be, but it's a start. */ -CREATE TABLE IF NOT EXISTS omicron.public.db_metadata ( - -- There should only be one row of this table for the whole DB. - -- It's a little goofy, but filter on "singleton = true" before querying - -- or applying updates, and you'll access the singleton row. - -- - -- We also add a constraint on this table to ensure it's not possible to - -- access the version of this table with "singleton = false". - singleton BOOL NOT NULL PRIMARY KEY, - time_created TIMESTAMPTZ NOT NULL, - time_modified TIMESTAMPTZ NOT NULL, - -- Semver representation of the DB version - version STRING(64) NOT NULL, - - -- (Optional) Semver representation of the DB version to which we're upgrading - target_version STRING(64), - - CHECK (singleton = true) -); - -INSERT INTO omicron.public.db_metadata ( - singleton, - time_created, - time_modified, - version, - target_version -) VALUES - ( TRUE, NOW(), NOW(), '7.0.0', NULL) -ON CONFLICT DO NOTHING; - - - -- Per-VMM state. CREATE TABLE IF NOT EXISTS omicron.public.vmm ( id UUID PRIMARY KEY, @@ -2590,4 +2559,33 @@ FROM WHERE instance.time_deleted IS NULL AND vmm.time_deleted IS NULL; +CREATE TABLE IF NOT EXISTS omicron.public.db_metadata ( + -- There should only be one row of this table for the whole DB. + -- It's a little goofy, but filter on "singleton = true" before querying + -- or applying updates, and you'll access the singleton row. + -- + -- We also add a constraint on this table to ensure it's not possible to + -- access the version of this table with "singleton = false". + singleton BOOL NOT NULL PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + -- Semver representation of the DB version + version STRING(64) NOT NULL, + + -- (Optional) Semver representation of the DB version to which we're upgrading + target_version STRING(64), + + CHECK (singleton = true) +); + +INSERT INTO omicron.public.db_metadata ( + singleton, + time_created, + time_modified, + version, + target_version +) VALUES + ( TRUE, NOW(), NOW(), '7.0.0', NULL) +ON CONFLICT DO NOTHING; + COMMIT;