From 5a469e26c59bdb7b9c42c2b74043e09ea5e3254f Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Wed, 17 Jul 2024 13:51:59 -0500 Subject: [PATCH 01/48] restoring maria-migrate branch --- MariaDB Migration to PostgreSQL/Dockerfile | 8 + .../requirements.txt | 4 + MariaDB Migration to PostgreSQL/utils.py | 372 ++++++++++++++++++ 3 files changed, 384 insertions(+) create mode 100644 MariaDB Migration to PostgreSQL/Dockerfile create mode 100644 MariaDB Migration to PostgreSQL/requirements.txt create mode 100644 MariaDB Migration to PostgreSQL/utils.py diff --git a/MariaDB Migration to PostgreSQL/Dockerfile b/MariaDB Migration to PostgreSQL/Dockerfile new file mode 100644 index 00000000..cf517dfe --- /dev/null +++ b/MariaDB Migration to PostgreSQL/Dockerfile @@ -0,0 +1,8 @@ +FROM dimitri/pgloader:latest +RUN apt-get update && apt-get install -y postgresql-client +RUN apt-get install -y ca-certificates +WORKDIR /app +COPY . /app + +RUN pip install -r requirements.txt +CMD ["python3", "app.py"] diff --git a/MariaDB Migration to PostgreSQL/requirements.txt b/MariaDB Migration to PostgreSQL/requirements.txt new file mode 100644 index 00000000..59ea34b7 --- /dev/null +++ b/MariaDB Migration to PostgreSQL/requirements.txt @@ -0,0 +1,4 @@ +SQLAlchemy==2.0.7 +PyMySQL==1.1.1 +psycopg2==2.9.9 + diff --git a/MariaDB Migration to PostgreSQL/utils.py b/MariaDB Migration to PostgreSQL/utils.py new file mode 100644 index 00000000..31d8b3ed --- /dev/null +++ b/MariaDB Migration to PostgreSQL/utils.py @@ -0,0 +1,372 @@ +from sqlalchemy import text, create_engine, inspect +from Constants import * +import os +from sqlalchemy.exc import SQLAlchemyError +import time + +""" +Copies table structure and table data from one schema to another schema on the same host. +Command line in cmd.exe language +""" +def pg_dump(): + os.system(f'pg_dump -h {pg_server} -d {pg_db_name} -U {pg_user} -W -F d -f ./postgres_dump') + os.system(f'{pg_pass}') + print('Starting database export........') + return + +def pg_restore(): + os.system(f'pg_dump -h {pg_server} -d {pg_db_name_two} -U {pg_user} -W -F d ./postgres_dump') + os.system(f'{pg_pass}') + return + + +def maria_dump(): + output_file = './maria_dump.sql' + maria_dump_command = [ + 'mysqldump', + '-h', maria_server, + '-d', maria_db_name, + '-u', maria_super_user, + f'-p{maria_super_pass}', + '--ssl-verify-server-cert=false', + '--no-data=false', + '--verbose', + '--result-file=./maria_dump.sql', + ] + os.system(' '.join(maria_dump_command)) + return + + +def maria_restore(): + SQLALCHEMY_DATABASE_URI = f"mysql://{maria_super_user}:{maria_super_pass}@{maria_server}" + engine = create_engine(SQLALCHEMY_DATABASE_URI) + with engine.connect() as conn: + conn.execute(text(f"CREATE DATABASE IF NOT EXISTS {maria_db_name_two};")) + print(f'Database created: {maria_db_name_two} ') + engine.dispose() + maria_restore_input = f'mariadb -h {maria_server} -u {maria_super_user} -p{maria_super_pass} --ssl-verify-server-cert=false ' \ + f'{maria_db_name_two} < ./maria_dump.sql' + + print('Restoring new Maria database....') + os.system(maria_restore_input) + return + + +def get_data_counts_maria(): + SQLALCHEMY_DATABASE_URI = f"mysql+pymysql://{maria_super_user}:" \ + f"{maria_super_pass}@{maria_server}/{maria_db_name_two}" + engine = create_engine(SQLALCHEMY_DATABASE_URI) + maria_rows = {} + maria_columns = {} + + with engine.connect() as conn: + tables_query = text(f"SELECT table_name FROM information_schema.tables WHERE table_schema = '{maria_db_name_two}'" \ + " AND table_type = 'BASE 
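
Note: the count helpers here interpolate the schema name into SQL with f-strings. The schema name can instead be bound as a parameter; table names cannot be bound, but they come from information_schema itself, so interpolating them afterwards is safe. A minimal sketch of that pattern, assuming an existing SQLAlchemy engine:

    from sqlalchemy import text

    def count_rows(engine, schema):
        """Row counts per base table, binding the schema name as a parameter."""
        counts = {}
        with engine.connect() as conn:
            tables = conn.execute(
                text("SELECT table_name FROM information_schema.tables "
                     "WHERE table_schema = :schema AND table_type = 'BASE TABLE'"),
                {"schema": schema},
            ).scalars().all()
            for table in tables:
                # `table` was returned by information_schema, so it is safe
                # to interpolate as an identifier here.
                counts[table.lower()] = conn.execute(
                    text(f"SELECT COUNT(*) FROM `{table}`")
                ).scalar()
        return counts
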
TABLE';") + row_result = conn.execute(tables_query) + maria_tables = [row[0] for row in row_result] + for table in maria_tables: + row_query = text(f"SELECT COUNT(*) FROM {table};") + row_result = conn.execute(row_query) + row_count = row_result.scalar() + maria_rows[table.lower()] = row_count + + column_query = text(f"SELECT COUNT(*) FROM information_schema.columns WHERE table_schema = '{maria_db_name_two}' AND table_name = '{table}';") + column_result = conn.execute(column_query) + column_count = column_result.scalar() + maria_columns[table.lower()] = column_count + + engine.dispose() + return maria_rows, maria_columns + + +def get_data_counts_pg(database_name, username, password, schema): + SQLALCHEMY_DATABASE_URI = f"postgresql://{username}:{password}@{pg_server}/{database_name}" + engine = create_engine(SQLALCHEMY_DATABASE_URI) + pg_rows = {} + pg_columns = {} + + with engine.connect() as conn: + table_query = text(f"SELECT table_name FROM information_schema.tables WHERE table_catalog = '{database_name}'" \ + " AND table_type = 'BASE TABLE'" \ + f" AND table_schema = '{schema}'") + table_result = conn.execute(table_query) + pg_tables = [row[0] for row in table_result] + for table in pg_tables: + row_query = text(f"SELECT COUNT(*) FROM {database_name}.{schema}.{table};") + row_result = conn.execute(row_query) + row_count = row_result.scalar() + pg_rows[table.lower()] = row_count + + column_query = text(f"SELECT COUNT(*) FROM information_schema.columns WHERE table_catalog = '{database_name}' " + f"AND table_schema = '{schema}' AND table_name = '{table}';") + column_result = conn.execute(column_query) + column_count = column_result.scalar() + pg_columns[table.lower()] = column_count + engine.dispose() + return pg_rows, pg_columns + + + + +""" +Compares the data counts between tables, rows, and columns that vary between any two db's +""" +def compare_data_counts(db1_rows, db2_rows, db1_columns, db2_columns, db1, db2): + db1_rows_not_in_db2 = {table_name: (db1_rows[table_name], 0) for table_name in db1_rows if + table_name not in db2_rows} + db2_rows_not_in_db1 = {table_name: (0, db2_rows[table_name]) for table_name in db2_rows if + table_name not in db1_rows} + db1_cols_not_in_db2 = {table_name: (db1_columns[table_name], 0) for table_name in db1_columns if + table_name not in db2_columns} + db2_cols_not_in_db1 = {table_name: (0, db2_columns[table_name]) for table_name in db2_columns if + table_name not in db1_columns} + + if len(db1_rows_not_in_db2) == 0 and len(db2_rows_not_in_db1) == 0: + print(f"\nSuccess! All tables exist in both {db1} and {db2}. Checking row counts....\n") + else: + if len(db1_rows_not_in_db2) > 0: + print(f'\nERROR: {db1} tables that are not in {db2}:\n', [key for key in db1_rows_not_in_db2]) + if len(db2_rows_not_in_db1) > 0: + print(f'\nERROR: {db2} tables that are not in {db1}: \n', [key for key in db2_rows_not_in_db1]) + + row_count_difference = {key: (db1_rows[key], db2_rows[key]) for key in db1_rows if + key in db2_rows and db1_rows[key] != db2_rows[key]} + #row_count_difference.update(db1_rows_not_in_db2) + #row_count_difference.update(db2_rows_not_in_db1) + + col_count_difference = {key: (db1_columns[key], db2_columns[key]) for key in db1_columns if + key in db2_columns and db1_columns[key] != db2_columns[key]} + #col_count_difference.update(db1_cols_not_in_db2) + #col_count_difference.update(db2_cols_not_in_db1) + + if len(row_count_difference) == 0: + print(f"Success! 
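
Note: for a concrete sense of what compare_data_counts reports, a toy invocation (values invented for illustration):

    db1_rows = {"cols": 100, "refs": 50}
    db2_rows = {"cols": 100, "refs": 49, "liths": 10}
    # Shared key with differing counts -> row_count_difference == {"refs": (50, 49)}
    # "liths" exists only in db2, so it is reported as a missing table instead.
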
All row counts in all tables are the same in both {db1} and {db2}!\n")
+    else:
+        print(f'\nERROR: Row count differences for {len(row_count_difference)} tables in both {db1} and {db2} databases:\n'
+              f'Table Name: ({db1} Rows, {db2} Rows)\n'
+              f'{row_count_difference}')
+    if len(col_count_difference) == 0:
+        print(f"Success! All column counts in all tables are the same in both {db1} and {db2}!\n")
+    else:
+        print(f'\nERROR: Column count differences for {len(col_count_difference)} tables in both {db1} and {db2} databases:\n'
+              f'Table Name: ({db1} Columns, {db2} Columns)\n'
+              f'{col_count_difference}')
+
+    return row_count_difference, col_count_difference
+
+
+
+#connect pg_loader to external macrostrat_two database rather than schema
+#test if mariadb and postgresql connections work in docker container:
+#docker interactive: docker run -it --rm --network host dimitri/pgloader /bin/bash
+#docker update: apt-get update
+#docker install postgresql and mysql clients: apt-get install -y postgresql-client mysql-client curl
+#docker test postgresql connection: psql -h db.development.svc.macrostrat.org -U macrostrat-admin -d macrostrat_two
+#manually type in macrostrat-admin password: *@I/TW.-kSY5M,l[o4@9AuU}
+#execute query to test connection: select * from pg_catalog.pg_amop
+#
+def pg_loader_pre_script():
+    # Queries alter the MariaDB tables by adding a new column for the text data,
+    # setting the datatype of the new column data to WKT format,
+    # dropping the old geometry column, and
+    # adding default values for data formats that pgloader accepts.
+    SQLALCHEMY_DATABASE_URI = f"mysql://{maria_super_user}:{maria_super_pass}@{maria_server}/{maria_db_name_two}"
+    engine = create_engine(SQLALCHEMY_DATABASE_URI)
+    pre_script_queries = []
+    query_pbdb_matches = text("""
+        ALTER TABLE macrostrat_temp.pbdb_matches ADD COLUMN coordinate_point_text TEXT;
+        UPDATE macrostrat_temp.pbdb_matches SET coordinate_point_text = ST_AsText(coordinate);
+        ALTER TABLE macrostrat_temp.pbdb_matches DROP COLUMN coordinate;
+        UPDATE macrostrat_temp.pbdb_matches SET release_date = '2000-01-01' WHERE release_date = '0000-00-00 00:00:00';""")
+
+    query_places = text("""
+    ALTER TABLE macrostrat_temp.places ADD COLUMN geom_text LONGTEXT;
+    UPDATE macrostrat_temp.places
+    SET geom_text = ST_AsText(geom);
+    ALTER TABLE macrostrat_temp.places DROP COLUMN geom;
+    """)
+
+    query_refs = text("""
+    ALTER TABLE macrostrat_temp.refs ADD COLUMN rgeom_text LONGTEXT;
+    UPDATE macrostrat_temp.refs
+    SET rgeom_text = ST_AsText(rgeom);
+    ALTER TABLE macrostrat_temp.refs DROP COLUMN rgeom;
+    """)
+
+    query_unit_contacts = text("""
+    UPDATE unit_contacts
+    -- Enum data type can't be null so set to enum option 'below'.
+    SET contact = 'below'
+    WHERE contact = '';
+    UPDATE unit_contacts
+    -- Enum data type can't be null so set to enum option 'above'.
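
Note: the seven pre-script queries all follow one pattern: add a text column, copy the geometry into it as WKT, drop the geometry. A hypothetical helper that captures the pattern (identifiers here come from trusted code, not user input):

    from sqlalchemy import text

    def geometry_to_wkt(conn, table, column, text_type="LONGTEXT"):
        """Replace a MariaDB geometry column with its WKT text equivalent
        so pgloader can copy it across as plain text."""
        conn.execute(text(f"ALTER TABLE {table} ADD COLUMN {column}_text {text_type}"))
        conn.execute(text(f"UPDATE {table} SET {column}_text = ST_AsText({column})"))
        conn.execute(text(f"ALTER TABLE {table} DROP COLUMN {column}"))
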
+ SET old_contact = 'above' + WHERE old_contact = ''; + """) + + query_cols = text(""" + ALTER TABLE macrostrat_temp.cols ADD COLUMN coordinate_text LONGTEXT; + UPDATE macrostrat_temp.cols + SET coordinate_text = ST_AsText(coordinate); + ALTER TABLE macrostrat_temp.cols DROP COLUMN coordinate; + UPDATE macrostrat_temp.cols + SET created = '2000-01-01' + WHERE created = '0000-00-00 00:00:00'; + """) + + query_col_areas = text(""" + ALTER TABLE macrostrat_temp.col_areas ADD COLUMN col_area_text LONGTEXT; + UPDATE macrostrat_temp.col_areas + SET col_areas.col_area_text = ST_AsText(col_area); + ALTER TABLE macrostrat_temp.col_areas DROP COLUMN col_area; + + """) + + query_col_areas_6April2016 = text(""" + ALTER TABLE macrostrat_temp.col_areas_6April2016 ADD COLUMN col_area_text LONGTEXT; + UPDATE macrostrat_temp.col_areas_6April2016 + SET col_areas_6April2016.col_area_text = ST_AsText(col_area); + ALTER TABLE macrostrat_temp.col_areas_6April2016 DROP COLUMN col_area; + """) + pre_script_queries = [query_pbdb_matches, query_places, query_refs, query_unit_contacts, query_cols, query_col_areas, + query_col_areas_6April2016] + + with engine.connect() as conn: + + for query in pre_script_queries: + try: + result = conn.execute(query) + except Exception as e: + print(f"Error with {query}: {e}") + finally: + print(f"Successfully executed {query}.") + + engine.dispose() + return + + +def pg_loader_post_script(): + # Query alters the MariaDB pbdb_matches table by adding a new column for the text data, + # setting the datatype of the new column data to WKT format, + # dropping the old geometry column, + # adding default values for data formats that pgloader accepts + #vaccuum...refresh postgresql database after pgloader + SQLALCHEMY_DATABASE_URI = f"postgresql://{pg_user_migrate}:{pg_pass_migrate}@{pg_server}/{pg_db_name_two}?sslmode=prefer" + engine = create_engine(SQLALCHEMY_DATABASE_URI) #connect_args={'options': '-csearch_path=public,macrostrat_temp' + + + query_pbdb_matches = text(""" + ALTER TABLE macrostrat_two.macrostrat_temp.pbdb_matches ADD COLUMN coordinate geometry(Point, 4326); + UPDATE macrostrat_two.macrostrat_temp.pbdb_matches SET coordinate = ST_GeomFromText(coordinate_point_text, 4326); + ALTER TABLE macrostrat_two.macrostrat_temp.pbdb_matches DROP COLUMN coordinate_point_text; + SELECT * FROM macrostrat_two.macrostrat_temp.pbdb_matches LIMIT 5;""") + + query_places = text(""" + ALTER TABLE macrostrat_two.macrostrat_temp.places ADD COLUMN geom geometry; + UPDATE macrostrat_two.macrostrat_temp.places SET geom = ST_GeomFromText(geom_text, 4326); + ALTER TABLE macrostrat_two.macrostrat_temp.places DROP COLUMN geom_text; + SELECT * FROM macrostrat_two.macrostrat_temp.places LIMIT 5;""") + + query_refs = text(""" + ALTER TABLE macrostrat_two.macrostrat_temp.refs ADD COLUMN rgeom geometry; + UPDATE macrostrat_two.macrostrat_temp.refs SET rgeom = ST_GeomFromText(rgeom_text, 4326); + ALTER TABLE macrostrat_two.macrostrat_temp.refs DROP COLUMN rgeom_text; + SELECT * FROM macrostrat_two.macrostrat_temp.refs LIMIT 5;""") + + + query_cols = text(""" + ALTER TABLE macrostrat_two.macrostrat_temp.cols ADD COLUMN coordinate geometry; + UPDATE macrostrat_two.macrostrat_temp.cols SET coordinate = ST_GeomFromText(coordinate_text, 4326); + ALTER TABLE macrostrat_two.macrostrat_temp.cols DROP COLUMN coordinate_text; + SELECT * FROM macrostrat_two.macrostrat_temp.cols LIMIT 5;""") + + query_col_areas = text(""" + ALTER TABLE macrostrat_two.macrostrat_temp.col_areas ADD COLUMN col_area geometry; + UPDATE 
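
Note: the post-script queries around this point mirror the pre-script; a matching hypothetical helper for the PostgreSQL side:

    from sqlalchemy import text

    def wkt_to_geometry(conn, table, column, srid=4326):
        """Rebuild a PostGIS geometry column from the WKT text column
        that pgloader carried across, then drop the text column."""
        conn.execute(text(f"ALTER TABLE {table} ADD COLUMN {column} geometry"))
        conn.execute(text(
            f"UPDATE {table} SET {column} = ST_GeomFromText({column}_text, {srid})"
        ))
        conn.execute(text(f"ALTER TABLE {table} DROP COLUMN {column}_text"))
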
macrostrat_two.macrostrat_temp.col_areas SET col_area = ST_GeomFromText(col_area_text, 4326); + ALTER TABLE macrostrat_two.macrostrat_temp.col_areas DROP COLUMN col_area_text; + SELECT * FROM macrostrat_two.macrostrat_temp.col_areas LIMIT 5;""") + + query_col_areas_6April2016 = text(""" + ALTER TABLE macrostrat_two.macrostrat_temp.col_areas_6April2016 ADD COLUMN col_area geometry; + UPDATE macrostrat_two.macrostrat_temp.col_areas_6April2016 SET col_area = ST_GeomFromText(col_area_text, 4326); + ALTER TABLE macrostrat_two.macrostrat_temp.col_areas_6April2016 DROP COLUMN col_area_text; + SELECT * FROM macrostrat_two.macrostrat_temp.col_areas_6April2016 LIMIT 5;""") + + post_script_queries = [query_pbdb_matches, query_refs, query_cols, query_places, query_col_areas, query_col_areas_6April2016] + print('Starting PostScript execution....') + with engine.connect() as conn: + for query in post_script_queries: + try: + result = conn.execute(query.execution_options(autocommit=True)) + for row in result: + print(row) + except SQLAlchemyError as e: + print(f"Error: {e}") + #rollback the transaction if an error occurs + conn.execute(text("ROLLBACK;")) + + engine.dispose() + return + +def pg_loader(): + """ + Command terminal to run pgloader. Ensure Docker app is running. + """ + dockerfile_content = "FROM dimitri/pgloader:latest\n" \ + "RUN apt-get update && apt-get install -y postgresql-client\n" \ + "RUN apt-get install -y ca-certificates" + with open("Dockerfile", "w") as dockerfile: + dockerfile.write(dockerfile_content) + os.system("docker build -t pgloader-test .") + + + input_command = f"--with \"prefetch rows = 1000\" --verbose " \ + f"mysql://root:{maria_super_pass}@{maria_server}/{maria_db_name_two} " \ + f"postgresql://{pg_user_migrate}:{pg_pass_migrate}@{pg_server}/{pg_db_name_two}?sslmode=prefer"\ + + os.system(f"docker run -i --rm pgloader-test pgloader {input_command}") + return + + +def reset(): + SQLALCHEMY_DATABASE_URI = f"postgresql://{pg_user_migrate}:{pg_pass_migrate}@{pg_server}/{pg_db_name_two}" + pg_engine = create_engine(SQLALCHEMY_DATABASE_URI) + pg_drop_query = text(f"DROP SCHEMA macrostrat_temp CASCADE") # {new_migrate_schema_name} + + with pg_engine.connect() as conn: + conn.execute(pg_drop_query) + pg_engine.dispose() + + SQLALCHEMY_DATABASE_URI = f"mysql+pymysql://{maria_super_user}:" \ + f"{maria_super_pass}@{maria_server}/{maria_db_name_two}" + maria_engine = create_engine(SQLALCHEMY_DATABASE_URI) + maria_drop_query = text(f"DROP DATABASE {maria_db_name_two}") + + with maria_engine.connect() as conn: + conn.execute(maria_drop_query) + maria_engine.dispose() + + + + +if __name__ == "__main__": + #maria_dump() + #maria_restore() + #pg_loader_pre_script() + #pg_loader() + #pg_loader_post_script() + maria_rows, maria_columns = get_data_counts_maria() + pg_rows, pg_columns = get_data_counts_pg(pg_db_name, pg_user, pg_pass_encoded, 'macrostrat') + pg_macrostrat_two_rows, pg_macrostrat_two_columns = get_data_counts_pg(pg_db_name_two, pg_user_migrate, pg_pass_migrate, 'macrostrat_temp') + print('\nMARIADB (db1) comparison to PG MACROSTRAT_TWO (db2). These should be clones. ') + db1 = 'MariaDB' + db2 = 'PG Macrostrat_Two' + row_variance, column_variance = compare_data_counts(maria_rows, pg_macrostrat_two_rows, maria_columns, + pg_macrostrat_two_columns, db1, db2) + print('\nPG MACROSTRAT_TWO (db1 maria db clone) comparison to PG MACROSTRAT (db2). 
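
Note: requirements.txt pins SQLAlchemy 2.0.7, but the `execution_options(autocommit=True)` call in the post-script loop above was removed in SQLAlchemy 2.0, and issuing a literal ROLLBACK is likewise 1.x style. The 2.0 idiom is an explicit transaction block, roughly:

    with engine.begin() as conn:  # commits on success, rolls back on any exception
        for query in post_script_queries:
            conn.execute(query)
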
This will show what data ' + 'needs to be moved over from Maria to PG prod.') + db1 = 'PG Macrostrat_Two' + db2 = 'PG Macrostrat' + row_variance_two, column_variance_two = compare_data_counts(pg_macrostrat_two_rows, pg_rows, pg_macrostrat_two_columns, + pg_columns, db1, db2) + #reset() \ No newline at end of file From 677cc4a841878fbafe17cd6c65a8837b74a4a471 Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Wed, 17 Jul 2024 15:56:23 -0500 Subject: [PATCH 02/48] modified .gitignore to ignore specific files --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 033df5fb..3aa15cb7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ .venv __pycache__ +/MariaDB Migration to PostgreSQL/.idea +/MariaDB Migration to PostgreSQL/Constants.py From 871128b6dc00185b4307b86ccaf17c1101a4eed5 Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Mon, 22 Jul 2024 13:46:02 -0500 Subject: [PATCH 03/48] Refactored some code --- .../requirements.txt | 2 +- MariaDB Migration to PostgreSQL/utils.py | 49 +++++++++++++------ 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/MariaDB Migration to PostgreSQL/requirements.txt b/MariaDB Migration to PostgreSQL/requirements.txt index 59ea34b7..21dd2320 100644 --- a/MariaDB Migration to PostgreSQL/requirements.txt +++ b/MariaDB Migration to PostgreSQL/requirements.txt @@ -1,4 +1,4 @@ SQLAlchemy==2.0.7 PyMySQL==1.1.1 psycopg2==2.9.9 - +pandas diff --git a/MariaDB Migration to PostgreSQL/utils.py b/MariaDB Migration to PostgreSQL/utils.py index 31d8b3ed..a7da67ca 100644 --- a/MariaDB Migration to PostgreSQL/utils.py +++ b/MariaDB Migration to PostgreSQL/utils.py @@ -1,3 +1,4 @@ +import pandas as pd from sqlalchemy import text, create_engine, inspect from Constants import * import os @@ -154,6 +155,22 @@ def compare_data_counts(db1_rows, db2_rows, db1_columns, db2_columns, db1, db2): return row_count_difference, col_count_difference +def find_row_variances(database_name_one, schema_one, database_name_two, schema_two, username, password, table): + SQLALCHEMY_DATABASE_URI = f"postgresql://{username}:{password}@{pg_server}/{database_name_one}" + engine = create_engine(SQLALCHEMY_DATABASE_URI) + with engine.connect() as conn: + query = text(f"SELECT * FROM {schema_one}.{table}") + result = conn.execute(query) + df = pd.DataFrame(result) + engine.dispose() + SQLALCHEMY_DATABASE_URI = f"postgresql://{username}:{password}@{pg_server}/{database_name_two}" + engine = create_engine(SQLALCHEMY_DATABASE_URI) + with engine.connect() as conn: + query = text(f"SELECT * FROM {schema_two}.{table}") + result = conn.execute(query) + df_two = pd.DataFrame(result) + engine.dispose() + return df, df_two #connect pg_loader to external macrostrat_two database rather than schema @@ -355,18 +372,20 @@ def reset(): #pg_loader_pre_script() #pg_loader() #pg_loader_post_script() - maria_rows, maria_columns = get_data_counts_maria() - pg_rows, pg_columns = get_data_counts_pg(pg_db_name, pg_user, pg_pass_encoded, 'macrostrat') - pg_macrostrat_two_rows, pg_macrostrat_two_columns = get_data_counts_pg(pg_db_name_two, pg_user_migrate, pg_pass_migrate, 'macrostrat_temp') - print('\nMARIADB (db1) comparison to PG MACROSTRAT_TWO (db2). These should be clones. ') - db1 = 'MariaDB' - db2 = 'PG Macrostrat_Two' - row_variance, column_variance = compare_data_counts(maria_rows, pg_macrostrat_two_rows, maria_columns, - pg_macrostrat_two_columns, db1, db2) - print('\nPG MACROSTRAT_TWO (db1 maria db clone) comparison to PG MACROSTRAT (db2). 
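
Note: find_row_variances added in this patch returns two DataFrames but leaves the actual comparison to the caller. One hedged way to finish the job is an indicator merge (assumes both frames share a key column):

    import pandas as pd

    def rows_only_in_first(df, df_two, key):
        """Rows of df whose key value does not occur in df_two."""
        merged = df.merge(df_two[[key]], on=key, how="left", indicator=True)
        return merged[merged["_merge"] == "left_only"].drop(columns="_merge")
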
This will show what data ' - 'needs to be moved over from Maria to PG prod.') - db1 = 'PG Macrostrat_Two' - db2 = 'PG Macrostrat' - row_variance_two, column_variance_two = compare_data_counts(pg_macrostrat_two_rows, pg_rows, pg_macrostrat_two_columns, - pg_columns, db1, db2) - #reset() \ No newline at end of file + #maria_rows, maria_columns = get_data_counts_maria() + #pg_rows, pg_columns = get_data_counts_pg(pg_db_name, pg_user, pg_pass_encoded, 'macrostrat') + #pg_macrostrat_two_rows, pg_macrostrat_two_columns = get_data_counts_pg(pg_db_name_two, pg_user_migrate, pg_pass_migrate, 'macrostrat_temp') + #print('\nMARIADB (db1) comparison to PG MACROSTRAT_TWO (db2). These should be clones. ') + #db1 = 'MariaDB' + #db2 = 'PG Macrostrat_Two' + #row_variance, column_variance = compare_data_counts(maria_rows, pg_macrostrat_two_rows, maria_columns, + #pg_macrostrat_two_columns, db1, db2) + #print('\nPG MACROSTRAT_TWO (db1 maria db clone) comparison to PG MACROSTRAT (db2). This will show what data ' + #'needs to be moved over from Maria to PG prod.') + #db1 = 'PG Macrostrat_Two' + #db2 = 'PG Macrostrat' + #row_variance_two, column_variance_two = compare_data_counts(pg_macrostrat_two_rows, pg_rows, pg_macrostrat_two_columns, + #pg_columns, db1, db2) + #reset() + df, df_two = find_row_variances(pg_db_name, pg_db_name, pg_db_name_two, maria_db_name_two, + pg_user, pg_pass_new, 'cols') \ No newline at end of file From 66c161af2505e98138e3314b2b19ad6201b1aec8 Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Thu, 25 Jul 2024 14:34:19 -0500 Subject: [PATCH 04/48] updated utils.py --- MariaDB Migration to PostgreSQL/utils.py | 170 ++++++++++++----------- 1 file changed, 89 insertions(+), 81 deletions(-) diff --git a/MariaDB Migration to PostgreSQL/utils.py b/MariaDB Migration to PostgreSQL/utils.py index a7da67ca..f75d9d43 100644 --- a/MariaDB Migration to PostgreSQL/utils.py +++ b/MariaDB Migration to PostgreSQL/utils.py @@ -1,34 +1,40 @@ -import pandas as pd -from sqlalchemy import text, create_engine, inspect +from sqlalchemy import text, create_engine from Constants import * import os from sqlalchemy.exc import SQLAlchemyError +import pandas as pd + import time """ Copies table structure and table data from one schema to another schema on the same host. 
Command line in cmd.exe language """ -def pg_dump(): +def pg_dump(server, user, password, dbname): os.system(f'pg_dump -h {pg_server} -d {pg_db_name} -U {pg_user} -W -F d -f ./postgres_dump') os.system(f'{pg_pass}') print('Starting database export........') return -def pg_restore(): +def pg_restore(server, user, password, dbname): os.system(f'pg_dump -h {pg_server} -d {pg_db_name_two} -U {pg_user} -W -F d ./postgres_dump') os.system(f'{pg_pass}') return -def maria_dump(): +def maria_dump(server, user, password, dbname): + SQLALCHEMY_DATABASE_URI = f"mysql+pymysql://{user}:{password}@{server}/{dbname}" + engine = create_engine(SQLALCHEMY_DATABASE_URI) + with engine.connect() as conn: + conn.execute(text(f"CREATE DATABASE IF NOT EXISTS {maria_db_name_two};")) + engine.dispose() output_file = './maria_dump.sql' maria_dump_command = [ 'mysqldump', - '-h', maria_server, - '-d', maria_db_name, - '-u', maria_super_user, - f'-p{maria_super_pass}', + '-h', server, + '-d', dbname, + '-u', user, + f'-p{password}', '--ssl-verify-server-cert=false', '--no-data=false', '--verbose', @@ -38,15 +44,10 @@ def maria_dump(): return -def maria_restore(): - SQLALCHEMY_DATABASE_URI = f"mysql://{maria_super_user}:{maria_super_pass}@{maria_server}" - engine = create_engine(SQLALCHEMY_DATABASE_URI) - with engine.connect() as conn: - conn.execute(text(f"CREATE DATABASE IF NOT EXISTS {maria_db_name_two};")) - print(f'Database created: {maria_db_name_two} ') - engine.dispose() - maria_restore_input = f'mariadb -h {maria_server} -u {maria_super_user} -p{maria_super_pass} --ssl-verify-server-cert=false ' \ - f'{maria_db_name_two} < ./maria_dump.sql' +def maria_restore(server, user, password, dbname): + + maria_restore_input = f'mariadb -h {server} -u {user} -p{password} --ssl-verify-server-cert=false ' \ + f'{dbname} < ./maria_dump.sql' print('Restoring new Maria database....') os.system(maria_restore_input) @@ -155,6 +156,10 @@ def compare_data_counts(db1_rows, db2_rows, db1_columns, db2_columns, db1, db2): return row_count_difference, col_count_difference + +""" +Script to output dataframes for comparing data between two databases and tables. 
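
Note: the `-W` flag in the pg_dump/pg_restore command strings above makes the client prompt interactively, so feeding the password as a separate shell command can never satisfy that prompt. The usual non-interactive pattern passes it through the environment; a sketch:

    import os
    import subprocess

    def run_pg_dump(server, user, password, dbname, schema, out="./postgres_dump"):
        env = {**os.environ, "PGPASSWORD": password}  # read by pg_dump via libpq
        subprocess.run(
            ["pg_dump", "-h", server, "-d", dbname, "-U", user,
             f"--schema={schema}", "-F", "d", "-f", out],
            env=env, check=True,
        )
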
+""" def find_row_variances(database_name_one, schema_one, database_name_two, schema_two, username, password, table): SQLALCHEMY_DATABASE_URI = f"postgresql://{username}:{password}@{pg_server}/{database_name_one}" engine = create_engine(SQLALCHEMY_DATABASE_URI) @@ -173,44 +178,31 @@ def find_row_variances(database_name_one, schema_one, database_name_two, schema_ return df, df_two -#connect pg_loader to external macrostrat_two database rather than schema -#test if mariadb and postgresql connections work in docker container: -#docker interactive: docker run -it --rm --network host dimitri/pgloader /bin/bash -#docker udpate: apt-get update -#docker install postgresql and mysql client: apt-get install -y postgresql-client mysql-client curl -#docker test postgresql connection: psql -h db.development.svc.macrostrat.org -U macrostrat-admin -d macrostrat_two -#manually type in macrostrat-admin password: *@I/TW.-kSY5M,l[o4@9AuU} -#execute query to test connection: select * from pg_catalog.pg_amop -# def pg_loader_pre_script(): - # Query alters the MariaDB pbdb_matches table by adding a new column for the text data, + # Query alters the MariaDB tables by adding a new column for geom -> text data, # setting the datatype of the new column data to WKT format, # dropping the old geometry column, # adding default values for data formats that pgloader accepts - SQLALCHEMY_DATABASE_URI = f"mysql://{maria_super_user}:{maria_super_pass}@{maria_server}/{maria_db_name_two}" - engine = create_engine(SQLALCHEMY_DATABASE_URI) - pre_script_queries = [] - query_pbdb_matches = text(""" - ALTER TABLE macrostrat_temp.pbdb_matches ADD COLUMN coordinate_point_text TEXT; - UPDATE macrostrat_temp.pbdb_matches SET coordinate_point_text = ST_AsText(coordinate); - ALTER TABLE macrostrat_temp.pbdb_matches DROP COLUMN coordinate; - UPDATE macrostrat_temp.pbdb_matches SET release_date = '2000-01-01' WHERE release_date = '0000-00-00 00:00:00';""") + query_pbdb_matches = """ALTER TABLE macrostrat_temp.pbdb_matches ADD COLUMN coordinate_point_text TEXT; + UPDATE macrostrat_temp.pbdb_matches SET coordinate_point_text = ST_AsText(coordinate); + ALTER TABLE macrostrat_temp.pbdb_matches DROP COLUMN coordinate; + UPDATE macrostrat_temp.pbdb_matches SET release_date = '2000-01-01' WHERE release_date = '0000-00-00 00:00:00';""" - query_places = text(""" + query_places = """ ALTER TABLE macrostrat_temp.places ADD COLUMN geom_text LONGTEXT; UPDATE macrostrat_temp.places SET geom_text = ST_AsText(geom); ALTER TABLE macrostrat_temp.places DROP COLUMN geom; - """) + """ - query_refs = text(""" + query_refs = """ ALTER TABLE macrostrat_temp.refs ADD COLUMN rgeom_text LONGTEXT; UPDATE macrostrat_temp.refs SET rgeom_text = ST_AsText(rgeom); ALTER TABLE macrostrat_temp.refs DROP COLUMN rgeom; - """) + """ - query_unit_contacts = text(""" + query_unit_contacts = """ UPDATE unit_contacts -- Enum data type can't be null so set to enum option 'below'. SET contact = 'below' @@ -219,47 +211,60 @@ def pg_loader_pre_script(): -- enum data type can't be null so set to enum option 'above'. 
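
Note on the RIGHT JOIN in find_row_variances above: a RIGHT JOIN preserves every row of the temp table, so `WHERE t.<col> IS NULL` only matches rows whose key is literally NULL. To list Macrostrat rows missing from the temp copy, as the print statement says, the NULL test belongs on the unpreserved side; a corrected sketch using the same in-scope names:

    query = f"""
        SELECT m.{first_column_name}
        FROM macrostrat.macrostrat.{table} m
        LEFT JOIN macrostrat.macrostrat_temp.{table} t
               ON m.{first_column_name} = t.{first_column_name}
        WHERE t.{first_column_name} IS NULL;  -- no match on the temp side
    """
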
SET old_contact = 'above' WHERE old_contact = ''; - """) + """ - query_cols = text(""" - ALTER TABLE macrostrat_temp.cols ADD COLUMN coordinate_text LONGTEXT; + query_cols = """ + ALTER TABLE macrostrat_temp.cols ADD COLUMN coordinate_text LONGTEXT; UPDATE macrostrat_temp.cols SET coordinate_text = ST_AsText(coordinate); ALTER TABLE macrostrat_temp.cols DROP COLUMN coordinate; UPDATE macrostrat_temp.cols SET created = '2000-01-01' WHERE created = '0000-00-00 00:00:00'; - """) + """ - query_col_areas = text(""" - ALTER TABLE macrostrat_temp.col_areas ADD COLUMN col_area_text LONGTEXT; + query_col_areas = """ + ALTER TABLE macrostrat_temp.col_areas ADD COLUMN col_area_text LONGTEXT; UPDATE macrostrat_temp.col_areas SET col_areas.col_area_text = ST_AsText(col_area); ALTER TABLE macrostrat_temp.col_areas DROP COLUMN col_area; - """) + """ - query_col_areas_6April2016 = text(""" - ALTER TABLE macrostrat_temp.col_areas_6April2016 ADD COLUMN col_area_text LONGTEXT; + query_col_areas_6April2016 = """ + ALTER TABLE macrostrat_temp.col_areas_6April2016 ADD COLUMN col_area_text LONGTEXT; UPDATE macrostrat_temp.col_areas_6April2016 SET col_areas_6April2016.col_area_text = ST_AsText(col_area); ALTER TABLE macrostrat_temp.col_areas_6April2016 DROP COLUMN col_area; - """) + """ pre_script_queries = [query_pbdb_matches, query_places, query_refs, query_unit_contacts, query_cols, query_col_areas, query_col_areas_6April2016] + URL = f"mysql+pymysql://{maria_super_user}:{maria_super_pass}@{maria_server}/{maria_db_name_two}" + engine = create_engine(URL) with engine.connect() as conn: - for query in pre_script_queries: - try: - result = conn.execute(query) - except Exception as e: - print(f"Error with {query}: {e}") - finally: - print(f"Successfully executed {query}.") - + statements = query.split(';') + for statement in statements: + if statement.strip(): + try: + conn.execute(text(statement)) + print(f"Successfully executed: {statement}") + except Exception as e: + print(f"Error with statement: {statement}\n{e}") engine.dispose() return +''' + #create db, create temp user before pgloader + URL = f"postgresql://{pg_user}:{pg_pass_new}@{pg_server}/{pg_db_name}" + pg_engine = create_engine(URL) + with pg_engine.connect() as conn: + conn.execute(text(f"CREATE DATABASE {pg_db_name_two}")) + conn.execute(text(f"DROP USER IF EXISTS {pg_user_maria_temp};")) + conn.execute(text(f"CREATE USER maria_migrate WITH PASSWORD '{pg_pass_maria_temp}'")) + conn.execute(text(f"GRANT CONNECT ON DATABASE {pg_db_name_two} TO {pg_user_maria_temp};")) + pg_engine.dispose()''' + def pg_loader_post_script(): @@ -268,6 +273,7 @@ def pg_loader_post_script(): # dropping the old geometry column, # adding default values for data formats that pgloader accepts #vaccuum...refresh postgresql database after pgloader + #CREATE EXTENSION IF NOT EXISTS postgis; SQLALCHEMY_DATABASE_URI = f"postgresql://{pg_user_migrate}:{pg_pass_migrate}@{pg_server}/{pg_db_name_two}?sslmode=prefer" engine = create_engine(SQLALCHEMY_DATABASE_URI) #connect_args={'options': '-csearch_path=public,macrostrat_temp' @@ -339,14 +345,15 @@ def pg_loader(): input_command = f"--with \"prefetch rows = 1000\" --verbose " \ f"mysql://root:{maria_super_pass}@{maria_server}/{maria_db_name_two} " \ - f"postgresql://{pg_user_migrate}:{pg_pass_migrate}@{pg_server}/{pg_db_name_two}?sslmode=prefer"\ + f"postgresql://{pg_user_migrate}:{pg_pass_migrate}@{pg_server}/{pg_db_name_two}?sslmode=prefer" + print(input_command) os.system(f"docker run -i --rm pgloader-test pgloader 
{input_command}") return def reset(): - SQLALCHEMY_DATABASE_URI = f"postgresql://{pg_user_migrate}:{pg_pass_migrate}@{pg_server}/{pg_db_name_two}" + SQLALCHEMY_DATABASE_URI = f"{pg_user_maria_temp}:{pg_pass_maria_temp}@{pg_server}/{pg_db_name_two}" pg_engine = create_engine(SQLALCHEMY_DATABASE_URI) pg_drop_query = text(f"DROP SCHEMA macrostrat_temp CASCADE") # {new_migrate_schema_name} @@ -367,25 +374,26 @@ def reset(): if __name__ == "__main__": - #maria_dump() - #maria_restore() - #pg_loader_pre_script() + #maria_dump(maria_server, maria_super_user, maria_super_pass, maria_db_name) + #maria_restore(maria_server, maria_super_user, maria_super_pass, maria_db_name_two) + pg_loader_pre_script() #pg_loader() - #pg_loader_post_script() - #maria_rows, maria_columns = get_data_counts_maria() - #pg_rows, pg_columns = get_data_counts_pg(pg_db_name, pg_user, pg_pass_encoded, 'macrostrat') - #pg_macrostrat_two_rows, pg_macrostrat_two_columns = get_data_counts_pg(pg_db_name_two, pg_user_migrate, pg_pass_migrate, 'macrostrat_temp') - #print('\nMARIADB (db1) comparison to PG MACROSTRAT_TWO (db2). These should be clones. ') - #db1 = 'MariaDB' - #db2 = 'PG Macrostrat_Two' - #row_variance, column_variance = compare_data_counts(maria_rows, pg_macrostrat_two_rows, maria_columns, - #pg_macrostrat_two_columns, db1, db2) - #print('\nPG MACROSTRAT_TWO (db1 maria db clone) comparison to PG MACROSTRAT (db2). This will show what data ' - #'needs to be moved over from Maria to PG prod.') - #db1 = 'PG Macrostrat_Two' - #db2 = 'PG Macrostrat' - #row_variance_two, column_variance_two = compare_data_counts(pg_macrostrat_two_rows, pg_rows, pg_macrostrat_two_columns, - #pg_columns, db1, db2) + pg_loader_post_script() + maria_rows, maria_columns = get_data_counts_maria() + pg_rows, pg_columns = get_data_counts_pg(pg_db_name, pg_user, pg_pass_new, 'macrostrat') + pg_macrostrat_two_rows, pg_macrostrat_two_columns = get_data_counts_pg(pg_db_name_two, pg_user_migrate, pg_pass_migrate, 'macrostrat_temp') + + print('\nMARIADB (db1) comparison to PG MACROSTRAT_TWO (db2). These should be clones. ') + db1 = 'MariaDB' + db2 = 'PG Macrostrat_Two' + row_variance, column_variance = compare_data_counts(maria_rows, pg_macrostrat_two_rows, maria_columns, + pg_macrostrat_two_columns, db1, db2) + print('\nPG MACROSTRAT_TWO (db1 maria db clone) comparison to PG MACROSTRAT (db2). 
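
Note: under SQLAlchemy 2.0 a bare connect()/execute() runs inside a transaction that is rolled back when the connection closes without an explicit commit, so the PostgreSQL DROP SCHEMA in reset() may silently not persist (the patch-04 reset() URI also lost its postgresql:// scheme). A sketch of the "commit as you go" form:

    with pg_engine.connect() as conn:
        conn.execute(text("DROP SCHEMA IF EXISTS macrostrat_temp CASCADE"))
        conn.commit()  # required in SQLAlchemy 2.0's commit-as-you-go style
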
This will show what data ' + 'needs to be moved over from Maria to PG prod.') + db1 = 'PG Macrostrat_Two' + db2 = 'PG Macrostrat' + row_variance_two, column_variance_two = compare_data_counts(pg_macrostrat_two_rows, pg_rows, pg_macrostrat_two_columns, + pg_columns, db1, db2) #reset() - df, df_two = find_row_variances(pg_db_name, pg_db_name, pg_db_name_two, maria_db_name_two, - pg_user, pg_pass_new, 'cols') \ No newline at end of file + #df, df_two = find_row_variances(pg_db_name, pg_db_name, pg_db_name_two, maria_db_name_two, + #pg_user, pg_pass_new, 'cols') From b2700bdb4be30e78ca020fc56dde562c59dd7b46 Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Thu, 25 Jul 2024 14:41:56 -0500 Subject: [PATCH 05/48] added liths prescript query --- MariaDB Migration to PostgreSQL/utils.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/MariaDB Migration to PostgreSQL/utils.py b/MariaDB Migration to PostgreSQL/utils.py index f75d9d43..72b4c3f0 100644 --- a/MariaDB Migration to PostgreSQL/utils.py +++ b/MariaDB Migration to PostgreSQL/utils.py @@ -237,8 +237,14 @@ def pg_loader_pre_script(): SET col_areas_6April2016.col_area_text = ST_AsText(col_area); ALTER TABLE macrostrat_temp.col_areas_6April2016 DROP COLUMN col_area; """ + + query_liths = """ + UPDATE macrostrat_temp.liths + SET macrostrat_temp.lith_group = null + where macrostrat_temp.lith_group = '';""" + pre_script_queries = [query_pbdb_matches, query_places, query_refs, query_unit_contacts, query_cols, query_col_areas, - query_col_areas_6April2016] + query_col_areas_6April2016, query_liths] URL = f"mysql+pymysql://{maria_super_user}:{maria_super_pass}@{maria_server}/{maria_db_name_two}" engine = create_engine(URL) @@ -376,9 +382,9 @@ def reset(): if __name__ == "__main__": #maria_dump(maria_server, maria_super_user, maria_super_pass, maria_db_name) #maria_restore(maria_server, maria_super_user, maria_super_pass, maria_db_name_two) - pg_loader_pre_script() + #pg_loader_pre_script() #pg_loader() - pg_loader_post_script() + #pg_loader_post_script() maria_rows, maria_columns = get_data_counts_maria() pg_rows, pg_columns = get_data_counts_pg(pg_db_name, pg_user, pg_pass_new, 'macrostrat') pg_macrostrat_two_rows, pg_macrostrat_two_columns = get_data_counts_pg(pg_db_name_two, pg_user_migrate, pg_pass_migrate, 'macrostrat_temp') From b4da9678479401cf6fbcc5d83e7be77d21e9dce6 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Thu, 25 Jul 2024 16:42:56 -0500 Subject: [PATCH 06/48] Moved entire mariadb_migration system to subdirectory --- .gitignore | 2 -- .../macrostrat/cli/database/mariadb_migration}/Dockerfile | 0 cli/macrostrat/cli/database/mariadb_migration/__init__.py | 0 .../macrostrat/cli/database/mariadb_migration}/requirements.txt | 0 .../macrostrat/cli/database/mariadb_migration}/utils.py | 0 5 files changed, 2 deletions(-) rename {MariaDB Migration to PostgreSQL => cli/macrostrat/cli/database/mariadb_migration}/Dockerfile (100%) create mode 100644 cli/macrostrat/cli/database/mariadb_migration/__init__.py rename {MariaDB Migration to PostgreSQL => cli/macrostrat/cli/database/mariadb_migration}/requirements.txt (100%) rename {MariaDB Migration to PostgreSQL => cli/macrostrat/cli/database/mariadb_migration}/utils.py (100%) diff --git a/.gitignore b/.gitignore index 3aa15cb7..033df5fb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,2 @@ .venv __pycache__ -/MariaDB Migration to PostgreSQL/.idea -/MariaDB Migration to PostgreSQL/Constants.py diff --git a/MariaDB Migration to PostgreSQL/Dockerfile 
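
Note on the liths query added in this patch: MariaDB accepts a table qualifier on the left side of SET, but a schema qualifier such as `SET macrostrat_temp.lith_group` is likely to raise an unknown-column error. The safe spelling would be:

    query_liths = """
    UPDATE macrostrat_temp.liths
    SET lith_group = NULL
    WHERE lith_group = '';"""
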
b/cli/macrostrat/cli/database/mariadb_migration/Dockerfile similarity index 100% rename from MariaDB Migration to PostgreSQL/Dockerfile rename to cli/macrostrat/cli/database/mariadb_migration/Dockerfile diff --git a/cli/macrostrat/cli/database/mariadb_migration/__init__.py b/cli/macrostrat/cli/database/mariadb_migration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/MariaDB Migration to PostgreSQL/requirements.txt b/cli/macrostrat/cli/database/mariadb_migration/requirements.txt similarity index 100% rename from MariaDB Migration to PostgreSQL/requirements.txt rename to cli/macrostrat/cli/database/mariadb_migration/requirements.txt diff --git a/MariaDB Migration to PostgreSQL/utils.py b/cli/macrostrat/cli/database/mariadb_migration/utils.py similarity index 100% rename from MariaDB Migration to PostgreSQL/utils.py rename to cli/macrostrat/cli/database/mariadb_migration/utils.py From 28c062a9cacd1cd664d2c360d6ebfbe9249d29aa Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Thu, 25 Jul 2024 16:55:57 -0500 Subject: [PATCH 07/48] Incorporated legacy migration command into new directory --- cli/macrostrat/cli/database/__init__.py | 48 ++----------------- .../database/mariadb_migration/__init__.py | 40 ++++++++++++++++ 2 files changed, 45 insertions(+), 43 deletions(-) diff --git a/cli/macrostrat/cli/database/__init__.py b/cli/macrostrat/cli/database/__init__.py index 878bfdf5..eeae5bad 100644 --- a/cli/macrostrat/cli/database/__init__.py +++ b/cli/macrostrat/cli/database/__init__.py @@ -2,27 +2,24 @@ from pathlib import Path from sys import exit, stderr, stdin from typing import Any, Callable -from urllib.parse import quote import typer from macrostrat.database import Database from macrostrat.utils.shell import run from pydantic import BaseModel from rich import print -from sqlalchemy import create_engine, text -from sqlalchemy_utils import create_database +from sqlalchemy import text from typer import Argument, Option from .migrations import run_migrations from macrostrat.core import MacrostratSubsystem, app from macrostrat.core.utils import is_pg_url +from .mariadb_migration import import_mariadb from .._dev.utils import ( - _create_database_if_not_exists, - _docker_local_run_args, raw_database_url, ) -from ._legacy import * +from ._legacy import get_db __here__ = Path(__file__).parent fixtures_dir = __here__.parent / "fixtures" @@ -407,41 +404,6 @@ def field_title(name): db_app.command(name="migrations", rich_help_panel="Schema management")(run_migrations) - -@db_app.command( +db_app.command( name="import-mariadb", rich_help_panel="Schema management", deprecated=True -) -def import_legacy(): - """Import legacy MariaDB database to PostgreSQL using pgloader""" - # Run pgloader in docker - - cfg = app.settings - - args = _docker_local_run_args(postgres_container="dimitri/pgloader") - - # Get the database URL - db = get_db() - url = db.engine.url - url = url.set(database="macrostrat_v1") - - _create_database_if_not_exists(url, create=True) - - pg_url = str(url) - # Repl - - pg_url = cfg.get("pg_database", None) - - url = pg_url + "_v1" - - dburl = cfg.get("mysql_database", None) - if dburl is None: - raise Exception("No MariaDB database URL available in configuration") - - run( - *args, - "pgloader", - "--with", - "prefetch rows = 1000", - str(dburl), - str(url), - ) +)(import_mariadb) diff --git a/cli/macrostrat/cli/database/mariadb_migration/__init__.py b/cli/macrostrat/cli/database/mariadb_migration/__init__.py index e69de29b..d655d641 100644 --- 
a/cli/macrostrat/cli/database/mariadb_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb_migration/__init__.py @@ -0,0 +1,40 @@ +from macrostrat.utils.shell import run +from .._legacy import get_db + +from macrostrat.core import app + +from ..._dev.utils import ( + _create_database_if_not_exists, + _docker_local_run_args, +) + + +def import_mariadb(target_database="macrostrat_from_mariadb"): + """Import legacy MariaDB database to PostgreSQL using pgloader""" + # Run pgloader in docker + + cfg = app.settings + + args = _docker_local_run_args(postgres_container="dimitri/pgloader") + + # Get the database URL + db = get_db() + url = db.engine.url + url = url.set(database=target_database) + + _create_database_if_not_exists(url, create=True) + + pg_url = str(url) + + dburl = cfg.get("mysql_database", None) + if dburl is None: + raise Exception("No MariaDB database URL available in configuration") + + run( + *args, + "pgloader", + "--with", + "prefetch rows = 1000", + str(dburl), + str(pg_url), + ) From 00c1a0215db4e5c68a8f3dec56a72e0012f99207 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Fri, 26 Jul 2024 01:19:48 -0500 Subject: [PATCH 08/48] Created basic command to run MariaDB CLI --- cli/macrostrat/cli/cli.py | 12 +++++ .../cli/database/mariadb/__init__.py | 44 +++++++++++++++++++ cli/macrostrat/cli/database/mariadb/utils.py | 18 ++++++++ 3 files changed, 74 insertions(+) create mode 100644 cli/macrostrat/cli/database/mariadb/__init__.py create mode 100644 cli/macrostrat/cli/database/mariadb/utils.py diff --git a/cli/macrostrat/cli/cli.py b/cli/macrostrat/cli/cli.py index afb478c6..afdb9b53 100644 --- a/cli/macrostrat/cli/cli.py +++ b/cli/macrostrat/cli/cli.py @@ -270,6 +270,18 @@ def update_tileserver(db): app.subsystems.add(MacrostratAPISubsystem(app)) +# Mariadb CLI +if mariadb_url := getattr(settings, "mysql_database", None): + from .database.mariadb import app as mariadb_app + + main.add_typer( + mariadb_app, + name="mariadb", + rich_help_panel="Subsystems", + short_help="Manage the MariaDB database", + ) + + app.finish_loading_subsystems() diff --git a/cli/macrostrat/cli/database/mariadb/__init__.py b/cli/macrostrat/cli/database/mariadb/__init__.py new file mode 100644 index 00000000..e68afaff --- /dev/null +++ b/cli/macrostrat/cli/database/mariadb/__init__.py @@ -0,0 +1,44 @@ +import os + +from typer import Typer, Context +from sys import stdin +from macrostrat.utils.shell import run +from sqlalchemy.engine.url import URL + +from .utils import build_connection_args + +app = Typer(no_args_is_help=True) + +mariadb_container = "mariadb:10.10" + + +@app.command( + name="cli", + add_help_option=False, + context_settings={"allow_extra_args": True, "ignore_unknown_options": True}, +) +def cli_command(ctx: Context): + """Run psql in the database container""" + from macrostrat.core.config import docker_internal_url, mysql_database + + _database: URL = docker_internal_url(mysql_database) + + flags = [ + "-i", + "--rm", + "--network", + "host", + ] + + if len(ctx.args) == 0 and stdin.isatty(): + flags.append("-t") + + run( + "docker", + "run", + *flags, + mariadb_container, + "mariadb", + *build_connection_args(_database), + *ctx.args, + ) diff --git a/cli/macrostrat/cli/database/mariadb/utils.py b/cli/macrostrat/cli/database/mariadb/utils.py new file mode 100644 index 00000000..b1c952d7 --- /dev/null +++ b/cli/macrostrat/cli/database/mariadb/utils.py @@ -0,0 +1,18 @@ +from sqlalchemy.engine.url import URL + + +def build_connection_args(url: URL) -> [str]: + """Build MariaDB 
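
Note: for illustration, the connection-argument builder below expands a SQLAlchemy URL into client flags roughly as follows (hypothetical credentials):

    from sqlalchemy.engine import make_url

    url = make_url("mysql+pymysql://user:secret@localhost:3306/macrostrat")
    build_connection_args(url)
    # -> ["-h", "localhost", "-P", "3306", "-u", "user", "-D", "macrostrat", "-psecret"]
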
connection arguments from a SQLAlchemy URL.""" + args = [ + "-h", + url.host, + "-P", + str(url.port), + "-u", + url.username, + "-D", + url.database, + ] + if url.password: + args.extend(["-p" + str(url.password)]) + return args From 1f234f91d0ae44164610b41f4ff2384a639ded88 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Fri, 26 Jul 2024 01:23:50 -0500 Subject: [PATCH 09/48] Remove need to connect with --net=host --- cli/macrostrat/cli/database/mariadb/__init__.py | 4 +--- core/macrostrat/core/config.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/cli/macrostrat/cli/database/mariadb/__init__.py b/cli/macrostrat/cli/database/mariadb/__init__.py index e68afaff..2aa6e031 100644 --- a/cli/macrostrat/cli/database/mariadb/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/__init__.py @@ -18,7 +18,7 @@ context_settings={"allow_extra_args": True, "ignore_unknown_options": True}, ) def cli_command(ctx: Context): - """Run psql in the database container""" + """Run the MariaDB CLI against the Macrostrat database.""" from macrostrat.core.config import docker_internal_url, mysql_database _database: URL = docker_internal_url(mysql_database) @@ -26,8 +26,6 @@ def cli_command(ctx: Context): flags = [ "-i", "--rm", - "--network", - "host", ] if len(ctx.args) == 0 and stdin.isatty(): diff --git a/core/macrostrat/core/config.py b/core/macrostrat/core/config.py index 35c5d68f..4a7f5028 100644 --- a/core/macrostrat/core/config.py +++ b/core/macrostrat/core/config.py @@ -4,6 +4,7 @@ from dynaconf import Dynaconf, Validator from sqlalchemy.engine import make_url from toml import load as load_toml +from sqlalchemy.engine.url import URL from .utils import find_macrostrat_config, is_pg_url @@ -54,6 +55,10 @@ def all_environments(self): docker_localhost = getattr(settings, "docker_localhost", "localhost") PG_DATABASE_DOCKER = PG_DATABASE.replace("localhost", docker_localhost) +mysql_database = getattr(settings, "mysql_database", None) +# TODO: handle this more intelligently + + if elevation_database := getattr(settings, "elevation_database", None): environ["ELEVATION_DATABASE_URL"] = elevation_database @@ -110,3 +115,10 @@ def all_environments(self): settings.srcroot = Path(__file__).parent.parent.parent.parent environ["MACROSTRAT_ROOT"] = str(settings.srcroot) + + +def docker_internal_url(url: URL | str) -> URL: + url = make_url(url) + if url.host == "localhost": + url = url.set(host=docker_localhost) + return url From 7f9193c606ede8484d63b85446f5eb602c3ec58e Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Fri, 26 Jul 2024 11:06:21 -0500 Subject: [PATCH 10/48] Add command that restores a database onto the MariaDB server --- cli/macrostrat/cli/_dev/restore_database.py | 9 +- cli/macrostrat/cli/_dev/utils.py | 72 ++++++--- .../cli/database/mariadb/__init__.py | 31 +++- .../cli/database/mariadb/restore.py | 142 ++++++++++++++++++ core/macrostrat/core/config.py | 5 +- core/macrostrat/core/exc.py | 2 +- 6 files changed, 224 insertions(+), 37 deletions(-) create mode 100644 cli/macrostrat/cli/database/mariadb/restore.py diff --git a/cli/macrostrat/cli/_dev/restore_database.py b/cli/macrostrat/cli/_dev/restore_database.py index 73cd2f96..e2a2b4db 100644 --- a/cli/macrostrat/cli/_dev/restore_database.py +++ b/cli/macrostrat/cli/_dev/restore_database.py @@ -26,10 +26,8 @@ def pg_restore(*args, **kwargs): async def _pg_restore( engine: Engine, - *, + *args, create=False, - command_prefix: Optional[list] = None, - args: list = [], postgres_container: str = "postgres:15", ): # Pipe 
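
Note: the docker_internal_url helper added here only rewrites the host, which is why the `--network host` flag can be dropped. For example, assuming docker_localhost is set to host.docker.internal in settings:

    docker_internal_url("mysql://root:pw@localhost:3306/macrostrat")
    # -> mysql://root:***@host.docker.internal:3306/macrostrat  (same URL, host swapped;
    #    SQLAlchemy masks the password when the URL is rendered)
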
file to pg_restore, mimicking @@ -42,11 +40,10 @@ async def _pg_restore( # host, if possible, is probably the fastest option. There should be # multiple options ideally. _cmd = _create_command( - engine, "pg_restore", "-d", - args=args, - prefix=command_prefix, + engine, + *args, container=postgres_container, ) diff --git a/cli/macrostrat/cli/_dev/utils.py b/cli/macrostrat/cli/_dev/utils.py index 916c9887..475097da 100644 --- a/cli/macrostrat/cli/_dev/utils.py +++ b/cli/macrostrat/cli/_dev/utils.py @@ -6,7 +6,9 @@ from rich.console import Console from sqlalchemy.engine import Engine from sqlalchemy.engine.url import URL -from sqlalchemy_utils import create_database, database_exists +from sqlalchemy_utils import create_database, database_exists, drop_database +from macrostrat.core.exc import MacrostratError + console = Console() @@ -25,51 +27,66 @@ def _docker_local_run_args(postgres_container: str = "postgres:15"): ] -def _create_database_if_not_exists(_url: URL, create=False): +def _create_database_if_not_exists( + _url: URL, *, create=False, allow_exists=True, overwrite=False +): database = _url.database + if overwrite: + create = True db_exists = database_exists(_url) if db_exists: - console.print(f"Database [bold cyan]{database}[/] already exists") + msg = f"Database [bold underline]{database}[/] already exists" + if overwrite: + console.print(f"{msg}, overwriting") + drop_database(_url) + db_exists = False + elif not allow_exists: + raise MacrostratError(msg, details="Use `--overwrite` to overwrite") + else: + console.print(msg) if create and not db_exists: console.print(f"Creating database [bold cyan]{database}[/]") create_database(_url) if not db_exists and not create: - raise ValueError( + raise MacrostratError( f"Database [bold cyan]{database}[/] does not exist. " "Use `--create` to create it." ) def _create_command( - engine: Engine, *command, - args=[], - prefix=None | list[str], - container="postgres:16", + container=None | str, ): - command_prefix = prefix or _docker_local_run_args(container) - _cmd = [*command_prefix, *command, str(engine.url), *args] + """Create a command for operating on a database""" - log.info(" ".join(_cmd)) + _args = [] + if container is not None: + _args = _docker_local_run_args(container) - # Replace asterisks with the real password (if any). This is kind of backwards - # but it works. 
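
Note: the asterisk replacement described above exists because SQLAlchemy masks passwords when a URL is rendered; raw_database_url (reworked later in this patch) undoes that so the URL can be handed to a subprocess. For example:

    from sqlalchemy.engine import make_url

    url = make_url("postgresql://user:p%40ss@host/db")
    str(url)               # 'postgresql://user:***@host/db'   (password masked)
    raw_database_url(url)  # 'postgresql://user:p%40ss@host/db' (safe to exec)
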
- if "***" in str(engine.url) and engine.url.password is not None: - _cmd = [ - *command_prefix, - *command, - raw_database_url(engine.url), - *args, - ] + # We keep a separate list of arguments for logging purposes + # in order to avoid logging the password in the URL + _log_args = _args.copy() + for arg in command: + log_arg = arg + if isinstance(arg, Engine): + arg = arg.url + if isinstance(arg, URL): + log_arg = str(arg) + arg = raw_database_url(arg) + _log_args.append(log_arg) + _args.append(arg) - return _cmd + log.info(" ".join(_log_args)) + + return _args async def print_stream_progress( in_stream: asyncio.StreamReader, - out_stream: asyncio.StreamWriter | AsyncBufferedIOBase, + out_stream: asyncio.StreamWriter | AsyncBufferedIOBase | None, ): """This should be unified with print_stream_progress, but there seem to be slight API differences between aiofiles and asyncio.StreamWriter APIs.?""" @@ -80,7 +97,7 @@ async def print_stream_progress( if isinstance(out_stream, AsyncBufferedIOBase): await out_stream.write(line) await out_stream.flush() - else: + elif out_stream is not None: out_stream.write(line) await out_stream.drain() i += 1 @@ -88,7 +105,8 @@ async def print_stream_progress( i = 0 _print_progress(megabytes_written, end="\r") - out_stream.close() + if out_stream is not None: + out_stream.close() _print_progress(megabytes_written) @@ -103,4 +121,8 @@ async def print_stdout(stream: asyncio.StreamReader): def raw_database_url(url: URL): - return str(url).replace("***", quote(url.password, safe="")) + """Replace the password asterisks with the actual password, in order to pass to other commands.""" + _url = str(url) + if "***" not in _url or url.password is None: + return _url + return _url.replace("***", quote(url.password, safe="")) diff --git a/cli/macrostrat/cli/database/mariadb/__init__.py b/cli/macrostrat/cli/database/mariadb/__init__.py index 2aa6e031..350c52be 100644 --- a/cli/macrostrat/cli/database/mariadb/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/__init__.py @@ -1,11 +1,12 @@ -import os - -from typer import Typer, Context +from typer import Typer, Context, Argument from sys import stdin from macrostrat.utils.shell import run -from sqlalchemy.engine.url import URL +from sqlalchemy.engine import create_engine +from sqlalchemy.engine.url import URL, make_url +from pathlib import Path from .utils import build_connection_args +from .restore import restore_mariadb app = Typer(no_args_is_help=True) @@ -40,3 +41,25 @@ def cli_command(ctx: Context): *build_connection_args(_database), *ctx.args, ) + + +@app.command("restore") +def restore_command( + input: str = Argument(None, help="Path to the dump file or stream"), + *, + create: bool = False, + overwrite: bool = False, +): + """Restore a MariaDB database from a dump file or stream.""" + from macrostrat.core.config import mysql_database + + _database: URL = make_url(mysql_database) + _database = _database.set(drivername="mysql+pymysql") + + restore_mariadb( + input, + create_engine(_database), + create=create, + overwrite=overwrite, + container=mariadb_container, + ) diff --git a/cli/macrostrat/cli/database/mariadb/restore.py b/cli/macrostrat/cli/database/mariadb/restore.py new file mode 100644 index 00000000..277e7eca --- /dev/null +++ b/cli/macrostrat/cli/database/mariadb/restore.py @@ -0,0 +1,142 @@ +import asyncio +from pathlib import Path +from typing import Optional +from sys import stdin + +from macrostrat.utils import get_logger +from rich.console import Console +from sqlalchemy.engine import Engine +from 
macrostrat.core.exc import MacrostratError +import aiofiles + +from .utils import build_connection_args +from macrostrat.core.config import docker_internal_url + +from ..._dev.utils import ( + _create_command, + _create_database_if_not_exists, + print_stdout, + print_stream_progress, +) + +console = Console() + +log = get_logger(__name__) + + +def restore_mariadb(_input: Optional[str], engine: Engine, *args, **kwargs): + """Restore a MariaDB database from a dump file or stream""" + + if _input.startswith("http"): + raise NotImplementedError("HTTP(S) restore not yet implemented") + + if _input is not None: + _input = Path(_input) + + if _input is None: + if stdin.isatty(): + raise MacrostratError("No input file specified") + + # Read from stdin + _input = Path("/dev/stdin") + + if not _input.is_file(): + raise MacrostratError(f"{_input} is not a file") + + task = _restore_mariadb_from_file(_input, engine, *args, **kwargs) + asyncio.run(task) + + +async def _restore_mariadb(engine: Engine, *args, **kwargs): + """Load MariaDB dump (GZipped SQL file) into a database.""" + overwrite = kwargs.pop("overwrite", False) + create = kwargs.pop("create", overwrite) + container = kwargs.pop("container", "mariadb:10.10") + + _create_database_if_not_exists( + engine.url, create=create, allow_exists=False, overwrite=overwrite + ) + conn = build_connection_args(docker_internal_url(engine.url)) + + # Run pg_restore in a local Docker container + # TODO: this could also be run with pg_restore in a Kubernetes pod + # or another location, if more appropriate. Running on the remote + # host, if possible, is probably the fastest option. There should be + # multiple options ideally. + _cmd = _create_command( + "mariadb", + *conn, + *args, + container=container, + ) + + log.debug(" ".join(_cmd)) + + return await asyncio.create_subprocess_exec( + *_cmd, + stdin=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + limit=1024 * 1024 * 1, # 1 MB windows + ) + + +async def _restore_mariadb_from_file(dumpfile: Path, engine: Engine, *args, **kwargs): + proc = await _restore_mariadb(engine, *args, **kwargs) + # Open dump file as an async stream + async with aiofiles.open(dumpfile, mode="rb") as source: + s1 = DecodingStreamReader(source) + await asyncio.gather( + asyncio.create_task( + print_stream_progress(s1, proc.stdin), + ), + asyncio.create_task(print_stdout(proc.stderr)), + ) + + # asyncio.create_task(print_stdout(proc.stderr)), + + +import zlib + + +class DecodingStreamReader(asyncio.StreamReader): + """A StreamReader that decompresses gzip files and decodes bytes to strings""" + + # https://ejosh.co/de/2022/08/stream-a-massive-gzipped-json-file-in-python/ + + def __init__(self, stream, encoding="utf-8", errors="strict"): + super().__init__() + self.stream = stream + self._is_gzipped = None + self.d = zlib.decompressobj(zlib.MAX_WBITS | 16) + + def decompress(self, input: bytes) -> bytes: + decompressed = self.d.decompress(input) + data = b"" + while self.d.unused_data != b"": + buf = self.d.unused_data + self.d = zlib.decompressobj(zlib.MAX_WBITS | 16) + data = self.d.decompress(buf) + return decompressed + data + + def transform_data(self, data): + if self._is_gzipped is None: + self._is_gzipped = data[:2] == b"\x1f\x8b" + log.info("is_gzipped: %s", self._is_gzipped) + if self._is_gzipped: + # Decompress the data + data = self.decompress(data) + return data + + async def read(self, n=-1): + data = await self.stream.read(n) + return self.transform_data(data) + + async def readline(self): + res = b"" + while 
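
Note: the stream reader above sniffs the two gzip magic bytes and passes `zlib.MAX_WBITS | 16` so zlib expects gzip framing; the same machinery in miniature, against a hypothetical dump file:

    import zlib

    raw = open("./maria_dump.sql.gz", "rb").read()   # hypothetical file
    if raw[:2] == b"\x1f\x8b":                       # gzip magic number
        d = zlib.decompressobj(zlib.MAX_WBITS | 16)  # +16 -> gzip header expected
        raw = d.decompress(raw) + d.flush()
    print(raw[:80])
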
res == b"": + # Read next line + line = await self.stream.readline() + if not line: + break + res += self.transform_data(line) + return res diff --git a/core/macrostrat/core/config.py b/core/macrostrat/core/config.py index 4a7f5028..7f663d57 100644 --- a/core/macrostrat/core/config.py +++ b/core/macrostrat/core/config.py @@ -56,7 +56,9 @@ def all_environments(self): PG_DATABASE_DOCKER = PG_DATABASE.replace("localhost", docker_localhost) mysql_database = getattr(settings, "mysql_database", None) -# TODO: handle this more intelligently +if mysql_database is not None: + mysql_database: URL = make_url(mysql_database).set(drivername="mysql+pymysql") + # TODO: handle this more intelligently if elevation_database := getattr(settings, "elevation_database", None): @@ -120,5 +122,6 @@ def all_environments(self): def docker_internal_url(url: URL | str) -> URL: url = make_url(url) if url.host == "localhost": + docker_localhost = getattr(settings, "docker_localhost", "localhost") url = url.set(host=docker_localhost) return url diff --git a/core/macrostrat/core/exc.py b/core/macrostrat/core/exc.py index 0b66a44e..d6132e32 100644 --- a/core/macrostrat/core/exc.py +++ b/core/macrostrat/core/exc.py @@ -30,7 +30,7 @@ def setup_exception_handling(app: Typer): def wrapped_app(): try: app() - except MacrostratError as error: + except ApplicationError as error: rendered = Padding(error.render(), (1, 2)) err_console.print(rendered) exit(1) From 93c7fb63601ee49c2e00c6aa1dd8fe63934187ca Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Fri, 26 Jul 2024 11:11:47 -0500 Subject: [PATCH 11/48] Removed overly complex logging approach --- cli/macrostrat/cli/_dev/utils.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cli/macrostrat/cli/_dev/utils.py b/cli/macrostrat/cli/_dev/utils.py index 475097da..a0cc8e44 100644 --- a/cli/macrostrat/cli/_dev/utils.py +++ b/cli/macrostrat/cli/_dev/utils.py @@ -66,21 +66,13 @@ def _create_command( if container is not None: _args = _docker_local_run_args(container) - # We keep a separate list of arguments for logging purposes - # in order to avoid logging the password in the URL - _log_args = _args.copy() for arg in command: - log_arg = arg if isinstance(arg, Engine): arg = arg.url if isinstance(arg, URL): - log_arg = str(arg) arg = raw_database_url(arg) - _log_args.append(log_arg) _args.append(arg) - log.info(" ".join(_log_args)) - return _args From 225e016daa2345dbf057df1c6511834d6e406da1 Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Fri, 26 Jul 2024 11:14:56 -0500 Subject: [PATCH 12/48] find_row_variances() compares maridb data to macrostrat data to ensure all data exists before migration in dev --- MariaDB Migration to PostgreSQL/utils.py | 123 ++++++++++++++++------- 1 file changed, 87 insertions(+), 36 deletions(-) diff --git a/MariaDB Migration to PostgreSQL/utils.py b/MariaDB Migration to PostgreSQL/utils.py index 72b4c3f0..fd4b0643 100644 --- a/MariaDB Migration to PostgreSQL/utils.py +++ b/MariaDB Migration to PostgreSQL/utils.py @@ -1,4 +1,4 @@ -from sqlalchemy import text, create_engine +from sqlalchemy import text, create_engine, inspect from Constants import * import os from sqlalchemy.exc import SQLAlchemyError @@ -10,15 +10,23 @@ Copies table structure and table data from one schema to another schema on the same host. 
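 
 A hypothetical smoke-test of the helpers below (assuming the Constants
 module supplies pg_server, pg_user, pg_pass_new, and the database names;
 at this stage pg_dump and pg_restore only print the commands they would
 run rather than executing them):
 
     pg_dump(pg_server, pg_user, pg_pass_new, pg_db_name, 'macrostrat')
     pg_restore(pg_server, pg_user, pg_pass_new, pg_db_name_two, 'macrostrat_temp')
 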
Command line in cmd.exe language """ -def pg_dump(server, user, password, dbname): - os.system(f'pg_dump -h {pg_server} -d {pg_db_name} -U {pg_user} -W -F d -f ./postgres_dump') - os.system(f'{pg_pass}') + +def pg_dump(server, user, password, dbname, schema): + pg_dump_command = f'pg_dump -h {server} -d {dbname} -U {user} --schema={schema} -W -F d -f ./postgres_dump' + pg_dump_command_two = f'{password}' + print(pg_dump_command) + print(pg_dump_command_two) + #os.system(pg_dump_command) + #os.system(pg_dump_command_two) print('Starting database export........') return -def pg_restore(server, user, password, dbname): - os.system(f'pg_dump -h {pg_server} -d {pg_db_name_two} -U {pg_user} -W -F d ./postgres_dump') - os.system(f'{pg_pass}') +def pg_restore(server, user, password, dbname, schema): + pg_restore = f'pg_restore -h {server} -d {dbname} -U {user} --schema={schema} -W -F d ./postgres_dump' + pg_restore_two = f'{password}' + print(pg_restore) + print(pg_restore_two) + return @@ -160,22 +168,30 @@ def compare_data_counts(db1_rows, db2_rows, db1_columns, db2_columns, db1, db2): """ Script to output dataframes for comparing data between two databases and tables. """ -def find_row_variances(database_name_one, schema_one, database_name_two, schema_two, username, password, table): +def find_row_variances(database_name_one, schema_one, schema_two, username, password, tables): SQLALCHEMY_DATABASE_URI = f"postgresql://{username}:{password}@{pg_server}/{database_name_one}" engine = create_engine(SQLALCHEMY_DATABASE_URI) + insp = inspect(engine) + with engine.connect() as conn: - query = text(f"SELECT * FROM {schema_one}.{table}") - result = conn.execute(query) - df = pd.DataFrame(result) - engine.dispose() - SQLALCHEMY_DATABASE_URI = f"postgresql://{username}:{password}@{pg_server}/{database_name_two}" - engine = create_engine(SQLALCHEMY_DATABASE_URI) - with engine.connect() as conn: - query = text(f"SELECT * FROM {schema_two}.{table}") - result = conn.execute(query) - df_two = pd.DataFrame(result) + for table in tables: + # Get the actual first column name for each table + columns = insp.get_columns(table, schema=schema_one) + first_column_name = columns[0]['name'] + query = f""" + SELECT m.{first_column_name} + FROM macrostrat.macrostrat.{table} m + RIGHT JOIN macrostrat.macrostrat_temp.{table} t ON m.{first_column_name} = t.{first_column_name} + WHERE t.{first_column_name} IS NULL; + """ + result_df = pd.read_sql_query(query, engine) + print(f"Macrostrat rows not in Macrostrat_two rows for table {table}:") + print(result_df) engine.dispose() - return df, df_two + return + + + def pg_loader_pre_script(): @@ -385,21 +401,56 @@ def reset(): #pg_loader_pre_script() #pg_loader() #pg_loader_post_script() - maria_rows, maria_columns = get_data_counts_maria() - pg_rows, pg_columns = get_data_counts_pg(pg_db_name, pg_user, pg_pass_new, 'macrostrat') - pg_macrostrat_two_rows, pg_macrostrat_two_columns = get_data_counts_pg(pg_db_name_two, pg_user_migrate, pg_pass_migrate, 'macrostrat_temp') - - print('\nMARIADB (db1) comparison to PG MACROSTRAT_TWO (db2). These should be clones. ') - db1 = 'MariaDB' - db2 = 'PG Macrostrat_Two' - row_variance, column_variance = compare_data_counts(maria_rows, pg_macrostrat_two_rows, maria_columns, - pg_macrostrat_two_columns, db1, db2) - print('\nPG MACROSTRAT_TWO (db1 maria db clone) comparison to PG MACROSTRAT (db2). 
This will show what data ' - 'needs to be moved over from Maria to PG prod.') - db1 = 'PG Macrostrat_Two' - db2 = 'PG Macrostrat' - row_variance_two, column_variance_two = compare_data_counts(pg_macrostrat_two_rows, pg_rows, pg_macrostrat_two_columns, - pg_columns, db1, db2) + #maria_rows, maria_columns = get_data_counts_maria() + #pg_rows, pg_columns = get_data_counts_pg(pg_db_name, pg_user, pg_pass_new, 'macrostrat') + #pg_macrostrat_two_rows, pg_macrostrat_two_columns = get_data_counts_pg(pg_db_name_two, pg_user_migrate, pg_pass_migrate, 'macrostrat_temp') + + #print('\nMARIADB (db1) comparison to PG MACROSTRAT_TWO (db2). These should be clones. ') + #db1 = 'MariaDB' + #db2 = 'PG Macrostrat_Two' + #row_variance, column_variance = compare_data_counts(maria_rows, pg_macrostrat_two_rows, maria_columns, + # pg_macrostrat_two_columns, db1, db2) + #print('\nPG MACROSTRAT_TWO (db1 maria db clone) comparison to PG MACROSTRAT (db2). This will show what data ' + # 'needs to be moved over from Maria to PG prod.') + #db1 = 'PG Macrostrat_Two' + #db2 = 'PG Macrostrat' + #row_variance_two, column_variance_two = compare_data_counts(pg_macrostrat_two_rows, pg_rows, pg_macrostrat_two_columns, + # pg_columns, db1, db2) #reset() - #df, df_two = find_row_variances(pg_db_name, pg_db_name, pg_db_name_two, maria_db_name_two, - #pg_user, pg_pass_new, 'cols') + pg_restore(pg_server, pg_user, pg_pass_new, pg_db_name, maria_db_name_two) + + tables = [ + "sections", + "strat_names", + "strat_names_places", + "strat_tree", + "units", + "timescales", + "timescales_intervals", + "units_sections", + "unit_boundaries", + "unit_econs", + "unit_measures", + "unit_strat_names", + "unit_environs", + "unit_liths", + "autocomplete", + "cols", + "col_areas", + "col_groups", + "col_refs", + "intervals", + "liths", + "lookup_strat_names", + "lookup_units", + "lookup_unit_attrs_api", + "lookup_unit_intervals", + "refs" + ] + + + + + results = find_row_variances(pg_db_name, pg_db_name, maria_db_name_two, pg_user, pg_pass_new, tables) + + From 12f550931f6fcf8b0206227d4cff987b415b896a Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Fri, 26 Jul 2024 11:21:19 -0500 Subject: [PATCH 13/48] Refactor - move stream reader to a utils file --- cli/macrostrat/cli/_dev/dump_database.py | 3 +- cli/macrostrat/cli/_dev/restore_database.py | 3 +- cli/macrostrat/cli/_dev/stream_utils.py | 88 +++++++++++++++++++ cli/macrostrat/cli/_dev/transfer_tables.py | 2 +- cli/macrostrat/cli/_dev/utils.py | 38 -------- .../cli/database/mariadb/restore.py | 49 +---------- 6 files changed, 94 insertions(+), 89 deletions(-) create mode 100644 cli/macrostrat/cli/_dev/stream_utils.py diff --git a/cli/macrostrat/cli/_dev/dump_database.py b/cli/macrostrat/cli/_dev/dump_database.py index d4d8d769..998276e8 100644 --- a/cli/macrostrat/cli/_dev/dump_database.py +++ b/cli/macrostrat/cli/_dev/dump_database.py @@ -6,7 +6,8 @@ from macrostrat.utils import get_logger from sqlalchemy.engine import Engine -from .utils import _create_command, print_stdout, print_stream_progress +from .utils import _create_command +from .stream_utils import print_stream_progress, print_stdout log = get_logger(__name__) diff --git a/cli/macrostrat/cli/_dev/restore_database.py b/cli/macrostrat/cli/_dev/restore_database.py index e2a2b4db..885696ad 100644 --- a/cli/macrostrat/cli/_dev/restore_database.py +++ b/cli/macrostrat/cli/_dev/restore_database.py @@ -10,9 +10,8 @@ from .utils import ( _create_command, _create_database_if_not_exists, - print_stdout, - print_stream_progress, ) +from 
.stream_utils import print_stream_progress, print_stdout console = Console() diff --git a/cli/macrostrat/cli/_dev/stream_utils.py b/cli/macrostrat/cli/_dev/stream_utils.py new file mode 100644 index 00000000..1bec8f3b --- /dev/null +++ b/cli/macrostrat/cli/_dev/stream_utils.py @@ -0,0 +1,88 @@ +import asyncio +import zlib + +from aiofiles.threadpool import AsyncBufferedIOBase +from macrostrat.utils import get_logger +from .utils import console + +log = get_logger(__name__) + + +async def print_stream_progress( + in_stream: asyncio.StreamReader, + out_stream: asyncio.StreamWriter | AsyncBufferedIOBase | None, +): + """This should be unified with print_stream_progress, but there seem to be + slight API differences between aiofiles and asyncio.StreamWriter APIs.?""" + megabytes_written = 0 + i = 0 + async for line in in_stream: + megabytes_written += len(line) / 1_000_000 + if isinstance(out_stream, AsyncBufferedIOBase): + await out_stream.write(line) + await out_stream.flush() + elif out_stream is not None: + out_stream.write(line) + await out_stream.drain() + i += 1 + if i == 1000: + i = 0 + _print_progress(megabytes_written, end="\r") + + if out_stream is not None: + out_stream.close() + _print_progress(megabytes_written) + + +def _print_progress(megabytes: float, **kwargs): + progress = f"Dumped {megabytes:.1f} MB" + print(progress, **kwargs) + + +async def print_stdout(stream: asyncio.StreamReader): + async for line in stream: + console.print(line.decode("utf-8"), style="dim") + + +class DecodingStreamReader(asyncio.StreamReader): + """A StreamReader that decompresses gzip files and decodes bytes to strings""" + + # https://ejosh.co/de/2022/08/stream-a-massive-gzipped-json-file-in-python/ + + def __init__(self, stream, encoding="utf-8", errors="strict"): + super().__init__() + self.stream = stream + self._is_gzipped = None + self.d = zlib.decompressobj(zlib.MAX_WBITS | 16) + + def decompress(self, input: bytes) -> bytes: + decompressed = self.d.decompress(input) + data = b"" + while self.d.unused_data != b"": + buf = self.d.unused_data + self.d = zlib.decompressobj(zlib.MAX_WBITS | 16) + data = self.d.decompress(buf) + return decompressed + data + + def transform_data(self, data): + if self._is_gzipped is None: + self._is_gzipped = data[:2] == b"\x1f\x8b" + log.info("is_gzipped: %s", self._is_gzipped) + if self._is_gzipped: + # Decompress the data + data = self.decompress(data) + return data + + async def read(self, n=-1): + data = await self.stream.read(n) + return self.transform_data(data) + + async def readline(self): + res = b"" + while res == b"": + # Read next line + line = await self.stream.readline() + if not line: + break + res += self.transform_data(line) + return res diff --git a/cli/macrostrat/cli/_dev/transfer_tables.py b/cli/macrostrat/cli/_dev/transfer_tables.py index 0a4084dd..02e084b8 100644 --- a/cli/macrostrat/cli/_dev/transfer_tables.py +++ b/cli/macrostrat/cli/_dev/transfer_tables.py @@ -1,5 +1,5 @@ import asyncio -from .utils import print_stream_progress, print_stdout +from .stream_utils import print_stream_progress, print_stdout from sqlalchemy.engine import Engine from .dump_database import _pg_dump from .restore_database import _pg_restore diff --git a/cli/macrostrat/cli/_dev/utils.py b/cli/macrostrat/cli/_dev/utils.py index a0cc8e44..9e8aa797 100644 --- a/cli/macrostrat/cli/_dev/utils.py +++ b/cli/macrostrat/cli/_dev/utils.py @@ -1,7 +1,5 @@ -import asyncio from urllib.parse import quote -from aiofiles.threadpool.binary import AsyncBufferedIOBase from 
macrostrat.utils import get_logger from rich.console import Console from sqlalchemy.engine import Engine @@ -76,42 +74,6 @@ def _create_command( return _args -async def print_stream_progress( - in_stream: asyncio.StreamReader, - out_stream: asyncio.StreamWriter | AsyncBufferedIOBase | None, -): - """This should be unified with print_stream_progress, but there seem to be - slight API differences between aiofiles and asyncio.StreamWriter APIs.?""" - megabytes_written = 0 - i = 0 - async for line in in_stream: - megabytes_written += len(line) / 1_000_000 - if isinstance(out_stream, AsyncBufferedIOBase): - await out_stream.write(line) - await out_stream.flush() - elif out_stream is not None: - out_stream.write(line) - await out_stream.drain() - i += 1 - if i == 1000: - i = 0 - _print_progress(megabytes_written, end="\r") - - if out_stream is not None: - out_stream.close() - _print_progress(megabytes_written) - - -def _print_progress(megabytes: float, **kwargs): - progress = f"Dumped {megabytes:.1f} MB" - print(progress, **kwargs) - - -async def print_stdout(stream: asyncio.StreamReader): - async for line in stream: - console.print(line.decode("utf-8"), style="dim") - - def raw_database_url(url: URL): """Replace the password asterisks with the actual password, in order to pass to other commands.""" _url = str(url) diff --git a/cli/macrostrat/cli/database/mariadb/restore.py b/cli/macrostrat/cli/database/mariadb/restore.py index 277e7eca..1be48692 100644 --- a/cli/macrostrat/cli/database/mariadb/restore.py +++ b/cli/macrostrat/cli/database/mariadb/restore.py @@ -15,9 +15,8 @@ from ..._dev.utils import ( _create_command, _create_database_if_not_exists, - print_stdout, - print_stream_progress, ) +from ..._dev.stream_utils import print_stream_progress, print_stdout console = Console() @@ -95,48 +94,4 @@ async def _restore_mariadb_from_file(dumpfile: Path, engine: Engine, *args, **kw # asyncio.create_task(print_stdout(proc.stderr)), -import zlib - - -class DecodingStreamReader(asyncio.StreamReader): - """A StreamReader that decompresses gzip files and decodes bytes to strings""" - - # https://ejosh.co/de/2022/08/stream-a-massive-gzipped-json-file-in-python/ - - def __init__(self, stream, encoding="utf-8", errors="strict"): - super().__init__() - self.stream = stream - self._is_gzipped = None - self.d = zlib.decompressobj(zlib.MAX_WBITS | 16) - - def decompress(self, input: bytes) -> bytes: - decompressed = self.d.decompress(input) - data = b"" - while self.d.unused_data != b"": - buf = self.d.unused_data - self.d = zlib.decompressobj(zlib.MAX_WBITS | 16) - data = self.d.decompress(buf) - return decompressed + data - - def transform_data(self, data): - if self._is_gzipped is None: - self._is_gzipped = data[:2] == b"\x1f\x8b" - log.info("is_gzipped: %s", self._is_gzipped) - if self._is_gzipped: - # Decompress the data - data = self.decompress(data) - return data - - async def read(self, n=-1): - data = await self.stream.read(n) - return self.transform_data(data) - - async def readline(self): - res = b"" - while res == b"": - # Read next line - line = await self.stream.readline() - if not line: - break - res += self.transform_data(line) - return res + From 76b1b2bc271ece12520f5af5a9e56b38058db4f1 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Fri, 26 Jul 2024 11:24:42 -0500 Subject: [PATCH 14/48] Fix reference to decoder --- cli/macrostrat/cli/_dev/stream_utils.py | 2 +- cli/macrostrat/cli/database/mariadb/restore.py | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git 
a/cli/macrostrat/cli/_dev/stream_utils.py b/cli/macrostrat/cli/_dev/stream_utils.py index 1bec8f3b..6145f249 100644 --- a/cli/macrostrat/cli/_dev/stream_utils.py +++ b/cli/macrostrat/cli/_dev/stream_utils.py @@ -45,7 +45,7 @@ async def print_stdout(stream: asyncio.StreamReader): class DecodingStreamReader(asyncio.StreamReader): - """A StreamReader that decompresses gzip files and decodes bytes to strings""" + """A StreamReader that decompresses gzip files (if compressed)""" # https://ejosh.co/de/2022/08/stream-a-massive-gzipped-json-file-in-python/ diff --git a/cli/macrostrat/cli/database/mariadb/restore.py b/cli/macrostrat/cli/database/mariadb/restore.py index 1be48692..3855da96 100644 --- a/cli/macrostrat/cli/database/mariadb/restore.py +++ b/cli/macrostrat/cli/database/mariadb/restore.py @@ -16,7 +16,11 @@ _create_command, _create_database_if_not_exists, ) -from ..._dev.stream_utils import print_stream_progress, print_stdout +from ..._dev.stream_utils import ( + print_stream_progress, + print_stdout, + DecodingStreamReader, +) console = Console() @@ -90,8 +94,3 @@ async def _restore_mariadb_from_file(dumpfile: Path, engine: Engine, *args, **kw ), asyncio.create_task(print_stdout(proc.stderr)), ) - - # asyncio.create_task(print_stdout(proc.stderr)), - - - From 4c58f5ec88b0398faaaaffa99a3f41a6abe84eb6 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Fri, 26 Jul 2024 11:50:58 -0500 Subject: [PATCH 15/48] Rearrange CLI utils --- cli/__init__.py | 0 cli/macrostrat/__init__.py | 0 cli/macrostrat/cli/database/__init__.py | 5 - .../postgresql_migration}/Dockerfile | 0 .../postgresql_migration/__init__.py} | 388 ++++++++++++------ .../postgresql_migration/_legacy.py} | 4 +- .../postgresql_migration}/requirements.txt | 0 .../cli/database/mariadb/restore.py | 6 +- 8 files changed, 264 insertions(+), 139 deletions(-) create mode 100644 cli/__init__.py create mode 100644 cli/macrostrat/__init__.py rename cli/macrostrat/cli/database/{mariadb_migration => mariadb/postgresql_migration}/Dockerfile (100%) rename cli/macrostrat/cli/database/{mariadb_migration/utils.py => mariadb/postgresql_migration/__init__.py} (56%) rename cli/macrostrat/cli/database/{mariadb_migration/__init__.py => mariadb/postgresql_migration/_legacy.py} (89%) rename cli/macrostrat/cli/database/{mariadb_migration => mariadb/postgresql_migration}/requirements.txt (100%) diff --git a/cli/__init__.py b/cli/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cli/macrostrat/__init__.py b/cli/macrostrat/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cli/macrostrat/cli/database/__init__.py b/cli/macrostrat/cli/database/__init__.py index eeae5bad..a68115ef 100644 --- a/cli/macrostrat/cli/database/__init__.py +++ b/cli/macrostrat/cli/database/__init__.py @@ -14,7 +14,6 @@ from macrostrat.core import MacrostratSubsystem, app from macrostrat.core.utils import is_pg_url -from .mariadb_migration import import_mariadb from .._dev.utils import ( raw_database_url, @@ -403,7 +402,3 @@ def field_title(name): db_app.command(name="migrations", rich_help_panel="Schema management")(run_migrations) - -db_app.command( - name="import-mariadb", rich_help_panel="Schema management", deprecated=True -)(import_mariadb) diff --git a/cli/macrostrat/cli/database/mariadb_migration/Dockerfile b/cli/macrostrat/cli/database/mariadb/postgresql_migration/Dockerfile similarity index 100% rename from cli/macrostrat/cli/database/mariadb_migration/Dockerfile rename to 
cli/macrostrat/cli/database/mariadb/postgresql_migration/Dockerfile diff --git a/cli/macrostrat/cli/database/mariadb_migration/utils.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py similarity index 56% rename from cli/macrostrat/cli/database/mariadb_migration/utils.py rename to cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index 72b4c3f0..9a2a3767 100644 --- a/cli/macrostrat/cli/database/mariadb_migration/utils.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -3,6 +3,8 @@ import os from sqlalchemy.exc import SQLAlchemyError import pandas as pd +from macrostrat.database.utils import run_query, run_sql +from psycopg2.sql import Identifier import time @@ -10,69 +12,92 @@ Copies table structure and table data from one schema to another schema on the same host. Command line in cmd.exe language """ + + def pg_dump(server, user, password, dbname): - os.system(f'pg_dump -h {pg_server} -d {pg_db_name} -U {pg_user} -W -F d -f ./postgres_dump') - os.system(f'{pg_pass}') - print('Starting database export........') + # TODO: integrate with existing PostgreSQL database utilities + os.system( + f"pg_dump -h {pg_server} -d {pg_db_name} -U {pg_user} -W -F d -f ./postgres_dump" + ) + os.system(f"{pg_pass}") + print("Starting database export........") return + def pg_restore(server, user, password, dbname): - os.system(f'pg_dump -h {pg_server} -d {pg_db_name_two} -U {pg_user} -W -F d ./postgres_dump') - os.system(f'{pg_pass}') + # TODO: integrate with existing PostgreSQL database utilities + os.system( + f"pg_dump -h {pg_server} -d {pg_db_name_two} -U {pg_user} -W -F d ./postgres_dump" + ) + os.system(f"{pg_pass}") return def maria_dump(server, user, password, dbname): + # TODO: integrate with streaming approach SQLALCHEMY_DATABASE_URI = f"mysql+pymysql://{user}:{password}@{server}/{dbname}" engine = create_engine(SQLALCHEMY_DATABASE_URI) with engine.connect() as conn: conn.execute(text(f"CREATE DATABASE IF NOT EXISTS {maria_db_name_two};")) engine.dispose() - output_file = './maria_dump.sql' + output_file = "./maria_dump.sql" maria_dump_command = [ - 'mysqldump', - '-h', server, - '-d', dbname, - '-u', user, - f'-p{password}', - '--ssl-verify-server-cert=false', - '--no-data=false', - '--verbose', - '--result-file=./maria_dump.sql', + "mysqldump", + "-h", + server, + "-d", + dbname, + "-u", + user, + f"-p{password}", + "--ssl-verify-server-cert=false", + "--no-data=false", + "--verbose", + "--result-file=./maria_dump.sql", ] - os.system(' '.join(maria_dump_command)) + os.system(" ".join(maria_dump_command)) return def maria_restore(server, user, password, dbname): - maria_restore_input = f'mariadb -h {server} -u {user} -p{password} --ssl-verify-server-cert=false ' \ - f'{dbname} < ./maria_dump.sql' + maria_restore_input = ( + f"mariadb -h {server} -u {user} -p{password} --ssl-verify-server-cert=false " + f"{dbname} < ./maria_dump.sql" + ) - print('Restoring new Maria database....') + print("Restoring new Maria database....") os.system(maria_restore_input) return def get_data_counts_maria(): - SQLALCHEMY_DATABASE_URI = f"mysql+pymysql://{maria_super_user}:" \ - f"{maria_super_pass}@{maria_server}/{maria_db_name_two}" + SQLALCHEMY_DATABASE_URI = ( + f"mysql+pymysql://{maria_super_user}:" + f"{maria_super_pass}@{maria_server}/{maria_db_name_two}" + ) engine = create_engine(SQLALCHEMY_DATABASE_URI) maria_rows = {} maria_columns = {} with engine.connect() as conn: - tables_query = text(f"SELECT table_name FROM 
information_schema.tables WHERE table_schema = '{maria_db_name_two}'" \ - " AND table_type = 'BASE TABLE';") - row_result = conn.execute(tables_query) + row_result = run_query( + conn, + "SELECT table_name FROM information_schema.tables WHERE table_schema = :table_schema AND table_type = 'BASE TABLE'", + {"table_schema": maria_db_name_two}, + ) + maria_tables = [row[0] for row in row_result] for table in maria_tables: - row_query = text(f"SELECT COUNT(*) FROM {table};") - row_result = conn.execute(row_query) + row_result = run_query( + conn, "SELECT COUNT(*) FROM {table}", dict(table=Identifier(table)) + ) row_count = row_result.scalar() maria_rows[table.lower()] = row_count - column_query = text(f"SELECT COUNT(*) FROM information_schema.columns WHERE table_schema = '{maria_db_name_two}' AND table_name = '{table}';") + column_query = text( + f"SELECT COUNT(*) FROM information_schema.columns WHERE table_schema = '{maria_db_name_two}' AND table_name = '{table}';" + ) column_result = conn.execute(column_query) column_count = column_result.scalar() maria_columns[table.lower()] = column_count @@ -82,15 +107,19 @@ def get_data_counts_maria(): def get_data_counts_pg(database_name, username, password, schema): - SQLALCHEMY_DATABASE_URI = f"postgresql://{username}:{password}@{pg_server}/{database_name}" + SQLALCHEMY_DATABASE_URI = ( + f"postgresql://{username}:{password}@{pg_server}/{database_name}" + ) engine = create_engine(SQLALCHEMY_DATABASE_URI) pg_rows = {} pg_columns = {} with engine.connect() as conn: - table_query = text(f"SELECT table_name FROM information_schema.tables WHERE table_catalog = '{database_name}'" \ - " AND table_type = 'BASE TABLE'" \ - f" AND table_schema = '{schema}'") + table_query = text( + f"SELECT table_name FROM information_schema.tables WHERE table_catalog = '{database_name}'" + " AND table_type = 'BASE TABLE'" + f" AND table_schema = '{schema}'" + ) table_result = conn.execute(table_query) pg_tables = [row[0] for row in table_result] for table in pg_tables: @@ -99,8 +128,10 @@ def get_data_counts_pg(database_name, username, password, schema): row_count = row_result.scalar() pg_rows[table.lower()] = row_count - column_query = text(f"SELECT COUNT(*) FROM information_schema.columns WHERE table_catalog = '{database_name}' " - f"AND table_schema = '{schema}' AND table_name = '{table}';") + column_query = text( + f"SELECT COUNT(*) FROM information_schema.columns WHERE table_catalog = '{database_name}' " + f"AND table_schema = '{schema}' AND table_name = '{table}';" + ) column_result = conn.execute(column_query) column_count = column_result.scalar() pg_columns[table.lower()] = column_count @@ -108,51 +139,85 @@ def get_data_counts_pg(database_name, username, password, schema): return pg_rows, pg_columns - - """ Compares the data counts between tables, rows, and columns that vary between any two db's """ + + def compare_data_counts(db1_rows, db2_rows, db1_columns, db2_columns, db1, db2): - db1_rows_not_in_db2 = {table_name: (db1_rows[table_name], 0) for table_name in db1_rows if - table_name not in db2_rows} - db2_rows_not_in_db1 = {table_name: (0, db2_rows[table_name]) for table_name in db2_rows if - table_name not in db1_rows} - db1_cols_not_in_db2 = {table_name: (db1_columns[table_name], 0) for table_name in db1_columns if - table_name not in db2_columns} - db2_cols_not_in_db1 = {table_name: (0, db2_columns[table_name]) for table_name in db2_columns if - table_name not in db1_columns} + db1_rows_not_in_db2 = { + table_name: (db1_rows[table_name], 0) + for table_name in 
db1_rows + if table_name not in db2_rows + } + db2_rows_not_in_db1 = { + table_name: (0, db2_rows[table_name]) + for table_name in db2_rows + if table_name not in db1_rows + } + db1_cols_not_in_db2 = { + table_name: (db1_columns[table_name], 0) + for table_name in db1_columns + if table_name not in db2_columns + } + db2_cols_not_in_db1 = { + table_name: (0, db2_columns[table_name]) + for table_name in db2_columns + if table_name not in db1_columns + } if len(db1_rows_not_in_db2) == 0 and len(db2_rows_not_in_db1) == 0: - print(f"\nSuccess! All tables exist in both {db1} and {db2}. Checking row counts....\n") + print( + f"\nSuccess! All tables exist in both {db1} and {db2}. Checking row counts....\n" + ) else: if len(db1_rows_not_in_db2) > 0: - print(f'\nERROR: {db1} tables that are not in {db2}:\n', [key for key in db1_rows_not_in_db2]) + print( + f"\nERROR: {db1} tables that are not in {db2}:\n", + [key for key in db1_rows_not_in_db2], + ) if len(db2_rows_not_in_db1) > 0: - print(f'\nERROR: {db2} tables that are not in {db1}: \n', [key for key in db2_rows_not_in_db1]) - - row_count_difference = {key: (db1_rows[key], db2_rows[key]) for key in db1_rows if - key in db2_rows and db1_rows[key] != db2_rows[key]} - #row_count_difference.update(db1_rows_not_in_db2) - #row_count_difference.update(db2_rows_not_in_db1) - - col_count_difference = {key: (db1_columns[key], db2_columns[key]) for key in db1_columns if - key in db2_columns and db1_columns[key] != db2_columns[key]} - #col_count_difference.update(db1_cols_not_in_db2) - #col_count_difference.update(db2_cols_not_in_db1) + print( + f"\nERROR: {db2} tables that are not in {db1}: \n", + [key for key in db2_rows_not_in_db1], + ) + + row_count_difference = { + key: (db1_rows[key], db2_rows[key]) + for key in db1_rows + if key in db2_rows and db1_rows[key] != db2_rows[key] + } + # row_count_difference.update(db1_rows_not_in_db2) + # row_count_difference.update(db2_rows_not_in_db1) + + col_count_difference = { + key: (db1_columns[key], db2_columns[key]) + for key in db1_columns + if key in db2_columns and db1_columns[key] != db2_columns[key] + } + # col_count_difference.update(db1_cols_not_in_db2) + # col_count_difference.update(db2_cols_not_in_db1) if len(row_count_difference) == 0: - print(f"Success! All row counts in all tables are the same in both {db1} and {db2}!\n") + print( + f"Success! All row counts in all tables are the same in both {db1} and {db2}!\n" + ) else: - print(f'\nERROR: Row count differences for {len(row_count_difference)} tables in both {db1} and {db2} databases:\n' - f'Table Name: ({db1} Rows, {db2} Rows)\n' - f'{row_count_difference}') + print( + f"\nERROR: Row count differences for {len(row_count_difference)} tables in both {db1} and {db2} databases:\n" + f"Table Name: ({db1} Rows, {db2} Rows)\n" + f"{row_count_difference}" + ) if len(col_count_difference) == 0: - print(f"Success! All column counts in all tables are the same in both {db1} and {db2}!\n") + print( + f"Success! 
All column counts in all tables are the same in both {db1} and {db2}!\n" + ) else: - print(f'\nERROR: Column count differences for {len(col_count_difference)} tables in both {db1} and {db2} databases:\n' - f'Table Name: ({db1} Columns, {db2} Columns)\n' - f'{col_count_difference}') + print( + f"\nERROR: Column count differences for {len(col_count_difference)} tables in both {db1} and {db2} databases:\n" + f"Table Name: ({db1} Columns, {db2} Columns)\n" + f"{col_count_difference}" + ) return row_count_difference, col_count_difference @@ -160,15 +225,29 @@ def compare_data_counts(db1_rows, db2_rows, db1_columns, db2_columns, db1, db2): """ Script to output dataframes for comparing data between two databases and tables. """ -def find_row_variances(database_name_one, schema_one, database_name_two, schema_two, username, password, table): - SQLALCHEMY_DATABASE_URI = f"postgresql://{username}:{password}@{pg_server}/{database_name_one}" + + +def find_row_variances( + database_name_one, + schema_one, + database_name_two, + schema_two, + username, + password, + table, +): + SQLALCHEMY_DATABASE_URI = ( + f"postgresql://{username}:{password}@{pg_server}/{database_name_one}" + ) engine = create_engine(SQLALCHEMY_DATABASE_URI) with engine.connect() as conn: query = text(f"SELECT * FROM {schema_one}.{table}") result = conn.execute(query) df = pd.DataFrame(result) engine.dispose() - SQLALCHEMY_DATABASE_URI = f"postgresql://{username}:{password}@{pg_server}/{database_name_two}" + SQLALCHEMY_DATABASE_URI = ( + f"postgresql://{username}:{password}@{pg_server}/{database_name_two}" + ) engine = create_engine(SQLALCHEMY_DATABASE_URI) with engine.connect() as conn: query = text(f"SELECT * FROM {schema_two}.{table}") @@ -243,14 +322,22 @@ def pg_loader_pre_script(): SET macrostrat_temp.lith_group = null where macrostrat_temp.lith_group = '';""" - pre_script_queries = [query_pbdb_matches, query_places, query_refs, query_unit_contacts, query_cols, query_col_areas, - query_col_areas_6April2016, query_liths] + pre_script_queries = [ + query_pbdb_matches, + query_places, + query_refs, + query_unit_contacts, + query_cols, + query_col_areas, + query_col_areas_6April2016, + query_liths, + ] URL = f"mysql+pymysql://{maria_super_user}:{maria_super_pass}@{maria_server}/{maria_db_name_two}" engine = create_engine(URL) with engine.connect() as conn: for query in pre_script_queries: - statements = query.split(';') + statements = query.split(";") for statement in statements: if statement.strip(): try: @@ -260,7 +347,9 @@ def pg_loader_pre_script(): print(f"Error with statement: {statement}\n{e}") engine.dispose() return -''' + + +""" #create db, create temp user before pgloader URL = f"postgresql://{pg_user}:{pg_pass_new}@{pg_server}/{pg_db_name}" pg_engine = create_engine(URL) @@ -269,8 +358,7 @@ def pg_loader_pre_script(): conn.execute(text(f"DROP USER IF EXISTS {pg_user_maria_temp};")) conn.execute(text(f"CREATE USER maria_migrate WITH PASSWORD '{pg_pass_maria_temp}'")) conn.execute(text(f"GRANT CONNECT ON DATABASE {pg_db_name_two} TO {pg_user_maria_temp};")) - pg_engine.dispose()''' - + pg_engine.dispose()""" def pg_loader_post_script(): @@ -278,51 +366,70 @@ def pg_loader_post_script(): # setting the datatype of the new column data to WKT format, # dropping the old geometry column, # adding default values for data formats that pgloader accepts - #vaccuum...refresh postgresql database after pgloader - #CREATE EXTENSION IF NOT EXISTS postgis; + # vaccuum...refresh postgresql database after pgloader + # CREATE EXTENSION IF 
NOT EXISTS postgis; SQLALCHEMY_DATABASE_URI = f"postgresql://{pg_user_migrate}:{pg_pass_migrate}@{pg_server}/{pg_db_name_two}?sslmode=prefer" - engine = create_engine(SQLALCHEMY_DATABASE_URI) #connect_args={'options': '-csearch_path=public,macrostrat_temp' - + engine = create_engine( + SQLALCHEMY_DATABASE_URI + ) # connect_args={'options': '-csearch_path=public,macrostrat_temp' - query_pbdb_matches = text(""" + query_pbdb_matches = text( + """ ALTER TABLE macrostrat_two.macrostrat_temp.pbdb_matches ADD COLUMN coordinate geometry(Point, 4326); UPDATE macrostrat_two.macrostrat_temp.pbdb_matches SET coordinate = ST_GeomFromText(coordinate_point_text, 4326); ALTER TABLE macrostrat_two.macrostrat_temp.pbdb_matches DROP COLUMN coordinate_point_text; - SELECT * FROM macrostrat_two.macrostrat_temp.pbdb_matches LIMIT 5;""") + SELECT * FROM macrostrat_two.macrostrat_temp.pbdb_matches LIMIT 5;""" + ) - query_places = text(""" + query_places = text( + """ ALTER TABLE macrostrat_two.macrostrat_temp.places ADD COLUMN geom geometry; UPDATE macrostrat_two.macrostrat_temp.places SET geom = ST_GeomFromText(geom_text, 4326); ALTER TABLE macrostrat_two.macrostrat_temp.places DROP COLUMN geom_text; - SELECT * FROM macrostrat_two.macrostrat_temp.places LIMIT 5;""") + SELECT * FROM macrostrat_two.macrostrat_temp.places LIMIT 5;""" + ) - query_refs = text(""" + query_refs = text( + """ ALTER TABLE macrostrat_two.macrostrat_temp.refs ADD COLUMN rgeom geometry; UPDATE macrostrat_two.macrostrat_temp.refs SET rgeom = ST_GeomFromText(rgeom_text, 4326); ALTER TABLE macrostrat_two.macrostrat_temp.refs DROP COLUMN rgeom_text; - SELECT * FROM macrostrat_two.macrostrat_temp.refs LIMIT 5;""") - + SELECT * FROM macrostrat_two.macrostrat_temp.refs LIMIT 5;""" + ) - query_cols = text(""" + query_cols = text( + """ ALTER TABLE macrostrat_two.macrostrat_temp.cols ADD COLUMN coordinate geometry; UPDATE macrostrat_two.macrostrat_temp.cols SET coordinate = ST_GeomFromText(coordinate_text, 4326); ALTER TABLE macrostrat_two.macrostrat_temp.cols DROP COLUMN coordinate_text; - SELECT * FROM macrostrat_two.macrostrat_temp.cols LIMIT 5;""") + SELECT * FROM macrostrat_two.macrostrat_temp.cols LIMIT 5;""" + ) - query_col_areas = text(""" + query_col_areas = text( + """ ALTER TABLE macrostrat_two.macrostrat_temp.col_areas ADD COLUMN col_area geometry; UPDATE macrostrat_two.macrostrat_temp.col_areas SET col_area = ST_GeomFromText(col_area_text, 4326); ALTER TABLE macrostrat_two.macrostrat_temp.col_areas DROP COLUMN col_area_text; - SELECT * FROM macrostrat_two.macrostrat_temp.col_areas LIMIT 5;""") + SELECT * FROM macrostrat_two.macrostrat_temp.col_areas LIMIT 5;""" + ) - query_col_areas_6April2016 = text(""" + query_col_areas_6April2016 = text( + """ ALTER TABLE macrostrat_two.macrostrat_temp.col_areas_6April2016 ADD COLUMN col_area geometry; UPDATE macrostrat_two.macrostrat_temp.col_areas_6April2016 SET col_area = ST_GeomFromText(col_area_text, 4326); ALTER TABLE macrostrat_two.macrostrat_temp.col_areas_6April2016 DROP COLUMN col_area_text; - SELECT * FROM macrostrat_two.macrostrat_temp.col_areas_6April2016 LIMIT 5;""") - - post_script_queries = [query_pbdb_matches, query_refs, query_cols, query_places, query_col_areas, query_col_areas_6April2016] - print('Starting PostScript execution....') + SELECT * FROM macrostrat_two.macrostrat_temp.col_areas_6April2016 LIMIT 5;""" + ) + + post_script_queries = [ + query_pbdb_matches, + query_refs, + query_cols, + query_places, + query_col_areas, + query_col_areas_6April2016, + ] + print("Starting 
PostScript execution....") with engine.connect() as conn: for query in post_script_queries: try: @@ -331,27 +438,31 @@ def pg_loader_post_script(): print(row) except SQLAlchemyError as e: print(f"Error: {e}") - #rollback the transaction if an error occurs + # rollback the transaction if an error occurs conn.execute(text("ROLLBACK;")) engine.dispose() return + def pg_loader(): """ Command terminal to run pgloader. Ensure Docker app is running. """ - dockerfile_content = "FROM dimitri/pgloader:latest\n" \ - "RUN apt-get update && apt-get install -y postgresql-client\n" \ - "RUN apt-get install -y ca-certificates" + dockerfile_content = ( + "FROM dimitri/pgloader:latest\n" + "RUN apt-get update && apt-get install -y postgresql-client\n" + "RUN apt-get install -y ca-certificates" + ) with open("Dockerfile", "w") as dockerfile: dockerfile.write(dockerfile_content) os.system("docker build -t pgloader-test .") - - input_command = f"--with \"prefetch rows = 1000\" --verbose " \ - f"mysql://root:{maria_super_pass}@{maria_server}/{maria_db_name_two} " \ - f"postgresql://{pg_user_migrate}:{pg_pass_migrate}@{pg_server}/{pg_db_name_two}?sslmode=prefer" + input_command = ( + f'--with "prefetch rows = 1000" --verbose ' + f"mysql://root:{maria_super_pass}@{maria_server}/{maria_db_name_two} " + f"postgresql://{pg_user_migrate}:{pg_pass_migrate}@{pg_server}/{pg_db_name_two}?sslmode=prefer" + ) print(input_command) os.system(f"docker run -i --rm pgloader-test pgloader {input_command}") @@ -359,16 +470,22 @@ def pg_loader(): def reset(): - SQLALCHEMY_DATABASE_URI = f"{pg_user_maria_temp}:{pg_pass_maria_temp}@{pg_server}/{pg_db_name_two}" + SQLALCHEMY_DATABASE_URI = ( + f"{pg_user_maria_temp}:{pg_pass_maria_temp}@{pg_server}/{pg_db_name_two}" + ) pg_engine = create_engine(SQLALCHEMY_DATABASE_URI) - pg_drop_query = text(f"DROP SCHEMA macrostrat_temp CASCADE") # {new_migrate_schema_name} + pg_drop_query = text( + f"DROP SCHEMA macrostrat_temp CASCADE" + ) # {new_migrate_schema_name} with pg_engine.connect() as conn: conn.execute(pg_drop_query) pg_engine.dispose() - SQLALCHEMY_DATABASE_URI = f"mysql+pymysql://{maria_super_user}:" \ - f"{maria_super_pass}@{maria_server}/{maria_db_name_two}" + SQLALCHEMY_DATABASE_URI = ( + f"mysql+pymysql://{maria_super_user}:" + f"{maria_super_pass}@{maria_server}/{maria_db_name_two}" + ) maria_engine = create_engine(SQLALCHEMY_DATABASE_URI) maria_drop_query = text(f"DROP DATABASE {maria_db_name_two}") @@ -377,29 +494,42 @@ def reset(): maria_engine.dispose() - - if __name__ == "__main__": - #maria_dump(maria_server, maria_super_user, maria_super_pass, maria_db_name) - #maria_restore(maria_server, maria_super_user, maria_super_pass, maria_db_name_two) - #pg_loader_pre_script() - #pg_loader() - #pg_loader_post_script() + # maria_dump(maria_server, maria_super_user, maria_super_pass, maria_db_name) + # maria_restore(maria_server, maria_super_user, maria_super_pass, maria_db_name_two) + # pg_loader_pre_script() + # pg_loader() + # pg_loader_post_script() maria_rows, maria_columns = get_data_counts_maria() - pg_rows, pg_columns = get_data_counts_pg(pg_db_name, pg_user, pg_pass_new, 'macrostrat') - pg_macrostrat_two_rows, pg_macrostrat_two_columns = get_data_counts_pg(pg_db_name_two, pg_user_migrate, pg_pass_migrate, 'macrostrat_temp') - - print('\nMARIADB (db1) comparison to PG MACROSTRAT_TWO (db2). These should be clones. 
') - db1 = 'MariaDB' - db2 = 'PG Macrostrat_Two' - row_variance, column_variance = compare_data_counts(maria_rows, pg_macrostrat_two_rows, maria_columns, - pg_macrostrat_two_columns, db1, db2) - print('\nPG MACROSTRAT_TWO (db1 maria db clone) comparison to PG MACROSTRAT (db2). This will show what data ' - 'needs to be moved over from Maria to PG prod.') - db1 = 'PG Macrostrat_Two' - db2 = 'PG Macrostrat' - row_variance_two, column_variance_two = compare_data_counts(pg_macrostrat_two_rows, pg_rows, pg_macrostrat_two_columns, - pg_columns, db1, db2) - #reset() - #df, df_two = find_row_variances(pg_db_name, pg_db_name, pg_db_name_two, maria_db_name_two, - #pg_user, pg_pass_new, 'cols') + pg_rows, pg_columns = get_data_counts_pg( + pg_db_name, pg_user, pg_pass_new, "macrostrat" + ) + pg_macrostrat_two_rows, pg_macrostrat_two_columns = get_data_counts_pg( + pg_db_name_two, pg_user_migrate, pg_pass_migrate, "macrostrat_temp" + ) + + print( + "\nMARIADB (db1) comparison to PG MACROSTRAT_TWO (db2). These should be clones. " + ) + db1 = "MariaDB" + db2 = "PG Macrostrat_Two" + row_variance, column_variance = compare_data_counts( + maria_rows, + pg_macrostrat_two_rows, + maria_columns, + pg_macrostrat_two_columns, + db1, + db2, + ) + print( + "\nPG MACROSTRAT_TWO (db1 maria db clone) comparison to PG MACROSTRAT (db2). This will show what data " + "needs to be moved over from Maria to PG prod." + ) + db1 = "PG Macrostrat_Two" + db2 = "PG Macrostrat" + row_variance_two, column_variance_two = compare_data_counts( + pg_macrostrat_two_rows, pg_rows, pg_macrostrat_two_columns, pg_columns, db1, db2 + ) + # reset() + # df, df_two = find_row_variances(pg_db_name, pg_db_name, pg_db_name_two, maria_db_name_two, + # pg_user, pg_pass_new, 'cols') diff --git a/cli/macrostrat/cli/database/mariadb_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/_legacy.py similarity index 89% rename from cli/macrostrat/cli/database/mariadb_migration/__init__.py rename to cli/macrostrat/cli/database/mariadb/postgresql_migration/_legacy.py index d655d641..c1e1ca3f 100644 --- a/cli/macrostrat/cli/database/mariadb_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/_legacy.py @@ -1,9 +1,9 @@ from macrostrat.utils.shell import run -from .._legacy import get_db +from cli.macrostrat.cli.database._legacy import get_db from macrostrat.core import app -from ..._dev.utils import ( +from cli.macrostrat.cli._dev.utils import ( _create_database_if_not_exists, _docker_local_run_args, ) diff --git a/cli/macrostrat/cli/database/mariadb_migration/requirements.txt b/cli/macrostrat/cli/database/mariadb/postgresql_migration/requirements.txt similarity index 100% rename from cli/macrostrat/cli/database/mariadb_migration/requirements.txt rename to cli/macrostrat/cli/database/mariadb/postgresql_migration/requirements.txt diff --git a/cli/macrostrat/cli/database/mariadb/restore.py b/cli/macrostrat/cli/database/mariadb/restore.py index 3855da96..ca910b07 100644 --- a/cli/macrostrat/cli/database/mariadb/restore.py +++ b/cli/macrostrat/cli/database/mariadb/restore.py @@ -51,7 +51,8 @@ def restore_mariadb(_input: Optional[str], engine: Engine, *args, **kwargs): async def _restore_mariadb(engine: Engine, *args, **kwargs): - """Load MariaDB dump (GZipped SQL file) into a database.""" + """Load MariaDB dump (GZipped SQL file) into a database, using centrally managed credentials, + a Docker containerized `mariadb` client, and a streaming approach.""" overwrite = kwargs.pop("overwrite", False) create 
= kwargs.pop("create", overwrite) container = kwargs.pop("container", "mariadb:10.10") @@ -61,8 +62,7 @@ async def _restore_mariadb(engine: Engine, *args, **kwargs): ) conn = build_connection_args(docker_internal_url(engine.url)) - # Run pg_restore in a local Docker container - # TODO: this could also be run with pg_restore in a Kubernetes pod + # Run mariadb in a local Docker container # or another location, if more appropriate. Running on the remote # host, if possible, is probably the fastest option. There should be # multiple options ideally. From 8d7eccdb354f07b860e82bc87af4693426e0ac4a Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Fri, 26 Jul 2024 12:16:31 -0500 Subject: [PATCH 16/48] Moved pre- and post- scripts to external SQL files --- .../mariadb/postgresql_migration/__init__.py | 188 ++---------------- .../pgloader-post-script.sql | 44 ++++ .../pgloader-pre-script.sql | 65 ++++++ 3 files changed, 130 insertions(+), 167 deletions(-) create mode 100644 cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql create mode 100644 cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-pre-script.sql diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index 9a2a3767..dde84913 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -5,6 +5,7 @@ import pandas as pd from macrostrat.database.utils import run_query, run_sql from psycopg2.sql import Identifier +from pathlib import Path import time @@ -13,6 +14,8 @@ Command line in cmd.exe language """ +__here__ = Path(__file__).parent + def pg_dump(server, user, password, dbname): # TODO: integrate with existing PostgreSQL database utilities @@ -94,11 +97,12 @@ def get_data_counts_maria(): ) row_count = row_result.scalar() maria_rows[table.lower()] = row_count - - column_query = text( - f"SELECT COUNT(*) FROM information_schema.columns WHERE table_schema = '{maria_db_name_two}' AND table_name = '{table}';" + column_result = run_query( + conn, + "SELECT COUNT(*) FROM information_schema.columns WHERE table_schema = :table_schema AND table_name = :table_name", + dict(table_schema=maria_db_name_two, table_name=table), ) - column_result = conn.execute(column_query) + column_count = column_result.scalar() maria_columns[table.lower()] = column_count @@ -115,10 +119,11 @@ def get_data_counts_pg(database_name, username, password, schema): pg_columns = {} with engine.connect() as conn: - table_query = text( - f"SELECT table_name FROM information_schema.tables WHERE table_catalog = '{database_name}'" - " AND table_type = 'BASE TABLE'" - f" AND table_schema = '{schema}'" + table_query = run_query( + conn, + """SELECT table_name FROM information_schema.tables WHERE table_catalog = :table_catalog + AND table_type = 'BASE TABLE' AND table_schema = :table_schema""", + dict(table_schema=schema, table_catalog=database_name), ) table_result = conn.execute(table_query) pg_tables = [row[0] for row in table_result] @@ -139,12 +144,11 @@ def get_data_counts_pg(database_name, username, password, schema): return pg_rows, pg_columns -""" -Compares the data counts between tables, rows, and columns that vary between any two db's -""" - - def compare_data_counts(db1_rows, db2_rows, db1_columns, db2_columns, db1, db2): + """ + Compares the data counts between tables, rows, and columns that vary between any two db's + """ + 
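+    # Worked example of the set-difference comprehensions below
+    # (illustrative counts, not real data): with db1_rows = {"units": 10,
+    # "refs": 5} and db2_rows = {"units": 10}, db1_rows_not_in_db2
+    # evaluates to {"refs": (5, 0)}; the zero placeholder keeps every
+    # value a (db1_count, db2_count) pair for the reports printed below.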
db1_rows_not_in_db2 = { table_name: (db1_rows[table_name], 0) for table_name in db1_rows @@ -258,95 +262,12 @@ def find_row_variances( def pg_loader_pre_script(): - # Query alters the MariaDB tables by adding a new column for geom -> text data, - # setting the datatype of the new column data to WKT format, - # dropping the old geometry column, - # adding default values for data formats that pgloader accepts - query_pbdb_matches = """ALTER TABLE macrostrat_temp.pbdb_matches ADD COLUMN coordinate_point_text TEXT; - UPDATE macrostrat_temp.pbdb_matches SET coordinate_point_text = ST_AsText(coordinate); - ALTER TABLE macrostrat_temp.pbdb_matches DROP COLUMN coordinate; - UPDATE macrostrat_temp.pbdb_matches SET release_date = '2000-01-01' WHERE release_date = '0000-00-00 00:00:00';""" - - query_places = """ - ALTER TABLE macrostrat_temp.places ADD COLUMN geom_text LONGTEXT; - UPDATE macrostrat_temp.places - SET geom_text = ST_AsText(geom); - ALTER TABLE macrostrat_temp.places DROP COLUMN geom; - """ - - query_refs = """ - ALTER TABLE macrostrat_temp.refs ADD COLUMN rgeom_text LONGTEXT; - UPDATE macrostrat_temp.refs - SET rgeom_text = ST_AsText(rgeom); - ALTER TABLE macrostrat_temp.refs DROP COLUMN rgeom; - """ - - query_unit_contacts = """ - UPDATE unit_contacts - -- Enum data type can't be null so set to enum option 'below'. - SET contact = 'below' - WHERE contact = ''; - UPDATE unit_contacts - -- enum data type can't be null so set to enum option 'above'. - SET old_contact = 'above' - WHERE old_contact = ''; - """ - - query_cols = """ - ALTER TABLE macrostrat_temp.cols ADD COLUMN coordinate_text LONGTEXT; - UPDATE macrostrat_temp.cols - SET coordinate_text = ST_AsText(coordinate); - ALTER TABLE macrostrat_temp.cols DROP COLUMN coordinate; - UPDATE macrostrat_temp.cols - SET created = '2000-01-01' - WHERE created = '0000-00-00 00:00:00'; - """ - - query_col_areas = """ - ALTER TABLE macrostrat_temp.col_areas ADD COLUMN col_area_text LONGTEXT; - UPDATE macrostrat_temp.col_areas - SET col_areas.col_area_text = ST_AsText(col_area); - ALTER TABLE macrostrat_temp.col_areas DROP COLUMN col_area; - - """ - - query_col_areas_6April2016 = """ - ALTER TABLE macrostrat_temp.col_areas_6April2016 ADD COLUMN col_area_text LONGTEXT; - UPDATE macrostrat_temp.col_areas_6April2016 - SET col_areas_6April2016.col_area_text = ST_AsText(col_area); - ALTER TABLE macrostrat_temp.col_areas_6April2016 DROP COLUMN col_area; - """ - - query_liths = """ - UPDATE macrostrat_temp.liths - SET macrostrat_temp.lith_group = null - where macrostrat_temp.lith_group = '';""" - - pre_script_queries = [ - query_pbdb_matches, - query_places, - query_refs, - query_unit_contacts, - query_cols, - query_col_areas, - query_col_areas_6April2016, - query_liths, - ] + pre_script = __here__ / "pgloader-pre-script.sql" URL = f"mysql+pymysql://{maria_super_user}:{maria_super_pass}@{maria_server}/{maria_db_name_two}" engine = create_engine(URL) - with engine.connect() as conn: - for query in pre_script_queries: - statements = query.split(";") - for statement in statements: - if statement.strip(): - try: - conn.execute(text(statement)) - print(f"Successfully executed: {statement}") - except Exception as e: - print(f"Error with statement: {statement}\n{e}") + run_sql(engine, pre_script) engine.dispose() - return """ @@ -373,76 +294,9 @@ def pg_loader_post_script(): SQLALCHEMY_DATABASE_URI ) # connect_args={'options': '-csearch_path=public,macrostrat_temp' - query_pbdb_matches = text( - """ - ALTER TABLE macrostrat_two.macrostrat_temp.pbdb_matches 
ADD COLUMN coordinate geometry(Point, 4326); - UPDATE macrostrat_two.macrostrat_temp.pbdb_matches SET coordinate = ST_GeomFromText(coordinate_point_text, 4326); - ALTER TABLE macrostrat_two.macrostrat_temp.pbdb_matches DROP COLUMN coordinate_point_text; - SELECT * FROM macrostrat_two.macrostrat_temp.pbdb_matches LIMIT 5;""" - ) - - query_places = text( - """ - ALTER TABLE macrostrat_two.macrostrat_temp.places ADD COLUMN geom geometry; - UPDATE macrostrat_two.macrostrat_temp.places SET geom = ST_GeomFromText(geom_text, 4326); - ALTER TABLE macrostrat_two.macrostrat_temp.places DROP COLUMN geom_text; - SELECT * FROM macrostrat_two.macrostrat_temp.places LIMIT 5;""" - ) - - query_refs = text( - """ - ALTER TABLE macrostrat_two.macrostrat_temp.refs ADD COLUMN rgeom geometry; - UPDATE macrostrat_two.macrostrat_temp.refs SET rgeom = ST_GeomFromText(rgeom_text, 4326); - ALTER TABLE macrostrat_two.macrostrat_temp.refs DROP COLUMN rgeom_text; - SELECT * FROM macrostrat_two.macrostrat_temp.refs LIMIT 5;""" - ) - - query_cols = text( - """ - ALTER TABLE macrostrat_two.macrostrat_temp.cols ADD COLUMN coordinate geometry; - UPDATE macrostrat_two.macrostrat_temp.cols SET coordinate = ST_GeomFromText(coordinate_text, 4326); - ALTER TABLE macrostrat_two.macrostrat_temp.cols DROP COLUMN coordinate_text; - SELECT * FROM macrostrat_two.macrostrat_temp.cols LIMIT 5;""" - ) - - query_col_areas = text( - """ - ALTER TABLE macrostrat_two.macrostrat_temp.col_areas ADD COLUMN col_area geometry; - UPDATE macrostrat_two.macrostrat_temp.col_areas SET col_area = ST_GeomFromText(col_area_text, 4326); - ALTER TABLE macrostrat_two.macrostrat_temp.col_areas DROP COLUMN col_area_text; - SELECT * FROM macrostrat_two.macrostrat_temp.col_areas LIMIT 5;""" - ) - - query_col_areas_6April2016 = text( - """ - ALTER TABLE macrostrat_two.macrostrat_temp.col_areas_6April2016 ADD COLUMN col_area geometry; - UPDATE macrostrat_two.macrostrat_temp.col_areas_6April2016 SET col_area = ST_GeomFromText(col_area_text, 4326); - ALTER TABLE macrostrat_two.macrostrat_temp.col_areas_6April2016 DROP COLUMN col_area_text; - SELECT * FROM macrostrat_two.macrostrat_temp.col_areas_6April2016 LIMIT 5;""" - ) - - post_script_queries = [ - query_pbdb_matches, - query_refs, - query_cols, - query_places, - query_col_areas, - query_col_areas_6April2016, - ] print("Starting PostScript execution....") - with engine.connect() as conn: - for query in post_script_queries: - try: - result = conn.execute(query.execution_options(autocommit=True)) - for row in result: - print(row) - except SQLAlchemyError as e: - print(f"Error: {e}") - # rollback the transaction if an error occurs - conn.execute(text("ROLLBACK;")) - - engine.dispose() - return + post_script = __here__ / "pgloader-post-script.sql" + run_sql(engine, post_script) def pg_loader(): diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql new file mode 100644 index 00000000..6ca3084a --- /dev/null +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql @@ -0,0 +1,44 @@ +/* + The query + - Alters the MariaDB pbdb_matches table by adding a new column for the text data, + - sets the datatype of the new column data to WKT format, + - drops old geometry columns + - refreshes the database after pgloader + + */ + +CREATE EXTENSION IF NOT EXISTS postgis; + +SET search_path TO macrostrat_two, public; + +ALTER TABLE macrostrat_two.macrostrat_temp.pbdb_matches ADD 
COLUMN coordinate geometry(Point, 4326); +UPDATE macrostrat_two.macrostrat_temp.pbdb_matches SET coordinate = ST_GeomFromText(coordinate_point_text, 4326); +ALTER TABLE macrostrat_two.macrostrat_temp.pbdb_matches DROP COLUMN coordinate_point_text; +SELECT * FROM macrostrat_two.macrostrat_temp.pbdb_matches LIMIT 5; + +ALTER TABLE macrostrat_two.macrostrat_temp.places ADD COLUMN geom geometry; +UPDATE macrostrat_two.macrostrat_temp.places SET geom = ST_GeomFromText(geom_text, 4326); + +ALTER TABLE macrostrat_two.macrostrat_temp.places DROP COLUMN geom_text; +SELECT * FROM macrostrat_two.macrostrat_temp.places LIMIT 5; + +ALTER TABLE macrostrat_two.macrostrat_temp.refs ADD COLUMN rgeom geometry; +UPDATE macrostrat_two.macrostrat_temp.refs SET rgeom = ST_GeomFromText(rgeom_text, 4326); +ALTER TABLE macrostrat_two.macrostrat_temp.refs DROP COLUMN rgeom_text; +SELECT * FROM macrostrat_two.macrostrat_temp.refs LIMIT 5; + +ALTER TABLE macrostrat_two.macrostrat_temp.cols ADD COLUMN coordinate geometry; +UPDATE macrostrat_two.macrostrat_temp.cols SET coordinate = ST_GeomFromText(coordinate_text, 4326); +ALTER TABLE macrostrat_two.macrostrat_temp.cols DROP COLUMN coordinate_text; +SELECT * FROM macrostrat_two.macrostrat_temp.cols LIMIT 5; + +ALTER TABLE macrostrat_two.macrostrat_temp.col_areas ADD COLUMN col_area geometry; +UPDATE macrostrat_two.macrostrat_temp.col_areas SET col_area = ST_GeomFromText(col_area_text, 4326); +ALTER TABLE macrostrat_two.macrostrat_temp.col_areas DROP COLUMN col_area_text; +SELECT * FROM macrostrat_two.macrostrat_temp.col_areas LIMIT 5; + +ALTER TABLE macrostrat_two.macrostrat_temp.col_areas_6April2016 ADD COLUMN col_area geometry; +UPDATE macrostrat_two.macrostrat_temp.col_areas_6April2016 SET col_area = ST_GeomFromText(col_area_text, 4326); +ALTER TABLE macrostrat_two.macrostrat_temp.col_areas_6April2016 DROP COLUMN col_area_text; +SELECT * FROM macrostrat_two.macrostrat_temp.col_areas_6April2016 LIMIT 5; + diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-pre-script.sql b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-pre-script.sql new file mode 100644 index 00000000..fa5a3dd7 --- /dev/null +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-pre-script.sql @@ -0,0 +1,65 @@ +/* SQL script that + - alters the MariaDB tables by adding a new column for geom -> text data, + - sets the datatype of the new column data to WKT format, + - drops the old geometry column, + - adds default values for data formats that pgloader accepts + + NOTE: this runs in MariaDB, not PostgreSQL + */ +ALTER TABLE macrostrat_temp.pbdb_matches ADD COLUMN coordinate_point_text TEXT; + +UPDATE macrostrat_temp.pbdb_matches SET coordinate_point_text = ST_AsText(coordinate); + +ALTER TABLE macrostrat_temp.pbdb_matches DROP COLUMN coordinate; + +UPDATE macrostrat_temp.pbdb_matches SET release_date = '2000-01-01' WHERE release_date = '0000-00-00 00:00:00'; + +ALTER TABLE macrostrat_temp.places ADD COLUMN geom_text LONGTEXT; + +UPDATE macrostrat_temp.places +SET geom_text = ST_AsText(geom); +ALTER TABLE macrostrat_temp.places DROP COLUMN geom; + + +ALTER TABLE macrostrat_temp.refs ADD COLUMN rgeom_text LONGTEXT; +UPDATE macrostrat_temp.refs +SET rgeom_text = ST_AsText(rgeom); +ALTER TABLE macrostrat_temp.refs DROP COLUMN rgeom; + +UPDATE unit_contacts +-- Enum data type can't be null so set to enum option 'below'. 
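+-- (Context: MariaDB stores an invalid enum value as the empty string '';
+-- the header above notes these rows must be given a real enum label before
+-- pgloader copies them. A sanity check along these lines, assuming the same
+-- schema, should return 0 once this and the old_contact UPDATE below run:
+--   SELECT COUNT(*) FROM unit_contacts WHERE contact = '' OR old_contact = '';)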
+-- Map empty enum values to 'below'.
+UPDATE macrostrat_temp.unit_contacts
+SET contact = 'below'
+WHERE contact = '';
+
+-- Map empty enum values to 'above'.
+UPDATE macrostrat_temp.unit_contacts
+SET old_contact = 'above'
+WHERE old_contact = '';
+
+ALTER TABLE macrostrat_temp.cols ADD COLUMN coordinate_text LONGTEXT;
+
+UPDATE macrostrat_temp.cols
+SET coordinate_text = ST_AsText(coordinate);
+
+ALTER TABLE macrostrat_temp.cols DROP COLUMN coordinate;
+
+UPDATE macrostrat_temp.cols
+SET created = '2000-01-01'
+WHERE created = '0000-00-00 00:00:00';
+
+ALTER TABLE macrostrat_temp.col_areas ADD COLUMN col_area_text LONGTEXT;
+
+UPDATE macrostrat_temp.col_areas
+SET col_area_text = ST_AsText(col_area);
+
+ALTER TABLE macrostrat_temp.col_areas DROP COLUMN col_area;
+
+ALTER TABLE macrostrat_temp.col_areas_6April2016 ADD COLUMN col_area_text LONGTEXT;
+
+UPDATE macrostrat_temp.col_areas_6April2016
+SET col_area_text = ST_AsText(col_area);
+
+ALTER TABLE macrostrat_temp.col_areas_6April2016 DROP COLUMN col_area;
+
+UPDATE macrostrat_temp.liths
+SET lith_group = NULL
+WHERE lith_group = '';

From 9b8ba3407c24dac70b0e7c0157d51482badfa78e Mon Sep 17 00:00:00 2001
From: Daven Quinn
Date: Fri, 26 Jul 2024 16:36:45 -0500
Subject: [PATCH 17/48] Removed extra files created by IntelliJ

---
 cli/__init__.py            |    0
 cli/macrostrat/__init__.py |    0
 py-root/poetry.lock        | 1231 +++---------------------------------
 py-root/pyproject.toml     |    1 +
 4 files changed, 73 insertions(+), 1159 deletions(-)
 delete mode 100644 cli/__init__.py
 delete mode 100644 cli/macrostrat/__init__.py

diff --git a/py-root/poetry.lock b/py-root/poetry.lock
index 0fea3769..0b267ced 100644
--- a/py-root/poetry.lock
+++ b/py-root/poetry.lock
@@ -1,20 +1,5 @@
 # This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
-[[package]] -name = "affine" -version = "2.4.0" -description = "Matrices describing affine transformation of the plane" -optional = false -python-versions = ">=3.7" -files = [ - {file = "affine-2.4.0-py3-none-any.whl", hash = "sha256:8a3df80e2b2378aef598a83c1392efd47967afec4242021a0b06b4c7cbc61a92"}, - {file = "affine-2.4.0.tar.gz", hash = "sha256:a24d818d6a836c131976d22f8c27b8d3ca32d0af64c1d8d29deb7bafa4da1eea"}, -] - -[package.extras] -dev = ["coveralls", "flake8", "pydocstyle"] -test = ["pytest (>=4.6)", "pytest-cov"] - [[package]] name = "aiofiles" version = "23.2.1" @@ -37,26 +22,6 @@ files = [ {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"}, ] -[[package]] -name = "anyio" -version = "4.0.0" -description = "High level compatibility layer for multiple asynchronous event loop implementations" -optional = false -python-versions = ">=3.8" -files = [ - {file = "anyio-4.0.0-py3-none-any.whl", hash = "sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f"}, - {file = "anyio-4.0.0.tar.gz", hash = "sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a"}, -] - -[package.dependencies] -idna = ">=2.8" -sniffio = ">=1.1" - -[package.extras] -doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] -trio = ["trio (>=0.22)"] - [[package]] name = "appnope" version = "0.1.3" @@ -125,20 +90,6 @@ cffi = ">=1.0.1" dev = ["cogapp", "pre-commit", "pytest", "wheel"] tests = ["pytest"] -[[package]] -name = "asgiref" -version = "3.7.2" -description = "ASGI specs, helper code, and adapters" -optional = false -python-versions = ">=3.7" -files = [ - {file = "asgiref-3.7.2-py3-none-any.whl", hash = "sha256:89b2ef2247e3b562a16eef663bc0e2e703ec6468e2fa8a5cd61cd449786d4f6e"}, - {file = "asgiref-3.7.2.tar.gz", hash = "sha256:9e0ce3aa93a819ba5b45120216b23878cf6e8525eb3848653452b4192b92afed"}, -] - -[package.extras] -tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] - [[package]] name = "asttokens" version = "2.4.0" @@ -156,59 +107,6 @@ six = ">=1.12.0" [package.extras] test = ["astroid", "pytest"] -[[package]] -name = "asyncpg" -version = "0.28.0" -description = "An asyncio PostgreSQL driver" -optional = false -python-versions = ">=3.7.0" -files = [ - {file = "asyncpg-0.28.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a6d1b954d2b296292ddff4e0060f494bb4270d87fb3655dd23c5c6096d16d83"}, - {file = "asyncpg-0.28.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0740f836985fd2bd73dca42c50c6074d1d61376e134d7ad3ad7566c4f79f8184"}, - {file = "asyncpg-0.28.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e907cf620a819fab1737f2dd90c0f185e2a796f139ac7de6aa3212a8af96c050"}, - {file = "asyncpg-0.28.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86b339984d55e8202e0c4b252e9573e26e5afa05617ed02252544f7b3e6de3e9"}, - {file = "asyncpg-0.28.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0c402745185414e4c204a02daca3d22d732b37359db4d2e705172324e2d94e85"}, - {file = "asyncpg-0.28.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c88eef5e096296626e9688f00ab627231f709d0e7e3fb84bb4413dff81d996d7"}, - {file = "asyncpg-0.28.0-cp310-cp310-win32.whl", hash = "sha256:90a7bae882a9e65a9e448fdad3e090c2609bb4637d2a9c90bfdcebbfc334bf89"}, - {file = 
"asyncpg-0.28.0-cp310-cp310-win_amd64.whl", hash = "sha256:76aacdcd5e2e9999e83c8fbcb748208b60925cc714a578925adcb446d709016c"}, - {file = "asyncpg-0.28.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a0e08fe2c9b3618459caaef35979d45f4e4f8d4f79490c9fa3367251366af207"}, - {file = "asyncpg-0.28.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b24e521f6060ff5d35f761a623b0042c84b9c9b9fb82786aadca95a9cb4a893b"}, - {file = "asyncpg-0.28.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99417210461a41891c4ff301490a8713d1ca99b694fef05dabd7139f9d64bd6c"}, - {file = "asyncpg-0.28.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f029c5adf08c47b10bcdc857001bbef551ae51c57b3110964844a9d79ca0f267"}, - {file = "asyncpg-0.28.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ad1d6abf6c2f5152f46fff06b0e74f25800ce8ec6c80967f0bc789974de3c652"}, - {file = "asyncpg-0.28.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d7fa81ada2807bc50fea1dc741b26a4e99258825ba55913b0ddbf199a10d69d8"}, - {file = "asyncpg-0.28.0-cp311-cp311-win32.whl", hash = "sha256:f33c5685e97821533df3ada9384e7784bd1e7865d2b22f153f2e4bd4a083e102"}, - {file = "asyncpg-0.28.0-cp311-cp311-win_amd64.whl", hash = "sha256:5e7337c98fb493079d686a4a6965e8bcb059b8e1b8ec42106322fc6c1c889bb0"}, - {file = "asyncpg-0.28.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1c56092465e718a9fdcc726cc3d9dcf3a692e4834031c9a9f871d92a75d20d48"}, - {file = "asyncpg-0.28.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4acd6830a7da0eb4426249d71353e8895b350daae2380cb26d11e0d4a01c5472"}, - {file = "asyncpg-0.28.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63861bb4a540fa033a56db3bb58b0c128c56fad5d24e6d0a8c37cb29b17c1c7d"}, - {file = "asyncpg-0.28.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:a93a94ae777c70772073d0512f21c74ac82a8a49be3a1d982e3f259ab5f27307"}, - {file = "asyncpg-0.28.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d14681110e51a9bc9c065c4e7944e8139076a778e56d6f6a306a26e740ed86d2"}, - {file = "asyncpg-0.28.0-cp37-cp37m-win32.whl", hash = "sha256:8aec08e7310f9ab322925ae5c768532e1d78cfb6440f63c078b8392a38aa636a"}, - {file = "asyncpg-0.28.0-cp37-cp37m-win_amd64.whl", hash = "sha256:319f5fa1ab0432bc91fb39b3960b0d591e6b5c7844dafc92c79e3f1bff96abef"}, - {file = "asyncpg-0.28.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b337ededaabc91c26bf577bfcd19b5508d879c0ad009722be5bb0a9dd30b85a0"}, - {file = "asyncpg-0.28.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4d32b680a9b16d2957a0a3cc6b7fa39068baba8e6b728f2e0a148a67644578f4"}, - {file = "asyncpg-0.28.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4f62f04cdf38441a70f279505ef3b4eadf64479b17e707c950515846a2df197"}, - {file = "asyncpg-0.28.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f20cac332c2576c79c2e8e6464791c1f1628416d1115935a34ddd7121bfc6a4"}, - {file = "asyncpg-0.28.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:59f9712ce01e146ff71d95d561fb68bd2d588a35a187116ef05028675462d5ed"}, - {file = "asyncpg-0.28.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fc9e9f9ff1aa0eddcc3247a180ac9e9b51a62311e988809ac6152e8fb8097756"}, - {file = "asyncpg-0.28.0-cp38-cp38-win32.whl", hash = "sha256:9e721dccd3838fcff66da98709ed884df1e30a95f6ba19f595a3706b4bc757e3"}, - {file = "asyncpg-0.28.0-cp38-cp38-win_amd64.whl", hash = 
"sha256:8ba7d06a0bea539e0487234511d4adf81dc8762249858ed2a580534e1720db00"}, - {file = "asyncpg-0.28.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d009b08602b8b18edef3a731f2ce6d3f57d8dac2a0a4140367e194eabd3de457"}, - {file = "asyncpg-0.28.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ec46a58d81446d580fb21b376ec6baecab7288ce5a578943e2fc7ab73bf7eb39"}, - {file = "asyncpg-0.28.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b48ceed606cce9e64fd5480a9b0b9a95cea2b798bb95129687abd8599c8b019"}, - {file = "asyncpg-0.28.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8858f713810f4fe67876728680f42e93b7e7d5c7b61cf2118ef9153ec16b9423"}, - {file = "asyncpg-0.28.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5e18438a0730d1c0c1715016eacda6e9a505fc5aa931b37c97d928d44941b4bf"}, - {file = "asyncpg-0.28.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e9c433f6fcdd61c21a715ee9128a3ca48be8ac16fa07be69262f016bb0f4dbd2"}, - {file = "asyncpg-0.28.0-cp39-cp39-win32.whl", hash = "sha256:41e97248d9076bc8e4849da9e33e051be7ba37cd507cbd51dfe4b2d99c70e3dc"}, - {file = "asyncpg-0.28.0-cp39-cp39-win_amd64.whl", hash = "sha256:3ed77f00c6aacfe9d79e9eff9e21729ce92a4b38e80ea99a58ed382f42ebd55b"}, - {file = "asyncpg-0.28.0.tar.gz", hash = "sha256:7252cdc3acb2f52feaa3664280d3bcd78a46bd6c10bfd681acfffefa1120e278"}, -] - -[package.extras] -docs = ["Sphinx (>=5.3.0,<5.4.0)", "sphinx-rtd-theme (>=1.2.2)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] -test = ["flake8 (>=5.0,<6.0)", "uvloop (>=0.15.3)"] - [[package]] name = "attrs" version = "23.1.0" @@ -238,28 +136,6 @@ files = [ {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"}, ] -[[package]] -name = "buildpg" -version = "0.4" -description = "Query building for the postgresql prepared statements and asyncpg." -optional = false -python-versions = ">=3.6" -files = [ - {file = "buildpg-0.4-py3-none-any.whl", hash = "sha256:20d539976c81ea6f5529d3930016b0482ed0ff06def3d6da79d0fc0a3bbaeeb1"}, - {file = "buildpg-0.4.tar.gz", hash = "sha256:3a6c1f40fb6c826caa819d84727e36a1372f7013ba696637b492e5935916d479"}, -] - -[[package]] -name = "cachetools" -version = "5.3.1" -description = "Extensible memoizing collections and decorators" -optional = false -python-versions = ">=3.7" -files = [ - {file = "cachetools-5.3.1-py3-none-any.whl", hash = "sha256:95ef631eeaea14ba2e36f06437f36463aac3a096799e876ee55e5cdccb102590"}, - {file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"}, -] - [[package]] name = "certifi" version = "2023.7.22" @@ -482,81 +358,6 @@ click = ">=4.0" [package.extras] test = ["pytest-cov"] -[[package]] -name = "cogeo-mosaic" -version = "7.0.1" -description = "CLI and Backends to work with MosaicJSON." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "cogeo_mosaic-7.0.1-py3-none-any.whl", hash = "sha256:6319df8f5137ccefdf0501ddd3b954179fdcfee93002802129c651e429c2e0ee"}, - {file = "cogeo_mosaic-7.0.1.tar.gz", hash = "sha256:c070d5393f9346e8b7604f49bd1d659536c75591a888344a520b7cc2caa6a1cf"}, -] - -[package.dependencies] -attrs = "*" -cachetools = "*" -httpx = "*" -morecantile = ">=5.0,<6.0" -pydantic = ">=2.0,<3.0" -pydantic-settings = ">=2.0,<3.0" -rasterio = "*" -rio-tiler = ">=6.0,<7.0" -shapely = ">=2.0,<3.0" -supermorecado = "*" - -[package.extras] -aws = ["boto3"] -az = ["azure-identity", "azure-storage-blob"] -dev = ["pre-commit", "pytest", "pytest-cov"] -docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "pygments"] -gcp = ["google-cloud-storage"] -test = ["boto3", "pytest", "pytest-cov"] - -[[package]] -name = "color-operations" -version = "0.1.1" -description = "Apply basic color-oriented image operations." -optional = false -python-versions = ">=3.8" -files = [ - {file = "color-operations-0.1.1.tar.gz", hash = "sha256:e072635a3a6709d83a05fa43804d4046f22aef44edd9afbc965eb1f435121697"}, - {file = "color_operations-0.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:da23d56d81eb06864b861f05b55de31122e32965d9cccb90f974d6f9fec1737d"}, - {file = "color_operations-0.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:96f16c2d80b874f07aee9297603a751dee3441b89c09444fc8fd11d37d58239d"}, - {file = "color_operations-0.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:873d608ec87cbc89094214835d761eff14536bfe3dff29151d80e26b1dfb2878"}, - {file = "color_operations-0.1.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5dcbf94d80d42654be0d33400c27f2d1627251f77fd806dbb5b1bed6713165d4"}, - {file = "color_operations-0.1.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c671f8b0d2c6044d9143206b5636ca2416afade13149dc290f0f0893daceb6bb"}, - {file = "color_operations-0.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:02dcaeaeee276485edc1b274db7cf482d742b0577541d6650a7c045f81b2337d"}, - {file = "color_operations-0.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd3a33e8501f5172854da841dabe4d814be1e6e4946fedcc5fdf65e614298333"}, - {file = "color_operations-0.1.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ca40117b6a3fb19a55823821c08071af4ef19d28085809da55900e7ba4df0a2e"}, - {file = "color_operations-0.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9261f15141f071e420f5351bbc3d94be159cef4645464b65cd0cdd748c73a84f"}, - {file = "color_operations-0.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f199cd9244e8c2de323b38dfe7b1635f85a45a1d7bd2a6ee28e3039be4ed287f"}, - {file = "color_operations-0.1.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:828a053cb6739394241d7cb3bc02a3093b950b5ae2617ca8a2ecec5412d7b4ef"}, - {file = "color_operations-0.1.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:2cc2878484fb95a19b87742606d7b5582fa673c283f1823df5fe95ea39a801b2"}, - {file = "color_operations-0.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d0f3db513ca2be9910404e5b092362dc48de393bce41f26003e7a5e5a0e96d8d"}, - {file = "color_operations-0.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:1588e5b5e42c85818fd380d81a2cc9550132118d4bc699a7bde03ee15607d751"}, - {file = "color_operations-0.1.1-cp38-cp38-macosx_10_9_universal2.whl", hash = 
"sha256:3050ba0c69f5572f2a57274c54a886d0f1ea583d6984b862b3798be949e7892d"}, - {file = "color_operations-0.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:05471e319a15d3beaacef80d1fdab867e7a9ace4a865abeb331a8c3b3532de4a"}, - {file = "color_operations-0.1.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c355b87fe27c8721832f9959433aa62005e60760a0e5530287dbc281aa4df200"}, - {file = "color_operations-0.1.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7673fc1e90b68e038827ba022fb2e5c096ad1b299158f5d7bf0c22087ebe9b7b"}, - {file = "color_operations-0.1.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:738482018c24b063659ee22067cac03855bec52d5e6e1953bc39a8ee7400540d"}, - {file = "color_operations-0.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:bfbad9089eee9676f206dc470edb4783b27168c28652764f9760f29b912954cc"}, - {file = "color_operations-0.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:bd81d0f6b9a147f500182cce9345bc28fead5eb873571a0e5bf1040582867ca5"}, - {file = "color_operations-0.1.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:80312a1dd9c248ea7d5eec0c0efb4f2e647c3e3e3d75b7c88302199ce777617a"}, - {file = "color_operations-0.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ad138417cdc7616b675986ce1e971be3f4185b17807313c9f0902aecaa1ca4bd"}, - {file = "color_operations-0.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9091622c0a9c4ea77dc064355d9cce7333bd1f85b2346d08bc74b6fbe65a3c39"}, - {file = "color_operations-0.1.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87b384a33526ab80bacd08e44ed4c09d4d324804d7b482b1b84eef4e56b4de5c"}, - {file = "color_operations-0.1.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:cb3a88337a2c665e377ea2309d22684c6291860a1d927bcdbe9d30252f0af67b"}, - {file = "color_operations-0.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e1945dd0c03fe065a3897e7b6501b1f46072b1bc0a658c7e83f0f73a8d1d9aab"}, - {file = "color_operations-0.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:f1e3e20aa5832de4edf50d6372cedb497c8221f8e6b7afaf3b2b17d1e4e3e3c9"}, -] - -[package.dependencies] -numpy = "*" - -[package.extras] -test = ["colormath (==2.0.2)", "pytest", "pytest-cov"] - [[package]] name = "colorama" version = "0.4.6" @@ -599,74 +400,6 @@ files = [ networkx = ">=2.0" numpy = "*" -[[package]] -name = "cramjam" -version = "2.6.2" -description = "Thin Python bindings to de/compression algorithms in Rust" -optional = false -python-versions = ">=3.7" -files = [ - {file = "cramjam-2.6.2-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:b6adc0f2f2d1c4fd0f93ecef036a3217b785737ada3bc7ad4233db6ca98eafff"}, - {file = "cramjam-2.6.2-cp310-cp310-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:a6df618d5f4b4d09ccde28beb5b1e6806ff88efbfa7d67e47226afa84363db51"}, - {file = "cramjam-2.6.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edc655ec9014d5ebe827d578e03c0ae2839b05fba6dcddf412059e3f7e4a3a68"}, - {file = "cramjam-2.6.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:656505c16ece09d54a98b0128d0ce3e75a09ed27aafa9fc36c6881b736f9740b"}, - {file = "cramjam-2.6.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9928cb6703209c9a5474863973d09ab3c5cfbc10c051acec9af917413f64026b"}, - {file = "cramjam-2.6.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:664a1ae58d735c92551cfbbc09d9398bb218c7e6833b2392fece71af1dcbeedd"}, - {file = "cramjam-2.6.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76db0782d610644be01a6aabad16d51a5989c58a07b27353b7c10ce1fe8cdfd3"}, - {file = "cramjam-2.6.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a4b9aa1eefb690a16bab8aaa522d6e4595b86a0e4924f062ec261771061434c5"}, - {file = "cramjam-2.6.2-cp310-none-win32.whl", hash = "sha256:a15db0c29278517465eee33bb5a4637930673ead242c98c81e613de0486ed00d"}, - {file = "cramjam-2.6.2-cp310-none-win_amd64.whl", hash = "sha256:a89a48527cf416a7b4fcd97e924fa8784b51ec4c38911c4454663648b4a4914f"}, - {file = "cramjam-2.6.2-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:70c725e90d7beede63e663a335eb8adf2038a5b1a5f5ae32fcfa25cda164b520"}, - {file = "cramjam-2.6.2-cp311-cp311-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:88f847313b6b3c4191eb15f74f3354f126297b7a51773cbfcc6a2917ecdcf40e"}, - {file = "cramjam-2.6.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ddc0776fc98cbd7967a3c243666363eb88e5d32c2e9640b8f59f4f5cd2934161"}, - {file = "cramjam-2.6.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4750df49a08396fbc06cf1fcf4055daa5e009f5e06e7fb5d70b23266f5bb28cc"}, - {file = "cramjam-2.6.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63ba3affd35c780cd69382b022fade08b1b14e82f45aa86576e10b5520f21ffe"}, - {file = "cramjam-2.6.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f09dbb5cde9d4130677875c68e53568829e00bda3eb85b880190e8c56ba7af73"}, - {file = "cramjam-2.6.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33d4bd5e280f055da306b0c78c72af4e166018bea3b38c50a44b6264188cfe10"}, - {file = "cramjam-2.6.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1797a1367e9f854be2173e07812a096aec67e0e4891ce3c527d1536fb3023344"}, - {file = "cramjam-2.6.2-cp311-none-win32.whl", hash = "sha256:b772394920b48af69db4b7b3b2e2684524f4b6d73a8e8e595811e2cc9d2fbee5"}, - {file = "cramjam-2.6.2-cp311-none-win_amd64.whl", hash = "sha256:6455273781378befa00d096d9a58bcaee2c34f59057149220dd8edd185627b59"}, - {file = "cramjam-2.6.2-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:514afdeca432f1b870f4c15c51b2538f841ea36500d8b2b63c437e949a48d737"}, - {file = "cramjam-2.6.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a6a1df30ac4da907499bf9f160ee25947609e94f4c2093c6b1cb63698c61d17"}, - {file = "cramjam-2.6.2-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:201924909735626eee4dcf841e720634ce753c3af30687c20958a0836221f6c2"}, - {file = "cramjam-2.6.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d1d3d73125fa692974ccf0c0af4a444564e52c45b933527d4c047630b8c4b78f"}, - {file = "cramjam-2.6.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:02c0d09552137f07c8ea3681c215ce94e8431b7eaa5f293b747a24e9038c5d5c"}, - {file = "cramjam-2.6.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bd152234396942ca12d755f4dd5ab2e39f478c5900c290e4f9582bcc2290988"}, - {file = "cramjam-2.6.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f45b2a6776637edd5017c5c1c6a167243a8d2250698e10971ce8da015ed43442"}, - {file = "cramjam-2.6.2-cp37-none-win32.whl", hash = "sha256:2313e5353176b8659f9e157cc8ef64d7f6d80d381ae493abba0b6b213a8cb2ea"}, - 
{file = "cramjam-2.6.2-cp37-none-win_amd64.whl", hash = "sha256:83041d02a9c3f09a41d5687f0a5dd2e5e591c6f5d7ccceba2d4788adf58dccb7"}, - {file = "cramjam-2.6.2-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:489a16df91d10825ed47f95b985f8e353c8c8b4e078f571fd84e38a8ca95284b"}, - {file = "cramjam-2.6.2-cp38-cp38-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:ea42c629046964bcfa9971d0a978fb647d769f726f69ad39a8c6c5dc435616ad"}, - {file = "cramjam-2.6.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a6b474523eb23d090812ace971ce28297664913b0d9b753f6648c766af7dc7e"}, - {file = "cramjam-2.6.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:54c945fe1ab67bcd9ca90626176ec4fb74354c698e83c992641a5c4834dda675"}, - {file = "cramjam-2.6.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d2256470f96c3ab0aba3b45f62708ea8eb98603f3417d9d1d3bd5cb4140fbf56"}, - {file = "cramjam-2.6.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe176dcb03ec241c48b6af4be800f3e99f6b5be52ea2b660511374be709be926"}, - {file = "cramjam-2.6.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60db085bff42099c4de9a2a0b10284ab4b1032d356193ada6275d3225dc16b0e"}, - {file = "cramjam-2.6.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6b98fb0ebc565298fab857bc09273c0fb57167a2703c51436f7f423ca62b8009"}, - {file = "cramjam-2.6.2-cp38-none-win32.whl", hash = "sha256:fa5ae1851a9fa93c3d6f1f2051d2a51438479476f2a07dd0f04e47d23ceea708"}, - {file = "cramjam-2.6.2-cp38-none-win_amd64.whl", hash = "sha256:6b78702dbc1a4b1f4da613c63c7be578d418a561025432e1e0400b0274800917"}, - {file = "cramjam-2.6.2-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:29769fbbb56cbada20ad66d0aa268a8f55dcef99053c5c16780394d5d656815a"}, - {file = "cramjam-2.6.2-cp39-cp39-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:35da290896b2953e3056441b6b42d3576588dddee28ae6a890b03047929ae34d"}, - {file = "cramjam-2.6.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:076ce14ec4bc99b7de72e2da328b350d8e22d50a9a6880e0538863ef65d6d507"}, - {file = "cramjam-2.6.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0cfa188b95fb311892504df236f45029c03d8ac68634a6b5bb66487ee2d43f0e"}, - {file = "cramjam-2.6.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9034e7591fd689f17b5de830026a75f9c1788f0c78416a0845ba4c91cf4e896c"}, - {file = "cramjam-2.6.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3a55e68ed847b5fd8d151f9998d150d47d689cedbf89c11c0c05db656fd6336"}, - {file = "cramjam-2.6.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b621ce7d92a6620fb1737892b7b00a5911d92f80f0d5b454795ba1cd844e51"}, - {file = "cramjam-2.6.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a9add823575629c026625080b92ff44ba7bb6ade7f661252a07e6b300e1a689b"}, - {file = "cramjam-2.6.2-cp39-none-win32.whl", hash = "sha256:f0c83c643b1fe6a79a938e453c139c73a196182d47915897b2cbadf46531a3a5"}, - {file = "cramjam-2.6.2-cp39-none-win_amd64.whl", hash = "sha256:62c1ecc70be62e9dd5176949f2da6488f1e8981d33fd241a874f2f25f6fed3bf"}, - {file = "cramjam-2.6.2-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:4b322f268461e66530bfd851ae7e33eb37829284f6326d831b96eed1fbfee554"}, - {file = "cramjam-2.6.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash 
= "sha256:fa655603b0cf88e029f4d328d10bb6b78866a388c8bda4e5d17b5b644827d8cf"}, - {file = "cramjam-2.6.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:797beb5629e814766b6ebbc73625a6f741739ca302ec1bcb12b47e39e8a1e4d7"}, - {file = "cramjam-2.6.2-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:3a798f52a0cf6780f5b5aacc4a2ea06737f12b98762083d88c3e1ac6315260c7"}, - {file = "cramjam-2.6.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:755cdc43d79de9da949be797ee6f773a85cdec493f0339f48d944ebb7cc9342e"}, - {file = "cramjam-2.6.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8bc899d401055d7e54ce0182eda303c80f22c4a3271a01f44c72d51a07c4fed"}, - {file = "cramjam-2.6.2-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:02a07a3e17fab7f1b1cf81c8fd80416bd06ca0a508cd8e379e280dc591641e14"}, - {file = "cramjam-2.6.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:346f4b3c7845ea1c5f0bc4e1c6cfd39153c399e3c03262d4c9e6575edb16c15a"}, - {file = "cramjam-2.6.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a37eab7095c4bd4ae81d5c8c16c8fcf0e5c1c85c732716e2e7d6fdd873fc756"}, - {file = "cramjam-2.6.2.tar.gz", hash = "sha256:1ffdc8d1381b5fee57b33b537e38fa7fd29e8d8f3b544dbab1d71dbfaaec3bef"}, -] - [[package]] name = "criticalmaas-ta1-geopackage" version = "0.2.1" @@ -752,25 +485,6 @@ files = [ [package.extras] tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"] -[[package]] -name = "fastapi" -version = "0.100.1" -description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" -optional = false -python-versions = ">=3.7" -files = [ - {file = "fastapi-0.100.1-py3-none-any.whl", hash = "sha256:ec6dd52bfc4eff3063cfcd0713b43c87640fefb2687bbbe3d8a08d94049cdf32"}, - {file = "fastapi-0.100.1.tar.gz", hash = "sha256:522700d7a469e4a973d92321ab93312448fbe20fca9c8da97effc7e7bc56df23"}, -] - -[package.dependencies] -pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<3.0.0" -starlette = ">=0.27.0,<0.28.0" -typing-extensions = ">=4.5.0" - -[package.extras] -all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.5)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] - [[package]] name = "fiona" version = "1.9.5" @@ -848,25 +562,6 @@ files = [ {file = "geojson-2.5.0.tar.gz", hash = "sha256:6e4bb7ace4226a45d9c8c8b1348b3fc43540658359f93c3f7e03efa9f15f658a"}, ] -[[package]] -name = "geojson-pydantic" -version = "1.0.1" -description = "Pydantic data models for the GeoJSON spec." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "geojson_pydantic-1.0.1-py3-none-any.whl", hash = "sha256:da8c15f15a0a9fc3e0af0253f0c2bb8a948f95ece9a0356f43d4738fa2be5107"}, - {file = "geojson_pydantic-1.0.1.tar.gz", hash = "sha256:a996ffccd5a016d3acb4a0c6aac941d2c569e3c6163d5ce6a04b61ee131c8f94"}, -] - -[package.dependencies] -pydantic = ">=2.0,<3.0" - -[package.extras] -dev = ["pre-commit"] -docs = ["mkdocs", "mkdocs-material", "pygments"] -test = ["pytest", "pytest-cov", "shapely"] - [[package]] name = "geopandas" version = "0.14.2" @@ -956,61 +651,6 @@ files = [ docs = ["Sphinx", "furo"] test = ["objgraph", "psutil"] -[[package]] -name = "h11" -version = "0.14.0" -description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" -optional = false -python-versions = ">=3.7" -files = [ - {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, - {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, -] - -[[package]] -name = "httpcore" -version = "0.18.0" -description = "A minimal low-level HTTP client." -optional = false -python-versions = ">=3.8" -files = [ - {file = "httpcore-0.18.0-py3-none-any.whl", hash = "sha256:adc5398ee0a476567bf87467063ee63584a8bce86078bf748e48754f60202ced"}, - {file = "httpcore-0.18.0.tar.gz", hash = "sha256:13b5e5cd1dca1a6636a6aaea212b19f4f85cd88c366a2b82304181b769aab3c9"}, -] - -[package.dependencies] -anyio = ">=3.0,<5.0" -certifi = "*" -h11 = ">=0.13,<0.15" -sniffio = "==1.*" - -[package.extras] -http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] - -[[package]] -name = "httpx" -version = "0.25.0" -description = "The next generation HTTP client." -optional = false -python-versions = ">=3.8" -files = [ - {file = "httpx-0.25.0-py3-none-any.whl", hash = "sha256:181ea7f8ba3a82578be86ef4171554dd45fec26a02556a744db029a0a27b7100"}, - {file = "httpx-0.25.0.tar.gz", hash = "sha256:47ecda285389cb32bb2691cc6e069e3ab0205956f681c5b2ad2325719751d875"}, -] - -[package.dependencies] -certifi = "*" -httpcore = ">=0.18.0,<0.19.0" -idna = "*" -sniffio = "*" - -[package.extras] -brotli = ["brotli", "brotlicffi"] -cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] -http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] - [[package]] name = "idna" version = "3.4" @@ -1022,17 +662,6 @@ files = [ {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, ] -[[package]] -name = "iniconfig" -version = "2.0.0" -description = "brain-dead simple config-ini parsing" -optional = false -python-versions = ">=3.7" -files = [ - {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, - {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, -] - [[package]] name = "ipython" version = "8.16.1" @@ -1090,23 +719,6 @@ docs = ["Jinja2 (==2.11.3)", "MarkupSafe (==1.1.1)", "Pygments (==2.8.1)", "alab qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"] testing = ["Django", "attrs", "colorama", "docopt", "pytest (<7.0.0)"] -[[package]] -name = "jinja2" -version = "3.1.2" -description = "A very fast and expressive template engine." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, - {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, -] - -[package.dependencies] -MarkupSafe = ">=2.0" - -[package.extras] -i18n = ["Babel (>=2.7)"] - [[package]] name = "macrostrat-app-frame" version = "1.2.4" @@ -1145,6 +757,7 @@ greenlet = "^3.0.3" ipython = "^8.5.0" "macrostrat.app-frame" = "^1.2.4" "macrostrat.database" = "^3.1.1" +macrostrat-dinosaur = "^3.0.1" numpy = "^1.23.4" psycopg2-binary = "^2.9.4" PyMySQL = "^1.0.2" @@ -1209,6 +822,29 @@ SQLAlchemy = ">=2.0.18,<3.0.0" SQLAlchemy-Utils = ">=0.41.1,<0.42.0" sqlparse = ">=0.4.4,<0.5.0" +[[package]] +name = "macrostrat-dinosaur" +version = "3.0.1" +description = "Diff-based database migrations" +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "macrostrat_dinosaur-3.0.1-py3-none-any.whl", hash = "sha256:1caba5f9196abef41b4ac35a8224946f442e47d96a34a088234605b4e692560d"}, + {file = "macrostrat_dinosaur-3.0.1.tar.gz", hash = "sha256:3339b2fad6b71d31d5a1b563a53dc96b97ebaf2bbf5fb1ab0dde28772f78facb"}, +] + +[package.dependencies] +docker = ">=6.0.1,<7.0.0" +GeoAlchemy2 = ">=0.14.0,<0.15.0" +"macrostrat.database" = ">=3.1.2,<4.0.0" +"macrostrat.utils" = ">=1.2.1,<2.0.0" +migra = ">=3.0.1621480950,<4.0.0" +psycopg2-binary = ">=2.9.1,<3.0.0" +schemainspect = ">=3.0.1616029793,<4.0.0" +SQLAlchemy = ">=2.0.0,<3.0.0" +SQLAlchemy-Utils = ">=0.41.1,<0.42.0" +sqlparse = ">=0.4.0,<0.5.0" + [[package]] name = "macrostrat-map-integration" version = "2.0.0" @@ -1256,33 +892,6 @@ rich = ">=13.3.5,<14.0.0" toml = ">=0.10.2,<0.11.0" typer = ">=0.9.0,<0.10.0" -[[package]] -name = "macrostrat-tileserver" -version = "2.1.0" -description = "Macrostrat tile server" -optional = false -python-versions = "^3.8" -files = [] -develop = true - -[package.dependencies] -fastapi = "^0.100.0" -"macrostrat.database" = "^3.0" -"macrostrat.utils" = "^1.2.0" -pytest = "^7.2.1" -python-dotenv = "^1.0.0" -timvt = {path = "deps/timvt", develop = true} -titiler = "^0.15.0" -typer = "^0.9" -uvicorn = "0.16.0" - -[package.extras] -raster-tiles = [] - -[package.source] -type = "directory" -url = "../../../Software/tileserver" - [[package]] name = "macrostrat-utils" version = "1.2.1" @@ -1322,75 +931,6 @@ profiling = ["gprof2dot"] rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] -[[package]] -name = "markupsafe" -version = "2.1.3" -description = "Safely add untrusted strings to HTML/XML markup." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, - {file = 
"MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"}, - {file = 
"MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, - {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, -] - [[package]] name = "matplotlib-inline" version = "0.1.6" @@ -1416,6 +956,25 @@ files = [ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] +[[package]] +name = "migra" +version = "3.0.1663481299" +description = "Like `diff` but for PostgreSQL schemas" +optional = false +python-versions = ">=3.7,<4" +files = [ + {file = "migra-3.0.1663481299-py3-none-any.whl", hash = "sha256:061643e9af63488e085d729f267ed4af4249789979732b703ddeb2c478ec9a93"}, + {file = "migra-3.0.1663481299.tar.gz", hash = "sha256:0cf0c125d553008d9ff5402663a51703ccc474bb65b5a4f4727906dbf58e217f"}, +] + +[package.dependencies] +schemainspect = ">=3.1.1663480743" +six = "*" +sqlbag = "*" + +[package.extras] +pg = ["psycopg2-binary"] + [[package]] name = "minio" version = "7.2.5" @@ -1434,28 +993,6 @@ pycryptodome = "*" typing-extensions = "*" urllib3 = "*" -[[package]] -name = "morecantile" -version = "5.0.0" -description = "Construct and use map tile grids (a.k.a TileMatrixSet / TMS)." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "morecantile-5.0.0-py3-none-any.whl", hash = "sha256:9f7bbc2e8642b63cdd11c66cc5d2e557a9fd28f0dbeaaa5e718ae5f8e0c0e554"}, - {file = "morecantile-5.0.0.tar.gz", hash = "sha256:0c86d2c9449fabbd406278e84cde4b0fa65aee5c757e155a74b4b7261236c77d"}, -] - -[package.dependencies] -attrs = "*" -pydantic = ">=2.0,<3.0" -pyproj = ">=3.1,<4.0" - -[package.extras] -dev = ["pre-commit"] -docs = ["mkdocs", "mkdocs-material", "pygments"] -rasterio = ["rasterio (>=1.2.1)"] -test = ["mercantile", "pytest", "pytest-cov", "rasterio (>=1.2.1)"] - [[package]] name = "networkx" version = "3.2" @@ -1474,47 +1011,6 @@ doc = ["nb2plots (>=0.7)", "nbconvert (<7.9)", "numpydoc (>=1.6)", "pillow (>=9. extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.11)", "sympy (>=1.10)"] test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] -[[package]] -name = "numexpr" -version = "2.8.7" -description = "Fast numerical expression evaluator for NumPy" -optional = false -python-versions = ">=3.9" -files = [ - {file = "numexpr-2.8.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d88531ffea3ea9287e8a1665c6a2d0206d3f4660d5244423e2a134a7f0ce5fba"}, - {file = "numexpr-2.8.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db1065ba663a854115cf1f493afd7206e2efcef6643129e8061e97a51ad66ebb"}, - {file = "numexpr-2.8.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4546416004ff2e7eb9cf52c2d7ab82732b1b505593193ee9f93fa770edc5230"}, - {file = "numexpr-2.8.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb2f473fdfd09d17db3038e34818d05b6bc561a36785aa927d6c0e06bccc9911"}, - {file = "numexpr-2.8.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5496fc9e3ae214637cbca1ab556b0e602bd3afe9ff4c943a29c482430972cda8"}, - {file = "numexpr-2.8.7-cp310-cp310-win32.whl", hash = "sha256:d43f1f0253a6f2db2f76214e6f7ae9611b422cba3f7d4c86415d7a78bbbd606f"}, - {file = "numexpr-2.8.7-cp310-cp310-win_amd64.whl", hash = "sha256:cf5f112bce5c5966c47cc33700bc14ce745c8351d437ed57a9574fff581f341a"}, - {file = "numexpr-2.8.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:32934d51b5bc8a6636436326da79ed380e2f151989968789cf65b1210572cb46"}, - {file = "numexpr-2.8.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f021ac93cb3dd5d8ba2882627b615b1f58cb089dcc85764c6fbe7a549ed21b0c"}, - {file = "numexpr-2.8.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dccf572763517db6562fb7b17db46aacbbf62a9ca0a66672872f4f71aee7b186"}, - {file = "numexpr-2.8.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11121b14ee3179bade92e823f25f1b94e18716d33845db5081973331188c3338"}, - {file = "numexpr-2.8.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:81451962d4145a46dba189df65df101d4d1caddb6efe6ebfe05982cd9f62b2cf"}, - {file = "numexpr-2.8.7-cp311-cp311-win32.whl", hash = "sha256:da55ba845b847cc33c4bf81cee4b1bddfb0831118cabff8db62888ab8697ec34"}, - {file = "numexpr-2.8.7-cp311-cp311-win_amd64.whl", hash = "sha256:fd93b88d5332069916fa00829ea1b972b7e73abcb1081eee5c905a514b8b59e3"}, - {file = "numexpr-2.8.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5340d2c86d83f52e1a3e7fd97c37d358ae99af9de316bdeeab2565b9b1e622ca"}, - {file = "numexpr-2.8.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f3bdf8cbc00c77a46230c765d242f92d35905c239b20c256c48dbac91e49f253"}, - {file = "numexpr-2.8.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:d46c47e361fa60966a3339cb4f463ae6151ce7d78ed38075f06e8585d2c8929f"}, - {file = "numexpr-2.8.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a371cfc1670a18eea2d5c70abaa95a0e8824b70d28da884bad11931266e3a0ca"}, - {file = "numexpr-2.8.7-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:47a249cecd1382d482a5bf1fac0d11392fb2ed0f7d415ebc4cd901959deb1ec9"}, - {file = "numexpr-2.8.7-cp312-cp312-win32.whl", hash = "sha256:b8a5b2c21c26b62875bf819d375d798b96a32644e3c28bd4ce7789ed1fb489da"}, - {file = "numexpr-2.8.7-cp312-cp312-win_amd64.whl", hash = "sha256:f29f4d08d9b0ed6fa5d32082971294b2f9131b8577c2b7c36432ed670924313f"}, - {file = "numexpr-2.8.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4ecaa5be24cf8fa0f00108e9dfa1021b7510e9dd9d159b8d8bc7c7ddbb995b31"}, - {file = "numexpr-2.8.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3a84284e0a407ca52980fd20962e89aff671c84cd6e73458f2e29ea2aa206356"}, - {file = "numexpr-2.8.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e838289e3b7bbe100b99e35496e6cc4cc0541c2207078941ee5a1d46e6b925ae"}, - {file = "numexpr-2.8.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0983052f308ea75dd232eb7f4729eed839db8fe8d82289940342b32cc55b15d0"}, - {file = "numexpr-2.8.7-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8bf005acd7f1985c71b1b247aaac8950d6ea05a0fe0bbbbf3f96cd398b136daa"}, - {file = "numexpr-2.8.7-cp39-cp39-win32.whl", hash = "sha256:56ec95f8d1db0819e64987dcf1789acd500fa4ea396eeabe4af6efdcb8902d07"}, - {file = "numexpr-2.8.7-cp39-cp39-win_amd64.whl", hash = "sha256:c7bf60fc1a9c90a9cb21c4c235723e579bff70c8d5362228cb2cf34426104ba2"}, - {file = "numexpr-2.8.7.tar.gz", hash = "sha256:596eeb3bbfebc912f4b6eaaf842b61ba722cebdb8bc42dfefa657d3a74953849"}, -] - -[package.dependencies] -numpy = ">=1.13.3" - [[package]] name = "numpy" version = "1.26.3" @@ -1560,65 +1056,6 @@ files = [ {file = "numpy-1.26.3.tar.gz", hash = "sha256:697df43e2b6310ecc9d95f05d5ef20eacc09c7c4ecc9da3f235d39e71b7da1e4"}, ] -[[package]] -name = "orjson" -version = "3.9.9" -description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" -optional = false -python-versions = ">=3.8" -files = [ - {file = "orjson-3.9.9-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:f28090060a31f4d11221f9ba48b2273b0d04b702f4dcaa197c38c64ce639cc51"}, - {file = "orjson-3.9.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8038ba245d0c0a6337cfb6747ea0c51fe18b0cf1a4bc943d530fd66799fae33d"}, - {file = "orjson-3.9.9-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:543b36df56db195739c70d645ecd43e49b44d5ead5f8f645d2782af118249b37"}, - {file = "orjson-3.9.9-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8e7877256b5092f1e4e48fc0f1004728dc6901e7a4ffaa4acb0a9578610aa4ce"}, - {file = "orjson-3.9.9-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:12b83e0d8ba4ca88b894c3e00efc59fe6d53d9ffb5dbbb79d437a466fc1a513d"}, - {file = "orjson-3.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ef06431f021453a47a9abb7f7853f04f031d31fbdfe1cc83e3c6aadde502cce"}, - {file = "orjson-3.9.9-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0a1a4d9e64597e550428ba091e51a4bcddc7a335c8f9297effbfa67078972b5c"}, - {file = "orjson-3.9.9-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:879d2d1f6085c9c0831cec6716c63aaa89e41d8e036cabb19a315498c173fcc6"}, - {file = "orjson-3.9.9-cp310-none-win32.whl", hash = "sha256:d3f56e41bc79d30fdf077073072f2377d2ebf0b946b01f2009ab58b08907bc28"}, - {file = "orjson-3.9.9-cp310-none-win_amd64.whl", hash = "sha256:ab7bae2b8bf17620ed381e4101aeeb64b3ba2a45fc74c7617c633a923cb0f169"}, - {file = "orjson-3.9.9-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:31d676bc236f6e919d100fb85d0a99812cff1ebffaa58106eaaec9399693e227"}, - {file = "orjson-3.9.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:678ffb5c0a6b1518b149cc328c610615d70d9297e351e12c01d0beed5d65360f"}, - {file = "orjson-3.9.9-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a71b0cc21f2c324747bc77c35161e0438e3b5e72db6d3b515310457aba743f7f"}, - {file = "orjson-3.9.9-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae72621f216d1d990468291b1ec153e1b46e0ed188a86d54e0941f3dabd09ee8"}, - {file = "orjson-3.9.9-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:512e5a41af008e76451f5a344941d61f48dddcf7d7ddd3073deb555de64596a6"}, - {file = "orjson-3.9.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f89dc338a12f4357f5bf1b098d3dea6072fb0b643fd35fec556f4941b31ae27"}, - {file = "orjson-3.9.9-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:957a45fb201c61b78bcf655a16afbe8a36c2c27f18a998bd6b5d8a35e358d4ad"}, - {file = "orjson-3.9.9-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d1c01cf4b8e00c7e98a0a7cf606a30a26c32adf2560be2d7d5d6766d6f474b31"}, - {file = "orjson-3.9.9-cp311-none-win32.whl", hash = "sha256:397a185e5dd7f8ebe88a063fe13e34d61d394ebb8c70a443cee7661b9c89bda7"}, - {file = "orjson-3.9.9-cp311-none-win_amd64.whl", hash = "sha256:24301f2d99d670ded4fb5e2f87643bc7428a54ba49176e38deb2887e42fe82fb"}, - {file = "orjson-3.9.9-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:bd55ea5cce3addc03f8fb0705be0cfed63b048acc4f20914ce5e1375b15a293b"}, - {file = "orjson-3.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b28c1a65cd13fff5958ab8b350f0921121691464a7a1752936b06ed25c0c7b6e"}, - {file = "orjson-3.9.9-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b97a67c47840467ccf116136450c50b6ed4e16a8919c81a4b4faef71e0a2b3f4"}, - {file = "orjson-3.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:75b805549cbbcb963e9c9068f1a05abd0ea4c34edc81f8d8ef2edb7e139e5b0f"}, - {file = "orjson-3.9.9-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5424ecbafe57b2de30d3b5736c5d5835064d522185516a372eea069b92786ba6"}, - {file = "orjson-3.9.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d2cd6ef4726ef1b8c63e30d8287225a383dbd1de3424d287b37c1906d8d2855"}, - {file = "orjson-3.9.9-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c959550e0705dc9f59de8fca1a316da0d9b115991806b217c82931ac81d75f74"}, - {file = "orjson-3.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ece2d8ed4c34903e7f1b64fb1e448a00e919a4cdb104fc713ad34b055b665fca"}, - {file = "orjson-3.9.9-cp312-none-win_amd64.whl", hash = "sha256:f708ca623287186e5876256cb30599308bce9b2757f90d917b7186de54ce6547"}, - {file = "orjson-3.9.9-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = 
"sha256:335406231f9247f985df045f0c0c8f6b6d5d6b3ff17b41a57c1e8ef1a31b4d04"}, - {file = "orjson-3.9.9-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d9b5440a5d215d9e1cfd4aee35fd4101a8b8ceb8329f549c16e3894ed9f18b5"}, - {file = "orjson-3.9.9-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e98ca450cb4fb176dd572ce28c6623de6923752c70556be4ef79764505320acb"}, - {file = "orjson-3.9.9-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a3bf6ca6bce22eb89dd0650ef49c77341440def966abcb7a2d01de8453df083a"}, - {file = "orjson-3.9.9-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb50d869b3c97c7c5187eda3759e8eb15deb1271d694bc5d6ba7040db9e29036"}, - {file = "orjson-3.9.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fcf06c69ccc78e32d9f28aa382ab2ab08bf54b696dbe00ee566808fdf05da7d"}, - {file = "orjson-3.9.9-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9a4402e7df1b5c9a4c71c7892e1c8f43f642371d13c73242bda5964be6231f95"}, - {file = "orjson-3.9.9-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b20becf50d4aec7114dc902b58d85c6431b3a59b04caa977e6ce67b6fee0e159"}, - {file = "orjson-3.9.9-cp38-none-win32.whl", hash = "sha256:1f352117eccac268a59fedac884b0518347f5e2b55b9f650c2463dd1e732eb61"}, - {file = "orjson-3.9.9-cp38-none-win_amd64.whl", hash = "sha256:c4eb31a8e8a5e1d9af5aa9e247c2a52ad5cf7e968aaa9aaefdff98cfcc7f2e37"}, - {file = "orjson-3.9.9-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:4a308aeac326c2bafbca9abbae1e1fcf682b06e78a54dad0347b760525838d85"}, - {file = "orjson-3.9.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e159b97f5676dcdac0d0f75ec856ef5851707f61d262851eb41a30e8fadad7c9"}, - {file = "orjson-3.9.9-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f692e7aabad92fa0fff5b13a846fb586b02109475652207ec96733a085019d80"}, - {file = "orjson-3.9.9-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cffb77cf0cd3cbf20eb603f932e0dde51b45134bdd2d439c9f57924581bb395b"}, - {file = "orjson-3.9.9-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c63eca397127ebf46b59c9c1fb77b30dd7a8fc808ac385e7a58a7e64bae6e106"}, - {file = "orjson-3.9.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06f0c024a75e8ba5d9101facb4fb5a028cdabe3cdfe081534f2a9de0d5062af2"}, - {file = "orjson-3.9.9-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8cba20c9815c2a003b8ca4429b0ad4aa87cb6649af41365821249f0fd397148e"}, - {file = "orjson-3.9.9-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:906cac73b7818c20cf0f6a7dde5a6f009c52aecc318416c7af5ea37f15ca7e66"}, - {file = "orjson-3.9.9-cp39-none-win32.whl", hash = "sha256:50232572dd300c49f134838c8e7e0917f29a91f97dbd608d23f2895248464b7f"}, - {file = "orjson-3.9.9-cp39-none-win_amd64.whl", hash = "sha256:920814e02e3dd7af12f0262bbc18b9fe353f75a0d0c237f6a67d270da1a1bb44"}, - {file = "orjson-3.9.9.tar.gz", hash = "sha256:02e693843c2959befdd82d1ebae8b05ed12d1cb821605d5f9fe9f98ca5c9fd2b"}, -] - [[package]] name = "packaging" version = "23.2" @@ -1714,21 +1151,6 @@ files = [ {file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"}, ] -[[package]] -name = "pluggy" -version = "1.3.0" -description = "plugin and hook calling mechanisms for python" -optional = false -python-versions = ">=3.8" -files = [ - {file = 
"pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, - {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, -] - -[package.extras] -dev = ["pre-commit", "tox"] -testing = ["pytest", "pytest-benchmark"] - [[package]] name = "prompt-toolkit" version = "3.0.39" @@ -2038,21 +1460,6 @@ files = [ [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" -[[package]] -name = "pydantic-settings" -version = "2.0.3" -description = "Settings management using Pydantic" -optional = false -python-versions = ">=3.7" -files = [ - {file = "pydantic_settings-2.0.3-py3-none-any.whl", hash = "sha256:ddd907b066622bd67603b75e2ff791875540dc485b7307c4fffc015719da8625"}, - {file = "pydantic_settings-2.0.3.tar.gz", hash = "sha256:962dc3672495aad6ae96a4390fac7e593591e144625e5112d359f8f67fb75945"}, -] - -[package.dependencies] -pydantic = ">=2.0.1" -python-dotenv = ">=0.21.0" - [[package]] name = "pygments" version = "2.16.1" @@ -2127,20 +1534,6 @@ dev = ["Cython"] geopandas = ["geopandas"] test = ["pytest", "pytest-cov"] -[[package]] -name = "pyparsing" -version = "3.1.1" -description = "pyparsing module - Classes and methods to define and execute parsing grammars" -optional = false -python-versions = ">=3.6.8" -files = [ - {file = "pyparsing-3.1.1-py3-none-any.whl", hash = "sha256:32c7c0b711493c72ff18a981d24f28aaf9c1fb7ed5e9667c9e84e3db623bdbfb"}, - {file = "pyparsing-3.1.1.tar.gz", hash = "sha256:ede28a1a32462f5a9705e07aea48001a08f7cf81a021585011deba701581a0db"}, -] - -[package.extras] -diagrams = ["jinja2", "railroad-diagrams"] - [[package]] name = "pyproj" version = "3.6.1" @@ -2180,49 +1573,6 @@ files = [ [package.dependencies] certifi = "*" -[[package]] -name = "pystac" -version = "1.8.4" -description = "Python library for working with the SpatioTemporal Asset Catalog (STAC) specification" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pystac-1.8.4-py3-none-any.whl", hash = "sha256:0d9917bf3abc71fca3edfb2dd5f66be44b22a63774cda520c81484a126fa780e"}, - {file = "pystac-1.8.4.tar.gz", hash = "sha256:ab9b93d16c4cca80e3c225cd5815dbaec610ee9320e2119857db17481b9bd2e5"}, -] - -[package.dependencies] -python-dateutil = ">=2.7.0" - -[package.extras] -bench = ["asv (>=0.6.0,<0.7.0)", "packaging (>=23.1,<24.0)", "virtualenv (>=20.22,<21.0)"] -docs = ["Sphinx (>=6.2,<7.0)", "ipython (>=8.12,<9.0)", "jinja2 (<4.0)", "jupyter (>=1.0,<2.0)", "nbsphinx (>=0.9.0,<0.10.0)", "pydata-sphinx-theme (>=0.13,<1.0)", "sphinx-autobuild (==2021.3.14)", "sphinx-design (>=0.5.0,<0.6.0)", "sphinxcontrib-fulltoc (>=1.2,<2.0)"] -jinja2 = ["jinja2 (<4.0)"] -orjson = ["orjson (>=3.5)"] -test = ["black (>=23.3,<24.0)", "codespell (>=2.2,<3.0)", "coverage (>=7.2,<8.0)", "doc8 (>=1.1,<2.0)", "html5lib (>=1.1,<2.0)", "jinja2 (<4.0)", "jsonschema (>=4.18,<5.0)", "mypy (>=1.2,<2.0)", "orjson (>=3.8,<4.0)", "pre-commit (>=3.2,<4.0)", "pytest (>=7.3,<8.0)", "pytest-cov (>=4.0,<5.0)", "pytest-mock (>=3.10,<4.0)", "pytest-recording (>=0.13.0,<0.14.0)", "ruff (==0.0.291)", "types-html5lib (>=1.1,<2.0)", "types-orjson (>=3.6,<4.0)", "types-python-dateutil (>=2.8,<3.0)", "types-urllib3 (>=1.26,<2.0)"] -urllib3 = ["urllib3 (>=1.26)"] -validation = ["jsonschema (>=4.18,<5.0)"] - -[[package]] -name = "pytest" -version = "7.4.2" -description = "pytest: simple powerful testing with Python" -optional = false -python-versions = ">=3.7" -files = [ - {file = "pytest-7.4.2-py3-none-any.whl", hash = 
"sha256:1d881c6124e08ff0a1bb75ba3ec0bfd8b5354a01c194ddd5a0a870a48d99b002"}, - {file = "pytest-7.4.2.tar.gz", hash = "sha256:a766259cfab564a2ad52cb1aae1b881a75c3eb7e34ca3779697c23ed47c47069"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "sys_platform == \"win32\""} -iniconfig = "*" -packaging = "*" -pluggy = ">=0.12,<2.0" - -[package.extras] -testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] - [[package]] name = "python-dateutil" version = "2.8.2" @@ -2355,55 +1705,6 @@ files = [ {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, ] -[[package]] -name = "rasterio" -version = "1.3.9" -description = "Fast and direct raster I/O for use with Numpy and SciPy" -optional = false -python-versions = ">=3.8" -files = [ - {file = "rasterio-1.3.9-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:04247da9f4002587ac2bec967c3a72f63fc0e6654101c06850bae3d8131b700d"}, - {file = "rasterio-1.3.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c9edce37b70f4cd4be5d3f5d314877e3130aeebb612120405cd28f83fe200865"}, - {file = "rasterio-1.3.9-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:fd6a850a37840ba590ddcf7ff90ba007b1e231b04434d8b4ac5ce0f746ada91a"}, - {file = "rasterio-1.3.9-cp310-cp310-win_amd64.whl", hash = "sha256:0c83156a44f8fda11876ff9f2ff1b602d7e7434447f7d621353f2929cefb1bf1"}, - {file = "rasterio-1.3.9-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:0172dbd80bd9adc105ec2c9bd207dbd5519ea06b438a4d965c6290ae8ed6ff9f"}, - {file = "rasterio-1.3.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0ea5b42597d85868ee88c750cc33f2ae729e1b5e3fe28f99071f39e1417bf1c0"}, - {file = "rasterio-1.3.9-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:be9b343bd08245df22115775dc9513c912afb4134d832662fa165d70cb805c34"}, - {file = "rasterio-1.3.9-cp311-cp311-win_amd64.whl", hash = "sha256:06d53e2e0885f039f960beb7c861400b92ea3e0e5abc2c67483fb56b1e5cbc13"}, - {file = "rasterio-1.3.9-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:a34bb9eef67b7896e2dfb39e10ba6372f9894226fb790bd7a46f5748f205b7d8"}, - {file = "rasterio-1.3.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:67b144b9678f9ad4cf5f2c3f455cbc6a7166c0523179249cee8f2e2c57d76c5b"}, - {file = "rasterio-1.3.9-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:99b72fccb702a921f43e56a4507b4cafe2a9196b478b993b98e82ec6851916d7"}, - {file = "rasterio-1.3.9-cp312-cp312-win_amd64.whl", hash = "sha256:6777fad3c31eb3e5da0ccaa28a032ad07c20d003bcd14f8bc13e16ca2f62348c"}, - {file = "rasterio-1.3.9-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:55bb1a2701dd67c1952b261a2ffbabd947a435d4457f13c25092a32ab7a4b36e"}, - {file = "rasterio-1.3.9-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:726d8e8884359c34f672312171310052d5483af550ef00fb4f2562cc022a6f5a"}, - {file = "rasterio-1.3.9-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:f65879415df188fdc9388ccf2ee01e0659abae370d12518a17b60151e7d04efe"}, - {file = "rasterio-1.3.9-cp38-cp38-win_amd64.whl", hash = "sha256:89771b70ee722c4cc808e2a6139b367bef1a736ecd497b311b3515d78a5d16bc"}, - {file = "rasterio-1.3.9-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:14df8413c030b04e54d478d6ecec4e5958b46585c3cb970bf0dc19b4831146c8"}, - {file = "rasterio-1.3.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:911e54e0bb97c456a045f6d8e24b00aeb055a235d2aa7c2c1f9128f4c6c7a52d"}, - {file = "rasterio-1.3.9-cp39-cp39-manylinux2014_x86_64.whl", hash 
= "sha256:01e428ee5ba8444f5cb4fff56225acb1ab9bc8b77209b6e4198e04565d8a8509"}, - {file = "rasterio-1.3.9-cp39-cp39-win_amd64.whl", hash = "sha256:26d9aea05b035927647bb32cc04fad0a68346a2f5186224dc1c2555c33515183"}, - {file = "rasterio-1.3.9.tar.gz", hash = "sha256:fc6d0d290492fa1a5068711cfebb21cc936968891b7ed9da0690c8a7388885c5"}, -] - -[package.dependencies] -affine = "*" -attrs = "*" -certifi = "*" -click = ">=4.0" -click-plugins = "*" -cligj = ">=0.5" -numpy = "*" -setuptools = "*" -snuggs = ">=1.4.1" - -[package.extras] -all = ["boto3 (>=1.2.4)", "ghp-import", "hypothesis", "ipython (>=2.0)", "matplotlib", "numpydoc", "packaging", "pytest (>=2.8.2)", "pytest-cov (>=2.2.0)", "shapely", "sphinx", "sphinx-rtd-theme"] -docs = ["ghp-import", "numpydoc", "sphinx", "sphinx-rtd-theme"] -ipython = ["ipython (>=2.0)"] -plot = ["matplotlib"] -s3 = ["boto3 (>=1.2.4)"] -test = ["boto3 (>=1.2.4)", "hypothesis", "packaging", "pytest (>=2.8.2)", "pytest-cov (>=2.2.0)", "shapely"] - [[package]] name = "requests" version = "2.31.0" @@ -2444,79 +1745,18 @@ pygments = ">=2.13.0,<3.0.0" jupyter = ["ipywidgets (>=7.5.1,<9)"] [[package]] -name = "rio-cogeo" -version = "5.0.0" -description = "Cloud Optimized GeoTIFF (COGEO) creation plugin for rasterio" -optional = false -python-versions = ">=3.8" -files = [ - {file = "rio_cogeo-5.0.0-py3-none-any.whl", hash = "sha256:3a3716c2a28e24497eed8bc25874e4106ce762833974b1a34571c93c75cf89ae"}, - {file = "rio_cogeo-5.0.0.tar.gz", hash = "sha256:b4114369a3f83322667c5b6c1358b3b483aea07418183bcea27743e292ac9141"}, -] - -[package.dependencies] -click = ">=7.0" -morecantile = ">=5.0,<6.0" -numpy = ">=1.15,<2.0" -pydantic = ">=2.0,<3.0" -rasterio = ">=1.3.3" - -[package.extras] -dev = ["pre-commit"] -docs = ["mkdocs", "mkdocs-material"] -test = ["cogdumper", "pytest", "pytest-cov"] - -[[package]] -name = "rio-stac" -version = "0.8.1" -description = "Create STAC Items from raster datasets." +name = "schemainspect" +version = "3.1.1663587362" +description = "Schema inspection for PostgreSQL (and possibly others)" optional = false -python-versions = ">=3.8" +python-versions = ">=3.7,<4" files = [ - {file = "rio_stac-0.8.1-py3-none-any.whl", hash = "sha256:e1e762836c762f7d64ec46a4383c3db47f9231d12240ef0cc773685e09128d2e"}, - {file = "rio_stac-0.8.1.tar.gz", hash = "sha256:5e95ff1d3fad616a28a8017cd0212c91c3ea31dc14028e9773a192141fb03d40"}, + {file = "schemainspect-3.1.1663587362-py3-none-any.whl", hash = "sha256:3071265712863c4d4e742940a4b44ac685135af3c93416872ec1bb6c822c4aca"}, + {file = "schemainspect-3.1.1663587362.tar.gz", hash = "sha256:a295ad56f7a19c09e5e1ef9f16dadbf6392e26196cb5f05b5afe613c99ce7468"}, ] [package.dependencies] -pystac = ">=1.0.0,<2.0.0" -rasterio = "*" - -[package.extras] -dev = ["pre-commit"] -doc = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "pdocs", "pygments"] -test = ["pystac[validation] (>=1.0.0,<2.0.0)", "pytest", "pytest-cov", "requests"] - -[[package]] -name = "rio-tiler" -version = "6.2.4" -description = "User friendly Rasterio plugin to read raster datasets." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "rio_tiler-6.2.4-py3-none-any.whl", hash = "sha256:70c2cdbbbeef8cb395ca2d261f194380c98e336cb4ff0c837d82e2c5a5bd4a6d"}, - {file = "rio_tiler-6.2.4.tar.gz", hash = "sha256:93d7d44af66bbdf0b788745292e90e43184a1dace775d824c99c1f98d7ec9e9c"}, -] - -[package.dependencies] -attrs = "*" -cachetools = "*" -color-operations = "*" -httpx = "*" -morecantile = ">=5.0,<6.0" -numexpr = "*" -numpy = "*" -pydantic = ">=2.0,<3.0" -pystac = ">=0.5.4" -rasterio = ">=1.3.0" - -[package.extras] -benchmark = ["pytest", "pytest-benchmark"] -dev = ["pre-commit"] -docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "nbconvert", "pygments"] -s3 = ["boto3"] -test = ["boto3", "pytest", "pytest-cov", "rioxarray", "xarray"] -tilebench = ["pytest", "tilebench"] -xarray = ["rioxarray", "xarray"] +sqlalchemy = "*" [[package]] name = "scipy" @@ -2633,113 +1873,6 @@ numpy = ">=1.14" docs = ["matplotlib", "numpydoc (==1.1.*)", "sphinx", "sphinx-book-theme", "sphinx-remove-toctrees"] test = ["pytest", "pytest-cov"] -[[package]] -name = "simplejson" -version = "3.19.2" -description = "Simple, fast, extensible JSON encoder/decoder for Python" -optional = false -python-versions = ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "simplejson-3.19.2-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:3471e95110dcaf901db16063b2e40fb394f8a9e99b3fe9ee3acc6f6ef72183a2"}, - {file = "simplejson-3.19.2-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:3194cd0d2c959062b94094c0a9f8780ffd38417a5322450a0db0ca1a23e7fbd2"}, - {file = "simplejson-3.19.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:8a390e56a7963e3946ff2049ee1eb218380e87c8a0e7608f7f8790ba19390867"}, - {file = "simplejson-3.19.2-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:1537b3dd62d8aae644f3518c407aa8469e3fd0f179cdf86c5992792713ed717a"}, - {file = "simplejson-3.19.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:a8617625369d2d03766413bff9e64310feafc9fc4f0ad2b902136f1a5cd8c6b0"}, - {file = "simplejson-3.19.2-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:2c433a412e96afb9a3ce36fa96c8e61a757af53e9c9192c97392f72871e18e69"}, - {file = "simplejson-3.19.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:f1c70249b15e4ce1a7d5340c97670a95f305ca79f376887759b43bb33288c973"}, - {file = "simplejson-3.19.2-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:287e39ba24e141b046812c880f4619d0ca9e617235d74abc27267194fc0c7835"}, - {file = "simplejson-3.19.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:6f0a0b41dd05eefab547576bed0cf066595f3b20b083956b1405a6f17d1be6ad"}, - {file = "simplejson-3.19.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2f98d918f7f3aaf4b91f2b08c0c92b1774aea113334f7cde4fe40e777114dbe6"}, - {file = "simplejson-3.19.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7d74beca677623481810c7052926365d5f07393c72cbf62d6cce29991b676402"}, - {file = "simplejson-3.19.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7f2398361508c560d0bf1773af19e9fe644e218f2a814a02210ac2c97ad70db0"}, - {file = "simplejson-3.19.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ad331349b0b9ca6da86064a3599c425c7a21cd41616e175ddba0866da32df48"}, - {file = "simplejson-3.19.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:332c848f02d71a649272b3f1feccacb7e4f7e6de4a2e6dc70a32645326f3d428"}, - {file = "simplejson-3.19.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:25785d038281cd106c0d91a68b9930049b6464288cea59ba95b35ee37c2d23a5"}, - {file = "simplejson-3.19.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18955c1da6fc39d957adfa346f75226246b6569e096ac9e40f67d102278c3bcb"}, - {file = "simplejson-3.19.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:11cc3afd8160d44582543838b7e4f9aa5e97865322844b75d51bf4e0e413bb3e"}, - {file = "simplejson-3.19.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:b01fda3e95d07a6148702a641e5e293b6da7863f8bc9b967f62db9461330562c"}, - {file = "simplejson-3.19.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:778331444917108fa8441f59af45886270d33ce8a23bfc4f9b192c0b2ecef1b3"}, - {file = "simplejson-3.19.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9eb117db8d7ed733a7317c4215c35993b815bf6aeab67523f1f11e108c040672"}, - {file = "simplejson-3.19.2-cp310-cp310-win32.whl", hash = "sha256:39b6d79f5cbfa3eb63a869639cfacf7c41d753c64f7801efc72692c1b2637ac7"}, - {file = "simplejson-3.19.2-cp310-cp310-win_amd64.whl", hash = "sha256:5675e9d8eeef0aa06093c1ff898413ade042d73dc920a03e8cea2fb68f62445a"}, - {file = "simplejson-3.19.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ed628c1431100b0b65387419551e822987396bee3c088a15d68446d92f554e0c"}, - {file = "simplejson-3.19.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:adcb3332979cbc941b8fff07181f06d2b608625edc0a4d8bc3ffc0be414ad0c4"}, - {file = "simplejson-3.19.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:08889f2f597ae965284d7b52a5c3928653a9406d88c93e3161180f0abc2433ba"}, - {file = "simplejson-3.19.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef7938a78447174e2616be223f496ddccdbf7854f7bf2ce716dbccd958cc7d13"}, - {file = "simplejson-3.19.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a970a2e6d5281d56cacf3dc82081c95c1f4da5a559e52469287457811db6a79b"}, - {file = "simplejson-3.19.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:554313db34d63eac3b3f42986aa9efddd1a481169c12b7be1e7512edebff8eaf"}, - {file = "simplejson-3.19.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d36081c0b1c12ea0ed62c202046dca11438bee48dd5240b7c8de8da62c620e9"}, - {file = "simplejson-3.19.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a3cd18e03b0ee54ea4319cdcce48357719ea487b53f92a469ba8ca8e39df285e"}, - {file = "simplejson-3.19.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:66e5dc13bfb17cd6ee764fc96ccafd6e405daa846a42baab81f4c60e15650414"}, - {file = "simplejson-3.19.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:972a7833d4a1fcf7a711c939e315721a88b988553fc770a5b6a5a64bd6ebeba3"}, - {file = "simplejson-3.19.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3e74355cb47e0cd399ead3477e29e2f50e1540952c22fb3504dda0184fc9819f"}, - {file = "simplejson-3.19.2-cp311-cp311-win32.whl", hash = "sha256:1dd4f692304854352c3e396e9b5f0a9c9e666868dd0bdc784e2ac4c93092d87b"}, - {file = "simplejson-3.19.2-cp311-cp311-win_amd64.whl", hash = "sha256:9300aee2a8b5992d0f4293d88deb59c218989833e3396c824b69ba330d04a589"}, - {file = "simplejson-3.19.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b8d940fd28eb34a7084877747a60873956893e377f15a32ad445fe66c972c3b8"}, - {file = "simplejson-3.19.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:4969d974d9db826a2c07671273e6b27bc48e940738d768fa8f33b577f0978378"}, - {file = "simplejson-3.19.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c594642d6b13d225e10df5c16ee15b3398e21a35ecd6aee824f107a625690374"}, - {file = "simplejson-3.19.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2f5a398b5e77bb01b23d92872255e1bcb3c0c719a3be40b8df146570fe7781a"}, - {file = "simplejson-3.19.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:176a1b524a3bd3314ed47029a86d02d5a95cc0bee15bd3063a1e1ec62b947de6"}, - {file = "simplejson-3.19.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3c7363a8cb8c5238878ec96c5eb0fc5ca2cb11fc0c7d2379863d342c6ee367a"}, - {file = "simplejson-3.19.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:346820ae96aa90c7d52653539a57766f10f33dd4be609206c001432b59ddf89f"}, - {file = "simplejson-3.19.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:de9a2792612ec6def556d1dc621fd6b2073aff015d64fba9f3e53349ad292734"}, - {file = "simplejson-3.19.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:1c768e7584c45094dca4b334af361e43b0aaa4844c04945ac7d43379eeda9bc2"}, - {file = "simplejson-3.19.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:9652e59c022e62a5b58a6f9948b104e5bb96d3b06940c6482588176f40f4914b"}, - {file = "simplejson-3.19.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9c1a4393242e321e344213a90a1e3bf35d2f624aa8b8f6174d43e3c6b0e8f6eb"}, - {file = "simplejson-3.19.2-cp312-cp312-win32.whl", hash = "sha256:7cb98be113911cb0ad09e5523d0e2a926c09a465c9abb0784c9269efe4f95917"}, - {file = "simplejson-3.19.2-cp312-cp312-win_amd64.whl", hash = "sha256:6779105d2fcb7fcf794a6a2a233787f6bbd4731227333a072d8513b252ed374f"}, - {file = "simplejson-3.19.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:061e81ea2d62671fa9dea2c2bfbc1eec2617ae7651e366c7b4a2baf0a8c72cae"}, - {file = "simplejson-3.19.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4280e460e51f86ad76dc456acdbfa9513bdf329556ffc8c49e0200878ca57816"}, - {file = "simplejson-3.19.2-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:11c39fbc4280d7420684494373b7c5904fa72a2b48ef543a56c2d412999c9e5d"}, - {file = "simplejson-3.19.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bccb3e88ec26ffa90f72229f983d3a5d1155e41a1171190fa723d4135523585b"}, - {file = "simplejson-3.19.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bb5b50dc6dd671eb46a605a3e2eb98deb4a9af787a08fcdddabe5d824bb9664"}, - {file = "simplejson-3.19.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:d94245caa3c61f760c4ce4953cfa76e7739b6f2cbfc94cc46fff6c050c2390c5"}, - {file = "simplejson-3.19.2-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:d0e5ffc763678d48ecc8da836f2ae2dd1b6eb2d27a48671066f91694e575173c"}, - {file = "simplejson-3.19.2-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:d222a9ed082cd9f38b58923775152003765016342a12f08f8c123bf893461f28"}, - {file = "simplejson-3.19.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:8434dcdd347459f9fd9c526117c01fe7ca7b016b6008dddc3c13471098f4f0dc"}, - {file = "simplejson-3.19.2-cp36-cp36m-win32.whl", hash = "sha256:c9ac1c2678abf9270e7228133e5b77c6c3c930ad33a3c1dfbdd76ff2c33b7b50"}, - {file = 
"simplejson-3.19.2-cp36-cp36m-win_amd64.whl", hash = "sha256:92c4a4a2b1f4846cd4364855cbac83efc48ff5a7d7c06ba014c792dd96483f6f"}, - {file = "simplejson-3.19.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0d551dc931638e2102b8549836a1632e6e7cf620af3d093a7456aa642bff601d"}, - {file = "simplejson-3.19.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73a8a4653f2e809049999d63530180d7b5a344b23a793502413ad1ecea9a0290"}, - {file = "simplejson-3.19.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:40847f617287a38623507d08cbcb75d51cf9d4f9551dd6321df40215128325a3"}, - {file = "simplejson-3.19.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:be893258d5b68dd3a8cba8deb35dc6411db844a9d35268a8d3793b9d9a256f80"}, - {file = "simplejson-3.19.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9eb3cff1b7d71aa50c89a0536f469cb8d6dcdd585d8f14fb8500d822f3bdee4"}, - {file = "simplejson-3.19.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d0f402e787e6e7ee7876c8b05e2fe6464820d9f35ba3f172e95b5f8b699f6c7f"}, - {file = "simplejson-3.19.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:fbbcc6b0639aa09b9649f36f1bcb347b19403fe44109948392fbb5ea69e48c3e"}, - {file = "simplejson-3.19.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:2fc697be37585eded0c8581c4788fcfac0e3f84ca635b73a5bf360e28c8ea1a2"}, - {file = "simplejson-3.19.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0b0a3eb6dd39cce23801a50c01a0976971498da49bc8a0590ce311492b82c44b"}, - {file = "simplejson-3.19.2-cp37-cp37m-win32.whl", hash = "sha256:49f9da0d6cd17b600a178439d7d2d57c5ef01f816b1e0e875e8e8b3b42db2693"}, - {file = "simplejson-3.19.2-cp37-cp37m-win_amd64.whl", hash = "sha256:c87c22bd6a987aca976e3d3e23806d17f65426191db36d40da4ae16a6a494cbc"}, - {file = "simplejson-3.19.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:9e4c166f743bb42c5fcc60760fb1c3623e8fda94f6619534217b083e08644b46"}, - {file = "simplejson-3.19.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0a48679310e1dd5c9f03481799311a65d343748fe86850b7fb41df4e2c00c087"}, - {file = "simplejson-3.19.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c0521e0f07cb56415fdb3aae0bbd8701eb31a9dfef47bb57206075a0584ab2a2"}, - {file = "simplejson-3.19.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d2d5119b1d7a1ed286b8af37357116072fc96700bce3bec5bb81b2e7057ab41"}, - {file = "simplejson-3.19.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c1467d939932901a97ba4f979e8f2642415fcf02ea12f53a4e3206c9c03bc17"}, - {file = "simplejson-3.19.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49aaf4546f6023c44d7e7136be84a03a4237f0b2b5fb2b17c3e3770a758fc1a0"}, - {file = "simplejson-3.19.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60848ab779195b72382841fc3fa4f71698a98d9589b0a081a9399904487b5832"}, - {file = "simplejson-3.19.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0436a70d8eb42bea4fe1a1c32d371d9bb3b62c637969cb33970ad624d5a3336a"}, - {file = "simplejson-3.19.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:49e0e3faf3070abdf71a5c80a97c1afc059b4f45a5aa62de0c2ca0444b51669b"}, - {file = "simplejson-3.19.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = 
"sha256:ff836cd4041e16003549449cc0a5e372f6b6f871eb89007ab0ee18fb2800fded"}, - {file = "simplejson-3.19.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3848427b65e31bea2c11f521b6fc7a3145d6e501a1038529da2391aff5970f2f"}, - {file = "simplejson-3.19.2-cp38-cp38-win32.whl", hash = "sha256:3f39bb1f6e620f3e158c8b2eaf1b3e3e54408baca96a02fe891794705e788637"}, - {file = "simplejson-3.19.2-cp38-cp38-win_amd64.whl", hash = "sha256:0405984f3ec1d3f8777c4adc33eac7ab7a3e629f3b1c05fdded63acc7cf01137"}, - {file = "simplejson-3.19.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:445a96543948c011a3a47c8e0f9d61e9785df2544ea5be5ab3bc2be4bd8a2565"}, - {file = "simplejson-3.19.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4a8c3cc4f9dfc33220246760358c8265dad6e1104f25f0077bbca692d616d358"}, - {file = "simplejson-3.19.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:af9c7e6669c4d0ad7362f79cb2ab6784d71147503e62b57e3d95c4a0f222c01c"}, - {file = "simplejson-3.19.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:064300a4ea17d1cd9ea1706aa0590dcb3be81112aac30233823ee494f02cb78a"}, - {file = "simplejson-3.19.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9453419ea2ab9b21d925d0fd7e3a132a178a191881fab4169b6f96e118cc25bb"}, - {file = "simplejson-3.19.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9e038c615b3906df4c3be8db16b3e24821d26c55177638ea47b3f8f73615111c"}, - {file = "simplejson-3.19.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16ca9c90da4b1f50f089e14485db8c20cbfff2d55424062791a7392b5a9b3ff9"}, - {file = "simplejson-3.19.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1018bd0d70ce85f165185d2227c71e3b1e446186f9fa9f971b69eee223e1e3cd"}, - {file = "simplejson-3.19.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:e8dd53a8706b15bc0e34f00e6150fbefb35d2fd9235d095b4f83b3c5ed4fa11d"}, - {file = "simplejson-3.19.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:2d022b14d7758bfb98405672953fe5c202ea8a9ccf9f6713c5bd0718eba286fd"}, - {file = "simplejson-3.19.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:febffa5b1eda6622d44b245b0685aff6fb555ce0ed734e2d7b1c3acd018a2cff"}, - {file = "simplejson-3.19.2-cp39-cp39-win32.whl", hash = "sha256:4edcd0bf70087b244ba77038db23cd98a1ace2f91b4a3ecef22036314d77ac23"}, - {file = "simplejson-3.19.2-cp39-cp39-win_amd64.whl", hash = "sha256:aad7405c033d32c751d98d3a65801e2797ae77fac284a539f6c3a3e13005edc4"}, - {file = "simplejson-3.19.2-py3-none-any.whl", hash = "sha256:bcedf4cae0d47839fee7de344f96b5694ca53c786f28b5f773d4f0b265a159eb"}, - {file = "simplejson-3.19.2.tar.gz", hash = "sha256:9eb442a2442ce417801c912df68e1f6ccfcd41577ae7274953ab3ad24ef7d82c"}, -] - [[package]] name = "six" version = "1.16.0" @@ -2751,35 +1884,6 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] -[[package]] -name = "sniffio" -version = "1.3.0" -description = "Sniff out which async library your code is running under" -optional = false -python-versions = ">=3.7" -files = [ - {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, - {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, -] - -[[package]] -name = "snuggs" -version = "1.4.7" -description = "Snuggs are s-expressions for Numpy" -optional 
= false -python-versions = "*" -files = [ - {file = "snuggs-1.4.7-py3-none-any.whl", hash = "sha256:988dde5d4db88e9d71c99457404773dabcc7a1c45971bfbe81900999942d9f07"}, - {file = "snuggs-1.4.7.tar.gz", hash = "sha256:501cf113fe3892e14e2fee76da5cd0606b7e149c411c271898e6259ebde2617b"}, -] - -[package.dependencies] -numpy = "*" -pyparsing = ">=2.1.6" - -[package.extras] -test = ["hypothesis", "pytest"] - [[package]] name = "spectra" version = "0.0.11" @@ -2908,6 +2012,27 @@ test-all = ["Babel (>=1.3)", "Jinja2 (>=2.3)", "Pygments (>=1.2)", "arrow (>=0.3 timezone = ["python-dateutil"] url = ["furl (>=0.4.1)"] +[[package]] +name = "sqlbag" +version = "0.1.1617247075" +description = "various snippets of SQL-related boilerplate" +optional = false +python-versions = "*" +files = [ + {file = "sqlbag-0.1.1617247075-py2.py3-none-any.whl", hash = "sha256:ecdef26d661f8640711030ac6ee618deb92b91f9f0fc2efbf8a3b133af13092d"}, + {file = "sqlbag-0.1.1617247075.tar.gz", hash = "sha256:b9d7862c3b2030356d796ca872907962fd54704066978d7ae89383f5123366ed"}, +] + +[package.dependencies] +packaging = "*" +six = "*" +sqlalchemy = "*" + +[package.extras] +maria = ["pymysql"] +pendulum = ["pendulum", "relativedelta"] +pg = ["psycopg2"] + [[package]] name = "sqlparse" version = "0.4.4" @@ -2943,62 +2068,6 @@ pure-eval = "*" [package.extras] tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] -[[package]] -name = "starlette" -version = "0.27.0" -description = "The little ASGI library that shines." -optional = false -python-versions = ">=3.7" -files = [ - {file = "starlette-0.27.0-py3-none-any.whl", hash = "sha256:918416370e846586541235ccd38a474c08b80443ed31c578a418e2209b3eef91"}, - {file = "starlette-0.27.0.tar.gz", hash = "sha256:6a6b0d042acb8d469a01eba54e9cda6cbd24ac602c4cd016723117d6a7e73b75"}, -] - -[package.dependencies] -anyio = ">=3.4.0,<5" - -[package.extras] -full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"] - -[[package]] -name = "starlette-cramjam" -version = "0.3.2" -description = "Cramjam integration for Starlette ASGI framework." -optional = false -python-versions = ">=3.7" -files = [ - {file = "starlette-cramjam-0.3.2.tar.gz", hash = "sha256:848dd036657038830b4943518acb62e73c32a12031d0dd752bef9e68cd6af00e"}, - {file = "starlette_cramjam-0.3.2-py3-none-any.whl", hash = "sha256:51f618e188aeab23d4cee2d0458abd831231172081082540bafb334c19f76bae"}, -] - -[package.dependencies] -cramjam = ">=2.4,<2.7" -starlette = "*" - -[package.extras] -dev = ["pre-commit"] -test = ["brotlipy", "httpx", "pytest", "pytest-cov"] - -[[package]] -name = "supermorecado" -version = "0.1.2" -description = "Extend the functionality of morecantile with additional commands." -optional = false -python-versions = ">=3.8" -files = [ - {file = "supermorecado-0.1.2-py3-none-any.whl", hash = "sha256:5116b9ff8c8aa0b0da235cb5962449dc878515c8155adf1440268c3cf271bed6"}, - {file = "supermorecado-0.1.2.tar.gz", hash = "sha256:b51664d2eb12326e657a9d80c6849857c42ef3876545515ef988e266e6cc9654"}, -] - -[package.dependencies] -morecantile = "*" -rasterio = "*" - -[package.extras] -dev = ["pre-commit"] -docs = ["mkdocs", "mkdocs-material", "pygments"] -test = ["pytest", "pytest-cov"] - [[package]] name = "tiletanic" version = "1.1.0" @@ -3015,143 +2084,6 @@ click = "*" geojson = "*" shapely = ">=1.6" -[[package]] -name = "timvt" -version = "0.8.0a4" -description = "A lightweight PostGIS based dynamic vector tile server." 
-optional = false -python-versions = ">=3.8" -files = [] -develop = true - -[package.dependencies] -asyncpg = ">=0.23.0" -buildpg = ">=0.3" -fastapi = ">=0.87" -jinja2 = ">=2.11.2,<4.0.0" -morecantile = ">=5.0,<6.0" -orjson = "*" -pydantic-settings = ">=2.0.3" -starlette-cramjam = ">=0.3,<0.4" - -[package.extras] -dev = ["pre-commit"] -docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "nbconvert", "pdocs", "pygments"] -server = ["uvicorn[standard] (>=0.12.0,<0.19.0)"] -test = ["httpx", "mapbox-vector-tile", "numpy", "protobuf (>=3.0,<4.0)", "psycopg2", "pytest", "pytest-asyncio", "pytest-benchmark", "pytest-cov", "pytest-pgsql", "sqlalchemy (>=1.1,<1.4)"] - -[package.source] -type = "directory" -url = "../../../Software/tileserver/deps/timvt" - -[[package]] -name = "titiler" -version = "0.15.1" -description = "A modern dynamic tile server built on top of FastAPI and Rasterio/GDAL." -optional = false -python-versions = ">=3.8" -files = [ - {file = "titiler-0.15.1-py3-none-any.whl", hash = "sha256:08180835c6a69e7f0e76d3fe13c76b0bb217142afd53220c4f558cb1872e15c6"}, - {file = "titiler-0.15.1.tar.gz", hash = "sha256:31361ac3ccc1a80395a7a8664406b1c2b1491d18341290234e4001431ac9b6b1"}, -] - -[package.dependencies] -titiler-application = "0.15.1" -titiler-core = "0.15.1" -titiler-extensions = "0.15.1" -titiler-mosaic = "0.15.1" - -[package.extras] -dev = ["pre-commit"] -docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "nbconvert", "pdocs", "pygments"] - -[[package]] -name = "titiler-application" -version = "0.15.1" -description = "A modern dynamic tile server built on top of FastAPI and Rasterio/GDAL." -optional = false -python-versions = ">=3.8" -files = [ - {file = "titiler.application-0.15.1-py3-none-any.whl", hash = "sha256:f3a7c61c025cf3e9799dfddbf940cd06d20423f62112208f95c03633f2c718b5"}, - {file = "titiler.application-0.15.1.tar.gz", hash = "sha256:0d152fc174ac1c25ed06acc51fe0e78f18fe9f3bb7dfd757a87f1c7fb798d019"}, -] - -[package.dependencies] -pydantic-settings = ">=2.0,<3.0" -starlette-cramjam = ">=0.3,<0.4" -"titiler.core" = "0.15.1" -"titiler.extensions" = {version = "0.15.1", extras = ["cogeo", "stac"]} -"titiler.mosaic" = "0.15.1" - -[package.extras] -server = ["uvicorn[standard] (>=0.12.0,<0.19.0)"] -test = ["boto3", "brotlipy", "httpx", "pytest", "pytest-asyncio", "pytest-cov"] - -[[package]] -name = "titiler-core" -version = "0.15.1" -description = "A modern dynamic tile server built on top of FastAPI and Rasterio/GDAL." -optional = false -python-versions = ">=3.8" -files = [ - {file = "titiler.core-0.15.1-py3-none-any.whl", hash = "sha256:c870c3656175df4b202633996ba0d6a8d439a828cce1c7e669a53029597796ba"}, - {file = "titiler.core-0.15.1.tar.gz", hash = "sha256:243097ba4163b7b67033f6882e1485ee25d54f131ea712d04187365b1864c27a"}, -] - -[package.dependencies] -fastapi = ">=0.100.0" -geojson-pydantic = ">=1.0,<2.0" -jinja2 = ">=2.11.2,<4.0.0" -morecantile = ">=5.0,<6.0" -numpy = "*" -pydantic = ">=2.0,<3.0" -rasterio = "*" -rio-tiler = ">=6.2.1,<7.0" -simplejson = "*" -typing_extensions = ">=4.6.1" - -[package.extras] -test = ["httpx", "pytest", "pytest-asyncio", "pytest-cov"] - -[[package]] -name = "titiler-extensions" -version = "0.15.1" -description = "Extensions for TiTiler Factories." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "titiler.extensions-0.15.1-py3-none-any.whl", hash = "sha256:a0a0c8d03486006c4dee9bae97de91a616b82241a875bf6bd95301b4c628697f"}, - {file = "titiler.extensions-0.15.1.tar.gz", hash = "sha256:b45cccd08e2bdbf4f86bbc19f56e86863f9c3734f4a3bfdb1e534cdf32f2c213"}, -] - -[package.dependencies] -rio-cogeo = {version = ">=5.0,<6.0", optional = true, markers = "extra == \"cogeo\""} -rio-stac = {version = ">=0.8,<0.9", optional = true, markers = "extra == \"stac\""} -"titiler.core" = "0.15.1" - -[package.extras] -cogeo = ["rio-cogeo (>=5.0,<6.0)"] -stac = ["rio-stac (>=0.8,<0.9)"] -test = ["httpx", "pystac[validation] (>=1.0.0,<2.0.0)", "pytest", "pytest-asyncio", "pytest-cov"] - -[[package]] -name = "titiler-mosaic" -version = "0.15.1" -description = "cogeo-mosaic (MosaicJSON) plugin for TiTiler." -optional = false -python-versions = ">=3.8" -files = [ - {file = "titiler.mosaic-0.15.1-py3-none-any.whl", hash = "sha256:9fbde6b16d34a982e720432e24646341b67a49a283493c28ce1bbeada7340ec3"}, - {file = "titiler.mosaic-0.15.1.tar.gz", hash = "sha256:b7e94affa5aa93420fcd9cb6de1425a6f672f9e39ce8ea0c3102615a9bab54dd"}, -] - -[package.dependencies] -cogeo-mosaic = ">=7.0,<8.0" -"titiler.core" = "0.15.1" - -[package.extras] -test = ["httpx", "pytest", "pytest-asyncio", "pytest-cov"] - [[package]] name = "toml" version = "0.10.2" @@ -3258,25 +2190,6 @@ secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17. socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] -[[package]] -name = "uvicorn" -version = "0.16.0" -description = "The lightning-fast ASGI server." -optional = false -python-versions = "*" -files = [ - {file = "uvicorn-0.16.0-py3-none-any.whl", hash = "sha256:d8c839231f270adaa6d338d525e2652a0b4a5f4c2430b5c4ef6ae4d11776b0d2"}, - {file = "uvicorn-0.16.0.tar.gz", hash = "sha256:eacb66afa65e0648fcbce5e746b135d09722231ffffc61883d4fac2b62fbea8d"}, -] - -[package.dependencies] -asgiref = ">=3.4.0" -click = ">=7.0" -h11 = ">=0.8" - -[package.extras] -standard = ["PyYAML (>=5.1)", "colorama (>=0.4)", "httptools (>=0.2.0,<0.4.0)", "python-dotenv (>=0.13)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchgod (>=0.6)", "websockets (>=10.0)", "websockets (>=9.1)"] - [[package]] name = "wcwidth" version = "0.2.8" @@ -3307,4 +2220,4 @@ test = ["websockets"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "a6e11549c505d2ddac375ffc7e32688f99ac0ee84c87d892cb0611eb04c6b1a7" +content-hash = "b4a533a6c421df7eced82dd3516daec87735604344fded9d94a87c3960012f32" diff --git a/py-root/pyproject.toml b/py-root/pyproject.toml index 3bd950f9..cb081ede 100644 --- a/py-root/pyproject.toml +++ b/py-root/pyproject.toml @@ -12,6 +12,7 @@ python = "^3.11" "macrostrat.map_integration" = { path = "../map-integration", develop = true } "criticalmaas.ta1_geopackage" = "^0.2.0" macrostrat-utils = "^1.2.1" +macrostrat-package-tools = "^1.0.0" [tool.poetry.dev-dependencies] "macrostrat.package_tools" = "^1.0.0" From 6245e567ac0eb85811238b79f65b26174e5a9ef7 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Fri, 26 Jul 2024 16:42:19 -0500 Subject: [PATCH 18/48] Set up namespaced packages correctly in intellij --- .idea/macrostrat.iml | 14 ++++++-------- .idea/sqldialects.xml | 6 ------ 2 files changed, 6 insertions(+), 14 deletions(-) delete mode 100644 .idea/sqldialects.xml diff --git a/.idea/macrostrat.iml b/.idea/macrostrat.iml index dbe26036..34fdb16d 100644 --- a/.idea/macrostrat.iml +++ b/.idea/macrostrat.iml @@ 
-7,16 +7,14 @@ - + + + + + + - - - \ No newline at end of file diff --git a/.idea/sqldialects.xml b/.idea/sqldialects.xml deleted file mode 100644 index 13561b76..00000000 --- a/.idea/sqldialects.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file From 8161b71f83870addc7bda8826f56ab99a8b30647 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Fri, 26 Jul 2024 17:07:31 -0500 Subject: [PATCH 19/48] Fix a bit more sql --- .../mariadb/postgresql_migration/__init__.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index dde84913..22d36709 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -125,19 +125,22 @@ def get_data_counts_pg(database_name, username, password, schema): AND table_type = 'BASE TABLE' AND table_schema = :table_schema""", dict(table_schema=schema, table_catalog=database_name), ) + table_result = conn.execute(table_query) pg_tables = [row[0] for row in table_result] for table in pg_tables: - row_query = text(f"SELECT COUNT(*) FROM {database_name}.{schema}.{table};") - row_result = conn.execute(row_query) + row_result = run_query( + conn, + "SELECT COUNT(*) FROM {table}", + dict(table=Identifier(schema, table)), + ) row_count = row_result.scalar() pg_rows[table.lower()] = row_count - column_query = text( - f"SELECT COUNT(*) FROM information_schema.columns WHERE table_catalog = '{database_name}' " - f"AND table_schema = '{schema}' AND table_name = '{table}';" + column_result = run_query( + "SELECT COUNT(*) FROM information_schema.columns WHERE table_catalog = :table_catalog AND table_schema = :schema AND table_name = :table", + dict(table_catalog=database_name, schema=schema, table=table), ) - column_result = conn.execute(column_query) column_count = column_result.scalar() pg_columns[table.lower()] = column_count engine.dispose() From 985bab4f6dc44558349944b10000ebcf8843803f Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Fri, 26 Jul 2024 17:11:48 -0500 Subject: [PATCH 20/48] added find_col_variance() method --- MariaDB Migration to PostgreSQL/utils.py | 84 ++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 7 deletions(-) diff --git a/MariaDB Migration to PostgreSQL/utils.py b/MariaDB Migration to PostgreSQL/utils.py index fd4b0643..d5f7f0ba 100644 --- a/MariaDB Migration to PostgreSQL/utils.py +++ b/MariaDB Migration to PostgreSQL/utils.py @@ -165,14 +165,12 @@ def compare_data_counts(db1_rows, db2_rows, db1_columns, db2_columns, db1, db2): return row_count_difference, col_count_difference -""" -Script to output dataframes for comparing data between two databases and tables. -""" + + def find_row_variances(database_name_one, schema_one, schema_two, username, password, tables): SQLALCHEMY_DATABASE_URI = f"postgresql://{username}:{password}@{pg_server}/{database_name_one}" engine = create_engine(SQLALCHEMY_DATABASE_URI) insp = inspect(engine) - with engine.connect() as conn: for table in tables: # Get the actual first column name for each table @@ -190,8 +188,67 @@ def find_row_variances(database_name_one, schema_one, schema_two, username, pass engine.dispose() return +#strat_tree column names renamed from this_name and that_name to parent and child in Macrostrat. Determine how to merge. +#possibly rename after the merge and update the API. 
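# Review note on the column comparison added below: its index loops use
# range(0, len(columns) - 1), which stops one element short and silently
# skips the last column of every table. A minimal set-based sketch of the
# same check, assuming a SQLAlchemy engine for the PostgreSQL host; the
# column_diff helper and the example table/schema names are illustrative,
# not part of this patch.

from sqlalchemy import create_engine, inspect

def column_diff(engine, table, schema_a, schema_b):
    insp = inspect(engine)
    # Inspector.get_columns returns one dict per column; compare the names.
    cols_a = {col["name"] for col in insp.get_columns(table, schema=schema_a)}
    cols_b = {col["name"] for col in insp.get_columns(table, schema=schema_b)}
    # (columns only in schema_a, columns only in schema_b)
    return cols_a - cols_b, cols_b - cols_a

# Example: column_diff(engine, "strat_names", "macrostrat", "macrostrat_temp")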
+ +def find_col_variances(database_name_one, schema_one, schema_two, username, password, tables): + SQLALCHEMY_DATABASE_URI = f"postgresql://{username}:{password}@{pg_server}/{database_name_one}" + engine = create_engine(SQLALCHEMY_DATABASE_URI) + insp = inspect(engine) + table_one = [] + table_two = [] + col_not_in_macrostrat_two = [] + with engine.connect() as conn: + for table in tables: + dict = [] + dict_two = [] + table_one = [] + table_two = [] + columns_one = insp.get_columns(table, schema=schema_one) + columns_two = insp.get_columns(table, schema=schema_two) + for index in range(0, len(columns_one)-1): + dict = columns_one[index] + table_one.append(dict['name']) + for index_two in range(0, len(columns_two)-1): + dict_two = columns_two[index_two] + table_two.append(dict_two['name']) + for col in table_one: + if col not in table_two: + col_not_in_macrostrat_two.append(col) + if len(col_not_in_macrostrat_two) > 0: + print(f"Columns that exist in Macrostrat and NOT in macrostrat_two for {table}:", col_not_in_macrostrat_two) + else: + print("Success! All columns in Macrostrat exist in Macrostrat_two") + engine.dispose() + return +def find_row_variances_primary_key(database_name_one, schema_one, schema_two, username, password, tables): + SQLALCHEMY_DATABASE_URI = f"postgresql://{username}:{password}@{pg_server}/{database_name_one}" + engine = create_engine(SQLALCHEMY_DATABASE_URI) + insp = inspect(engine) + with engine.connect() as conn: + for table in tables: + # Get the primary key constraint for each table + pk_constraint = insp.get_pk_constraint(table, schema=schema_one) + try: + pk_column_name = pk_constraint['constrained_columns'][0] + except Exception as e: + pk_column_name = None + if pk_column_name: + query = f""" + SELECT m.{pk_column_name} + FROM macrostrat.macrostrat.{table} m + RIGHT JOIN macrostrat.macrostrat_temp.{table} t ON m.{pk_column_name} = t.{pk_column_name} + WHERE t.{pk_column_name} IS NULL; + """ + result_df = pd.read_sql_query(query, engine) + print(f"Macrostrat rows not in Macrostrat_two rows for table {table}:") + print(result_df) + else: + print(f"Table macrostrat.{table} does not have a primary key.") + engine.dispose() + return def pg_loader_pre_script(): @@ -419,7 +476,7 @@ def reset(): #reset() pg_restore(pg_server, pg_user, pg_pass_new, pg_db_name, maria_db_name_two) - tables = [ + table_rows = [ "sections", "strat_names", "strat_names_places", @@ -448,9 +505,22 @@ def reset(): "refs" ] + table_cols = ['sections', + 'strat_names', + 'strat_tree', + 'units', + 'unit_strat_names', + 'unit_environs', + 'cols', + 'environs', + 'lith_atts', + 'lookup_strat_names', + 'lookup_unit_intervals', + 'measuremeta', + 'measures'] + #results = find_row_variances(pg_db_name, pg_db_name, maria_db_name_two, pg_user, pg_pass_new, table_rows) - results = find_row_variances(pg_db_name, pg_db_name, maria_db_name_two, pg_user, pg_pass_new, tables) - + results_two = find_col_variances(pg_db_name, pg_db_name, maria_db_name_two, pg_user, pg_pass_new, table_cols) From acf35b9488d0523642f9e36dd080ff33b695d983 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Fri, 26 Jul 2024 18:49:04 -0500 Subject: [PATCH 21/48] Move change assessment methods to separate file --- .../mariadb/postgresql_migration/__init__.py | 191 +----------------- .../postgresql_migration/db_changes.py | 189 +++++++++++++++++ 2 files changed, 191 insertions(+), 189 deletions(-) create mode 100644 cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py diff --git 
a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index 22d36709..54a30d60 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -1,13 +1,11 @@ from sqlalchemy import text, create_engine from Constants import * import os -from sqlalchemy.exc import SQLAlchemyError -import pandas as pd -from macrostrat.database.utils import run_query, run_sql -from psycopg2.sql import Identifier +from macrostrat.database.utils import run_sql from pathlib import Path import time +from .db_changes import get_data_counts_maria, get_data_counts_pg, compare_data_counts """ Copies table structure and table data from one schema to another schema on the same host. @@ -74,196 +72,11 @@ def maria_restore(server, user, password, dbname): return -def get_data_counts_maria(): - SQLALCHEMY_DATABASE_URI = ( - f"mysql+pymysql://{maria_super_user}:" - f"{maria_super_pass}@{maria_server}/{maria_db_name_two}" - ) - engine = create_engine(SQLALCHEMY_DATABASE_URI) - maria_rows = {} - maria_columns = {} - - with engine.connect() as conn: - row_result = run_query( - conn, - "SELECT table_name FROM information_schema.tables WHERE table_schema = :table_schema AND table_type = 'BASE TABLE'", - {"table_schema": maria_db_name_two}, - ) - - maria_tables = [row[0] for row in row_result] - for table in maria_tables: - row_result = run_query( - conn, "SELECT COUNT(*) FROM {table}", dict(table=Identifier(table)) - ) - row_count = row_result.scalar() - maria_rows[table.lower()] = row_count - column_result = run_query( - conn, - "SELECT COUNT(*) FROM information_schema.columns WHERE table_schema = :table_schema AND table_name = :table_name", - dict(table_schema=maria_db_name_two, table_name=table), - ) - - column_count = column_result.scalar() - maria_columns[table.lower()] = column_count - - engine.dispose() - return maria_rows, maria_columns - - -def get_data_counts_pg(database_name, username, password, schema): - SQLALCHEMY_DATABASE_URI = ( - f"postgresql://{username}:{password}@{pg_server}/{database_name}" - ) - engine = create_engine(SQLALCHEMY_DATABASE_URI) - pg_rows = {} - pg_columns = {} - - with engine.connect() as conn: - table_query = run_query( - conn, - """SELECT table_name FROM information_schema.tables WHERE table_catalog = :table_catalog - AND table_type = 'BASE TABLE' AND table_schema = :table_schema""", - dict(table_schema=schema, table_catalog=database_name), - ) - - table_result = conn.execute(table_query) - pg_tables = [row[0] for row in table_result] - for table in pg_tables: - row_result = run_query( - conn, - "SELECT COUNT(*) FROM {table}", - dict(table=Identifier(schema, table)), - ) - row_count = row_result.scalar() - pg_rows[table.lower()] = row_count - - column_result = run_query( - "SELECT COUNT(*) FROM information_schema.columns WHERE table_catalog = :table_catalog AND table_schema = :schema AND table_name = :table", - dict(table_catalog=database_name, schema=schema, table=table), - ) - column_count = column_result.scalar() - pg_columns[table.lower()] = column_count - engine.dispose() - return pg_rows, pg_columns - - -def compare_data_counts(db1_rows, db2_rows, db1_columns, db2_columns, db1, db2): - """ - Compares the data counts between tables, rows, and columns that vary between any two db's - """ - - db1_rows_not_in_db2 = { - table_name: (db1_rows[table_name], 0) - for table_name in db1_rows - if table_name not in 
db2_rows - } - db2_rows_not_in_db1 = { - table_name: (0, db2_rows[table_name]) - for table_name in db2_rows - if table_name not in db1_rows - } - db1_cols_not_in_db2 = { - table_name: (db1_columns[table_name], 0) - for table_name in db1_columns - if table_name not in db2_columns - } - db2_cols_not_in_db1 = { - table_name: (0, db2_columns[table_name]) - for table_name in db2_columns - if table_name not in db1_columns - } - - if len(db1_rows_not_in_db2) == 0 and len(db2_rows_not_in_db1) == 0: - print( - f"\nSuccess! All tables exist in both {db1} and {db2}. Checking row counts....\n" - ) - else: - if len(db1_rows_not_in_db2) > 0: - print( - f"\nERROR: {db1} tables that are not in {db2}:\n", - [key for key in db1_rows_not_in_db2], - ) - if len(db2_rows_not_in_db1) > 0: - print( - f"\nERROR: {db2} tables that are not in {db1}: \n", - [key for key in db2_rows_not_in_db1], - ) - - row_count_difference = { - key: (db1_rows[key], db2_rows[key]) - for key in db1_rows - if key in db2_rows and db1_rows[key] != db2_rows[key] - } - # row_count_difference.update(db1_rows_not_in_db2) - # row_count_difference.update(db2_rows_not_in_db1) - - col_count_difference = { - key: (db1_columns[key], db2_columns[key]) - for key in db1_columns - if key in db2_columns and db1_columns[key] != db2_columns[key] - } - # col_count_difference.update(db1_cols_not_in_db2) - # col_count_difference.update(db2_cols_not_in_db1) - - if len(row_count_difference) == 0: - print( - f"Success! All row counts in all tables are the same in both {db1} and {db2}!\n" - ) - else: - print( - f"\nERROR: Row count differences for {len(row_count_difference)} tables in both {db1} and {db2} databases:\n" - f"Table Name: ({db1} Rows, {db2} Rows)\n" - f"{row_count_difference}" - ) - if len(col_count_difference) == 0: - print( - f"Success! All column counts in all tables are the same in both {db1} and {db2}!\n" - ) - else: - print( - f"\nERROR: Column count differences for {len(col_count_difference)} tables in both {db1} and {db2} databases:\n" - f"Table Name: ({db1} Columns, {db2} Columns)\n" - f"{col_count_difference}" - ) - - return row_count_difference, col_count_difference - - """ Script to output dataframes for comparing data between two databases and tables. 
""" -def find_row_variances( - database_name_one, - schema_one, - database_name_two, - schema_two, - username, - password, - table, -): - SQLALCHEMY_DATABASE_URI = ( - f"postgresql://{username}:{password}@{pg_server}/{database_name_one}" - ) - engine = create_engine(SQLALCHEMY_DATABASE_URI) - with engine.connect() as conn: - query = text(f"SELECT * FROM {schema_one}.{table}") - result = conn.execute(query) - df = pd.DataFrame(result) - engine.dispose() - SQLALCHEMY_DATABASE_URI = ( - f"postgresql://{username}:{password}@{pg_server}/{database_name_two}" - ) - engine = create_engine(SQLALCHEMY_DATABASE_URI) - with engine.connect() as conn: - query = text(f"SELECT * FROM {schema_two}.{table}") - result = conn.execute(query) - df_two = pd.DataFrame(result) - engine.dispose() - return df, df_two - - def pg_loader_pre_script(): pre_script = __here__ / "pgloader-pre-script.sql" diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py new file mode 100644 index 00000000..7625c1fc --- /dev/null +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py @@ -0,0 +1,189 @@ +import pandas as pd +from macrostrat.database import run_query +from psycopg2.sql import Identifier +from sqlalchemy import create_engine, text + + +def get_data_counts_maria(): + SQLALCHEMY_DATABASE_URI = ( + f"mysql+pymysql://{maria_super_user}:" + f"{maria_super_pass}@{maria_server}/{maria_db_name_two}" + ) + engine = create_engine(SQLALCHEMY_DATABASE_URI) + maria_rows = {} + maria_columns = {} + + with engine.connect() as conn: + row_result = run_query( + conn, + "SELECT table_name FROM information_schema.tables WHERE table_schema = :table_schema AND table_type = 'BASE TABLE'", + {"table_schema": maria_db_name_two}, + ) + + maria_tables = [row[0] for row in row_result] + for table in maria_tables: + row_result = run_query( + conn, "SELECT COUNT(*) FROM {table}", dict(table=Identifier(table)) + ) + row_count = row_result.scalar() + maria_rows[table.lower()] = row_count + column_result = run_query( + conn, + "SELECT COUNT(*) FROM information_schema.columns WHERE table_schema = :table_schema AND table_name = :table_name", + dict(table_schema=maria_db_name_two, table_name=table), + ) + + column_count = column_result.scalar() + maria_columns[table.lower()] = column_count + + engine.dispose() + return maria_rows, maria_columns + + +def get_data_counts_pg(database_name, username, password, schema): + SQLALCHEMY_DATABASE_URI = ( + f"postgresql://{username}:{password}@{pg_server}/{database_name}" + ) + engine = create_engine(SQLALCHEMY_DATABASE_URI) + pg_rows = {} + pg_columns = {} + + with engine.connect() as conn: + table_query = run_query( + conn, + """SELECT table_name FROM information_schema.tables WHERE table_catalog = :table_catalog + AND table_type = 'BASE TABLE' AND table_schema = :table_schema""", + dict(table_schema=schema, table_catalog=database_name), + ) + + table_result = conn.execute(table_query) + pg_tables = [row[0] for row in table_result] + for table in pg_tables: + row_result = run_query( + conn, + "SELECT COUNT(*) FROM {table}", + dict(table=Identifier(schema, table)), + ) + row_count = row_result.scalar() + pg_rows[table.lower()] = row_count + + column_result = run_query( + "SELECT COUNT(*) FROM information_schema.columns WHERE table_catalog = :table_catalog AND table_schema = :schema AND table_name = :table", + dict(table_catalog=database_name, schema=schema, table=table), + ) + column_count = 
column_result.scalar() + pg_columns[table.lower()] = column_count + engine.dispose() + return pg_rows, pg_columns + + +def compare_data_counts(db1_rows, db2_rows, db1_columns, db2_columns, db1, db2): + """ + Compares the data counts between tables, rows, and columns that vary between any two db's + """ + + db1_rows_not_in_db2 = { + table_name: (db1_rows[table_name], 0) + for table_name in db1_rows + if table_name not in db2_rows + } + db2_rows_not_in_db1 = { + table_name: (0, db2_rows[table_name]) + for table_name in db2_rows + if table_name not in db1_rows + } + db1_cols_not_in_db2 = { + table_name: (db1_columns[table_name], 0) + for table_name in db1_columns + if table_name not in db2_columns + } + db2_cols_not_in_db1 = { + table_name: (0, db2_columns[table_name]) + for table_name in db2_columns + if table_name not in db1_columns + } + + if len(db1_rows_not_in_db2) == 0 and len(db2_rows_not_in_db1) == 0: + print( + f"\nSuccess! All tables exist in both {db1} and {db2}. Checking row counts....\n" + ) + else: + if len(db1_rows_not_in_db2) > 0: + print( + f"\nERROR: {db1} tables that are not in {db2}:\n", + [key for key in db1_rows_not_in_db2], + ) + if len(db2_rows_not_in_db1) > 0: + print( + f"\nERROR: {db2} tables that are not in {db1}: \n", + [key for key in db2_rows_not_in_db1], + ) + + row_count_difference = { + key: (db1_rows[key], db2_rows[key]) + for key in db1_rows + if key in db2_rows and db1_rows[key] != db2_rows[key] + } + # row_count_difference.update(db1_rows_not_in_db2) + # row_count_difference.update(db2_rows_not_in_db1) + + col_count_difference = { + key: (db1_columns[key], db2_columns[key]) + for key in db1_columns + if key in db2_columns and db1_columns[key] != db2_columns[key] + } + # col_count_difference.update(db1_cols_not_in_db2) + # col_count_difference.update(db2_cols_not_in_db1) + + if len(row_count_difference) == 0: + print( + f"Success! All row counts in all tables are the same in both {db1} and {db2}!\n" + ) + else: + print( + f"\nERROR: Row count differences for {len(row_count_difference)} tables in both {db1} and {db2} databases:\n" + f"Table Name: ({db1} Rows, {db2} Rows)\n" + f"{row_count_difference}" + ) + if len(col_count_difference) == 0: + print( + f"Success! 
All column counts in all tables are the same in both {db1} and {db2}!\n" + ) + else: + print( + f"\nERROR: Column count differences for {len(col_count_difference)} tables in both {db1} and {db2} databases:\n" + f"Table Name: ({db1} Columns, {db2} Columns)\n" + f"{col_count_difference}" + ) + + return row_count_difference, col_count_difference + + +def find_row_variances( + database_name_one, + schema_one, + database_name_two, + schema_two, + username, + password, + table, +): + SQLALCHEMY_DATABASE_URI = ( + f"postgresql://{username}:{password}@{pg_server}/{database_name_one}" + ) + engine = create_engine(SQLALCHEMY_DATABASE_URI) + with engine.connect() as conn: + query = text(f"SELECT * FROM {schema_one}.{table}") + result = conn.execute(query) + df = pd.DataFrame(result) + engine.dispose() + SQLALCHEMY_DATABASE_URI = ( + f"postgresql://{username}:{password}@{pg_server}/{database_name_two}" + ) + engine = create_engine(SQLALCHEMY_DATABASE_URI) + with engine.connect() as conn: + query = text(f"SELECT * FROM {schema_two}.{table}") + result = conn.execute(query) + df_two = pd.DataFrame(result) + engine.dispose() + return df, df_two From aa4c84ded727089af29a749ddfabf6663328c256 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Sat, 27 Jul 2024 04:57:31 -0500 Subject: [PATCH 22/48] Updated MariaDB dump/restore functions --- .../cli/database/mariadb/__init__.py | 48 ++++++++-- .../mariadb/postgresql_migration/__init__.py | 96 +++---------------- .../postgresql_migration/db_changes.py | 18 +++- .../cli/database/mariadb/restore.py | 80 +++++++++++++++- cli/macrostrat/cli/database/mariadb/utils.py | 18 +++- 5 files changed, 162 insertions(+), 98 deletions(-) diff --git a/cli/macrostrat/cli/database/mariadb/__init__.py b/cli/macrostrat/cli/database/mariadb/__init__.py index 350c52be..1a1f0995 100644 --- a/cli/macrostrat/cli/database/mariadb/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/__init__.py @@ -6,7 +6,7 @@ from pathlib import Path from .utils import build_connection_args -from .restore import restore_mariadb +from .restore import restore_mariadb, dump_mariadb app = Typer(no_args_is_help=True) @@ -43,23 +43,59 @@ def cli_command(ctx: Context): ) +@app.command( + "dump", + context_settings={"allow_extra_args": True, "ignore_unknown_options": True}, +) +def dump_command( + ctx: Context, + output: Path = Argument(None, help="Path to the dump file"), + database: str = Argument(None, help="Database to dump"), +): + """Dump a MariaDB database to a file.""" + engine = mysql_engine(database) + + if output is None: + output = Path("/dev/stdout") + + dump_mariadb(engine, output, *ctx.args, container=mariadb_container) + + @app.command("restore") def restore_command( input: str = Argument(None, help="Path to the dump file or stream"), + database: str = Argument(None, help="Database to restore to"), *, create: bool = False, overwrite: bool = False, ): """Restore a MariaDB database from a dump file or stream.""" - from macrostrat.core.config import mysql_database - - _database: URL = make_url(mysql_database) - _database = _database.set(drivername="mysql+pymysql") + engine = mysql_engine(database) restore_mariadb( input, - create_engine(_database), + engine, create=create, overwrite=overwrite, container=mariadb_container, ) + + +def mysql_engine(database: str = None): + from macrostrat.core.config import mysql_database + + _database: URL = make_url(mysql_database) + _database = _database.set(drivername="mysql+pymysql") + if database is not None: + _database = _database.set(database=database) + return 
create_engine(mysql_database) + + +@app.command("migrate-to-postgres") +def migrate_to_postgres_command(overwrite: bool = False): + """Import legacy MariaDB database to PostgreSQL using pgloader""" + from .postgresql_migration import migrate_mariadb_to_postgresql + + engine = mysql_engine() + + migrate_mariadb_to_postgresql(engine, overwrite=overwrite) diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index 54a30d60..0b24602b 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -1,89 +1,31 @@ from sqlalchemy import text, create_engine -from Constants import * import os from macrostrat.database.utils import run_sql from pathlib import Path +from sqlalchemy.engine import Engine +from ..restore import copy_mariadb_database import time from .db_changes import get_data_counts_maria, get_data_counts_pg, compare_data_counts -""" -Copies table structure and table data from one schema to another schema on the same host. -Command line in cmd.exe language -""" - __here__ = Path(__file__).parent -def pg_dump(server, user, password, dbname): - # TODO: integrate with existing PostgreSQL database utilities - os.system( - f"pg_dump -h {pg_server} -d {pg_db_name} -U {pg_user} -W -F d -f ./postgres_dump" - ) - os.system(f"{pg_pass}") - print("Starting database export........") - return - - -def pg_restore(server, user, password, dbname): - # TODO: integrate with existing PostgreSQL database utilities - os.system( - f"pg_dump -h {pg_server} -d {pg_db_name_two} -U {pg_user} -W -F d ./postgres_dump" - ) - os.system(f"{pg_pass}") - return - - -def maria_dump(server, user, password, dbname): - # TODO: integrate with streaming approach - SQLALCHEMY_DATABASE_URI = f"mysql+pymysql://{user}:{password}@{server}/{dbname}" - engine = create_engine(SQLALCHEMY_DATABASE_URI) - with engine.connect() as conn: - conn.execute(text(f"CREATE DATABASE IF NOT EXISTS {maria_db_name_two};")) - engine.dispose() - output_file = "./maria_dump.sql" - maria_dump_command = [ - "mysqldump", - "-h", - server, - "-d", - dbname, - "-u", - user, - f"-p{password}", - "--ssl-verify-server-cert=false", - "--no-data=false", - "--verbose", - "--result-file=./maria_dump.sql", - ] - os.system(" ".join(maria_dump_command)) - return - +def migrate_mariadb_to_postgresql(engine: Engine, overwrite: bool = False): + """Migrate the entire Macrostrat database from MariaDB to PostgreSQL.""" + temp_db_name = engine.url.database + "_temp" -def maria_restore(server, user, password, dbname): + copy_mariadb_database(engine, temp_db_name, overwrite=overwrite) - maria_restore_input = ( - f"mariadb -h {server} -u {user} -p{password} --ssl-verify-server-cert=false " - f"{dbname} < ./maria_dump.sql" - ) - - print("Restoring new Maria database....") - os.system(maria_restore_input) - return - - -""" -Script to output dataframes for comparing data between two databases and tables. 
-""" + # pg_loader_pre_script() + # pg_loader() + # pg_loader_post_script() -def pg_loader_pre_script(): +def pgloader_pre_script(engine: Engine): + assert engine.dialect.startswith("mysql") pre_script = __here__ / "pgloader-pre-script.sql" - - URL = f"mysql+pymysql://{maria_super_user}:{maria_super_pass}@{maria_server}/{maria_db_name_two}" - engine = create_engine(URL) run_sql(engine, pre_script) - engine.dispose() """ @@ -98,24 +40,14 @@ def pg_loader_pre_script(): pg_engine.dispose()""" -def pg_loader_post_script(): - # Query alters the MariaDB pbdb_matches table by adding a new column for the text data, - # setting the datatype of the new column data to WKT format, - # dropping the old geometry column, - # adding default values for data formats that pgloader accepts - # vaccuum...refresh postgresql database after pgloader - # CREATE EXTENSION IF NOT EXISTS postgis; - SQLALCHEMY_DATABASE_URI = f"postgresql://{pg_user_migrate}:{pg_pass_migrate}@{pg_server}/{pg_db_name_two}?sslmode=prefer" - engine = create_engine( - SQLALCHEMY_DATABASE_URI - ) # connect_args={'options': '-csearch_path=public,macrostrat_temp' - +def pgloader_post_script(engine: Engine): + assert engine.dialect.startswith("postgresql") print("Starting PostScript execution....") post_script = __here__ / "pgloader-post-script.sql" run_sql(engine, post_script) -def pg_loader(): +def pgloader(): """ Command terminal to run pgloader. Ensure Docker app is running. """ diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py index 7625c1fc..f462a797 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py @@ -1,3 +1,7 @@ +""" +Script to output dataframes for comparing data between two databases and tables. 
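+
+Rough usage sketch (illustrative only; assumes the module-level connection
+constants referenced elsewhere in this file are in scope):
+
+    maria_rows, maria_cols = get_data_counts_maria()
+    pg_rows, pg_cols = get_data_counts_pg(pg_db_name, pg_user, pg_pass, "macrostrat")
+    row_diff, col_diff = compare_data_counts(
+        maria_rows, pg_rows, maria_cols, pg_cols, "MariaDB", "PostgreSQL"
+    )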
+""" + import pandas as pd from macrostrat.database import run_query from psycopg2.sql import Identifier @@ -51,8 +55,11 @@ def get_data_counts_pg(database_name, username, password, schema): with engine.connect() as conn: table_query = run_query( conn, - """SELECT table_name FROM information_schema.tables WHERE table_catalog = :table_catalog - AND table_type = 'BASE TABLE' AND table_schema = :table_schema""", + """ + SELECT table_name FROM information_schema.tables + WHERE table_catalog = :table_catalog + AND table_type = 'BASE TABLE' AND table_schema = :table_schema + """, dict(table_schema=schema, table_catalog=database_name), ) @@ -68,7 +75,12 @@ def get_data_counts_pg(database_name, username, password, schema): pg_rows[table.lower()] = row_count column_result = run_query( - "SELECT COUNT(*) FROM information_schema.columns WHERE table_catalog = :table_catalog AND table_schema = :schema AND table_name = :table", + """ + SELECT COUNT(*) FROM information_schema.columns + WHERE table_catalog = :table_catalog + AND table_schema = :schema + AND table_name = :table + """, dict(table_catalog=database_name, schema=schema, table=table), ) column_count = column_result.scalar() diff --git a/cli/macrostrat/cli/database/mariadb/restore.py b/cli/macrostrat/cli/database/mariadb/restore.py index ca910b07..2497f2fc 100644 --- a/cli/macrostrat/cli/database/mariadb/restore.py +++ b/cli/macrostrat/cli/database/mariadb/restore.py @@ -5,11 +5,11 @@ from macrostrat.utils import get_logger from rich.console import Console -from sqlalchemy.engine import Engine +from sqlalchemy.engine import Engine, URL, create_engine from macrostrat.core.exc import MacrostratError import aiofiles -from .utils import build_connection_args +from .utils import build_connection_args, ParameterStyle from macrostrat.core.config import docker_internal_url from ..._dev.utils import ( @@ -31,7 +31,7 @@ def restore_mariadb(_input: Optional[str], engine: Engine, *args, **kwargs): """Restore a MariaDB database from a dump file or stream""" if _input.startswith("http"): - raise NotImplementedError("HTTP(S) restore not yet implemented") + raise NotImplementedError("http(s) restore not yet implemented") if _input is not None: _input = Path(_input) @@ -50,6 +50,14 @@ def restore_mariadb(_input: Optional[str], engine: Engine, *args, **kwargs): asyncio.run(task) +def _log_command(url: URL, cmd: list[str]): + logged_cmd = " ".join(cmd) + if url.password: + logged_cmd = logged_cmd.replace(url.password, "***") + log.debug(logged_cmd) + return logged_cmd + + async def _restore_mariadb(engine: Engine, *args, **kwargs): """Load MariaDB dump (GZipped SQL file) into a database, using centrally managed credentials, a Docker containerized `mariadb` client, and a streaming approach.""" @@ -73,7 +81,7 @@ async def _restore_mariadb(engine: Engine, *args, **kwargs): container=container, ) - log.debug(" ".join(_cmd)) + _log_command(engine.url, _cmd) return await asyncio.create_subprocess_exec( *_cmd, @@ -94,3 +102,67 @@ async def _restore_mariadb_from_file(dumpfile: Path, engine: Engine, *args, **kw ), asyncio.create_task(print_stdout(proc.stderr)), ) + + +async def _dump_mariadb(engine: Engine, *args, **kwargs): + """Dump a MariaDB database to a stream""" + container = kwargs.pop("container", "mariadb:10.10") + stdout = kwargs.pop("stdout", asyncio.subprocess.PIPE) + + conn = build_connection_args( + docker_internal_url(engine.url), ParameterStyle.MySQLDump + ) + + _cmd = _create_command( + "mysqldump", + *conn, + *args, + container=container, + ) + + 
_log_command(engine.url, _cmd) + + return await asyncio.create_subprocess_exec( + *_cmd, + stdout=stdout, + stderr=asyncio.subprocess.PIPE, + ) + + +def dump_mariadb(engine: Engine, dumpfile: Path, *args, **kwargs): + task = _dump_mariadb_to_file(engine, dumpfile, *args, **kwargs) + asyncio.run(task) + + +async def _dump_mariadb_to_file(engine: Engine, dumpfile: Path, *args, **kwargs): + proc = await _dump_mariadb(engine, *args, **kwargs) + # Open dump file as an async stream + async with aiofiles.open(dumpfile, mode="wb") as dest: + await asyncio.gather( + asyncio.create_task(print_stream_progress(proc.stdout, dest)), + asyncio.create_task(print_stdout(proc.stderr)), + ) + + +def copy_mariadb_database(engine: Engine, new_database: str, *args, **kwargs): + task = _copy_mariadb(engine, new_database, *args, **kwargs) + asyncio.run(task) + + +async def _copy_mariadb(engine: Engine, new_database: str, *args, **kwargs): + """Copy a MariaDB database to a new database in the same cluster""" + new_url = engine.url.set(database=new_database) + new_engine = create_engine(new_url) + overwrite = kwargs.pop("overwrite", False) + create = True + + dump = await _dump_mariadb(engine, *args, **kwargs) + restore = await _restore_mariadb( + new_engine, *args, **kwargs, create=create, overwrite=overwrite + ) + + # Connect the streams + await asyncio.gather( + asyncio.create_task(print_stream_progress(dump.stdout, restore.stdin)), + asyncio.create_task(print_stdout(restore.stderr)), + ) diff --git a/cli/macrostrat/cli/database/mariadb/utils.py b/cli/macrostrat/cli/database/mariadb/utils.py index b1c952d7..aedef339 100644 --- a/cli/macrostrat/cli/database/mariadb/utils.py +++ b/cli/macrostrat/cli/database/mariadb/utils.py @@ -1,7 +1,15 @@ from sqlalchemy.engine.url import URL +from enum import Enum -def build_connection_args(url: URL) -> [str]: +class ParameterStyle(Enum): + MariaDB = "mariadb" + MySQLDump = "mysqldump" + + +def build_connection_args( + url: URL, style: ParameterStyle = ParameterStyle.MariaDB +) -> [str]: """Build MariaDB connection arguments from a SQLAlchemy URL.""" args = [ "-h", @@ -10,9 +18,13 @@ def build_connection_args(url: URL) -> [str]: str(url.port), "-u", url.username, - "-D", - url.database, ] if url.password: args.extend(["-p" + str(url.password)]) + + if style == ParameterStyle.MariaDB: + args.extend(["-D", url.database]) + elif style == ParameterStyle.MySQLDump: + args.extend(["--databases", url.database]) + return args From 1262ea3a157afe14508d47cb2859335e2f0350b0 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Mon, 29 Jul 2024 03:54:00 -0500 Subject: [PATCH 23/48] PostgreSQL migration now mostly works --- cli/macrostrat/cli/_dev/stream_utils.py | 56 ++++++++++---- cli/macrostrat/cli/_dev/utils.py | 4 +- cli/macrostrat/cli/database/__init__.py | 16 ++-- .../cli/database/mariadb/__init__.py | 7 +- .../mariadb/postgresql_migration/Dockerfile | 8 -- .../mariadb/postgresql_migration/__init__.py | 75 ++++++++++++++----- .../cli/database/mariadb/restore.py | 57 +++++++------- cli/macrostrat/cli/database/mariadb/utils.py | 2 +- cli/macrostrat/cli/database/utils.py | 20 +++++ cli/poetry.lock | 18 +++-- cli/pyproject.toml | 2 +- core/macrostrat/core/config.py | 8 -- py-root/poetry.lock | 8 +- 13 files changed, 173 insertions(+), 108 deletions(-) delete mode 100644 cli/macrostrat/cli/database/mariadb/postgresql_migration/Dockerfile create mode 100644 cli/macrostrat/cli/database/utils.py diff --git a/cli/macrostrat/cli/_dev/stream_utils.py b/cli/macrostrat/cli/_dev/stream_utils.py index 
6145f249..0593b089 100644 --- a/cli/macrostrat/cli/_dev/stream_utils.py +++ b/cli/macrostrat/cli/_dev/stream_utils.py @@ -1,4 +1,5 @@ import asyncio +import sys import zlib from aiofiles.threadpool import AsyncBufferedIOBase @@ -9,38 +10,61 @@ async def print_stream_progress( - in_stream: asyncio.StreamReader, - out_stream: asyncio.StreamWriter | AsyncBufferedIOBase | None, + input: asyncio.StreamReader | asyncio.subprocess.Process, + out_stream: asyncio.StreamWriter | None, + verbose: bool = False, + chunk_size: int = 1024, ): """This should be unified with print_stream_progress, but there seem to be slight API differences between aiofiles and asyncio.StreamWriter APIs.""" + in_stream = input + if isinstance(in_stream, asyncio.subprocess.Process): + in_stream = input.stdout + megabytes_written = 0 i = 0 - async for line in in_stream: - megabytes_written += len(line) / 1_000_000 + + # Iterate over the stream by chunks + try: + while True: + chunk = await in_stream.read(chunk_size) + if not chunk: + log.info("End of stream") + break + if verbose: + log.info(chunk) + megabytes_written += len(chunk) / 1_000_000 + if isinstance(out_stream, AsyncBufferedIOBase): + await out_stream.write(chunk) + await out_stream.flush() + elif out_stream is not None: + out_stream.write(chunk) + await out_stream.drain() + i += 1 + if i == 100: + i = 0 + _print_progress(megabytes_written, end="\r") + except asyncio.CancelledError: + pass + finally: + _print_progress(megabytes_written) + if isinstance(out_stream, AsyncBufferedIOBase): - await out_stream.write(line) - await out_stream.flush() + out_stream.close() elif out_stream is not None: - out_stream.write(line) - await out_stream.drain() - i += 1 - if i == 1000: - i = 0 - _print_progress(megabytes_written, end="\r") - - if out_stream is not None: - out_stream.close() - _print_progress(megabytes_written) + out_stream.close() + await out_stream.wait_closed() def _print_progress(megabytes: float, **kwargs): progress = f"Dumped {megabytes:.1f} MB" + kwargs["file"] = sys.stderr print(progress, **kwargs) async def print_stdout(stream: asyncio.StreamReader): async for line in stream: + log.info(line) console.print(line.decode("utf-8"), style="dim") diff --git a/cli/macrostrat/cli/_dev/utils.py b/cli/macrostrat/cli/_dev/utils.py index 9e8aa797..a1344060 100644 --- a/cli/macrostrat/cli/_dev/utils.py +++ b/cli/macrostrat/cli/_dev/utils.py @@ -18,9 +18,9 @@ def _docker_local_run_args(postgres_container: str = "postgres:15"): "docker", "run", "-i", + "--log-driver", + "none", "--rm", - "--network", - "host", postgres_container, ] diff --git a/cli/macrostrat/cli/database/__init__.py b/cli/macrostrat/cli/database/__init__.py index a68115ef..49244c8b 100644 --- a/cli/macrostrat/cli/database/__init__.py +++ b/cli/macrostrat/cli/database/__init__.py @@ -11,6 +11,8 @@ from sqlalchemy import text from typer import Argument, Option from .migrations import run_migrations +from .utils import engine_for_db_name + from macrostrat.core import MacrostratSubsystem, app from macrostrat.core.utils import is_pg_url @@ -206,7 +208,7 @@ def dump( db_container = app.settings.get("pg_database_container", "postgres:15") - engine = _engine_for_db_name(database) + engine = engine_for_db_name(database) args = ctx.args custom_format = True @@ -236,7 +238,7 @@ def restore( db_container = app.settings.get("pg_database_container", "postgres:15") - engine = _engine_for_db_name(database) + engine = engine_for_db_name(database) args = [] if jobs is not None: @@ -251,14 +253,6 @@ def restore( ) -def
_engine_for_db_name(name: str | None): - engine = get_db().engine - if name is None: - return engine - url = engine.url.set(database=name) - return create_engine(url) - - @db_app.command(name="tables") def list_tables(ctx: typer.Context, database: str = Argument(None), schema: str = None): """List tables in the database""" @@ -274,7 +268,7 @@ def list_tables(ctx: typer.Context, database: str = Argument(None), schema: str sql += "\nORDER BY table_schema, table_name;" - engine = _engine_for_db_name(database) + engine = engine_for_db_name(database) print( f"[dim]Tables in database: [bold cyan]{engine.url.database}[/]\n", file=stderr diff --git a/cli/macrostrat/cli/database/mariadb/__init__.py b/cli/macrostrat/cli/database/mariadb/__init__.py index 1a1f0995..f884693f 100644 --- a/cli/macrostrat/cli/database/mariadb/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/__init__.py @@ -5,6 +5,7 @@ from sqlalchemy.engine.url import URL, make_url from pathlib import Path +from ..utils import docker_internal_url from .utils import build_connection_args from .restore import restore_mariadb, dump_mariadb @@ -12,6 +13,8 @@ mariadb_container = "mariadb:10.10" +# TODO: Adjust Typer context to ignore unconsumed arguments or arguments after "--" + @app.command( name="cli", @@ -20,7 +23,7 @@ ) def cli_command(ctx: Context): """Run the MariaDB CLI against the Macrostrat database.""" - from macrostrat.core.config import docker_internal_url, mysql_database + from macrostrat.core.config import mysql_database _database: URL = docker_internal_url(mysql_database) @@ -88,7 +91,7 @@ def mysql_engine(database: str = None): _database = _database.set(drivername="mysql+pymysql") if database is not None: _database = _database.set(database=database) - return create_engine(mysql_database) + return create_engine(_database) @app.command("migrate-to-postgres") diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/Dockerfile b/cli/macrostrat/cli/database/mariadb/postgresql_migration/Dockerfile deleted file mode 100644 index cf517dfe..00000000 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/Dockerfile +++ /dev/null @@ -1,8 +0,0 @@ -FROM dimitri/pgloader:latest -RUN apt-get update && apt-get install -y postgresql-client -RUN apt-get install -y ca-certificates -WORKDIR /app -COPY . 
/app - -RUN pip install -r requirements.txt -CMD ["python3", "app.py"] diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index 0b24602b..1b28b967 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -1,29 +1,48 @@ from sqlalchemy import text, create_engine import os from macrostrat.database.utils import run_sql +from macrostrat.app_frame.exc import ApplicationError from pathlib import Path from sqlalchemy.engine import Engine +from macrostrat.database import database_exists, create_database from ..restore import copy_mariadb_database +from ...._dev.utils import raw_database_url +from ...utils import engine_for_db_name, docker_internal_url +from macrostrat.utils import get_logger +from macrostrat.utils.shell import run +from macrostrat.core import app +from textwrap import dedent import time from .db_changes import get_data_counts_maria, get_data_counts_pg, compare_data_counts __here__ = Path(__file__).parent +log = get_logger(__name__) + def migrate_mariadb_to_postgresql(engine: Engine, overwrite: bool = False): """Migrate the entire Macrostrat database from MariaDB to PostgreSQL.""" temp_db_name = engine.url.database + "_temp" - copy_mariadb_database(engine, temp_db_name, overwrite=overwrite) + temp_engine = create_engine(engine.url.set(database=temp_db_name)) - # pg_loader_pre_script() - # pg_loader() + if database_exists(temp_engine.url) and not overwrite: + app.console.print( + "Database [bold cyan]macrostrat_temp[/] already exists. Use --overwrite to overwrite." + ) + else: + copy_mariadb_database(engine, temp_engine, overwrite=overwrite) + + pg_engine = engine_for_db_name("macrostrat_temp") + + pgloader_pre_script(temp_engine) + pgloader(temp_engine, pg_engine) # pg_loader_post_script() def pgloader_pre_script(engine: Engine): - assert engine.dialect.startswith("mysql") + assert engine.url.drivername.startswith("mysql") pre_script = __here__ / "pgloader-pre-script.sql" run_sql(engine, pre_script) @@ -47,28 +66,44 @@ def pgloader_post_script(engine: Engine): run_sql(engine, post_script) -def pgloader(): +def pgloader(source: Engine, dest: Engine): """ Command terminal to run pgloader. Ensure Docker app is running. """ - dockerfile_content = ( - "FROM dimitri/pgloader:latest\n" - "RUN apt-get update && apt-get install -y postgresql-client\n" - "RUN apt-get install -y ca-certificates" + dockerfile = dedent( + """FROM dimitri/pgloader:latest + RUN apt-get update && apt-get install -y postgresql-client ca-certificates && rm -rf /var/lib/apt/lists/* + ENTRYPOINT ["pgloader"] + """ ) - with open("Dockerfile", "w") as dockerfile: - dockerfile.write(dockerfile_content) - os.system("docker build -t pgloader-test .") - - input_command = ( - f'--with "prefetch rows = 1000" --verbose ' - f"mysql://root:{maria_super_pass}@{maria_server}/{maria_db_name_two} " - f"postgresql://{pg_user_migrate}:{pg_pass_migrate}@{pg_server}/{pg_db_name_two}?sslmode=prefer" + + run( + "docker", + "build", + "-t", + "pgloader-runner", + "-", + input=dockerfile.encode("utf-8"), ) - print(input_command) - os.system(f"docker run -i --rm pgloader-test pgloader {input_command}") - return + # PyMySQL is not installed in the pgloader image, so we need to use the mysql client + # to connect to the MariaDB database. 
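+    # For example (illustrative values only), a source URL like
+    #   mysql+pymysql://root:pw@localhost/macrostrat_temp
+    # becomes
+    #   mysql://root:pw@host.docker.internal/macrostrat_temp
+    # once the PyMySQL driver suffix is dropped below and docker_internal_url()
+    # remaps localhost to a host alias the pgloader container can reach.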
+ source_url = source.url.set(drivername="mysql") + + create_database(dest.url) + + run( + "docker", + "run", + "-i", + "--rm", + "pgloader-runner", + "--with", + "prefetch rows = 1000", + "--verbose", + raw_database_url(docker_internal_url(source_url)), + raw_database_url(docker_internal_url(dest.url)), + ) def reset(): diff --git a/cli/macrostrat/cli/database/mariadb/restore.py b/cli/macrostrat/cli/database/mariadb/restore.py index 2497f2fc..ed396894 100644 --- a/cli/macrostrat/cli/database/mariadb/restore.py +++ b/cli/macrostrat/cli/database/mariadb/restore.py @@ -2,19 +2,23 @@ from pathlib import Path from typing import Optional from sys import stdin +from subprocess import run from macrostrat.utils import get_logger from rich.console import Console from sqlalchemy.engine import Engine, URL, create_engine from macrostrat.core.exc import MacrostratError import aiofiles +from tempfile import NamedTemporaryFile from .utils import build_connection_args, ParameterStyle -from macrostrat.core.config import docker_internal_url +from ..utils import docker_internal_url + from ..._dev.utils import ( _create_command, _create_database_if_not_exists, + _docker_local_run_args, ) from ..._dev.stream_utils import ( print_stream_progress, @@ -30,10 +34,10 @@ def restore_mariadb(_input: Optional[str], engine: Engine, *args, **kwargs): """Restore a MariaDB database from a dump file or stream""" - if _input.startswith("http"): - raise NotImplementedError("http(s) restore not yet implemented") - if _input is not None: + if _input.startswith("http"): + raise NotImplementedError("http(s) restore not yet implemented") + _input = Path(_input) if _input is None: @@ -43,7 +47,7 @@ def restore_mariadb(_input: Optional[str], engine: Engine, *args, **kwargs): # Read from stdin _input = Path("/dev/stdin") - if not _input.is_file(): + if not _input.is_file() and not _input.is_fifo(): raise MacrostratError(f"{_input} is not a file") task = _restore_mariadb_from_file(_input, engine, *args, **kwargs) @@ -88,6 +92,8 @@ async def _restore_mariadb(engine: Engine, *args, **kwargs): stdin=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, limit=1024 * 1024 * 1, # 1 MB windows + # Stdout to dev null + stdout=asyncio.subprocess.PIPE, ) @@ -97,10 +103,8 @@ async def _restore_mariadb_from_file(dumpfile: Path, engine: Engine, *args, **kw async with aiofiles.open(dumpfile, mode="rb") as source: s1 = DecodingStreamReader(source) await asyncio.gather( - asyncio.create_task( - print_stream_progress(s1, proc.stdin), - ), - asyncio.create_task(print_stdout(proc.stderr)), + print_stream_progress(s1, proc.stdin), + print_stdout(proc.stderr), ) @@ -144,25 +148,24 @@ async def _dump_mariadb_to_file(engine: Engine, dumpfile: Path, *args, **kwargs) ) -def copy_mariadb_database(engine: Engine, new_database: str, *args, **kwargs): - task = _copy_mariadb(engine, new_database, *args, **kwargs) - asyncio.run(task) - - -async def _copy_mariadb(engine: Engine, new_database: str, *args, **kwargs): +def copy_mariadb_database(engine: Engine, new_engine: Engine, *args, **kwargs): """Copy a MariaDB database to a new database in the same cluster""" - new_url = engine.url.set(database=new_database) - new_engine = create_engine(new_url) - overwrite = kwargs.pop("overwrite", False) - create = True + container = kwargs.pop("container", "mariadb:10.10") - dump = await _dump_mariadb(engine, *args, **kwargs) - restore = await _restore_mariadb( - new_engine, *args, **kwargs, create=create, overwrite=overwrite + overwrite = kwargs.pop("overwrite", False) + 
create = kwargs.pop("create", True) + _create_database_if_not_exists( + new_engine.url, create=create, allow_exists=False, overwrite=overwrite ) - # Connect the streams - await asyncio.gather( - asyncio.create_task(print_stream_progress(dump.stdout, restore.stdin)), - asyncio.create_task(print_stdout(restore.stderr)), - ) + # Get a temporary file to store the dump + # Right now this is necessary because we can't properly pipe mysqldump to mariadb + with NamedTemporaryFile(delete=True) as tmp: + log.info(f"Copying {engine.url.database} to {new_engine.url.database}") + tmp_file = Path(tmp.name) + log.info(f"Dumping {engine.url.database} to {tmp.name}") + dump_mariadb(engine, tmp_file, *args, container=container) + log.info(f"Restoring {engine.url.database} from {tmp.name}") + restore_mariadb( + str(tmp_file), new_engine, *args, overwrite=overwrite, container=container + ) diff --git a/cli/macrostrat/cli/database/mariadb/utils.py b/cli/macrostrat/cli/database/mariadb/utils.py index aedef339..383679c9 100644 --- a/cli/macrostrat/cli/database/mariadb/utils.py +++ b/cli/macrostrat/cli/database/mariadb/utils.py @@ -23,7 +23,7 @@ def build_connection_args( args.extend(["-p" + str(url.password)]) if style == ParameterStyle.MariaDB: - args.extend(["-D", url.database]) + args.extend([url.database]) elif style == ParameterStyle.MySQLDump: args.extend(["--databases", url.database]) diff --git a/cli/macrostrat/cli/database/utils.py b/cli/macrostrat/cli/database/utils.py new file mode 100644 index 00000000..2c444428 --- /dev/null +++ b/cli/macrostrat/cli/database/utils.py @@ -0,0 +1,20 @@ +from ._legacy import get_db +from sqlalchemy.engine import create_engine +from sqlalchemy.engine.url import URL, make_url +from macrostrat.core.config import settings + + +def engine_for_db_name(name: str | None): + engine = get_db().engine + if name is None: + return engine + url = engine.url.set(database=name) + return create_engine(url) + + +def docker_internal_url(url: URL | str) -> URL: + url = make_url(url) + if url.host == "localhost": + docker_localhost = getattr(settings, "docker_localhost", "localhost") + url = url.set(host=docker_localhost) + return url diff --git a/cli/poetry.lock b/cli/poetry.lock index 66b5421f..f0d56473 100644 --- a/cli/poetry.lock +++ b/cli/poetry.lock @@ -607,20 +607,22 @@ typer = ">=0.9.0,<0.10.0" [[package]] name = "macrostrat-database" -version = "3.1.2" +version = "3.3.1" description = "A SQLAlchemy-based database toolkit." 
optional = false -python-versions = ">=3.8,<4.0" +python-versions = "<4.0,>=3.8" files = [ - {file = "macrostrat_database-3.1.2-py3-none-any.whl", hash = "sha256:481ef7d7696c5e7cb3d537ca4e178eba97423b44dddb01af5bc2729eb89c90a4"}, - {file = "macrostrat_database-3.1.2.tar.gz", hash = "sha256:92d9e61c24428dcd485fa174e731e2da181de7b0e02c43027c82bf48e483ca21"}, + {file = "macrostrat_database-3.3.1-py3-none-any.whl", hash = "sha256:3a7adb0d3b3dc1f1e1c84a2b764bfe210f0c850d3c93e0a2256c8fd379875b75"}, + {file = "macrostrat_database-3.3.1.tar.gz", hash = "sha256:51ccc801fc5884001d6c7393ddf595688ffdc49bde0252cd65f9027619a1c51e"}, ] [package.dependencies] +aiofiles = ">=23.2.1,<24.0.0" click = ">=8.1.3,<9.0.0" GeoAlchemy2 = ">=0.14.0,<0.15.0" "macrostrat.utils" = ">=1.0.0,<2.0.0" psycopg2-binary = ">=2.9.6,<3.0.0" +rich = ">=13.7.1,<14.0.0" SQLAlchemy = ">=2.0.18,<3.0.0" SQLAlchemy-Utils = ">=0.41.1,<0.42.0" sqlparse = ">=0.4.4,<0.5.0" @@ -1310,13 +1312,13 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "rich" -version = "13.5.2" +version = "13.7.1" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" optional = false python-versions = ">=3.7.0" files = [ - {file = "rich-13.5.2-py3-none-any.whl", hash = "sha256:146a90b3b6b47cac4a73c12866a499e9817426423f57c5a66949c086191a8808"}, - {file = "rich-13.5.2.tar.gz", hash = "sha256:fb9d6c0a0f643c99eed3875b5377a184132ba9be4d61516a55273d3554d75a39"}, + {file = "rich-13.7.1-py3-none-any.whl", hash = "sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222"}, + {file = "rich-13.7.1.tar.gz", hash = "sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432"}, ] [package.dependencies] @@ -1840,4 +1842,4 @@ test = ["websockets"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "2492acb363222b18285ed823951258f67d09e3a8fcb52e2308ca13ef2ddfb786" +content-hash = "f1fcb36b9257e3ad57705ae57e71a851a3cc26acb01eeca72983cee80a184f31" diff --git a/cli/pyproject.toml b/cli/pyproject.toml index 7116b608..f0d29f35 100644 --- a/cli/pyproject.toml +++ b/cli/pyproject.toml @@ -21,7 +21,7 @@ dynaconf = "^3.1.12" geopandas = "^0.14.1" ipython = "^8.5.0" "macrostrat.app-frame" = "^1.2.4" -"macrostrat.database" = "^3.1.1" +"macrostrat.database" = "^3.3.1" numpy = "^1.23.4" psycopg2-binary = "^2.9.4" pyproj = "^3.4.0" diff --git a/core/macrostrat/core/config.py b/core/macrostrat/core/config.py index 7f663d57..564b1ee4 100644 --- a/core/macrostrat/core/config.py +++ b/core/macrostrat/core/config.py @@ -117,11 +117,3 @@ def all_environments(self): settings.srcroot = Path(__file__).parent.parent.parent.parent environ["MACROSTRAT_ROOT"] = str(settings.srcroot) - - -def docker_internal_url(url: URL | str) -> URL: - url = make_url(url) - if url.host == "localhost": - docker_localhost = getattr(settings, "docker_localhost", "localhost") - url = url.set(host=docker_localhost) - return url diff --git a/py-root/poetry.lock b/py-root/poetry.lock index 0b267ced..00524ef7 100644 --- a/py-root/poetry.lock +++ b/py-root/poetry.lock @@ -756,7 +756,7 @@ geopandas = "^0.14.1" greenlet = "^3.0.3" ipython = "^8.5.0" "macrostrat.app-frame" = "^1.2.4" -"macrostrat.database" = "^3.1.1" +"macrostrat.database" = "^3.3.1" macrostrat-dinosaur = "^3.0.1" numpy = "^1.23.4" psycopg2-binary = "^2.9.4" @@ -802,13 +802,13 @@ url = "../core" [[package]] name = "macrostrat-database" -version = "3.3.0" +version = "3.3.1" description = "A SQLAlchemy-based database toolkit." 
optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "macrostrat_database-3.3.0-py3-none-any.whl", hash = "sha256:5a45c7d67a4fe006728842b2ff67b86f217271822da354fc884762864acb6732"}, - {file = "macrostrat_database-3.3.0.tar.gz", hash = "sha256:dafaac5ef28a668ff75f215caa8edda4345b2a2b4c273d0cdae3ac18b9a97b78"}, + {file = "macrostrat_database-3.3.1-py3-none-any.whl", hash = "sha256:3a7adb0d3b3dc1f1e1c84a2b764bfe210f0c850d3c93e0a2256c8fd379875b75"}, + {file = "macrostrat_database-3.3.1.tar.gz", hash = "sha256:51ccc801fc5884001d6c7393ddf595688ffdc49bde0252cd65f9027619a1c51e"}, ] [package.dependencies] From de6470e2d9157f161dc083d505ae7dc80cf9dac7 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Mon, 29 Jul 2024 04:35:06 -0500 Subject: [PATCH 24/48] Get reporting to work --- .../mariadb/postgresql_migration/__init__.py | 87 ++++++++++++------- .../postgresql_migration/db_changes.py | 30 +++---- .../pgloader-post-script.sql | 64 +++++++------- 3 files changed, 101 insertions(+), 80 deletions(-) diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index 1b28b967..9cfa38af 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -1,17 +1,20 @@ +import docker from sqlalchemy import text, create_engine import os from macrostrat.database.utils import run_sql from macrostrat.app_frame.exc import ApplicationError from pathlib import Path from sqlalchemy.engine import Engine -from macrostrat.database import database_exists, create_database +from macrostrat.database import database_exists, create_database, drop_database from ..restore import copy_mariadb_database from ...._dev.utils import raw_database_url from ...utils import engine_for_db_name, docker_internal_url +from ..._legacy import get_db from macrostrat.utils import get_logger from macrostrat.utils.shell import run from macrostrat.core import app from textwrap import dedent +import docker import time from .db_changes import get_data_counts_maria, get_data_counts_pg, compare_data_counts @@ -28,17 +31,23 @@ def migrate_mariadb_to_postgresql(engine: Engine, overwrite: bool = False): temp_engine = create_engine(engine.url.set(database=temp_db_name)) if database_exists(temp_engine.url) and not overwrite: - app.console.print( + header( "Database [bold cyan]macrostrat_temp[/] already exists. Use --overwrite to overwrite." 
) else: copy_mariadb_database(engine, temp_engine, overwrite=overwrite) - pg_engine = engine_for_db_name("macrostrat_temp") + pg_engine = get_db().engine + + pg_temp_engine = create_engine(pg_engine.url.set(database=temp_db_name)) pgloader_pre_script(temp_engine) - pgloader(temp_engine, pg_engine) - # pg_loader_post_script() + + pgloader(temp_engine, pg_temp_engine, overwrite=overwrite) + + pgloader_post_script(pg_temp_engine) + + compare_row_counts(engine, pg_temp_engine, pg_engine) def pgloader_pre_script(engine: Engine): @@ -60,16 +69,34 @@ def pgloader_pre_script(engine: Engine): def pgloader_post_script(engine: Engine): - assert engine.dialect.startswith("postgresql") - print("Starting PostScript execution....") + app.console.print("\n[bold]Running post-migration script[/]") + assert engine.url.drivername.startswith("postgres") post_script = __here__ / "pgloader-post-script.sql" run_sql(engine, post_script) -def pgloader(source: Engine, dest: Engine): +def pgloader(source: Engine, dest: Engine, overwrite=False): """ Command terminal to run pgloader. Ensure Docker app is running. """ + db_exists = database_exists(dest.url) + + if db_exists: + if overwrite: + header("Dropping PostgreSQL database") + drop_database(dest.url) + else: + header( + f"PostgreSQL database [bold cyan]{dest.url.database}[/] already exists. Skipping pgloader." + ) + return + + if not db_exists: + header("Creating PostgreSQL database") + create_database(dest.url) + + header("Building pgloader") + dockerfile = dedent( """FROM dimitri/pgloader:latest RUN apt-get update && apt-get install -y postgresql-client ca-certificates && rm -rf /var/lib/apt/lists/* @@ -77,21 +104,25 @@ def pgloader(source: Engine, dest: Engine): """ ) - run( - "docker", - "build", - "-t", - "pgloader-runner", - "-", - input=dockerfile.encode("utf-8"), - ) + # Check if docker container exists + + client = docker.from_env() + + _image_exists = client.images.get("pgloader-runner:latest") + + if _image_exists: + app.console.print("pgloader-runner image already exists.") + + if not _image_exists or overwrite: + app.console.print("Building pgloader-runner image.") + client.images.build(dockerfile, tag="pgloader-runner:latest") + + header("Running pgloader") # PyMySQL is not installed in the pgloader image, so we need to use the mysql client # to connect to the MariaDB database. 
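     # The assembled invocation is roughly equivalent to running (URLs are
     # illustrative placeholders):
     #
     #   docker run -i --rm pgloader-runner \
     #     --with "prefetch rows = 1000" --verbose \
     #     mysql://user:pw@docker-host/macrostrat_temp \
     #     postgresql://user:pw@docker-host/macrostrat_temp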
source_url = source.url.set(drivername="mysql") - create_database(dest.url) - run( "docker", "run", @@ -131,18 +162,12 @@ def reset(): maria_engine.dispose() -if __name__ == "__main__": - # maria_dump(maria_server, maria_super_user, maria_super_pass, maria_db_name) - # maria_restore(maria_server, maria_super_user, maria_super_pass, maria_db_name_two) - # pg_loader_pre_script() - # pg_loader() - # pg_loader_post_script() - maria_rows, maria_columns = get_data_counts_maria() - pg_rows, pg_columns = get_data_counts_pg( - pg_db_name, pg_user, pg_pass_new, "macrostrat" - ) +def compare_row_counts(maria: Engine, pg_temp: Engine, pg_final: Engine): + + maria_rows, maria_columns = get_data_counts_maria(maria) + pg_rows, pg_columns = get_data_counts_pg(pg_final, "macrostrat") pg_macrostrat_two_rows, pg_macrostrat_two_columns = get_data_counts_pg( - pg_db_name_two, pg_user_migrate, pg_pass_migrate, "macrostrat_temp" + pg_temp, "macrostrat_temp" ) print( @@ -170,3 +195,7 @@ def reset(): # reset() # df, df_two = find_row_variances(pg_db_name, pg_db_name, pg_db_name_two, maria_db_name_two, # pg_user, pg_pass_new, 'cols') + + +def header(text): + app.console.print(f"\n[bold]{text}[/]\n") diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py index f462a797..8720221b 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py @@ -6,14 +6,11 @@ from macrostrat.database import run_query from psycopg2.sql import Identifier from sqlalchemy import create_engine, text +from sqlalchemy.engine import Engine -def get_data_counts_maria(): - SQLALCHEMY_DATABASE_URI = ( - f"mysql+pymysql://{maria_super_user}:" - f"{maria_super_pass}@{maria_server}/{maria_db_name_two}" - ) - engine = create_engine(SQLALCHEMY_DATABASE_URI) +def get_data_counts_maria(engine: Engine): + db_name = engine.url.database maria_rows = {} maria_columns = {} @@ -21,20 +18,18 @@ def get_data_counts_maria(): row_result = run_query( conn, "SELECT table_name FROM information_schema.tables WHERE table_schema = :table_schema AND table_type = 'BASE TABLE'", - {"table_schema": maria_db_name_two}, + {"table_schema": db_name}, ) maria_tables = [row[0] for row in row_result] for table in maria_tables: - row_result = run_query( - conn, "SELECT COUNT(*) FROM {table}", dict(table=Identifier(table)) - ) + row_result = run_query(conn, f"SELECT COUNT(*) FROM {table}") row_count = row_result.scalar() maria_rows[table.lower()] = row_count column_result = run_query( conn, "SELECT COUNT(*) FROM information_schema.columns WHERE table_schema = :table_schema AND table_name = :table_name", - dict(table_schema=maria_db_name_two, table_name=table), + dict(table_schema=db_name, table_name=table), ) column_count = column_result.scalar() @@ -44,16 +39,14 @@ def get_data_counts_maria(): return maria_rows, maria_columns -def get_data_counts_pg(database_name, username, password, schema): - SQLALCHEMY_DATABASE_URI = ( - f"postgresql://{username}:{password}@{pg_server}/{database_name}" - ) - engine = create_engine(SQLALCHEMY_DATABASE_URI) +def get_data_counts_pg(engine: Engine, schema): + database_name = engine.url.database + pg_rows = {} pg_columns = {} with engine.connect() as conn: - table_query = run_query( + table_result = run_query( conn, """ SELECT table_name FROM information_schema.tables @@ -62,8 +55,6 @@ def get_data_counts_pg(database_name, username, password, schema): 
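+        -- base tables only; views and system relations are excluded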
""", dict(table_schema=schema, table_catalog=database_name), ) - - table_result = conn.execute(table_query) pg_tables = [row[0] for row in table_result] for table in pg_tables: row_result = run_query( @@ -75,6 +66,7 @@ def get_data_counts_pg(database_name, username, password, schema): pg_rows[table.lower()] = row_count column_result = run_query( + conn, """ SELECT COUNT(*) FROM information_schema.columns WHERE table_catalog = :table_catalog diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql index 6ca3084a..8799fdd6 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql @@ -9,36 +9,36 @@ CREATE EXTENSION IF NOT EXISTS postgis; -SET search_path TO macrostrat_two, public; - -ALTER TABLE macrostrat_two.macrostrat_temp.pbdb_matches ADD COLUMN coordinate geometry(Point, 4326); -UPDATE macrostrat_two.macrostrat_temp.pbdb_matches SET coordinate = ST_GeomFromText(coordinate_point_text, 4326); -ALTER TABLE macrostrat_two.macrostrat_temp.pbdb_matches DROP COLUMN coordinate_point_text; -SELECT * FROM macrostrat_two.macrostrat_temp.pbdb_matches LIMIT 5; - -ALTER TABLE macrostrat_two.macrostrat_temp.places ADD COLUMN geom geometry; -UPDATE macrostrat_two.macrostrat_temp.places SET geom = ST_GeomFromText(geom_text, 4326); - -ALTER TABLE macrostrat_two.macrostrat_temp.places DROP COLUMN geom_text; -SELECT * FROM macrostrat_two.macrostrat_temp.places LIMIT 5; - -ALTER TABLE macrostrat_two.macrostrat_temp.refs ADD COLUMN rgeom geometry; -UPDATE macrostrat_two.macrostrat_temp.refs SET rgeom = ST_GeomFromText(rgeom_text, 4326); -ALTER TABLE macrostrat_two.macrostrat_temp.refs DROP COLUMN rgeom_text; -SELECT * FROM macrostrat_two.macrostrat_temp.refs LIMIT 5; - -ALTER TABLE macrostrat_two.macrostrat_temp.cols ADD COLUMN coordinate geometry; -UPDATE macrostrat_two.macrostrat_temp.cols SET coordinate = ST_GeomFromText(coordinate_text, 4326); -ALTER TABLE macrostrat_two.macrostrat_temp.cols DROP COLUMN coordinate_text; -SELECT * FROM macrostrat_two.macrostrat_temp.cols LIMIT 5; - -ALTER TABLE macrostrat_two.macrostrat_temp.col_areas ADD COLUMN col_area geometry; -UPDATE macrostrat_two.macrostrat_temp.col_areas SET col_area = ST_GeomFromText(col_area_text, 4326); -ALTER TABLE macrostrat_two.macrostrat_temp.col_areas DROP COLUMN col_area_text; -SELECT * FROM macrostrat_two.macrostrat_temp.col_areas LIMIT 5; - -ALTER TABLE macrostrat_two.macrostrat_temp.col_areas_6April2016 ADD COLUMN col_area geometry; -UPDATE macrostrat_two.macrostrat_temp.col_areas_6April2016 SET col_area = ST_GeomFromText(col_area_text, 4326); -ALTER TABLE macrostrat_two.macrostrat_temp.col_areas_6April2016 DROP COLUMN col_area_text; -SELECT * FROM macrostrat_two.macrostrat_temp.col_areas_6April2016 LIMIT 5; +SET search_path TO macrostrat_temp, public; + +ALTER TABLE macrostrat_temp.pbdb_matches ADD COLUMN coordinate geometry(Point, 4326); +UPDATE macrostrat_temp.pbdb_matches SET coordinate = ST_GeomFromText(coordinate_point_text, 4326); +ALTER TABLE macrostrat_temp.pbdb_matches DROP COLUMN coordinate_point_text; +SELECT * FROM macrostrat_temp.pbdb_matches LIMIT 5; + +ALTER TABLE macrostrat_temp.places ADD COLUMN geom geometry; +UPDATE macrostrat_temp.places SET geom = ST_GeomFromText(geom_text, 4326); + +ALTER TABLE macrostrat_temp.places DROP COLUMN geom_text; +SELECT * FROM 
macrostrat_temp.places LIMIT 5; + +ALTER TABLE macrostrat_temp.refs ADD COLUMN rgeom geometry; +UPDATE macrostrat_temp.refs SET rgeom = ST_GeomFromText(rgeom_text, 4326); +ALTER TABLE macrostrat_temp.refs DROP COLUMN rgeom_text; +SELECT * FROM macrostrat_temp.refs LIMIT 5; + +ALTER TABLE macrostrat_temp.cols ADD COLUMN coordinate geometry; +UPDATE macrostrat_temp.cols SET coordinate = ST_GeomFromText(coordinate_text, 4326); +ALTER TABLE macrostrat_temp.cols DROP COLUMN coordinate_text; +SELECT * FROM macrostrat_temp.cols LIMIT 5; + +ALTER TABLE macrostrat_temp.col_areas ADD COLUMN col_area geometry; +UPDATE macrostrat_temp.col_areas SET col_area = ST_GeomFromText(col_area_text, 4326); +ALTER TABLE macrostrat_temp.col_areas DROP COLUMN col_area_text; +SELECT * FROM macrostrat_temp.col_areas LIMIT 5; + +ALTER TABLE macrostrat_temp.col_areas_6April2016 ADD COLUMN col_area geometry; +UPDATE macrostrat_temp.col_areas_6April2016 SET col_area = ST_GeomFromText(col_area_text, 4326); +ALTER TABLE macrostrat_temp.col_areas_6April2016 DROP COLUMN col_area_text; +SELECT * FROM macrostrat_temp.col_areas_6April2016 LIMIT 5; From e239b9b11f0319c771d9055a323578562a7db3f4 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Mon, 29 Jul 2024 05:11:58 -0500 Subject: [PATCH 25/48] Somewhat improved column and row count functions --- .../mariadb/postgresql_migration/__init__.py | 73 ++++++++----------- .../postgresql_migration/db_changes.py | 57 +++++++++------ 2 files changed, 68 insertions(+), 62 deletions(-) diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index 9cfa38af..24914437 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -137,65 +137,56 @@ def pgloader(source: Engine, dest: Engine, overwrite=False): ) -def reset(): - SQLALCHEMY_DATABASE_URI = ( - f"{pg_user_maria_temp}:{pg_pass_maria_temp}@{pg_server}/{pg_db_name_two}" - ) - pg_engine = create_engine(SQLALCHEMY_DATABASE_URI) - pg_drop_query = text( - f"DROP SCHEMA macrostrat_temp CASCADE" - ) # {new_migrate_schema_name} - - with pg_engine.connect() as conn: - conn.execute(pg_drop_query) - pg_engine.dispose() - - SQLALCHEMY_DATABASE_URI = ( - f"mysql+pymysql://{maria_super_user}:" - f"{maria_super_pass}@{maria_server}/{maria_db_name_two}" - ) - maria_engine = create_engine(SQLALCHEMY_DATABASE_URI) - maria_drop_query = text(f"DROP DATABASE {maria_db_name_two}") - - with maria_engine.connect() as conn: - conn.execute(maria_drop_query) - maria_engine.dispose() - - def compare_row_counts(maria: Engine, pg_temp: Engine, pg_final: Engine): + console = app.console + maria_rows, maria_columns = get_data_counts_maria(maria) - pg_rows, pg_columns = get_data_counts_pg(pg_final, "macrostrat") - pg_macrostrat_two_rows, pg_macrostrat_two_columns = get_data_counts_pg( + pg_macrostrat_temp_rows, pg_macrostrat_temp_columns = get_data_counts_pg( pg_temp, "macrostrat_temp" ) - print( - "\nMARIADB (db1) comparison to PG MACROSTRAT_TWO (db2). These should be clones. 
" - ) - db1 = "MariaDB" - db2 = "PG Macrostrat_Two" + db1 = db_identifier(maria) + db2 = db_identifier(pg_temp) + db3 = db_identifier(pg_final) + + header(f"\n\nComparing [cyan]{db1}[/] to [cyan]{db2}[/].") + row_variance, column_variance = compare_data_counts( maria_rows, - pg_macrostrat_two_rows, + pg_macrostrat_temp_rows, maria_columns, - pg_macrostrat_two_columns, + pg_macrostrat_temp_columns, db1, db2, ) - print( - "\nPG MACROSTRAT_TWO (db1 maria db clone) comparison to PG MACROSTRAT (db2). This will show what data " - "needs to be moved over from Maria to PG prod." - ) - db1 = "PG Macrostrat_Two" - db2 = "PG Macrostrat" + + pg_rows, pg_columns = get_data_counts_pg(pg_final, "macrostrat") + + header(f"\n\nComparing [cyan]{db2}[/] to [cyan]{db3}[/].") + row_variance_two, column_variance_two = compare_data_counts( - pg_macrostrat_two_rows, pg_rows, pg_macrostrat_two_columns, pg_columns, db1, db2 + pg_macrostrat_temp_rows, + pg_rows, + pg_macrostrat_temp_columns, + pg_columns, + db2, + db3, ) # reset() # df, df_two = find_row_variances(pg_db_name, pg_db_name, pg_db_name_two, maria_db_name_two, # pg_user, pg_pass_new, 'cols') +def db_identifier(engine: Engine): + driver = engine.url.drivername + if driver.startswith("postgresql"): + driver = "PostgreSQL" + elif driver.startswith("mysql"): + driver = "MariaDB" + + return f"{engine.url.database} ({driver})" + + def header(text): app.console.print(f"\n[bold]{text}[/]\n") diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py index 8720221b..9696b7e9 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py @@ -7,6 +7,9 @@ from psycopg2.sql import Identifier from sqlalchemy import create_engine, text from sqlalchemy.engine import Engine +from macrostrat.core import app + +console = app.console def get_data_counts_maria(engine: Engine): @@ -108,21 +111,21 @@ def compare_data_counts(db1_rows, db2_rows, db1_columns, db2_columns, db1, db2): } if len(db1_rows_not_in_db2) == 0 and len(db2_rows_not_in_db1) == 0: - print( - f"\nSuccess! All tables exist in both {db1} and {db2}. Checking row counts....\n" - ) + success(f"All tables exist in both {db1} and {db2}.") else: if len(db1_rows_not_in_db2) > 0: - print( - f"\nERROR: {db1} tables that are not in {db2}:\n", + error(f"{len(db1_rows_not_in_db2)} {db1} tables not found in {db2}:") + console.print( [key for key in db1_rows_not_in_db2], ) if len(db2_rows_not_in_db1) > 0: - print( - f"\nERROR: {db2} tables that are not in {db1}: \n", + error(f"{len(db2_rows_not_in_db1)} {db2} tables not found in {db1}:") + console.print( [key for key in db2_rows_not_in_db1], ) + console.print("\n[bold]Checking row counts...") + row_count_difference = { key: (db1_rows[key], db2_rows[key]) for key in db1_rows @@ -140,29 +143,41 @@ def compare_data_counts(db1_rows, db2_rows, db1_columns, db2_columns, db1, db2): # col_count_difference.update(db2_cols_not_in_db1) if len(row_count_difference) == 0: - print( - f"Success! 
All row counts in all tables are the same in both {db1} and {db2}!\n" -        ) +        success(f"All row counts in all tables are the same in {db1} and {db2}!")     else: -        print( -            f"\nERROR: Row count differences for {len(row_count_difference)} tables in both {db1} and {db2} databases:\n" -            f"Table Name: ({db1} Rows, {db2} Rows)\n" -            f"{row_count_difference}" +        error( +            f"Row count differences for {len(row_count_difference)} tables in {db1} and {db2} databases"         ) +        print_counts(row_count_difference) +     if len(col_count_difference) == 0: -        print( -            f"Success! All column counts in all tables are the same in both {db1} and {db2}!\n" -        ) +        success(f"All column counts in all tables are the same in {db1} and {db2}!\n")     else: -        print( -            f"\nERROR: Column count differences for {len(col_count_difference)} tables in both {db1} and {db2} databases:\n" -            f"Table Name: ({db1} Columns, {db2} Columns)\n" -            f"{col_count_difference}" +        error( +            f"Column count differences for {len(col_count_difference)} tables in {db1} and {db2} databases"         ) +        print_counts(col_count_difference)      return row_count_difference, col_count_difference  +def print_counts(counts): +    for key, (v1, v2) in counts.items(): +        diff = v1 - v2 +        col = "red" if diff < 0 else "green" +        diff = f"[{col}]{diff:+8d}[/]" + +        console.print(f"{key:30s} {v1:9d} {v2:9d} [dim]{diff}[/dim]") + + +def error(message): +    console.print(f"\n[red bold]ERROR:[red] {message}") + + +def success(message): +    console.print(f"\n[green bold]SUCCESS:[green] {message}") + +  def find_row_variances(     database_name_one,     schema_one, From e7650e6b7e04eda1be1cd87842770699f532bb53 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Mon, 29 Jul 2024 12:05:47 -0500 Subject: [PATCH 26/48] Updated mariadb migration functions --- .idea/sqldialects.xml | 7 +++ .idea/vcs.xml | 1 - cli/macrostrat/cli/_dev/stream_utils.py | 11 +++-- cli/macrostrat/cli/_dev/utils.py | 6 +++ .../cli/database/mariadb/__init__.py | 4 +- .../mariadb/postgresql_migration/__init__.py | 46 +++++++++---------- .../cli/database/mariadb/restore.py | 43 +++++++++-------- 7 files changed, 72 insertions(+), 46 deletions(-) create mode 100644 .idea/sqldialects.xml diff --git a/.idea/sqldialects.xml b/.idea/sqldialects.xml new file mode 100644 index 00000000..523e12e8 --- /dev/null +++ b/.idea/sqldialects.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml index f2911a75..35eb1ddf 100644 --- a/.idea/vcs.xml +++ b/.idea/vcs.xml @@ -2,6 +2,5 @@ - \ No newline at end of file diff --git a/cli/macrostrat/cli/_dev/stream_utils.py b/cli/macrostrat/cli/_dev/stream_utils.py index 0593b089..783a445d 100644 --- a/cli/macrostrat/cli/_dev/stream_utils.py +++ b/cli/macrostrat/cli/_dev/stream_utils.py @@ -12,8 +12,10 @@ async def print_stream_progress( input: asyncio.StreamReader | asyncio.subprocess.Process, out_stream: asyncio.StreamWriter | None, + *, verbose: bool = False, chunk_size: int = 1024, + prefix: str = None, ): """This should be unified with print_stream_progress, but there seem to be slight API differences between aiofiles and asyncio.StreamWriter APIs.""" in_stream = input if isinstance(in_stream, asyncio.subprocess.Process): in_stream = input.stdout megabytes_written = 0 i = 0 @@ -43,11 +45,11 @@ async def
print_stream_progress( def _print_progress(megabytes: float, **kwargs): - progress = f"Dumped {megabytes:.1f} MB" + prefix = kwargs.pop("prefix", None) + if prefix is None: + prefix = "Dumped" + progress = f"{prefix} {megabytes:.1f} MB" kwargs["file"] = sys.stderr print(progress, **kwargs) diff --git a/cli/macrostrat/cli/_dev/utils.py b/cli/macrostrat/cli/_dev/utils.py index a1344060..43f4ebad 100644 --- a/cli/macrostrat/cli/_dev/utils.py +++ b/cli/macrostrat/cli/_dev/utils.py @@ -18,6 +18,12 @@ def _docker_local_run_args(postgres_container: str = "postgres:15"): "docker", "run", "-i", + "--attach", + "stdin", + "--attach", + "stdout", + "--attach", + "stderr", "--log-driver", "none", "--rm", diff --git a/cli/macrostrat/cli/database/mariadb/__init__.py b/cli/macrostrat/cli/database/mariadb/__init__.py index f884693f..9223e53b 100644 --- a/cli/macrostrat/cli/database/mariadb/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/__init__.py @@ -1,6 +1,7 @@ from typer import Typer, Context, Argument from sys import stdin from macrostrat.utils.shell import run +from .._legacy import get_db from sqlalchemy.engine import create_engine from sqlalchemy.engine.url import URL, make_url from pathlib import Path @@ -100,5 +101,6 @@ def migrate_to_postgres_command(overwrite: bool = False): from .postgresql_migration import migrate_mariadb_to_postgresql engine = mysql_engine() + dest = get_db().engine - migrate_mariadb_to_postgresql(engine, overwrite=overwrite) + migrate_mariadb_to_postgresql(engine, dest, overwrite=overwrite) diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index 24914437..a06a7c59 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -1,3 +1,5 @@ +from io import StringIO + import docker from sqlalchemy import text, create_engine import os @@ -15,6 +17,7 @@ from macrostrat.core import app from textwrap import dedent import docker +from io import BytesIO import time from .db_changes import get_data_counts_maria, get_data_counts_pg, compare_data_counts @@ -24,30 +27,30 @@ log = get_logger(__name__) -def migrate_mariadb_to_postgresql(engine: Engine, overwrite: bool = False): +def migrate_mariadb_to_postgresql( + maria_engine: Engine, pg_engine: Engine, overwrite: bool = False +): """Migrate the entire Macrostrat database from MariaDB to PostgreSQL.""" - temp_db_name = engine.url.database + "_temp" + temp_db_name = maria_engine.url.database + "_temp" - temp_engine = create_engine(engine.url.set(database=temp_db_name)) + maria_temp_engine = create_engine(maria_engine.url.set(database=temp_db_name)) - if database_exists(temp_engine.url) and not overwrite: + if database_exists(maria_temp_engine.url) and not overwrite: header( "Database [bold cyan]macrostrat_temp[/] already exists. Use --overwrite to overwrite." 
) else: - copy_mariadb_database(engine, temp_engine, overwrite=overwrite) - - pg_engine = get_db().engine + copy_mariadb_database(maria_engine, maria_temp_engine, overwrite=overwrite) pg_temp_engine = create_engine(pg_engine.url.set(database=temp_db_name)) - pgloader_pre_script(temp_engine) + pgloader_pre_script(maria_temp_engine) - pgloader(temp_engine, pg_temp_engine, overwrite=overwrite) + pgloader(maria_temp_engine, pg_temp_engine, overwrite=overwrite) pgloader_post_script(pg_temp_engine) - compare_row_counts(engine, pg_temp_engine, pg_engine) + compare_row_counts(maria_engine, pg_temp_engine, pg_engine) def pgloader_pre_script(engine: Engine): @@ -85,6 +88,7 @@ def pgloader(source: Engine, dest: Engine, overwrite=False): if overwrite: header("Dropping PostgreSQL database") drop_database(dest.url) + db_exists = False else: header( f"PostgreSQL database [bold cyan]{dest.url.database}[/] already exists. Skipping pgloader." @@ -95,7 +99,7 @@ def pgloader(source: Engine, dest: Engine, overwrite=False): header("Creating PostgreSQL database") create_database(dest.url) - header("Building pgloader") + header("Building pgloader-runner Docker image") dockerfile = dedent( """FROM dimitri/pgloader:latest @@ -104,18 +108,14 @@ def pgloader(source: Engine, dest: Engine, overwrite=False): """ ) - # Check if docker container exists - - client = docker.from_env() - - _image_exists = client.images.get("pgloader-runner:latest") - - if _image_exists: - app.console.print("pgloader-runner image already exists.") - - if not _image_exists or overwrite: - app.console.print("Building pgloader-runner image.") - client.images.build(dockerfile, tag="pgloader-runner:latest") + run( + "docker", + "build", + "-t", + "pgloader-runner:latest", + "-", + input=dockerfile.encode("utf-8"), + ) header("Running pgloader") diff --git a/cli/macrostrat/cli/database/mariadb/restore.py b/cli/macrostrat/cli/database/mariadb/restore.py index ed396894..5f79393a 100644 --- a/cli/macrostrat/cli/database/mariadb/restore.py +++ b/cli/macrostrat/cli/database/mariadb/restore.py @@ -1,15 +1,16 @@ import asyncio from pathlib import Path -from typing import Optional +from typing import Union from sys import stdin -from subprocess import run from macrostrat.utils import get_logger +from macrostrat.database import database_exists from rich.console import Console -from sqlalchemy.engine import Engine, URL, create_engine +from sqlalchemy.engine import Engine, URL from macrostrat.core.exc import MacrostratError import aiofiles from tempfile import NamedTemporaryFile +from contextlib import contextmanager from .utils import build_connection_args, ParameterStyle from ..utils import docker_internal_url @@ -18,7 +19,6 @@ from ..._dev.utils import ( _create_command, _create_database_if_not_exists, - _docker_local_run_args, ) from ..._dev.stream_utils import ( print_stream_progress, @@ -31,11 +31,11 @@ log = get_logger(__name__) -def restore_mariadb(_input: Optional[str], engine: Engine, *args, **kwargs): +def restore_mariadb(_input: Union[str, Path, None], engine: Engine, *args, **kwargs): """Restore a MariaDB database from a dump file or stream""" if _input is not None: - if _input.startswith("http"): + if str(_input).startswith("http"): raise NotImplementedError("http(s) restore not yet implemented") _input = Path(_input) @@ -103,7 +103,7 @@ async def _restore_mariadb_from_file(dumpfile: Path, engine: Engine, *args, **kw async with aiofiles.open(dumpfile, mode="rb") as source: s1 = DecodingStreamReader(source) await asyncio.gather( - 
print_stream_progress(s1, proc.stdin), + print_stream_progress(s1, proc.stdin, prefix="Restored"), print_stdout(proc.stderr), ) @@ -154,18 +154,25 @@ def copy_mariadb_database(engine: Engine, new_engine: Engine, *args, **kwargs): overwrite = kwargs.pop("overwrite", False) create = kwargs.pop("create", True) - _create_database_if_not_exists( - new_engine.url, create=create, allow_exists=False, overwrite=overwrite - ) + if database_exists(new_engine.url) and not overwrite: + console.print( + f"Database [bold underline]{new_engine.url.database}[/] already exists. Use --overwrite to overwrite." + ) + return # Get a temporary file to store the dump # Right now this is necessary because we can't properly pipe mysqldump to mariadb - with NamedTemporaryFile(delete=True) as tmp: + with _tempfile(suffix=".sql") as tmp: log.info(f"Copying {engine.url.database} to {new_engine.url.database}") - tmp_file = Path(tmp.name) - log.info(f"Dumping {engine.url.database} to {tmp.name}") - dump_mariadb(engine, tmp_file, *args, container=container) - log.info(f"Restoring {engine.url.database} from {tmp.name}") - restore_mariadb( - str(tmp_file), new_engine, *args, overwrite=overwrite, container=container - ) + log.info(f"Dumping {engine.url.database} to {tmp}") + dump_mariadb(engine, tmp, *args, container=container) + log.info(f"Restoring {engine.url.database} from {tmp}") + restore_mariadb(tmp, new_engine, overwrite=overwrite, container=container) + + +@contextmanager +def _tempfile(suffix: str = ""): + with NamedTemporaryFile(delete=False, suffix=suffix) as tmp: + pth = Path(tmp.name) + yield pth + pth.unlink() From 0ab36784727d3a2d1ebc4d4915ae8c2d7bbfa97c Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Mon, 29 Jul 2024 12:34:25 -0500 Subject: [PATCH 27/48] Fixed MYSQLDump command and deadlock in dump/restore --- .../cli/database/mariadb/restore.py | 30 ++++++++++++------- cli/macrostrat/cli/database/mariadb/utils.py | 5 +--- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/cli/macrostrat/cli/database/mariadb/restore.py b/cli/macrostrat/cli/database/mariadb/restore.py index 5f79393a..81ae0435 100644 --- a/cli/macrostrat/cli/database/mariadb/restore.py +++ b/cli/macrostrat/cli/database/mariadb/restore.py @@ -149,6 +149,12 @@ async def _dump_mariadb_to_file(engine: Engine, dumpfile: Path, *args, **kwargs) def copy_mariadb_database(engine: Engine, new_engine: Engine, *args, **kwargs): + """Copy a MariaDB database to a new database in the same cluster""" + task = _copy_mariadb_database(engine, new_engine, *args, **kwargs) + asyncio.run(task) + + +async def _copy_mariadb_database(engine: Engine, new_engine: Engine, *args, **kwargs): """Copy a MariaDB database to a new database in the same cluster""" container = kwargs.pop("container", "mariadb:10.10") @@ -160,19 +166,21 @@ def copy_mariadb_database(engine: Engine, new_engine: Engine, *args, **kwargs): ) return - # Get a temporary file to store the dump - # Right now this is necessary because we can't properly pipe mysqldump to mariadb - with _tempfile(suffix=".sql") as tmp: - log.info(f"Copying {engine.url.database} to {new_engine.url.database}") - log.info(f"Dumping {engine.url.database} to {tmp}") - dump_mariadb(engine, tmp, *args, container=container) - log.info(f"Restoring {engine.url.database} from {tmp}") - restore_mariadb(tmp, new_engine, overwrite=overwrite, container=container) + dump = await _dump_mariadb(engine, *args, container=container) + restore = await _restore_mariadb( + new_engine, overwrite=overwrite, create=create, 
container=container + ) + + return await asyncio.gather( + asyncio.create_task(print_stream_progress(dump.stdout, restore.stdin, prefix="Copied")), + asyncio.create_task(print_stdout(dump.stderr)), + asyncio.create_task(print_stdout(restore.stderr)), + ) @contextmanager def _tempfile(suffix: str = ""): - with NamedTemporaryFile(delete=False, suffix=suffix) as tmp: - pth = Path(tmp.name) - yield pth + pth = Path("/tmp/sql-dump.sql") + if pth.is_file(): pth.unlink() + yield pth diff --git a/cli/macrostrat/cli/database/mariadb/utils.py b/cli/macrostrat/cli/database/mariadb/utils.py index 383679c9..7daa0584 100644 --- a/cli/macrostrat/cli/database/mariadb/utils.py +++ b/cli/macrostrat/cli/database/mariadb/utils.py @@ -22,9 +22,6 @@ def build_connection_args( if url.password: args.extend(["-p" + str(url.password)]) - if style == ParameterStyle.MariaDB: - args.extend([url.database]) - elif style == ParameterStyle.MySQLDump: - args.extend(["--databases", url.database]) + args.append(url.database) return args From 1a9fbf3c20a86dc2ef0b226f92b26145b9f59f95 Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Mon, 29 Jul 2024 13:34:06 -0500 Subject: [PATCH 28/48] added find_col_variances() function --- MariaDB Migration to PostgreSQL/utils.py | 176 ++++++++++++----------- 1 file changed, 96 insertions(+), 80 deletions(-) diff --git a/MariaDB Migration to PostgreSQL/utils.py b/MariaDB Migration to PostgreSQL/utils.py index d5f7f0ba..2af7ae16 100644 --- a/MariaDB Migration to PostgreSQL/utils.py +++ b/MariaDB Migration to PostgreSQL/utils.py @@ -75,6 +75,7 @@ def get_data_counts_maria(): row_result = conn.execute(tables_query) maria_tables = [row[0] for row in row_result] for table in maria_tables: + print(table) row_query = text(f"SELECT COUNT(*) FROM {table};") row_result = conn.execute(row_query) row_count = row_result.scalar() @@ -195,30 +196,20 @@ def find_col_variances(database_name_one, schema_one, schema_two, username, pass SQLALCHEMY_DATABASE_URI = f"postgresql://{username}:{password}@{pg_server}/{database_name_one}" engine = create_engine(SQLALCHEMY_DATABASE_URI) insp = inspect(engine) - table_one = [] - table_two = [] - col_not_in_macrostrat_two = [] - with engine.connect() as conn: - for table in tables: - dict = [] - dict_two = [] - table_one = [] - table_two = [] - columns_one = insp.get_columns(table, schema=schema_one) - columns_two = insp.get_columns(table, schema=schema_two) - for index in range(0, len(columns_one)-1): - dict = columns_one[index] - table_one.append(dict['name']) - for index_two in range(0, len(columns_two)-1): - dict_two = columns_two[index_two] - table_two.append(dict_two['name']) - for col in table_one: - if col not in table_two: - col_not_in_macrostrat_two.append(col) - if len(col_not_in_macrostrat_two) > 0: - print(f"Columns that exist in Macrostrat and NOT in macrostrat_two for {table}:", col_not_in_macrostrat_two) - else: - print("Success! 
All columns in Macrostrat exist in Macrostrat_two") + for table in tables: + columns_one = insp.get_columns(table, schema=schema_one) + columns_two = insp.get_columns(table, schema=schema_two) + + col_names_one = {col['name'] for col in columns_one} + col_names_two = {col['name'] for col in columns_two} + + col_not_in_schema_two = col_names_one - col_names_two + + if col_not_in_schema_two: + print(f"Columns that exist in {schema_one} but NOT in {schema_two} for {table}: {col_not_in_schema_two}") + else: + print(f"All columns in {schema_one} exist in {schema_two} for {table}") + engine.dispose() return @@ -353,60 +344,59 @@ def pg_loader_post_script(): # adding default values for data formats that pgloader accepts #vaccuum...refresh postgresql database after pgloader #CREATE EXTENSION IF NOT EXISTS postgis; - SQLALCHEMY_DATABASE_URI = f"postgresql://{pg_user_migrate}:{pg_pass_migrate}@{pg_server}/{pg_db_name_two}?sslmode=prefer" + SQLALCHEMY_DATABASE_URI = f"postgresql://{pg_user}:{pg_pass_new}@{pg_server}/{pg_db_name}?sslmode=prefer" engine = create_engine(SQLALCHEMY_DATABASE_URI) #connect_args={'options': '-csearch_path=public,macrostrat_temp' - query_pbdb_matches = text(""" - ALTER TABLE macrostrat_two.macrostrat_temp.pbdb_matches ADD COLUMN coordinate geometry(Point, 4326); - UPDATE macrostrat_two.macrostrat_temp.pbdb_matches SET coordinate = ST_GeomFromText(coordinate_point_text, 4326); - ALTER TABLE macrostrat_two.macrostrat_temp.pbdb_matches DROP COLUMN coordinate_point_text; - SELECT * FROM macrostrat_two.macrostrat_temp.pbdb_matches LIMIT 5;""") + query_pbdb_matches = """ + ALTER TABLE macrostrat.macrostrat_temp.pbdb_matches ADD COLUMN coordinate geometry(Point, 4326); + UPDATE macrostrat.macrostrat_temp.pbdb_matches SET coordinate = ST_GeomFromText(coordinate_point_text, 4326); + ALTER TABLE macrostrat.macrostrat_temp.pbdb_matches DROP COLUMN coordinate_point_text; + SELECT * FROM macrostrat.macrostrat_temp.pbdb_matches LIMIT 5;""" - query_places = text(""" - ALTER TABLE macrostrat_two.macrostrat_temp.places ADD COLUMN geom geometry; - UPDATE macrostrat_two.macrostrat_temp.places SET geom = ST_GeomFromText(geom_text, 4326); - ALTER TABLE macrostrat_two.macrostrat_temp.places DROP COLUMN geom_text; - SELECT * FROM macrostrat_two.macrostrat_temp.places LIMIT 5;""") + query_places = """ + ALTER TABLE macrostrat.macrostrat_temp.places ADD COLUMN geom geometry; + UPDATE macrostrat.macrostrat_temp.places SET geom = ST_GeomFromText(geom_text, 4326); + ALTER TABLE macrostrat.macrostrat_temp.places DROP COLUMN geom_text; + SELECT * FROM macrostrat.macrostrat_temp.places LIMIT 5;""" - query_refs = text(""" - ALTER TABLE macrostrat_two.macrostrat_temp.refs ADD COLUMN rgeom geometry; - UPDATE macrostrat_two.macrostrat_temp.refs SET rgeom = ST_GeomFromText(rgeom_text, 4326); - ALTER TABLE macrostrat_two.macrostrat_temp.refs DROP COLUMN rgeom_text; - SELECT * FROM macrostrat_two.macrostrat_temp.refs LIMIT 5;""") + query_refs = """ + ALTER TABLE macrostrat.macrostrat_temp.refs ADD COLUMN rgeom geometry; + UPDATE macrostrat.macrostrat_temp.refs SET rgeom = ST_GeomFromText(rgeom_text, 4326); + ALTER TABLE macrostrat.macrostrat_temp.refs DROP COLUMN rgeom_text; + SELECT * FROM macrostrat.macrostrat_temp.refs LIMIT 5;""" - query_cols = text(""" - ALTER TABLE macrostrat_two.macrostrat_temp.cols ADD COLUMN coordinate geometry; - UPDATE macrostrat_two.macrostrat_temp.cols SET coordinate = ST_GeomFromText(coordinate_text, 4326); - ALTER TABLE macrostrat_two.macrostrat_temp.cols DROP COLUMN 
coordinate_text; - SELECT * FROM macrostrat_two.macrostrat_temp.cols LIMIT 5;""") + query_cols = """ + ALTER TABLE macrostrat.macrostrat_temp.cols ADD COLUMN coordinate geometry; + UPDATE macrostrat.macrostrat_temp.cols SET coordinate = ST_GeomFromText(coordinate_text, 4326); + ALTER TABLE macrostrat.macrostrat_temp.cols DROP COLUMN coordinate_text; + SELECT * FROM macrostrat.macrostrat_temp.cols LIMIT 5;""" - query_col_areas = text(""" - ALTER TABLE macrostrat_two.macrostrat_temp.col_areas ADD COLUMN col_area geometry; - UPDATE macrostrat_two.macrostrat_temp.col_areas SET col_area = ST_GeomFromText(col_area_text, 4326); - ALTER TABLE macrostrat_two.macrostrat_temp.col_areas DROP COLUMN col_area_text; - SELECT * FROM macrostrat_two.macrostrat_temp.col_areas LIMIT 5;""") + query_col_areas = """ + ALTER TABLE macrostrat.macrostrat_temp.col_areas ADD COLUMN col_area geometry; + UPDATE macrostrat.macrostrat_temp.col_areas SET col_area = ST_GeomFromText(col_area_text, 4326); + ALTER TABLE macrostrat.macrostrat_temp.col_areas DROP COLUMN col_area_text; + SELECT * FROM macrostrat.macrostrat_temp.col_areas LIMIT 5;""" - query_col_areas_6April2016 = text(""" - ALTER TABLE macrostrat_two.macrostrat_temp.col_areas_6April2016 ADD COLUMN col_area geometry; - UPDATE macrostrat_two.macrostrat_temp.col_areas_6April2016 SET col_area = ST_GeomFromText(col_area_text, 4326); - ALTER TABLE macrostrat_two.macrostrat_temp.col_areas_6April2016 DROP COLUMN col_area_text; - SELECT * FROM macrostrat_two.macrostrat_temp.col_areas_6April2016 LIMIT 5;""") + query_col_areas_6April2016 = """ + ALTER TABLE macrostrat.macrostrat_temp.col_areas_6April2016 ADD COLUMN col_area geometry; + UPDATE macrostrat.macrostrat_temp.col_areas_6April2016 SET col_area = ST_GeomFromText(col_area_text, 4326); + ALTER TABLE macrostrat.macrostrat_temp.col_areas_6April2016 DROP COLUMN col_area_text; + SELECT * FROM macrostrat.macrostrat_temp.col_areas_6April2016 LIMIT 5;""" post_script_queries = [query_pbdb_matches, query_refs, query_cols, query_places, query_col_areas, query_col_areas_6April2016] print('Starting PostScript execution....') with engine.connect() as conn: for query in post_script_queries: - try: - result = conn.execute(query.execution_options(autocommit=True)) - for row in result: - print(row) - except SQLAlchemyError as e: - print(f"Error: {e}") - #rollback the transaction if an error occurs - conn.execute(text("ROLLBACK;")) - + statements = query.split(';') + for statement in statements: + if statement.strip(): + try: + conn.execute(text(statement)) + print(f"Successfully executed: {statement}") + except Exception as e: + print(f"Error with statement: {statement}\n{e}") engine.dispose() return @@ -455,9 +445,9 @@ def reset(): if __name__ == "__main__": #maria_dump(maria_server, maria_super_user, maria_super_pass, maria_db_name) #maria_restore(maria_server, maria_super_user, maria_super_pass, maria_db_name_two) - #pg_loader_pre_script() + pg_loader_pre_script() #pg_loader() - #pg_loader_post_script() + pg_loader_post_script() #maria_rows, maria_columns = get_data_counts_maria() #pg_rows, pg_columns = get_data_counts_pg(pg_db_name, pg_user, pg_pass_new, 'macrostrat') #pg_macrostrat_two_rows, pg_macrostrat_two_columns = get_data_counts_pg(pg_db_name_two, pg_user_migrate, pg_pass_migrate, 'macrostrat_temp') @@ -474,7 +464,7 @@ def reset(): #row_variance_two, column_variance_two = compare_data_counts(pg_macrostrat_two_rows, pg_rows, pg_macrostrat_two_columns, # pg_columns, db1, db2) #reset() - pg_restore(pg_server, pg_user, 
pg_pass_new, pg_db_name, maria_db_name_two) + #pg_restore(pg_server, pg_user, pg_pass_new, pg_db_name, maria_db_name_two) table_rows = [ "sections", @@ -505,19 +495,45 @@ def reset(): "refs" ] - table_cols = ['sections', - 'strat_names', - 'strat_tree', - 'units', - 'unit_strat_names', - 'unit_environs', - 'cols', - 'environs', - 'lith_atts', - 'lookup_strat_names', - 'lookup_unit_intervals', - 'measuremeta', - 'measures'] + table_cols = ['col_refs', + 'lookup_unit_attrs_api', + 'lookup_unit_intervals', + 'strat_names_meta', + 'sections', + 'unit_econs', + 'lookup_strat_names', + 'measures', + 'projects', + 'timescales', + 'strat_tree', + 'refs', + 'unit_liths', + 'lookup_units', + 'measurements', + 'units', + 'autocomplete', + 'col_areas', + 'unit_strat_names', + 'unit_environs', + 'cols', + 'intervals', + 'lith_atts', + 'timescales_intervals', + 'unit_boundaries', + 'econs', + 'environs', + 'units_sections', + 'unit_measures', + 'strat_names', + 'lookup_unit_liths', + 'liths', + 'concepts_places', + 'strat_names_places', + 'col_groups', + 'measuremeta', + 'places'] + + print(len(table_cols)) #results = find_row_variances(pg_db_name, pg_db_name, maria_db_name_two, pg_user, pg_pass_new, table_rows) From 5fc263694d89476e50dfa68bc2c62e11f8276fa8 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Mon, 29 Jul 2024 13:57:18 -0500 Subject: [PATCH 29/48] Updated some formatting --- cli/macrostrat/cli/database/mariadb/restore.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cli/macrostrat/cli/database/mariadb/restore.py b/cli/macrostrat/cli/database/mariadb/restore.py index 81ae0435..ee8441bf 100644 --- a/cli/macrostrat/cli/database/mariadb/restore.py +++ b/cli/macrostrat/cli/database/mariadb/restore.py @@ -172,7 +172,9 @@ async def _copy_mariadb_database(engine: Engine, new_engine: Engine, *args, **kw ) return await asyncio.gather( - asyncio.create_task(print_stream_progress(dump.stdout, restore.stdin, prefix="Copied")), + asyncio.create_task( + print_stream_progress(dump.stdout, restore.stdin, prefix="Copied") + ), asyncio.create_task(print_stdout(dump.stderr)), asyncio.create_task(print_stdout(restore.stderr)), ) From 4fdc5345fb35ad5e3820fdff688dba54e0f6521b Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Mon, 29 Jul 2024 15:48:44 -0500 Subject: [PATCH 30/48] Streamlined migration scripts and renamed files --- .../cli/database/mariadb/__init__.py | 39 +++++------------- .../mariadb/postgresql_migration/__init__.py | 40 +++++++++---------- .../mariadb/postgresql_migration/_legacy.py | 40 ------------------- cli/macrostrat/cli/database/mariadb/utils.py | 13 +++++- 4 files changed, 42 insertions(+), 90 deletions(-) delete mode 100644 cli/macrostrat/cli/database/mariadb/postgresql_migration/_legacy.py diff --git a/cli/macrostrat/cli/database/mariadb/__init__.py b/cli/macrostrat/cli/database/mariadb/__init__.py index 9223e53b..bc26e499 100644 --- a/cli/macrostrat/cli/database/mariadb/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/__init__.py @@ -1,14 +1,15 @@ -from typer import Typer, Context, Argument +from pathlib import Path from sys import stdin + from macrostrat.utils.shell import run -from .._legacy import get_db -from sqlalchemy.engine import create_engine -from sqlalchemy.engine.url import URL, make_url -from pathlib import Path +from sqlalchemy.engine.url import URL +from typer import Typer, Context, Argument -from ..utils import docker_internal_url -from .utils import build_connection_args from .restore import restore_mariadb, dump_mariadb +from .utils import 
build_connection_args, mariadb_engine +from ..utils import docker_internal_url +from .postgresql_migration import migrate_mariadb_to_postgresql + app = Typer(no_args_is_help=True) @@ -57,7 +58,7 @@ def dump_command( database: str = Argument(None, help="Database to dump"), ): """Dump a MariaDB database to a file.""" - engine = mysql_engine(database) + engine = mariadb_engine(database) if output is None: output = Path("/dev/stdout") @@ -74,7 +75,7 @@ def restore_command( overwrite: bool = False, ): """Restore a MariaDB database from a dump file or stream.""" - engine = mysql_engine(database) + engine = mariadb_engine(database) restore_mariadb( input, @@ -85,22 +86,4 @@ def restore_command( ) -def mysql_engine(database: str = None): - from macrostrat.core.config import mysql_database - - _database: URL = make_url(mysql_database) - _database = _database.set(drivername="mysql+pymysql") - if database is not None: - _database = _database.set(database=database) - return create_engine(_database) - - -@app.command("migrate-to-postgres") -def migrate_to_postgres_command(overwrite: bool = False): - """Import legacy MariaDB database to PostgreSQL using pgloader""" - from .postgresql_migration import migrate_mariadb_to_postgresql - - engine = mysql_engine() - dest = get_db().engine - - migrate_mariadb_to_postgresql(engine, dest, overwrite=overwrite) +app.command("migrate-to-postgres")(migrate_mariadb_to_postgresql) diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index a06a7c59..d2ba8053 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -1,36 +1,34 @@ -from io import StringIO - -import docker -from sqlalchemy import text, create_engine -import os -from macrostrat.database.utils import run_sql -from macrostrat.app_frame.exc import ApplicationError from pathlib import Path -from sqlalchemy.engine import Engine +from textwrap import dedent + from macrostrat.database import database_exists, create_database, drop_database -from ..restore import copy_mariadb_database -from ...._dev.utils import raw_database_url -from ...utils import engine_for_db_name, docker_internal_url -from ..._legacy import get_db +from macrostrat.database.utils import run_sql from macrostrat.utils import get_logger from macrostrat.utils.shell import run -from macrostrat.core import app -from textwrap import dedent -import docker -from io import BytesIO +from sqlalchemy import text, create_engine +from sqlalchemy.engine import Engine -import time +from macrostrat.core import app from .db_changes import get_data_counts_maria, get_data_counts_pg, compare_data_counts +from ..restore import copy_mariadb_database +from ..utils import mariadb_engine +from ..._legacy import get_db +from ...utils import docker_internal_url +from ...._dev.utils import raw_database_url __here__ = Path(__file__).parent log = get_logger(__name__) -def migrate_mariadb_to_postgresql( - maria_engine: Engine, pg_engine: Engine, overwrite: bool = False -): - """Migrate the entire Macrostrat database from MariaDB to PostgreSQL.""" +def migrate_mariadb_to_postgresql(overwrite: bool = False): + """Migrate the legacy Macrostrat database from MariaDB to PostgreSQL.""" + + # Get the default MariaDB and PostgreSQL engines from the Macrostrat app's + # configuration (macrostrat.toml). 
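# [Editor's sketch, not part of this patch: the series leans on SQLAlchemy's
# immutable URL API, where .set() returns a modified copy of a URL. That is
# how the *_temp sibling engines below are derived. The DSN here is a
# placeholder, not a real credential.]
from sqlalchemy import create_engine
from sqlalchemy.engine import make_url

base_url = make_url("mysql+pymysql://user:secret@localhost/macrostrat")
temp_url = base_url.set(database=base_url.database + "_temp")
maria_temp = create_engine(temp_url)  # base_url itself is left unchanged
# [End editor's sketch.]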
+ maria_engine = mariadb_engine() + pg_engine = get_db().engine + temp_db_name = maria_engine.url.database + "_temp" maria_temp_engine = create_engine(maria_engine.url.set(database=temp_db_name)) diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/_legacy.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/_legacy.py deleted file mode 100644 index c1e1ca3f..00000000 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/_legacy.py +++ /dev/null @@ -1,40 +0,0 @@ -from macrostrat.utils.shell import run -from cli.macrostrat.cli.database._legacy import get_db - -from macrostrat.core import app - -from cli.macrostrat.cli._dev.utils import ( - _create_database_if_not_exists, - _docker_local_run_args, -) - - -def import_mariadb(target_database="macrostrat_from_mariadb"): - """Import legacy MariaDB database to PostgreSQL using pgloader""" - # Run pgloader in docker - - cfg = app.settings - - args = _docker_local_run_args(postgres_container="dimitri/pgloader") - - # Get the database URL - db = get_db() - url = db.engine.url - url = url.set(database=target_database) - - _create_database_if_not_exists(url, create=True) - - pg_url = str(url) - - dburl = cfg.get("mysql_database", None) - if dburl is None: - raise Exception("No MariaDB database URL available in configuration") - - run( - *args, - "pgloader", - "--with", - "prefetch rows = 1000", - str(dburl), - str(pg_url), - ) diff --git a/cli/macrostrat/cli/database/mariadb/utils.py b/cli/macrostrat/cli/database/mariadb/utils.py index 7daa0584..40bd2e14 100644 --- a/cli/macrostrat/cli/database/mariadb/utils.py +++ b/cli/macrostrat/cli/database/mariadb/utils.py @@ -1,4 +1,5 @@ -from sqlalchemy.engine.url import URL +from sqlalchemy.engine.url import URL, make_url +from sqlalchemy.engine import create_engine from enum import Enum @@ -25,3 +26,13 @@ def build_connection_args( args.append(url.database) return args + + +def mariadb_engine(database: str = None): + from macrostrat.core.config import mysql_database + + _database: URL = make_url(mysql_database) + _database = _database.set(drivername="mysql+pymysql") + if database is not None: + _database = _database.set(database=database) + return create_engine(_database) From 795395c5af6da2f48d780376134f16220a67318b Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Mon, 29 Jul 2024 16:49:26 -0500 Subject: [PATCH 31/48] Added the ability to run different steps of the migration process --- .../mariadb/postgresql_migration/__init__.py | 51 +++++++++++++------ 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index d2ba8053..4948a715 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -20,35 +20,56 @@ log = get_logger(__name__) +from enum import Enum -def migrate_mariadb_to_postgresql(overwrite: bool = False): + +class MariaDBMigrationStep(Enum): + COPY_MARIADB = "copy-mariadb" + PGLOADER = "pgloader" + CHECK_DATA = "check-data" + FINALIZE = "finalize" + + +_all_steps = { + MariaDBMigrationStep.COPY_MARIADB, + MariaDBMigrationStep.PGLOADER, + MariaDBMigrationStep.CHECK_DATA, + MariaDBMigrationStep.FINALIZE, +} + + +def migrate_mariadb_to_postgresql( + overwrite: bool = False, step: list[MariaDBMigrationStep] = None +): """Migrate the legacy Macrostrat database from MariaDB to PostgreSQL.""" # Get the default MariaDB and 
PostgreSQL engines from the Macrostrat app's # configuration (macrostrat.toml). maria_engine = mariadb_engine() pg_engine = get_db().engine - temp_db_name = maria_engine.url.database + "_temp" - maria_temp_engine = create_engine(maria_engine.url.set(database=temp_db_name)) - - if database_exists(maria_temp_engine.url) and not overwrite: - header( - "Database [bold cyan]macrostrat_temp[/] already exists. Use --overwrite to overwrite." - ) - else: - copy_mariadb_database(maria_engine, maria_temp_engine, overwrite=overwrite) - pg_temp_engine = create_engine(pg_engine.url.set(database=temp_db_name)) - pgloader_pre_script(maria_temp_engine) + steps: set[MariaDBMigrationStep] = _all_steps + if step is not None and len(step) > 0: + steps = set(step) + + if MariaDBMigrationStep.COPY_MARIADB in steps: + copy_mariadb_database(maria_engine, maria_temp_engine, overwrite=overwrite) - pgloader(maria_temp_engine, pg_temp_engine, overwrite=overwrite) + if MariaDBMigrationStep.PGLOADER in steps: + pgloader_pre_script(maria_temp_engine) + pgloader(maria_temp_engine, pg_temp_engine, overwrite=overwrite) + pgloader_post_script(pg_temp_engine) - pgloader_post_script(pg_temp_engine) + if MariaDBMigrationStep.CHECK_DATA in steps: + should_proceed = compare_row_counts(maria_engine, pg_temp_engine, pg_engine) + if not should_proceed: + raise ValueError("Data comparison failed. Aborting migration.") - compare_row_counts(maria_engine, pg_temp_engine, pg_engine) + if MariaDBMigrationStep.FINALIZE in steps: + raise NotImplementedError("Copy to Macrostrat database not yet implemented") def pgloader_pre_script(engine: Engine): From 506402998d63cd2645272531090dc83e8656f0db Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Wed, 31 Jul 2024 10:17:38 -0500 Subject: [PATCH 32/48] accommodated code for port missing in macrostrat.toml. 
also added ssl mode parameters for pgloader command to work --- .idea/inspectionProfiles/Project_Default.xml | 69 +++++++++++++++++++ .../mariadb/postgresql_migration/__init__.py | 3 +- cli/macrostrat/cli/database/mariadb/utils.py | 6 +- 3 files changed, 75 insertions(+), 3 deletions(-) create mode 100644 .idea/inspectionProfiles/Project_Default.xml diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 00000000..00bf5117 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,69 @@ + + + + \ No newline at end of file diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index 4948a715..ea446b68 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -152,7 +152,8 @@ def pgloader(source: Engine, dest: Engine, overwrite=False): "prefetch rows = 1000", "--verbose", raw_database_url(docker_internal_url(source_url)), - raw_database_url(docker_internal_url(dest.url)), + raw_database_url(docker_internal_url(dest.url))+"?sslmode=prefer", + ) diff --git a/cli/macrostrat/cli/database/mariadb/utils.py b/cli/macrostrat/cli/database/mariadb/utils.py index 40bd2e14..ba089fa0 100644 --- a/cli/macrostrat/cli/database/mariadb/utils.py +++ b/cli/macrostrat/cli/database/mariadb/utils.py @@ -12,14 +12,16 @@ def build_connection_args( url: URL, style: ParameterStyle = ParameterStyle.MariaDB ) -> [str]: """Build MariaDB connection arguments from a SQLAlchemy URL.""" + args = [ "-h", url.host, - "-P", - str(url.port), "-u", url.username, ] + + if url.port: + args.extend(["-P" + str(url.port)]) if url.password: args.extend(["-p" + str(url.password)]) From 5152d493c249e2e1a0b3a3af1b2bd809fb8a54d2 Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Wed, 31 Jul 2024 15:28:36 -0500 Subject: [PATCH 33/48] updated pgloader code to use new pg_temp_engine url and creds --- .../cli/database/mariadb/postgresql_migration/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index ea446b68..0a2649a2 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -7,7 +7,7 @@ from macrostrat.utils.shell import run from sqlalchemy import text, create_engine from sqlalchemy.engine import Engine - +from macrostrat.core.config import settings from macrostrat.core import app from .db_changes import get_data_counts_maria, get_data_counts_pg, compare_data_counts from ..restore import copy_mariadb_database @@ -49,8 +49,8 @@ def migrate_mariadb_to_postgresql( pg_engine = get_db().engine temp_db_name = maria_engine.url.database + "_temp" maria_temp_engine = create_engine(maria_engine.url.set(database=temp_db_name)) - pg_temp_engine = create_engine(pg_engine.url.set(database=temp_db_name)) - + #pg_temp_engine = create_engine(pg_engine.url.set(database=temp_db_name)) + pg_temp_engine = create_engine(settings.pgloader_target_database) steps: set[MariaDBMigrationStep] = _all_steps if step is not None and len(step) > 0: steps = set(step) From 02d71128c7cfeb3e3f030fe311ecff2a41fb9c5f Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Thu, 1 Aug 2024 09:52:54 -0500 Subject: [PATCH 34/48] 
Added most up-to-date find_row_variances() and find_col_variances --- .../postgresql_migration/db_changes.py | 63 ++++++++++++++----- 1 file changed, 47 insertions(+), 16 deletions(-) diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py index 9696b7e9..1f3b8a19 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py @@ -179,30 +179,61 @@ def success(message): def find_row_variances( - database_name_one, - schema_one, - database_name_two, - schema_two, - username, - password, - table, + database_name_one, + schema_one, + schema_two, + username, + password, + tables ): SQLALCHEMY_DATABASE_URI = ( f"postgresql://{username}:{password}@{pg_server}/{database_name_one}" ) engine = create_engine(SQLALCHEMY_DATABASE_URI) + insp = inspect(engine) with engine.connect() as conn: - query = text(f"SELECT * FROM {schema_one}.{table}") - result = conn.execute(query) - df = pd.DataFrame(result) + for table in tables: + # Get the actual first column name for each table + columns = insp.get_columns(table, schema=schema_one) + first_column_name = columns[0]['name'] + query = f""" + SELECT m.{first_column_name} + FROM macrostrat.macrostrat.{table} m + RIGHT JOIN macrostrat.macrostrat_temp.{table} t ON m.{first_column_name} = t.{first_column_name} + WHERE t.{first_column_name} IS NULL; + """ + result_df = pd.read_sql_query(query, engine) + print(f"Macrostrat rows not in Macrostrat_two rows for table {table}:") + print(result_df) engine.dispose() + return + +def find_col_variances( + database_name_one, + schema_one, + schema_two, + username, + password, + tables +): SQLALCHEMY_DATABASE_URI = ( - f"postgresql://{username}:{password}@{pg_server}/{database_name_two}" + f"postgresql://{username}:{password}@{pg_server}/{database_name_one}" ) engine = create_engine(SQLALCHEMY_DATABASE_URI) - with engine.connect() as conn: - query = text(f"SELECT * FROM {schema_two}.{table}") - result = conn.execute(query) - df_two = pd.DataFrame(result) + insp = inspect(engine) + for table in tables: + columns_one = insp.get_columns(table, schema=schema_one) + columns_two = insp.get_columns(table, schema=schema_two) + + col_names_one = {col['name'] for col in columns_one} + col_names_two = {col['name'] for col in columns_two} + + col_not_in_schema_two = col_names_one - col_names_two + + if col_not_in_schema_two: + print(f"Columns that exist in {schema_one} but NOT in {schema_two} for {table}: {col_not_in_schema_two}") + else: + print(f"All columns in {schema_one} exist in {schema_two} for {table}") + engine.dispose() - return df, df_two + return From 6f3774f44d3704e74c66e61348598891d744e449 Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Thu, 1 Aug 2024 15:07:04 -0500 Subject: [PATCH 35/48] Got find_row_variances() and find_col_variances to function! 
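[Editor's note on the find_row_variances() query in the patch above: it verifies
migration completeness with an anti-join, selecting source rows that have no
partner row in the migrated schema. As written, though, it RIGHT JOINs the temp
table and then tests the temp-side key for NULL; a right join never NULL-pads its
right-hand side, so that test only matches rows whose key value is itself NULL.
The conventional form left-joins from the source table and applies the IS NULL
test to the migrated side. A minimal sketch of that form, assuming a hypothetical
`id` key column and a placeholder DSN (the real code discovers each table's first
column via SQLAlchemy's inspector):

    from sqlalchemy import create_engine, text

    engine = create_engine("postgresql://user:secret@localhost/macrostrat")
    anti_join = text("""
        SELECT count(*)
        FROM macrostrat.units m              -- source schema
        LEFT JOIN macrostrat_temp.units t    -- migrated schema
            ON m.id = t.id
        WHERE t.id IS NULL                   -- no partner row after migration
    """)
    with engine.connect() as conn:
        missing = conn.execute(anti_join).scalar()
        print("units rows missing from macrostrat_temp:", missing)

A count of zero for every table is the expected outcome once the pgloader step
has copied everything.]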
--- .../mariadb/postgresql_migration/__init__.py | 56 ++++++++++++++++++- .../postgresql_migration/db_changes.py | 40 ++++++------- 2 files changed, 69 insertions(+), 27 deletions(-) diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index 0a2649a2..f93aeeef 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -5,11 +5,11 @@ from macrostrat.database.utils import run_sql from macrostrat.utils import get_logger from macrostrat.utils.shell import run -from sqlalchemy import text, create_engine +from sqlalchemy import text, create_engine, inspect from sqlalchemy.engine import Engine from macrostrat.core.config import settings from macrostrat.core import app -from .db_changes import get_data_counts_maria, get_data_counts_pg, compare_data_counts +from .db_changes import get_data_counts_maria, get_data_counts_pg, compare_data_counts, find_row_variances, find_col_variances from ..restore import copy_mariadb_database from ..utils import mariadb_engine from ..._legacy import get_db @@ -50,7 +50,8 @@ def migrate_mariadb_to_postgresql( temp_db_name = maria_engine.url.database + "_temp" maria_temp_engine = create_engine(maria_engine.url.set(database=temp_db_name)) #pg_temp_engine = create_engine(pg_engine.url.set(database=temp_db_name)) - pg_temp_engine = create_engine(settings.pgloader_target_database) + pg_temp_engine = create_engine(pg_engine.url.set(database=temp_db_name)) + steps: set[MariaDBMigrationStep] = _all_steps if step is not None and len(step) > 0: steps = set(step) @@ -65,6 +66,7 @@ def migrate_mariadb_to_postgresql( if MariaDBMigrationStep.CHECK_DATA in steps: should_proceed = compare_row_counts(maria_engine, pg_temp_engine, pg_engine) + find_row_col_variances(pg_engine) if not should_proceed: raise ValueError("Data comparison failed. 
Aborting migration.") @@ -197,6 +199,54 @@ def compare_row_counts(maria: Engine, pg_temp: Engine, pg_final: Engine): # df, df_two = find_row_variances(pg_db_name, pg_db_name, pg_db_name_two, maria_db_name_two, # pg_user, pg_pass_new, 'cols') +def find_row_col_variances(pg_engine: Engine): + tables = ['col_refs', + 'lookup_unit_attrs_api', + 'lookup_unit_intervals', + 'strat_names_meta', + 'sections', + 'unit_econs', + 'lookup_strat_names', + 'measures', + 'projects', + 'timescales', + 'strat_tree', + 'refs', + 'unit_liths', + 'lookup_units', + 'measurements', + 'units', + 'autocomplete', + 'col_areas', + 'unit_strat_names', + 'unit_environs', + 'cols', + 'intervals', + 'lith_atts', + 'timescales_intervals', + 'unit_boundaries', + 'econs', + 'environs', + 'units_sections', + 'unit_measures', + 'strat_names', + 'lookup_unit_liths', + 'liths', + 'concepts_places', + 'strat_names_places', + 'col_groups', + 'measuremeta', + 'places'] + find_row_variances( + pg_engine.url.database, pg_engine.url.database, "macrostrat_temp", pg_engine.url.username, pg_engine.url.password, tables, + pg_engine + ) + find_col_variances( + pg_engine.url.database, pg_engine.url.database, "macrostrat_temp", pg_engine.url.username, pg_engine.url.password, tables, + pg_engine + ) + + def db_identifier(engine: Engine): driver = engine.url.drivername diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py index 1f3b8a19..2c351d0d 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py @@ -5,7 +5,7 @@ import pandas as pd from macrostrat.database import run_query from psycopg2.sql import Identifier -from sqlalchemy import create_engine, text +from sqlalchemy import create_engine, text, inspect from sqlalchemy.engine import Engine from macrostrat.core import app @@ -184,28 +184,26 @@ def find_row_variances( schema_two, username, password, - tables + tables, + pg_engine ): - SQLALCHEMY_DATABASE_URI = ( - f"postgresql://{username}:{password}@{pg_server}/{database_name_one}" - ) - engine = create_engine(SQLALCHEMY_DATABASE_URI) - insp = inspect(engine) - with engine.connect() as conn: + insp = inspect(pg_engine) + count = 0 + with pg_engine.connect() as conn: for table in tables: # Get the actual first column name for each table columns = insp.get_columns(table, schema=schema_one) first_column_name = columns[0]['name'] query = f""" - SELECT m.{first_column_name} + SELECT COUNT(m.{first_column_name}) FROM macrostrat.macrostrat.{table} m RIGHT JOIN macrostrat.macrostrat_temp.{table} t ON m.{first_column_name} = t.{first_column_name} WHERE t.{first_column_name} IS NULL; """ - result_df = pd.read_sql_query(query, engine) - print(f"Macrostrat rows not in Macrostrat_two rows for table {table}:") - print(result_df) - engine.dispose() + result = conn.execute(text(query)) + for row in result: + print(row[0], table) + pg_engine.dispose() return def find_col_variances( @@ -214,26 +212,20 @@ def find_col_variances( schema_two, username, password, - tables + tables, + pg_engine ): - SQLALCHEMY_DATABASE_URI = ( - f"postgresql://{username}:{password}@{pg_server}/{database_name_one}" - ) - engine = create_engine(SQLALCHEMY_DATABASE_URI) - insp = inspect(engine) + + insp = inspect(pg_engine) for table in tables: columns_one = insp.get_columns(table, schema=schema_one) columns_two = insp.get_columns(table, schema=schema_two) - col_names_one = 
{col['name'] for col in columns_one} col_names_two = {col['name'] for col in columns_two} - col_not_in_schema_two = col_names_one - col_names_two - if col_not_in_schema_two: print(f"Columns that exist in {schema_one} but NOT in {schema_two} for {table}: {col_not_in_schema_two}") else: print(f"All columns in {schema_one} exist in {schema_two} for {table}") - - engine.dispose() + pg_engine.dispose() return From 15d7a90070fa5e0e8dd2cd33cad1f69f67d7d899 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Thu, 1 Aug 2024 17:14:55 -0500 Subject: [PATCH 36/48] Created utility function we might use to run PGLoader --- cli/macrostrat/cli/database/utils.py | 37 ++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/cli/macrostrat/cli/database/utils.py b/cli/macrostrat/cli/database/utils.py index 2c444428..c78b5e9f 100644 --- a/cli/macrostrat/cli/database/utils.py +++ b/cli/macrostrat/cli/database/utils.py @@ -2,6 +2,11 @@ from sqlalchemy.engine import create_engine from sqlalchemy.engine.url import URL, make_url from macrostrat.core.config import settings +from sqlalchemy.engine import Engine +from macrostrat.database.utils import run_sql +from psycopg2.sql import Identifier +from contextlib import contextmanager +from uuid import uuid4 def engine_for_db_name(name: str | None): @@ -18,3 +23,35 @@ def docker_internal_url(url: URL | str) -> URL: docker_localhost = getattr(settings, "docker_localhost", "localhost") url = url.set(host=docker_localhost) return url + + +@contextmanager +def pg_temp_engine( + pg_engine: Engine, username: str, password: str = None, schemas: list[str] = None +): + """Create a temporary login user for a PostgreSQL database with a limited set of permissions.""" + if password is None: + password = str(uuid4().hex) + + run_sql( + "CREATE USER {username} WITH PASSWORD {password}", + dict(username=Identifier(username), password=password), + ) + + # Create a new database engine that uses the new user + url = pg_engine.url.set(username=username, password=password) + + temp_engine = create_engine(url) + + if schemas is not None: + for schema in schemas: + run_sql( + "CREATE SCHEMA IF NOT EXISTS {schema}", + dict(schema=Identifier(schema)), + engine=temp_engine, + ) + + yield temp_engine + + # Clean up + run_sql("DROP USER {username}", dict(username=Identifier(username))) From 88c01915555d5a8b5cb9c3dc9a6271344b133700 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Fri, 2 Aug 2024 02:12:03 -0500 Subject: [PATCH 37/48] Updated utility functions for creating temporary database users --- .../mariadb/postgresql_migration/__init__.py | 238 +++++++++++------- cli/macrostrat/cli/database/utils.py | 127 ++++++++-- 2 files changed, 252 insertions(+), 113 deletions(-) diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index f93aeeef..7d47d9b0 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -2,18 +2,25 @@ from textwrap import dedent from macrostrat.database import database_exists, create_database, drop_database -from macrostrat.database.utils import run_sql +from macrostrat.database.utils import run_sql, run_query from macrostrat.utils import get_logger from macrostrat.utils.shell import run from sqlalchemy import text, create_engine, inspect from sqlalchemy.engine import Engine from macrostrat.core.config import settings from macrostrat.core import app -from 
.db_changes import get_data_counts_maria, get_data_counts_pg, compare_data_counts, find_row_variances, find_col_variances +from .db_changes import ( + get_data_counts_maria, + get_data_counts_pg, + compare_data_counts, + find_row_variances, + find_col_variances, +) +from psycopg2.sql import Identifier from ..restore import copy_mariadb_database from ..utils import mariadb_engine from ..._legacy import get_db -from ...utils import docker_internal_url +from ...utils import docker_internal_url, pg_temp_user from ...._dev.utils import raw_database_url __here__ = Path(__file__).parent @@ -47,10 +54,11 @@ def migrate_mariadb_to_postgresql( # configuration (macrostrat.toml). maria_engine = mariadb_engine() pg_engine = get_db().engine - temp_db_name = maria_engine.url.database + "_temp" + temp_db_name = "macrostrat_temp" maria_temp_engine = create_engine(maria_engine.url.set(database=temp_db_name)) - #pg_temp_engine = create_engine(pg_engine.url.set(database=temp_db_name)) - pg_temp_engine = create_engine(pg_engine.url.set(database=temp_db_name)) + + # Final schema in the PostgreSQL database + schema = temp_db_name steps: set[MariaDBMigrationStep] = _all_steps if step is not None and len(step) > 0: @@ -60,18 +68,65 @@ def migrate_mariadb_to_postgresql( copy_mariadb_database(maria_engine, maria_temp_engine, overwrite=overwrite) if MariaDBMigrationStep.PGLOADER in steps: - pgloader_pre_script(maria_temp_engine) - pgloader(maria_temp_engine, pg_temp_engine, overwrite=overwrite) - pgloader_post_script(pg_temp_engine) + pgloader(maria_temp_engine, pg_engine, schema, overwrite=overwrite) if MariaDBMigrationStep.CHECK_DATA in steps: - should_proceed = compare_row_counts(maria_engine, pg_temp_engine, pg_engine) + # NOTE: the temp schema and the final schema must be provided + should_proceed = compare_row_counts( + maria_engine, pg_engine, temp_schema, final_schema + ) + # TODO: integrate this with the previous function find_row_col_variances(pg_engine) if not should_proceed: raise ValueError("Data comparison failed. Aborting migration.") if MariaDBMigrationStep.FINALIZE in steps: - raise NotImplementedError("Copy to Macrostrat database not yet implemented") + raise NotImplementedError("Copy to macrostrat schema not yet implemented") + + +def pgloader(source: Engine, dest: Engine, target_schema: str, overwrite: bool = False): + _build_pgloader() + + if target_schema != source.url.database: + raise ValueError( + "The target schema must be the same as the source database name" + ) + + pgloader_pre_script(source) + _schema = Identifier(target_schema) + + if overwrite: + run_sql( + dest, + """ + DROP SCHEMA IF EXISTS {schema} CASCADE; + CREATE SCHEMA {schema}; + """, + dict(schema=_schema), + ) + + username = "maria_migrate" + with pg_temp_user(dest, username, overwrite=overwrite) as pg_temp: + # Create a temporary user that PGLoader can use to connect to the PostgreSQL database + # and create the temporary schema. 
+ run_sql( + dest, + "GRANT ALL PRIVILEGES ON SCHEMA {schema} TO {user}", + dict( + schema=_schema, + user=Identifier(username), + ), + ) + _run_pgloader(source, pg_temp) + pgloader_post_script(pg_temp) + + +def schema_exists(engine: Engine, schema: str): + return run_query( + engine, + "SELECT 1 FROM information_schema.schemata WHERE schema_name = :schema_name", + dict(schema=schema), + ).scalar() def pgloader_pre_script(engine: Engine): @@ -80,18 +135,6 @@ def pgloader_pre_script(engine: Engine): run_sql(engine, pre_script) -""" - #create db, create temp user before pgloader - URL = f"postgresql://{pg_user}:{pg_pass_new}@{pg_server}/{pg_db_name}" - pg_engine = create_engine(URL) - with pg_engine.connect() as conn: - conn.execute(text(f"CREATE DATABASE {pg_db_name_two}")) - conn.execute(text(f"DROP USER IF EXISTS {pg_user_maria_temp};")) - conn.execute(text(f"CREATE USER maria_migrate WITH PASSWORD '{pg_pass_maria_temp}'")) - conn.execute(text(f"GRANT CONNECT ON DATABASE {pg_db_name_two} TO {pg_user_maria_temp};")) - pg_engine.dispose()""" - - def pgloader_post_script(engine: Engine): app.console.print("\n[bold]Running post-migration script[/]") assert engine.url.drivername.startswith("postgres") @@ -99,45 +142,15 @@ def pgloader_post_script(engine: Engine): run_sql(engine, post_script) -def pgloader(source: Engine, dest: Engine, overwrite=False): +def _run_pgloader(source: Engine, dest: Engine): """ Command terminal to run pgloader. Ensure Docker app is running. """ db_exists = database_exists(dest.url) - - if db_exists: - if overwrite: - header("Dropping PostgreSQL database") - drop_database(dest.url) - db_exists = False - else: - header( - f"PostgreSQL database [bold cyan]{dest.url.database}[/] already exists. Skipping pgloader." - ) - return - if not db_exists: header("Creating PostgreSQL database") create_database(dest.url) - header("Building pgloader-runner Docker image") - - dockerfile = dedent( - """FROM dimitri/pgloader:latest - RUN apt-get update && apt-get install -y postgresql-client ca-certificates && rm -rf /var/lib/apt/lists/* - ENTRYPOINT ["pgloader"] - """ - ) - - run( - "docker", - "build", - "-t", - "pgloader-runner:latest", - "-", - input=dockerfile.encode("utf-8"), - ) - header("Running pgloader") # PyMySQL is not installed in the pgloader image, so we need to use the mysql client @@ -154,8 +167,27 @@ def pgloader(source: Engine, dest: Engine, overwrite=False): "prefetch rows = 1000", "--verbose", raw_database_url(docker_internal_url(source_url)), - raw_database_url(docker_internal_url(dest.url))+"?sslmode=prefer", + raw_database_url(docker_internal_url(dest.url)) + "?sslmode=prefer", + ) + +def _build_pgloader(): + header("Building pgloader-runner Docker image") + + dockerfile = dedent( + """FROM dimitri/pgloader:latest + RUN apt-get update && apt-get install -y postgresql-client ca-certificates && rm -rf /var/lib/apt/lists/* + ENTRYPOINT ["pgloader"] + """ + ) + + run( + "docker", + "build", + "-t", + "pgloader-runner:latest", + "-", + input=dockerfile.encode("utf-8"), ) @@ -199,55 +231,67 @@ def compare_row_counts(maria: Engine, pg_temp: Engine, pg_final: Engine): # df, df_two = find_row_variances(pg_db_name, pg_db_name, pg_db_name_two, maria_db_name_two, # pg_user, pg_pass_new, 'cols') + def find_row_col_variances(pg_engine: Engine): - tables = ['col_refs', - 'lookup_unit_attrs_api', - 'lookup_unit_intervals', - 'strat_names_meta', - 'sections', - 'unit_econs', - 'lookup_strat_names', - 'measures', - 'projects', - 'timescales', - 'strat_tree', - 'refs', - 
'unit_liths', - 'lookup_units', - 'measurements', - 'units', - 'autocomplete', - 'col_areas', - 'unit_strat_names', - 'unit_environs', - 'cols', - 'intervals', - 'lith_atts', - 'timescales_intervals', - 'unit_boundaries', - 'econs', - 'environs', - 'units_sections', - 'unit_measures', - 'strat_names', - 'lookup_unit_liths', - 'liths', - 'concepts_places', - 'strat_names_places', - 'col_groups', - 'measuremeta', - 'places'] + tables = [ + "col_refs", + "lookup_unit_attrs_api", + "lookup_unit_intervals", + "strat_names_meta", + "sections", + "unit_econs", + "lookup_strat_names", + "measures", + "projects", + "timescales", + "strat_tree", + "refs", + "unit_liths", + "lookup_units", + "measurements", + "units", + "autocomplete", + "col_areas", + "unit_strat_names", + "unit_environs", + "cols", + "intervals", + "lith_atts", + "timescales_intervals", + "unit_boundaries", + "econs", + "environs", + "units_sections", + "unit_measures", + "strat_names", + "lookup_unit_liths", + "liths", + "concepts_places", + "strat_names_places", + "col_groups", + "measuremeta", + "places", + ] find_row_variances( - pg_engine.url.database, pg_engine.url.database, "macrostrat_temp", pg_engine.url.username, pg_engine.url.password, tables, - pg_engine + pg_engine.url.database, + pg_engine.url.database, + "macrostrat_temp", + pg_engine.url.username, + pg_engine.url.password, + tables, + pg_engine, ) find_col_variances( - pg_engine.url.database, pg_engine.url.database, "macrostrat_temp", pg_engine.url.username, pg_engine.url.password, tables, - pg_engine + pg_engine.url.database, + pg_engine.url.database, + "macrostrat_temp", + pg_engine.url.username, + pg_engine.url.password, + tables, + pg_engine, ) - def db_identifier(engine: Engine): driver = engine.url.drivername if driver.startswith("postgresql"): diff --git a/cli/macrostrat/cli/database/utils.py b/cli/macrostrat/cli/database/utils.py index c78b5e9f..3a21c81b 100644 --- a/cli/macrostrat/cli/database/utils.py +++ b/cli/macrostrat/cli/database/utils.py @@ -3,10 +3,11 @@ from sqlalchemy.engine.url import URL, make_url from macrostrat.core.config import settings from sqlalchemy.engine import Engine -from macrostrat.database.utils import run_sql +from macrostrat.database.utils import run_sql, run_query from psycopg2.sql import Identifier from contextlib import contextmanager from uuid import uuid4 +from typing import Optional def engine_for_db_name(name: str | None): @@ -26,32 +27,126 @@ def docker_internal_url(url: URL | str) -> URL: @contextmanager -def pg_temp_engine( - pg_engine: Engine, username: str, password: str = None, schemas: list[str] = None +def pg_temp_user( + pg_engine: Engine, username: str, *, password: str = None, overwrite: bool = False ): """Create a temporary login user for a PostgreSQL database with a limited set of permissions.""" + # Check whether the user already exists + exists = has_user(pg_engine, username) + if exists: + if overwrite: + drop_user( + pg_engine, + username, + owned_by=OwnedByPolicy.Reassign, + ) + else: + raise ValueError(f"User {username} already exists") + if password is None: password = str(uuid4().hex) run_sql( - "CREATE USER {username} WITH PASSWORD {password}", + pg_engine, + "CREATE USER {username} WITH PASSWORD :password", dict(username=Identifier(username), password=password), ) # Create a new database engine that uses the new user - url = pg_engine.url.set(username=username, password=password) + url = pg_engine.url.set(username=username).set(password=password) - temp_engine = create_engine(url) + try: + temp_engine 
= create_engine(url) + yield temp_engine + temp_engine.dispose() + finally: + # Clean up + drop_user( + pg_engine, + username, + owned_by=OwnedByPolicy.Reassign, + ) - if schemas is not None: - for schema in schemas: - run_sql( - "CREATE SCHEMA IF NOT EXISTS {schema}", - dict(schema=Identifier(schema)), - engine=temp_engine, - ) - yield temp_engine +from enum import Enum +from warnings import warn + + +class OwnedByPolicy(Enum): + Reassign = "reassign" + Drop = "drop" + Restrict = "restrict" + + +def drop_user( + engine: Engine, + username: str, + *, + owned_by: Optional[OwnedByPolicy] = OwnedByPolicy.Restrict, + allow_privilege_escalation: bool = True, +): + params = dict(username=Identifier(username)) + if owned_by == OwnedByPolicy.Reassign: + # Check for privilege escalation + reassign_privileges( + engine, username, allow_privilege_escalation=allow_privilege_escalation + ) + if owned_by in (OwnedByPolicy.Drop, OwnedByPolicy.Reassign): + # Drop all objects owned by the user (this actually drops permissions). + # It is hard to drop all objects owned by a user without using this sort + # of intense approach. + run_sql(engine, "DROP OWNED BY {username}", params) + + run_sql( + engine, + "DROP USER {username}", + params, + ) + + +def has_user(engine: Engine, username: str) -> bool: + """Check if a database role exists in a PostgreSQL database.""" + return ( + run_query( + engine, + "SELECT 1 FROM pg_roles WHERE rolname = :username", + dict(username=username), + ).scalar() + is not None + ) + + +def is_superuser(engine: Engine, username: str) -> bool: + return run_query( + engine, + "select usesuper from pg_user where usename = :username", + dict(username=username), + ).scalar() + - # Clean up - run_sql("DROP USER {username}", dict(username=Identifier(username))) +def reassign_privileges( + engine: Engine, + from_user: str, + to_user: str = None, + *, + allow_privilege_escalation: bool = True, +): + """Reassign all objects owned by one user to another user, reporting + privilege escalation that may not be desired.""" + + if to_user is None: + to_user = engine.url.username + # Check for privilege escalation + if not is_superuser(engine, from_user) and is_superuser( + engine, engine.url.username + ): + warning = "Privilege escalation to superuser may not be desired." 
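# [Editor's note] REASSIGN OWNED BY transfers every object owned by the
# departing role to the target role in a single statement, while DROP OWNED
# (used by drop_user above) is what actually removes the role's objects and
# revokes its grants. When the target is a superuser and the source role was
# not, ownership is effectively upgraded, which is why the code below either
# raises or warns depending on allow_privilege_escalation.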
+ if not allow_privilege_escalation: + raise ValueError(warning) + warn(warning) + + run_sql( + engine, + "REASSIGN OWNED BY {from_user} TO {to_user}", + dict(from_user=Identifier(from_user), to_user=Identifier(to_user)), + ) From 0c6dda9ac89a6c3bd2f55285887b2fff9693382d Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Fri, 2 Aug 2024 02:23:12 -0500 Subject: [PATCH 38/48] Updated function names somewhat --- .../cli/database/mariadb/postgresql_migration/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index 7d47d9b0..eef515c3 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -57,8 +57,9 @@ def migrate_mariadb_to_postgresql( temp_db_name = "macrostrat_temp" maria_temp_engine = create_engine(maria_engine.url.set(database=temp_db_name)) - # Final schema in the PostgreSQL database - schema = temp_db_name + # Destination schemas in the PostgreSQL database + temp_schema = temp_db_name + final_schema = "macrostrat" steps: set[MariaDBMigrationStep] = _all_steps if step is not None and len(step) > 0: @@ -68,7 +69,7 @@ def migrate_mariadb_to_postgresql( copy_mariadb_database(maria_engine, maria_temp_engine, overwrite=overwrite) if MariaDBMigrationStep.PGLOADER in steps: - pgloader(maria_temp_engine, pg_engine, schema, overwrite=overwrite) + pgloader(maria_temp_engine, pg_engine, temp_schema, overwrite=overwrite) if MariaDBMigrationStep.CHECK_DATA in steps: # NOTE: the temp schema and the final schema must be provided From cb633f651a1e50978170d7f35db361258632a97a Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Tue, 6 Aug 2024 14:09:10 -0500 Subject: [PATCH 39/48] fixed pgloader issue by adding mariadb_migrator as superuser --- .../mariadb/postgresql_migration/__init__.py | 13 ++++++++++--- .../mariadb/postgresql_migration/db_changes.py | 13 ++++--------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index f93aeeef..e2514bd3 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -6,7 +6,7 @@ from macrostrat.utils import get_logger from macrostrat.utils.shell import run from sqlalchemy import text, create_engine, inspect -from sqlalchemy.engine import Engine +from sqlalchemy.engine import Engine, make_url from macrostrat.core.config import settings from macrostrat.core import app from .db_changes import get_data_counts_maria, get_data_counts_pg, compare_data_counts, find_row_variances, find_col_variances @@ -49,8 +49,8 @@ def migrate_mariadb_to_postgresql( pg_engine = get_db().engine temp_db_name = maria_engine.url.database + "_temp" maria_temp_engine = create_engine(maria_engine.url.set(database=temp_db_name)) - #pg_temp_engine = create_engine(pg_engine.url.set(database=temp_db_name)) - pg_temp_engine = create_engine(pg_engine.url.set(database=temp_db_name)) + #had to set mariadb_migrator user as admin before running pgloader + pg_temp_engine = create_engine(make_url(settings.pgloader_target_database)) steps: set[MariaDBMigrationStep] = _all_steps if step is not None and len(step) > 0: @@ -61,6 +61,7 @@ def migrate_mariadb_to_postgresql( if 
MariaDBMigrationStep.PGLOADER in steps: pgloader_pre_script(maria_temp_engine) + # had to set mariadb_migrator user as admin before running pgloader: ALTER USER mariadb_migrator WITH SUPERUSER pgloader(maria_temp_engine, pg_temp_engine, overwrite=overwrite) pgloader_post_script(pg_temp_engine) @@ -104,6 +105,7 @@ def pgloader(source: Engine, dest: Engine, overwrite=False): Command terminal to run pgloader. Ensure Docker app is running. """ db_exists = database_exists(dest.url) + print(dest.url) if db_exists: if overwrite: @@ -115,6 +117,7 @@ def pgloader(source: Engine, dest: Engine, overwrite=False): f"PostgreSQL database [bold cyan]{dest.url.database}[/] already exists. Skipping pgloader." ) return + if not db_exists: header("Creating PostgreSQL database") @@ -167,6 +170,10 @@ def compare_row_counts(maria: Engine, pg_temp: Engine, pg_final: Engine): pg_macrostrat_temp_rows, pg_macrostrat_temp_columns = get_data_counts_pg( pg_temp, "macrostrat_temp" ) + #print(pg_macrostrat_temp_rows) + #print(len(pg_macrostrat_temp_rows)) + #print(pg_macrostrat_temp_columns) + #print(len(pg_macrostrat_temp_columns)) db1 = db_identifier(maria) db2 = db_identifier(pg_temp) diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py index 2c351d0d..057569e2 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py @@ -16,14 +16,12 @@ def get_data_counts_maria(engine: Engine): db_name = engine.url.database maria_rows = {} maria_columns = {} - with engine.connect() as conn: row_result = run_query( conn, "SELECT table_name FROM information_schema.tables WHERE table_schema = :table_schema AND table_type = 'BASE TABLE'", {"table_schema": db_name}, ) - maria_tables = [row[0] for row in row_result] for table in maria_tables: row_result = run_query(conn, f"SELECT COUNT(*) FROM {table}") @@ -34,20 +32,16 @@ def get_data_counts_maria(engine: Engine): "SELECT COUNT(*) FROM information_schema.columns WHERE table_schema = :table_schema AND table_name = :table_name", dict(table_schema=db_name, table_name=table), ) - column_count = column_result.scalar() maria_columns[table.lower()] = column_count - engine.dispose() return maria_rows, maria_columns def get_data_counts_pg(engine: Engine, schema): database_name = engine.url.database - pg_rows = {} pg_columns = {} - with engine.connect() as conn: table_result = run_query( conn, @@ -56,14 +50,15 @@ def get_data_counts_pg(engine: Engine, schema): WHERE table_catalog = :table_catalog AND table_type = 'BASE TABLE' AND table_schema = :table_schema """, - dict(table_schema=schema, table_catalog=database_name), + dict(table_catalog=database_name, table_schema=schema), ) pg_tables = [row[0] for row in table_result] + print(pg_tables) + for table in pg_tables: row_result = run_query( conn, - "SELECT COUNT(*) FROM {table}", - dict(table=Identifier(schema, table)), + f"SELECT COUNT(*) FROM {schema}.{table}", ) row_count = row_result.scalar() pg_rows[table.lower()] = row_count From daa13ad96b1b53ef14f3eeaf435db58a259c1718 Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Wed, 7 Aug 2024 11:23:13 -0500 Subject: [PATCH 40/48] repointed migration scripts to point to macrostrat.macrostrat_temp schema --- .../mariadb/postgresql_migration/__init__.py | 18 ++++++------------ .../pgloader-pre-script.sql | 4 ++-- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git 
a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index 8fadc793..b7d06202 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -74,11 +74,7 @@ def migrate_mariadb_to_postgresql( if MariaDBMigrationStep.CHECK_DATA in steps: # NOTE: the temp schema and the final schema must be provided - should_proceed = compare_row_counts( - maria_temp_engine, pg_temp_engine, pg_engine - ) - # TODO: integrate this with the previous function - find_row_col_variances(pg_engine) + should_proceed = compare_row_counts(maria_temp_engine, pg_engine, temp_schema) if not should_proceed: raise ValueError("Data comparison failed. Aborting migration.") @@ -193,18 +189,18 @@ def _build_pgloader(): ) -def compare_row_counts(maria: Engine, pg_temp: Engine, pg_final: Engine): +def compare_row_counts(maria: Engine, pg_engine: Engine, schema): console = app.console maria_rows, maria_columns = get_data_counts_maria(maria) pg_macrostrat_temp_rows, pg_macrostrat_temp_columns = get_data_counts_pg( - pg_temp, "macrostrat_temp" + pg_engine, schema ) db1 = db_identifier(maria) - db2 = db_identifier(pg_temp) - db3 = db_identifier(pg_final) + db2 = schema + db3 = db_identifier(pg_engine) header(f"\n\nComparing [cyan]{db1}[/] to [cyan]{db2}[/].") @@ -217,7 +213,7 @@ def compare_row_counts(maria: Engine, pg_temp: Engine, pg_final: Engine): db2, ) - pg_rows, pg_columns = get_data_counts_pg(pg_final, "macrostrat") + pg_rows, pg_columns = get_data_counts_pg(pg_engine, "macrostrat") header(f"\n\nComparing [cyan]{db2}[/] to [cyan]{db3}[/].") @@ -233,8 +229,6 @@ def compare_row_counts(maria: Engine, pg_temp: Engine, pg_final: Engine): # df, df_two = find_row_variances(pg_db_name, pg_db_name, pg_db_name_two, maria_db_name_two, # pg_user, pg_pass_new, 'cols') - -def find_row_col_variances(pg_engine: Engine): tables = [ "col_refs", "lookup_unit_attrs_api", diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-pre-script.sql b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-pre-script.sql index fa5a3dd7..1d30701c 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-pre-script.sql +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-pre-script.sql @@ -6,6 +6,7 @@ NOTE: this runs in MariaDB, not PostgreSQL */ + ALTER TABLE macrostrat_temp.pbdb_matches ADD COLUMN coordinate_point_text TEXT; UPDATE macrostrat_temp.pbdb_matches SET coordinate_point_text = ST_AsText(coordinate); @@ -61,5 +62,4 @@ SET col_areas_6April2016.col_area_text = ST_AsText(col_area); ALTER TABLE macrostrat_temp.col_areas_6April2016 DROP COLUMN col_area; UPDATE macrostrat_temp.liths -SET macrostrat_temp.lith_group = null -WHERE macrostrat_temp.lith_group = ''; +SET lith_group = NULL WHERE lith_group = ''; From 454129f157ef60591e56803a709e1e907d4f8015 Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Thu, 8 Aug 2024 15:06:58 -0500 Subject: [PATCH 41/48] added preserve_macrostrat_data() function for the final steps in the mariadb-migrate-postgres step --- .../mariadb/postgresql_migration/__init__.py | 8 ++++++- .../preserve-macrostrat-data.sql | 21 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 cli/macrostrat/cli/database/mariadb/postgresql_migration/preserve-macrostrat-data.sql diff --git 
a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index b7d06202..daef081c 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -56,7 +56,7 @@ def migrate_mariadb_to_postgresql( pg_engine = get_db().engine temp_db_name = "macrostrat_temp" maria_temp_engine = create_engine(maria_engine.url.set(database=temp_db_name)) - pg_temp_engine = create_engine(make_url(settings.pgloader_target_database)) + #pg_temp_engine = create_engine(make_url(settings.pgloader_target_database)) # Destination schemas in the PostgreSQL database temp_schema = temp_db_name @@ -79,6 +79,7 @@ def migrate_mariadb_to_postgresql( raise ValueError("Data comparison failed. Aborting migration.") if MariaDBMigrationStep.FINALIZE in steps: + should_proceed = preserve_macrostrat_data(pg_engine, temp_schema) raise NotImplementedError("Copy to macrostrat schema not yet implemented") @@ -287,6 +288,11 @@ def compare_row_counts(maria: Engine, pg_engine: Engine, schema): pg_engine, ) +def preserve_macrostrat_data(engine: Engine, schema): + app.console.print("\n[bold]Running script[/]") + assert engine.url.drivername.startswith("postgres") + preserve_macrostrat_data = __here__ / "preserve-macrostrat-data.sql" + run_sql(engine, preserve_macrostrat_data) def db_identifier(engine: Engine): driver = engine.url.drivername diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/preserve-macrostrat-data.sql b/cli/macrostrat/cli/database/mariadb/postgresql_migration/preserve-macrostrat-data.sql new file mode 100644 index 00000000..3d4991f1 --- /dev/null +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/preserve-macrostrat-data.sql @@ -0,0 +1,21 @@ +DO $$ +DECLARE + table_name text; + source_schema text := 'macrostrat'; + target_schema text := 'macrostrat_temp'; + tables text[] := ARRAY[ + 'strat_name_footprints', + 'grainsize', + 'pbdb_collections', + 'pbdb_collections_strat_names', + 'temp_rocks', + 'temp_names', + 'unit_lith_atts' + ]; +BEGIN + FOREACH table_name IN ARRAY tables + LOOP + EXECUTE format('CREATE TABLE %I.%I (LIKE %I.%I INCLUDING ALL)', target_schema, table_name, source_schema, table_name); + EXECUTE format('INSERT INTO %I.%I SELECT * FROM %I.%I', target_schema, table_name, source_schema, table_name); + END LOOP; +END $$; From fd039a55f83d477123c4a9b158d2b98a3fec8508 Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Fri, 9 Aug 2024 15:44:07 -0500 Subject: [PATCH 42/48] Added code to resolve table and column variances across macrostrat and macrostrat_temp --- .../mariadb/postgresql_migration/__init__.py | 16 ++++--- .../pgloader-post-script.sql | 18 ++++++++ .../pgloader-pre-script.sql | 7 +++ .../preserve-macrostrat-data.sql | 46 +++++++++++++++++-- 4 files changed, 76 insertions(+), 11 deletions(-) diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index daef081c..92aa5ba5 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -79,9 +79,11 @@ def migrate_mariadb_to_postgresql( raise ValueError("Data comparison failed. 
Aborting migration.") if MariaDBMigrationStep.FINALIZE in steps: - should_proceed = preserve_macrostrat_data(pg_engine, temp_schema) - raise NotImplementedError("Copy to macrostrat schema not yet implemented") - + should_proceed = preserve_macrostrat_data(pg_engine) + if should_proceed: + raise NotImplementedError("Copy to macrostrat schema not yet implemented") + else: + print("Completed!") def pgloader(source: Engine, dest: Engine, target_schema: str, overwrite: bool = False): _build_pgloader() @@ -288,11 +290,11 @@ def compare_row_counts(maria: Engine, pg_engine: Engine, schema): pg_engine, ) -def preserve_macrostrat_data(engine: Engine, schema): - app.console.print("\n[bold]Running script[/]") +def preserve_macrostrat_data(engine: Engine): + app.console.print("\n[bold]Running script[/]") assert engine.url.drivername.startswith("postgres") - preserve_macrostrat_data = __here__ / "preserve-macrostrat-data.sql" - run_sql(engine, preserve_macrostrat_data) + preserve_data = __here__ / "preserve-macrostrat-data.sql" + run_sql(engine, preserve_data) def db_identifier(engine: Engine): driver = engine.url.drivername diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql index 8799fdd6..306ff40f 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql @@ -42,3 +42,21 @@ UPDATE macrostrat_temp.col_areas_6April2016 SET col_area = ST_GeomFromText(col_a ALTER TABLE macrostrat_temp.col_areas_6April2016 DROP COLUMN col_area_text; SELECT * FROM macrostrat_temp.col_areas_6April2016 LIMIT 5; +--added query below since column exists in macrostrat and not in macrostrat_temp. +ALTER TABLE macrostrat_temp.measuremeta ADD COLUMN geometry geometry(Point, 4326); +UPDATE macrostrat_temp.measuremeta SET geometry = ST_SetSRID(ST_makepoint(lng, lat), 4326); + +--added query below since column exists in macrostrat and not in macrostrat_temp. + +ALTER TABLE macrostrat_temp.col_areas ADD COLUMN wkt text; +UPDATE macrostrat_temp.col_areas SET wkt = ST_AsText(col_area); + +--added query below since column exists in macrostrat and not in macrostrat_temp. + +ALTER TABLE macrostrat_temp.cols ADD COLUMN wkt text; +ALTER TABLE macrostrat_temp.cols ADD COLUMN poly_geom geometry; +UPDATE macrostrat_temp.cols SET wkt = ST_AsText(coordinate); +UPDATE macrostrat_temp.cols c SET poly_geom = a.col_area +FROM macrostrat_temp.col_areas a WHERE c.id = a.col_id; +UPDATE macrostrat_temp.cols SET poly_geom = ST_SetSRID(poly_geom, 4326); + diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-pre-script.sql b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-pre-script.sql index 1d30701c..52382b1e 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-pre-script.sql +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-pre-script.sql @@ -22,6 +22,13 @@ SET geom_text = ST_AsText(geom); ALTER TABLE macrostrat_temp.places DROP COLUMN geom; +--Added query below to match the PG macrostrat database. Will need to review the API to ensure it doesn't break. 
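+--The renames below align MariaDB's strat_tree columns (this_name/that_name) with the parent/child names used by the PostgreSQL dump script linked here: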
+--https://github.com/UW-Macrostrat/macrostrat/blob/7aefe2d0cc89a738b356ff444b7b3dd0fd85e607/cli/macrostrat/cli/commands/table_meta/strat_tree/0-dump.sql
+ALTER TABLE macrostrat_temp.strat_tree RENAME COLUMN this_name TO parent;
+ALTER TABLE macrostrat_temp.strat_tree RENAME COLUMN that_name TO child;
+
+
+
 ALTER TABLE macrostrat_temp.refs ADD COLUMN rgeom_text LONGTEXT;
 UPDATE macrostrat_temp.refs
 SET rgeom_text = ST_AsText(rgeom);
diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/preserve-macrostrat-data.sql b/cli/macrostrat/cli/database/mariadb/postgresql_migration/preserve-macrostrat-data.sql
index 3d4991f1..bddb4cfd 100644
--- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/preserve-macrostrat-data.sql
+++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/preserve-macrostrat-data.sql
@@ -1,3 +1,37 @@
+--This query adds columns and data that exist in macrostrat and not in macrostrat_temp
+
+--Used this script to add best_interval_id column into macrostrat_temp.lookup_unit_intervals
+--https://github.com/UW-Macrostrat/macrostrat/blob/7aefe2d0cc89a738b356ff444b7b3dd0fd85e607/cli/macrostrat/cli/commands/table_meta/lookup_unit_intervals/4-process.sql#L21
+ALTER TABLE macrostrat.macrostrat_temp.lookup_unit_intervals ADD COLUMN best_interval_id INTEGER;
+
+WITH bests AS (
+    select unit_id,
+    CASE
+        WHEN age_id > 0 THEN
+            age_id
+        WHEN epoch_id > 0 THEN
+            epoch_id
+        WHEN period_id > 0 THEN
+            period_id
+        WHEN era_id > 0 THEN
+            era_id
+        WHEN eon_id > 0 THEN
+            eon_id
+        ELSE
+            0
+        END
+    AS b_interval_id from macrostrat_temp.lookup_unit_intervals
+)
+UPDATE macrostrat_temp.lookup_unit_intervals lui
+SET best_interval_id = b_interval_id
+FROM bests
+WHERE lui.unit_id = bests.unit_id;
+/*
+This query copies the table configuration and all data from macrostrat.macrostrat and inserts it
+into the macrostrat.macrostrat_temp schema. This is to preserve the data that exists in macrostrat and NOT in
+MariaDB before we run the migration.
+*/
+
 DO $$
 DECLARE
     table_name text;
@@ -7,10 +41,7 @@ DECLARE
        'strat_name_footprints',
        'grainsize',
        'pbdb_collections',
-       'pbdb_collections_strat_names',
-       'temp_rocks',
-       'temp_names',
-       'unit_lith_atts'
+       'pbdb_collections_strat_names'
     ];
 BEGIN
     FOREACH table_name IN ARRAY tables
@@ -19,3 +50,10 @@ BEGIN
         EXECUTE format('INSERT INTO %I.%I SELECT * FROM %I.%I', target_schema, table_name, source_schema, table_name);
     END LOOP;
 END $$;
+
+
+
+INSERT INTO macrostrat_temp.strat_tree (parent, child)
+SELECT parent, child
+FROM macrostrat.strat_tree;
+

From 4c729c3e8cc33ece66201f3a0ffc2a083eff7585 Mon Sep 17 00:00:00 2001
From: Amy Fromandi
Date: Mon, 12 Aug 2024 19:14:50 -0500
Subject: [PATCH 43/48] post-script and preserve-macrostrat-data scripts are finalized!
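This patch also adds an integer rank column to macrostrat_temp.intervals and backfills it from interval_type (age=6, epoch=5, period=4, era=3, eon=2, supereon=1, everything else 0). A quick sanity check of the backfill might look like the ad-hoc query below; it is illustrative only and is not part of the migration scripts:

SELECT rank, interval_type, COUNT(*) AS n
FROM macrostrat_temp.intervals
GROUP BY rank, interval_type
ORDER BY rank;

Each interval_type should map to exactly one rank, so a type that shows up under two ranks would indicate an incomplete backfill.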
--- .../mariadb/postgresql_migration/__init__.py | 7 +++++-- .../postgresql_migration/pgloader-post-script.sql | 5 +++++ .../preserve-macrostrat-data.sql | 12 ++++++++---- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index 92aa5ba5..11f9e264 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -75,15 +75,18 @@ def migrate_mariadb_to_postgresql( if MariaDBMigrationStep.CHECK_DATA in steps: # NOTE: the temp schema and the final schema must be provided should_proceed = compare_row_counts(maria_temp_engine, pg_engine, temp_schema) - if not should_proceed: + if should_proceed: raise ValueError("Data comparison failed. Aborting migration.") + else: + print("check-data completed!") + if MariaDBMigrationStep.FINALIZE in steps: should_proceed = preserve_macrostrat_data(pg_engine) if should_proceed: raise NotImplementedError("Copy to macrostrat schema not yet implemented") else: - print("Completed!") + print("finalize completed!") def pgloader(source: Engine, dest: Engine, target_schema: str, overwrite: bool = False): _build_pgloader() diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql index 306ff40f..333da9b0 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql @@ -60,3 +60,8 @@ UPDATE macrostrat_temp.cols c SET poly_geom = a.col_area FROM macrostrat_temp.col_areas a WHERE c.id = a.col_id; UPDATE macrostrat_temp.cols SET poly_geom = ST_SetSRID(poly_geom, 4326); + +--add rank column since this column only exists in macrostrat and not macrostrat_temp + +ALTER TABLE macrostrat_temp.intervals ADD COLUMN rank integer; + diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/preserve-macrostrat-data.sql b/cli/macrostrat/cli/database/mariadb/postgresql_migration/preserve-macrostrat-data.sql index bddb4cfd..5814d048 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/preserve-macrostrat-data.sql +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/preserve-macrostrat-data.sql @@ -53,7 +53,11 @@ END $$; -INSERT INTO macrostrat_temp.strat_tree (parent, child) -SELECT parent, child -FROM macrostrat.strat_tree; - +--from schlep scripts +UPDATE macrostrat_temp.intervals SET rank = 6 WHERE interval_type = 'age'; +UPDATE macrostrat_temp.intervals SET rank = 5 WHERE interval_type = 'epoch'; +UPDATE macrostrat_temp.intervals SET rank = 4 WHERE interval_type = 'period'; +UPDATE macrostrat_temp.intervals SET rank = 3 WHERE interval_type = 'era'; +UPDATE macrostrat_temp.intervals SET rank = 2 WHERE interval_type = 'eon'; +UPDATE macrostrat_temp.intervals SET rank = 1 WHERE interval_type = 'supereon'; +UPDATE macrostrat_temp.intervals SET rank = 0 WHERE rank IS NULL; \ No newline at end of file From f03941c0008b946ffb84a18e7ce073680dc0f6ce Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Tue, 13 Aug 2024 12:09:03 -0500 Subject: [PATCH 44/48] refactored a little bit of data --- .../mariadb/postgresql_migration/pgloader-post-script.sql | 2 ++ .../mariadb/postgresql_migration/preserve-macrostrat-data.sql | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff 
--git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql
index 333da9b0..a0e934a0 100644
--- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql
+++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql
@@ -46,6 +46,8 @@ SELECT * FROM macrostrat_temp.col_areas_6April2016 LIMIT 5;
 ALTER TABLE macrostrat_temp.measuremeta ADD COLUMN geometry geometry(Point, 4326);
 UPDATE macrostrat_temp.measuremeta SET geometry = ST_SetSRID(ST_makepoint(lng, lat), 4326);
+ALTER TABLE macrostrat.macrostrat_temp.lookup_unit_intervals ADD COLUMN best_interval_id INTEGER;
+
 --added query below since column exists in macrostrat and not in macrostrat_temp.
 
 ALTER TABLE macrostrat_temp.col_areas ADD COLUMN wkt text;
diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/preserve-macrostrat-data.sql b/cli/macrostrat/cli/database/mariadb/postgresql_migration/preserve-macrostrat-data.sql
index 5814d048..4b664354 100644
--- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/preserve-macrostrat-data.sql
+++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/preserve-macrostrat-data.sql
@@ -2,7 +2,6 @@
 
 --Used this script to add best_interval_id column into macrostrat_temp.lookup_unit_intervals
 --https://github.com/UW-Macrostrat/macrostrat/blob/7aefe2d0cc89a738b356ff444b7b3dd0fd85e607/cli/macrostrat/cli/commands/table_meta/lookup_unit_intervals/4-process.sql#L21
-ALTER TABLE macrostrat.macrostrat_temp.lookup_unit_intervals ADD COLUMN best_interval_id INTEGER;
 
 WITH bests AS (
     select unit_id,

From 265feaf4c3232751d91dd1b2e761150df72f722e Mon Sep 17 00:00:00 2001
From: Amy Fromandi
Date: Fri, 30 Aug 2024 13:25:25 -0500
Subject: [PATCH 45/48] removing unnecessary schlep scripts. Index files moved
 into schlep-index.sql; kept the schlep scripts for the tables that are
 modified in the pre/post migration scripts.
--- .../table_meta/autocomplete/0-dump.sql | 4 - .../table_meta/autocomplete/1-create.sql | 8 - .../table_meta/autocomplete/2-insert.sql | 3 - .../table_meta/autocomplete/3-index.sql | 6 - .../commands/table_meta/col_groups/0-dump.sql | 4 - .../table_meta/col_groups/1-create.sql | 7 - .../table_meta/col_groups/2-insert.sql | 3 - .../table_meta/col_groups/3-index.sql | 3 - .../commands/table_meta/col_refs/0-dump.sql | 4 - .../commands/table_meta/col_refs/1-create.sql | 7 - .../commands/table_meta/col_refs/2-insert.sql | 3 - .../commands/table_meta/col_refs/3-index.sql | 4 - .../table_meta/concepts_places/0-dump.sql | 4 - .../table_meta/concepts_places/1-create.sql | 6 - .../table_meta/concepts_places/2-insert.sql | 3 - .../table_meta/concepts_places/3-index.sql | 4 - .../cli/commands/table_meta/econs/0-dump.sql | 4 - .../commands/table_meta/econs/1-create.sql | 9 - .../commands/table_meta/econs/2-insert.sql | 3 - .../commands/table_meta/environs/0-dump.sql | 4 - .../commands/table_meta/environs/1-create.sql | 9 - .../commands/table_meta/environs/2-insert.sql | 3 - .../commands/table_meta/lith_atts/0-dump.sql | 4 - .../table_meta/lith_atts/1-create.sql | 8 - .../table_meta/lith_atts/2-insert.sql | 3 - .../commands/table_meta/lith_atts/3-index.sql | 4 - .../cli/commands/table_meta/liths/0-dump.sql | 4 - .../commands/table_meta/liths/1-create.sql | 15 -- .../commands/table_meta/liths/2-insert.sql | 3 - .../cli/commands/table_meta/liths/3-index.sql | 5 - .../table_meta/lookup_strat_names/0-dump.sql | 4 - .../lookup_strat_names/1-create.sql | 26 --- .../lookup_strat_names/2-insert.sql | 3 - .../table_meta/lookup_strat_names/3-index.sql | 10 - .../lookup_unit_attrs_api/0-dump.sql | 4 - .../lookup_unit_attrs_api/1-create.sql | 10 - .../lookup_unit_attrs_api/2-insert.sql | 4 - .../lookup_unit_attrs_api/3-index.sql | 3 - .../table_meta/lookup_unit_liths/0-dump.sql | 4 - .../table_meta/lookup_unit_liths/1-create.sql | 12 -- .../table_meta/lookup_unit_liths/2-insert.sql | 3 - .../table_meta/lookup_unit_liths/3-index.sql | 3 - .../table_meta/lookup_units/0-dump.sql | 4 - .../table_meta/lookup_units/1-create.sql | 39 ---- .../table_meta/lookup_units/2-insert.sql | 3 - .../table_meta/lookup_units/3-index.sql | 5 - .../table_meta/measurements/0-dump.sql | 7 - .../table_meta/measurements/1-create.sql | 16 -- .../table_meta/measurements/2-insert.sql | 9 - .../table_meta/measurements/3-index.sql | 4 - .../table_meta/measurements/4-process.sql | 0 .../commands/table_meta/measures/0-dump.sql | 16 -- .../commands/table_meta/measures/1-create.sql | 14 -- .../commands/table_meta/measures/2-insert.sql | 29 --- .../commands/table_meta/measures/3-index.sql | 3 - .../cli/commands/table_meta/places/0-dump.sql | 4 - .../commands/table_meta/places/1-create.sql | 11 -- .../commands/table_meta/places/2-insert.sql | 3 - .../commands/table_meta/places/3-index.sql | 3 - .../commands/table_meta/projects/0-dump.sql | 6 - .../commands/table_meta/projects/1-create.sql | 6 - .../commands/table_meta/projects/2-insert.sql | 2 - .../commands/table_meta/projects/3-index.sql | 2 - .../cli/commands/table_meta/refs/0-dump.sql | 4 - .../cli/commands/table_meta/refs/1-create.sql | 12 -- .../cli/commands/table_meta/refs/2-insert.sql | 3 - .../cli/commands/table_meta/refs/3-index.sql | 3 - .../commands/table_meta/sections/0-dump.sql | 1 - .../commands/table_meta/sections/1-create.sql | 4 - .../commands/table_meta/sections/2-insert.sql | 1 - .../commands/table_meta/sections/3-index.sql | 2 - .../table_meta/strat_names/0-dump.sql | 4 - 
.../table_meta/strat_names/1-create.sql | 9 - .../table_meta/strat_names/2-insert.sql | 3 - .../table_meta/strat_names/3-index.sql | 6 - .../table_meta/strat_names_meta/0-dump.sql | 4 - .../table_meta/strat_names_meta/1-create.sql | 16 -- .../table_meta/strat_names_meta/2-insert.sql | 3 - .../table_meta/strat_names_meta/3-index.sql | 6 - .../table_meta/strat_names_places/0-dump.sql | 4 - .../strat_names_places/1-create.sql | 6 - .../strat_names_places/2-insert.sql | 3 - .../table_meta/strat_names_places/3-index.sql | 4 - .../commands/table_meta/timescales/0-dump.sql | 4 - .../table_meta/timescales/1-create.sql | 7 - .../table_meta/timescales/2-insert.sql | 3 - .../table_meta/timescales/3-index.sql | 4 - .../timescales_intervals/0-dump.sql | 4 - .../timescales_intervals/1-create.sql | 6 - .../timescales_intervals/2-insert.sql | 3 - .../timescales_intervals/3-index.sql | 4 - .../table_meta/unit_boundaries/0-dump.sql | 15 -- .../table_meta/unit_boundaries/1-create.sql | 21 -- .../table_meta/unit_boundaries/2-insert.sql | 2 - .../table_meta/unit_boundaries/3-index.sql | 4 - .../commands/table_meta/unit_econs/0-dump.sql | 4 - .../table_meta/unit_econs/1-create.sql | 9 - .../table_meta/unit_econs/2-insert.sql | 3 - .../table_meta/unit_econs/3-index.sql | 5 - .../table_meta/unit_environs/0-dump.sql | 4 - .../table_meta/unit_environs/1-create.sql | 9 - .../table_meta/unit_environs/2-insert.sql | 3 - .../table_meta/unit_environs/3-index.sql | 5 - .../table_meta/unit_lith_atts/0-dump.sql | 4 - .../table_meta/unit_lith_atts/1-create.sql | 9 - .../table_meta/unit_lith_atts/2-insert.sql | 3 - .../table_meta/unit_lith_atts/3-index.sql | 5 - .../commands/table_meta/unit_liths/0-dump.sql | 4 - .../table_meta/unit_liths/1-create.sql | 14 -- .../table_meta/unit_liths/2-insert.sql | 3 - .../table_meta/unit_liths/3-index.sql | 5 - .../table_meta/unit_strat_names/0-dump.sql | 4 - .../table_meta/unit_strat_names/1-create.sql | 7 - .../table_meta/unit_strat_names/2-insert.sql | 3 - .../table_meta/unit_strat_names/3-index.sql | 4 - .../table_meta/units_sections/0-dump.sql | 4 - .../table_meta/units_sections/1-create.sql | 8 - .../table_meta/units_sections/2-insert.sql | 3 - .../table_meta/units_sections/3-index.sql | 5 - .../postgresql_migration/schlep-index.sql | 179 ++++++++++++++++++ 120 files changed, 179 insertions(+), 725 deletions(-) delete mode 100644 cli/macrostrat/cli/commands/table_meta/autocomplete/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/autocomplete/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/autocomplete/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/autocomplete/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/col_groups/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/col_groups/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/col_groups/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/col_groups/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/col_refs/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/col_refs/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/col_refs/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/col_refs/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/concepts_places/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/concepts_places/1-create.sql delete mode 100644 
cli/macrostrat/cli/commands/table_meta/concepts_places/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/concepts_places/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/econs/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/econs/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/econs/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/environs/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/environs/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/environs/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lith_atts/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lith_atts/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lith_atts/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lith_atts/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/liths/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/liths/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/liths/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/liths/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lookup_strat_names/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lookup_strat_names/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lookup_strat_names/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lookup_strat_names/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lookup_unit_attrs_api/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lookup_unit_attrs_api/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lookup_unit_attrs_api/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lookup_unit_attrs_api/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lookup_unit_liths/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lookup_unit_liths/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lookup_unit_liths/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lookup_unit_liths/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lookup_units/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lookup_units/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lookup_units/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/lookup_units/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/measurements/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/measurements/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/measurements/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/measurements/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/measurements/4-process.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/measures/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/measures/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/measures/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/measures/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/places/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/places/1-create.sql delete 
mode 100644 cli/macrostrat/cli/commands/table_meta/places/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/places/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/projects/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/projects/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/projects/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/projects/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/refs/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/refs/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/refs/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/refs/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/sections/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/sections/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/sections/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/sections/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/strat_names/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/strat_names/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/strat_names/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/strat_names/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/strat_names_meta/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/strat_names_meta/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/strat_names_meta/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/strat_names_meta/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/strat_names_places/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/strat_names_places/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/strat_names_places/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/strat_names_places/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/timescales/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/timescales/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/timescales/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/timescales/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/timescales_intervals/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/timescales_intervals/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/timescales_intervals/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/timescales_intervals/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_boundaries/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_boundaries/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_boundaries/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_boundaries/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_econs/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_econs/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_econs/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_econs/3-index.sql delete mode 100644 
cli/macrostrat/cli/commands/table_meta/unit_environs/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_environs/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_environs/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_environs/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_lith_atts/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_lith_atts/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_lith_atts/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_lith_atts/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_liths/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_liths/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_liths/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_liths/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_strat_names/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_strat_names/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_strat_names/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/unit_strat_names/3-index.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/units_sections/0-dump.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/units_sections/1-create.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/units_sections/2-insert.sql delete mode 100644 cli/macrostrat/cli/commands/table_meta/units_sections/3-index.sql create mode 100644 cli/macrostrat/cli/database/mariadb/postgresql_migration/schlep-index.sql diff --git a/cli/macrostrat/cli/commands/table_meta/autocomplete/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/autocomplete/0-dump.sql deleted file mode 100644 index 0b834892..00000000 --- a/cli/macrostrat/cli/commands/table_meta/autocomplete/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT id, name, type, category -FROM autocomplete - diff --git a/cli/macrostrat/cli/commands/table_meta/autocomplete/1-create.sql b/cli/macrostrat/cli/commands/table_meta/autocomplete/1-create.sql deleted file mode 100644 index 865f4096..00000000 --- a/cli/macrostrat/cli/commands/table_meta/autocomplete/1-create.sql +++ /dev/null @@ -1,8 +0,0 @@ - -CREATE TABLE macrostrat.autocomplete_new ( - id integer NOT NULL, - name text, - type text, - category text -); - diff --git a/cli/macrostrat/cli/commands/table_meta/autocomplete/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/autocomplete/2-insert.sql deleted file mode 100644 index deb5863f..00000000 --- a/cli/macrostrat/cli/commands/table_meta/autocomplete/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.autocomplete_new (id, name, type, category) VALUES (%(id)s, %(name)s, %(type)s, %(category)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/autocomplete/3-index.sql b/cli/macrostrat/cli/commands/table_meta/autocomplete/3-index.sql deleted file mode 100644 index 915b1a2b..00000000 --- a/cli/macrostrat/cli/commands/table_meta/autocomplete/3-index.sql +++ /dev/null @@ -1,6 +0,0 @@ - -CREATE INDEX ON macrostrat.autocomplete_new (id); -CREATE INDEX ON macrostrat.autocomplete_new (name); -CREATE INDEX ON macrostrat.autocomplete_new (type); -CREATE INDEX ON macrostrat.autocomplete_new (category); - diff --git a/cli/macrostrat/cli/commands/table_meta/col_groups/0-dump.sql 
b/cli/macrostrat/cli/commands/table_meta/col_groups/0-dump.sql deleted file mode 100644 index e43aeef7..00000000 --- a/cli/macrostrat/cli/commands/table_meta/col_groups/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT id, col_group, col_group_long -FROM col_groups - diff --git a/cli/macrostrat/cli/commands/table_meta/col_groups/1-create.sql b/cli/macrostrat/cli/commands/table_meta/col_groups/1-create.sql deleted file mode 100644 index ac779de4..00000000 --- a/cli/macrostrat/cli/commands/table_meta/col_groups/1-create.sql +++ /dev/null @@ -1,7 +0,0 @@ - -CREATE TABLE macrostrat.col_groups_new ( - id integer PRIMARY KEY, - col_group character varying(100), - col_group_long character varying(100) -); - diff --git a/cli/macrostrat/cli/commands/table_meta/col_groups/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/col_groups/2-insert.sql deleted file mode 100644 index b1e044cd..00000000 --- a/cli/macrostrat/cli/commands/table_meta/col_groups/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.col_groups_new (id, col_group, col_group_long ) VALUES (%(id)s, %(col_group)s, %(col_group_long)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/col_groups/3-index.sql b/cli/macrostrat/cli/commands/table_meta/col_groups/3-index.sql deleted file mode 100644 index 1582abe7..00000000 --- a/cli/macrostrat/cli/commands/table_meta/col_groups/3-index.sql +++ /dev/null @@ -1,3 +0,0 @@ - -CREATE INDEX ON macrostrat.col_groups_new (id); - diff --git a/cli/macrostrat/cli/commands/table_meta/col_refs/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/col_refs/0-dump.sql deleted file mode 100644 index 4afd34ca..00000000 --- a/cli/macrostrat/cli/commands/table_meta/col_refs/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT id, col_id, ref_id -FROM col_refs - diff --git a/cli/macrostrat/cli/commands/table_meta/col_refs/1-create.sql b/cli/macrostrat/cli/commands/table_meta/col_refs/1-create.sql deleted file mode 100644 index 43e44895..00000000 --- a/cli/macrostrat/cli/commands/table_meta/col_refs/1-create.sql +++ /dev/null @@ -1,7 +0,0 @@ - -CREATE TABLE macrostrat.col_refs_new ( - id integer PRIMARY KEY, - col_id integer, - ref_id integer -); - diff --git a/cli/macrostrat/cli/commands/table_meta/col_refs/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/col_refs/2-insert.sql deleted file mode 100644 index b13a8652..00000000 --- a/cli/macrostrat/cli/commands/table_meta/col_refs/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.col_refs_new (id, col_id, ref_id) VALUES (%(id)s, %(col_id)s, %(ref_id)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/col_refs/3-index.sql b/cli/macrostrat/cli/commands/table_meta/col_refs/3-index.sql deleted file mode 100644 index 4b256688..00000000 --- a/cli/macrostrat/cli/commands/table_meta/col_refs/3-index.sql +++ /dev/null @@ -1,4 +0,0 @@ - -CREATE INDEX ON macrostrat.col_refs_new (col_id); -CREATE INDEX ON macrostrat.col_refs_new (ref_id); - diff --git a/cli/macrostrat/cli/commands/table_meta/concepts_places/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/concepts_places/0-dump.sql deleted file mode 100644 index ca07b037..00000000 --- a/cli/macrostrat/cli/commands/table_meta/concepts_places/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT concept_id, place_id -FROM concepts_places - diff --git a/cli/macrostrat/cli/commands/table_meta/concepts_places/1-create.sql b/cli/macrostrat/cli/commands/table_meta/concepts_places/1-create.sql deleted file mode 100644 index 5ab44020..00000000 --- 
a/cli/macrostrat/cli/commands/table_meta/concepts_places/1-create.sql +++ /dev/null @@ -1,6 +0,0 @@ - -CREATE TABLE macrostrat.concepts_places_new ( - concept_id integer NOT NULL, - place_id integer NOT NULL -); - diff --git a/cli/macrostrat/cli/commands/table_meta/concepts_places/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/concepts_places/2-insert.sql deleted file mode 100644 index f4272af7..00000000 --- a/cli/macrostrat/cli/commands/table_meta/concepts_places/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.concepts_places_new (concept_id, place_id) VALUES (%(concept_id)s, %(place_id)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/concepts_places/3-index.sql b/cli/macrostrat/cli/commands/table_meta/concepts_places/3-index.sql deleted file mode 100644 index 37166cf1..00000000 --- a/cli/macrostrat/cli/commands/table_meta/concepts_places/3-index.sql +++ /dev/null @@ -1,4 +0,0 @@ - -CREATE INDEX ON macrostrat.concepts_places_new (concept_id); -CREATE INDEX ON macrostrat.concepts_places_new (place_id); - diff --git a/cli/macrostrat/cli/commands/table_meta/econs/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/econs/0-dump.sql deleted file mode 100644 index b4b1adbd..00000000 --- a/cli/macrostrat/cli/commands/table_meta/econs/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT id, econ, econ_type, econ_class, econ_color -FROM econs - diff --git a/cli/macrostrat/cli/commands/table_meta/econs/1-create.sql b/cli/macrostrat/cli/commands/table_meta/econs/1-create.sql deleted file mode 100644 index 16051815..00000000 --- a/cli/macrostrat/cli/commands/table_meta/econs/1-create.sql +++ /dev/null @@ -1,9 +0,0 @@ - -CREATE TABLE macrostrat.econs_new ( - id integer NOT NULL PRIMARY KEY, - econ text, - econ_type text, - econ_class text, - econ_color text -); - diff --git a/cli/macrostrat/cli/commands/table_meta/econs/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/econs/2-insert.sql deleted file mode 100644 index 0d45e3d5..00000000 --- a/cli/macrostrat/cli/commands/table_meta/econs/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.econs_new (id, econ, econ_type, econ_class, econ_color) VALUES (%(id)s, %(econ)s, %(econ_type)s, %(econ_class)s, %(econ_color)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/environs/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/environs/0-dump.sql deleted file mode 100644 index f000ed95..00000000 --- a/cli/macrostrat/cli/commands/table_meta/environs/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT id, environ, environ_type, environ_class, environ_color -FROM environs - diff --git a/cli/macrostrat/cli/commands/table_meta/environs/1-create.sql b/cli/macrostrat/cli/commands/table_meta/environs/1-create.sql deleted file mode 100644 index b0517753..00000000 --- a/cli/macrostrat/cli/commands/table_meta/environs/1-create.sql +++ /dev/null @@ -1,9 +0,0 @@ - -CREATE TABLE macrostrat.environs_new ( - id integer NOT NULL PRIMARY KEY, - environ text, - environ_type text, - environ_class text, - environ_color text -); - diff --git a/cli/macrostrat/cli/commands/table_meta/environs/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/environs/2-insert.sql deleted file mode 100644 index cd8f95db..00000000 --- a/cli/macrostrat/cli/commands/table_meta/environs/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.environs_new (id, environ, environ_type, environ_class, environ_color) VALUES (%(id)s, %(environ)s, %(environ_type)s, %(environ_class)s, %(environ_color)s) - diff --git 
a/cli/macrostrat/cli/commands/table_meta/lith_atts/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/lith_atts/0-dump.sql deleted file mode 100644 index 1961ae66..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lith_atts/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT id, lith_att, att_type, lith_att_fill -FROM lith_atts - diff --git a/cli/macrostrat/cli/commands/table_meta/lith_atts/1-create.sql b/cli/macrostrat/cli/commands/table_meta/lith_atts/1-create.sql deleted file mode 100644 index df10ccc6..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lith_atts/1-create.sql +++ /dev/null @@ -1,8 +0,0 @@ - -CREATE TABLE macrostrat.lith_atts_new ( - id integer PRIMARY KEY NOT NULL, - lith_att character varying(75), - att_type character varying(25), - lith_att_fill integer -); - diff --git a/cli/macrostrat/cli/commands/table_meta/lith_atts/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/lith_atts/2-insert.sql deleted file mode 100644 index 9506dcf2..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lith_atts/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.lith_atts_new (id, lith_att, att_type, lith_att_fill) VALUES (%(id)s, %(lith_att)s, %(att_type)s, %(lith_att_fill)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/lith_atts/3-index.sql b/cli/macrostrat/cli/commands/table_meta/lith_atts/3-index.sql deleted file mode 100644 index a7e6f696..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lith_atts/3-index.sql +++ /dev/null @@ -1,4 +0,0 @@ - -CREATE INDEX ON macrostrat.lith_atts_new (att_type); -CREATE INDEX ON macrostrat.lith_atts_new (lith_att); - diff --git a/cli/macrostrat/cli/commands/table_meta/liths/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/liths/0-dump.sql deleted file mode 100644 index 13b4454a..00000000 --- a/cli/macrostrat/cli/commands/table_meta/liths/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT id, lith, lith_group, lith_type, lith_class, lith_equiv, lith_fill, comp_coef, initial_porosity, bulk_density, lith_color -FROM liths - diff --git a/cli/macrostrat/cli/commands/table_meta/liths/1-create.sql b/cli/macrostrat/cli/commands/table_meta/liths/1-create.sql deleted file mode 100644 index b46cde79..00000000 --- a/cli/macrostrat/cli/commands/table_meta/liths/1-create.sql +++ /dev/null @@ -1,15 +0,0 @@ - -CREATE TABLE macrostrat.liths_new ( - id integer PRIMARY KEY NOT NULL, - lith character varying(75), - lith_group text, - lith_type character varying(50), - lith_class character varying(50), - lith_equiv integer, - lith_fill integer, - comp_coef numeric, - initial_porosity numeric, - bulk_density numeric, - lith_color character varying(12) -); - diff --git a/cli/macrostrat/cli/commands/table_meta/liths/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/liths/2-insert.sql deleted file mode 100644 index 12684f72..00000000 --- a/cli/macrostrat/cli/commands/table_meta/liths/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.liths_new ( id, lith, lith_group, lith_type, lith_class, lith_equiv, lith_fill, comp_coef, initial_porosity, bulk_density, lith_color) VALUES (%(id)s, %(lith)s, %(lith_group)s, %(lith_type)s, %(lith_class)s, %(lith_equiv)s, %(lith_fill)s, %(comp_coef)s, %(initial_porosity)s, %(bulk_density)s, %(lith_color)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/liths/3-index.sql b/cli/macrostrat/cli/commands/table_meta/liths/3-index.sql deleted file mode 100644 index ba8835b3..00000000 --- a/cli/macrostrat/cli/commands/table_meta/liths/3-index.sql +++ /dev/null @@ -1,5 +0,0 @@ 
- -CREATE INDEX ON macrostrat.liths_new (lith); -CREATE INDEX ON macrostrat.liths_new (lith_class); -CREATE INDEX ON macrostrat.liths_new (lith_type); - diff --git a/cli/macrostrat/cli/commands/table_meta/lookup_strat_names/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/lookup_strat_names/0-dump.sql deleted file mode 100644 index 73e11620..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lookup_strat_names/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT strat_name_id, strat_name, rank, concept_id, rank_name, bed_id, bed_name, mbr_id, mbr_name, fm_id, fm_name, gp_id, gp_name, sgp_id, sgp_name, early_age, late_age, gsc_lexicon, b_period, t_period, c_interval, name_no_lith -FROM lookup_strat_names - diff --git a/cli/macrostrat/cli/commands/table_meta/lookup_strat_names/1-create.sql b/cli/macrostrat/cli/commands/table_meta/lookup_strat_names/1-create.sql deleted file mode 100644 index a28b247d..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lookup_strat_names/1-create.sql +++ /dev/null @@ -1,26 +0,0 @@ - -CREATE TABLE macrostrat.lookup_strat_names_new ( - strat_name_id integer, - strat_name character varying(100), - rank character varying(20), - concept_id integer, - rank_name character varying(200), - bed_id integer, - bed_name character varying(100), - mbr_id integer, - mbr_name character varying(100), - fm_id integer, - fm_name character varying(100), - gp_id integer, - gp_name character varying(100), - sgp_id integer, - sgp_name character varying(100), - early_age numeric, - late_age numeric, - gsc_lexicon character varying(20), - b_period character varying(100), - t_period character varying(100), - c_interval character varying(100), - name_no_lith character varying(100) -); - diff --git a/cli/macrostrat/cli/commands/table_meta/lookup_strat_names/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/lookup_strat_names/2-insert.sql deleted file mode 100644 index 57082065..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lookup_strat_names/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.lookup_strat_names_new (strat_name_id, strat_name, rank, concept_id, rank_name, bed_id, bed_name, mbr_id, mbr_name, fm_id, fm_name, gp_id, gp_name, sgp_id, sgp_name, early_age, late_age, gsc_lexicon, b_period, t_period, c_interval, name_no_lith) VALUES (%(strat_name_id)s, %(strat_name)s, %(rank)s, %(concept_id)s, %(rank_name)s, %(bed_id)s, %(bed_name)s, %(mbr_id)s, %(mbr_name)s, %(fm_id)s, %(fm_name)s, %(gp_id)s, %(gp_name)s, %(sgp_id)s, %(sgp_name)s, %(early_age)s, %(late_age)s, %(gsc_lexicon)s, %(b_period)s, %(t_period)s, %(c_interval)s, %(name_no_lith)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/lookup_strat_names/3-index.sql b/cli/macrostrat/cli/commands/table_meta/lookup_strat_names/3-index.sql deleted file mode 100644 index 40b0a376..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lookup_strat_names/3-index.sql +++ /dev/null @@ -1,10 +0,0 @@ - -CREATE INDEX ON macrostrat.lookup_strat_names_new (strat_name_id); -CREATE INDEX ON macrostrat.lookup_strat_names_new (concept_id); -CREATE INDEX ON macrostrat.lookup_strat_names_new (bed_id); -CREATE INDEX ON macrostrat.lookup_strat_names_new (mbr_id); -CREATE INDEX ON macrostrat.lookup_strat_names_new (fm_id); -CREATE INDEX ON macrostrat.lookup_strat_names_new (gp_id); -CREATE INDEX ON macrostrat.lookup_strat_names_new (sgp_id); -CREATE INDEX ON macrostrat.lookup_strat_names_new (strat_name); - diff --git a/cli/macrostrat/cli/commands/table_meta/lookup_unit_attrs_api/0-dump.sql 
b/cli/macrostrat/cli/commands/table_meta/lookup_unit_attrs_api/0-dump.sql deleted file mode 100644 index d0cf75b1..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lookup_unit_attrs_api/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT unit_id, lith, environ, econ, measure_short, measure_long -FROM lookup_unit_attrs_api - diff --git a/cli/macrostrat/cli/commands/table_meta/lookup_unit_attrs_api/1-create.sql b/cli/macrostrat/cli/commands/table_meta/lookup_unit_attrs_api/1-create.sql deleted file mode 100644 index ebd97728..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lookup_unit_attrs_api/1-create.sql +++ /dev/null @@ -1,10 +0,0 @@ - -CREATE TABLE macrostrat.lookup_unit_attrs_api_new ( - unit_id integer, - lith json, - environ json, - econ json, - measure_short json, - measure_long json -); - diff --git a/cli/macrostrat/cli/commands/table_meta/lookup_unit_attrs_api/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/lookup_unit_attrs_api/2-insert.sql deleted file mode 100644 index 48e9c771..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lookup_unit_attrs_api/2-insert.sql +++ /dev/null @@ -1,4 +0,0 @@ - -INSERT INTO macrostrat.lookup_unit_attrs_api_new (unit_id, lith, environ, econ, measure_short, measure_long) VALUES -(%(unit_id)s, encode(%(lith)s, 'escape')::json, encode(%(environ)s, 'escape')::json, encode(%(econ)s, 'escape')::json, encode(%(measure_short)s, 'escape')::json, encode(%(measure_long)s, 'escape')::json) - diff --git a/cli/macrostrat/cli/commands/table_meta/lookup_unit_attrs_api/3-index.sql b/cli/macrostrat/cli/commands/table_meta/lookup_unit_attrs_api/3-index.sql deleted file mode 100644 index 454a458b..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lookup_unit_attrs_api/3-index.sql +++ /dev/null @@ -1,3 +0,0 @@ - -CREATE INDEX ON macrostrat.lookup_unit_attrs_api_new (unit_id); - diff --git a/cli/macrostrat/cli/commands/table_meta/lookup_unit_liths/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/lookup_unit_liths/0-dump.sql deleted file mode 100644 index f69c220c..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lookup_unit_liths/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT unit_id, lith_class, lith_type, lith_short, lith_long, environ_class, environ_type, environ -FROM lookup_unit_liths - diff --git a/cli/macrostrat/cli/commands/table_meta/lookup_unit_liths/1-create.sql b/cli/macrostrat/cli/commands/table_meta/lookup_unit_liths/1-create.sql deleted file mode 100644 index ca8c770d..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lookup_unit_liths/1-create.sql +++ /dev/null @@ -1,12 +0,0 @@ - -CREATE TABLE macrostrat.lookup_unit_liths_new ( - unit_id integer, - lith_class character varying(100), - lith_type character varying(100), - lith_short text, - lith_long text, - environ_class character varying(100), - environ_type character varying(100), - environ character varying(255) -); - diff --git a/cli/macrostrat/cli/commands/table_meta/lookup_unit_liths/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/lookup_unit_liths/2-insert.sql deleted file mode 100644 index 47bcc6ba..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lookup_unit_liths/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.lookup_unit_liths_new (unit_id, lith_class, lith_type, lith_short, lith_long, environ_class, environ_type, environ) VALUES (%(unit_id)s, %(lith_class)s, %(lith_type)s, %(lith_short)s, %(lith_long)s, %(environ_class)s, %(environ_type)s, %(environ)s) - diff --git 
a/cli/macrostrat/cli/commands/table_meta/lookup_unit_liths/3-index.sql b/cli/macrostrat/cli/commands/table_meta/lookup_unit_liths/3-index.sql deleted file mode 100644 index 5b095501..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lookup_unit_liths/3-index.sql +++ /dev/null @@ -1,3 +0,0 @@ - -CREATE INDEX ON macrostrat.lookup_unit_liths_new (unit_id); - diff --git a/cli/macrostrat/cli/commands/table_meta/lookup_units/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/lookup_units/0-dump.sql deleted file mode 100644 index 3dd49e9a..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lookup_units/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT unit_id, col_area, project_id, t_int, t_int_name, t_int_age, t_age, t_prop, t_plat, t_plng, b_int, b_int_name, b_int_age, b_age, b_prop, b_plat, b_plng, clat, clng, color, text_color, units_above, units_below, pbdb_collections, pbdb_occurrences, age, age_id, epoch, epoch_id, period, period_id, era, era_id, eon, eon_id -FROM lookup_units - diff --git a/cli/macrostrat/cli/commands/table_meta/lookup_units/1-create.sql b/cli/macrostrat/cli/commands/table_meta/lookup_units/1-create.sql deleted file mode 100644 index 2604013c..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lookup_units/1-create.sql +++ /dev/null @@ -1,39 +0,0 @@ - -CREATE TABLE macrostrat.lookup_units_new ( - unit_id integer PRIMARY KEY NOT NULL, - col_area numeric NOT NULL, - project_id integer NOT NULL, - t_int integer, - t_int_name text, - t_int_age numeric, - t_age numeric, - t_prop numeric, - t_plat numeric, - t_plng numeric, - b_int integer, - b_int_name text, - b_int_age numeric, - b_age numeric, - b_prop numeric, - b_plat numeric, - b_plng numeric, - clat numeric, - clng numeric, - color text, - text_color text, - units_above text, - units_below text, - pbdb_collections integer, - pbdb_occurrences integer, - age text, - age_id integer, - epoch text, - epoch_id integer, - period text, - period_id integer, - era text, - era_id integer, - eon text, - eon_id integer -); - diff --git a/cli/macrostrat/cli/commands/table_meta/lookup_units/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/lookup_units/2-insert.sql deleted file mode 100644 index 14197062..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lookup_units/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.lookup_units_new (unit_id, col_area, project_id, t_int, t_int_name, t_int_age, t_age, t_prop, t_plat, t_plng, b_int, b_int_name, b_int_age, b_age, b_prop, b_plat, b_plng, clat, clng, color, text_color, units_above, units_below, pbdb_collections, pbdb_occurrences, age, age_id, epoch, epoch_id, period, period_id, era, era_id, eon, eon_id) VALUES (%(unit_id)s, %(col_area)s, %(project_id)s, %(t_int)s, %(t_int_name)s, %(t_int_age)s, %(t_age)s, %(t_prop)s, %(t_plat)s, %(t_plng)s, %(b_int)s, %(b_int_name)s, %(b_int_age)s, %(b_age)s, %(b_prop)s, %(b_plat)s, %(b_plng)s, %(clat)s, %(clng)s, %(color)s, %(text_color)s, %(units_above)s, %(units_below)s, %(pbdb_collections)s, %(pbdb_occurrences)s, %(age)s, %(age_id)s, %(epoch)s, %(epoch_id)s, %(period)s, %(period_id)s, %(era)s, %(era_id)s, %(eon)s, %(eon_id)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/lookup_units/3-index.sql b/cli/macrostrat/cli/commands/table_meta/lookup_units/3-index.sql deleted file mode 100644 index 9dc7100a..00000000 --- a/cli/macrostrat/cli/commands/table_meta/lookup_units/3-index.sql +++ /dev/null @@ -1,5 +0,0 @@ - -CREATE INDEX ON macrostrat.lookup_units_new (project_id); -CREATE INDEX ON 
macrostrat.lookup_units_new (t_int); -CREATE INDEX ON macrostrat.lookup_units_new (b_int); - diff --git a/cli/macrostrat/cli/commands/table_meta/measurements/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/measurements/0-dump.sql deleted file mode 100644 index 48fb838f..00000000 --- a/cli/macrostrat/cli/commands/table_meta/measurements/0-dump.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This one is executed on MariaDB -SELECT - id, - measurement_class, - measurement_type, - measurement -FROM measurements diff --git a/cli/macrostrat/cli/commands/table_meta/measurements/1-create.sql b/cli/macrostrat/cli/commands/table_meta/measurements/1-create.sql deleted file mode 100644 index a2c4c42d..00000000 --- a/cli/macrostrat/cli/commands/table_meta/measurements/1-create.sql +++ /dev/null @@ -1,16 +0,0 @@ -DROP TYPE IF EXISTS measurement_class CASCADE; -DROP TYPE IF EXISTS measurement_type CASCADE; - -CREATE TYPE measurement_class AS ENUM( - '','geophysical','geochemical','sedimentological'); -CREATE TYPE measurement_type AS ENUM( - '','material properties','geochronological','major elements','minor elements', - 'radiogenic isotopes','stable isotopes','petrologic','environmental'); -COMMIT; - -CREATE TABLE macrostrat.measurements_new ( - id serial PRIMARY KEY NOT NULL, - measurement_class measurement_class NOT NULL, - measurement_type measurement_type NOT NULL, - measurement text NOT NULL -); diff --git a/cli/macrostrat/cli/commands/table_meta/measurements/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/measurements/2-insert.sql deleted file mode 100644 index 4d3180c7..00000000 --- a/cli/macrostrat/cli/commands/table_meta/measurements/2-insert.sql +++ /dev/null @@ -1,9 +0,0 @@ -INSERT INTO macrostrat.measurements_new ( - id, measurement_class, measurement_type, measurement) -VALUES ( - %(id)s, - %(measurement_class)s, - %(measurement_type)s, - %(measurement)s -) - diff --git a/cli/macrostrat/cli/commands/table_meta/measurements/3-index.sql b/cli/macrostrat/cli/commands/table_meta/measurements/3-index.sql deleted file mode 100644 index a1fa36ec..00000000 --- a/cli/macrostrat/cli/commands/table_meta/measurements/3-index.sql +++ /dev/null @@ -1,4 +0,0 @@ -CREATE INDEX ON macrostrat.measurements_new (id); -CREATE INDEX ON macrostrat.measurements_new (measurement_class); -CREATE INDEX ON macrostrat.measurements_new (measurement_type); - diff --git a/cli/macrostrat/cli/commands/table_meta/measurements/4-process.sql b/cli/macrostrat/cli/commands/table_meta/measurements/4-process.sql deleted file mode 100644 index e69de29b..00000000 diff --git a/cli/macrostrat/cli/commands/table_meta/measures/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/measures/0-dump.sql deleted file mode 100644 index 0de59ef1..00000000 --- a/cli/macrostrat/cli/commands/table_meta/measures/0-dump.sql +++ /dev/null @@ -1,16 +0,0 @@ --- This one is executed on MariaDB -SELECT - id, - measuremeta_id, - measurement_id, - -- We had a problem with NUL characters in this column - replace(sample_no, CHAR(0x00 using utf8), "") sample_no, - measure_phase, - method, - units, - measure_value, - v_error, - v_error_units, - v_type, - v_n -FROM measures; diff --git a/cli/macrostrat/cli/commands/table_meta/measures/1-create.sql b/cli/macrostrat/cli/commands/table_meta/measures/1-create.sql deleted file mode 100644 index 99696ea8..00000000 --- a/cli/macrostrat/cli/commands/table_meta/measures/1-create.sql +++ /dev/null @@ -1,14 +0,0 @@ -CREATE TABLE macrostrat.measures_new ( - id serial, - measuremeta_id integer NOT NULL, -- REFERENCES 
macrostrat.measuremeta(id), - measurement_id integer NOT NULL, -- REFERENCES macrostrat.measurements(id), - sample_no varchar(50), - measure_phase varchar(100) NOT NULL, - method varchar(100) NOT NULL, - units varchar(25) NOT NULL, - measure_value decimal(10,5), - v_error decimal(10,5), - v_error_units varchar(25), - v_type varchar(100), - v_n integer -) diff --git a/cli/macrostrat/cli/commands/table_meta/measures/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/measures/2-insert.sql deleted file mode 100644 index 82921818..00000000 --- a/cli/macrostrat/cli/commands/table_meta/measures/2-insert.sql +++ /dev/null @@ -1,29 +0,0 @@ -INSERT INTO macrostrat.measures_new ( - id, - measuremeta_id, - measurement_id, - sample_no, - measure_phase, - method, - units, - measure_value, - v_error, - v_error_units, - v_type, - v_n -) -VALUES ( - %(id)s, - %(measuremeta_id)s, - %(measurement_id)s, - %(sample_no)s, - %(measure_phase)s, - %(method)s, - %(units)s, - %(measure_value)s, - %(v_error)s, - %(v_error_units)s, - %(v_type)s, - %(v_n)s -) - diff --git a/cli/macrostrat/cli/commands/table_meta/measures/3-index.sql b/cli/macrostrat/cli/commands/table_meta/measures/3-index.sql deleted file mode 100644 index 7b6a03a6..00000000 --- a/cli/macrostrat/cli/commands/table_meta/measures/3-index.sql +++ /dev/null @@ -1,3 +0,0 @@ -CREATE INDEX ON macrostrat.measures_new (measurement_id); -CREATE INDEX ON macrostrat.measures_new (measuremeta_id); - diff --git a/cli/macrostrat/cli/commands/table_meta/places/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/places/0-dump.sql deleted file mode 100644 index eb376e5e..00000000 --- a/cli/macrostrat/cli/commands/table_meta/places/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT place_id, name, abbrev, postal, country, country_abbrev, ST_AsText(geom) geom -FROM places - diff --git a/cli/macrostrat/cli/commands/table_meta/places/1-create.sql b/cli/macrostrat/cli/commands/table_meta/places/1-create.sql deleted file mode 100644 index 6a3bf77c..00000000 --- a/cli/macrostrat/cli/commands/table_meta/places/1-create.sql +++ /dev/null @@ -1,11 +0,0 @@ - -CREATE TABLE macrostrat.places_new ( - place_id integer PRIMARY KEY, - name text, - abbrev text, - postal text, - country text, - country_abbrev text, - geom geometry -); - diff --git a/cli/macrostrat/cli/commands/table_meta/places/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/places/2-insert.sql deleted file mode 100644 index c8fe4028..00000000 --- a/cli/macrostrat/cli/commands/table_meta/places/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.places_new (place_id, name, abbrev, postal, country, country_abbrev, geom) VALUES (%(place_id)s, %(name)s, %(abbrev)s, %(postal)s, %(country)s, %(country_abbrev)s, ST_SetSRID(ST_GeomFromText(%(geom)s), 4326)) - diff --git a/cli/macrostrat/cli/commands/table_meta/places/3-index.sql b/cli/macrostrat/cli/commands/table_meta/places/3-index.sql deleted file mode 100644 index ac967ada..00000000 --- a/cli/macrostrat/cli/commands/table_meta/places/3-index.sql +++ /dev/null @@ -1,3 +0,0 @@ - -CREATE INDEX ON macrostrat.places_new USING GiST (geom); - diff --git a/cli/macrostrat/cli/commands/table_meta/projects/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/projects/0-dump.sql deleted file mode 100644 index c1187a82..00000000 --- a/cli/macrostrat/cli/commands/table_meta/projects/0-dump.sql +++ /dev/null @@ -1,6 +0,0 @@ -SELECT -id, -project, -descrip, -timescale_id -FROM projects \ No newline at end of file diff --git 
a/cli/macrostrat/cli/commands/table_meta/projects/1-create.sql b/cli/macrostrat/cli/commands/table_meta/projects/1-create.sql deleted file mode 100644 index 4e09cfe7..00000000 --- a/cli/macrostrat/cli/commands/table_meta/projects/1-create.sql +++ /dev/null @@ -1,6 +0,0 @@ -CREATE TABLE macrostrat.projects_new ( - id serial PRIMARY KEY, - project text, - descrip text, - timescale_id int -); \ No newline at end of file diff --git a/cli/macrostrat/cli/commands/table_meta/projects/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/projects/2-insert.sql deleted file mode 100644 index 8e6348db..00000000 --- a/cli/macrostrat/cli/commands/table_meta/projects/2-insert.sql +++ /dev/null @@ -1,2 +0,0 @@ -INSERT INTO macrostrat.projects_new (id, project, descrip, timescale_id) VALUES - (%(id)s, %(project)s, %(descrip)s, %(timescale_id)s); \ No newline at end of file diff --git a/cli/macrostrat/cli/commands/table_meta/projects/3-index.sql b/cli/macrostrat/cli/commands/table_meta/projects/3-index.sql deleted file mode 100644 index f7724bc2..00000000 --- a/cli/macrostrat/cli/commands/table_meta/projects/3-index.sql +++ /dev/null @@ -1,2 +0,0 @@ -CREATE INDEX ON macrostrat.projects_new (project); -CREATE INDEX ON macrostrat.projects_new (timescale_id); \ No newline at end of file diff --git a/cli/macrostrat/cli/commands/table_meta/refs/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/refs/0-dump.sql deleted file mode 100644 index a44c5d77..00000000 --- a/cli/macrostrat/cli/commands/table_meta/refs/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT id, pub_year, author, ref, doi, compilation_code, url, ST_AsText(rgeom) rgeom -FROM refs - diff --git a/cli/macrostrat/cli/commands/table_meta/refs/1-create.sql b/cli/macrostrat/cli/commands/table_meta/refs/1-create.sql deleted file mode 100644 index dc81db82..00000000 --- a/cli/macrostrat/cli/commands/table_meta/refs/1-create.sql +++ /dev/null @@ -1,12 +0,0 @@ - -CREATE TABLE macrostrat.refs_new ( - id integer PRIMARY key, - pub_year integer, - author character varying(255), - ref text, - doi character varying(40), - compilation_code character varying(100), - url text, - rgeom geometry -); - diff --git a/cli/macrostrat/cli/commands/table_meta/refs/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/refs/2-insert.sql deleted file mode 100644 index bcada6b7..00000000 --- a/cli/macrostrat/cli/commands/table_meta/refs/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.refs_new (id, pub_year, author, ref, doi, compilation_code, url, rgeom) VALUES (%(id)s, %(pub_year)s, %(author)s, %(ref)s, %(doi)s, %(compilation_code)s, %(url)s, ST_SetSRID(ST_GeomFromText(%(rgeom)s), 4326)) - diff --git a/cli/macrostrat/cli/commands/table_meta/refs/3-index.sql b/cli/macrostrat/cli/commands/table_meta/refs/3-index.sql deleted file mode 100644 index e20b7c7f..00000000 --- a/cli/macrostrat/cli/commands/table_meta/refs/3-index.sql +++ /dev/null @@ -1,3 +0,0 @@ - -CREATE INDEX ON macrostrat.refs_new USING GiST (rgeom); - diff --git a/cli/macrostrat/cli/commands/table_meta/sections/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/sections/0-dump.sql deleted file mode 100644 index c428c80f..00000000 --- a/cli/macrostrat/cli/commands/table_meta/sections/0-dump.sql +++ /dev/null @@ -1 +0,0 @@ -select id, col_id from sections \ No newline at end of file diff --git a/cli/macrostrat/cli/commands/table_meta/sections/1-create.sql b/cli/macrostrat/cli/commands/table_meta/sections/1-create.sql deleted file mode 100644 index fa0f5f50..00000000 --- 
a/cli/macrostrat/cli/commands/table_meta/sections/1-create.sql +++ /dev/null @@ -1,4 +0,0 @@ -CREATE TABLE macrostrat.sections_new( - id serial PRIMARY KEY, - col_id int -) \ No newline at end of file diff --git a/cli/macrostrat/cli/commands/table_meta/sections/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/sections/2-insert.sql deleted file mode 100644 index 9c82660f..00000000 --- a/cli/macrostrat/cli/commands/table_meta/sections/2-insert.sql +++ /dev/null @@ -1 +0,0 @@ -INSERT INTO macrostrat.sections_new(id, col_id) VALUES (%(id)s,%(col_id)s) \ No newline at end of file diff --git a/cli/macrostrat/cli/commands/table_meta/sections/3-index.sql b/cli/macrostrat/cli/commands/table_meta/sections/3-index.sql deleted file mode 100644 index 7993dfcb..00000000 --- a/cli/macrostrat/cli/commands/table_meta/sections/3-index.sql +++ /dev/null @@ -1,2 +0,0 @@ -CREATE INDEX ON macrostrat.sections_new(id); -CREATE INDEX ON macrostrat.sections_new(col_id); \ No newline at end of file diff --git a/cli/macrostrat/cli/commands/table_meta/strat_names/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/strat_names/0-dump.sql deleted file mode 100644 index e73ee58f..00000000 --- a/cli/macrostrat/cli/commands/table_meta/strat_names/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT id, strat_name, rank, ref_id, concept_id -FROM strat_names - diff --git a/cli/macrostrat/cli/commands/table_meta/strat_names/1-create.sql b/cli/macrostrat/cli/commands/table_meta/strat_names/1-create.sql deleted file mode 100644 index ff21c625..00000000 --- a/cli/macrostrat/cli/commands/table_meta/strat_names/1-create.sql +++ /dev/null @@ -1,9 +0,0 @@ - -CREATE TABLE macrostrat.strat_names_new ( - id serial PRIMARY KEY NOT NULL, - strat_name character varying(100) NOT NULL, - rank character varying(50), - ref_id integer NOT NULL, - concept_id integer -) - diff --git a/cli/macrostrat/cli/commands/table_meta/strat_names/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/strat_names/2-insert.sql deleted file mode 100644 index ffea9a22..00000000 --- a/cli/macrostrat/cli/commands/table_meta/strat_names/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.strat_names_new (id, strat_name, rank, ref_id, concept_id) VALUES (%(id)s, %(strat_name)s, %(rank)s, %(ref_id)s, %(concept_id)s); - diff --git a/cli/macrostrat/cli/commands/table_meta/strat_names/3-index.sql b/cli/macrostrat/cli/commands/table_meta/strat_names/3-index.sql deleted file mode 100644 index 417de63e..00000000 --- a/cli/macrostrat/cli/commands/table_meta/strat_names/3-index.sql +++ /dev/null @@ -1,6 +0,0 @@ - -CREATE INDEX ON macrostrat.strat_names_new (strat_name); -CREATE INDEX ON macrostrat.strat_names_new (rank); -CREATE INDEX ON macrostrat.strat_names_new (ref_id); -CREATE INDEX ON macrostrat.strat_names_new (concept_id); - diff --git a/cli/macrostrat/cli/commands/table_meta/strat_names_meta/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/strat_names_meta/0-dump.sql deleted file mode 100644 index 6a4f6438..00000000 --- a/cli/macrostrat/cli/commands/table_meta/strat_names_meta/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT concept_id, orig_id, name, geologic_age, interval_id, b_int, t_int, usage_notes, other, province, url, ref_id -FROM strat_names_meta - diff --git a/cli/macrostrat/cli/commands/table_meta/strat_names_meta/1-create.sql b/cli/macrostrat/cli/commands/table_meta/strat_names_meta/1-create.sql deleted file mode 100644 index 6d385af0..00000000 --- a/cli/macrostrat/cli/commands/table_meta/strat_names_meta/1-create.sql +++ 
/dev/null @@ -1,16 +0,0 @@ - -CREATE TABLE macrostrat.strat_names_meta_new ( - concept_id integer PRIMARY KEY, - orig_id integer NOT NULL, - name character varying(40), - geologic_age text, - interval_id integer NOT NULL, - b_int integer NOT NULL, - t_int integer NOT NULL, - usage_notes text, - other text, - province text, - url character varying(150), - ref_id integer NOT NULL -); - diff --git a/cli/macrostrat/cli/commands/table_meta/strat_names_meta/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/strat_names_meta/2-insert.sql deleted file mode 100644 index 988ba9a4..00000000 --- a/cli/macrostrat/cli/commands/table_meta/strat_names_meta/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.strat_names_meta_new (concept_id, orig_id, name, geologic_age, interval_id, b_int, t_int, usage_notes, other, province, url, ref_id) VALUES (%(concept_id)s, %(orig_id)s, %(name)s, %(geologic_age)s, %(interval_id)s, %(b_int)s, %(t_int)s, %(usage_notes)s, %(other)s, %(province)s, %(url)s, %(ref_id)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/strat_names_meta/3-index.sql b/cli/macrostrat/cli/commands/table_meta/strat_names_meta/3-index.sql deleted file mode 100644 index 99689c85..00000000 --- a/cli/macrostrat/cli/commands/table_meta/strat_names_meta/3-index.sql +++ /dev/null @@ -1,6 +0,0 @@ - -CREATE INDEX ON macrostrat.strat_names_meta_new (interval_id); -CREATE INDEX ON macrostrat.strat_names_meta_new (b_int); -CREATE INDEX ON macrostrat.strat_names_meta_new (t_int); -CREATE INDEX ON macrostrat.strat_names_meta_new (ref_id); - diff --git a/cli/macrostrat/cli/commands/table_meta/strat_names_places/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/strat_names_places/0-dump.sql deleted file mode 100644 index ae301876..00000000 --- a/cli/macrostrat/cli/commands/table_meta/strat_names_places/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT strat_name_id, place_id -FROM strat_names_places - diff --git a/cli/macrostrat/cli/commands/table_meta/strat_names_places/1-create.sql b/cli/macrostrat/cli/commands/table_meta/strat_names_places/1-create.sql deleted file mode 100644 index a0ea9b95..00000000 --- a/cli/macrostrat/cli/commands/table_meta/strat_names_places/1-create.sql +++ /dev/null @@ -1,6 +0,0 @@ - -CREATE TABLE macrostrat.strat_names_places_new ( - strat_name_id integer NOT NULL, - place_id integer NOT NULL -); - diff --git a/cli/macrostrat/cli/commands/table_meta/strat_names_places/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/strat_names_places/2-insert.sql deleted file mode 100644 index 6989d450..00000000 --- a/cli/macrostrat/cli/commands/table_meta/strat_names_places/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.strat_names_places_new (strat_name_id, place_id) VALUES (%(strat_name_id)s, %(place_id)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/strat_names_places/3-index.sql b/cli/macrostrat/cli/commands/table_meta/strat_names_places/3-index.sql deleted file mode 100644 index 6d76f3cc..00000000 --- a/cli/macrostrat/cli/commands/table_meta/strat_names_places/3-index.sql +++ /dev/null @@ -1,4 +0,0 @@ - -CREATE INDEX ON macrostrat.strat_names_places_new (strat_name_id); -CREATE INDEX ON macrostrat.strat_names_places_new (place_id); - diff --git a/cli/macrostrat/cli/commands/table_meta/timescales/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/timescales/0-dump.sql deleted file mode 100644 index 01c1c7ec..00000000 --- a/cli/macrostrat/cli/commands/table_meta/timescales/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT id, 
timescale, ref_id -FROM timescales - diff --git a/cli/macrostrat/cli/commands/table_meta/timescales/1-create.sql b/cli/macrostrat/cli/commands/table_meta/timescales/1-create.sql deleted file mode 100644 index c4c6ebb3..00000000 --- a/cli/macrostrat/cli/commands/table_meta/timescales/1-create.sql +++ /dev/null @@ -1,7 +0,0 @@ - -CREATE TABLE macrostrat.timescales_new ( - id integer PRIMARY KEY, - timescale character varying(100), - ref_id integer -); - diff --git a/cli/macrostrat/cli/commands/table_meta/timescales/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/timescales/2-insert.sql deleted file mode 100644 index 00f12b4a..00000000 --- a/cli/macrostrat/cli/commands/table_meta/timescales/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.timescales_new (id, timescale, ref_id) VALUES (%(id)s, %(timescale)s, %(ref_id)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/timescales/3-index.sql b/cli/macrostrat/cli/commands/table_meta/timescales/3-index.sql deleted file mode 100644 index 693a7ba1..00000000 --- a/cli/macrostrat/cli/commands/table_meta/timescales/3-index.sql +++ /dev/null @@ -1,4 +0,0 @@ - -CREATE INDEX ON macrostrat.timescales_new (timescale); -CREATE INDEX ON macrostrat.timescales_new (ref_id); - diff --git a/cli/macrostrat/cli/commands/table_meta/timescales_intervals/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/timescales_intervals/0-dump.sql deleted file mode 100644 index 4fe044c5..00000000 --- a/cli/macrostrat/cli/commands/table_meta/timescales_intervals/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT timescale_id, interval_id -FROM timescales_intervals - diff --git a/cli/macrostrat/cli/commands/table_meta/timescales_intervals/1-create.sql b/cli/macrostrat/cli/commands/table_meta/timescales_intervals/1-create.sql deleted file mode 100644 index 7cb2b69a..00000000 --- a/cli/macrostrat/cli/commands/table_meta/timescales_intervals/1-create.sql +++ /dev/null @@ -1,6 +0,0 @@ - -CREATE TABLE macrostrat.timescales_intervals_new ( - timescale_id integer, - interval_id integer -); - diff --git a/cli/macrostrat/cli/commands/table_meta/timescales_intervals/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/timescales_intervals/2-insert.sql deleted file mode 100644 index 4bed69a3..00000000 --- a/cli/macrostrat/cli/commands/table_meta/timescales_intervals/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.timescales_intervals_new (timescale_id, interval_id) VALUES (%(timescale_id)s, %(interval_id)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/timescales_intervals/3-index.sql b/cli/macrostrat/cli/commands/table_meta/timescales_intervals/3-index.sql deleted file mode 100644 index 502f1553..00000000 --- a/cli/macrostrat/cli/commands/table_meta/timescales_intervals/3-index.sql +++ /dev/null @@ -1,4 +0,0 @@ - -CREATE INDEX ON macrostrat.timescales_intervals_new (timescale_id); -CREATE INDEX ON macrostrat.timescales_intervals_new (interval_id); - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_boundaries/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/unit_boundaries/0-dump.sql deleted file mode 100644 index 1c96b4b9..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_boundaries/0-dump.sql +++ /dev/null @@ -1,15 +0,0 @@ -SELECT - id, - t1, - t1_prop, - t1_age, - unit_id, - unit_id_2, - section_id, - boundary_position, - boundary_type, - boundary_status, - paleo_lat, - paleo_lng, - ref_id -FROM unit_boundaries; \ No newline at end of file diff --git 
a/cli/macrostrat/cli/commands/table_meta/unit_boundaries/1-create.sql b/cli/macrostrat/cli/commands/table_meta/unit_boundaries/1-create.sql deleted file mode 100644 index 35d1572e..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_boundaries/1-create.sql +++ /dev/null @@ -1,21 +0,0 @@ -DROP TYPE IF EXISTS macrostrat.boundary_type CASCADE; -CREATE TYPE macrostrat.boundary_type AS ENUM('','unconformity','conformity','fault','disconformity','non-conformity','angular unconformity'); - -DROP TYPE IF EXISTS macrostrat.boundary_status CASCADE; -CREATE TYPE macrostrat.boundary_status AS ENUM('','modeled','relative','absolute','spike'); - -CREATE TABLE macrostrat.unit_boundaries ( - id serial PRIMARY KEY, - t1 numeric NOT NULL, - t1_prop decimal(6,5) NOT NULL, - t1_age decimal(8,4) NOT NULL, - unit_id integer NOT NULL, - unit_id_2 integer NOT NULL, - section_id integer NOT NULL, - boundary_position decimal(6,2) DEFAULT NULL, - boundary_type macrostrat.boundary_type NOT NULL DEFAULT '', - boundary_status macrostrat.boundary_status NOT NULL DEFAULT 'modeled', - paleo_lat decimal(8,5), - paleo_lng decimal(8,5), - ref_id integer NOT NULL DEFAULT 217 -); diff --git a/cli/macrostrat/cli/commands/table_meta/unit_boundaries/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/unit_boundaries/2-insert.sql deleted file mode 100644 index 2d6a3876..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_boundaries/2-insert.sql +++ /dev/null @@ -1,2 +0,0 @@ -INSERT INTO macrostrat.unit_boundaries(id, t1, t1_prop, t1_age, unit_id, unit_id_2, section_id, boundary_position, boundary_type, boundary_status, paleo_lat, paleo_lng, ref_id) VALUES - (%(id)s, %(t1)s, %(t1_prop)s, %(t1_age)s, %(unit_id)s, %(unit_id_2)s, %(section_id)s, %(boundary_position)s, %(boundary_type)s, %(boundary_status)s, %(paleo_lat)s, %(paleo_lng)s, %(ref_id)s); diff --git a/cli/macrostrat/cli/commands/table_meta/unit_boundaries/3-index.sql b/cli/macrostrat/cli/commands/table_meta/unit_boundaries/3-index.sql deleted file mode 100644 index ddc31173..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_boundaries/3-index.sql +++ /dev/null @@ -1,4 +0,0 @@ -CREATE INDEX on macrostrat.unit_boundaries (t1); -CREATE INDEX on macrostrat.unit_boundaries (unit_id); -CREATE INDEX on macrostrat.unit_boundaries (unit_id_2); -CREATE INDEX on macrostrat.unit_boundaries (section_id); \ No newline at end of file diff --git a/cli/macrostrat/cli/commands/table_meta/unit_econs/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/unit_econs/0-dump.sql deleted file mode 100644 index 7664ffb7..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_econs/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT id, unit_id, econ_id, ref_id, date_mod -FROM unit_econs - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_econs/1-create.sql b/cli/macrostrat/cli/commands/table_meta/unit_econs/1-create.sql deleted file mode 100644 index 8c3b4bd7..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_econs/1-create.sql +++ /dev/null @@ -1,9 +0,0 @@ - -CREATE TABLE macrostrat.unit_econs_new ( - id integer NOT NULL PRIMARY KEY, - unit_id integer, - econ_id integer, - ref_id integer, - date_mod text -); - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_econs/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/unit_econs/2-insert.sql deleted file mode 100644 index 36e2d364..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_econs/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.unit_econs_new (id, unit_id, econ_id, 
ref_id, date_mod) VALUES (%(id)s, %(unit_id)s, %(econ_id)s, %(ref_id)s, %(date_mod)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_econs/3-index.sql b/cli/macrostrat/cli/commands/table_meta/unit_econs/3-index.sql deleted file mode 100644 index 1f4c2a2b..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_econs/3-index.sql +++ /dev/null @@ -1,5 +0,0 @@ - -CREATE INDEX ON macrostrat.unit_econs_new (econ_id); -CREATE INDEX ON macrostrat.unit_econs_new (unit_id); -CREATE INDEX ON macrostrat.unit_econs_new (ref_id); - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_environs/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/unit_environs/0-dump.sql deleted file mode 100644 index b6149ce5..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_environs/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT id, unit_id, environ_id, ref_id, date_mod -FROM unit_environs - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_environs/1-create.sql b/cli/macrostrat/cli/commands/table_meta/unit_environs/1-create.sql deleted file mode 100644 index e76de328..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_environs/1-create.sql +++ /dev/null @@ -1,9 +0,0 @@ - -CREATE TABLE macrostrat.unit_environs_new ( - id integer NOT NULL PRIMARY KEY, - unit_id integer, - environ_id integer, - ref_id integer, - date_mod text -); - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_environs/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/unit_environs/2-insert.sql deleted file mode 100644 index 99546f7e..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_environs/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.unit_environs_new (id, unit_id, environ_id, ref_id, date_mod) VALUES (%(id)s, %(unit_id)s, %(environ_id)s, %(ref_id)s, %(date_mod)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_environs/3-index.sql b/cli/macrostrat/cli/commands/table_meta/unit_environs/3-index.sql deleted file mode 100644 index 490afbff..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_environs/3-index.sql +++ /dev/null @@ -1,5 +0,0 @@ - -CREATE INDEX ON macrostrat.unit_environs_new (environ_id); -CREATE INDEX ON macrostrat.unit_environs_new (unit_id); -CREATE INDEX ON macrostrat.unit_environs_new (ref_id); - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_lith_atts/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/unit_lith_atts/0-dump.sql deleted file mode 100644 index 28d94034..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_lith_atts/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT id, unit_lith_id, lith_att_id, ref_id, date_mod -FROM unit_liths_atts - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_lith_atts/1-create.sql b/cli/macrostrat/cli/commands/table_meta/unit_lith_atts/1-create.sql deleted file mode 100644 index 498a22dd..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_lith_atts/1-create.sql +++ /dev/null @@ -1,9 +0,0 @@ - -CREATE TABLE macrostrat.unit_lith_atts_new ( - id integer NOT NULL PRIMARY KEY, - unit_lith_id integer, - lith_att_id integer, - ref_id integer, - date_mod text -); - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_lith_atts/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/unit_lith_atts/2-insert.sql deleted file mode 100644 index c4b8da45..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_lith_atts/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.unit_lith_atts_new (id, unit_lith_id, lith_att_id, ref_id, date_mod) 
VALUES (%(id)s, %(unit_lith_id)s, %(lith_att_id)s, %(ref_id)s, %(date_mod)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_lith_atts/3-index.sql b/cli/macrostrat/cli/commands/table_meta/unit_lith_atts/3-index.sql deleted file mode 100644 index cab49ff8..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_lith_atts/3-index.sql +++ /dev/null @@ -1,5 +0,0 @@ - -CREATE INDEX ON macrostrat.unit_lith_atts_new (unit_lith_id); -CREATE INDEX ON macrostrat.unit_lith_atts_new (lith_att_id); -CREATE INDEX ON macrostrat.unit_lith_atts_new (ref_id); - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_liths/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/unit_liths/0-dump.sql deleted file mode 100644 index f5e01f1c..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_liths/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT id, lith_id, unit_id, prop, dom, comp_prop, mod_prop, toc, ref_id, date_mod -FROM unit_liths - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_liths/1-create.sql b/cli/macrostrat/cli/commands/table_meta/unit_liths/1-create.sql deleted file mode 100644 index f09ba911..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_liths/1-create.sql +++ /dev/null @@ -1,14 +0,0 @@ - -CREATE TABLE macrostrat.unit_liths_new ( - id integer NOT NULL PRIMARY KEY, - lith_id integer, - unit_id integer, - prop text, - dom text, - comp_prop numeric, - mod_prop numeric, - toc numeric, - ref_id integer, - date_mod text -); - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_liths/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/unit_liths/2-insert.sql deleted file mode 100644 index b03c7509..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_liths/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.unit_liths_new (id, lith_id, unit_id, prop, dom, comp_prop, mod_prop, toc, ref_id, date_mod) VALUES (%(id)s, %(lith_id)s, %(unit_id)s, %(prop)s, %(dom)s, %(comp_prop)s, %(mod_prop)s, %(toc)s, %(ref_id)s, %(date_mod)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_liths/3-index.sql b/cli/macrostrat/cli/commands/table_meta/unit_liths/3-index.sql deleted file mode 100644 index 77016fe3..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_liths/3-index.sql +++ /dev/null @@ -1,5 +0,0 @@ - -CREATE INDEX ON macrostrat.unit_liths_new (lith_id); -CREATE INDEX ON macrostrat.unit_liths_new (unit_id); -CREATE INDEX ON macrostrat.unit_liths_new (ref_id); - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_strat_names/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/unit_strat_names/0-dump.sql deleted file mode 100644 index f466074d..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_strat_names/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT id, unit_id, strat_name_id -FROM unit_strat_names - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_strat_names/1-create.sql b/cli/macrostrat/cli/commands/table_meta/unit_strat_names/1-create.sql deleted file mode 100644 index 3cf2ee52..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_strat_names/1-create.sql +++ /dev/null @@ -1,7 +0,0 @@ - -CREATE TABLE macrostrat.unit_strat_names_new ( - id serial PRIMARY KEY NOT NULL, - unit_id integer NOT NULL, - strat_name_id integer NOT NULL -); - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_strat_names/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/unit_strat_names/2-insert.sql deleted file mode 100644 index 0e936bc2..00000000 --- 
a/cli/macrostrat/cli/commands/table_meta/unit_strat_names/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.unit_strat_names_new (id, unit_id, strat_name_id) VALUES (%(id)s, %(unit_id)s, %(strat_name_id)s); - diff --git a/cli/macrostrat/cli/commands/table_meta/unit_strat_names/3-index.sql b/cli/macrostrat/cli/commands/table_meta/unit_strat_names/3-index.sql deleted file mode 100644 index bb35d720..00000000 --- a/cli/macrostrat/cli/commands/table_meta/unit_strat_names/3-index.sql +++ /dev/null @@ -1,4 +0,0 @@ - -CREATE INDEX ON macrostrat.unit_strat_names_new (unit_id); -CREATE INDEX ON macrostrat.unit_strat_names_new (strat_name_id); - diff --git a/cli/macrostrat/cli/commands/table_meta/units_sections/0-dump.sql b/cli/macrostrat/cli/commands/table_meta/units_sections/0-dump.sql deleted file mode 100644 index be95112e..00000000 --- a/cli/macrostrat/cli/commands/table_meta/units_sections/0-dump.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SELECT id, unit_id, section_id, col_id -FROM units_sections - diff --git a/cli/macrostrat/cli/commands/table_meta/units_sections/1-create.sql b/cli/macrostrat/cli/commands/table_meta/units_sections/1-create.sql deleted file mode 100644 index 734d2baa..00000000 --- a/cli/macrostrat/cli/commands/table_meta/units_sections/1-create.sql +++ /dev/null @@ -1,8 +0,0 @@ - -CREATE TABLE macrostrat.units_sections_new ( - id serial PRIMARY KEY NOT NULL, - unit_id integer NOT NULL, - section_id integer NOT NULL, - col_id integer NOT NULL -); - diff --git a/cli/macrostrat/cli/commands/table_meta/units_sections/2-insert.sql b/cli/macrostrat/cli/commands/table_meta/units_sections/2-insert.sql deleted file mode 100644 index 3efe3866..00000000 --- a/cli/macrostrat/cli/commands/table_meta/units_sections/2-insert.sql +++ /dev/null @@ -1,3 +0,0 @@ - -INSERT INTO macrostrat.units_sections_new (id, unit_id, section_id, col_id) VALUES (%(id)s, %(unit_id)s, %(section_id)s, %(col_id)s) - diff --git a/cli/macrostrat/cli/commands/table_meta/units_sections/3-index.sql b/cli/macrostrat/cli/commands/table_meta/units_sections/3-index.sql deleted file mode 100644 index 9091bb6d..00000000 --- a/cli/macrostrat/cli/commands/table_meta/units_sections/3-index.sql +++ /dev/null @@ -1,5 +0,0 @@ - -CREATE INDEX ON macrostrat.units_sections_new (unit_id); -CREATE INDEX ON macrostrat.units_sections_new (section_id); -CREATE INDEX ON macrostrat.units_sections_new (col_id); - diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/schlep-index.sql b/cli/macrostrat/cli/database/mariadb/postgresql_migration/schlep-index.sql new file mode 100644 index 00000000..065a1461 --- /dev/null +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/schlep-index.sql @@ -0,0 +1,179 @@ +--Consolidating all of the schlep index scripts here from https://github.com/UW-Macrostrat/macrostrat/blob/maria-migrate/cli/macrostrat/cli/commands/table_meta/ + +--autocomplete +CREATE INDEX ON macrostrat.autocomplete_new (id); +CREATE INDEX ON macrostrat.autocomplete_new (name); +CREATE INDEX ON macrostrat.autocomplete_new (type); +CREATE INDEX ON macrostrat.autocomplete_new (category); + +--col_areas +CREATE INDEX ON macrostrat.col_areas_new (col_id); +CREATE INDEX ON macrostrat.col_areas_new USING GIST (col_area); + +--col_groups +CREATE INDEX ON macrostrat.col_groups_new (id); + +--col_refs +CREATE INDEX ON macrostrat.col_refs_new (col_id); +CREATE INDEX ON macrostrat.col_refs_new (ref_id); + +--cols +CREATE INDEX ON macrostrat.cols_new (project_id); +CREATE INDEX ON macrostrat.cols_new USING GIST
(coordinate); +CREATE INDEX ON macrostrat.cols_new USING GIST (poly_geom); +CREATE INDEX ON macrostrat.cols_new (col_group_id); +CREATE INDEX ON macrostrat.cols_new (status_code); + +--concepts_places +CREATE INDEX ON macrostrat.concepts_places_new (concept_id); +CREATE INDEX ON macrostrat.concepts_places_new (place_id); + +--econs + +--environs + +--intervals +CREATE INDEX ON macrostrat.intervals_new (id); +CREATE INDEX ON macrostrat.intervals_new (age_top); +CREATE INDEX ON macrostrat.intervals_new (age_bottom); +CREATE INDEX ON macrostrat.intervals_new (interval_type); +CREATE INDEX ON macrostrat.intervals_new (interval_name); + +--lith_atts +CREATE INDEX ON macrostrat.lith_atts_new (att_type); +CREATE INDEX ON macrostrat.lith_atts_new (lith_att); + +--liths +CREATE INDEX ON macrostrat.liths_new (lith); +CREATE INDEX ON macrostrat.liths_new (lith_class); +CREATE INDEX ON macrostrat.liths_new (lith_type); + +--lookup_strat_names +CREATE INDEX ON macrostrat.lookup_strat_names_new (strat_name_id); +CREATE INDEX ON macrostrat.lookup_strat_names_new (concept_id); +CREATE INDEX ON macrostrat.lookup_strat_names_new (bed_id); +CREATE INDEX ON macrostrat.lookup_strat_names_new (mbr_id); +CREATE INDEX ON macrostrat.lookup_strat_names_new (fm_id); +CREATE INDEX ON macrostrat.lookup_strat_names_new (gp_id); +CREATE INDEX ON macrostrat.lookup_strat_names_new (sgp_id); +CREATE INDEX ON macrostrat.lookup_strat_names_new (strat_name); + +--lookup_unit_attrs_api +CREATE INDEX ON macrostrat.lookup_unit_attrs_api_new (unit_id); + +--lookup_unit_intervals +CREATE INDEX ON macrostrat.lookup_unit_intervals_new (unit_id); +CREATE INDEX ON macrostrat.lookup_unit_intervals_new (best_interval_id); + +--lookup_unit_liths +CREATE INDEX ON macrostrat.lookup_unit_liths_new (unit_id); + +--lookup_units +CREATE INDEX ON macrostrat.lookup_units_new (project_id); +CREATE INDEX ON macrostrat.lookup_units_new (t_int); +CREATE INDEX ON macrostrat.lookup_units_new (b_int); + +--measurements +CREATE INDEX ON macrostrat.measurements_new (id); +CREATE INDEX ON macrostrat.measurements_new (measurement_class); +CREATE INDEX ON macrostrat.measurements_new (measurement_type); + +--measuremeta +CREATE INDEX ON macrostrat.measuremeta_new (lith_id); +CREATE INDEX ON macrostrat.measuremeta_new (ref_id); +CREATE INDEX ON macrostrat.measuremeta_new (lith_att_id); + +--measures +CREATE INDEX ON macrostrat.measures_new (measurement_id); +CREATE INDEX ON macrostrat.measures_new (measuremeta_id); + +--pbdb_collections +CREATE INDEX ON macrostrat.pbdb_collections_new (collection_no); +CREATE INDEX ON macrostrat.pbdb_collections_new (early_age); +CREATE INDEX ON macrostrat.pbdb_collections_new (late_age); +CREATE INDEX ON macrostrat.pbdb_collections_new USING GiST (geom); + +--places +CREATE INDEX ON macrostrat.places_new USING GiST (geom); + +--projects +CREATE INDEX ON macrostrat.projects_new (project); +CREATE INDEX ON macrostrat.projects_new (timescale_id); + +--refs +CREATE INDEX ON macrostrat.refs_new USING GiST (rgeom); + +--sections +CREATE INDEX ON macrostrat.sections_new(id); +CREATE INDEX ON macrostrat.sections_new(col_id); + +--strat_names +CREATE INDEX ON macrostrat.strat_names_new (strat_name); +CREATE INDEX ON macrostrat.strat_names_new (rank); +CREATE INDEX ON macrostrat.strat_names_new (ref_id); +CREATE INDEX ON macrostrat.strat_names_new (concept_id); + +--strat_names_meta +CREATE INDEX ON macrostrat.strat_names_meta_new (interval_id); +CREATE INDEX ON macrostrat.strat_names_meta_new (b_int); +CREATE INDEX ON 
macrostrat.strat_names_meta_new (t_int); +CREATE INDEX ON macrostrat.strat_names_meta_new (ref_id); + +--strat_names_places +CREATE INDEX ON macrostrat.strat_names_places_new (strat_name_id); +CREATE INDEX ON macrostrat.strat_names_places_new (place_id); + +--strat_tree +CREATE INDEX ON macrostrat.strat_tree_new (parent); +CREATE INDEX ON macrostrat.strat_tree_new (child); +CREATE INDEX ON macrostrat.strat_tree_new (ref_id); + +--timescales +CREATE INDEX ON macrostrat.timescales_new (timescale); +CREATE INDEX ON macrostrat.timescales_new (ref_id); + +--timescales_intervals +CREATE INDEX ON macrostrat.timescales_intervals_new (timescale_id); +CREATE INDEX ON macrostrat.timescales_intervals_new (interval_id); + +--unit_boundaries +CREATE INDEX on macrostrat.unit_boundaries (t1); +CREATE INDEX on macrostrat.unit_boundaries (unit_id); +CREATE INDEX on macrostrat.unit_boundaries (unit_id_2); +CREATE INDEX on macrostrat.unit_boundaries (section_id); + +--unit_econs +CREATE INDEX ON macrostrat.unit_econs_new (econ_id); +CREATE INDEX ON macrostrat.unit_econs_new (unit_id); +CREATE INDEX ON macrostrat.unit_econs_new (ref_id); + +--unit_environs +CREATE INDEX ON macrostrat.unit_environs_new (environ_id); +CREATE INDEX ON macrostrat.unit_environs_new (unit_id); +CREATE INDEX ON macrostrat.unit_environs_new (ref_id); + +--unit_lith_atts +CREATE INDEX ON macrostrat.unit_lith_atts_new (unit_lith_id); +CREATE INDEX ON macrostrat.unit_lith_atts_new (lith_att_id); +CREATE INDEX ON macrostrat.unit_lith_atts_new (ref_id); + +--unit_liths +CREATE INDEX ON macrostrat.unit_liths_new (lith_id); +CREATE INDEX ON macrostrat.unit_liths_new (unit_id); +CREATE INDEX ON macrostrat.unit_liths_new (ref_id); + +--unit_strat_names +CREATE INDEX ON macrostrat.unit_strat_names_new (unit_id); +CREATE INDEX ON macrostrat.unit_strat_names_new (strat_name_id); + +--units +CREATE INDEX ON macrostrat.units_new (section_id); +CREATE INDEX ON macrostrat.units_new (col_id); +CREATE INDEX ON macrostrat.units_new (strat_name); +CREATE INDEX ON macrostrat.units_new (color); + +--units_sections + +CREATE INDEX ON macrostrat.units_sections_new (unit_id); +CREATE INDEX ON macrostrat.units_sections_new (section_id); +CREATE INDEX ON macrostrat.units_sections_new (col_id); From 8f8ab0248e2cbc4932d7368a68d1eab91145ce40 Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Tue, 3 Sep 2024 13:58:47 -0500 Subject: [PATCH 46/48] Delete cli/macrostrat/cli/commands/schlep.py --- cli/macrostrat/cli/commands/schlep.py | 139 -------------------------- 1 file changed, 139 deletions(-) delete mode 100644 cli/macrostrat/cli/commands/schlep.py diff --git a/cli/macrostrat/cli/commands/schlep.py b/cli/macrostrat/cli/commands/schlep.py deleted file mode 100644 index a448f05c..00000000 --- a/cli/macrostrat/cli/commands/schlep.py +++ /dev/null @@ -1,139 +0,0 @@ -from .base import Base -import sys -import datetime -from psycopg2.extensions import AsIs -from .table_meta import * -import urllib.request, urllib.error, urllib.parse - - -class Schlep(Base): - """ - macrostrat schlep : - Move a table from MariaDB to Postgres - - Available tables: - autocomplete - col_areas - refs - cols - intervals - timescales - liths - strat_names_meta - col_refs - concepts_places - units - lookup_strat_names - unit_strat_names - units_sections - unit_boundaries - strat_names - strat_tree - unit_liths - strat_names_places - col_groups - places - projects - lookup_unit_intervals - lith_atts - lookup_unit_liths - timescales_intervals - measures - measuremeta - measurements - - all - 
will move all above tables - - Usage: - macrostrat schlep <table>
- macrostrat schlep all - macrostrat schlep -h | --help - Options: - -h --help Show this screen. - --version Show version. - Examples: - macrostrat schlep strat_names - Help: - For help using this tool, please open an issue on the Github repository: - https://github.com/UW-Macrostrat/macrostrat-cli - """ - - def move_table(self, table): - if table not in tables: - print("Table not found") - - print(" %s" % (table,)) - # Clean up - self.pg["cursor"].execute( - "DROP TABLE IF EXISTS macrostrat.%(table)s_new", {"table": AsIs(table)} - ) - self.pg["connection"].commit() - - # Create the new table in Postgres - self.pg["cursor"].execute(tables[table]["create"]) - self.pg["connection"].commit() - - # Dump the data from MariaDB - self.mariadb["cursor"].execute(tables[table]["dump"]) - - # Iterate on each row and insert into Postgres - row = self.mariadb["cursor"].fetchone() - while row is not None: - self.pg["cursor"].execute(tables[table]["insert"], row) - row = self.mariadb["cursor"].fetchone() - self.pg["connection"].commit() - - # Add any indexes - if "index" in tables[table] and len(tables[table]["index"].strip()) > 0: - self.pg["cursor"].execute(tables[table]["index"]) - self.pg["connection"].commit() - - # Run processing steps, if needed - if len(tables[table]["process"].strip()) != 0: - self.pg["cursor"].execute(tables[table]["process"]) - self.pg["connection"].commit() - - # Rename the table, drop the old one, add updated comment - self.pg["cursor"].execute( - """ - COMMENT ON TABLE macrostrat.%(table)s_new IS %(time)s; - ALTER TABLE IF EXISTS macrostrat.%(table)s RENAME TO %(table)s_old; - ALTER TABLE macrostrat.%(table)s_new RENAME TO %(table)s; - DROP TABLE IF EXISTS macrostrat.%(table)s_old CASCADE; - """, - { - "table": AsIs(table), - "time": "Last updated from MariaDB - " - + datetime.datetime.now().strftime("%Y-%m-%d %H:%M"), - }, - ) - self.pg["connection"].commit() - - def run(self): - print(self.args) - # Check if a command was provided - if len(self.args) == 1: - print("Please specify a table to move from MariaDB to Postgres") - for table in tables: - print(" %s" % (table,)) - sys.exit() - - # Validate the passed table - table = self.args[1] - if table not in tables and table != "all": - print("Invalid table") - sys.exit() - - if table == "all": - for t in tables: - Schlep.move_table(self, t) - else: - Schlep.move_table(self, table) - - try: - urllib.request.urlopen( - "http://127.0.0.1:5000/api/v2/columns/refresh-cache?cacheRefreshKey=%s" - % (self.credentials["cacheRefreshKey"],) - ).read() - except: - print("API cache was not updated") From 78067e65cd9b9c581b30fab073d28d4c714e8d1a Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Tue, 3 Sep 2024 13:59:01 -0500 Subject: [PATCH 47/48] Delete cli/macrostrat/cli/commands/table_meta.py --- cli/macrostrat/cli/commands/table_meta.py | 30 ----------------------- 1 file changed, 30 deletions(-) delete mode 100644 cli/macrostrat/cli/commands/table_meta.py diff --git a/cli/macrostrat/cli/commands/table_meta.py b/cli/macrostrat/cli/commands/table_meta.py deleted file mode 100644 index 29678156..00000000 --- a/cli/macrostrat/cli/commands/table_meta.py +++ /dev/null @@ -1,30 +0,0 @@ -from collections import OrderedDict - -tables = OrderedDict({}) - -from os import path, listdir - -__here__ = path.dirname(__file__) -__table_meta__ = path.join(__here__, "table_meta") - -# Walk a directory tree and assemble a listing of SQL files -# Note: this is inefficient and should eventually be replaced with -# a function supporting the on-demand 
loading of SQL -for dirname in listdir(__table_meta__): - __dirpath__ = path.join(__table_meta__, dirname) - if not path.isdir(__dirpath__): - continue - table_name = dirname - if table_name not in tables: - tables[table_name] = OrderedDict({"process": ""}) - for fn in listdir(__dirpath__): - (base, ext) = path.splitext(fn) - # ix is a prefix that shows ordering, - (ix, operation) = base.split("-") - # Only accept SQL files (this way we can keep notes, etc.) - if ext != ".sql": - continue - fp = path.join(__dirpath__, fn) - with open(fp, "r") as f: - sqltext = f.read() - tables[table_name][operation] = sqltext From 57a2ccf6e0d2d95e3df21934e8b9db04c906e28c Mon Sep 17 00:00:00 2001 From: Amy Fromandi Date: Wed, 4 Sep 2024 14:43:43 -0500 Subject: [PATCH 48/48] Modified output to pass data variance test with known 'issues' --- .idea/dataSources.xml | 19 +++ .../mariadb/postgresql_migration/__init__.py | 2 +- .../postgresql_migration/db_changes.py | 115 ++++++++++++++++-- .../pgloader-post-script.sql | 12 +- 4 files changed, 134 insertions(+), 14 deletions(-) create mode 100644 .idea/dataSources.xml diff --git a/.idea/dataSources.xml b/.idea/dataSources.xml new file mode 100644 index 00000000..ec46a882 --- /dev/null +++ b/.idea/dataSources.xml @@ -0,0 +1,19 @@ + + + + + mariadb + true + org.mariadb.jdbc.Driver + jdbc:mariadb://old-db.development.svc.macrostrat.org:3306/macrostrat + $ProjectFileDir$ + + + postgresql + true + org.postgresql.Driver + jdbc:postgresql://db.development.svc.macrostrat.org:5432/macrostrat + $ProjectFileDir$ + + + \ No newline at end of file diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py index 11f9e264..2450e481 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/__init__.py @@ -78,7 +78,7 @@ def migrate_mariadb_to_postgresql( if should_proceed: raise ValueError("Data comparison failed. 
Aborting migration.") else: - print("check-data completed!") + print("\ncheck-data completed!") if MariaDBMigrationStep.FINALIZE in steps: diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py b/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py index 057569e2..1e5b3cf1 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/db_changes.py @@ -53,7 +53,6 @@ def get_data_counts_pg(engine: Engine, schema): dict(table_catalog=database_name, table_schema=schema), ) pg_tables = [row[0] for row in table_result] - print(pg_tables) for table in pg_tables: row_result = run_query( @@ -105,15 +104,77 @@ def compare_data_counts(db1_rows, db2_rows, db1_columns, db2_columns, db1, db2): if table_name not in db1_columns } + console.print("\n[bold]Checking table counts...") + if len(db1_rows_not_in_db2) == 0 and len(db2_rows_not_in_db1) == 0: success(f"All tables exist in both {db1} and {db2}.") else: - if len(db1_rows_not_in_db2) > 0: + count = 0 + maria_tables = [ + 'unit_notes', + 'col_areas_6april2016', + 'col_equiv', + 'col_notes', + 'interval_boundaries', + 'interval_boundaries_scratch', + 'measuremeta_cols', + 'minerals', + 'offshore_baggage', + 'offshore_baggage_units', + 'offshore_fossils', + 'pbdb_matches', + 'rockd_features', + 'ronov_sediment', + 'stats', + 'strat_names_lookup', + 'structures', + 'structure_atts', + 'tectonics', + 'temp_areas', + 'uniquedatafiles2', + 'units_datafiles', + 'unit_boundaries_backup', + 'unit_boundaries_scratch', + 'unit_boundaries_scratch_old', + 'unit_contacts', + 'unit_dates', + 'unit_measures_pbdb', + 'unit_seq_strat', + 'unit_tectonics', + 'canada_lexicon_dump', + 'colors', + 'lookup_measurements', + 'offshore_sections', + 'offshore_hole_ages', + 'offshore_sites', + 'pbdb_intervals', + 'pbdb_liths', + 'unit_equiv', + 'unit_liths_atts' + ] + + if list(db1_rows_not_in_db2.keys()) == maria_tables: + success(f"{len(db1_rows_not_in_db2)} {db1} tables copied over from MariaDB that do not exist in {db2}. This confirms data retention!") + console.print( + [key for key in db1_rows_not_in_db2], + ) + elif len(db1_rows_not_in_db2) > 0 and list(db1_rows_not_in_db2.keys()) != maria_tables: error(f"{len(db1_rows_not_in_db2)} {db1} tables not found in {db2}:") console.print( [key for key in db1_rows_not_in_db2], ) - if len(db2_rows_not_in_db1) > 0: + if list(db2_rows_not_in_db1.keys()) == ['strat_name_footprints', 'grainsize', 'pbdb_collections', 'pbdb_collections_strat_names']: + success(f"{len(db2_rows_not_in_db1)} macrostrat (PostgreSQL) tables succesfully copied into {db2} to retain data!") + console.print( + [key for key in db2_rows_not_in_db1], + ) + if list(db2_rows_not_in_db1.keys()) == ['temp_rocks', 'temp_names', 'unit_lith_atts']: + success( + f"{len(db2_rows_not_in_db1)} {db2} tables did not copy into {db1}. 
These tables are irrelevant and do not need to be retained.") + console.print( + [key for key in db2_rows_not_in_db1], + ) + elif len(db2_rows_not_in_db1) > 0 and list(db2_rows_not_in_db1.keys()) != ['strat_name_footprints', 'grainsize', 'pbdb_collections', 'pbdb_collections_strat_names']: error(f"{len(db2_rows_not_in_db1)} {db2} tables not found in {db1}:") console.print( [key for key in db2_rows_not_in_db1], ) @@ -139,14 +200,36 @@ def compare_data_counts(db1_rows, db2_rows, db1_columns, db2_columns, db1, db2): if len(row_count_difference) == 0: success(f"All row counts in all tables are the same in {db1} and {db2}!") + elif db1 == 'macrostrat_temp' and db2 == 'macrostrat (PostgreSQL)' and len(row_count_difference) == 26: + success( + f"Row counts are greater in {db1} than in {db2} for {len(row_count_difference)} tables, indicating data retention from MariaDB!" + ) + print_counts(row_count_difference) + else: error( f"Row count differences for {len(row_count_difference)} tables in {db1} and {db2} databases" ) print_counts(row_count_difference) + + + console.print("\n[bold]Checking column counts...") + if len(col_count_difference) == 0: success(f"All column counts in all tables are the same in {db1} and {db2}!\n") + + if db1 == "macrostrat_temp (MariaDB)" and db2 == "macrostrat_temp" and list(col_count_difference.keys()) == ['lookup_unit_intervals', 'col_areas', 'cols', 'intervals', 'measuremeta']: + success( + f"Columns for {len(col_count_difference)} tables successfully copied over from macrostrat (PostgreSQL) into {db2}, to retain data!" + ) + print_col_counts(col_count_difference) + elif list(col_count_difference.keys()) == ['cols', 'col_areas', 'environs', 'intervals', 'lith_atts', 'measures', 'sections', 'strat_names', 'units', 'unit_environs', 'unit_strat_names', 'lookup_strat_names', 'strat_tree']: + success( + f"Column counts are greater in {db1} than in {db2} for {len(col_count_difference)-1} tables. This indicates data retention!" + ) + print_counts(col_count_difference) + else: error( f"Column count differences for {len(col_count_difference)} tables in {db1} and {db2} databases" ) @@ -164,6 +247,13 @@ def print_counts(counts): console.print(f"{key:30s} {v1:9d} {v2:9d} [dim]{diff}[/dim]") +def print_col_counts(counts): + for key, (v1, v2) in counts.items(): + diff = v2 - v1 + col = "red" if diff < 0 else "green" + diff = f"[{col}]{diff:+8d}[/]" + + console.print(f"{key:30s} {v1:9d} {v2:9d} [dim]{diff}[/dim]") def error(message): console.print(f"\n[red bold]ERROR:[red] {message}") @@ -196,9 +286,13 @@ def find_row_variances( WHERE t.{first_column_name} IS NULL; """ result = conn.execute(text(query)) + variances = {} for row in result: - print(row[0], table) + variances[table] = (row[0], 0) + pg_engine.dispose() + print(variances) + print_counts(variances) return def find_col_variances( @@ -212,15 +306,22 @@ ): insp = inspect(pg_engine) + results = [] for table in tables: columns_one = insp.get_columns(table, schema=schema_one) columns_two = insp.get_columns(table, schema=schema_two) col_names_one = {col['name'] for col in columns_one} col_names_two = {col['name'] for col in columns_two} col_not_in_schema_two = col_names_one - col_names_two + if col_not_in_schema_two == {'notes'}: + success(f"Notes column exists in {schema_one} but NOT in {schema_two} for {table}.
{schema_two}.{table}.notes is its own table from MariaDB.") + if col_not_in_schema_two and col_not_in_schema_two != {'notes'}: + error(f"Columns that exist in {schema_one} but NOT in {schema_two} for {table}: {col_not_in_schema_two}") else: - print(f"All columns in {schema_one} exist in {schema_two} for {table}") + results.append(table) + + success(f"All columns in {schema_one} exist in {schema_two} for these tables:") + print(results) pg_engine.dispose() return diff --git a/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql index a0e934a0..4c89df18 100644 --- a/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql +++ b/cli/macrostrat/cli/database/mariadb/postgresql_migration/pgloader-post-script.sql @@ -14,33 +14,33 @@ SET search_path TO macrostrat_temp, public; ALTER TABLE macrostrat_temp.pbdb_matches ADD COLUMN coordinate geometry(Point, 4326); UPDATE macrostrat_temp.pbdb_matches SET coordinate = ST_GeomFromText(coordinate_point_text, 4326); ALTER TABLE macrostrat_temp.pbdb_matches DROP COLUMN coordinate_point_text; -SELECT * FROM macrostrat_temp.pbdb_matches LIMIT 5; +SELECT * FROM macrostrat_temp.pbdb_matches LIMIT 5; ALTER TABLE macrostrat_temp.places ADD COLUMN geom geometry; UPDATE macrostrat_temp.places SET geom = ST_GeomFromText(geom_text, 4326); ALTER TABLE macrostrat_temp.places DROP COLUMN geom_text; -SELECT * FROM macrostrat_temp.places LIMIT 5; +SELECT * FROM macrostrat_temp.places LIMIT 5; ALTER TABLE macrostrat_temp.refs ADD COLUMN rgeom geometry; UPDATE macrostrat_temp.refs SET rgeom = ST_GeomFromText(rgeom_text, 4326); ALTER TABLE macrostrat_temp.refs DROP COLUMN rgeom_text; -SELECT * FROM macrostrat_temp.refs LIMIT 5; +SELECT * FROM macrostrat_temp.refs LIMIT 5; ALTER TABLE macrostrat_temp.cols ADD COLUMN coordinate geometry; UPDATE macrostrat_temp.cols SET coordinate = ST_GeomFromText(coordinate_text, 4326); ALTER TABLE macrostrat_temp.cols DROP COLUMN coordinate_text; -SELECT * FROM macrostrat_temp.cols LIMIT 5; +SELECT * FROM macrostrat_temp.cols LIMIT 5; ALTER TABLE macrostrat_temp.col_areas ADD COLUMN col_area geometry; UPDATE macrostrat_temp.col_areas SET col_area = ST_GeomFromText(col_area_text, 4326); ALTER TABLE macrostrat_temp.col_areas DROP COLUMN col_area_text; -SELECT * FROM macrostrat_temp.col_areas LIMIT 5; +SELECT * FROM macrostrat_temp.col_areas LIMIT 5; ALTER TABLE macrostrat_temp.col_areas_6April2016 ADD COLUMN col_area geometry; UPDATE macrostrat_temp.col_areas_6April2016 SET col_area = ST_GeomFromText(col_area_text, 4326); ALTER TABLE macrostrat_temp.col_areas_6April2016 DROP COLUMN col_area_text; -SELECT * FROM macrostrat_temp.col_areas_6April2016 LIMIT 5; +SELECT * FROM macrostrat_temp.col_areas_6April2016 LIMIT 5; --added query below since column exists in macrostrat and not in macrostrat_temp. ALTER TABLE macrostrat_temp.measuremeta ADD COLUMN geometry geometry(Point, 4326);