Skip to content

Commit

Permalink
Merge pull request #95 from UW-Macrostrat/migrations-rework
Browse files Browse the repository at this point in the history
Fix paleogeography subsystem; small improvements to migrations management
  • Loading branch information
davenquinn authored Sep 13, 2024
2 parents c49a187 + 7472d5f commit 4a1949d
Show file tree
Hide file tree
Showing 18 changed files with 429 additions and 209 deletions.
9 changes: 6 additions & 3 deletions cli/macrostrat/cli/database/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@ def update_schema(

db_subsystem = app.subsystems.get("database")

if subsystems is None:
subsystems = []

"""Create schema additions"""
schema_dir = fixtures_dir
# Loaded from env file
Expand Down Expand Up @@ -350,8 +353,8 @@ def inspect_table(table: str):


@db_app.command(name="scripts", rich_help_panel="Schema management")
def run_migration(migration: str = Argument(None)):
"""Ad-hoc migration scripts"""
def run_scripts(migration: str = Argument(None)):
"""Ad-hoc database management scripts"""
pth = Path(__file__).parent.parent / "ad-hoc-migrations"
files = list(pth.glob("*.sql"))
files.sort()
Expand All @@ -363,7 +366,7 @@ def run_migration(migration: str = Argument(None)):
exit(1)
migration = pth / (migration + ".sql")
if not migration.exists():
print(f"Migration {migration} does not exist", file=stderr)
print(f"Script {migration} does not exist", file=stderr)
exit(1)

db = get_db()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@
from textwrap import dedent

from psycopg2.sql import Identifier
from sqlalchemy import create_engine, inspect, text
from sqlalchemy.engine import Engine, make_url
from sqlalchemy import create_engine
from sqlalchemy.engine import Engine

from macrostrat.core import app
from macrostrat.core.config import settings
from macrostrat.database import create_database, database_exists, drop_database
from macrostrat.database import create_database, database_exists
from macrostrat.database.utils import run_query, run_sql
from macrostrat.utils import get_logger
from macrostrat.utils.shell import run

from ...._dev.utils import raw_database_url
from ..._legacy import get_db
from ...migrations import migration_has_been_run
from ...utils import docker_internal_url, pg_temp_user
from ..restore import copy_mariadb_database
from ..utils import mariadb_engine
Expand Down Expand Up @@ -48,7 +48,9 @@ class MariaDBMigrationStep(Enum):


def migrate_mariadb_to_postgresql(
overwrite: bool = False, step: list[MariaDBMigrationStep] = None
overwrite: bool = False,
step: list[MariaDBMigrationStep] = None,
force: bool = False,
):
"""Migrate the legacy Macrostrat database from MariaDB to PostgreSQL."""

Expand Down Expand Up @@ -84,10 +86,20 @@ def migrate_mariadb_to_postgresql(

if MariaDBMigrationStep.FINALIZE in steps:
should_proceed = preserve_macrostrat_data(pg_engine)
if should_proceed:
raise NotImplementedError("Copy to macrostrat schema not yet implemented")
else:
print("finalize completed!")
if not should_proceed:
raise ValueError("Data would be destroyed. Aborting migration.")
if should_proceed and not force:
raise ValueError(
"This will overwrite the macrostrat schema. Use --force to proceed."
)

get_db().run_sql(
"ALTER SCHEMA {temp_schema} RENAME TO {final_schema}",
dict(
temp_schema=Identifier(temp_schema),
final_schema=Identifier(final_schema),
),
)


def pgloader(source: Engine, dest: Engine, target_schema: str, overwrite: bool = False):
Expand Down Expand Up @@ -302,6 +314,14 @@ def preserve_macrostrat_data(engine: Engine):
preserve_data = __here__ / "preserve-macrostrat-data.sql"
run_sql(engine, preserve_data)

# Check if necessary migrations are satisfied
did_migrate = migration_has_been_run("maps-scale-type")
if not did_migrate:
raise ValueError(
"Migration 'maps-scale-type' has not been run. Please run `macrostrat db migrations` before proceeding."
)
return True


def db_identifier(engine: Engine):
driver = engine.url.drivername
Expand Down
17 changes: 17 additions & 0 deletions cli/macrostrat/cli/database/migrations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
macrostrat_mariadb,
map_source_slugs,
map_sources,
maps_scale_custom_type,
maps_source_operations,
partition_carto,
partition_maps,
Expand Down Expand Up @@ -138,3 +139,19 @@ def run_migrations(
# Short circuit after applying the migration specified by --name
if name is not None and name == _name:
break


def migration_has_been_run(*names: str) -> bool:
    """Report whether every named migration has already been applied.

    Each requested name is matched against the ``name`` attribute of the
    direct subclasses of ``Migration``, and the matching migration's
    ``should_apply`` status is checked against the current database.

    Args:
        *names: Names of the migrations to check.

    Returns:
        True when every requested migration reports
        ``ApplicationStatus.APPLIED`` (vacuously true when no names are
        given); False as soon as one of them reports any other status.

    Raises:
        ValueError: If any requested name does not match a known migration.
    """
    db = get_db()
    migrations = Migration.__subclasses__()

    requested = set(names)
    unknown = requested - {m.name for m in migrations}
    if unknown:
        raise ValueError(f"Unknown migrations: {unknown}")

    for _migration in migrations:
        if _migration.name not in requested:
            continue
        # NOTE(review): `should_apply` is invoked on the class object here,
        # not on an instance -- confirm that it is callable this way.
        apply_status = _migration.should_apply(db)
        if apply_status != ApplicationStatus.APPLIED:
            # A requested migration is still pending (or cannot be applied),
            # so the set as a whole has not been run.
            return False
    return True
5 changes: 5 additions & 0 deletions cli/macrostrat/cli/database/migrations/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ def has_fks(schema: str, *table_names: str) -> DbEvaluator:
)


def custom_type_exists(schema: str, *type_names: str) -> DbEvaluator:
    """Build an evaluator that checks for custom types in a schema.

    The returned callable takes a database object and is true only when
    ``db.inspector.has_type`` reports every name in ``type_names`` as
    present in ``schema``.
    """

    def _all_types_present(db):
        for type_name in type_names:
            if not db.inspector.has_type(type_name, schema=schema):
                return False
        return True

    return _all_types_present


class ApplicationStatus(Enum):
"""Enum for the possible"""

Expand Down
4 changes: 2 additions & 2 deletions cli/macrostrat/cli/database/migrations/baseline/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ class BaselineMigration(Migration):
name = "baseline"
subsystem = "core"
description = """
Starting from an empty database, create the baseline macrostrat schemas as of 2023-08-29.
Starting from an empty database, create the baseline macrostrat schemas as of 2023-08-29.
"""

# Basic sanity check, just confirm that the first table created in the migration is present
postconditions = [exists("carto", "flat_large")]
postconditions = [exists("maps", "sources")]
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from macrostrat.database import Database

from ..base import Migration, custom_type_exists


class MapsScaleCustomTypeMigration(Migration):
    """Migration that moves the ``map_scale`` custom type into the ``maps`` schema."""

    name = "maps-scale-type"
    subsystem = "maps"
    description = "\nRelocate custom type that drives the maps schema\n"

    depends_on = ["baseline", "macrostrat-mariadb"]

    # No preconditions are declared for this migration.
    preconditions = []

    # Postcondition: the `maps.map_scale` type exists.
    postconditions = [custom_type_exists("maps", "map_scale")]

    def apply(self, db: Database):
        """Run the two ``ALTER TYPE ... SET SCHEMA`` statements in order."""
        statements = (
            # Covers the edge case where the MariaDB migration has already
            # been applied: pull the type from the backup schema first.
            "ALTER TYPE macrostrat_backup.map_scale SET SCHEMA macrostrat",
            # Then relocate the type to its final home in the maps schema.
            "ALTER TYPE macrostrat.map_scale SET SCHEMA maps",
        )
        for statement in statements:
            db.run_sql(statement)
8 changes: 8 additions & 0 deletions cli/macrostrat/cli/sql-scripts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Ad-hoc SQL scripts

This directory contains ad-hoc SQL scripts that can be run on demand.
These scripts aren't yet structured well enough to be included in the
main codebase, but they are useful for one-off tasks.

Ideally, these should eventually be integrated into data maintenance scripts
or migrations.
4 changes: 2 additions & 2 deletions cli/macrostrat/cli/subsystems/paleogeography/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from os import environ
from sys import stderr

from typer import Context, Typer

from macrostrat.app_frame.control_command import ControlCommand

from ...database import get_db
from .corelle import build_carto_plate_index, create_corelle_fixtures

Expand All @@ -14,6 +13,7 @@ def load_paleogeography_subsystem(app, main, db_subsystem):
from corelle.engine import cli as corelle_cli
from corelle.engine.database import initialize
except ImportError as err:
print("Corelle subsystem not available", err, file=stderr)
return app

paleo_app = Typer(name="paleogeography", no_args_is_help=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ JOIN corelle.plate_polygon pp
-- Create a needed index
CREATE UNIQUE INDEX plate_polygon_unique_idx ON corelle.plate_polygon (id, plate_id, model_id);

-- Pre-split carto layers
-- Pre-split carto layers
CREATE TABLE IF NOT EXISTS corelle_macrostrat.carto_plate_index (
map_id integer NOT NULL,
scale macrostrat.map_scale NOT NULL,
scale maps.map_scale NOT NULL,
model_id integer NOT NULL,
plate_id integer NOT NULL,
plate_polygon_id integer NOT NULL,
Expand Down Expand Up @@ -99,7 +99,7 @@ BEGIN
8
);
END;
$$ LANGUAGE plpgsql IMMUTABLE;
$$ LANGUAGE plpgsql IMMUTABLE;

CREATE OR REPLACE FUNCTION corelle_macrostrat.antimeridian_split(
geom geometry
Expand Down Expand Up @@ -180,4 +180,4 @@ $$ LANGUAGE sql IMMUTABLE;

-- Drop outdated functions
DROP FUNCTION IF EXISTS corelle_macrostrat.rotate(geometry, numeric[], boolean);
DROP FUNCTION IF EXISTS corelle_macrostrat.rotated_web_mercator_proj(numeric[]);
DROP FUNCTION IF EXISTS corelle_macrostrat.rotated_web_mercator_proj(numeric[]);
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ lines bytea;
tolerance double precision;
_t_step integer;
_model_id integer;
_scale macrostrat.map_scale;
_scale maps.map_scale;

BEGIN

Expand Down Expand Up @@ -55,7 +55,7 @@ tolerance := 6;
projected_bbox := ST_Transform(mercator_bbox, 4326);

WITH rotated_plates AS (
SELECT
SELECT
pp.plate_id,
pp.model_id,
p.name,
Expand Down Expand Up @@ -110,7 +110,7 @@ units AS (
ON u.map_id = cpi.map_id
AND u.scale = _scale
-- This causes tile-boundary errors
WHERE _scale = 'tiny'::macrostrat.map_scale
WHERE _scale = 'tiny'::maps.map_scale
OR ST_Intersects(coalesce(cpi.geom, u.geom), tile_geom)
),
bedrock_ AS (
Expand Down Expand Up @@ -144,7 +144,7 @@ plates_ AS (
FROM relevant_plates
)
SELECT
(SELECT ST_AsMVT(plates_, 'plates') FROM plates_) ||
(SELECT ST_AsMVT(plates_, 'plates') FROM plates_) ||
(SELECT ST_AsMVT(bedrock_, 'units') FROM bedrock_)
INTO result;

Expand Down Expand Up @@ -273,4 +273,4 @@ FROM expanded;
RETURN bedrock || plates;

END;
$$ LANGUAGE plpgsql IMMUTABLE;
$$ LANGUAGE plpgsql IMMUTABLE;
2 changes: 1 addition & 1 deletion cli/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dynaconf = "^3.1.12"
geopandas = "^0.14.1"
ipython = "^8.5.0"
"macrostrat.app-frame" = "^2.0.0"
"macrostrat.database" = "^3.3.2"
"macrostrat.database" = "^3.3.3"
"macrostrat.utils" = "^1.2.2"
"macrostrat.dinosaur" = "^3.0.1"
"macrostrat.core" = { path = "../core", develop = true }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ INSERT INTO maps.polygons (
b_interval,
geom
)
SELECT
SELECT
source_id,
{scale}::macrostrat.map_scale,
{scale}::maps.map_scale,
orig_id,
name,
strat_name,
Expand All @@ -44,9 +44,9 @@ INSERT INTO maps.lines (
descrip,
geom
)
SELECT
SELECT
source_id,
{scale}::macrostrat.map_scale,
{scale}::maps.map_scale,
orig_id,
name,
type,
Expand All @@ -69,7 +69,7 @@ INSERT INTO maps.points (
geom,
orig_id
)
SELECT
SELECT
source_id,
strike,
dip,
Expand All @@ -81,4 +81,4 @@ SELECT
orig_id
FROM {points_table}
WHERE source_id = {source_id}
AND NOT coalesce(omit, false);
AND NOT coalesce(omit, false);
Loading

0 comments on commit 4a1949d

Please sign in to comment.