From 3dbd13c96dc80f4ef066cb705d9f66d0ff36b6a4 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Thu, 21 Sep 2023 11:31:29 -0500 Subject: [PATCH] Add sslmode require to backfill scripts (#32) * Create backfill_talk_comments.py * backfill classifications * Update backfill_classifications.py * add user_group_membership_backfill and update select query of backfilling talk comments and classifications * add update classification events * add sslmode require to connection strings --- scripts/backfill_classifications.py | 2 +- scripts/backfill_talk_comments.py | 2 +- scripts/user_group_membership_classification_backfill.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/backfill_classifications.py b/scripts/backfill_classifications.py index 53a2ca7..98d7b8d 100644 --- a/scripts/backfill_classifications.py +++ b/scripts/backfill_classifications.py @@ -18,7 +18,7 @@ current_time = now.strftime("%H:%M:%S") print("CLASSIFICATIONS backfill BEFORE Time =", current_time) -with psycopg.connect(f"host={PANOPTES_CONN} port={PANOPTES_PORT} dbname={PANOPTES_DB} user={PANOPTES_USER} password={PANOPTES_PW}") as panoptes_db_conn, psycopg.connect(f"host={TIMESCALE_CONNECTION} port={TIMESCALE_PORT} dbname={ERAS_DB} user={ERAS_USER} password={ERAS_PW}") as timescale_db_conn: +with psycopg.connect(f"host={PANOPTES_CONN} port={PANOPTES_PORT} dbname={PANOPTES_DB} user={PANOPTES_USER} password={PANOPTES_PW} sslmode=require") as panoptes_db_conn, psycopg.connect(f"host={TIMESCALE_CONNECTION} port={TIMESCALE_PORT} dbname={ERAS_DB} user={ERAS_USER} password={ERAS_PW} sslmode=require") as timescale_db_conn: with panoptes_db_conn.cursor(name="panoptes_cursor").copy("COPY (select id as classification_id, created_at as event_time, updated_at as classification_updated_at, TO_TIMESTAMP(metadata ->> 'started_at', 'YYYY-MM-DD HH24:MI:SS') as started_at, TO_TIMESTAMP(metadata ->> 'finished_at', 'YYYY-MM-DD HH24:MI:SS') as finished_at, project_id, workflow_id, user_id, string_to_array(replace(replace(replace(metadata ->> 'user_group_ids', '[', ''), ']', ''), ' ', '' ), ',')::int[] as user_group_ids, EXTRACT(EPOCH FROM TO_TIMESTAMP(metadata ->> 'finished_at', 'YYYY-MM-DD HH24:MI:SS') - TO_TIMESTAMP(metadata ->> 'started_at', 'YYYY-MM-DD HH24:MI:SS')) as session_time, created_at, updated_at from classifications where id < %s) TO STDOUT (FORMAT BINARY)", (FIRST_INGESTED_CLASSIFICATION_ID,)) as panoptes_copy: with timescale_db_conn.cursor().copy("COPY classification_events FROM STDIN (FORMAT BINARY)") as timescale_copy: for data in panoptes_copy: diff --git a/scripts/backfill_talk_comments.py b/scripts/backfill_talk_comments.py index 2cfb791..8b48b9b 100644 --- a/scripts/backfill_talk_comments.py +++ b/scripts/backfill_talk_comments.py @@ -14,7 +14,7 @@ FIRST_INGESTED_COMMENT_ID = os.getenv('FIRST_COMMENT_ID') -with psycopg.connect(f"host={TALK_CONN} port={TALK_PORT} dbname={TALK_DB} user={TALK_USER} password={TALK_PW}") as talk_db_conn, psycopg.connect(f"host={TIMESCALE_CONNECTION} port={TIMESCALE_PORT} dbname={ERAS_DB} user={ERAS_USER} password={ERAS_PW}") as timescale_db_conn: +with psycopg.connect(f"host={TALK_CONN} port={TALK_PORT} dbname={TALK_DB} user={TALK_USER} password={TALK_PW} sslmode=require") as talk_db_conn, psycopg.connect(f"host={TIMESCALE_CONNECTION} port={TIMESCALE_PORT} dbname={ERAS_DB} user={ERAS_USER} password={ERAS_PW} sslmode=require") as timescale_db_conn: with talk_db_conn.cursor(name="talk").copy("COPY (SELECT id as comment_id, created_at as event_time, updated_at as comment_updated_at, project_id, user_id, created_at, updated_at from comments where id < %s}) TO STDOUT (FORMAT BINARY)", (FIRST_INGESTED_COMMENT_ID,)) as talk_copy: with timescale_db_conn.cursor().copy("COPY comment_events FROM STDIN (FORMAT BINARY)") as timescale_copy: for data in talk_copy: diff --git a/scripts/user_group_membership_classification_backfill.py b/scripts/user_group_membership_classification_backfill.py index c168474..9b26a50 100644 --- a/scripts/user_group_membership_classification_backfill.py +++ b/scripts/user_group_membership_classification_backfill.py @@ -28,10 +28,10 @@ # email formats in form of comma separated string with no spaces (eg. "%a.com,%b.org%") email_formats = args.email_domain_formats -panoptes_db_conn = psycopg.connect(f"host={PANOPTES_CONN} port={PANOPTES_PORT} dbname={PANOPTES_DB} user={PANOPTES_USER} password={PANOPTES_PW}") +panoptes_db_conn = psycopg.connect(f"host={PANOPTES_CONN} port={PANOPTES_PORT} dbname={PANOPTES_DB} user={PANOPTES_USER} password={PANOPTES_PW} sslmode=require") panoptes_cursor = panoptes_db_conn.cursor() -eras_conn = psycopg.connect(f"host={TIMESCALE_CONNECTION} port={TIMESCALE_PORT} dbname={ERAS_DB} user={ERAS_USER} password={ERAS_PW}") +eras_conn = psycopg.connect(f"host={TIMESCALE_CONNECTION} port={TIMESCALE_PORT} dbname={ERAS_DB} user={ERAS_USER} password={ERAS_PW} sslmode=require") eras_cursor = eras_conn.cursor() # get ids of users that are not in group yet