From 3700e8fbe956ab962bfa01914c7716684fe0fa43 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Thu, 19 Oct 2023 10:54:33 -0500 Subject: [PATCH] add log on copying over to target db so we can keep track of what files have been copied over. (#45) * update classifications backfill script to chunk and save in file * update to <= remove unused limit in query * update copy into source * split backfill to file creation then copy from files * cast to int * revert accidental adding commas on limit * add keepalives to hopefully ensure connection does not get lost * remove order by desc * update to use the correct query * add log to keep track of which files have finished being copied over --- scripts/copy_classifications_from_files.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/copy_classifications_from_files.py b/scripts/copy_classifications_from_files.py index 1a32079..7595b6d 100644 --- a/scripts/copy_classifications_from_files.py +++ b/scripts/copy_classifications_from_files.py @@ -21,6 +21,7 @@ with psycopg.connect(f"host={TIMESCALE_CONNECTION} port={TIMESCALE_PORT} dbname={ERAS_DB} user={ERAS_USER} password={ERAS_PW} sslmode=require keepalives=1 keepalives_idle=30 keepalives_interval=10 keepalives_count=20") as timescale_db_conn: with timescale_db_conn.cursor(name="timescale_cursor").copy("COPY classification_events FROM STDIN DELIMITER ',' CSV HEADER") as timescale_copy: timescale_copy.write(open(f"prod_classifications_{output_file_no}.csv").read()) + print("FINISHED COPYING FILE #", output_file_no) output_file_no += 1 finish_time = datetime.now()