diff --git a/dsst_etl/upload_pdfs.py b/dsst_etl/upload_pdfs.py index 6f26de2..53bb308 100644 --- a/dsst_etl/upload_pdfs.py +++ b/dsst_etl/upload_pdfs.py @@ -7,7 +7,7 @@ import psycopg2 import sqlalchemy -from dsst_etl import __version__, logger +from dsst_etl import __version__, get_db_engine, logger from dsst_etl._utils import get_bucket_name, get_compute_context_id from dsst_etl.db import get_db_session from dsst_etl.models import Documents, Provenance, Works @@ -188,8 +188,7 @@ def upload_directory(pdf_directory_path: str, comment: Optional[str] = None) -> if not pdf_files: logger.warning(f"No PDF files found in {pdf_directory_path}") return - - uploader = PDFUploader(get_db_session()) + uploader = PDFUploader(get_db_session(get_db_engine())) # Upload PDFs successful_uploads, failed_uploads = uploader.upload_pdfs(pdf_files) diff --git a/scripts/ingest_pdfs.py b/scripts/ingest_pdfs.py index 660e41d..ebe56a0 100644 --- a/scripts/ingest_pdfs.py +++ b/scripts/ingest_pdfs.py @@ -1,5 +1,6 @@ import argparse import sys +from dsst_etl import get_db_engine from dsst_etl.extract import extract_data_from_pdf_dir from dsst_etl.transform import transform_data from dsst_etl.load import load_data @@ -22,7 +23,7 @@ def main(): try: # Initialize database session - db_session = get_db_session() + db_session = get_db_session(get_db_engine()) logger.info({"message": "Database session initialized."}) # Step 1: Extract data diff --git a/scripts/run_upload_rtransparent_data.py b/scripts/run_upload_rtransparent_data.py index a62350d..16eb7f9 100644 --- a/scripts/run_upload_rtransparent_data.py +++ b/scripts/run_upload_rtransparent_data.py @@ -1,4 +1,5 @@ import argparse +from dsst_etl import get_db_engine from dsst_etl.db import get_db_session from dsst_etl.upload_rtransparent_data import RTransparentDataUploader @@ -9,7 +10,7 @@ def main(): args = parser.parse_args() - uploader = RTransparentDataUploader(get_db_session()) + uploader = RTransparentDataUploader(get_db_session(get_db_engine())) uploader.upload_data(args.input_file)