Skip to content

Commit

Permalink
Refactor database session initialization in PDF upload and ingestion scripts to use get_db_engine for improved database connection management.
Browse files Browse the repository at this point in the history
  • Loading branch information
quang-ng committed Dec 5, 2024
1 parent d354d0f commit 1f381f8
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 5 deletions.
5 changes: 2 additions & 3 deletions dsst_etl/upload_pdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import psycopg2
import sqlalchemy

-from dsst_etl import __version__, logger
+from dsst_etl import __version__, get_db_engine, logger
from dsst_etl._utils import get_bucket_name, get_compute_context_id
from dsst_etl.db import get_db_session
from dsst_etl.models import Documents, Provenance, Works
Expand Down Expand Up @@ -188,8 +188,7 @@ def upload_directory(pdf_directory_path: str, comment: Optional[str] = None) ->
if not pdf_files:
logger.warning(f"No PDF files found in {pdf_directory_path}")
return

-uploader = PDFUploader(get_db_session())
+uploader = PDFUploader(get_db_session(get_db_engine()))

# Upload PDFs
successful_uploads, failed_uploads = uploader.upload_pdfs(pdf_files)
Expand Down
3 changes: 2 additions & 1 deletion scripts/ingest_pdfs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import argparse
import sys
+from dsst_etl import get_db_engine
from dsst_etl.extract import extract_data_from_pdf_dir
from dsst_etl.transform import transform_data
from dsst_etl.load import load_data
Expand All @@ -22,7 +23,7 @@ def main():

try:
# Initialize database session
-db_session = get_db_session()
+db_session = get_db_session(get_db_engine())
logger.info({"message": "Database session initialized."})

# Step 1: Extract data
Expand Down
3 changes: 2 additions & 1 deletion scripts/run_upload_rtransparent_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
+from dsst_etl import get_db_engine
from dsst_etl.db import get_db_session
from dsst_etl.upload_rtransparent_data import RTransparentDataUploader

Expand All @@ -9,7 +10,7 @@ def main():

args = parser.parse_args()

-uploader = RTransparentDataUploader(get_db_session())
+uploader = RTransparentDataUploader(get_db_session(get_db_engine()))

uploader.upload_data(args.input_file)

Expand Down

0 comments on commit 1f381f8

Please sign in to comment.