Skip to content

Commit

Permalink
Merge pull request #71 from Sage-Bionetworks/bwmac/AG-1041/add_timest…
Browse files Browse the repository at this point in the history
…amp_logging

[AG-1041] Adds timestamp logging for processing runs
  • Loading branch information
BWMac authored May 5, 2023
2 parents 7569774 + 945b8cf commit 170f0d4
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 2 deletions.
9 changes: 9 additions & 0 deletions src/agoradatatools/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import sys
import logging

logging.basicConfig(
stream=sys.stdout,
level=logging.INFO,
format="%(levelname)s: %(asctime)s | %(name)s | %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
62 changes: 62 additions & 0 deletions src/agoradatatools/logs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import time
import logging


def format_seconds(seconds):
"""Converts a floating-point seconds value to format hh:mm:ss format."""
minutes, seconds = divmod(int(seconds), 60)
hours, minutes = divmod(minutes, 60)
return f"{hours:02}:{minutes:02}:{seconds:02}"


def time_function(func, *args, **kwargs):
"""Returns the elapsed time for a function to run."""
start_time = time.monotonic()
func(*args, **kwargs)
end_time = time.monotonic()
elapsed_time = end_time - start_time
elapsed_time_formatted = format_seconds(elapsed_time)
return elapsed_time_formatted


def log_time(func_name: str, logger: logging.Logger):
"""Decorator function that calculates how long it takes for a function to run and then logs that time.
Args:
func_name (str): Name of function to be wrapped.
Set up configurations for process_dataset and process_all_files functions
logger (logging.Logger): logger initialized in the module this decorator is used in
Raises:
ValueError: If the configuration is not supported, error is raised.
Returns:
log: log containing timestamp for function run.
"""
if func_name not in ["process_dataset", "process_all_files"]:
raise ValueError(
f"configuration {func_name} not supported for log_time decorator."
)

def log(func):
def wrapped(*args, **kwargs):
if func_name == "process_dataset":
dataset = next(iter(kwargs["dataset_obj"]))
logger.info("Now processing %s dataset", dataset)
elapsed_time_formatted = time_function(func, *args, **kwargs)
logger.info("Processing complete for %s dataset", dataset)
string_list = [elapsed_time_formatted, dataset]
message = "Elapsed time: %s for %s dataset"

if func_name == "process_all_files":
logger.info("Agora Data Tools processing has started")
elapsed_time_formatted = time_function(func, *args, **kwargs)
logger.info("Agora Data Tools processing has completed")
string_list = [elapsed_time_formatted]
message = "Elapsed time: %s for all data processing"

logger.info(message, *string_list)

return wrapped

return log
8 changes: 6 additions & 2 deletions src/agoradatatools/process.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os

import logging
import synapseclient
from pandas import DataFrame
from typer import Argument, Option, Typer
Expand All @@ -9,8 +8,12 @@
import agoradatatools.etl.transform as transform
import agoradatatools.etl.utils as utils
from agoradatatools.errors import ADTDataProcessingError
from agoradatatools.logs import log_time

logger = logging.getLogger(__name__)


@log_time(func_name="process_dataset", logger=logger)
def process_dataset(
dataset_obj: dict, staging_path: str, syn: synapseclient.Synapse
) -> tuple:
Expand Down Expand Up @@ -108,6 +111,7 @@ def create_data_manifest(parent=None, syn=None) -> DataFrame:
return DataFrame(folder)


@log_time(func_name="process_all_files", logger=logger)
def process_all_files(config_path: str = None, syn=None):
"""This function will read through the entire configuration and process each file listed.
Expand Down

0 comments on commit 170f0d4

Please sign in to comment.