diff --git a/harvester/cde_harvester/ERDDAP.py b/harvester/cde_harvester/ERDDAP.py index aac24c6a..cb547219 100644 --- a/harvester/cde_harvester/ERDDAP.py +++ b/harvester/cde_harvester/ERDDAP.py @@ -42,7 +42,7 @@ def __init__(self, erddap, cache_requests=False): if erddap.get("username") and erddap.get("password"): self.session.auth = HTTPBasicAuth(erddap["username"], erddap["password"]) - self.logger = logger.bind(erddap_url=erddap_url) + self.logger = logger self.df_all_datasets = None erddap_url = erddap_url.rstrip("/") diff --git a/harvester/cde_harvester/dataset.py b/harvester/cde_harvester/dataset.py index 44ee9d32..e183f192 100644 --- a/harvester/cde_harvester/dataset.py +++ b/harvester/cde_harvester/dataset.py @@ -21,7 +21,7 @@ class Dataset: def __init__(self, erddap_server, id): self.id = id self.erddap_server = erddap_server - self.logger = logger.bind(erddap_url=erddap_server.url, dataset_id=id) + self.logger = logger self.erddap_url = erddap_server.url self.erddap_csv_to_df = erddap_server.erddap_csv_to_df diff --git a/harvester/cde_harvester/harvest_erddap.py b/harvester/cde_harvester/harvest_erddap.py index 6c012661..5bd097eb 100644 --- a/harvester/cde_harvester/harvest_erddap.py +++ b/harvester/cde_harvester/harvest_erddap.py @@ -134,45 +134,46 @@ def skipped_reason(code): variables_all = [] for i, df_dataset_row in df_all_datasets.iterrows(): dataset_id = df_dataset_row["datasetID"] - if dataset_id in datasets_to_skip: - logger.info("Skipping dataset: {} because its on the skip list", dataset_id) - continue - try: - logger.info( - "Querying dataset: {} {}/{}", dataset_id, i + 1, len(df_all_datasets) - ) - dataset = erddap.get_dataset(dataset_id) - compliance_checker = CDEComplianceChecker(dataset) - passes_checks = compliance_checker.passes_all_checks() - - # these are the variables we are pulling max/min values for - if passes_checks: - df_profiles = get_profiles(dataset) - - if df_profiles.empty: - logger.warning("No profiles found") - continue - - # only write dataset/metadata/profile if there are some profiles - profiles_all.append(df_profiles) - datasets_all.append(dataset.get_df()) - variables_all.append(dataset.df_variables) - logger.info("complete") - else: - skipped_datasets_reasons += skipped_reason( - compliance_checker.failure_reason_code + with logger.contextualize(erddap_url=erddap_url,dataset_id=dataset_id): + if dataset_id in datasets_to_skip: + logger.info("Skipping dataset: {} because its on the skip list", dataset_id) + continue + try: + logger.info( + "Querying dataset: {} {}/{}", dataset_id, i + 1, len(df_all_datasets) ) - except HTTPError as e: - response = e.response - # dataset_logger.error(response.text) - logger.error("HTTP ERROR: {} {}", response.status_code, response.reason) - skipped_datasets_reasons += skipped_reason(HTTP_ERROR) - - except Exception as e: - logger.error( - "Error occurred at {} {}", erddap_url, dataset_id, exc_info=True - ) - skipped_datasets_reasons += skipped_reason(UNKNOWN_ERROR) + dataset = erddap.get_dataset(dataset_id) + compliance_checker = CDEComplianceChecker(dataset) + passes_checks = compliance_checker.passes_all_checks() + + # these are the variables we are pulling max/min values for + if passes_checks: + df_profiles = get_profiles(dataset) + + if df_profiles.empty: + logger.warning("No profiles found") + continue + + # only write dataset/metadata/profile if there are some profiles + profiles_all.append(df_profiles) + datasets_all.append(dataset.get_df()) + variables_all.append(dataset.df_variables) + logger.info("complete") + else: + skipped_datasets_reasons += skipped_reason( + compliance_checker.failure_reason_code + ) + except HTTPError as e: + response = e.response + # dataset_logger.error(response.text) + logger.error("HTTP ERROR: {} {}", response.status_code, response.reason) + skipped_datasets_reasons += skipped_reason(HTTP_ERROR) + + except Exception as e: + logger.exception( + "Error occurred at {} {}", erddap_url, dataset_id + ) + skipped_datasets_reasons += skipped_reason(UNKNOWN_ERROR) skipped_datasets_columns = ["erddap_url", "dataset_id", "reason_code"]