From 8ca2f3a0c5279d6ad0ec042d04a2310c2d286231 Mon Sep 17 00:00:00 2001 From: cirun Date: Thu, 10 Oct 2024 09:13:48 +0200 Subject: [PATCH 1/3] LLCAXCHZF-61/handles cases where CKAN type guessing is disabled --- ckanext/charts/fetchers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ckanext/charts/fetchers.py b/ckanext/charts/fetchers.py index d701b1f..64b0c73 100644 --- a/ckanext/charts/fetchers.py +++ b/ckanext/charts/fetchers.py @@ -79,8 +79,9 @@ def fetch_data(self) -> pd.DataFrame: df[non_datetime_cols] = df[non_datetime_cols].apply(pd.to_numeric, errors='ignore').fillna(0) if "date_time" in df.columns: - # Convert the 'date_time' column to string format in ISO 8601 - df['date_time'] = df['date_time'].dt.strftime("%Y-%m-%dT%H:%M:%S") + # Ensure datetime type consistency and format to ISO 8601 + # Handles cases where CKAN type guessing is disabled + df['date_time'] = pd.to_datetime(df['date_time']).dt.strftime("%Y-%m-%dT%H:%M:%S") except (ProgrammingError, UndefinedTable) as e: raise exception.DataFetchError( From a74add6edf3de6c44186e752dbc8bd0178ab538f Mon Sep 17 00:00:00 2001 From: cirun Date: Thu, 10 Oct 2024 09:24:24 +0200 Subject: [PATCH 2/3] LLCAXCHZF-61/typo --- ckanext/charts/fetchers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/charts/fetchers.py b/ckanext/charts/fetchers.py index 64b0c73..2a6f6e9 100644 --- a/ckanext/charts/fetchers.py +++ b/ckanext/charts/fetchers.py @@ -80,7 +80,7 @@ def fetch_data(self) -> pd.DataFrame: if "date_time" in df.columns: # Ensure datetime type consistency and format to ISO 8601 - # Handles cases where CKAN type guessing is disabled + # Handles cases where ckanext.xloader.use_type_guessing is disabled df['date_time'] = pd.to_datetime(df['date_time']).dt.strftime("%Y-%m-%dT%H:%M:%S") except (ProgrammingError, UndefinedTable) as e: From 9a84bc71694493a5e0f9b300cb8ee8d78464a0c7 Mon Sep 17 00:00:00 2001 From: cirun Date: Thu, 10 Oct 2024 13:45:27 +0200 Subject: [PATCH 
3/3] LLCAXCHZF-61/perform date conversion before applying numeric operations --- ckanext/charts/fetchers.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/ckanext/charts/fetchers.py b/ckanext/charts/fetchers.py index 2a6f6e9..3f0bb93 100644 --- a/ckanext/charts/fetchers.py +++ b/ckanext/charts/fetchers.py @@ -73,15 +73,17 @@ def fetch_data(self) -> pd.DataFrame: get_read_engine(), ).drop(columns=["_id", "_full_text"]) - # Identify columns that are not datetime - non_datetime_cols = df.select_dtypes(exclude=['datetime']).columns - # Apply numeric conversion only to non-datetime columns - df[non_datetime_cols] = df[non_datetime_cols].apply(pd.to_numeric, errors='ignore').fillna(0) if "date_time" in df.columns: - # Ensure datetime type consistency and format to ISO 8601 - # Handles cases where ckanext.xloader.use_type_guessing is disabled - df['date_time'] = pd.to_datetime(df['date_time']).dt.strftime("%Y-%m-%dT%H:%M:%S") + try: + df['date_time'] = pd.to_datetime(df['date_time']) + # Convert valid dates to ISO format + df['date_time'] = df['date_time'].dt.strftime("%Y-%m-%dT%H:%M:%S") + except (ValueError, TypeError, AttributeError) as e: + # Log the warning and keep the original values if conversion fails + log.warning(f"Warning: Could not convert date_time column: {e}") + + # Apply numeric conversion to all columns - with errors='ignore', a column containing any non-numeric value is returned unchanged + df = df.apply(pd.to_numeric, errors='ignore').fillna(0) except (ProgrammingError, UndefinedTable) as e: raise exception.DataFetchError(