diff --git a/ckanext/charts/cache.py b/ckanext/charts/cache.py index 7747288..a369117 100644 --- a/ckanext/charts/cache.py +++ b/ckanext/charts/cache.py @@ -3,12 +3,11 @@ import hashlib import logging import os -from re import T import tempfile import time from abc import ABC, abstractmethod from io import BytesIO -from typing import IO as File +from typing import IO import pandas as pd from redis.exceptions import ResponseError @@ -18,7 +17,6 @@ from ckanext.charts import config, const, exception - log = logging.getLogger(__name__) @@ -62,8 +60,8 @@ def set_data(self, key: str, data: pd.DataFrame): self.client.setex(key, cache_ttl, data.to_csv(index=False)) else: self.client.set(key, value=data.to_csv(index=False)) - except Exception as e: - log.exception("Failed to save data to Redis: %s", e) + except Exception: + log.exception("Failed to save data to Redis") def invalidate(self, key: str): self.client.delete(key) @@ -92,7 +90,7 @@ def get_data(self, key: str) -> pd.DataFrame | None: return None @abstractmethod - def read_data(self, file: File) -> pd.DataFrame | None: + def read_data(self, file: IO) -> pd.DataFrame | None: pass def set_data(self, key: str, data: pd.DataFrame) -> None: @@ -140,7 +138,7 @@ class FileCacheORC(FileCache): FILE_FORMAT = "orc" - def read_data(self, file: File) -> pd.DataFrame | None: + def read_data(self, file: IO) -> pd.DataFrame | None: from pyarrow import orc return orc.ORCFile(file).read().to_pandas() @@ -151,12 +149,13 @@ def write_data(self, file_path: str, data: pd.DataFrame) -> None: data.to_orc(file_path) + class FileCacheCSV(FileCache): """Cache data as CSV file""" FILE_FORMAT = "csv" - def read_data(self, file: File) -> pd.DataFrame | None: + def read_data(self, file: IO) -> pd.DataFrame | None: return pd.read_csv(file) def write_data(self, file_path: str, data: pd.DataFrame) -> None: diff --git a/ckanext/charts/chart_builders/__init__.py b/ckanext/charts/chart_builders/__init__.py index 3b2e7e7..ae763d5 100644 --- a/ckanext/charts/chart_builders/__init__.py +++ b/ckanext/charts/chart_builders/__init__.py @@ -1,10 +1,9 @@ from __future__ import annotations from .base import BaseChartBuilder -from .plotly import PlotlyBuilder, PlotlyBarForm -from .observable import ObservableBuilder from .chartjs import ChartJSBarBuilder - +from .observable import ObservableBuilder +from .plotly import PlotlyBarForm, PlotlyBuilder DEFAULT_CHART_FORM = PlotlyBarForm diff --git a/ckanext/charts/chart_builders/base.py b/ckanext/charts/chart_builders/base.py index dd7b7af..1a7ffd4 100644 --- a/ckanext/charts/chart_builders/base.py +++ b/ckanext/charts/chart_builders/base.py @@ -3,20 +3,22 @@ from abc import ABC, abstractmethod from typing import Any, cast -import pandas as pd import numpy as np +import pandas as pd -import ckan.types as types import ckan.plugins.toolkit as tk +from ckan import types -import ckanext.charts.const as const +from ckanext.charts import const, fetchers from ckanext.charts.exception import ChartTypeNotImplementedError -from ckanext.charts import fetchers class FilterDecoder: def __init__( - self, filter_input: str, pair_divider: str = "|", key_value_divider: str = ":" + self, + filter_input: str, + pair_divider: str = "|", + key_value_divider: str = ":", ): self.filter_input = filter_input self.pair_divider = pair_divider @@ -62,11 +64,13 @@ def __init__( # TODO: requires more work here... # I'm not sure about other types, that column can have if column_type == np.int64: - values = [int(value) for value in values] + converted_values = [int(value) for value in values] elif column_type == np.float64: - values = [float(value) for value in values] + converted_values = [float(value) for value in values] + else: + converted_values = values - filtered_df = filtered_df[filtered_df[column].isin(values)] + filtered_df = filtered_df[filtered_df[column].isin(converted_values)] self.df = filtered_df @@ -118,7 +122,6 @@ def to_json(self) -> str: """This method should return the chart data as a dumped JSON data. It will be passed to a JS script, that will render a chart based on this data.""" - pass def drop_empty_values(self, data: dict[str, Any]) -> dict[str, Any]: """Remove empty values from the dictionary""" @@ -408,7 +411,7 @@ def y_axis_field(self, choices: list[dict[str, str]]) -> dict[str, Any]: self.get_validator("charts_if_empty_same_as")("names"), self.get_validator("unicode_safe"), ], - } + }, ) return field @@ -446,7 +449,7 @@ def y_multi_axis_field( if max_items: field["validators"].append( - self.get_validator("charts_list_length_validator")(max_items) + self.get_validator("charts_list_length_validator")(max_items), ) field["form_attrs"]["maxItems"] = max_items @@ -476,7 +479,7 @@ def values_multi_field( self.get_validator("charts_to_list_if_string"), ], "help_text": help_text, - } + }, ) return field @@ -491,8 +494,10 @@ def split_data_field(self) -> dict[str, Any]: self.get_validator("default")(False), self.get_validator("boolean_validator"), ], - "help_text": """Split data into different columns by years based - on datetime column stated for the x-axis""" + "help_text": ( + "Split data into different columns by years based on datetime " + "column stated for the x-axis" + ), } def skip_null_values_field(self) -> dict[str, Any]: @@ -504,7 +509,7 @@ def skip_null_values_field(self) -> dict[str, Any]: "validators": [ self.get_validator("boolean_validator"), ], - "help_text": """Entries of the data with missing values will not be + "help_text": """Entries of the data with missing values will not be graphed or will be skipped""", } diff --git a/ckanext/charts/chart_builders/chartjs.py b/ckanext/charts/chart_builders/chartjs.py index 1c8a610..d5b1456 100644 --- a/ckanext/charts/chart_builders/chartjs.py +++ b/ckanext/charts/chart_builders/chartjs.py @@ -50,7 +50,7 @@ def create_zoom_and_title_options(self, options: str[dict, Any]) -> dict[str, An "display": True, "position": "bottom", }, - } + }, ) return options @@ -69,7 +69,7 @@ def _prepare_data(self) -> dict[str, Any]: "elements": {"bar": {"borderWidth": 1}}, "plugins": {"legend": {"position": "top"}}, "scales": {"y": {"beginAtZero": True}}, - } + }, ) datasets = [] @@ -80,10 +80,10 @@ def _prepare_data(self) -> dict[str, Any]: for label in data["data"]["labels"]: try: aggregate_value = int( - self.df[self.df[self.settings["x"]] == label][field].sum() + self.df[self.df[self.settings["x"]] == label][field].sum(), ) - except ValueError: - raise ChartBuildError(f"Column '{field}' is not numeric") + except ValueError as e: + raise ChartBuildError(f"Column '{field}' is not numeric") from e dataset_data.append(aggregate_value) @@ -91,7 +91,7 @@ def _prepare_data(self) -> dict[str, Any]: { "label": field, "data": dataset_data, - } + }, ) data["data"]["datasets"] = datasets @@ -216,15 +216,15 @@ def to_json(self) -> str: for label in data["data"]["labels"]: dataset_data.append( self.convert_to_native_types( - self.df[self.df[self.settings["names"]] == label][field].sum() - ) + self.df[self.df[self.settings["names"]] == label][field].sum(), + ), ) data["data"]["datasets"] = [ { "label": field, "data": dataset_data, - } + }, ] return json.dumps(data) @@ -280,17 +280,17 @@ def to_json(self) -> str: dataset_data.append( { "x": self.convert_to_native_types( - data_series[self.settings["x"]] + data_series[self.settings["x"]], ), "y": self.convert_to_native_types(data_series[field]), - } + }, ) data["data"]["datasets"] = [ { "label": self.settings["y"], "data": dataset_data, - } + }, ] data["options"] = self.create_zoom_and_title_options(data["options"]) return json.dumps(self._configure_date_axis(data)) @@ -311,7 +311,7 @@ def _configure_date_axis(self, data: dict[str, Any]) -> dict[str, Any]: "unit": "day", "displayFormats": {"day": "YYYY-MM-DD"}, }, - } + }, ) scales["x"] = x_scale @@ -367,11 +367,11 @@ def to_json(self) -> str: dataset_data.append( { "x": self.convert_to_native_types( - data_series[self.settings["x"]] + data_series[self.settings["x"]], ), "y": self.convert_to_native_types(data_series[field]), "r": self._calculate_bubble_radius(data_series, max_size), - } + }, ) data["data"]["datasets"] = [ @@ -389,8 +389,8 @@ def _calculate_bubble_radius(self, data_series: pd.Series, max_size: int) -> int # or the column is not numeric try: pd.to_numeric(max_size) - except ValueError: - raise ChartBuildError(f"Column '{size_column}' is not numeric") + except ValueError as e: + raise ChartBuildError(f"Column '{size_column}' is not numeric") from e if max_size == 0 or np.isnan(max_size): bubble_radius = self.min_bubble_radius @@ -434,7 +434,7 @@ def to_json(self) -> str: for value in self.settings["values"]: try: dataset_data.append( - self.df[self.df[self.settings["names"]] == label][value].item() + self.df[self.df[self.settings["names"]] == label][value].item(), ) except ValueError: # TODO: probably collision by name column, e.g two or more rows @@ -468,7 +468,9 @@ def get_form_fields(self): self.names_field(columns), self.values_multi_field( columns, - help_text="Select 3 or more different categorical variables (dimensions)", + help_text=( + "Select 3 or more different categorical variables (dimensions)" + ), ), self.more_info_button_field(), self.limit_field(), diff --git a/ckanext/charts/chart_builders/observable.py b/ckanext/charts/chart_builders/observable.py index b78328a..7385604 100644 --- a/ckanext/charts/chart_builders/observable.py +++ b/ckanext/charts/chart_builders/observable.py @@ -26,7 +26,7 @@ def to_json(self) -> str: "type": "bar", "data": self.df.to_dict(orient="records"), "settings": self.settings, - } + }, ) @@ -72,7 +72,7 @@ def to_json(self) -> str: "type": "horizontal-bar", "data": self.df.to_dict(orient="records"), "settings": self.settings, - } + }, ) @@ -90,9 +90,9 @@ def to_json(self) -> str: "settings": self.settings, "plot": { "x": {"reverse": self.settings.get("invert_x", False)}, - "y": {"reverse": self.settings.get("invert_y", False)} - } - } + "y": {"reverse": self.settings.get("invert_y", False)}, + }, + }, ) @@ -132,7 +132,7 @@ def to_json(self) -> str: "type": "pie", "data": self.df.to_dict(orient="records"), "settings": self.settings, - } + }, ) @@ -211,7 +211,7 @@ def to_json(self) -> str: "type": "scatter", "data": self.df.to_dict(orient="records"), "settings": self.settings, - } + }, ) @@ -251,7 +251,7 @@ def to_json(self) -> str: "type": "auto", "data": self.df.to_dict(orient="records"), "settings": self.settings, - } + }, ) diff --git a/ckanext/charts/chart_builders/plotly.py b/ckanext/charts/chart_builders/plotly.py index b4962a0..9bce079 100644 --- a/ckanext/charts/chart_builders/plotly.py +++ b/ckanext/charts/chart_builders/plotly.py @@ -1,11 +1,10 @@ from __future__ import annotations -from typing import Any, cast +from typing import Any import pandas as pd import plotly.express as px import plotly.graph_objects as go - from pandas.core.frame import DataFrame from pandas.errors import ParserError from plotly.subplots import make_subplots @@ -91,7 +90,9 @@ def _split_data_by_year(self) -> None: self.df["year"] = pd.to_datetime(self.df[self.settings["x"]]).dt.year self.df = self.df.pivot( - index=self.settings["x"], columns="year", values=self.settings["y"][0] + index=self.settings["x"], + columns="year", + values=self.settings["y"][0], ) self.settings["y"] = self.df.columns.to_list() @@ -123,7 +124,9 @@ def _skip_null_values(self, column: str) -> tuple[Any, Any]: return x, y def _break_chart_by_missing_data( - self, df: DataFrame, column: str + self, + df: DataFrame, + column: str, ) -> tuple[Any, Any]: """ Find gaps in date column and fill them with missing dates. @@ -139,7 +142,9 @@ def _break_chart_by_missing_data( df["date"] = pd.to_datetime(df[self.settings["x"]]).dt.date all_dates = pd.date_range( - start=df["date"].min(), end=df["date"].max(), unit="ns" + start=df["date"].min(), + end=df["date"].max(), + unit="ns", ).date date_range_df = pd.DataFrame({"date": all_dates}) @@ -239,7 +244,7 @@ def build_scatter_chart(self) -> Any: if self.df[self.settings["size"]].dtype not in ["int64", "float64"]: raise exception.ChartBuildError( """The 'size' source should be a field of positive integer - or float type.""" + or float type.""", ) fig = px.scatter( @@ -332,7 +337,9 @@ class PlotlyLineForm(BasePlotlyForm): builder = PlotlyLineBuilder def plotly_y_multi_axis_field( - self, columns: list[dict[str, str]], max_y: int = 0 + self, + columns: list[dict[str, str]], + max_y: int = 0, ) -> dict[str, Any]: """Plotly line chart supports multi columns for y-axis""" field = self.y_multi_axis_field(columns, max_y) diff --git a/ckanext/charts/const.py b/ckanext/charts/const.py index eb88634..1e06f72 100644 --- a/ckanext/charts/const.py +++ b/ckanext/charts/const.py @@ -7,7 +7,7 @@ SUPPORTED_CACHE_STRATEGIES = [ CACHE_FILE_CSV, CACHE_FILE_ORC, - CACHE_REDIS + CACHE_REDIS, ] REDIS_PREFIX = "ckanext-charts:*" diff --git a/ckanext/charts/fetchers.py b/ckanext/charts/fetchers.py index 25c6838..8ea70d5 100644 --- a/ckanext/charts/fetchers.py +++ b/ckanext/charts/fetchers.py @@ -3,14 +3,14 @@ import logging from abc import ABC, abstractmethod from io import BytesIO -from typing import Any, cast +from typing import Any import lxml import pandas as pd import requests import sqlalchemy as sa -from sqlalchemy.exc import ProgrammingError from psycopg2.errors import UndefinedTable +from sqlalchemy.exc import ProgrammingError from ckanext.datastore.backend.postgres import get_read_engine @@ -71,19 +71,20 @@ def fetch_data(self) -> pd.DataFrame: .select_from(sa.table(self.resource_id)) .limit(self.limit), get_read_engine(), - ).drop(columns=["_id", "_full_text"], errors='ignore') + ).drop(columns=["_id", "_full_text"], errors="ignore") if "date_time" in df.columns: try: - df['date_time'] = pd.to_datetime(df['date_time']) + df["date_time"] = pd.to_datetime(df["date_time"]) # Convert valid dates to ISO format - df['date_time'] = df['date_time'].dt.strftime("%Y-%m-%dT%H:%M:%S") + df["date_time"] = df["date_time"].dt.strftime("%Y-%m-%dT%H:%M:%S") except (ValueError, TypeError, AttributeError) as e: # Log the warning and keep the original values if conversion fails - log.warning(f"Warning: Could not convert date_time column: {e}") + log.warning("Warning: Could not convert date_time column: %s", e) - # Apply numeric conversion to all columns - it will safely ignore non-numeric values - df = df.apply(pd.to_numeric, errors='ignore') + # Apply numeric conversion to all columns - it will safely ignore + # non-numeric values + df = df.apply(pd.to_numeric, errors="ignore") except (ProgrammingError, UndefinedTable) as e: raise exception.DataFetchError( diff --git a/ckanext/charts/helpers.py b/ckanext/charts/helpers.py index e8a5e4f..5dc9d78 100644 --- a/ckanext/charts/helpers.py +++ b/ckanext/charts/helpers.py @@ -4,11 +4,10 @@ import ckan.plugins.toolkit as tk -from ckanext.charts import utils +from ckanext.charts import config, utils from ckanext.charts.cache import count_file_cache_size, count_redis_cache_size -from ckanext.charts import config -from ckanext.charts.fetchers import DatastoreDataFetcher from ckanext.charts.chart_builders import get_chart_engines +from ckanext.charts.fetchers import DatastoreDataFetcher def get_redis_cache_size(): @@ -41,7 +40,7 @@ def charts_get_resource_columns(resource_id: str) -> str: fetcher = DatastoreDataFetcher(resource_id) return json.dumps( - [{"id": col, "title": col} for col in fetcher.fetch_data().columns] + [{"id": col, "title": col} for col in fetcher.fetch_data().columns], ) diff --git a/ckanext/charts/logic/validators.py b/ckanext/charts/logic/validators.py index 6f7370f..8b10827 100644 --- a/ckanext/charts/logic/validators.py +++ b/ckanext/charts/logic/validators.py @@ -2,10 +2,10 @@ from typing import Any, Callable -import ckan.types as types import ckan.plugins.toolkit as tk +from ckan import types -from ckanext.charts import utils, const +from ckanext.charts import const, utils from ckanext.charts.chart_builders import DEFAULT_CHART_FORM @@ -39,8 +39,8 @@ def charts_strategy_support(strategy: str) -> str: from pyarrow import orc as _ # noqa except ImportError: raise tk.Invalid( - tk._("Can't use File Orc cache strategy. PyArrow is not installed") - ) + tk._("Can't use File Orc cache strategy. PyArrow is not installed"), + ) from None if not strategy: return const.DEFAULT_CACHE_STRATEGY @@ -65,7 +65,7 @@ def validate_chart_extras( settings, err = tk.navl_validate( settings, builder(settings["resource_id"]).get_validation_schema( - context.get("_for_show", False) + context.get("_for_show", False), ), {}, ) diff --git a/ckanext/charts/plugin.py b/ckanext/charts/plugin.py index 7580c12..40e01c9 100644 --- a/ckanext/charts/plugin.py +++ b/ckanext/charts/plugin.py @@ -1,23 +1,20 @@ from __future__ import annotations -from typing import Any from os import path +from typing import Any from yaml import safe_load import ckan.plugins as p import ckan.plugins.toolkit as tk -import ckan.logic as logic -from ckan import types +from ckan import logic, types from ckan.common import CKANConfig from ckan.config.declaration import Declaration, Key import ckanext.charts.config as conf -import ckanext.charts.utils as utils -import ckanext.charts.const as const -from ckanext.charts import cache, exception, fetchers, utils -from ckanext.charts.logic.schema import settings_schema +from ckanext.charts import cache, const, exception, fetchers, utils from ckanext.charts.chart_builders import DEFAULT_CHART_FORM +from ckanext.charts.logic.schema import settings_schema @tk.blanket.helpers @@ -77,7 +74,9 @@ def can_view(self, data_dict: dict[str, Any]) -> bool: return utils.can_view_be_viewed(data_dict) def setup_template_variables( - self, context: types.Context, data_dict: dict[str, Any] + self, + context: types.Context, + data_dict: dict[str, Any], ) -> dict[str, Any]: """ The ``data_dict`` contains the following keys: @@ -98,7 +97,9 @@ def setup_template_variables( try: settings, _ = tk.navl_validate( - data_dict["resource_view"], settings_schema(), context + data_dict["resource_view"], + settings_schema(), + context, ) except Exception as e: data["error_msg"] = e @@ -119,7 +120,8 @@ def setup_template_variables( else: try: chart = utils.build_chart_for_resource( - settings, data_dict["resource"]["id"] + settings, + data_dict["resource"]["id"], ) except exception.ChartBuildError as e: data["error_msg"] = e @@ -225,11 +227,13 @@ def can_view(self, data_dict: dict[str, Any]) -> bool: return utils.can_view_be_viewed(data_dict) def setup_template_variables( - self, context: types.Context, data_dict: dict[str, Any] + self, + context: types.Context, + data_dict: dict[str, Any], ) -> dict[str, Any]: form_builder = DEFAULT_CHART_FORM - data = { + return { "resource_id": data_dict["resource"]["id"], "settings": { "engine": "plotly", @@ -239,8 +243,6 @@ def setup_template_variables( "form_builder": form_builder, } - return data - def view_template(self, context: types.Context, data_dict: dict[str, Any]) -> str: return "charts/charts_builder_view.html" diff --git a/ckanext/charts/tests/conftest.py b/ckanext/charts/tests/conftest.py index 9f2460d..7de52ed 100644 --- a/ckanext/charts/tests/conftest.py +++ b/ckanext/charts/tests/conftest.py @@ -16,5 +16,5 @@ def data_frame(): "name": ["Alice", "Bob"], "surname": ["Bing", "Right"], "age": [25, 30], - } + }, ) diff --git a/ckanext/charts/tests/test_builders.py b/ckanext/charts/tests/test_builders.py index 640f1e7..9085e72 100644 --- a/ckanext/charts/tests/test_builders.py +++ b/ckanext/charts/tests/test_builders.py @@ -73,10 +73,10 @@ def test_build_multi_y_line(self, data_frame): assert "data" in result assert "layout" in result - layout = json.loads(result)['layout'] + layout = json.loads(result)["layout"] - assert 'yaxis' in layout - assert 'yaxis2' in layout + assert "yaxis" in layout + assert "yaxis2" in layout def test_build_scatter(self, data_frame): result = utils.build_chart_for_data( diff --git a/ckanext/charts/tests/test_cache.py b/ckanext/charts/tests/test_cache.py index b2d9277..d7e7d20 100644 --- a/ckanext/charts/tests/test_cache.py +++ b/ckanext/charts/tests/test_cache.py @@ -213,7 +213,9 @@ def test_file_is_expired(self): assert isinstance(fetcher.get_cached_data(), pd.DataFrame) - file_path = cache.FileCacheORC().make_file_path_from_key(fetcher.make_cache_key()) + file_path = cache.FileCacheORC().make_file_path_from_key( + fetcher.make_cache_key(), + ) with freeze_time(datetime.now() + timedelta(seconds=101)): assert cache.FileCacheORC().is_file_cache_expired(file_path) @@ -230,6 +232,8 @@ def test_file_is_not_expired(self): assert isinstance(fetcher.get_cached_data(), pd.DataFrame) - file_path = cache.FileCacheORC().make_file_path_from_key(fetcher.make_cache_key()) + file_path = cache.FileCacheORC().make_file_path_from_key( + fetcher.make_cache_key(), + ) assert not cache.FileCacheORC().is_file_cache_expired(file_path) diff --git a/ckanext/charts/utils.py b/ckanext/charts/utils.py index 8b35047..4dd64e3 100644 --- a/ckanext/charts/utils.py +++ b/ckanext/charts/utils.py @@ -8,8 +8,8 @@ import ckan.plugins.toolkit as tk from ckanext.charts.chart_builders import get_chart_engines -from ckanext.charts.fetchers import DatastoreDataFetcher from ckanext.charts.exception import ChartBuildError +from ckanext.charts.fetchers import DatastoreDataFetcher def get_column_options(resource_id: str) -> list[dict[str, str]]: @@ -70,9 +70,9 @@ def _build_chart(settings: dict[str, Any], dataframe: pd.DataFrame) -> str | Non try: chart_config = builder(dataframe, settings).to_json() except KeyError as e: - raise ChartBuildError(f"Missing column or field {e}") + raise ChartBuildError(f"Missing column or field {e}") from e except ValueError as e: - raise ChartBuildError(f"{e}") + raise ChartBuildError from e return chart_config diff --git a/ckanext/charts/views.py b/ckanext/charts/views.py index 9feccc6..2261876 100644 --- a/ckanext/charts/views.py +++ b/ckanext/charts/views.py @@ -7,7 +7,7 @@ from ckan.logic import parse_params from ckan.plugins import plugin_loaded -from ckanext.charts import cache, exception, utils, fetchers +from ckanext.charts import cache, exception, fetchers, utils charts = Blueprint("charts_view", __name__) ERROR_TEMPLATE = "charts/snippets/error_chart.html" @@ -96,7 +96,7 @@ def _clear_chart( user_chart_builder: bool = False, ): builder = _get_form_builder( - {"engine": "plotly", "type": "Bar", "resource_id": resource_id} + {"engine": "plotly", "type": "Bar", "resource_id": resource_id}, ) data, errors = tk.navl_validate({}, builder.get_validation_schema(), {}) diff --git a/pyproject.toml b/pyproject.toml index 4c0d3b4..37bdf4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,6 +96,7 @@ ignore = [ [tool.ruff.per-file-ignores] "ckanext/charts/tests*" = ["S", "PL"] "ckanext/charts/logic/schema.py" = ["PLR0913"] +"ckanext/charts/chart_builders/plotly.py" = ["C901", "PLR0912"] [tool.isort] known_ckan = "ckan"