diff --git a/ckanext/charts/cache.py b/ckanext/charts/cache.py
index 7747288..12aae6e 100644
--- a/ckanext/charts/cache.py
+++ b/ckanext/charts/cache.py
@@ -143,7 +143,8 @@ class FileCacheORC(FileCache):
     def read_data(self, file: File) -> pd.DataFrame | None:
         from pyarrow import orc
 
+        # ORCFile.read() loads the whole file, so the frame holds every row.
         return orc.ORCFile(file).read().to_pandas()
 
     def write_data(self, file_path: str, data: pd.DataFrame) -> None:
         for col in data.select_dtypes(include=["object"]).columns:
diff --git a/ckanext/charts/chart_builders/base.py b/ckanext/charts/chart_builders/base.py
index 08c132d..db8c8f2 100644
--- a/ckanext/charts/chart_builders/base.py
+++ b/ckanext/charts/chart_builders/base.py
@@ -445,6 +445,27 @@ def values_multi_field(
 
         return field
 
+    def split_data_field(self) -> dict[str, Any]:
+        """Return the form field definition for the "Split by years" checkbox.
+
+        The field is stored under ``split_data``, defaults to ``False`` and is
+        validated as a boolean.
+        """
+        return {
+            "field_name": "split_data",
+            "label": "Split by years",
+            "form_snippet": "chart_checkbox.html",
+            "group": "Data",
+            "validators": [
+                self.get_validator("default")(False),
+                self.get_validator("boolean_validator"),
+            ],
+            "help_text": (
+                "Split data into different columns by years based "
+                "on datetime column stated for the x-axis"
+            ),
+        }
+
     def sort_x_field(self) -> dict[str, Any]:
         return {
             "field_name": "sort_x",
diff --git a/ckanext/charts/chart_builders/plotly.py b/ckanext/charts/chart_builders/plotly.py
index 5351ee3..b5e5bf4 100644
--- a/ckanext/charts/chart_builders/plotly.py
+++ b/ckanext/charts/chart_builders/plotly.py
@@ -46,11 +46,47 @@ class PlotlyLineBuilder(PlotlyBuilder):
     def to_json(self) -> Any:
         return self.build_line_chart()
 
+    def split_data_by_year(self) -> "PlotlyLineBuilder":
+        """Reshape ``self.df`` so that each year becomes its own y series.
+
+        The x-axis column (``settings["x"]``) is parsed as datetimes and the
+        first y column is averaged per calendar day. The result is pivoted
+        into one column per year, indexed by ``MM/DD``, and ``settings["y"]``
+        is replaced with the list of year columns.
+
+        Returns:
+            The builder itself, with ``self.df`` and ``self.settings``
+            updated in place.
+        """
+        x_column = self.settings["x"]
+        y_column = self.settings["y"][0]
+
+        # Group on a detached Series rather than adding a helper column, so
+        # the source DataFrame object is never mutated.
+        days = pd.to_datetime(self.df[x_column]).dt.date
+        daily_mean = self.df[y_column].groupby(days).mean()
+
+        # (month/day, year) index: unstack() moves the year level to columns.
+        stamps = pd.to_datetime(daily_mean.index)
+        daily_mean.index = [stamps.strftime("%m/%d"), stamps.strftime("%Y")]
+        self.df = daily_mean.unstack()
+
+        self.settings["y"] = self.df.columns.to_list()
+        # "date_time" is kept for compatibility; the configured x column is
+        # restored as well so settings["x"] still resolves after the reshape.
+        self.df["date_time"] = self.df.index
+        self.df[x_column] = self.df.index
+
+        return self
+
     def build_line_chart(self) -> Any:
         """
         Build a line chart. It supports multi columns for y-axis
         to display on the line chart.
         """
+        if self.settings.get("split_data", False):
+            self.split_data_by_year()
+
         fig = make_subplots(specs=[[{"secondary_y": True}]])
 
         fig.add_trace(
@@ -199,7 +235,8 @@ def get_form_fields(self):
             self.invert_y_field(),
             self.sort_x_field(),
             self.sort_y_field(),
-            self.limit_field(),
+            self.split_data_field(),
+            self.limit_field(maximum=1_000_000),
             self.chart_title_field(),
             self.filter_field(columns),
         ]