From 780662b09ee547eea6986dfd48cb3b744d5beaae Mon Sep 17 00:00:00 2001 From: Fabien Arcellier Date: Mon, 1 Jul 2024 09:56:53 +0200 Subject: [PATCH 1/5] feat: implement editable dataframe to manage dataframe editor component * feat: implement the backend component EditableDataframe * feat: implement MutableValue --- alfred/ci.py | 8 +- src/writer/__init__.py | 1 + src/writer/core.py | 313 ++++++++++++++++++++++++++++++++++++- src/writer/ss_types.py | 13 +- tests/backend/test_core.py | 241 ++++++++++++++++++++++++++++ 5 files changed, 567 insertions(+), 9 deletions(-) diff --git a/alfred/ci.py b/alfred/ci.py index 350481c2a..a4b205beb 100644 --- a/alfred/ci.py +++ b/alfred/ci.py @@ -32,8 +32,12 @@ def ci_mypy(): alfred.run("mypy ./src/writer --exclude app_templates/*") @alfred.command("ci.ruff", help="linting with ruff") -def ci_ruff(): - alfred.run("ruff check") +@alfred.option('--fix', '-f', help="fix linting errors", is_flag=True, default=False) +def ci_ruff(fix): + if fix: + alfred.run("ruff check --fix") + else: + alfred.run("ruff check") @alfred.command("ci.pytest", help="run pytest on ./tests") def ci_test(): diff --git a/src/writer/__init__.py b/src/writer/__init__.py index b93a03f1d..1639e1974 100644 --- a/src/writer/__init__.py +++ b/src/writer/__init__.py @@ -5,6 +5,7 @@ from writer.core import ( BytesWrapper, Config, + EditableDataframe, FileWrapper, Readable, State, diff --git a/src/writer/core.py b/src/writer/core.py index 8cb2cf442..363893865 100644 --- a/src/writer/core.py +++ b/src/writer/core.py @@ -2,6 +2,7 @@ import base64 import contextlib import copy +import dataclasses import datetime import inspect import io @@ -14,6 +15,7 @@ import time import traceback import urllib.request +from abc import ABCMeta from multiprocessing.process import BaseProcess from types import ModuleType from typing import ( @@ -35,9 +37,15 @@ cast, ) +import pandas +import pyarrow # type: ignore + from writer import core_ui from writer.core_ui import Component from 
writer.ss_types import ( + DataframeRecordAdded, + DataframeRecordRemoved, + DataframeRecordUpdated, InstancePath, InstancePathItem, Readable, @@ -47,6 +55,8 @@ ) if TYPE_CHECKING: + import polars + from writer.app_runner import AppProcess @@ -129,12 +139,10 @@ class StateSerialiserException(ValueError): class StateSerialiser: - """ Serialises user state values before sending them to the front end. Provides JSON-compatible values, including data URLs for binary data. """ - def serialise(self, v: Any) -> Union[Dict, List, str, bool, int, float, None]: from writer.ai import Conversation if isinstance(v, State): @@ -153,6 +161,9 @@ def serialise(self, v: Any) -> Union[Dict, List, str, bool, int, float, None]: return self._serialise_list_recursively(v) if isinstance(v, (str, bool)): return v + if isinstance(v, EditableDataframe): + table = v.pyarrow_table() + return self._serialise_pyarrow_table(table) if v is None: return v @@ -242,6 +253,44 @@ def _serialise_pyarrow_table(self, table): bw = BytesWrapper(buf, "application/vnd.apache.arrow.file") return self.serialise(bw) +class MutableValue: + """ + MutableValue allows you to implement a value whose modification + will be followed by the state of Writer Framework and will trigger the refresh + of the user interface. + + >>> class MyValue(MutableValue): + >>> def __init__(self, value): + >>> self.value = value + >>> + >>> def modify(self, new_value): + >>> self.value = new_value + >>> self.mutate() + """ + def __init__(self): + self._mutated = False + + def mutated(self) -> bool: + """ + Returns whether the value has been mutated. + :return: + """ + return self._mutated + + def mutate(self) -> None: + """ + Marks the value as mutated. + This will trigger the refresh of the user interface on the next round trip + :return: + """ + self._mutated = True + + def reset_mutation(self) -> None: + """ + Resets the mutation flag to False. 
+ :return: + """ + self._mutated = False class StateProxy: @@ -345,12 +394,13 @@ def carry_mutation_flag(base_key, child_key): for child_key, child_mutation in child_mutations.items(): nested_key = carry_mutation_flag(escaped_key, child_key) serialised_mutations[nested_key] = child_mutation - elif f"+{key}" in self.mutated: + elif f"+{key}" in self.mutated or \ + (isinstance(value, MutableValue) is True and value.mutated()): try: serialised_value = state_serialiser.serialise(value) + value.reset_mutation() except BaseException: - raise ValueError( - f"""Couldn't serialise value of type "{ type(value) }" for key "{ key }".""") + raise ValueError(f"""Couldn't serialise value of type "{ type(value) }" for key "{ key }".""") serialised_mutations[f"+{escaped_key}"] = serialised_value deleted_keys = \ @@ -1506,6 +1556,241 @@ def __set__(self, instance, value): proxy = getattr(instance, self.objectName) proxy[self.key] = value + +class DataframeRecordRemove: + pass + + +class DataframeRecordProcessor(): + """ + This interface defines the signature of the methods to process the events of a + dataframe compatible with EditableDataframe. + + A Dataframe can be any structure composed of tabular data. + + This class defines the signature of the methods to be implemented. + """ + __metaclass__ = ABCMeta + + @staticmethod + def match(df: Any) -> bool: + """ + This method checks if the dataframe is compatible with the processor. 
+ """ + raise NotImplementedError + + @staticmethod + def record_add(df: Any, payload: DataframeRecordAdded) -> Any: + """ + signature of the methods to be implemented to process wf-dfeditor-add event + + >>> edf = EditableDataframe(df) + >>> edf.record_add({"record": {"a": 1, "b": 2}}) + """ + raise NotImplementedError + + @staticmethod + def record_update(df: Any, payload: DataframeRecordUpdated) -> Any: + """ + signature of the methods to be implemented to process wf-dfeditor-update event + + >>> edf = EditableDataframe(df) + >>> edf.record_update({"record_id": 12, "record": {"a": 1, "b": 2}}) + """ + raise NotImplementedError + + @staticmethod + def record_delete(df: Any, payload: DataframeRecordUpdated) -> Any: + """ + signature of the methods to be implemented to process wf-dfeditor-remove event + + >>> edf = EditableDataframe(df) + >>> edf.record_delete({"record_id": 12}) + """ + raise NotImplementedError + + @staticmethod + def pyarrow_table(df: Any) -> pyarrow.Table: + """ + Serializes the dataframe into a pyarrow table + """ + raise NotImplementedError + + +class PandasRecordProcessor(DataframeRecordProcessor): + """ + PandasRecordProcessor processes records from a pandas dataframe saved into an EditableDataframe + + >>> df = pandas.DataFrame({"a": [1, 2], "b": [3, 4]}) + >>> edf = EditableDataframe(df) + >>> edf.record_add({"a": 1, "b": 2}) + """ + + @staticmethod + def match(df: Any) -> bool: + return True if isinstance(df, pandas.DataFrame) else False + + @staticmethod + def record_add(df: pandas.DataFrame, payload: DataframeRecordAdded) -> pandas.DataFrame: + """ + >>> edf = EditableDataframe(df) + >>> edf.record_add({"record": {"a": 1, "b": 2}}) + """ + record, index = _split_record_as_pandas_record_and_index(payload['record'], df.index.names) + + new_df = pandas.DataFrame([record], index=[index]) + return pandas.concat([df, new_df]) + + @staticmethod + def record_update(df: pandas.DataFrame, payload: DataframeRecordUpdated): + raise 
NotImplementedError + + @staticmethod + def record_delete(df: pandas.DataFrame, payload: DataframeRecordUpdated): + raise NotImplementedError + + @staticmethod + def pyarrow_table(df: pandas.DataFrame) -> pyarrow.Table: + """ + Serializes the dataframe into a pyarrow table + """ + df['__record_id'] = range(1, len(df) + 1) + table = pyarrow.Table.from_pandas(df=df) + return table + + +class PolarRecordProcessor(DataframeRecordProcessor): + """ + PolarRecordProcessor processes records from a polar dataframe saved into an EditableDataframe + + >>> df = polars.DataFrame({"a": [1, 2], "b": [3, 4]}) + >>> edf = EditableDataframe(df) + >>> edf.record_add({"record": {"a": 1, "b": 2}}) + """ + + @staticmethod + def match(df: Any) -> bool: + import polars + return True if isinstance(df, polars.DataFrame) else False + + @staticmethod + def record_add(df: 'polars.DataFrame', payload: DataframeRecordAdded) -> 'polars.DataFrame': + import polars + new_df = polars.DataFrame([payload['record']]) + return polars.concat([df, new_df]) + + @staticmethod + def record_update(df: 'polars.DataFrame', payload: DataframeRecordUpdated) -> 'polars.DataFrame': + raise NotImplementedError + + @staticmethod + def record_delete(df: 'polars.DataFrame', payload: DataframeRecordUpdated) -> 'polars.DataFrame': + raise NotImplementedError + + @staticmethod + def pyarrow_table(df: 'polars.DataFrame') -> pyarrow.Table: + """ + Serializes the dataframe into a pyarrow table + """ + import pyarrow.interchange + table: pyarrow.Table = pyarrow.interchange.from_dataframe(df) + return table + +class RecordListRecordProcessor(DataframeRecordProcessor): + """ + RecordListRecordProcessor processes records from a list of record saved into an EditableDataframe + + >>> df = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] + >>> edf = EditableDataframe(df) + >>> edf.record_add({"record": {"a": 1, "b": 2}}) + """ + + @staticmethod + def match(df: Any) -> bool: + return True if isinstance(df, list) else False + + @staticmethod + 
def record_add(df: List[Dict[str, Any]], payload: DataframeRecordAdded) -> List[Dict[str, Any]]: + df.append(payload['record']) + return df + + @staticmethod + def record_update(df: List[Dict[str, Any]], payload: DataframeRecordUpdated) -> List[Dict[str, Any]]: + raise NotImplementedError + + @staticmethod + def record_delete(df: List[Dict[str, Any]], payload: DataframeRecordUpdated) -> List[Dict[str, Any]]: + raise NotImplementedError + + @staticmethod + def pyarrow_table(df: List[Dict[str, Any]]) -> pyarrow.Table: + """ + Serializes the dataframe into a pyarrow table + """ + column_names = list(df[0].keys()) + columns = {key: [record[key] for record in df] for key in column_names} + + pyarrow_columns = {key: pyarrow.array(values) for key, values in columns.items()} + schema = pyarrow.schema([(key, pyarrow_columns[key].type) for key in pyarrow_columns]) + table = pyarrow.Table.from_arrays( + [pyarrow_columns[key] for key in column_names], + schema=schema + ) + + return table + +class EditableDataframe(MutableValue): + """ + Editable Dataframe makes it easier to process events from components + that modify a dataframe like the dataframe editor. 
+ + >>> initial_state = wf.init_state({ + >>> "df": wf.EditableDataframe(df) + >>> }) + + Editable Dataframe is compatible with a pandas, thrillers or record list dataframe + """ + processors = [PandasRecordProcessor, PolarRecordProcessor, RecordListRecordProcessor] + + def __init__(self, df: Union[pandas.DataFrame, 'polars.DataFrame', List[dict], List[list]]): + super().__init__() + self._df = df + self.processor: Type[DataframeRecordProcessor] + for processor in self.processors: + if processor.match(self.df): + self.processor = processor + break + + if self.processor is None: + raise ValueError("The dataframe must be a pandas, polar Dataframe or a list of record") + + @property + def df(self) -> Union[pandas.DataFrame, 'polars.DataFrame', List[dict], List[list]]: + return self._df + + @df.setter + def df(self, value: Union[pandas.DataFrame, 'polars.DataFrame', List[dict], List[list]]) -> None: + self._df = value + self.mutate() + + def record_add(self, payload: DataframeRecordAdded) -> None: + assert self.processor is not None + + self._df = self.processor.record_add(self.df, payload) + self.mutate() + + def record_update(self, payload: DataframeRecordUpdated) -> None: + pass + + def record_delete(self, payload: DataframeRecordRemoved) -> None: + pass + + def pyarrow_table(self) -> pyarrow.Table: + assert self.processor is not None + + pa_table = self.processor.pyarrow_table(self.df) + return pa_table + S = TypeVar("S", bound=WriterState) def new_initial_state(klass: Type[S], raw_state: dict) -> S: @@ -1623,6 +1908,24 @@ async def _async_wrapper_internal(callable_handler: Callable, arg_values: List[A result = await callable_handler(*arg_values) return result +def _split_record_as_pandas_record_and_index(param: dict, index_columns: list) -> Tuple[dict, tuple]: + """ + Separates a record into the record part and the index part to be able to + create or update a row in a dataframe. 
+ + >>> record, index = _split_record_as_pandas_record_and_index({"a": 1, "b": 2}, ["a"]) + >>> print(record) # {"b": 2} + >>> print(index) # (1,) + """ + final_record = {} + final_index = [] + for key, value in param.items(): + if key in index_columns: + final_index.append(value) + else: + final_record[key] = value + + return final_record, tuple(final_index) state_serialiser = StateSerialiser() initial_state = WriterState() diff --git a/src/writer/ss_types.py b/src/writer/ss_types.py index d82d2cc17..c7e65d9e8 100644 --- a/src/writer/ss_types.py +++ b/src/writer/ss_types.py @@ -161,10 +161,19 @@ class StateEnquiryResponse(AppProcessServerResponse): payload: Optional[StateEnquiryResponsePayload] -AppProcessServerResponsePacket = Tuple[int, - Optional[str], AppProcessServerResponse] +AppProcessServerResponsePacket = Tuple[int, Optional[str], AppProcessServerResponse] +class DataframeRecordAdded(TypedDict): + record: Dict[str, Any] + +class DataframeRecordUpdated(TypedDict): + record_index: int + record: Dict[str, Any] + +class DataframeRecordRemoved(TypedDict): + record_index: int + class WriterEventResult(TypedDict): ok: bool result: Any diff --git a/tests/backend/test_core.py b/tests/backend/test_core.py index 7122c96a3..3eedcfc94 100644 --- a/tests/backend/test_core.py +++ b/tests/backend/test_core.py @@ -6,8 +6,10 @@ import altair import numpy as np +import pandas import pandas as pd import plotly.express as px +import polars import polars as pl import pyarrow as pa import pytest @@ -17,6 +19,7 @@ Evaluator, EventDeserialiser, FileWrapper, + MutableValue, SessionManager, State, StateSerialiser, @@ -192,6 +195,70 @@ def test_to_raw_state(self) -> None: assert self.sp.to_raw_state() == raw_state_dict assert self.sp_simple_dict.to_raw_state() == simple_dict + def test_mutable_value_should_raise_mutation(self) -> None: + """ + Tests that a class that implements MutableValue can be used in a State and throw mutations. 
+ """ + class MyValue(MutableValue): + + def __init__(self): + super().__init__() + self._value = 0 + + def set(self, value): + self._value = value + self.mutate() + + def to_dict(self): + return {"a": self._value} + + s = WriterState({ + "value": MyValue() + }) + # Reset the mutation after initialisation + s._state_proxy.get_mutations_as_dict() + + # When + s["value"].set(2) + a = s._state_proxy.get_mutations_as_dict() + + # Then + assert "+value" in a + assert a["+value"] == {"a": 2} + + def test_mutable_value_should_reset_mutation_after_reading_get_mutations(self) -> None: + """ + Tests that after reading the mutations, they are reset to zero + with a focus on the MutableValue. + """ + class MyValue(MutableValue): + + def __init__(self): + super().__init__() + self._value = 0 + + def set(self, value): + self._value = value + self.mutate() + + def to_dict(self): + return {"a": self._value} + + s = WriterState({ + "value": MyValue() + }) + # Reset the mutation after initialisation + s._state_proxy.get_mutations_as_dict() + + # Then + s["value"].set(2) + s._state_proxy.get_mutations_as_dict() + + # Mutation is read a second time + a = s._state_proxy.get_mutations_as_dict() + + # Then + assert a == {} class TestState: @@ -991,3 +1058,177 @@ def session_verifier_2(headers: Dict[str, str]) -> None: None ) assert s_invalid is None + +class TestEditableDataframe: + + def test_editable_dataframe_expose_pandas_dataframe_as_df_property(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + + edf = wf.EditableDataframe(df) + assert edf.df is not None + assert isinstance(edf.df, pandas.DataFrame) + + def test_editable_dataframe_register_mutation_when_df_is_updated(self) -> None: + # Given + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + + edf = wf.EditableDataframe(df) + + # When + edf.df.loc[0, "age"] = 26 + edf.df = edf.df + + # Then + assert edf.mutated() is True + + def 
test_editable_dataframe_should_process_new_record_into_dataframe(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + + edf = wf.EditableDataframe(df) + + # When + edf.record_add({"record": {"name": "David", "age": 40}}) + + # Then + assert len(edf.df) == 4 + + def test_editable_dataframe_should_process_new_record_into_dataframe_with_index(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + df = df.set_index('name') + + edf = wf.EditableDataframe(df) + + # When + edf.record_add({"record": {"name": "David", "age": 40}}) + + # Then + assert len(edf.df) == 4 + + def test_editable_dataframe_should_process_new_record_into_dataframe_with_multiindex(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35], + "city": ["Paris", "London", "New York"] + }) + df = df.set_index(['name', 'city']) + + edf = wf.EditableDataframe(df) + + # When + edf.record_add({"record": {"name": "David", "age": 40, "city": "Berlin"}}) + + # Then + assert len(edf.df) == 4 + + def test_editable_dataframe_should_serialize_pandas_dataframe_with_multiindex(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35], + "city": ["Paris", "London", "New York"] + }) + df = df.set_index(['name', 'city']) + + edf = wf.EditableDataframe(df) + + # When + table = edf.pyarrow_table() + + # Then + assert len(table) == 3 + + def test_editable_dataframe_expose_polar_dataframe_in_df_property(self) -> None: + df = polars.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + + edf = wf.EditableDataframe(df) + assert edf.df is not None + assert isinstance(edf.df, polars.DataFrame) + + def test_editable_dataframe_should_process_new_record_into_polar_dataframe(self) -> None: + df = polars.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + + edf = 
wf.EditableDataframe(df) + + # When + edf.record_add({"record": {"name": "David", "age": 40}}) + + # Then + assert len(edf.df) == 4 + + + def test_editable_dataframe_should_serialize_polar_dataframe(self) -> None: + df = polars.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35], + "city": ["Paris", "London", "New York"] + }) + + edf = wf.EditableDataframe(df) + + # When + table = edf.pyarrow_table() + + # Then + assert len(table) == 3 + + + def test_editable_dataframe_expose_list_of_records_in_df_property(self) -> None: + records = [ + {"name": "Alice", "age": 25}, + {"name": "Bob", "age": 30}, + {"name": "Charlie", "age": 35} + ] + + edf = wf.EditableDataframe(records) + + assert edf.df is not None + assert isinstance(edf.df, list) + + + def test_editable_dataframe_should_process_new_record_into_list_of_records(self) -> None: + records = [ + {"name": "Alice", "age": 25}, + {"name": "Bob", "age": 30}, + {"name": "Charlie", "age": 35} + ] + + edf = wf.EditableDataframe(records) + + # When + edf.record_add({"record": {"name": "David", "age": 40}}) + + # Then + assert len(edf.df) == 4 + + + def test_editable_dataframe_should_serialized_list_of_records_into_pyarrow_table(self) -> None: + records = [ + {"name": "Alice", "age": 25}, + {"name": "Bob", "age": 30}, + {"name": "Charlie", "age": 35} + ] + + edf = wf.EditableDataframe(records) + + # When + table = edf.pyarrow_table() + + # Then + assert len(table) == 3 From 0bf4c154ee06406ed46e863cc77f80f0b54d0d39 Mon Sep 17 00:00:00 2001 From: Fabien Arcellier Date: Thu, 4 Jul 2024 08:54:31 +0200 Subject: [PATCH 2/5] feat: implement editable dataframe to manage dataframe editor component * feat: implement record_remove on EditableDataframe * docs: write documentation section about dataframe --- docs/framework/dataframe.mdx | 103 +++++++++++++++++++ src/writer/core.py | 186 ++++++++++++++++++++++++++++++----- tests/backend/test_core.py | 92 +++++++++++++++++ 3 files changed, 356 insertions(+), 25 
deletions(-) create mode 100644 docs/framework/dataframe.mdx diff --git a/docs/framework/dataframe.mdx b/docs/framework/dataframe.mdx new file mode 100644 index 000000000..af2fa05fd --- /dev/null +++ b/docs/framework/dataframe.mdx @@ -0,0 +1,103 @@ +--- +title: "Dataframe" +--- + +**writer framework places the dataframe at the core of the application**. This is a great way to model a complex and massive data system. +It offers components such as `dataframe` and `dataframe editor` to manipulate dataframes. These components allow you to visualize and interact with dataframes. + +| compatibility | dataframe | dataframe editor | +|--------------------|---------------------------------------|-------------------------------| +| `pandas.DataFrame` | x | x | +| `polars.DataFrame` | x | x | +| `list of records` | x (with `EditableDataframe`) | x (with `EditableDataframe`) | + +### Use a dataframe + +**a dataframe is simply added to the state**. A component like `dataframe` will be able to display it. + +```python +import pandas +import writer as wf + +wf.init_state({ + 'mydf': pandas.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) +}) +``` + +### Prepare a dataframe for editing + +**writer provides a helper to facilitate dataframe manipulation**. This helper makes it easier to write event handlers such as adding a line, +deleting it or modifying a value, etc... + +```python +import pandas +import writer as wf + +df = pandas.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) + +wf.init_state({ + 'mydf': wf.EditableDataframe(df) +}) +``` + +An `EditableDataframe` value can also be displayed in the `dataframe` component. + +#### Handle events from a dataframe editor + +**The dataframe editor emits events when an action is performed**. You must subscribe to events to integrate changes to the state of the application.
+ +```python +import pandas +import writer as wf + +df = pandas.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) +wf.init_state({ + 'mydf': wf.EditableDataframe(df) +}) + +# Subscribe this event handler to the `wf-dfeditor-add` event +def on_record_add(state, payload): + payload['record']['sales'] = 0 # default value inside the dataframe + state['mydf'].record_add(payload) + + +# Subscribe this event handler to the `wf-dfeditor-update` event +def on_record_change(state, payload): + state['mydf'].record_update(payload) + + +# Subscribe this event handler to the `wf-dfeditor-action` event +def on_record_action(state, payload): + """ + This event corresponds to a quick action in the drop-down menu to the left of the dataframe. + """ + if payload.action == 'remove': + state['mydf'].record_remove(payload) + if payload.action == 'important': + state['mydf'].record(payload.id).update('flag', True) # update the column flag of the dataframe to true, triggers a record_update mutation + if payload.action == 'open': + state['record'] = state['mydf'].record(payload.id) +``` + +#### Alternative to pandas.DataFrame + +`EditableDataframe` can also be used with a polars dataframe or a list of records.
+ +```python +import pandas +import polars + +import writer as wf + +panda_df = pandas.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) +polars_df = polars.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) +list_of_records = [{'a': 1, 'b': 4}, {'a': 2, 'b': 5}, {'a': 3, 'b': 6}] +list_of_records = [[1, 4], [2, 5], [3, 6]] +list_of_records = ["a", "b", "c"] + +wf.init_state({ + 'mypandas': wf.EditableDataframe(panda_df), + 'mypolars': wf.EditableDataframe(polars_df), + 'mylistofrecords': wf.EditableDataframe(list_of_records) +}) +``` diff --git a/src/writer/core.py b/src/writer/core.py index 363893865..e48ec4856 100644 --- a/src/writer/core.py +++ b/src/writer/core.py @@ -37,7 +37,6 @@ cast, ) -import pandas import pyarrow # type: ignore from writer import core_ui @@ -55,6 +54,7 @@ ) if TYPE_CHECKING: + import pandas import polars from writer.app_runner import AppProcess @@ -394,8 +394,13 @@ def carry_mutation_flag(base_key, child_key): for child_key, child_mutation in child_mutations.items(): nested_key = carry_mutation_flag(escaped_key, child_key) serialised_mutations[nested_key] = child_mutation - elif f"+{key}" in self.mutated or \ - (isinstance(value, MutableValue) is True and value.mutated()): + elif f"+{key}" in self.mutated: + try: + serialised_value = state_serialiser.serialise(value) + except BaseException: + raise ValueError(f"""Couldn't serialise value of type "{ type(value) }" for key "{ key }".""") + serialised_mutations[f"+{escaped_key}"] = serialised_value + elif isinstance(value, MutableValue) is True and value.mutated(): try: serialised_value = state_serialiser.serialise(value) value.reset_mutation() @@ -1595,17 +1600,17 @@ def record_update(df: Any, payload: DataframeRecordUpdated) -> Any: signature of the methods to be implemented to process wf-dfeditor-update event >>> edf = EditableDataframe(df) - >>> edf.record_update({"record_id": 12, "record": {"a": 1, "b": 2}}) + >>> edf.record_update({"record_index": 12, "record": {"a": 1, "b": 2}}) """ raise 
NotImplementedError @staticmethod - def record_delete(df: Any, payload: DataframeRecordUpdated) -> Any: + def record_remove(df: Any, payload: DataframeRecordRemoved) -> Any: """ signature of the methods to be implemented to process wf-dfeditor-remove event >>> edf = EditableDataframe(df) - >>> edf.record_delete({"record_id": 12}) + >>> edf.record_remove({"record_index": 12}) """ raise NotImplementedError @@ -1628,33 +1633,61 @@ class PandasRecordProcessor(DataframeRecordProcessor): @staticmethod def match(df: Any) -> bool: + import pandas return True if isinstance(df, pandas.DataFrame) else False @staticmethod - def record_add(df: pandas.DataFrame, payload: DataframeRecordAdded) -> pandas.DataFrame: + def record_add(df: 'pandas.DataFrame', payload: DataframeRecordAdded) -> 'pandas.DataFrame': """ >>> edf = EditableDataframe(df) >>> edf.record_add({"record": {"a": 1, "b": 2}}) """ + import pandas + + _assert_record_match_pandas_df(df, payload['record']) + record, index = _split_record_as_pandas_record_and_index(payload['record'], df.index.names) new_df = pandas.DataFrame([record], index=[index]) return pandas.concat([df, new_df]) @staticmethod - def record_update(df: pandas.DataFrame, payload: DataframeRecordUpdated): - raise NotImplementedError + def record_update(df: 'pandas.DataFrame', payload: DataframeRecordUpdated) -> 'pandas.DataFrame': + """ + >>> edf = EditableDataframe(df) + >>> edf.record_update({"record_index": 12, "record": {"a": 1, "b": 2}}) + """ + _assert_record_match_pandas_df(df, payload['record']) + + record: dict + record, index = _split_record_as_pandas_record_and_index(payload['record'], df.index.names) + + record_index = payload['record_index'] + df.iloc[record_index] = record # type: ignore + + index_list = df.index.tolist() + index_list[record_index] = index + df.index = index_list # type: ignore + + return df @staticmethod - def record_delete(df: pandas.DataFrame, payload: DataframeRecordUpdated): - raise NotImplementedError + def 
record_remove(df: 'pandas.DataFrame', payload: DataframeRecordRemoved) -> 'pandas.DataFrame': + """ + >>> edf = EditableDataframe(df) + >>> edf.record_remove({"record_index": 12}) + """ + record_index: int = payload['record_index'] + idx = df.index[record_index] + df = df.drop(idx) + + return df @staticmethod - def pyarrow_table(df: pandas.DataFrame) -> pyarrow.Table: + def pyarrow_table(df: 'pandas.DataFrame') -> pyarrow.Table: """ Serializes the dataframe into a pyarrow table """ - df['__record_id'] = range(1, len(df) + 1) table = pyarrow.Table.from_pandas(df=df) return table @@ -1675,17 +1708,34 @@ def match(df: Any) -> bool: @staticmethod def record_add(df: 'polars.DataFrame', payload: DataframeRecordAdded) -> 'polars.DataFrame': + _assert_record_match_polar_df(df, payload['record']) + import polars new_df = polars.DataFrame([payload['record']]) return polars.concat([df, new_df]) @staticmethod def record_update(df: 'polars.DataFrame', payload: DataframeRecordUpdated) -> 'polars.DataFrame': - raise NotImplementedError + # This implementation works but is not optimal. 
+ # I didn't find a better way to update a record in polars + # + # https://github.com/pola-rs/polars/issues/5973 + _assert_record_match_polar_df(df, payload['record']) + + record = payload['record'] + record_index = payload['record_index'] + for r in record: + df[record_index, r] = record[r] + + return df @staticmethod - def record_delete(df: 'polars.DataFrame', payload: DataframeRecordUpdated) -> 'polars.DataFrame': - raise NotImplementedError + def record_remove(df: 'polars.DataFrame', payload: DataframeRecordRemoved) -> 'polars.DataFrame': + import polars + + record_index: int = payload['record_index'] + df_filtered = polars.concat([df[:record_index], df[record_index + 1:]]) + return df_filtered @staticmethod def pyarrow_table(df: 'polars.DataFrame') -> pyarrow.Table: @@ -1711,16 +1761,24 @@ def match(df: Any) -> bool: @staticmethod def record_add(df: List[Dict[str, Any]], payload: DataframeRecordAdded) -> List[Dict[str, Any]]: + _assert_record_match_list_of_records(df, payload['record']) df.append(payload['record']) return df @staticmethod def record_update(df: List[Dict[str, Any]], payload: DataframeRecordUpdated) -> List[Dict[str, Any]]: - raise NotImplementedError + _assert_record_match_list_of_records(df, payload['record']) + + record_index = payload['record_index'] + record = payload['record'] + + df[record_index] = record + return df @staticmethod - def record_delete(df: List[Dict[str, Any]], payload: DataframeRecordUpdated) -> List[Dict[str, Any]]: - raise NotImplementedError + def record_remove(df: List[Dict[str, Any]], payload: DataframeRecordRemoved) -> List[Dict[str, Any]]: + del(df[payload['record_index']]) + return df @staticmethod def pyarrow_table(df: List[Dict[str, Any]]) -> pyarrow.Table: @@ -1752,7 +1810,7 @@ class EditableDataframe(MutableValue): """ processors = [PandasRecordProcessor, PolarRecordProcessor, RecordListRecordProcessor] - def __init__(self, df: Union[pandas.DataFrame, 'polars.DataFrame', List[dict], List[list]]): + def 
__init__(self, df: Union['pandas.DataFrame', 'polars.DataFrame', List[dict]]): super().__init__() self._df = df self.processor: Type[DataframeRecordProcessor] @@ -1765,27 +1823,66 @@ def __init__(self, df: Union[pandas.DataFrame, 'polars.DataFrame', List[dict], L raise ValueError("The dataframe must be a pandas, polar Dataframe or a list of record") @property - def df(self) -> Union[pandas.DataFrame, 'polars.DataFrame', List[dict], List[list]]: + def df(self) -> Union['pandas.DataFrame', 'polars.DataFrame', List[dict]]: return self._df @df.setter - def df(self, value: Union[pandas.DataFrame, 'polars.DataFrame', List[dict], List[list]]) -> None: + def df(self, value: Union['pandas.DataFrame', 'polars.DataFrame', List[dict]]) -> None: self._df = value self.mutate() def record_add(self, payload: DataframeRecordAdded) -> None: + """ + Adds a record to the dataframe + + >>> df = pandas.DataFrame({"a": [1, 2], "b": [3, 4]}) + >>> edf = EditableDataframe(df) + >>> edf.record_add({"record": {"a": 1, "b": 2}}) + """ assert self.processor is not None self._df = self.processor.record_add(self.df, payload) self.mutate() def record_update(self, payload: DataframeRecordUpdated) -> None: - pass + """ + Updates a record in the dataframe - def record_delete(self, payload: DataframeRecordRemoved) -> None: - pass + The record must be complete otherwise an error is raised (ValueError). + It must a value for each index / column. 
+ + >>> df = pandas.DataFrame({"a": [1, 2], "b": [3, 4]}) + >>> edf = EditableDataframe(df) + >>> edf.record_update({"record_index": 0, "record": {"a": 2, "b": 2}}) + """ + assert self.processor is not None + + self._df = self.processor.record_update(self.df, payload) + self.mutate() + + def record_remove(self, payload: DataframeRecordRemoved) -> None: + """ + Removes a record from the dataframe + + >>> df = pandas.DataFrame({"a": [1, 2], "b": [3, 4]}) + >>> edf = EditableDataframe(df) + >>> edf.record_remove({"record_index": 0}) + """ + assert self.processor is not None + + self._df = self.processor.record_remove(self.df, payload) + self.mutate() def pyarrow_table(self) -> pyarrow.Table: + """ + Serializes the dataframe into a pyarrow table + + This mechanism is used for serializing data for transmission to the frontend. + + >>> df = pandas.DataFrame({"a": [1, 2], "b": [3, 4]}) + >>> edf = EditableDataframe(df) + >>> pa_table = edf.pyarrow_table() + """ assert self.processor is not None pa_table = self.processor.pyarrow_table(self.df) @@ -1908,6 +2005,45 @@ async def _async_wrapper_internal(callable_handler: Callable, arg_values: List[A result = await callable_handler(*arg_values) return result +def _assert_record_match_pandas_df(df: 'pandas.DataFrame', record: Dict[str, Any]) -> None: + """ + Asserts that the record matches the dataframe columns & index + + >>> _assert_record_match_pandas_df(pandas.DataFrame({"a": [1, 2], "b": [3, 4]}), {"a": 1, "b": 2}) + """ + import pandas + + columns = set(list(df.columns.values) + df.index.names) if isinstance(df.index, pandas.RangeIndex) is False else set(df.columns.values) + columns_record = set(record.keys()) + if columns != columns_record: + raise ValueError(f"Columns mismatch. 
Expected {columns}, got {columns_record}") + +def _assert_record_match_polar_df(df: 'polars.DataFrame', record: Dict[str, Any]) -> None: + """ + Asserts that the record matches the columns of polar dataframe + + >>> _assert_record_match_polar_df(polars.DataFrame({"a": [1, 2], "b": [3, 4]}), {"a": 1, "b": 2}) + """ + columns = set(df.columns) + columns_record = set(record.keys()) + if columns != columns_record: + raise ValueError(f"Columns mismatch. Expected {columns}, got {columns_record}") + +def _assert_record_match_list_of_records(df: List[Dict[str, Any]], record: Dict[str, Any]) -> None: + """ + Asserts that the record matches the key in the record list (it uses the first record to check) + + >>> _assert_record_match_list_of_records([{"a": 1, "b": 2}, {"a": 3, "b": 4}], {"a": 1, "b": 2}) + """ + if len(df) == 0: + return + + columns = set(df[0].keys()) + columns_record = set(record.keys()) + if columns != columns_record: + raise ValueError(f"Columns mismatch. Expected {columns}, got {columns_record}") + + def _split_record_as_pandas_record_and_index(param: dict, index_columns: list) -> Tuple[dict, tuple]: """ Separates a record into the record part and the index part to be able to diff --git a/tests/backend/test_core.py b/tests/backend/test_core.py index 3eedcfc94..5c9ff762b 100644 --- a/tests/backend/test_core.py +++ b/tests/backend/test_core.py @@ -1132,6 +1132,40 @@ def test_editable_dataframe_should_process_new_record_into_dataframe_with_multii # Then assert len(edf.df) == 4 + def test_editable_dataframe_should_update_existing_record_as_dateframe_with_multiindex(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35], + "city": ["Paris", "London", "New York"] + }) + + df = df.set_index(['name', 'city']) + + edf = wf.EditableDataframe(df) + + # When + edf.record_update({"record_index": 0, "record": {"name": "Alicia", "age": 25, "city": "Paris"}}) + + # Then + assert edf.df.iloc[0]['age'] == 25 + + def 
test_editable_dataframe_should_remove_existing_record_as_dateframe_with_multiindex(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35], + "city": ["Paris", "London", "New York"] + }) + + df = df.set_index(['name', 'city']) + + edf = wf.EditableDataframe(df) + + # When + edf.record_remove({"record_index": 0}) + + # Then + assert len(edf.df) == 2 + def test_editable_dataframe_should_serialize_pandas_dataframe_with_multiindex(self) -> None: df = pandas.DataFrame({ "name": ["Alice", "Bob", "Charlie"], @@ -1172,6 +1206,33 @@ def test_editable_dataframe_should_process_new_record_into_polar_dataframe(self) # Then assert len(edf.df) == 4 + def test_editable_dataframe_should_update_existing_record_into_polar_dataframe(self) -> None: + df = polars.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + + edf = wf.EditableDataframe(df) + + # When + edf.record_update({"record_index": 0, "record": {"name": "Alicia", "age": 25}}) + + # Then + assert edf.df[0, "name"] == "Alicia" + + def test_editable_dataframe_should_remove_existing_record_into_polar_dataframe(self) -> None: + df = polars.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + + edf = wf.EditableDataframe(df) + + # When + edf.record_remove({"record_index": 0}) + + # Then + assert len(edf.df) == 2 def test_editable_dataframe_should_serialize_polar_dataframe(self) -> None: df = polars.DataFrame({ @@ -1218,6 +1279,37 @@ def test_editable_dataframe_should_process_new_record_into_list_of_records(self) assert len(edf.df) == 4 + def test_editable_dataframe_should_update_existing_record_into_list_of_record(self) -> None: + records = [ + {"name": "Alice", "age": 25}, + {"name": "Bob", "age": 30}, + {"name": "Charlie", "age": 35} + ] + + edf = wf.EditableDataframe(records) + + # When + edf.record_update({"record_index": 0, "record": {"name": "Alicia", "age": 25}}) + + # Then + assert edf.df[0]['name'] == "Alicia" + + def 
test_editable_dataframe_should_remove_existing_record_into_list_of_record(self) -> None: + records = [ + {"name": "Alice", "age": 25}, + {"name": "Bob", "age": 30}, + {"name": "Charlie", "age": 35} + ] + + edf = wf.EditableDataframe(records) + + # When + edf.record_remove({"record_index": 0}) + + # Then + assert len(edf.df) == 2 + + def test_editable_dataframe_should_serialized_list_of_records_into_pyarrow_table(self) -> None: records = [ {"name": "Alice", "age": 25}, From d9d9fd0181402d325a9a714951f066d8414c470c Mon Sep 17 00:00:00 2001 From: Fabien Arcellier Date: Fri, 12 Jul 2024 07:51:35 +0200 Subject: [PATCH 3/5] feat: implement editable dataframe to manage dataframe editor component * feat: manage missing dependencies for PandasRecordProcessor and PolarRecordProcessor --- src/writer/core.py | 28 ++++++++++++++++++++++++++++ tests/backend/test_core.py | 25 ++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/src/writer/core.py b/src/writer/core.py index e48ec4856..229d11879 100644 --- a/src/writer/core.py +++ b/src/writer/core.py @@ -16,6 +16,7 @@ import traceback import urllib.request from abc import ABCMeta +from functools import wraps from multiprocessing.process import BaseProcess from types import ModuleType from typing import ( @@ -75,6 +76,31 @@ def get_app_process() -> 'AppProcess': raise RuntimeError( "Failed to retrieve the AppProcess: running in wrong context") + +def import_failure(rvalue: Any = None): + """ + This decorator captures the failure to import a module and returns a value instead. + + If the import of a module fails, the decorator returns the value given as a parameter. 
+ + >>> @import_failure(rvalue=False) + >>> def my_handler(): + >>> import pandas + >>> return pandas.DataFrame() + + :param rvalue: the value to return + """ + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except ImportError: + return rvalue + return wrapper + return decorator + + class Config: is_mail_enabled_for_log: bool = False @@ -1632,6 +1658,7 @@ class PandasRecordProcessor(DataframeRecordProcessor): """ @staticmethod + @import_failure(rvalue=False) def match(df: Any) -> bool: import pandas return True if isinstance(df, pandas.DataFrame) else False @@ -1702,6 +1729,7 @@ class PolarRecordProcessor(DataframeRecordProcessor): """ @staticmethod + @import_failure(rvalue=False) def match(df: Any) -> bool: import polars return True if isinstance(df, polars.DataFrame) else False diff --git a/tests/backend/test_core.py b/tests/backend/test_core.py index 5c9ff762b..ee1ba0ae9 100644 --- a/tests/backend/test_core.py +++ b/tests/backend/test_core.py @@ -24,7 +24,7 @@ State, StateSerialiser, StateSerialiserException, - WriterState, + WriterState, import_failure, ) from writer.core_ui import Component from writer.ss_types import WriterEvent @@ -1324,3 +1324,26 @@ def test_editable_dataframe_should_serialized_list_of_records_into_pyarrow_table # Then assert len(table) == 3 + + +def test_import_failure_returns_expected_value_when_import_fails(): + """ + Test that an import failure returns the expected value + """ + @import_failure(rvalue=False) + def myfunc(): + import yop + + assert myfunc() is False + + +def test_import_failure_do_nothing_when_import_go_well(): + """ + Test that the import_failure decorator do nothing when the import is a success + """ + @import_failure(rvalue=False) + def myfunc(): + import math + return 2 + + assert myfunc() == 2 From 8bdf0216883062c243a2d65e2e516d2a9194b534 Mon Sep 17 00:00:00 2001 From: Fabien Arcellier Date: Fri, 12 Jul 2024 17:42:00 +0200 Subject: [PATCH 4/5] feat: 
implement editable dataframe to manage dataframe editor component * feat: implement the method record to read a specific record --- docs/framework/dataframe.mdx | 28 ++++++------- src/writer/core.py | 75 +++++++++++++++++++++++++++++++-- tests/backend/test_core.py | 80 +++++++++++++++++++++++++++++++++++- 3 files changed, 164 insertions(+), 19 deletions(-) diff --git a/docs/framework/dataframe.mdx b/docs/framework/dataframe.mdx index af2fa05fd..864cbdfb7 100644 --- a/docs/framework/dataframe.mdx +++ b/docs/framework/dataframe.mdx @@ -3,13 +3,13 @@ title: "Dataframe" --- **writer framework places the dataframe at the core of the application**. This is a great way for modeling a complex and massive data system. -it offers components as `dataframe` and `dataframe editor` to manipulate dataframes. These components allow you to visualize and interact with dataframes. +it offers components as `dataframe` to manipulate dataframes. These components allow you to visualize and interact with dataframes. -| compatibility | dataframe | dataframe editor | -|--------------------|---------------------------------------|-------------------------------| -| `pandas.DataFrame` | x | x | -| `polar.DataFrame` | x | x | -| `list of records` | x (with `EditableDataframe`) | x (with `EditableDataframe`) | +| compatibility | dataframe | +|--------------------|---------------------------------------| +| `pandas.DataFrame` | x | +| `polar.DataFrame` | x | +| `list of records` | x (with `EditableDataframe`) | ### Use a dataframe @@ -44,7 +44,7 @@ wf.init_state({ #### Handle events from a dataframe editor -**The dataframe editor emits events when an action is performed**. You must subscribe to events to integrate changes to the state of the application. +**The dataframe component emits events when an action is performed**. You must subscribe to events to integrate changes to the state of the application. 
```python import pandas @@ -55,28 +55,27 @@ wf.init_state({ 'mydf': wf.EditableDataframe(df) }) -# Subscribe this event handler to the `wf-dfeditor-add` event +# Subscribe this event handler to the `wf-dataframe-add` event def on_record_add(state, payload): payload['record']['sales'] = 0 # default value inside the dataframe state['mydf'].record_add(payload) -# Subscribe this event handler to the `wf-dfeditor-update` event +# Subscribe this event handler to the `wf-dataframe-update` event def on_record_change(state, payload): state['mydf'].record_update(payload) -# Subscribe this event handler to the `wf-dfeditor-action` event +# Subscribe this event handler to the `wf-dataframe-action` event def on_record_action(state, payload): """ This event corresponds to a quick action in the drop-down menu to the left of the dataframe. """ + record_index = payload['record_index'] if payload.action == 'remove': - state['mydf'].record_remove(payload) - if payload.action == 'important': - state['mydf'].record(payload.id).update('flag', True) # update the column flag of the dataframe to true, trigger une mutation record_update + state['mydf'].record_remove(payload) if payload.action == 'open': - state['record'] = state['df'].record(payload.id) + state['record'] = state['df'].record(record_index) # dict representation of record ``` #### Alternative to pandas.DataFrame @@ -93,7 +92,6 @@ panda_df = pandas.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) polars_df = polars.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) list_of_records = [{'a': 1, 'b': 4}, {'a': 2, 'b': 5}, {'a': 3, 'b': 6}] list_of_records = [[1, 4], [2, 5], [3, 6]] -list_of_records = ["a", "b", "c"] wf.init_state({ 'mypandas': wf.EditableDataframe(panda_df), diff --git a/src/writer/core.py b/src/writer/core.py index 229d11879..0d82bd83a 100644 --- a/src/writer/core.py +++ b/src/writer/core.py @@ -1610,10 +1610,20 @@ def match(df: Any) -> bool: """ raise NotImplementedError + @staticmethod + def record(df: Any, record_index: 
int) -> dict: + """ + This method reads the record at the given line and returns it as a dictionary + + >>> edf = EditableDataframe(df) + >>> r = edf.record(1) + """ + raise NotImplementedError + @staticmethod def record_add(df: Any, payload: DataframeRecordAdded) -> Any: """ - signature of the methods to be implemented to process wf-dfeditor-add event + signature of the methods to be implemented to process wf-dataframe-add event >>> edf = EditableDataframe(df) >>> edf.record_add({"record": {"a": 1, "b": 2}}) @@ -1623,7 +1633,7 @@ def record_add(df: Any, payload: DataframeRecordAdded) -> Any: @staticmethod def record_update(df: Any, payload: DataframeRecordUpdated) -> Any: """ - signature of the methods to be implemented to process wf-dfeditor-update event + signature of the methods to be implemented to process wf-dataframe-update event >>> edf = EditableDataframe(df) >>> edf.record_update({"record_index": 12, "record": {"a": 1, "b": 2}}) @@ -1633,7 +1643,7 @@ def record_update(df: Any, payload: DataframeRecordUpdated) -> Any: @staticmethod def record_remove(df: Any, payload: DataframeRecordRemoved) -> Any: """ - signature of the methods to be implemented to process wf-dfeditor-remove event + signature of the methods to be implemented to process wf-dataframe-action event >>> edf = EditableDataframe(df) >>> edf.record_remove({"record_index": 12}) @@ -1663,6 +1673,27 @@ def match(df: Any) -> bool: import pandas return True if isinstance(df, pandas.DataFrame) else False + @staticmethod + def record(df: 'pandas.DataFrame', record_index: int) -> dict: + """ + + >>> edf = EditableDataframe(df) + >>> r = edf.record(1) + """ + import pandas + + record = df.iloc[record_index] + if not isinstance(df.index, pandas.RangeIndex): + index_list = df.index.tolist() + record_index_content = index_list[record_index] + if isinstance(record_index_content, tuple): + for i, n in enumerate(df.index.names): + record[n] = record_index_content[i] + else: + record[df.index.names[0]] = 
record_index_content + + return dict(record) + @staticmethod def record_add(df: 'pandas.DataFrame', payload: DataframeRecordAdded) -> 'pandas.DataFrame': """ @@ -1734,6 +1765,21 @@ def match(df: Any) -> bool: import polars return True if isinstance(df, polars.DataFrame) else False + @staticmethod + def record(df: 'polars.DataFrame', record_index: int) -> dict: + """ + + >>> edf = EditableDataframe(df) + >>> r = edf.record(1) + """ + record = {} + r = df[record_index] + for c in r.columns: + record[c] = df[record_index, c] + + return record + + @staticmethod def record_add(df: 'polars.DataFrame', payload: DataframeRecordAdded) -> 'polars.DataFrame': _assert_record_match_polar_df(df, payload['record']) @@ -1787,6 +1833,17 @@ class RecordListRecordProcessor(DataframeRecordProcessor): def match(df: Any) -> bool: return True if isinstance(df, list) else False + + @staticmethod + def record(df: List[Dict[str, Any]], record_index: int) -> dict: + """ + + >>> edf = EditableDataframe(df) + >>> r = edf.record(1) + """ + r = df[record_index] + return copy.copy(r) + @staticmethod def record_add(df: List[Dict[str, Any]], payload: DataframeRecordAdded) -> List[Dict[str, Any]]: _assert_record_match_list_of_records(df, payload['record']) @@ -1916,6 +1973,18 @@ def pyarrow_table(self) -> pyarrow.Table: pa_table = self.processor.pyarrow_table(self.df) return pa_table + def record(self, record_index: int): + """ + Retrieves a specific record in dictionary form. 
+ + :param record_index: + :return: + """ + assert self.processor is not None + + record = self.processor.record(self.df, record_index) + return record + S = TypeVar("S", bound=WriterState) def new_initial_state(klass: Type[S], raw_state: dict) -> S: diff --git a/tests/backend/test_core.py b/tests/backend/test_core.py index ee1ba0ae9..b0b9aa8cd 100644 --- a/tests/backend/test_core.py +++ b/tests/backend/test_core.py @@ -24,7 +24,8 @@ State, StateSerialiser, StateSerialiserException, - WriterState, import_failure, + WriterState, + import_failure, ) from writer.core_ui import Component from writer.ss_types import WriterEvent @@ -1087,6 +1088,54 @@ def test_editable_dataframe_register_mutation_when_df_is_updated(self) -> None: # Then assert edf.mutated() is True + def test_editable_dataframe_should_read_record_as_dict_based_on_record_index(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + edf = wf.EditableDataframe(df) + + # When + r = edf.record(0) + + # Then + assert r['name'] == 'Alice' + assert r['age'] == 25 + + def test_editable_dataframe_should_read_record_as_dict_based_on_record_index_when_dataframe_has_index(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + df = df.set_index('name') + + edf = wf.EditableDataframe(df) + + # When + r = edf.record(0) + + # Then + assert r['name'] == 'Alice' + assert r['age'] == 25 + + def test_editable_dataframe_should_read_record_as_dict_based_on_record_index_when_dataframe_has_multi_index(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35], + "city": ["Paris", "London", "New York"] + }) + df = df.set_index(['name', 'city']) + + edf = wf.EditableDataframe(df) + + # When + r = edf.record(0) + + # Then + assert r['name'] == 'Alice' + assert r['age'] == 25 + assert r['city'] == 'Paris' + def test_editable_dataframe_should_process_new_record_into_dataframe(self) -> 
None: df = pandas.DataFrame({ "name": ["Alice", "Bob", "Charlie"], @@ -1192,6 +1241,20 @@ def test_editable_dataframe_expose_polar_dataframe_in_df_property(self) -> None: assert edf.df is not None assert isinstance(edf.df, polars.DataFrame) + def test_editable_dataframe_should_read_record_from_polar_as_dict_based_on_record_index(self) -> None: + df = polars.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + edf = wf.EditableDataframe(df) + + # When + r = edf.record(0) + + # Then + assert r['name'] == 'Alice' + assert r['age'] == 25 + def test_editable_dataframe_should_process_new_record_into_polar_dataframe(self) -> None: df = polars.DataFrame({ "name": ["Alice", "Bob", "Charlie"], @@ -1262,6 +1325,21 @@ def test_editable_dataframe_expose_list_of_records_in_df_property(self) -> None: assert edf.df is not None assert isinstance(edf.df, list) + def test_editable_dataframe_should_read_record_from_list_of_record_as_dict_based_on_record_index(self) -> None: + records = [ + {"name": "Alice", "age": 25}, + {"name": "Bob", "age": 30}, + {"name": "Charlie", "age": 35} + ] + + edf = wf.EditableDataframe(records) + + # When + r = edf.record(0) + + # Then + assert r['name'] == 'Alice' + assert r['age'] == 25 def test_editable_dataframe_should_process_new_record_into_list_of_records(self) -> None: records = [ From 9236669a740529d6aa4e4ca390da3e2dad61fca5 Mon Sep 17 00:00:00 2001 From: Fabien Arcellier Date: Tue, 16 Jul 2024 08:46:36 +0200 Subject: [PATCH 5/5] feat: implement editable dataframe to manage dataframe editor component * docs: improve documentation * docs: add dataframe section --- docs/framework/dataframe.mdx | 15 ++++++--------- docs/mint.json | 1 + src/writer/core.py | 10 +++++++--- tests/backend/test_core.py | 1 + 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/docs/framework/dataframe.mdx b/docs/framework/dataframe.mdx index 864cbdfb7..77414035d 100644 --- a/docs/framework/dataframe.mdx +++ 
b/docs/framework/dataframe.mdx @@ -11,7 +11,7 @@ it offers components as `dataframe` to manipulate dataframes. These components a | `polar.DataFrame` | x | | `list of records` | x (with `EditableDataframe`) | -### Use a dataframe +## Use a dataframe **a dataframe is simply added to the state**. A component like `dataframe` will be able to display it. @@ -24,9 +24,9 @@ wf.init_state({ }) ``` -### Prepare a dataframe for editing +## Prepare a dataframe for editing -**writer provides a helper to facilitate dataframe manipulatione**. This helper makes it easier to write event handlers such as adding a line, +**writer provides `EditableDataframe` as a helper to facilitate manipulation**. It makes it easier to write event handlers such as adding a line, deleting it or modifying a value, etc... ```python @@ -40,9 +40,7 @@ wf.init_state({ }) ``` -An `EditableDataframe` value can also be displayed in the `dataframe` component - -#### Handle events from a dataframe editor +### Handle events from a dataframe editor **The dataframe component emits events when an action is performed**. You must subscribe to events to integrate changes to the state of the application. @@ -78,9 +76,9 @@ def on_record_action(state, payload): state['record'] = state['df'].record(record_index) # dict representation of record ``` -#### Alternative to pandas.DataFrame +### Data structures supported by `EditableDataframe` -`EditableDataframe` can also be used with a polar dataframe and list of records. +`EditableDataframe` can be used with a pandas dataframe, a polars dataframe and a list of records. 
```python import pandas @@ -91,7 +89,6 @@ import writer as wf panda_df = pandas.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) polars_df = polars.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) list_of_records = [{'a': 1, 'b': 4}, {'a': 2, 'b': 5}, {'a': 3, 'b': 6}] -list_of_records = [[1, 4], [2, 5], [3, 6]] wf.init_state({ 'mypandas': wf.EditableDataframe(panda_df), diff --git a/docs/mint.json b/docs/mint.json index 15e0eab03..6d5dd9e9f 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -41,6 +41,7 @@ "framework/event-handlers", "framework/builder-basics", "framework/handling-inputs", + "framework/dataframe", "framework/backend-driven-ui", "framework/stylesheets", "framework/frontend-scripts", diff --git a/src/writer/core.py b/src/writer/core.py index 0d82bd83a..c565066f9 100644 --- a/src/writer/core.py +++ b/src/writer/core.py @@ -1705,9 +1705,13 @@ def record_add(df: 'pandas.DataFrame', payload: DataframeRecordAdded) -> 'pandas _assert_record_match_pandas_df(df, payload['record']) record, index = _split_record_as_pandas_record_and_index(payload['record'], df.index.names) - - new_df = pandas.DataFrame([record], index=[index]) - return pandas.concat([df, new_df]) + + if isinstance(df.index, pandas.RangeIndex): + new_df = pandas.DataFrame([record]) + return pandas.concat([df, new_df], ignore_index=True) + else: + new_df = pandas.DataFrame([record], index=[index]) + return pandas.concat([df, new_df]) @staticmethod def record_update(df: 'pandas.DataFrame', payload: DataframeRecordUpdated) -> 'pandas.DataFrame': diff --git a/tests/backend/test_core.py b/tests/backend/test_core.py index b0b9aa8cd..b3bf26293 100644 --- a/tests/backend/test_core.py +++ b/tests/backend/test_core.py @@ -1149,6 +1149,7 @@ def test_editable_dataframe_should_process_new_record_into_dataframe(self) -> No # Then assert len(edf.df) == 4 + assert edf.df.index.tolist()[3] == 3 def test_editable_dataframe_should_process_new_record_into_dataframe_with_index(self) -> None: df = pandas.DataFrame({