feat: implement editable dataframe to manage dataframe editor component

* feat: implement record_remove on EditableDataframe * docs: write documentation section about dataframe
writer · Jul 6, 2024 · c27498c · c27498c
1 parent 5be456f
commit c27498c
Show file tree

Hide file tree

Showing 3 changed files with 351 additions and 22 deletions.
diff --git a/docs/framework/dataframe.mdx b/docs/framework/dataframe.mdx
@@ -0,0 +1,102 @@
+---
+title: "Dataframe"
+---
+
+**Writer Framework places the dataframe at the core of the application**. This is a great way to model a complex and massive data system.
+It offers components such as `dataframe` and `dataframe editor` to manipulate dataframes. These components allow you to visualize and interact with dataframes.
+
+| compatibility      | dataframe                             | dataframe editor |
+|--------------------|---------------------------------------|------------------|
+| `pandas.DataFrame` | x                                     | x                |
+| `polars.DataFrame` | x                                     | x                |
+| `list of records`  | x (with `EditableDataframe`)          | x                |
+
+### Use a dataframe
+
+**a dataframe is simply added to the state**. A component like `dataframe` will be able to display it.
+
+```python
+import pandas
+import writer as wf
+
+wf.init_state({
+	'mydf': pandas.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
+})
+```
+
+### Prepare a dataframe for editing
+
+**Writer provides a helper to facilitate dataframe manipulation**. This helper makes it easier to write event handlers for adding a row,
+deleting it, or modifying a value.
+
+```python
+import pandas
+import writer as wf
+
+df = pandas.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
+
+wf.init_state({
+	'mydf': wf.EditableDataframe(df)
+})
+```
+
+<Info>An `EditableDataframe` value can also be displayed in the `dataframe` component</Info>
+
+#### Handle events from a dataframe editor
+
+**The dataframe editor emits events when an action is performed**. You must subscribe to events to integrate changes to the state of the application.
+
+```python
+import pandas
+import writer as wf
+
+# Subscribe this event handler to the `wf-dfeditor-add` event
+def on_record_add(state, payload):
+    payload['record']['sales'] = 0  # default value inside the dataframe
+    state['mydf'].record_add(payload)
+
+
+# Subscribe this event handler to the `wf-dfeditor-update` event
+def on_record_change(state, payload):
+    state['mydf'].record_update(payload)
+
+
+# Subscribe this event handler to the `wf-dfeditor-action` event
+def on_record_action(state, payload):
+	"""
+	This event corresponds to a quick action in the drop-down menu to the left of the dataframe.
+	"""
+	if payload.action == 'remove':
+	    state['mydf'].record_remove(payload)
+	if payload.action == 'important':
+		state['mydf'].record(payload.id).update('flag', True) # set the `flag` column of this record to True; this triggers a record_update mutation
+	if payload.action == 'open':
+		state['record'] = state['mydf'].record(payload.id)
+
+df = pandas.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
+
+wf.init_state({
+	'mydf': wf.EditableDataframe(df)
+})
+```
+
+#### Alternative to pandas.DataFrame
+
+`EditableDataframe` can also be used with a polars dataframe or a list of records.
+
+```python
+import pandas
+import polars
+
+import writer as wf
+
+panda_df = pandas.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
+polars_df = polars.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
+list_of_records = [{'a': 1, 'b': 4}, {'a': 2, 'b': 5}, {'a': 3, 'b': 6}]
+
+wf.init_state({
+	'mypandas': wf.EditableDataframe(panda_df),
+	'mypolars': wf.EditableDataframe(polars_df),
+	'mylistofrecords': wf.EditableDataframe(list_of_records)
+})
+```
diff --git a/src/writer/core.py b/src/writer/core.py
@@ -37,7 +37,6 @@
     cast,
 )
 
-import pandas
 import pyarrow  # type: ignore
 
 from writer import core_ui
@@ -55,6 +54,7 @@
 )
 
 if TYPE_CHECKING:
+    import pandas
     import polars
 
     from writer.app_runner import AppProcess
@@ -394,8 +394,13 @@ def carry_mutation_flag(base_key, child_key):
                 for child_key, child_mutation in child_mutations.items():
                     nested_key = carry_mutation_flag(escaped_key, child_key)
                     serialised_mutations[nested_key] = child_mutation
-            elif f"+{key}" in self.mutated or \
-                (isinstance(value, MutableValue) is True and value.mutated()):
+            elif f"+{key}" in self.mutated:
+                try:
+                    serialised_value = state_serialiser.serialise(value)
+                except BaseException:
+                    raise ValueError(f"""Couldn't serialise value of type "{ type(value) }" for key "{ key }".""")
+                serialised_mutations[f"+{escaped_key}"] = serialised_value
+            elif isinstance(value, MutableValue) is True and value.mutated():
                 try:
                     serialised_value = state_serialiser.serialise(value)
                     value.reset_mutation()
@@ -1582,17 +1587,17 @@ def record_update(df: Any, payload: DataframeRecordUpdated) -> Any:
         signature of the methods to be implemented to process wf-dfeditor-update event
 
         >>> edf = EditableDataframe(df)
-        >>> edf.record_update({"record_id": 12, "record": {"a": 1, "b": 2}})
+        >>> edf.record_update({"record_index": 12, "record": {"a": 1, "b": 2}})
         """
         raise NotImplementedError
 
     @staticmethod
-    def record_delete(df: Any, payload: DataframeRecordUpdated) -> Any:
+    def record_remove(df: Any, payload: DataframeRecordRemoved) -> Any:
         """
         signature of the methods to be implemented to process wf-dfeditor-remove event
 
         >>> edf = EditableDataframe(df)
-        >>> edf.record_delete({"record_id": 12})
+        >>> edf.record_remove({"record_index": 12})
         """
         raise NotImplementedError
 
@@ -1618,30 +1623,57 @@ def match(df: Any) -> bool:
         return True if isinstance(df, pandas.DataFrame) else False
 
     @staticmethod
-    def record_add(df: pandas.DataFrame, payload: DataframeRecordAdded) -> pandas.DataFrame:
+    def record_add(df: 'pandas.DataFrame', payload: DataframeRecordAdded) -> 'pandas.DataFrame':
         """
+        Appends ``payload['record']`` as a new row and returns the concatenated dataframe.
+
+        The record is first checked against the dataframe columns / index
+        (a ValueError is raised on mismatch), then split into its column part
+        and its index part before being concatenated to ``df``.
+
+        Both annotations are quoted because ``pandas`` is only imported under
+        ``TYPE_CHECKING`` at module level and lazily inside this method.
+
         >>> edf = EditableDataframe(df)
         >>> edf.record_add({"record": {"a": 1, "b": 2}})
         """
+        import pandas
+
+        _assert_record_match_pandas_df(df, payload['record'])
+
         record, index = _split_record_as_pandas_record_and_index(payload['record'], df.index.names)
 
         new_df = pandas.DataFrame([record], index=[index])
         return pandas.concat([df, new_df])
 
     @staticmethod
-    def record_update(df: pandas.DataFrame, payload: DataframeRecordUpdated):
-        raise NotImplementedError
+    def record_update(df: 'pandas.DataFrame', payload: DataframeRecordUpdated):
+        """
+        >>> edf = EditableDataframe(df)
+        >>> edf.record_update({"record_index": 12, "record": {"a": 1, "b": 2}})
+        """
+        _assert_record_match_pandas_df(df, payload['record'])
+
+        record: dict
+        record, index = _split_record_as_pandas_record_and_index(payload['record'], df.index.names)
+
+        record_index = payload['record_index']
+        df.iloc[record_index] = record  # type: ignore
+
+        index_list = df.index.tolist()
+        index_list[record_index] = index
+        df.index = index_list  # type: ignore
+
+        return df
 
     @staticmethod
-    def record_delete(df: pandas.DataFrame, payload: DataframeRecordUpdated):
-        raise NotImplementedError
+    def record_remove(df: 'pandas.DataFrame', payload: DataframeRecordRemoved):
+        """
+        >>> edf = EditableDataframe(df)
+        >>> edf.record_remove({"record_index": 12})
+        """
+        record_index: int = payload['record_index']
+        idx = df.index[record_index]
+        df = df.drop(idx)
+
+        return df
 
     @staticmethod
-    def pyarrow_table(df: pandas.DataFrame) -> pyarrow.Table:
+    def pyarrow_table(df: 'pandas.DataFrame') -> pyarrow.Table:
         """
         Serializes the dataframe into a pyarrow table
         """
-        df['__record_id'] = range(1, len(df) + 1)
         table = pyarrow.Table.from_pandas(df=df)
         return table
 
@@ -1662,17 +1694,34 @@ def match(df: Any) -> bool:
 
     @staticmethod
     def record_add(df: 'polars.DataFrame', payload: DataframeRecordAdded) -> 'polars.DataFrame':
+        _assert_record_match_polar_df(df, payload['record'])
+
         import polars
         new_df = polars.DataFrame([payload['record']])
         return polars.concat([df, new_df])
 
     @staticmethod
     def record_update(df: 'polars.DataFrame', payload: DataframeRecordUpdated) -> 'polars.DataFrame':
-        raise NotImplementedError
+        # This implementation works but is not optimal.
+        # I didn't find a better way to update a record in polars
+        #
+        # https://github.com/pola-rs/polars/issues/5973
+        _assert_record_match_polar_df(df, payload['record'])
+
+        record = payload['record']
+        record_index = payload['record_index']
+        for r in record:
+            df[record_index, r] = record[r]
+
+        return df
 
     @staticmethod
-    def record_delete(df: 'polars.DataFrame', payload: DataframeRecordUpdated) -> 'polars.DataFrame':
-        raise NotImplementedError
+    def record_remove(df: 'polars.DataFrame', payload: DataframeRecordRemoved) -> 'polars.DataFrame':
+        import polars
+
+        record_index: int = payload['record_index']
+        df_filtered = polars.concat([df[:record_index], df[record_index + 1:]])
+        return df_filtered
 
     @staticmethod
     def pyarrow_table(df: 'polars.DataFrame') -> pyarrow.Table:
@@ -1698,16 +1747,24 @@ def match(df: Any) -> bool:
 
     @staticmethod
     def record_add(df: List[Dict[str, Any]], payload: DataframeRecordAdded) -> List[Dict[str, Any]]:
+        _assert_record_match_list_of_records(df, payload['record'])
         df.append(payload['record'])
         return df
 
     @staticmethod
     def record_update(df: List[Dict[str, Any]], payload: DataframeRecordUpdated) -> List[Dict[str, Any]]:
-        raise NotImplementedError
+        _assert_record_match_list_of_records(df, payload['record'])
+
+        record_index = payload['record_index']
+        record = payload['record']
+
+        df[record_index] = record
+        return df
 
     @staticmethod
-    def record_delete(df: List[Dict[str, Any]], payload: DataframeRecordUpdated) -> List[Dict[str, Any]]:
-        raise NotImplementedError
+    def record_remove(df: List[Dict[str, Any]], payload: DataframeRecordRemoved) -> List[Dict[str, Any]]:
+        del(df[payload['record_index']])
+        return df
 
     @staticmethod
     def pyarrow_table(df: List[Dict[str, Any]]) -> pyarrow.Table:
@@ -1761,18 +1818,57 @@ def df(self, value: Union[pandas.DataFrame, 'polars.DataFrame', List[dict], List
         self.mutate()
 
     def record_add(self, payload: DataframeRecordAdded) -> None:
+        """
+        Adds a record to the dataframe
+
+        >>> df = pandas.DataFrame({"a": [1, 2], "b": [3, 4]})
+        >>> edf = EditableDataframe(df)
+        >>> edf.record_add({"record": {"a": 1, "b": 2}})
+        """
         assert self.processor is not None
 
         self._df = self.processor.record_add(self.df, payload)
         self.mutate()
 
     def record_update(self, payload: DataframeRecordUpdated) -> None:
-        pass
+        """
+        Updates a record in the dataframe
 
-    def record_delete(self, payload: DataframeRecordRemoved) -> None:
-        pass
+        The record must be complete otherwise an error is raised (ValueError).
+        It must provide a value for each index / column.
+
+        >>> df = pandas.DataFrame({"a": [1, 2], "b": [3, 4]})
+        >>> edf = EditableDataframe(df)
+        >>> edf.record_update({"record_index": 0, "record": {"a": 2, "b": 2}})
+        """
+        assert self.processor is not None
+
+        self._df = self.processor.record_update(self.df, payload)
+        self.mutate()
+
+    def record_remove(self, payload: DataframeRecordRemoved) -> None:
+        """
+        Removes a record from the dataframe
+
+        >>> df = pandas.DataFrame({"a": [1, 2], "b": [3, 4]})
+        >>> edf = EditableDataframe(df)
+        >>> edf.record_remove({"record_index": 0})
+        """
+        assert self.processor is not None
+
+        self._df = self.processor.record_remove(self.df, payload)
+        self.mutate()
 
     def pyarrow_table(self) -> pyarrow.Table:
+        """
+        Serializes the dataframe into a pyarrow table
+
+        This mechanism is used for serializing data for transmission to the frontend.
+
+        >>> df = pandas.DataFrame({"a": [1, 2], "b": [3, 4]})
+        >>> edf = EditableDataframe(df)
+        >>> pa_table = edf.pyarrow_table()
+        """
         assert self.processor is not None
 
         pa_table = self.processor.pyarrow_table(self.df)
@@ -1895,6 +1991,45 @@ async def _async_wrapper_internal(callable_handler: Callable, arg_values: List[A
     result = await callable_handler(*arg_values)
     return result
 
+def _assert_record_match_pandas_df(df: 'pandas.DataFrame', record: Dict[str, Any]) -> None:
+    """
+    Asserts that the record matches the dataframe columns & index
+
+    The expected keys are the dataframe columns, plus the index names when
+    the index is not a ``pandas.RangeIndex``.
+
+    Raises ValueError when the record keys differ from the expected keys.
+
+    The ``df`` annotation is quoted because ``pandas`` is imported lazily
+    inside the function and only under ``TYPE_CHECKING`` at module level.
+
+    >>> _assert_record_match_pandas_df(pandas.DataFrame({"a": [1, 2], "b": [3, 4]}), {"a": 1, "b": 2})
+    """
+    import pandas
+
+    columns = set(df.columns.values)
+    if not isinstance(df.index, pandas.RangeIndex):
+        # non-default index: its level names count as expected record keys
+        columns.update(df.index.names)
+
+    columns_record = set(record.keys())
+    if columns != columns_record:
+        raise ValueError(f"Columns mismatch. Expected {columns}, got {columns_record}")
+
+def _assert_record_match_polar_df(df: 'polars.DataFrame', record: Dict[str, Any]) -> None:
+    """
+    Asserts that the record matches the columns of a polars dataframe
+
+    Raises ValueError when the record keys differ from the dataframe columns.
+
+    >>> _assert_record_match_polar_df(polars.DataFrame({"a": [1, 2], "b": [3, 4]}), {"a": 1, "b": 2})
+    """
+    columns = set(df.columns)
+    columns_record = set(record.keys())
+    if columns != columns_record:
+        raise ValueError(f"Columns mismatch. Expected {columns}, got {columns_record}")
+
+def _assert_record_match_list_of_records(df: List[Dict[str, Any]], record: Dict[str, Any]) -> None:
+    """
+    Asserts that the record has the same keys as the records of the list
+
+    Only the first record of the list is used as the reference. An empty
+    list has no reference, so any record is accepted.
+
+    Raises ValueError when the record keys differ from the reference keys.
+
+    >>> _assert_record_match_list_of_records([{"a": 1, "b": 2}, {"a": 3, "b": 4}], {"a": 1, "b": 2})
+    """
+    if not df:
+        return
+
+    reference_keys = set(df[0])
+    record_keys = set(record)
+    if reference_keys != record_keys:
+        raise ValueError(f"Columns mismatch. Expected {reference_keys}, got {record_keys}")
+
+
 def _split_record_as_pandas_record_and_index(param: dict, index_columns: list) -> Tuple[dict, tuple]:
     """
     Separates a record into the record part and the index part to be able to