diff --git a/alfred/ci.py b/alfred/ci.py index 350481c2a..a4b205beb 100644 --- a/alfred/ci.py +++ b/alfred/ci.py @@ -32,8 +32,12 @@ def ci_mypy(): alfred.run("mypy ./src/writer --exclude app_templates/*") @alfred.command("ci.ruff", help="linting with ruff") -def ci_ruff(): - alfred.run("ruff check") +@alfred.option('--fix', '-f', help="fix linting errors", is_flag=True, default=False) +def ci_ruff(fix): + if fix: + alfred.run("ruff check --fix") + else: + alfred.run("ruff check") @alfred.command("ci.pytest", help="run pytest on ./tests") def ci_test(): diff --git a/docs/framework/dataframe.mdx b/docs/framework/dataframe.mdx new file mode 100644 index 000000000..77414035d --- /dev/null +++ b/docs/framework/dataframe.mdx @@ -0,0 +1,98 @@ +--- +title: "Dataframe" +--- + +**Writer Framework places the dataframe at the core of the application**. This is a great way to model a complex and massive data system. +It offers components such as `dataframe` to manipulate dataframes. These components allow you to visualize and interact with dataframes. + +| compatibility | dataframe | +|--------------------|---------------------------------------| +| `pandas.DataFrame` | x | +| `polars.DataFrame` | x | +| `list of records` | x (with `EditableDataframe`) | + +## Use a dataframe + +**A dataframe is simply added to the state**. A component like `dataframe` will be able to display it. + +```python +import pandas +import writer as wf + +wf.init_state({ + 'mydf': pandas.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) +}) +``` + +## Prepare a dataframe for editing + +**Writer provides `EditableDataframe` as a helper to facilitate manipulation**. It makes it easier to write event handlers for actions such as adding a line, +deleting it or modifying a value, etc. 
+ +```python +import pandas +import writer as wf + +df = pandas.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) + +wf.init_state({ + 'mydf': wf.EditableDataframe(df) +}) +``` + +### Handle events from a dataframe editor + +**The dataframe component emits events when an action is performed**. You must subscribe to these events to apply the changes to the state of the application. + +```python +import pandas +import writer as wf + +df = pandas.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) +wf.init_state({ + 'mydf': wf.EditableDataframe(df) +}) + +# Subscribe this event handler to the `wf-dataframe-add` event +def on_record_add(state, payload): + payload['record']['sales'] = 0 # default value inside the dataframe + state['mydf'].record_add(payload) + + +# Subscribe this event handler to the `wf-dataframe-update` event +def on_record_change(state, payload): + state['mydf'].record_update(payload) + + +# Subscribe this event handler to the `wf-dataframe-action` event +def on_record_action(state, payload): + """ + This event corresponds to a quick action in the drop-down menu to the left of the dataframe. + """ + record_index = payload['record_index'] + if payload['action'] == 'remove': + state['mydf'].record_remove(payload) + if payload['action'] == 'open': + state['record'] = state['mydf'].record(record_index) # dict representation of record +``` + +### Data structures supported by `EditableDataframe` + +`EditableDataframe` can be used with a pandas dataframe, a polars dataframe and a list of records. 
+ +```python +import pandas +import polars + +import writer as wf + +panda_df = pandas.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) +polars_df = polars.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) +list_of_records = [{'a': 1, 'b': 4}, {'a': 2, 'b': 5}, {'a': 3, 'b': 6}] + +wf.init_state({ + 'mypandas': wf.EditableDataframe(panda_df), + 'mypolars': wf.EditableDataframe(polars_df), + 'mylistofrecords': wf.EditableDataframe(list_of_records) +}) +``` diff --git a/docs/mint.json b/docs/mint.json index 15e0eab03..6d5dd9e9f 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -41,6 +41,7 @@ "framework/event-handlers", "framework/builder-basics", "framework/handling-inputs", + "framework/dataframe", "framework/backend-driven-ui", "framework/stylesheets", "framework/frontend-scripts", diff --git a/src/writer/__init__.py b/src/writer/__init__.py index b93a03f1d..1639e1974 100644 --- a/src/writer/__init__.py +++ b/src/writer/__init__.py @@ -5,6 +5,7 @@ from writer.core import ( BytesWrapper, Config, + EditableDataframe, FileWrapper, Readable, State, diff --git a/src/writer/core.py b/src/writer/core.py index 8cb2cf442..c565066f9 100644 --- a/src/writer/core.py +++ b/src/writer/core.py @@ -2,6 +2,7 @@ import base64 import contextlib import copy +import dataclasses import datetime import inspect import io @@ -14,6 +15,8 @@ import time import traceback import urllib.request +from abc import ABCMeta +from functools import wraps from multiprocessing.process import BaseProcess from types import ModuleType from typing import ( @@ -35,9 +38,14 @@ cast, ) +import pyarrow # type: ignore + from writer import core_ui from writer.core_ui import Component from writer.ss_types import ( + DataframeRecordAdded, + DataframeRecordRemoved, + DataframeRecordUpdated, InstancePath, InstancePathItem, Readable, @@ -47,6 +55,9 @@ ) if TYPE_CHECKING: + import pandas + import polars + from writer.app_runner import AppProcess @@ -65,6 +76,31 @@ def get_app_process() -> 'AppProcess': raise RuntimeError( 
"Failed to retrieve the AppProcess: running in wrong context") + +def import_failure(rvalue: Any = None): + """ + This decorator captures the failure to load a volume and returns a value instead. + + If the import of a module fails, the decorator returns the value given as a parameter. + + >>> @import_failure(rvalue=False) + >>> def my_handler(): + >>> import pandas + >>> return pandas.DataFrame() + + :param rvalue: the value to return + """ + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except ImportError: + return rvalue + return wrapper + return decorator + + class Config: is_mail_enabled_for_log: bool = False @@ -129,12 +165,10 @@ class StateSerialiserException(ValueError): class StateSerialiser: - """ Serialises user state values before sending them to the front end. Provides JSON-compatible values, including data URLs for binary data. """ - def serialise(self, v: Any) -> Union[Dict, List, str, bool, int, float, None]: from writer.ai import Conversation if isinstance(v, State): @@ -153,6 +187,9 @@ def serialise(self, v: Any) -> Union[Dict, List, str, bool, int, float, None]: return self._serialise_list_recursively(v) if isinstance(v, (str, bool)): return v + if isinstance(v, EditableDataframe): + table = v.pyarrow_table() + return self._serialise_pyarrow_table(table) if v is None: return v @@ -242,6 +279,44 @@ def _serialise_pyarrow_table(self, table): bw = BytesWrapper(buf, "application/vnd.apache.arrow.file") return self.serialise(bw) +class MutableValue: + """ + MutableValue allows you to implement a value whose modification + will be followed by the state of Writer Framework and will trigger the refresh + of the user interface. 
+ + >>> class MyValue(MutableValue): + >>> def __init__(self, value): + >>> self.value = value + >>> + >>> def modify(self, new_value): + >>> self.value = new_value + >>> self.mutate() + """ + def __init__(self): + self._mutated = False + + def mutated(self) -> bool: + """ + Returns whether the value has been mutated. + :return: + """ + return self._mutated + + def mutate(self) -> None: + """ + Marks the value as mutated. + This will trigger the refresh of the user interface on the next round trip + :return: + """ + self._mutated = True + + def reset_mutation(self) -> None: + """ + Resets the mutation flag to False. + :return: + """ + self._mutated = False class StateProxy: @@ -349,8 +424,14 @@ def carry_mutation_flag(base_key, child_key): try: serialised_value = state_serialiser.serialise(value) except BaseException: - raise ValueError( - f"""Couldn't serialise value of type "{ type(value) }" for key "{ key }".""") + raise ValueError(f"""Couldn't serialise value of type "{ type(value) }" for key "{ key }".""") + serialised_mutations[f"+{escaped_key}"] = serialised_value + elif isinstance(value, MutableValue) is True and value.mutated(): + try: + serialised_value = state_serialiser.serialise(value) + value.reset_mutation() + except BaseException: + raise ValueError(f"""Couldn't serialise value of type "{ type(value) }" for key "{ key }".""") serialised_mutations[f"+{escaped_key}"] = serialised_value deleted_keys = \ @@ -1506,6 +1587,408 @@ def __set__(self, instance, value): proxy = getattr(instance, self.objectName) proxy[self.key] = value + +class DataframeRecordRemove: + pass + + +class DataframeRecordProcessor(): + """ + This interface defines the signature of the methods to process the events of a + dataframe compatible with EditableDataframe. + + A Dataframe can be any structure composed of tabular data. + + This class defines the signature of the methods to be implemented. 
+ """ + __metaclass__ = ABCMeta + + @staticmethod + def match(df: Any) -> bool: + """ + This method checks if the dataframe is compatible with the processor. + """ + raise NotImplementedError + + @staticmethod + def record(df: Any, record_index: int) -> dict: + """ + This method read a record at the given line and get it back as dictionary + + >>> edf = EditableDataframe(df) + >>> r = edf.record(1) + """ + raise NotImplementedError + + @staticmethod + def record_add(df: Any, payload: DataframeRecordAdded) -> Any: + """ + signature of the methods to be implemented to process wf-dataframe-add event + + >>> edf = EditableDataframe(df) + >>> edf.record_add({"record": {"a": 1, "b": 2}}) + """ + raise NotImplementedError + + @staticmethod + def record_update(df: Any, payload: DataframeRecordUpdated) -> Any: + """ + signature of the methods to be implemented to process wf-dataframe-update event + + >>> edf = EditableDataframe(df) + >>> edf.record_update({"record_index": 12, "record": {"a": 1, "b": 2}}) + """ + raise NotImplementedError + + @staticmethod + def record_remove(df: Any, payload: DataframeRecordRemoved) -> Any: + """ + signature of the methods to be implemented to process wf-dataframe-action event + + >>> edf = EditableDataframe(df) + >>> edf.record_remove({"record_index": 12}) + """ + raise NotImplementedError + + @staticmethod + def pyarrow_table(df: Any) -> pyarrow.Table: + """ + Serializes the dataframe into a pyarrow table + """ + raise NotImplementedError + + +class PandasRecordProcessor(DataframeRecordProcessor): + """ + PandasRecordProcessor processes records from a pandas dataframe saved into an EditableDataframe + + >>> df = pandas.DataFrame({"a": [1, 2], "b": [3, 4]}) + >>> edf = EditableDataframe(df) + >>> edf.record_add({"a": 1, "b": 2}) + """ + + @staticmethod + @import_failure(rvalue=False) + def match(df: Any) -> bool: + import pandas + return True if isinstance(df, pandas.DataFrame) else False + + @staticmethod + def record(df: 
'pandas.DataFrame', record_index: int) -> dict: + """ + + >>> edf = EditableDataframe(df) + >>> r = edf.record(1) + """ + import pandas + + record = df.iloc[record_index] + if not isinstance(df.index, pandas.RangeIndex): + index_list = df.index.tolist() + record_index_content = index_list[record_index] + if isinstance(record_index_content, tuple): + for i, n in enumerate(df.index.names): + record[n] = record_index_content[i] + else: + record[df.index.names[0]] = record_index_content + + return dict(record) + + @staticmethod + def record_add(df: 'pandas.DataFrame', payload: DataframeRecordAdded) -> 'pandas.DataFrame': + """ + >>> edf = EditableDataframe(df) + >>> edf.record_add({"record": {"a": 1, "b": 2}}) + """ + import pandas + + _assert_record_match_pandas_df(df, payload['record']) + + record, index = _split_record_as_pandas_record_and_index(payload['record'], df.index.names) + + if isinstance(df.index, pandas.RangeIndex): + new_df = pandas.DataFrame([record]) + return pandas.concat([df, new_df], ignore_index=True) + else: + new_df = pandas.DataFrame([record], index=[index]) + return pandas.concat([df, new_df]) + + @staticmethod + def record_update(df: 'pandas.DataFrame', payload: DataframeRecordUpdated) -> 'pandas.DataFrame': + """ + >>> edf = EditableDataframe(df) + >>> edf.record_update({"record_index": 12, "record": {"a": 1, "b": 2}}) + """ + _assert_record_match_pandas_df(df, payload['record']) + + record: dict + record, index = _split_record_as_pandas_record_and_index(payload['record'], df.index.names) + + record_index = payload['record_index'] + df.iloc[record_index] = record # type: ignore + + index_list = df.index.tolist() + index_list[record_index] = index + df.index = index_list # type: ignore + + return df + + @staticmethod + def record_remove(df: 'pandas.DataFrame', payload: DataframeRecordRemoved) -> 'pandas.DataFrame': + """ + >>> edf = EditableDataframe(df) + >>> edf.record_remove({"record_index": 12}) + """ + record_index: int = 
payload['record_index'] + idx = df.index[record_index] + df = df.drop(idx) + + return df + + @staticmethod + def pyarrow_table(df: 'pandas.DataFrame') -> pyarrow.Table: + """ + Serializes the dataframe into a pyarrow table + """ + table = pyarrow.Table.from_pandas(df=df) + return table + + +class PolarRecordProcessor(DataframeRecordProcessor): + """ + PolarRecordProcessor processes records from a polar dataframe saved into an EditableDataframe + + >>> df = polars.DataFrame({"a": [1, 2], "b": [3, 4]}) + >>> edf = EditableDataframe(df) + >>> edf.record_add({"record": {"a": 1, "b": 2}}) + """ + + @staticmethod + @import_failure(rvalue=False) + def match(df: Any) -> bool: + import polars + return True if isinstance(df, polars.DataFrame) else False + + @staticmethod + def record(df: 'polars.DataFrame', record_index: int) -> dict: + """ + + >>> edf = EditableDataframe(df) + >>> r = edf.record(1) + """ + record = {} + r = df[record_index] + for c in r.columns: + record[c] = df[record_index, c] + + return record + + + @staticmethod + def record_add(df: 'polars.DataFrame', payload: DataframeRecordAdded) -> 'polars.DataFrame': + _assert_record_match_polar_df(df, payload['record']) + + import polars + new_df = polars.DataFrame([payload['record']]) + return polars.concat([df, new_df]) + + @staticmethod + def record_update(df: 'polars.DataFrame', payload: DataframeRecordUpdated) -> 'polars.DataFrame': + # This implementation works but is not optimal. 
+ # I didn't find a better way to update a record in polars + # + # https://github.com/pola-rs/polars/issues/5973 + _assert_record_match_polar_df(df, payload['record']) + + record = payload['record'] + record_index = payload['record_index'] + for r in record: + df[record_index, r] = record[r] + + return df + + @staticmethod + def record_remove(df: 'polars.DataFrame', payload: DataframeRecordRemoved) -> 'polars.DataFrame': + import polars + + record_index: int = payload['record_index'] + df_filtered = polars.concat([df[:record_index], df[record_index + 1:]]) + return df_filtered + + @staticmethod + def pyarrow_table(df: 'polars.DataFrame') -> pyarrow.Table: + """ + Serializes the dataframe into a pyarrow table + """ + import pyarrow.interchange + table: pyarrow.Table = pyarrow.interchange.from_dataframe(df) + return table + +class RecordListRecordProcessor(DataframeRecordProcessor): + """ + RecordListRecordProcessor processes records from a list of record saved into an EditableDataframe + + >>> df = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] + >>> edf = EditableDataframe(df) + >>> edf.record_add({"record": {"a": 1, "b": 2}}) + """ + + @staticmethod + def match(df: Any) -> bool: + return True if isinstance(df, list) else False + + + @staticmethod + def record(df: List[Dict[str, Any]], record_index: int) -> dict: + """ + + >>> edf = EditableDataframe(df) + >>> r = edf.record(1) + """ + r = df[record_index] + return copy.copy(r) + + @staticmethod + def record_add(df: List[Dict[str, Any]], payload: DataframeRecordAdded) -> List[Dict[str, Any]]: + _assert_record_match_list_of_records(df, payload['record']) + df.append(payload['record']) + return df + + @staticmethod + def record_update(df: List[Dict[str, Any]], payload: DataframeRecordUpdated) -> List[Dict[str, Any]]: + _assert_record_match_list_of_records(df, payload['record']) + + record_index = payload['record_index'] + record = payload['record'] + + df[record_index] = record + return df + + @staticmethod + def 
record_remove(df: List[Dict[str, Any]], payload: DataframeRecordRemoved) -> List[Dict[str, Any]]: + del(df[payload['record_index']]) + return df + + @staticmethod + def pyarrow_table(df: List[Dict[str, Any]]) -> pyarrow.Table: + """ + Serializes the dataframe into a pyarrow table + """ + column_names = list(df[0].keys()) + columns = {key: [record[key] for record in df] for key in column_names} + + pyarrow_columns = {key: pyarrow.array(values) for key, values in columns.items()} + schema = pyarrow.schema([(key, pyarrow_columns[key].type) for key in pyarrow_columns]) + table = pyarrow.Table.from_arrays( + [pyarrow_columns[key] for key in column_names], + schema=schema + ) + + return table + +class EditableDataframe(MutableValue): + """ + Editable Dataframe makes it easier to process events from components + that modify a dataframe like the dataframe editor. + + >>> initial_state = wf.init_state({ + >>> "df": wf.EditableDataframe(df) + >>> }) + + Editable Dataframe is compatible with a pandas, thrillers or record list dataframe + """ + processors = [PandasRecordProcessor, PolarRecordProcessor, RecordListRecordProcessor] + + def __init__(self, df: Union['pandas.DataFrame', 'polars.DataFrame', List[dict]]): + super().__init__() + self._df = df + self.processor: Type[DataframeRecordProcessor] + for processor in self.processors: + if processor.match(self.df): + self.processor = processor + break + + if self.processor is None: + raise ValueError("The dataframe must be a pandas, polar Dataframe or a list of record") + + @property + def df(self) -> Union['pandas.DataFrame', 'polars.DataFrame', List[dict]]: + return self._df + + @df.setter + def df(self, value: Union['pandas.DataFrame', 'polars.DataFrame', List[dict]]) -> None: + self._df = value + self.mutate() + + def record_add(self, payload: DataframeRecordAdded) -> None: + """ + Adds a record to the dataframe + + >>> df = pandas.DataFrame({"a": [1, 2], "b": [3, 4]}) + >>> edf = EditableDataframe(df) + >>> 
edf.record_add({"record": {"a": 1, "b": 2}}) + """ + assert self.processor is not None + + self._df = self.processor.record_add(self.df, payload) + self.mutate() + + def record_update(self, payload: DataframeRecordUpdated) -> None: + """ + Updates a record in the dataframe + + The record must be complete otherwise an error is raised (ValueError). + It must a value for each index / column. + + >>> df = pandas.DataFrame({"a": [1, 2], "b": [3, 4]}) + >>> edf = EditableDataframe(df) + >>> edf.record_update({"record_index": 0, "record": {"a": 2, "b": 2}}) + """ + assert self.processor is not None + + self._df = self.processor.record_update(self.df, payload) + self.mutate() + + def record_remove(self, payload: DataframeRecordRemoved) -> None: + """ + Removes a record from the dataframe + + >>> df = pandas.DataFrame({"a": [1, 2], "b": [3, 4]}) + >>> edf = EditableDataframe(df) + >>> edf.record_remove({"record_index": 0}) + """ + assert self.processor is not None + + self._df = self.processor.record_remove(self.df, payload) + self.mutate() + + def pyarrow_table(self) -> pyarrow.Table: + """ + Serializes the dataframe into a pyarrow table + + This mechanism is used for serializing data for transmission to the frontend. + + >>> df = pandas.DataFrame({"a": [1, 2], "b": [3, 4]}) + >>> edf = EditableDataframe(df) + >>> pa_table = edf.pyarrow_table() + """ + assert self.processor is not None + + pa_table = self.processor.pyarrow_table(self.df) + return pa_table + + def record(self, record_index: int): + """ + Retrieves a specific record in dictionary form. 
+ + :param record_index: + :return: + """ + assert self.processor is not None + + record = self.processor.record(self.df, record_index) + return record + S = TypeVar("S", bound=WriterState) def new_initial_state(klass: Type[S], raw_state: dict) -> S: @@ -1623,6 +2106,63 @@ async def _async_wrapper_internal(callable_handler: Callable, arg_values: List[A result = await callable_handler(*arg_values) return result +def _assert_record_match_pandas_df(df: 'pandas.DataFrame', record: Dict[str, Any]) -> None: + """ + Asserts that the record matches the dataframe columns & index + + >>> _assert_record_match_pandas_df(pandas.DataFrame({"a": [1, 2], "b": [3, 4]}), {"a": 1, "b": 2}) + """ + import pandas + + columns = set(list(df.columns.values) + df.index.names) if isinstance(df.index, pandas.RangeIndex) is False else set(df.columns.values) + columns_record = set(record.keys()) + if columns != columns_record: + raise ValueError(f"Columns mismatch. Expected {columns}, got {columns_record}") + +def _assert_record_match_polar_df(df: 'polars.DataFrame', record: Dict[str, Any]) -> None: + """ + Asserts that the record matches the columns of polar dataframe + + >>> _assert_record_match_pandas_df(polars.DataFrame({"a": [1, 2], "b": [3, 4]}), {"a": 1, "b": 2}) + """ + columns = set(df.columns) + columns_record = set(record.keys()) + if columns != columns_record: + raise ValueError(f"Columns mismatch. Expected {columns}, got {columns_record}") + +def _assert_record_match_list_of_records(df: List[Dict[str, Any]], record: Dict[str, Any]) -> None: + """ + Asserts that the record matches the key in the record list (it use the first record to check) + + >>> _assert_record_match_list_of_records([{"a": 1, "b": 2}, {"a": 3, "b": 4}], {"a": 1, "b": 2}) + """ + if len(df) == 0: + return + + columns = set(df[0].keys()) + columns_record = set(record.keys()) + if columns != columns_record: + raise ValueError(f"Columns mismatch. 
Expected {columns}, got {columns_record}") + + +def _split_record_as_pandas_record_and_index(param: dict, index_columns: list) -> Tuple[dict, tuple]: + """ + Separates a record into the record part and the index part to be able to + create or update a row in a dataframe. + + >>> record, index = _split_record_as_pandas_record_and_index({"a": 1, "b": 2}, ["a"]) + >>> print(record) # {"b": 2} + >>> print(index) # (1,) + """ + final_record = {} + final_index = [] + for key, value in param.items(): + if key in index_columns: + final_index.append(value) + else: + final_record[key] = value + + return final_record, tuple(final_index) state_serialiser = StateSerialiser() initial_state = WriterState() diff --git a/src/writer/ss_types.py b/src/writer/ss_types.py index d82d2cc17..c7e65d9e8 100644 --- a/src/writer/ss_types.py +++ b/src/writer/ss_types.py @@ -161,10 +161,19 @@ class StateEnquiryResponse(AppProcessServerResponse): payload: Optional[StateEnquiryResponsePayload] -AppProcessServerResponsePacket = Tuple[int, - Optional[str], AppProcessServerResponse] +AppProcessServerResponsePacket = Tuple[int, Optional[str], AppProcessServerResponse] +class DataframeRecordAdded(TypedDict): + record: Dict[str, Any] + +class DataframeRecordUpdated(TypedDict): + record_index: int + record: Dict[str, Any] + +class DataframeRecordRemoved(TypedDict): + record_index: int + class WriterEventResult(TypedDict): ok: bool result: Any diff --git a/tests/backend/test_core.py b/tests/backend/test_core.py index 7122c96a3..b3bf26293 100644 --- a/tests/backend/test_core.py +++ b/tests/backend/test_core.py @@ -6,8 +6,10 @@ import altair import numpy as np +import pandas import pandas as pd import plotly.express as px +import polars import polars as pl import pyarrow as pa import pytest @@ -17,11 +19,13 @@ Evaluator, EventDeserialiser, FileWrapper, + MutableValue, SessionManager, State, StateSerialiser, StateSerialiserException, WriterState, + import_failure, ) from writer.core_ui import Component from 
writer.ss_types import WriterEvent @@ -192,6 +196,70 @@ def test_to_raw_state(self) -> None: assert self.sp.to_raw_state() == raw_state_dict assert self.sp_simple_dict.to_raw_state() == simple_dict + def test_mutable_value_should_raise_mutation(self) -> None: + """ + Tests that a class that implements MutableValue can be used in a State and throw mutations. + """ + class MyValue(MutableValue): + + def __init__(self): + super().__init__() + self._value = 0 + + def set(self, value): + self._value = value + self.mutate() + + def to_dict(self): + return {"a": self._value} + + s = WriterState({ + "value": MyValue() + }) + # Reset the mutation after initialisation + s._state_proxy.get_mutations_as_dict() + + # When + s["value"].set(2) + a = s._state_proxy.get_mutations_as_dict() + + # Then + assert "+value" in a + assert a["+value"] == {"a": 2} + + def test_mutable_value_should_reset_mutation_after_reading_get_mutations(self) -> None: + """ + Tests that after reading the mutations, they are reset to zero + with a focus on the MutableValue. 
+ """ + class MyValue(MutableValue): + + def __init__(self): + super().__init__() + self._value = 0 + + def set(self, value): + self._value = value + self.mutate() + + def to_dict(self): + return {"a": self._value} + + s = WriterState({ + "value": MyValue() + }) + # Reset the mutation after initialisation + s._state_proxy.get_mutations_as_dict() + + # Then + s["value"].set(2) + s._state_proxy.get_mutations_as_dict() + + # Mutation is read a second time + a = s._state_proxy.get_mutations_as_dict() + + # Then + assert a == {} class TestState: @@ -991,3 +1059,370 @@ def session_verifier_2(headers: Dict[str, str]) -> None: None ) assert s_invalid is None + +class TestEditableDataframe: + + def test_editable_dataframe_expose_pandas_dataframe_as_df_property(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + + edf = wf.EditableDataframe(df) + assert edf.df is not None + assert isinstance(edf.df, pandas.DataFrame) + + def test_editable_dataframe_register_mutation_when_df_is_updated(self) -> None: + # Given + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + + edf = wf.EditableDataframe(df) + + # When + edf.df.loc[0, "age"] = 26 + edf.df = edf.df + + # Then + assert edf.mutated() is True + + def test_editable_dataframe_should_read_record_as_dict_based_on_record_index(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + edf = wf.EditableDataframe(df) + + # When + r = edf.record(0) + + # Then + assert r['name'] == 'Alice' + assert r['age'] == 25 + + def test_editable_dataframe_should_read_record_as_dict_based_on_record_index_when_dataframe_has_index(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + df = df.set_index('name') + + edf = wf.EditableDataframe(df) + + # When + r = edf.record(0) + + # Then + assert r['name'] == 'Alice' + assert r['age'] == 25 + + def 
test_editable_dataframe_should_read_record_as_dict_based_on_record_index_when_dataframe_has_multi_index(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35], + "city": ["Paris", "London", "New York"] + }) + df = df.set_index(['name', 'city']) + + edf = wf.EditableDataframe(df) + + # When + r = edf.record(0) + + # Then + assert r['name'] == 'Alice' + assert r['age'] == 25 + assert r['city'] == 'Paris' + + def test_editable_dataframe_should_process_new_record_into_dataframe(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + + edf = wf.EditableDataframe(df) + + # When + edf.record_add({"record": {"name": "David", "age": 40}}) + + # Then + assert len(edf.df) == 4 + assert edf.df.index.tolist()[3] == 3 + + def test_editable_dataframe_should_process_new_record_into_dataframe_with_index(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + df = df.set_index('name') + + edf = wf.EditableDataframe(df) + + # When + edf.record_add({"record": {"name": "David", "age": 40}}) + + # Then + assert len(edf.df) == 4 + + def test_editable_dataframe_should_process_new_record_into_dataframe_with_multiindex(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35], + "city": ["Paris", "London", "New York"] + }) + df = df.set_index(['name', 'city']) + + edf = wf.EditableDataframe(df) + + # When + edf.record_add({"record": {"name": "David", "age": 40, "city": "Berlin"}}) + + # Then + assert len(edf.df) == 4 + + def test_editable_dataframe_should_update_existing_record_as_dateframe_with_multiindex(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35], + "city": ["Paris", "London", "New York"] + }) + + df = df.set_index(['name', 'city']) + + edf = wf.EditableDataframe(df) + + # When + edf.record_update({"record_index": 0, "record": {"name": 
"Alicia", "age": 25, "city": "Paris"}}) + + # Then + assert edf.df.iloc[0]['age'] == 25 + + def test_editable_dataframe_should_remove_existing_record_as_dateframe_with_multiindex(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35], + "city": ["Paris", "London", "New York"] + }) + + df = df.set_index(['name', 'city']) + + edf = wf.EditableDataframe(df) + + # When + edf.record_remove({"record_index": 0}) + + # Then + assert len(edf.df) == 2 + + def test_editable_dataframe_should_serialize_pandas_dataframe_with_multiindex(self) -> None: + df = pandas.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35], + "city": ["Paris", "London", "New York"] + }) + df = df.set_index(['name', 'city']) + + edf = wf.EditableDataframe(df) + + # When + table = edf.pyarrow_table() + + # Then + assert len(table) == 3 + + def test_editable_dataframe_expose_polar_dataframe_in_df_property(self) -> None: + df = polars.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + + edf = wf.EditableDataframe(df) + assert edf.df is not None + assert isinstance(edf.df, polars.DataFrame) + + def test_editable_dataframe_should_read_record_from_polar_as_dict_based_on_record_index(self) -> None: + df = polars.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + edf = wf.EditableDataframe(df) + + # When + r = edf.record(0) + + # Then + assert r['name'] == 'Alice' + assert r['age'] == 25 + + def test_editable_dataframe_should_process_new_record_into_polar_dataframe(self) -> None: + df = polars.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + + edf = wf.EditableDataframe(df) + + # When + edf.record_add({"record": {"name": "David", "age": 40}}) + + # Then + assert len(edf.df) == 4 + + def test_editable_dataframe_should_update_existing_record_into_polar_dataframe(self) -> None: + df = polars.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 
35] + }) + + edf = wf.EditableDataframe(df) + + # When + edf.record_update({"record_index": 0, "record": {"name": "Alicia", "age": 25}}) + + # Then + assert edf.df[0, "name"] == "Alicia" + + def test_editable_dataframe_should_remove_existing_record_into_polar_dataframe(self) -> None: + df = polars.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35] + }) + + edf = wf.EditableDataframe(df) + + # When + edf.record_remove({"record_index": 0}) + + # Then + assert len(edf.df) == 2 + + def test_editable_dataframe_should_serialize_polar_dataframe(self) -> None: + df = polars.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35], + "city": ["Paris", "London", "New York"] + }) + + edf = wf.EditableDataframe(df) + + # When + table = edf.pyarrow_table() + + # Then + assert len(table) == 3 + + + def test_editable_dataframe_expose_list_of_records_in_df_property(self) -> None: + records = [ + {"name": "Alice", "age": 25}, + {"name": "Bob", "age": 30}, + {"name": "Charlie", "age": 35} + ] + + edf = wf.EditableDataframe(records) + + assert edf.df is not None + assert isinstance(edf.df, list) + + def test_editable_dataframe_should_read_record_from_list_of_record_as_dict_based_on_record_index(self) -> None: + records = [ + {"name": "Alice", "age": 25}, + {"name": "Bob", "age": 30}, + {"name": "Charlie", "age": 35} + ] + + edf = wf.EditableDataframe(records) + + # When + r = edf.record(0) + + # Then + assert r['name'] == 'Alice' + assert r['age'] == 25 + + def test_editable_dataframe_should_process_new_record_into_list_of_records(self) -> None: + records = [ + {"name": "Alice", "age": 25}, + {"name": "Bob", "age": 30}, + {"name": "Charlie", "age": 35} + ] + + edf = wf.EditableDataframe(records) + + # When + edf.record_add({"record": {"name": "David", "age": 40}}) + + # Then + assert len(edf.df) == 4 + + + def test_editable_dataframe_should_update_existing_record_into_list_of_record(self) -> None: + records = [ + {"name": "Alice", "age": 25}, + 
{"name": "Bob", "age": 30}, + {"name": "Charlie", "age": 35} + ] + + edf = wf.EditableDataframe(records) + + # When + edf.record_update({"record_index": 0, "record": {"name": "Alicia", "age": 25}}) + + # Then + assert edf.df[0]['name'] == "Alicia" + + def test_editable_dataframe_should_remove_existing_record_into_list_of_record(self) -> None: + records = [ + {"name": "Alice", "age": 25}, + {"name": "Bob", "age": 30}, + {"name": "Charlie", "age": 35} + ] + + edf = wf.EditableDataframe(records) + + # When + edf.record_remove({"record_index": 0}) + + # Then + assert len(edf.df) == 2 + + + def test_editable_dataframe_should_serialized_list_of_records_into_pyarrow_table(self) -> None: + records = [ + {"name": "Alice", "age": 25}, + {"name": "Bob", "age": 30}, + {"name": "Charlie", "age": 35} + ] + + edf = wf.EditableDataframe(records) + + # When + table = edf.pyarrow_table() + + # Then + assert len(table) == 3 + + +def test_import_failure_returns_expected_value_when_import_fails(): + """ + Test that an import failure returns the expected value + """ + @import_failure(rvalue=False) + def myfunc(): + import yop + + assert myfunc() is False + + +def test_import_failure_do_nothing_when_import_go_well(): + """ + Test that the import_failure decorator do nothing when the import is a success + """ + @import_failure(rvalue=False) + def myfunc(): + import math + return 2 + + assert myfunc() == 2