From df7d36e3403c2aec50ba7ff298feac745cd04a89 Mon Sep 17 00:00:00 2001 From: K4liber Date: Mon, 9 Oct 2023 20:56:23 +0200 Subject: [PATCH 1/4] init --- pdtable/frame.py | 9 +++++++-- pdtable/test/test_pdtable.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/pdtable/frame.py b/pdtable/frame.py index fb46b3d..3c3a770 100644 --- a/pdtable/frame.py +++ b/pdtable/frame.py @@ -241,8 +241,13 @@ def make_table_dataframe( if units is not None: columns = {col_name: ColumnMetadata(unit) for col_name, unit in zip(df.columns, units)} elif unit_map is not None: - for col, unit in unit_map.items(): - columns[col] = ColumnMetadata(unit) + for col in df.columns: + if col not in unit_map: + raise Exception( + "unit_map should contain units for all columns, " + f"missing unit for column {col}.") + + columns[col] = ColumnMetadata(unit_map[col]) table_info = ComplementaryTableInfo(table_metadata=table_metadata, columns=columns) df = TableDataFrame.from_table_info( diff --git a/pdtable/test/test_pdtable.py b/pdtable/test/test_pdtable.py index e324fa7..99e9948 100644 --- a/pdtable/test/test_pdtable.py +++ b/pdtable/test/test_pdtable.py @@ -1,3 +1,4 @@ +from pathlib import Path from textwrap import dedent import pandas as pd @@ -5,6 +6,8 @@ import pytest +from pdtable.io.csv import read_csv, write_csv + from .. 
import Table, frame from ..proxy import Column from ..table_metadata import ColumnFormat @@ -340,3 +343,28 @@ def test_table__str_destination_with_no_spaces_results_in_single_destination(): def test_table__str_destination_with_spaces_results_in_multiple_destinations(): table = Table(name="test", destinations="a b c") assert table.destinations == {"a", "b", "c"} + +def test_make_table_dataframe_units(tmpdir): + data_frame = pd.DataFrame.from_dict({ + 'column_b': ['a', 'b', 'c'], + 'column_a': [1, 2, 3] + }) + table = Table( + df=data_frame, + name="ab", + unit_map={ + 'column_a': 'deg', + 'column_b': 'text' + }, + strict_types=False + ) + csv_path = Path(tmpdir) / 'test.csv' + write_csv( + tables=table, + to=csv_path + ) + loaded_tables = read_csv( + source=csv_path + ) + loaded_table = next(loaded_tables)[1] + assert {'column_b': 'text', 'column_a': 'deg'} == dict(zip(loaded_table.df.columns, loaded_table.units)) From 2c180f03673a4cb7385343dfa6a329374b14226e Mon Sep 17 00:00:00 2001 From: Jan Bielecki Date: Tue, 10 Oct 2023 09:36:53 +0200 Subject: [PATCH 2/4] clean tests --- pdtable/test/test_pdtable.py | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/pdtable/test/test_pdtable.py b/pdtable/test/test_pdtable.py index 99e9948..0253ec9 100644 --- a/pdtable/test/test_pdtable.py +++ b/pdtable/test/test_pdtable.py @@ -344,17 +344,18 @@ def test_table__str_destination_with_spaces_results_in_multiple_destinations(): table = Table(name="test", destinations="a b c") assert table.destinations == {"a", "b", "c"} -def test_make_table_dataframe_units(tmpdir): + +def test_unit_map_with_different_order_than_columns(tmpdir): data_frame = pd.DataFrame.from_dict({ - 'column_b': ['a', 'b', 'c'], - 'column_a': [1, 2, 3] + 'column_text': ['a', 'b', 'c'], + 'column_deg': [1, 2, 3] }) table = Table( df=data_frame, name="ab", unit_map={ - 'column_a': 'deg', - 'column_b': 'text' + 'column_deg': 'deg', + 'column_text': 'text' }, 
strict_types=False ) @@ -367,4 +368,25 @@ def test_make_table_dataframe_units(tmpdir): source=csv_path ) loaded_table = next(loaded_tables)[1] - assert {'column_b': 'text', 'column_a': 'deg'} == dict(zip(loaded_table.df.columns, loaded_table.units)) + assert {'column_text': 'text', 'column_deg': 'deg'} == \ + dict(zip(loaded_table.df.columns, loaded_table.units)) + + +def test_unit_map_with_missing_columns(): + data_frame = pd.DataFrame.from_dict({ + 'column_text': ['a', 'b', 'c'], + 'column_deg': [1, 2, 3] + }) + + try: + Table( + df=data_frame, + name="ab", + unit_map={ + 'column_deg': 'deg', + }, + strict_types=False + ) + raise AssertionError('Table initialization should raise an Exception') + except Exception as exc: + assert "missing unit for column column_text" in str(exc) From e102281987eb8ffd170a9b5792aadc5d6900d1b1 Mon Sep 17 00:00:00 2001 From: Jan Bielecki Date: Tue, 10 Oct 2023 10:17:25 +0200 Subject: [PATCH 3/4] fix tests --- pdtable/frame.py | 14 +++++++------ pdtable/test/test_pdtable.py | 39 +++--------------------------------- 2 files changed, 11 insertions(+), 42 deletions(-) diff --git a/pdtable/frame.py b/pdtable/frame.py index 3c3a770..e8896e2 100644 --- a/pdtable/frame.py +++ b/pdtable/frame.py @@ -47,6 +47,7 @@ `ExtensionArray`. This option was discarded early due to performance concerns, but might be viable and would the be preferable to the chosen approach. 
""" +import logging from warnings import warn import pandas as pd @@ -59,6 +60,9 @@ _TABLE_INFO_FIELD_NAME = "_table_data" +logger = logging.getLogger(__name__) + + class UnknownOperationError(Exception): pass @@ -242,12 +246,10 @@ def make_table_dataframe( columns = {col_name: ColumnMetadata(unit) for col_name, unit in zip(df.columns, units)} elif unit_map is not None: for col in df.columns: - if col not in unit_map: - raise Exception( - "unit_map should contain units for all columns, " - f"missing unit for column {col}.") - - columns[col] = ColumnMetadata(unit_map[col]) + if col in unit_map: + columns[col] = ColumnMetadata(unit_map[col]) + else: + logger.warning(f'Missing unit for column "{col}".') table_info = ComplementaryTableInfo(table_metadata=table_metadata, columns=columns) df = TableDataFrame.from_table_info( diff --git a/pdtable/test/test_pdtable.py b/pdtable/test/test_pdtable.py index 0253ec9..5228665 100644 --- a/pdtable/test/test_pdtable.py +++ b/pdtable/test/test_pdtable.py @@ -1,4 +1,3 @@ -from pathlib import Path from textwrap import dedent import pandas as pd @@ -6,8 +5,6 @@ import pytest -from pdtable.io.csv import read_csv, write_csv - from .. 
import Table, frame from ..proxy import Column from ..table_metadata import ColumnFormat @@ -350,43 +347,13 @@ def test_unit_map_with_different_order_than_columns(tmpdir): 'column_text': ['a', 'b', 'c'], 'column_deg': [1, 2, 3] }) - table = Table( + table = frame.make_table_dataframe( df=data_frame, - name="ab", + name='test_unit_map', unit_map={ 'column_deg': 'deg', 'column_text': 'text' }, - strict_types=False - ) - csv_path = Path(tmpdir) / 'test.csv' - write_csv( - tables=table, - to=csv_path ) - loaded_tables = read_csv( - source=csv_path - ) - loaded_table = next(loaded_tables)[1] assert {'column_text': 'text', 'column_deg': 'deg'} == \ - dict(zip(loaded_table.df.columns, loaded_table.units)) - - -def test_unit_map_with_missing_columns(): - data_frame = pd.DataFrame.from_dict({ - 'column_text': ['a', 'b', 'c'], - 'column_deg': [1, 2, 3] - }) - - try: - Table( - df=data_frame, - name="ab", - unit_map={ - 'column_deg': 'deg', - }, - strict_types=False - ) - raise AssertionError('Table initialization should raise an Exception') - except Exception as exc: - assert "missing unit for column column_text" in str(exc) + dict(zip(table.columns, frame.get_table_info(df=table).units)) From 94a53ae8110b9ebbc477aaa29896de6ae0e69f50 Mon Sep 17 00:00:00 2001 From: Jan Bielecki Date: Thu, 12 Oct 2023 10:24:40 +0200 Subject: [PATCH 4/4] use warnings instead of logging --- pdtable/frame.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pdtable/frame.py b/pdtable/frame.py index e8896e2..4bedd31 100644 --- a/pdtable/frame.py +++ b/pdtable/frame.py @@ -47,7 +47,6 @@ `ExtensionArray`. This option was discarded early due to performance concerns, but might be viable and would the be preferable to the chosen approach. 
""" -import logging from warnings import warn import pandas as pd @@ -60,9 +59,6 @@ _TABLE_INFO_FIELD_NAME = "_table_data" -logger = logging.getLogger(__name__) - - class UnknownOperationError(Exception): pass @@ -249,7 +245,7 @@ def make_table_dataframe( if col in unit_map: columns[col] = ColumnMetadata(unit_map[col]) else: - logger.warning(f'Missing unit for column "{col}".') + warn(f'Missing unit for column "{col}".') table_info = ComplementaryTableInfo(table_metadata=table_metadata, columns=columns) df = TableDataFrame.from_table_info(