Skip to content

Commit

Permalink
Issue #145 (#146)
Browse files Browse the repository at this point in the history
Handle different data types for destinations from CellGrid

The destinations cell may hold other data types than string (e.g. when read from Excel). 
This fix handles data types other than strings by casting to a string before further processing.
Note: by default `datetime` is rendered as `dd/mm/yyyy hh:mm:ss` which contains a whitespace (" "). This would be interpreted as *two* destinations. Solution: Replace whitespace with underscore and raise a warning to the user.

---------

Co-authored-by: Jan Bielecki <[email protected]>
  • Loading branch information
K4liber and Jan Bielecki authored Jun 19, 2024
1 parent ac7b391 commit d38302d
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 7 deletions.
25 changes: 21 additions & 4 deletions pdtable/io/parsers/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,10 @@
- The original, raw cell grid, in case the user wants to do some low-level processing.
"""
from abc import abstractmethod
import datetime
import itertools
import re
from typing import Sequence, Optional, Tuple, Any, Iterable, List, Union, Dict
from collections import defaultdict
import pandas as pd
import warnings

Expand All @@ -39,7 +38,6 @@
LocationSheet,
NullLocationFile,
TableOrigin,
InputIssue,
InputIssueTracker,
NullInputIssueTracker,
)
Expand Down Expand Up @@ -96,6 +94,25 @@ def parse_column_names(column_names_raw: Sequence[Union[str, None]]) -> List[str
]


def _get_destinations_safely_stripped(input_data: Any) -> str:
"""
Save strip in cases where the input is not a string type.
"""
# Data Validation
if isinstance(input_data, datetime.datetime):
fixed_input_data = str(input_data).replace(' ', '_')
warnings.warn(
f"Found destination with a datetime format ({str(input_data)}). " \
f"Converting to {fixed_input_data}."
)
input_data = fixed_input_data
elif not isinstance(input_data, str):
# Data Conversion
input_data = str(input_data)

return input_data.strip()


def make_table_json_precursor(cells: CellGrid, origin, fixer:ParseFixer) -> Tuple[JsonDataPrecursor, bool]:
"""Parses cell grid into a JSON-like data structure but with some non-JSON-native values
Expand All @@ -118,7 +135,7 @@ def make_table_json_precursor(cells: CellGrid, origin, fixer:ParseFixer) -> Tupl
fixer.table_name = table_name

# internally hold destinations as json-compatible dict
destinations = {dest: None for dest in cells[1][0].strip().split(" ")}
destinations = {dest: None for dest in _get_destinations_safely_stripped(cells[1][0]).split(" ")}
table_is_empty = len(cells) < 3
if table_is_empty:
column_names = []
Expand Down
25 changes: 25 additions & 0 deletions pdtable/test/io/parsers/test_blocks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import datetime
import warnings
from pdtable.io.parsers.blocks import _get_destinations_safely_stripped


class TestGetDestinationsSafelyStripped:
def test_string_input(self) -> None:
assert _get_destinations_safely_stripped(" hello ") == "hello"

def test_int_input(self) -> None:
assert _get_destinations_safely_stripped(123) == "123"

def test_string_input_with_leading_trailing_spaces(self) -> None:
assert _get_destinations_safely_stripped(" hello world ") == "hello world"

def test_int_input_with_leading_trailing_spaces(self) -> None:
assert _get_destinations_safely_stripped(" 123 ") == "123"

def test_datetime_input(self) -> None:
datetime_now = datetime.datetime.now()

with warnings.catch_warnings(record=True) as w:
destinations = _get_destinations_safely_stripped(datetime_now)
assert len(w) == 1
assert destinations.replace(' ', '') == destinations
2 changes: 1 addition & 1 deletion requirements_pandas1.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pandas<2.0
numpy
numpy<2.0
typing-extensions

# OPTIONAL
Expand Down
4 changes: 2 additions & 2 deletions requirements_pandas2.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
pandas>=2.0
pyarrow
numpy
numpy<2.0 # numpy 2 does not work for pint library on python3.9
typing-extensions
pyarrow

# OPTIONAL
openpyxl
Expand Down

0 comments on commit d38302d

Please sign in to comment.