Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue #145 #146

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 21 additions & 4 deletions pdtable/io/parsers/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,10 @@
- The original, raw cell grid, in case the user wants to do some low-level processing.

"""
from abc import abstractmethod
import datetime
import itertools
import re
from typing import Sequence, Optional, Tuple, Any, Iterable, List, Union, Dict
from collections import defaultdict
import pandas as pd
import warnings

Expand All @@ -39,7 +38,6 @@
LocationSheet,
NullLocationFile,
TableOrigin,
InputIssue,
InputIssueTracker,
NullInputIssueTracker,
)
Expand Down Expand Up @@ -96,6 +94,25 @@ def parse_column_names(column_names_raw: Sequence[Union[str, None]]) -> List[str
]


def _get_destinations_safely_stripped(input_data: Any) -> str:
"""
Save strip in cases where the input is not a string type.
"""
# Data Validation
if isinstance(input_data, datetime.datetime):
fixed_input_data = str(input_data).replace(' ', '_')
warnings.warn(
f"Found destination with a datetime format ({str(input_data)}). " \
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if at this point input_data is not already a str with white space replaced by underscore? This would make the warning a little confusing.

I would suggest to not reassign input_data but rather create a new variable result (or whatever name you prefer) to be able to report on the original data.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeap, nice catch, thanks:)

f"Converting to {fixed_input_data}."
)
input_data = fixed_input_data
elif not isinstance(input_data, str):
# Data Conversion
input_data = str(input_data)

return input_data.strip()


def make_table_json_precursor(cells: CellGrid, origin, fixer:ParseFixer) -> Tuple[JsonDataPrecursor, bool]:
"""Parses cell grid into a JSON-like data structure but with some non-JSON-native values

Expand All @@ -118,7 +135,7 @@ def make_table_json_precursor(cells: CellGrid, origin, fixer:ParseFixer) -> Tupl
fixer.table_name = table_name

# internally hold destinations as json-compatible dict
destinations = {dest: None for dest in cells[1][0].strip().split(" ")}
destinations = {dest: None for dest in _get_destinations_safely_stripped(cells[1][0]).split(" ")}
table_is_empty = len(cells) < 3
if table_is_empty:
column_names = []
Expand Down
25 changes: 25 additions & 0 deletions pdtable/test/io/parsers/test_blocks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import datetime
import warnings
from pdtable.io.parsers.blocks import _get_destinations_safely_stripped


class TestGetDestinationsSafelyStripped:
def test_string_input(self) -> None:
assert _get_destinations_safely_stripped(" hello ") == "hello"

def test_int_input(self) -> None:
assert _get_destinations_safely_stripped(123) == "123"

def test_string_input_with_leading_trailing_spaces(self) -> None:
assert _get_destinations_safely_stripped(" hello world ") == "hello world"

def test_int_input_with_leading_trailing_spaces(self) -> None:
assert _get_destinations_safely_stripped(" 123 ") == "123"

def test_datetime_input(self) -> None:
datetime_now = datetime.datetime.now()

with warnings.catch_warnings(record=True) as w:
destinations = _get_destinations_safely_stripped(datetime_now)
assert len(w) == 1
assert destinations.replace(' ', '') == destinations
2 changes: 1 addition & 1 deletion requirements_pandas1.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pandas<2.0
numpy
numpy<2.0
typing-extensions

# OPTIONAL
Expand Down
4 changes: 2 additions & 2 deletions requirements_pandas2.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
pandas>=2.0
pyarrow
numpy
numpy<2.0 # numpy 2 does not work for pint library on python3.9
typing-extensions
pyarrow
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder why we did not need pyarrow in the requirements before?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just have changed the order to have the same as we have in the requirements_pandas1.txt file.


# OPTIONAL
openpyxl
Expand Down
Loading