Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue #143 #144

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion pdtable/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ class InvalidTableCombineError(Exception):
pass


class UnknownMethodWarning(Warning):
    """Warning category for an unknown ``__finalize__`` method met while combining metadata."""


def _combine_tables(
obj: "TableDataFrame", other, method, **kwargs
) -> Optional[ComplementaryTableInfo]:
Expand Down Expand Up @@ -97,7 +101,8 @@ def _combine_tables(
warnings.warn(
f'While combining pdTable metadata an unknown __finalize__ method "{method}" was encountered. '
f"Will try to propagate metadata with generic methods, but please check outcome of this "
f"and notify pdTable maintainers."
f"and notify pdTable maintainers.",
category=UnknownMethodWarning
)

data = [d for d in (getattr(s, _TABLE_INFO_FIELD_NAME, None) for s in src) if d is not None]
Expand Down
43 changes: 39 additions & 4 deletions pdtable/io/parsers/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,9 @@
- The original, raw cell grid, in case the user wants to do some low-level processing.

"""
from abc import abstractmethod
import itertools
import re
from typing import Sequence, Optional, Tuple, Any, Iterable, List, Union, Dict
from collections import defaultdict
import pandas as pd
import warnings

Expand All @@ -39,7 +37,6 @@
LocationSheet,
NullLocationFile,
TableOrigin,
InputIssue,
InputIssueTracker,
NullInputIssueTracker,
)
Expand All @@ -49,6 +46,43 @@
from ...auxiliary import MetadataBlock, Directive
from ...table_metadata import TableMetadata


class EncodingException(Exception):
    """Raised when the input starts with a non-ASCII character, hinting at a wrong encoding."""


def check_encoding(cell_rows: Iterable[Sequence]) -> Iterable[Sequence]:
    """
    Yield the rows of ``cell_rows`` unchanged after sanity-checking the very first cell.

    A CSV file can have a BOM character at the start. Reading such a file with
    the wrong encoding does not raise an error by itself, but the first line
    (and the whole table, if the file starts with a table block) would then be
    ignored. To catch this early, the first character of the first cell is
    required to be ASCII.

    This is a generator: the check runs (and the exception is raised) when the
    first row is requested, not when the function is called.

    Args:
        cell_rows: Any iterable of cell rows (list, tuple, generator, ...).

    Yields:
        Every row of ``cell_rows``, in order, including the first one.

    Raises:
        EncodingException: If the first cell of the first row is a string that
            starts with a non-ASCII character.
    """
    # iter() returns an iterator unchanged and turns any other iterable
    # (list, tuple, ...) into something next() can consume.
    cell_rows = iter(cell_rows)

    try:
        first_cell_row = next(cell_rows)
    except StopIteration:
        return  # input is empty, do not yield anything

    first_cell = first_cell_row[0] if first_cell_row else None

    # Only text can carry a mis-decoded BOM; numeric or other non-string cells
    # are passed through without inspection.
    if isinstance(first_cell, str) and first_cell:
        first_sign = first_cell[0]
        if ord(first_sign) > 127:  # outside the ASCII range
            raise EncodingException(
                f'File starts with no ascii character "{first_sign}". '
                'Please verify the file encoding.'
            )

    yield first_cell_row
    yield from cell_rows


# Typing alias: 2D grid of cells with rows and cols. Intended indexing: cell_grid[row][col]
CellGrid = Sequence[Sequence]

Expand Down Expand Up @@ -451,7 +485,8 @@ def block_output(block_type, cell_grid, row: int):
state = BlockType.METADATA
next_state = None
this_block_1st_row = 0
for row_number_0based, row in enumerate(cell_rows):

for row_number_0based, row in enumerate(check_encoding(cell_rows)):
if row is None or len(row) == 0 or _is_cell_blank(row[0]):
if state != BlockType.BLANK:
next_state = BlockType.BLANK
Expand Down
13 changes: 13 additions & 0 deletions pdtable/test/io/input/only_tables_no_bom.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
**generic_inf;;;;;;;;;;;;;
all;;;;;;;;;;;;;
FATIMA_alias;node;constraint_alias;symmetry;sn_curve;sectional_force_modification;pristrco;signco;alpha;cutpoint_tol;file_name;transformation;IO;detail_type
text;text;text;text;text;text;text;-;-;mm;text;text;text;-
C00001;B0C066;C00001;rotate;F3;-;0;3;0.8;2000;..\..\..\inputs\INF\J_tube\CHW2204_INF_Swan_Neck_a30_root_V2.txt;;I;1
;;;;;;;;;;;;;
;;;;;;;;;;;;;
**generic_inf_constraints;;;;;;;;;;;;;
all;;;;;;;;;;;;;
constraint_alias;element;symmetry;cut_point_name;node;cut_distance;;;;;;;;
text;text;text;text;text;m;;;;;;;;
C00001;C660L;rotate;BRACE1;B0C066;3.091;;;;;;;;
C00001;CJT1V;rotate;BRACE2;B0C066;1.5319;;;;;;;;
13 changes: 13 additions & 0 deletions pdtable/test/io/input/only_tables_starts_with_bom.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
**generic_inf;;;;;;;;;;;;;
all;;;;;;;;;;;;;
FATIMA_alias;node;constraint_alias;symmetry;sn_curve;sectional_force_modification;pristrco;signco;alpha;cutpoint_tol;file_name;transformation;IO;detail_type
text;text;text;text;text;text;text;-;-;mm;text;text;text;-
C00001;B0C066;C00001;rotate;F3;-;0;3;0.8;2000;..\..\..\inputs\INF\J_tube\CHW2204_INF_Swan_Neck_a30_root_V2.txt;;I;1
;;;;;;;;;;;;;
;;;;;;;;;;;;;
**generic_inf_constraints;;;;;;;;;;;;;
all;;;;;;;;;;;;;
constraint_alias;element;symmetry;cut_point_name;node;cut_distance;;;;;;;;
text;text;text;text;text;m;;;;;;;;
C00001;C660L;rotate;BRACE1;B0C066;3.091;;;;;;;;
C00001;CJT1V;rotate;BRACE2;B0C066;1.5319;;;;;;;;
27 changes: 25 additions & 2 deletions pdtable/test/io/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@

from pytest import fixture, raises
import pandas as pd
import pytest

import pdtable
from pdtable import Table, BlockType, read_csv, write_csv
from pdtable.io.csv import _table_to_csv
from pdtable.io.parsers.blocks import EncodingException
from pdtable.table_metadata import ColumnFormat


Expand Down Expand Up @@ -333,15 +335,18 @@ def test_read_csv__sep_is_comma(csv_data):
assert len(template_rows) == 1


_input_dir = Path(__file__).parent / "input"


def test_read_csv__from_stream():
with open(Path(__file__).parent / "input" / "bundle.csv", "r") as fh:
with open(_input_dir / "bundle.csv", "r") as fh:
bls = list(read_csv(fh))
tables = [bl for ty, bl in bls if ty == BlockType.TABLE]
assert tables[1].name == "spelling_numbers"

# raises exception on common error if not text stream
with raises(Exception):
with open(Path(__file__).parent / "input" / "bundle.csv", "rb") as fh: # binary stream!
with open(_input_dir / "bundle.csv", "rb") as fh: # binary stream!
bls = list(read_csv(fh))
tables = [bl for ty, bl in bls if ty == BlockType.TABLE]

Expand Down Expand Up @@ -417,3 +422,21 @@ def test__table_is_preserved_when_written_to_and_read_from_csv():
assert table_read.column_names == table_write.column_names
assert table_read.units == table_write.units
assert table_read.destinations == table_write.destinations


def test_read_csv_only_tables_starting_with_bom():
    """A file starting with a BOM is rejected unless it is opened with a BOM-aware encoding."""
    only_tables_starts_with_bom_path = _input_dir / "only_tables_starts_with_bom.csv"

    # Passing the path lets read_csv open the file itself; the leading BOM is
    # then expected to surface as a non-ASCII first character.
    with pytest.raises(EncodingException):
        list(read_csv(source=only_tables_starts_with_bom_path))

    # "utf-8-sig" strips the BOM while decoding. The context manager makes sure
    # the file handle is closed even if parsing or the assertion fails.
    with open(only_tables_starts_with_bom_path, mode="r", encoding="utf-8-sig") as source:
        tables = list(read_csv(source=source))
    assert tables[0][1].name == "generic_inf"


def test_read_csv_only_tables_no_bom():
    """A file without a BOM can still be read through the BOM-aware "utf-8-sig" encoding."""
    only_tables_no_bom_path = _input_dir / "only_tables_no_bom.csv"

    # The context manager closes the file handle even if parsing or the assertion fails.
    with open(only_tables_no_bom_path, mode="r", encoding="utf-8-sig") as source:
        tables = list(read_csv(source=source))
    assert tables[0][1].name == "generic_inf"
23 changes: 15 additions & 8 deletions pdtable/test/test_pdtable.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys
from textwrap import dedent
from typing import List
import warnings

import pandas as pd
Expand Down Expand Up @@ -380,11 +381,17 @@ def table_data_frame() -> frame.TableDataFrame:
)


def _unknown_method_warnings(
    warnings_list: List[warnings.WarningMessage],
) -> List[warnings.WarningMessage]:
    """
    Return the recorded warnings whose category is ``frame.UnknownMethodWarning`` or a subclass.

    ``warnings_list`` is expected to be the list produced by
    ``warnings.catch_warnings(record=True)``. Its entries are
    ``warnings.WarningMessage`` records (which expose ``.category``), not
    ``Warning`` instances. Filtering by category keeps unrelated warnings
    from affecting the assertions in the tests.
    """
    return [
        recorded for recorded in warnings_list
        if issubclass(recorded.category, frame.UnknownMethodWarning)
    ]

class TestFinalize:
def test_replace_ok(self, table_data_frame: frame.TableDataFrame) -> None:
with warnings.catch_warnings(record=True) as w:
table_data_frame.replace('a', 'z')
assert len(w) == 0
assert len(_unknown_method_warnings(warnings_list=w)) == 0

def test_replace_not_allowed_unit(self, table_data_frame: frame.TableDataFrame) -> None:
with pytest.raises(ColumnUnitException):
Expand All @@ -395,7 +402,7 @@ def test_sort_index_ok(self, table_data_frame: frame.TableDataFrame) -> None:

with warnings.catch_warnings(record=True) as w:
table_data_frame.sort_index()
assert len(w) == 0
assert len(_unknown_method_warnings(warnings_list=w)) == 0

def test_transpose_ok(self, table_data_frame: frame.TableDataFrame) -> None:
"""
Expand All @@ -411,7 +418,7 @@ def test_astype_ok(self, table_data_frame: frame.TableDataFrame) -> None:

with warnings.catch_warnings(record=True) as w:
table_data_frame_new_type = table_data_frame.astype({'B': float})
assert len(w) == 0
assert len(_unknown_method_warnings(warnings_list=w)) == 0

assert isinstance(table_data_frame_new_type['B'].iloc[0], np.float64)

Expand All @@ -429,7 +436,7 @@ def test_append_with_loc_ok(self, table_data_frame: frame.TableDataFrame) -> Non
"""
with warnings.catch_warnings(record=True) as w:
table_data_frame.loc[999] = {'A': 'y', 'B': 1, 'C': True}
assert len(w) == 0
assert len(_unknown_method_warnings(warnings_list=w)) == 0

assert 6 == table_data_frame.shape[0]

Expand All @@ -443,7 +450,7 @@ def test_fillna_ok(self, table_data_frame: frame.TableDataFrame) -> None:

with warnings.catch_warnings(record=True) as w:
table_data_frame_new_type.fillna(123)
assert len(w) == 0
assert len(_unknown_method_warnings(warnings_list=w)) == 0

@pytest.mark.skipif(
sys.version_info < (3, 8),
Expand All @@ -466,7 +473,7 @@ def test_rename_columns(self, table_data_frame: frame.TableDataFrame) -> None:
def test_rename_index(self, table_data_frame: frame.TableDataFrame) -> None:
with warnings.catch_warnings(record=True) as w:
table_data_frame.rename(index={1: 'a', 2: 'b'})
assert len(w) == 0
assert len(_unknown_method_warnings(warnings_list=w)) == 0

def test_unstack(self, table_data_frame: frame.TableDataFrame) -> None:
"""
Expand All @@ -483,7 +490,7 @@ def test_unstack(self, table_data_frame: frame.TableDataFrame) -> None:

with warnings.catch_warnings(record=True) as w:
unstacked_table_data_frame = table_data_frame.unstack()
assert len(w) == 0
assert len(_unknown_method_warnings(warnings_list=w)) == 0

unstacked_col_name_to_unit = {
name: col.unit for name, col in object.__getattribute__(
Expand All @@ -510,7 +517,7 @@ def test_melt(self, table_data_frame: frame.TableDataFrame) -> None:
"""
with warnings.catch_warnings(record=True) as w:
melted_table_data_frame = table_data_frame.melt(id_vars=['A'], value_vars=['B', 'C'])
assert len(w) == 0
assert len(_unknown_method_warnings(warnings_list=w)) == 0

melted_col_name_to_unit = {
name: col.unit for name, col in object.__getattribute__(
Expand Down
Loading