Skip to content

Commit

Permalink
Statistics on ParquetFile subset. (#940)
Browse files: browse the repository at this point in the history
Co-authored-by: Martin Durant <[email protected]>
  • Loading branch information
yohplala and martindurant authored Oct 29, 2024
1 parent 0f7a98e commit 25f3370
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 3 deletions.
2 changes: 1 addition & 1 deletion fastparquet/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ def columns(self):

@property
def statistics(self):
if self._statistics is None:
if not hasattr(self, '_statistics') or self._statistics is None:
self._statistics = statistics(self)
return self._statistics

Expand Down
13 changes: 11 additions & 2 deletions fastparquet/test/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,23 @@ def test_statistics(tempdir):
p = ParquetFile(fn)

s = statistics(p)
expected = {'distinct_count': {'x': [None, None],
expected1 = {'distinct_count': {'x': [None, None],
'y': [None, None],
'z': [None, None]},
'max': {'x': [2, 3], 'y': [2.0, 1.0], 'z': ['b', 'c']},
'min': {'x': [1, 3], 'y': [1.0, 1.0], 'z': ['a', 'c']},
'null_count': {'x': [0, 0], 'y': [0, 0], 'z': [0, 0]}}

assert s == expected
assert s == expected1

expected2 = {'distinct_count': {'x': [None],
'y': [None],
'z': [None]},
'max': {'x': [3], 'y': [1.0], 'z': ['c']},
'min': {'x': [3], 'y': [1.0], 'z': ['c']},
'null_count': {'x': [0], 'y': [0], 'z': [0]}}

assert p[-1].statistics == expected2


def test_logical_types(tempdir):
Expand Down

0 comments on commit 25f3370

Please sign in to comment.