Skip to content

Commit

Permalink
light work on wrappers
Browse files Browse the repository at this point in the history
  • Loading branch information
martindurant committed Aug 23, 2024
1 parent 18d01b2 commit 07776f7
Showing 1 changed file with 47 additions and 4 deletions.
51 changes: 47 additions & 4 deletions fastparquet/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def __getitem__(self, item):
if isinstance(item, int):
ind = self.index[item]
return self.data[ind] if ind > 0 else None
elif isinstance(item, np.ndarray):
elif isinstance(item, (np.ndarray, slice)):
item = np.atleast_1d(item)
return IndexedNullable(self.index[item], self.data)
else:
Expand All @@ -23,6 +23,9 @@ def to_masked(self):
data[mask] = self.data
return MaskedNullable(mask, data)

def __len__(self):
return len(self.data)


class MaskedNullable:

Expand All @@ -34,8 +37,7 @@ def __getitem__(self, item):
if isinstance(item, int):
m = self.mask[item]
return self.data[item] if m else None
elif isinstance(item, np.ndarray):
item = np.atleast_1d(item)
elif isinstance(item, (np.ndarray, slice)):
return MaskedNullable(self.mask[item], self.data[item])
else:
raise TypeError
Expand All @@ -47,6 +49,9 @@ def to_indexed(self):
index[self.mask] = np.arange(len(data)) # could collect uniques
return IndexedNullable(index, data)

def __len__(self):
return len(self.data)


class String:
def __init__(self, offsets, data) -> None:
Expand All @@ -56,5 +61,43 @@ def __init__(self, offsets, data) -> None:
def __getitem__(self, item):
if isinstance(item, int):
return self.data[self.offsets[item]: self.offsets[item + 1]].decode()
elif isinstance(item, slice):
assert item.step is None
if item.stop is None or item.stop == -1:
stop = None
else:
stop = item.stop + 1
return String(self.offsets[item.start:stop], self.data)
elif isinstance(item, np.ndarray):
# completely repacks the data
# or make indexed/masked array? But what if they are then
# indexed?
raise NotImplementedError
else:
raise TypeError

def __len__(self):
return len(self.offsets) - 1


class Record:
    """Struct-like container mapping field names to column objects.

    A record is built either from parallel ``fields``/``contents`` lists or
    from a ready-made ``data`` mapping, but not both.
    """

    def __init__(self, fields: list = None, contents: list = None, data: dict = None):
        """Create a record.

        Parameters
        ----------
        fields : list, optional
            Field names; paired positionally with ``contents``.
        contents : list, optional
            One column per entry in ``fields``.
        data : dict, optional
            Mapping of field name to column; stored as given.

        Raises
        ------
        ValueError
            If ``data`` is combined with ``fields``/``contents``, or if
            neither ``data`` nor both ``fields`` and ``contents`` are given.
        """
        if data is None:
            if fields is None or contents is None:
                raise ValueError(
                    "Provide either `data` or both `fields` and `contents`"
                )
            data = dict(zip(fields, contents))
        elif fields is not None or contents is not None:
            # The two construction styles are mutually exclusive.
            raise ValueError(
                "`data` cannot be combined with `fields` or `contents`"
            )
        self.data = data

    def __getitem__(self, item):
        """Select a column, a single row, or a sub-record.

        A ``str`` key returns that field's column.  An ``int`` returns a
        plain ``dict`` of field name to the element at that position.  Any
        other key (e.g. a slice) is forwarded to every column and the
        results are wrapped in a new ``Record``.
        """
        if isinstance(item, str):
            return self.data[item]
        selected = {name: column[item] for name, column in self.data.items()}
        if isinstance(item, int):
            return selected
        return Record(data=selected)

    def __len__(self):
        """Return the length of the first column, or 0 with no fields."""
        if self.data:
            return len(next(iter(self.data.values())))
        return 0

0 comments on commit 07776f7

Please sign in to comment.