Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement MicroSeries.drop and MicroDataFrame.drop #211

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 48 additions & 3 deletions microdf/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,25 @@ def copy(self, deep=True):
res = MicroSeries(res, weights=self.weights.copy(deep))
return res

def drop(
    self,
    labels=None,
    axis=0,
    index=None,
    columns=None,
    level=None,
    inplace=False,
    errors="raise",
):
    """Return a new MicroSeries with the requested index labels removed.

    The same labels are removed from ``self.weights`` so that values and
    weights stay aligned in the result.

    :param labels: Index label(s) to drop.
    :param axis: Forwarded to ``pandas.Series.drop``.
    :param index: Alternative way to give the label(s) to drop.
    :param columns: Forwarded to ``pandas.Series.drop``.
    :param level: Index level the labels refer to (MultiIndex only).
    :param inplace: Must be falsy; in-place dropping is not supported.
    :param errors: ``"raise"`` or ``"ignore"``, applied to both the values
        and the weights.
    :raises NotImplementedError: If ``inplace`` is truthy.
    :return: A MicroSeries holding the remaining values and weights.
    """
    if inplace:
        raise NotImplementedError("inplace not yet implemented.")
    # Forward by keyword: recent pandas versions make every parameter after
    # ``labels`` keyword-only, so positional forwarding raises TypeError.
    res = super().drop(
        labels=labels,
        axis=axis,
        index=index,
        columns=columns,
        level=level,
        inplace=False,
        errors=errors,
    )
    # For a Series, ``labels`` and ``index`` both name rows. pandas has
    # already rejected the case where both were supplied.
    row_labels = index if labels is None else labels
    if row_labels is None:
        # Nothing was removed from the rows (e.g. only ``columns`` given).
        weights = self.weights
    else:
        weights = self.weights.drop(row_labels, level=level, errors=errors)
    return MicroSeries(res, weights=weights)

def equals(self, other) -> bool:
equal_values = super().equals(other)
equal_weights = self.weights.equals(other.weights)
Expand Down Expand Up @@ -594,6 +613,28 @@ def _link_all_weights(self):
if column != self.weights_col:
self._link_weights(column)

def drop(
    self,
    labels=None,
    axis=0,
    index=None,
    columns=None,
    level=None,
    inplace=False,
    errors="raise",
):
    """Return a new MicroDataFrame with rows and/or columns removed.

    When rows are removed, the same labels are removed from
    ``self.weights``. When only columns are removed, the weights are
    passed through unchanged.

    :param labels: Label(s) to drop along ``axis``.
    :param axis: ``0``/``"index"`` to drop rows, ``1``/``"columns"`` to
        drop columns. Only consulted together with ``labels``.
    :param index: Row label(s) to drop (alternative to ``labels``).
    :param columns: Column label(s) to drop (alternative to ``labels``).
    :param level: Level the labels refer to (MultiIndex only).
    :param inplace: Must be falsy; in-place dropping is not supported.
    :param errors: ``"raise"`` or ``"ignore"``, applied to both the data
        and the weights.
    :raises NotImplementedError: If ``inplace`` is truthy.
    :return: A MicroDataFrame holding the remaining data and weights.
    """
    if inplace:
        raise NotImplementedError("inplace not yet implemented.")
    # Forward by keyword: recent pandas versions make every parameter after
    # ``labels`` keyword-only, so positional forwarding raises TypeError.
    res = super().drop(
        labels=labels,
        axis=axis,
        index=index,
        columns=columns,
        level=level,
        inplace=False,
        errors=errors,
    )
    # Work out which row labels (if any) were dropped. pandas has already
    # rejected the case where ``labels`` is combined with index/columns.
    if labels is not None:
        row_labels = labels if axis in (0, "index", "rows") else None
    else:
        row_labels = index
    if row_labels is None:
        # Only columns were dropped, so every row keeps its weight.
        weights = self.weights
    else:
        weights = self.weights.drop(row_labels, level=level, errors=errors)
    return MicroDataFrame(res, weights=weights)

def set_weights(self, weights) -> None:
"""Sets the weights for the MicroDataFrame. If a string is received,
it will be assumed to be the column name of the weight column.
Expand Down Expand Up @@ -641,9 +682,13 @@ def __setattr__(self, key, value):
super().__setattr__(key, value)
self.catch_series_relapse()

def reset_index(
    self, level=None, drop=False, inplace=False, col_level=0, col_fill=""
):
    """Return a new MicroDataFrame with its index reset.

    :param level: Index level(s) to reset; ``None`` resets all of them.
    :param drop: If true, discard the old index instead of inserting it
        as column(s).
    :param inplace: Must be falsy; in-place reset is not supported.
    :param col_level: Forwarded to ``pandas.DataFrame.reset_index``.
    :param col_fill: Forwarded to ``pandas.DataFrame.reset_index``.
    :raises NotImplementedError: If ``inplace`` is truthy.
    :return: A MicroDataFrame with the reset index and renumbered weights.
    """
    if inplace:
        raise NotImplementedError("inplace not yet implemented.")
    # Forward by keyword: recent pandas versions make every parameter after
    # ``level`` keyword-only, so positional forwarding raises TypeError.
    res = super().reset_index(
        level=level,
        drop=drop,
        inplace=False,
        col_level=col_level,
        col_fill=col_fill,
    )
    # NOTE(review): the weights are always renumbered 0..n-1, which lines up
    # with ``res`` when every index level is reset; confirm the behaviour
    # wanted for a partial ``level=`` reset on a MultiIndex.
    res = MicroDataFrame(res, weights=self.weights.reset_index(drop=True))
    return res

def copy(self, deep=True):
Expand Down
29 changes: 24 additions & 5 deletions microdf/tests/test_generic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from microdf.generic import MicroDataFrame, MicroSeries
import numpy as np
import microdf as mdf
import pandas as pd
Expand Down Expand Up @@ -78,7 +77,7 @@ def test_mean():
def test_poverty_count():
    """Only the first household is below the threshold, so the weighted
    poverty count must equal that household's weight."""
    arr = np.array([10000, 20000, 50000])
    w = np.array([1123, 1144, 2211])
    # Build the frame through the package namespace; the earlier duplicate
    # construction via the bare class name was a dead assignment.
    df = mdf.MicroDataFrame(weights=w)
    df["income"] = arr
    df["threshold"] = 16000
    assert df.poverty_count("income", "threshold") == w[0]
Expand Down Expand Up @@ -122,14 +121,14 @@ def test_concat():

def test_set_index():
    """Column access must still yield a MicroSeries after the index of the
    MicroDataFrame is reassigned."""
    d = mdf.MicroDataFrame(dict(x=[1, 2, 3]), weights=[4, 5, 6])
    assert isinstance(d.x, mdf.MicroSeries)
    d.index = [1, 2, 3]
    # Re-check after mutation: assigning the index must not downgrade the
    # column to a plain pandas Series.
    assert isinstance(d.x, mdf.MicroSeries)


def test_reset_index():
    """reset_index must return a MicroDataFrame, not a plain DataFrame."""
    d = mdf.MicroDataFrame(dict(x=[1, 2, 3]), weights=[4, 5, 6])
    assert isinstance(d.reset_index(), mdf.MicroDataFrame)


def test_cumsum():
Expand Down Expand Up @@ -202,6 +201,26 @@ def test_subset():
assert not df[["x", "y"]].equals(df_no_z_diff_weights)


def test_drop():
    """drop must keep the micro types and keep weights aligned with the
    remaining rows, for both MicroDataFrame and MicroSeries."""
    d = mdf.MicroDataFrame({"x": [1, 2], "y": [3, 4]}, weights=[5, 6])
    # Drop a row.
    d_drop_row = d.drop(0)
    assert isinstance(d_drop_row, mdf.MicroDataFrame)
    assert d_drop_row.equals(
        mdf.MicroDataFrame({"x": [2], "y": [4]}, weights=[6], index=[1])
    )
    # Drop a column.
    d_drop_column = d.drop("y", axis=1)
    assert isinstance(d_drop_column, mdf.MicroDataFrame)
    assert d_drop_column.equals(
        mdf.MicroDataFrame({"x": [1, 2]}, weights=[5, 6])
    )
    # Drop an item from a MicroSeries.
    s_drop = d.x.drop(0)
    assert isinstance(s_drop, mdf.MicroSeries)
    # Dropping label 0 leaves the surviving value under label 1, and
    # equals() compares the index, so the expected series (and its weights)
    # must carry index [1] rather than the default [0].
    expected = mdf.MicroSeries(
        [2], index=[1], weights=pd.Series([6], index=[1])
    )
    assert s_drop.equals(expected)


def test_value_subset():
d = mdf.MicroDataFrame({"x": [1, 2, 3], "y": [1, 2, 2]}, weights=[4, 5, 6])
d2 = d[d.y > 1]
Expand Down