Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

22 cumulative imputation links are incorrect for fic #29

Merged
merged 21 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions src/apply_imputation_link.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,22 +55,22 @@ def create_and_merge_imputation_values(
imputation_config = {
"c": {
"intermediate_column": "constructed",
"marker": "C",
"marker": "c",
# doesn't actually apply a fill so can be forward or back
"fill_column": auxiliary,
"fill_method": "ffill",
"link_column": construction_link,
},
"fir": {
"intermediate_column": "fir",
"marker": "FIR",
"marker": "fir",
"fill_column": target,
"fill_method": "ffill",
"link_column": cumulative_forward_link,
},
"bir": {
"intermediate_column": "bir",
"marker": "BIR",
"marker": "bir",
"fill_column": target,
"fill_method": "bfill",
"link_column": cumulative_backward_link,
Expand All @@ -80,9 +80,8 @@ def create_and_merge_imputation_values(
# sampled. This is fine for automatic imputation, but care should be taken
# if manual construction imputation is done
"intermediate_column": "fic",
"marker": "FIC",
# this has to have the same name as the intermediate column for constructed
"fill_column": "constructed",
"marker": "fic",
"fill_column": "imputed_value",
"fill_method": "ffill",
"link_column": cumulative_forward_link,
},
Expand Down
22 changes: 14 additions & 8 deletions src/cumulative_imputation_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,20 @@ def get_cumulative_links(
dataframe.sort_values([strata, reference, period], inplace=True)
dataframe["missing_value"] = np.where(dataframe[target].isnull(), True, False)

# TODO: These conditions are similar to the ones in flags; consider a function for this
marker_diff_con = (
dataframe["imputation_marker"]
.ne(dataframe["imputation_marker"].shift().bfill())
.astype(int)
!= 0
)

strat_diff_con = dataframe[strata].diff(time_difference) != 0

reference_diff_con = dataframe[reference].diff(time_difference) != 0

dataframe["imputation_group"] = (
(
(dataframe["missing_value"].diff(time_difference) != 0)
| (dataframe[strata].diff(time_difference) != 0)
| (dataframe[reference].diff(time_difference) != 0)
)
.astype("int")
.cumsum()
(marker_diff_con | strat_diff_con | reference_diff_con).astype("int").cumsum()
)

if forward_or_backward == "f":
Expand All @@ -69,4 +75,4 @@ def get_cumulative_links(
dataframe["cumulative_" + imputation_link],
)

return dataframe[["imputation_group", "cumulative_" + imputation_link]]
return dataframe
14 changes: 7 additions & 7 deletions tests/cumulative_links.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
strata,reference,target,period,forward_imputation_link,backward_imputation_link,imputation_group,cumulative_forward_imputation_link,cumulative_backward_imputation_link
100,100000,200,202402,1,2,1,,
100,100000,,202403,2,0.6,2,2,0.6
100,100000,,202404,3,1,2,6,1
200,100001,,202402,1,4,3,1,2
200,100001,,202403,3,0.5,3,3,0.5
200,100001,300,202404,0.5,1,4,,
strata,reference,target,period,forward_imputation_link,backward_imputation_link,imputation_marker,imputation_group,cumulative_forward_imputation_link,cumulative_backward_imputation_link
100,100000,200,202402,1,2,r,1,,
100,100000,,202403,2,0.6,fir,2,2,0.6
100,100000,,202404,3,1,fir,2,6,1
200,100001,,202402,1,4,bir,3,1,2
200,100001,,202403,3,0.5,bir,3,3,0.5
200,100001,300,202404,0.5,1,r,4,,
18 changes: 9 additions & 9 deletions tests/data/apply_imputation_link/FIR_BIR_C_FIC.csv
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
imputation_class,reference,target,period,forward_imputation_link,backward_imputation_link,auxiliary_variable,construction_link,cumulative_forward_link,cumulative_backward_link,imputation_marker,imputed_value
100,100000,200,202402,1,2,,,,,R,
100,100000,,202403,2,0.6,,,2,0.6,FIR,400
100,100000,,202404,3,1,,,6,1,FIR,1200
200,100001,,202402,1,4,,,1,2,BIR,600
200,100001,,202403,3,0.5,,,3,0.5,BIR,150
200,100001,300,202404,0.5,1,,,,,R,
300,100002,,202402,1,4,1000,0.1,,2,C,100
300,100002,,202403,3,0.5,,,3,0.5,FIC,300
300,100002,,202404,0.5,1,,,1.5,,FIC,150
100,100000,200,202402,1,2,,,,,r,
100,100000,,202403,2,0.6,,,2,0.6,fir,400
100,100000,,202404,3,1,,,6,1,fir,1200
200,100001,,202402,1,4,,,1,2,bir,600
200,100001,,202403,3,0.5,,,3,0.5,bir,150
200,100001,300,202404,0.5,1,,,,,r,
300,100002,,202402,1,4,1000,0.1,,2,c,100
300,100002,,202403,3,0.5,,,3,0.5,fic,300
300,100002,,202404,0.5,1,,,1.5,,fic,150
40 changes: 25 additions & 15 deletions tests/test_cumulative_imputation_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,19 @@ def cumulative_links_test_data():
return load_and_format(Path("tests") / "cumulative_links.csv")


class TestComulativeLinks:
class TestCumulativeLinks:
def test_get_cumulative_links_forward(self, cumulative_links_test_data):
input_data = cumulative_links_test_data.drop(
columns=["cumulative_forward_imputation_link", "imputation_group"]
)

expected_output = cumulative_links_test_data[
[
"imputation_group",
columns=[
"cumulative_backward_imputation_link",
"cumulative_forward_imputation_link",
"imputation_group",
]
]
)

expected_output = cumulative_links_test_data.drop(
columns=["imputation_group", "cumulative_backward_imputation_link"]
)

actual_output = get_cumulative_links(
input_data,
Expand All @@ -36,19 +37,24 @@ def test_get_cumulative_links_forward(self, cumulative_links_test_data):
1,
)

actual_output = actual_output.drop(
columns=["imputation_group", "missing_value"]
)

assert_frame_equal(actual_output, expected_output)

def test_get_cumulative_links_backward(self, cumulative_links_test_data):
input_data = cumulative_links_test_data.drop(
columns=["cumulative_backward_imputation_link", "imputation_group"]
)

expected_output = cumulative_links_test_data[
[
"imputation_group",
columns=[
"cumulative_backward_imputation_link",
"cumulative_forward_imputation_link",
"imputation_group",
]
]
)

expected_output = cumulative_links_test_data.drop(
columns=["imputation_group", "cumulative_forward_imputation_link"]
)

actual_output = get_cumulative_links(
input_data,
Expand All @@ -61,4 +67,8 @@ def test_get_cumulative_links_backward(self, cumulative_links_test_data):
1,
)

actual_output = actual_output.drop(
columns=["imputation_group", "missing_value"]
)

assert_frame_equal(actual_output, expected_output)
Loading