Skip to content

Commit

Permalink
Initial code for flagging rows for winsorisation
Browse files Browse the repository at this point in the history
  • Loading branch information
sarahcollyer committed Jun 20, 2024
1 parent 3280bec commit f2fa736
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 0 deletions.
Empty file added src/flag_for_winsorisation.py
Empty file.
File renamed without changes.
31 changes: 31 additions & 0 deletions tests/test_flag_for_winsorisation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Flag rows that are not to be winsorised.

What we need:
    - a dataframe
    - an a weight
    - a g weight
    - a * g to create a new column
    - then flag anything <= 1 as not to be winsorised
"""

import pandas as pd


def winsorisation_flag(df, a_weight, g_weight):
    """Flag rows whose combined a*g weight is at most 1.

    Two columns are written to ``df`` in place:

    * ``new_col``    - the element-wise product of the two weights.
    * ``NW_AG_flag`` - ``"NW_AG"`` where ``new_col <= 1``, otherwise ``""``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to flag. It is modified in place and also returned.
    a_weight : str or pandas.Series
        Either the name of a column in ``df`` or the weight values themselves.
    g_weight : str or pandas.Series
        Either the name of a column in ``df`` or the weight values themselves.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with ``new_col`` and ``NW_AG_flag`` added.
    """
    # Use the arguments that were passed in rather than hard-coded
    # ``df.a_weight`` / ``df.g_weight`` attributes; a string is treated as a
    # column name, anything else is used as the weight values directly.
    a_values = df[a_weight] if isinstance(a_weight, str) else a_weight
    g_values = df[g_weight] if isinstance(g_weight, str) else g_weight

    df["new_col"] = a_values * g_values

    # Vectorised flagging. A missing product compares False against 1, so
    # those rows keep the empty-string flag.
    df["NW_AG_flag"] = ""
    df.loc[df["new_col"] <= 1, "NW_AG_flag"] = "NW_AG"

    return df


# Load the winsorisation flag fixture and show it.
# NOTE(review): absolute, machine-specific path, and this runs at import time.
data = pd.read_csv(
    "/home/cdsw/monthly-business-survey-results/tests/data/winsorisation/flag_data.csv"
)
print(data)

# Run the flagging over the fixture using its own weight columns.
test = winsorisation_flag(
    data,
    a_weight=data["a_weight"],
    g_weight=data["g_weight"],
)

0 comments on commit f2fa736

Please sign in to comment.