Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

461 turnover analysis tool #84

Merged
merged 3 commits into from
Sep 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions mbs_results/turnover_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import numpy as np
import pandas as pd


def create_turnover_output(
    cp_df: pd.DataFrame,
    qv_df: pd.DataFrame,
    finalsel_df: pd.DataFrame,
    winsorisation_df: pd.DataFrame,
    winsorisation_period: str,
    selected_period: int,
) -> pd.DataFrame:
    """
    Create the output used to populate the turnover analysis tool.

    Parameters
    ----------
    cp_df : pd.DataFrame
        cp input dataframe containing reference, sic92 and error_mkr
    qv_df : pd.DataFrame
        qv input dataframe containing reference, question_no, adjusted_value and
        returned_value. Only rows with question_no equal to 40 are used; no
        period filter is applied to this dataframe.
    finalsel_df : pd.DataFrame
        finalsel input dataframe containing ruref, froempment, frotover, cell_no
        and entname1. ruref is matched against reference in cp_df.
    winsorisation_df : pd.DataFrame
        winsorisation input dataframe containing question_no, a period column,
        reference, imputation_marker, design_weight, calibration_factor and
        outlier_weight. Only rows with question_no equal to 40 and the period
        column equal to selected_period are used.
    winsorisation_period : str
        Name of column displaying period in winsorisation
    selected_period : int
        Period to output results for in the format YYYYMM

    Returns
    -------
    pd.DataFrame
        dataframe in correct format for populating turnover analysis tool, with
        one row per match of the left joins starting from cp_df and a fresh
        0..n-1 index.
    """

    turnover_question_no = 40

    # Boolean masks are used instead of a formatted ``DataFrame.query`` string so
    # that the period column name does not have to be a valid Python identifier.
    qv_turnover = qv_df[qv_df["question_no"] == turnover_question_no]
    winsorisation_turnover = winsorisation_df[
        (winsorisation_df[winsorisation_period] == selected_period)
        & (winsorisation_df["question_no"] == turnover_question_no)
    ]

    # cp_df drives the output: every cp row is kept and enriched by left joins.
    turnover_df = (
        cp_df.merge(qv_turnover, how="left", on="reference")
        .merge(finalsel_df, how="left", left_on="reference", right_on="ruref")
        .merge(winsorisation_turnover, how="left", on="reference")
    )

    turnover_df["curr_grossed_value"] = (
        turnover_df["adjusted_value"]
        * turnover_df["design_weight"]
        * turnover_df["outlier_weight"]
        * turnover_df["calibration_factor"]
    )

    # Convert imputation_marker to type
    # Type 1: Return, Type 2: Construction, Type 3: Imputation
    # Any other (or missing) marker falls through to np.select's default of 0.
    type_conditions = [
        turnover_df["imputation_marker"] == "r",
        turnover_df["imputation_marker"].isin(["c", "mc"]),
        turnover_df["imputation_marker"].isin(["fir", "bir", "fic", "fimc"]),
    ]

    type_values = [1, 2, 3]

    turnover_df["type"] = np.select(type_conditions, type_values)

    # The error_res_code column exists in the turnover tool input but is ignored, and
    # its purpose is not known. Adding as a constant zero column to prevent code used
    # for producing tool from erroring.
    turnover_df["error_res_code"] = 0

    output_columns = [
        "sic92",
        "cell_no",
        "reference",
        "entname1",
        "adjusted_value",
        "type",
        "curr_grossed_value",
        "outlier_weight",
        "error_mkr",
        "error_res_code",
        "frotover",
        "froempment",
        "returned_value",
    ]

    return turnover_df[output_columns].reset_index(drop=True)
4 changes: 4 additions & 0 deletions tests/data/turnover_analysis/cp_input.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
period,reference,sic92,error_mkr
202301,101,1,O
202301,102,2,C
202301,103,2,E
4 changes: 4 additions & 0 deletions tests/data/turnover_analysis/finalsel_input.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ruref,froempment,frotover,cell_no,entname1
101,4593,594,32,NAME 1
102,62,43,6,NAME 2
103,394,509,19,NAME 3
5 changes: 5 additions & 0 deletions tests/data/turnover_analysis/qv_input.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
period,reference,question_no,returned_value,adjusted_value
202301,101,40,5940.0,4205.4
202301,101,49,4326.0,4265.4
202301,102,40,448.0,448.0
202301,103,40,75940.0,84205.9
4 changes: 4 additions & 0 deletions tests/data/turnover_analysis/turnover_analysis_output.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
sic92,cell_no,reference,entname1,adjusted_value,type,curr_grossed_value,outlier_weight,error_mkr,error_res_code,frotover,froempment,returned_value
1,32,101,NAME 1,4205.4,1,4625.94,1.0,O,0,594,4593,5940.0
2,6,102,NAME 2,448.0,2,448.0,1.0,C,0,43,62,448.0
2,19,103,NAME 3,84205.9,3,378926.55,1.5,E,0,509,394,75940.0
6 changes: 6 additions & 0 deletions tests/data/turnover_analysis/winsorisation_input.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
question_no,period_x,reference,imputation_marker,design_weight,calibration_factor,outlier_weight
40,202301,101,r,1.0,1.1,1.0
49,202301,101,r,1.5,1.0,1.2
40,202302,101,r,0.8,1.0,1.0
40,202301,102,c,1.0,1.0,1.0
40,202301,103,fir,1.0,3.0,1.5
60 changes: 60 additions & 0 deletions tests/test_turnover_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from pathlib import Path

import pandas as pd
import pytest
from pandas.testing import assert_frame_equal

from mbs_results.turnover_analysis import create_turnover_output


@pytest.fixture(scope="class")
def filepath():
    """Return the directory (relative to the working directory) holding the CSVs."""
    return Path("tests") / "data" / "turnover_analysis"


@pytest.fixture(scope="class")
def cp_input_data(filepath):
    """Load cp_input.csv from the test data directory as a dataframe."""
    csv_path = filepath / "cp_input.csv"
    return pd.read_csv(csv_path, index_col=False)


@pytest.fixture(scope="class")
def qv_input_data(filepath):
    """Load qv_input.csv from the test data directory as a dataframe."""
    csv_path = filepath / "qv_input.csv"
    return pd.read_csv(csv_path, index_col=False)


@pytest.fixture(scope="class")
def finalsel_input_data(filepath):
    """Load finalsel_input.csv from the test data directory as a dataframe."""
    csv_path = filepath / "finalsel_input.csv"
    return pd.read_csv(csv_path, index_col=False)


@pytest.fixture(scope="class")
def winsorisation_input_data(filepath):
    """Load winsorisation_input.csv from the test data directory as a dataframe."""
    csv_path = filepath / "winsorisation_input.csv"
    return pd.read_csv(csv_path, index_col=False)


@pytest.fixture(scope="class")
def turnover_analysis_output(filepath):
    """Load turnover_analysis_output.csv (the expected result) as a dataframe."""
    csv_path = filepath / "turnover_analysis_output.csv"
    return pd.read_csv(csv_path, index_col=False)


class TestTurnoverAnalysis:
    """Regression test for create_turnover_output against the CSV fixtures."""

    def test_turnover_analysis(
        self,
        cp_input_data,
        qv_input_data,
        finalsel_input_data,
        winsorisation_input_data,
        turnover_analysis_output,
    ):
        """Output for period 202301 must equal the expected CSV exactly."""
        result = create_turnover_output(
            cp_df=cp_input_data,
            qv_df=qv_input_data,
            finalsel_df=finalsel_input_data,
            winsorisation_df=winsorisation_input_data,
            winsorisation_period="period_x",
            selected_period=202301,
        )

        assert_frame_equal(result, turnover_analysis_output)
Loading