Skip to content

Commit

Permalink
Merge pull request #56 from UCL-ARC/55-adding-a-redcap_school_name-column
Browse files: Browse the repository at this point in the history

55 adding a redcap school name column
  • Loading branch information
katiebuntic authored Aug 30, 2023
2 parents 544e616 + 61314c4 commit 38e80d6
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 6 deletions.
2 changes: 1 addition & 1 deletion src/rred_reports/masterfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def masterfile_columns() -> list[str]:

assert _school_id == school_id, "Sanity check for school ID columns being the same failed, these were not the same"

return [pupil_no, user_id, *other_teacher_fields, *other_school_fields, school_id, *other_pupil_fields]
return [pupil_no, user_id, *other_teacher_fields, *other_school_fields, school_id, *other_pupil_fields, "redcap_school_name"]


def read_and_process_masterfile(data_path: Path) -> pd.DataFrame:
Expand Down
11 changes: 6 additions & 5 deletions src/rred_reports/redcap/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ def preprocess_wide_data(cls, raw_data: pd.DataFrame, labelled_data: pd.DataFram
processed_extract = labelled_data.copy(deep=True)
# Unify on using the raw_data column names, labelled uses the questions given on the survey as column names
processed_extract.columns = raw_data.columns
cls._fill_school_id_with_coalesce(raw_data, processed_extract)
cls._fill_school_column_with_coalesce(raw_data, processed_extract, "school_id")
cls._fill_school_column_with_coalesce(processed_extract, processed_extract, "redcap_school_name")
cls._fill_region_with_coalesce(processed_extract)
cls._convert_timestamps_to_dates(processed_extract)
# Making a copy, so we have a de-fragmented frame for adding row number, was getting a performance warning
Expand All @@ -97,9 +98,9 @@ def preprocess_wide_data(cls, raw_data: pd.DataFrame, labelled_data: pd.DataFram
return cls._rename_wide_cols_with_student_number_suffix(filtered)

@staticmethod
def _fill_school_id_with_coalesce(raw_data, processed_extract):
school_id_cols = [col for col in raw_data if col.startswith("entry_school_")]
processed_extract["school_id"] = raw_data[school_id_cols].bfill(axis=1).iloc[:, 0]
def _fill_school_column_with_coalesce(school_data: pd.DataFrame, processed_extract: pd.DataFrame, column_name: str):
school_id_cols = [col for col in school_data if col.startswith("entry_school_")]
processed_extract[column_name] = school_data[school_id_cols].bfill(axis=1).iloc[:, 0]

@staticmethod
def _fill_region_with_coalesce(extract: pd.DataFrame):
Expand Down Expand Up @@ -135,7 +136,7 @@ def _rename_wide_cols_with_student_number_suffix(extract: pd.DataFrame) -> pd.Da
# Hardcoded columns for exporting, could finesse this but probably isn't worth the time
# The final columns output are under unit testing so will catch any changes to input or output data
_parsing_cols = {
"non_wide_columns": ["reg_rr_title", "rrcp_country", "rrcp_area", "school_id"],
"non_wide_columns": ["reg_rr_title", "rrcp_country", "rrcp_area", "redcap_school_name", "school_id"],
"wide_columns": [
"assessi_engtest2",
"assessi_iretest1",
Expand Down

0 comments on commit 38e80d6

Please sign in to comment.