Merge pull request #56 from UCL-ARC/55-adding-a-redcap_school_name-column

55 adding a redcap school name column
UCL-ARC · Aug 30, 2023 · 38e80d6 · 38e80d6
2 parents 544e616 + 61314c4
commit 38e80d6
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 6 deletions.
diff --git a/src/rred_reports/masterfile.py b/src/rred_reports/masterfile.py
@@ -167,7 +167,7 @@ def masterfile_columns() -> list[str]:
 
     assert _school_id == school_id, "Sanity check for school ID columns being the same failed, these were not the same"
 
-    return [pupil_no, user_id, *other_teacher_fields, *other_school_fields, school_id, *other_pupil_fields]
+    return [pupil_no, user_id, *other_teacher_fields, *other_school_fields, school_id, *other_pupil_fields, "redcap_school_name"]
 
 
 def read_and_process_masterfile(data_path: Path) -> pd.DataFrame:

diff --git a/src/rred_reports/redcap/main.py b/src/rred_reports/redcap/main.py
@@ -86,7 +86,8 @@ def preprocess_wide_data(cls, raw_data: pd.DataFrame, labelled_data: pd.DataFram
         processed_extract = labelled_data.copy(deep=True)
         # Unify on using the raw_data column names, labelled uses the questions given on the survey as column names
         processed_extract.columns = raw_data.columns
-        cls._fill_school_id_with_coalesce(raw_data, processed_extract)
+        cls._fill_school_column_with_coalesce(raw_data, processed_extract, "school_id")
+        cls._fill_school_column_with_coalesce(processed_extract, processed_extract, "redcap_school_name")
         cls._fill_region_with_coalesce(processed_extract)
         cls._convert_timestamps_to_dates(processed_extract)
         # Making a copy, so we have a de-fragmented frame for adding row number, was getting a performance warning
@@ -97,9 +98,9 @@ def preprocess_wide_data(cls, raw_data: pd.DataFrame, labelled_data: pd.DataFram
         return cls._rename_wide_cols_with_student_number_suffix(filtered)
 
     @staticmethod
-    def _fill_school_id_with_coalesce(raw_data, processed_extract):
-        school_id_cols = [col for col in raw_data if col.startswith("entry_school_")]
-        processed_extract["school_id"] = raw_data[school_id_cols].bfill(axis=1).iloc[:, 0]
+    def _fill_school_column_with_coalesce(school_data: pd.DataFrame, processed_extract: pd.DataFrame, column_name: str):
+        school_id_cols = [col for col in school_data if col.startswith("entry_school_")]
+        processed_extract[column_name] = school_data[school_id_cols].bfill(axis=1).iloc[:, 0]
 
     @staticmethod
     def _fill_region_with_coalesce(extract: pd.DataFrame):
@@ -135,7 +136,7 @@ def _rename_wide_cols_with_student_number_suffix(extract: pd.DataFrame) -> pd.Da
     # Hardcoded columns for exporting, could finesse this but probably isn't worth the time
     # The final columns output are under unit testing so will catch any changes to input or output data
     _parsing_cols = {
-        "non_wide_columns": ["reg_rr_title", "rrcp_country", "rrcp_area", "school_id"],
+        "non_wide_columns": ["reg_rr_title", "rrcp_country", "rrcp_area", "redcap_school_name", "school_id"],
         "wide_columns": [
             "assessi_engtest2",
             "assessi_iretest1",