From e636398fac7749feb9beb2c19eb82e23c0240c70 Mon Sep 17 00:00:00 2001 From: Stef Piatek Date: Thu, 7 Sep 2023 10:08:42 +0100 Subject: [PATCH 1/8] Allow for account for typer magic on optional types --- src/rred_reports/reports/interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rred_reports/reports/interface.py b/src/rred_reports/reports/interface.py index a68ccfef..5cfa72a0 100644 --- a/src/rred_reports/reports/interface.py +++ b/src/rred_reports/reports/interface.py @@ -158,7 +158,7 @@ def send_school( dispatch_list = top_level_dir / dispatch_path - if not manual_id: + if not manual_id or (not manual_id.exists): manual_id = [] report_directory = top_level_dir / "output" / "reports" / str(year) / "schools" for report_path in sorted(report_directory.glob("report_*.pdf")): From cbed04779479630c27176f4f8ce2fb59aebccefa Mon Sep 17 00:00:00 2001 From: Stef Piatek Date: Thu, 7 Sep 2023 10:09:25 +0100 Subject: [PATCH 2/8] Warn if a school report has duplicate rows --- src/rred_reports/reports/schools.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/rred_reports/reports/schools.py b/src/rred_reports/reports/schools.py index 9e77e30f..c3bae0b6 100644 --- a/src/rred_reports/reports/schools.py +++ b/src/rred_reports/reports/schools.py @@ -2,6 +2,7 @@ from pathlib import Path import pandas as pd +from loguru import logger from rred_reports.reports.filler import TemplateFiller @@ -239,7 +240,12 @@ def populate_school_tables(school_df: pd.DataFrame, template_path: Path, report_ columns, filter_function = column_and_filter filtered = filter_function(school_df, report_year) table_to_write = filtered[columns] - template_filler.populate_table(index + 1, table_to_write) + if index == 0 and (table_to_write.shape[0] != table_to_write.drop_duplicates().shape[0]): + logger.warning( + "Table 1 has duplicate values that will be removed, suggests an issue with the masterfile school or teacher data:\n{school_data}", + school_data=table_to_write.to_markdown(), + ) + template_filler.populate_table(index + 1, table_to_write.drop_duplicates()) return template_filler From 9b7e92f48abf4b5b81cad90283f8bd423d97a05d Mon Sep 17 00:00:00 2001 From: Stef Piatek Date: Thu, 7 Sep 2023 10:23:50 +0100 Subject: [PATCH 3/8] Also ensure school summary is unique --- src/rred_reports/reports/schools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rred_reports/reports/schools.py b/src/rred_reports/reports/schools.py index c3bae0b6..c81fde9f 100644 --- a/src/rred_reports/reports/schools.py +++ b/src/rred_reports/reports/schools.py @@ -187,7 +187,7 @@ def get_outcome_from_summary(outcome_df: pd.DataFrame, outcome_type: str) -> int return 0 filtered = filter_by_entry_and_exit(school_df, report_year) - filtered_summary_table = filtered[columns_used].copy() + filtered_summary_table = filtered[columns_used].drop_duplicates().copy() # let's try and reduce the pain with exit outcome labels filtered_summary_table["exit_outcome"] = filtered_summary_table["exit_outcome"].str.lower().str.strip() From 36885a5d5dc25cbb212a3ddab515edd6966d8151 Mon Sep 17 00:00:00 2001 From: Stef Piatek Date: Thu, 7 Sep 2023 10:36:59 +0100 Subject: [PATCH 4/8] Fix typer optional list --- src/rred_reports/reports/interface.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/rred_reports/reports/interface.py b/src/rred_reports/reports/interface.py index 5cfa72a0..48f59b65 100644 --- a/src/rred_reports/reports/interface.py +++ b/src/rred_reports/reports/interface.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Optional +from typing import Annotated, Optional import typer from loguru import logger @@ -133,7 +133,7 @@ def create(level: ReportType, year: int, config_file: Path = "src/rred_reports/r @app.command() def send_school( year: int, - manual_id: Optional[list[str]] = typer.Option(None), # noqa: B008 + manual_id: Annotated[Optional[list[str]], typer.Option([])] = (), attachment_name: str = "RRED_report.pdf", config_file: Path = "src/rred_reports/reports/report_config.toml", top_level_dir: Optional[Path] = None, @@ -157,16 +157,15 @@ def send_school( top_level_dir = TOP_LEVEL_DIR dispatch_list = top_level_dir / dispatch_path - - if not manual_id or (not manual_id.exists): - manual_id = [] + school_ids = list(manual_id) + if not manual_id: report_directory = top_level_dir / "output" / "reports" / str(year) / "schools" for report_path in sorted(report_directory.glob("report_*.pdf")): - manual_id.append(report_path.stem.split("_")[-1]) + school_ids.append(report_path.stem.split("_")[-1]) email_details = [] logger.info("Getting dispatch list details for each school report pdf found") - for school_id in tqdm(manual_id): + for school_id in tqdm(school_ids): email_info = get_mailing_info(school_id, dispatch_list, override_mailto) email_details.append({"school_id": school_id, "mail_info": email_info}) @@ -177,7 +176,7 @@ def send_school( school_mailer(email_detail["school_id"], year, email_detail["mail_info"], report_name=attachment_name) emailed_ids.add(email_detail["school_id"]) except Exception as error: - all_schools = set(manual_id) + all_schools = set(school_ids) schools_to_send = sorted(all_schools.difference(emailed_ids)) school_command = f"--manual-id {' --manual-id '.join(schools_to_send)}" logger.error( @@ -198,8 +197,13 @@ def main(): if __name__ == "__main__": - create(ReportType.SCHOOL, 2022, TOP_LEVEL_DIR / "src/rred_reports/reports/report_config.toml") + # create(ReportType.SCHOOL, 2022, TOP_LEVEL_DIR / "src/rred_reports/reports/report_config.toml") ## test sending reports to specific UCL user - # send_school(2021, config_file=TOP_LEVEL_DIR / "src/rred_reports/reports/report_config.toml", top_level_dir=TOP_LEVEL_DIR, override_mailto="username@ucl.ac.uk") + send_school( + 2022, + config_file=TOP_LEVEL_DIR / "src/rred_reports/reports/report_config.toml", + top_level_dir=TOP_LEVEL_DIR, + override_mailto="sejjpia@ucl.ac.uk", + ) ## test sending reports to RRED email for UAT # send_school(2021, config_file=TOP_LEVEL_DIR / "src/rred_reports/reports/report_config.toml", top_level_dir=TOP_LEVEL_DIR, override_mailto="ilc.comms@ucl.ac.uk") From 8556e58ad25fc93df23f0c038dc953a7365298e1 Mon Sep 17 00:00:00 2001 From: Stef Piatek Date: Thu, 7 Sep 2023 18:06:55 +0100 Subject: [PATCH 5/8] Test logging of warning for template filling --- pyproject.toml | 2 ++ tests/conftest.py | 2 ++ tests/test_report_writing.py | 16 ++++++++++++++++ 3 files changed, 20 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index ec97922c..98729596 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,12 +47,14 @@ test = [ "pytest-cov >=4", "pytest-mock >=3.10.0", "email-validator==1.3.0", + "loguru_caplog >= 0.2.0", ] dev = [ "pytest >=7", "pytest-cov >=4", "pytest-mock >=3.10.0", "pre-commit >= 3", + "loguru_caplog >= 0.2.0", ] [project.urls] diff --git a/tests/conftest.py b/tests/conftest.py index 126c385a..62caebd7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,5 @@ +from loguru_caplog import loguru_caplog # noqa: F401 + pytest_plugins = [ "tests.fixtures.test_template_files", "tests.fixtures.test_redcap_files", diff --git a/tests/test_report_writing.py b/tests/test_report_writing.py index 1c5ff0a1..94a37f0d 100644 --- a/tests/test_report_writing.py +++ b/tests/test_report_writing.py @@ -57,6 +57,22 @@ def test_school_tables_filled(example_school_data: pd.DataFrame, templates_dir: assert output_doc.exists() +def test_duplicate_student_warning(example_school_data: pd.DataFrame, templates_dir: Path, temp_out_dir: Path, loguru_caplog): + """ + Given a masterfile dataframe with one duplicated row + When the school template is populated + Then the resulting table will have one less row than the input data, and there will be a loguru message for the duplication + """ + output_doc = temp_out_dir / "school.docx" + + duplicate_school_data = example_school_data.copy() + duplicate_school_data.iloc[3] = duplicate_school_data.iloc[5] + + populated_template = populate_school_data(duplicate_school_data, templates_dir / "2021/2021-22_template.docx", 2021, output_doc) + assert (duplicate_school_data.shape[0] - 1) == len(populated_template.tables[1].rows) + assert "Duplicate students found" in loguru_caplog.text + + def test_school_name_replaced_in_paragraphs(example_school_data, templates_dir: Path, temp_out_dir: Path): """ Given a school template with the first non-blank paragraph having a "School A" placeholder From a2ca2f36f529a44845007d6bf7c07552c99a37d7 Mon Sep 17 00:00:00 2001 From: Stef Piatek Date: Thu, 7 Sep 2023 18:12:41 +0100 Subject: [PATCH 6/8] Log warning when school has duplicate information --- src/rred_reports/masterfile.py | 4 +++ ...masterfile_with_school_in_two_regions.xlsx | Bin 0 -> 12781 bytes tests/test_masterfile.py | 24 ++++++++++++++++++ 3 files changed, 28 insertions(+) create mode 100644 tests/data/masterfile_with_school_in_two_regions.xlsx diff --git a/src/rred_reports/masterfile.py b/src/rred_reports/masterfile.py index 50ef24ce..0bb0ece2 100644 --- a/src/rred_reports/masterfile.py +++ b/src/rred_reports/masterfile.py @@ -129,6 +129,10 @@ def clmnlist(i: int, data: pd.DataFrame = full_data) -> list: all_schools_df = School.new(clmnlist(6), clmnlist(3), clmnlist(4), clmnlist(5)) # pylint: disable=E1121 all_schools_df = all_schools_df.drop_duplicates() # pylint: disable=E1101 + is_duplicated = all_schools_df.duplicated(["rrcp_school"]) + if any(is_duplicated): + duplicated_schools = all_schools_df[all_schools_df["rrcp_school"].isin(all_schools_df.loc[is_duplicated, "rrcp_school"])] + logger.warning("The following School IDs had duplicate information:\n{duplicated_df}", duplicated_df=duplicated_schools.to_markdown()) teach_df = Teacher.new(clmnlist(1), clmnlist(2), clmnlist(6)) # pylint: disable=E1121 teach_df.drop_duplicates(subset=["rred_user_id", "school_id"], inplace=True) # pylint: disable=E1101 diff --git a/tests/data/masterfile_with_school_in_two_regions.xlsx b/tests/data/masterfile_with_school_in_two_regions.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..7c2343aa4d07c62f5d958a9e31029ef69469a044 GIT binary patch literal 12781 zcmeHtWmH}14(P$%iWaB1ySr;~FBB>6oP%re;%oTcR;;)c_u}sU4xPDoW_stn z_5Ro!ccI!Y9VNQIH5z!Sj- znJh{F{1VN`>X>lOY&?{sBiM;rIE98eVF(IBO8uw>nN0cf^W62#Zs;()}oc zy!pN-*x#uGDNpN+XUyXkpvs6Zx1`n5qQb_mNl~5#BGO<=xQe93s#I>Qxh0X^9T>;T z)~~d9$tO-CVp#(5$(D@-nLQW*y^3R( z=ES`<5FQkaZn+3pYC6C;Cj^y3ZX=wWYjWcrey^NRwJyM1JrYaAuvFR=K@>s(8NlW!fJ;pnJ!`2;nx5%4>#qz{OoPu7#PNX4y(j>mLaiGoI)MkyNRVT5# z0owNYQ-;CwBeRYE-l=8qO8=+HtdeI2wLk#?DhL1o8W;j>GJiFe5)BpmJRWo(;hjgQ zkjq4TOj2l09Z`UOfs00grRSA8uVI+Rmvq+GMOIIDk+L%5Lpuq3ZxiRf6ASsWpA9+O zIIAVj(PJ~NU#R!uek>c~+k9D^Re1Zr#wwt1P5GH!_4GHz3+}mH1M~JrVB?Q9)1I4f zSb#JC6cQ}~Ne&mIh2$L4(kQ+nkcsY<5pAZ*JiJ_0UADR=WtGB&B(_m)o8)m1FP^}N zjvRTW3wG%yj>Kr3O!hKPi6vYOogiG@OARbz3#P;FqY!(gnHYv-E$IiXqZ^`500eW#-AdjL4YByCc^KO@?-CxZQ*~sFFk)k)5DEDKXAlPPy%{ zH8WyW)|-h-h*ie8Z9s&Hg@|+x)7B}YnpHCQ<`q&Y=BB#z6(diyt}Mt&xLrzY^c#B3 zs#u+je=p;}2a0W(Pa8FKj{)kkMfcL^?-~0K&Z0X2Wt}7&ZuCL0BU)pe`5RF!V*Vnq zvt>Tz>abEu&U9Y)=Rz+2Ul15(Ql!bazBLjN7;%sg)|74JB~c>bK8L5HWdE zsJoVG(^EZaoe2x%G2>x1>WOM{1CRJHXoolqgP(3awyO^g+P!7l3`v4xJwu}$L|I6V zptG^)6kiKYA#gD&^`zTvIMd&~>>gHsZ^Mq4hB?o&9{cY55;UYqG{ZYojIazx!k(PG z$(%N+$EUIQyWh@2M$(OS(8-eh^O=tw^+p8(EocfgUIUijRpqP){eY*LLr_E}P*NaA z)}MFCHQ9)8uTa5`?t}yYVEyjsAPXa)sfn5s(9+Hv^c)%@G?$g;SFF(|caPxc6VS57h z^2g3hIPj>Doy?|Hx6U?3dPW{BJJWGJ7!$@=l(<&n@_O!E*|!A5^$&i?u6kcEzSh?i zp`b8Ga32@t^3F%3bSE_=m+h1%lM<&3(?Zl4!W|&9%}3xIfT^h`Xyf9x zojPk`=%_0|(ZAkyIEWdocE-J>5Tu4!YdKV~6G1oH$evOEu!6+MzLSU4&R!HaY#ocK zqYfKFRIn=oB6MkUS?qDdHc_P^>=G*@4Z;b7_{$b8T9va@3yX+;BJVUp*O>#ZM9G;8HECn zH=u%pVLQjWG$EN+K&Q5sDMd@15?8m$4mL2^v%qHJoUy53#@WyF+2p3cQ2E@-{qB&W ztfPSTl9}Va9>O#)vYKIte4^^%x89>9dtLc1jtjTR3{jKLmZFx0^Ue5V@K}{& zNIq%K(a486>CG)8`1~uC&hWX0uOkS(ad7AK z?T!+it9$@jJ-z&IBinw66@jREqhbUmIXGV=R~W+Nxb_=z{q2!mzlO;%T?RA?&m@mD zC;`!{sX5$uuGpXWv-6`dQXI&lQjy3&!!eY~CP^<7yPzZ2o4rw3X}3fC-n1W4H`fOr z2C!rjlD2j+%vz|4JrQw$GU3#c;5BvnVpr!g?rNKYk0R@EtT^Q56zg?WR-yUR-F7L? znQN@V4x(_c-V4!)zbTeBk~vyQK9-h7$z~z5BRYPX(rI;`%Aa20a-4NG-3pHRvz{$7d>>~r$j*_W%_FO9V%GyPByws3|dMua4Dv)bg;`)P0m`522|3_ z%J7uM7WDgzj;uX}C&DEgrGd?63m443qje!NCzf989~qHJ@)>f zKWVMsBzt)oR>X6_8)Nw1)p5o1rhvq4EMmCzW}ofylm~vM+QOM=>m<T^D=m`>NQB=8@iL8Kqf`80A`EA+cJHT(mz`=_AcXxWuJ3$tv zrcR(gPAxwlSpR)y35@Ot>tw-_yn=cZck_&&4nh`J<9^kku?1y(I|n73D&E zW3;i6DuYxg9`L$+x@Rk+5I)BU{g{;vb!X|n2`3mWDYCLXHGr#OR;)c@*znd0;1g8U zdEAfkCK0g9Yaf<59SM}>jWHC;mErA^JUD%CEV}1b|HY(_rAWcQlVOTk0t=K89pjtCf1qLVPd* zrz$WoT_k>Mrxz_gK>bYy6doKxfuf9^0;h}q5D;Dc$`ppOtv~<839}VYE@qpy&jRkiP)lesDCMt>2uQ3 zCVRv2!;@{4!>3(6o6)#{Pr@z6%f%F(Oq@Vu-J2Dk6LNt7UE_dZj3N8G&1vsPxMvgl z>1hdX^g)=?==byqz9Ke@85ALGWph2+@G(QDj$SQ|gj$=S`7DxgzFGQ~1!;#|>&@KT znTfYj?DOm|(iflHU9Z;He)JCA%wDxWx}QGGj_q&vKP+=EAD(4pGATu6-t+I-*=0SR zEcYxv94yw8j6^B=SVs+y*vZ@Ub-ZjG@qKdlnM)^n5)kHZD>Z=z25*4mGf7>Ic=@h6 zf#<==N!BNvTAP)1Xs)C4M&0tB9#IQ(K%|YgI(DH8)-n)4dxk51AVD9XU<*-ZV1zR! zA-i5gHTXqLLb|Jns>>3T?9L58I#b29dmBREQ)~LJr^fm6r#1%KSO-;m7JT{eKU3@bFO}= zeo}5S<@Mp@xsHXpBQ1N!X!G-F6NsE zKZEJ%cfr~VZ1aGVX4!WCpIQPOkKjm<8DR*T`@?F$60e$9i`#$+Fc650^N; zGVyONJ4)|XaC{u-g_{yd76+o;MyeOZW<5w745YZdn9-oAnkMB zw?r=NzF{7HzZPpC$OfGp$#f@BZKju!aeV3ukwr*wgT#{=0>M?R`rx9sLdjfmS$@2C z$K3*r!o~!kUx%#t^EMcORntXVSa1%t=)uP%;5HMufIjc&x6E?Y*|pw@x@A6yqTG1{ zHB?|=cvJRC{lQ3gg|JzJWT6VpZV|5jIPcB%7~{P*;?qnNnnwAAzb6ipR|d{Pwek+o zrBs8QwVe2jXdJb# zw);GyyOxwdw9G{YH(p5x1gQ@GZp5UN%-9AT6LJ|gczB_VR}Ij4D@4XH@(Lp|dPr3m z6fdEnpM?GiHghveT)O;au`P#OSn%0XjU=rmK*YNIAzRwMNoETRwlnR!+SQb&7`(x-+eVR~mj%%~P4)pJ#RRw3x83aNR`i=SmH<1Zxq*6IAa(0_;A8~&^e7A!pPsP3m;nn2IX>-uj_r=B^T`U&|9O}^)^ z|FIZg={3O8!)t9li^TU#lC&jlBYocDHp9H0Pj?l63Ms*NckJIB@c-n%`QK92VEPE$ zbLT$&EZzJJ;~dfRR?poGgPv`n;)&^l7rSCg!EcdaJt50gUQ`dWg;hblI}$3)bG;4B zKbrX(Eykn=qr+No{gE)yh^Ui|dejDgJYPW@GE2l;nJ*a=ukhzyTr&Xw7T68m> z?qgK9n4kGdu0f#MigtjPIXQcSKPN~AE{vp4Jliepf=I~yP>gUA`%Psw$#gc4(?ctc zTzwhDh1{!ipF(2q0D&>cs!-s$ZwlK+Gc(OQ)ozG9w|+6CQ0?z<{lOj{VI!}3ds4h# zw4(^-+WelF{Myp%z(sFU1m`VZNuN9KKTFR}7N)kQ%s+pB=KcFRL*WG6SnasC!pI=k zd(QP}%B7VNoA^ZvlZ*ty+PZxeO?I}p76MF2O5O`qn*4%fF?+#;aS2P}BgJT=MnDO{6Q$_U3 zbdqP`^b!&UET!Pk7pRuvk;h%X^*2`b^kyR?-$zN5^Y#4PqO{5yD~1w5v#eEANtsi* z-c=>`A!u9mkce#V)F_jcpLor4{EFtq3m=Oc%7-o7dS;w4O)7~^SRHY@>?gKH1w$|G zG0^Ewht)VKe1NWJfdSMfz21wx2i^9%@j?g-4*$fDb>I2+676D)P?T?{a+D@3ExG<+5$FnoA>GIPCV|i_r;b{G|}8VAyLXPS!m^P+3LF^s&FGe)VmW1 z;f!t;vFh{)@x zcf|VLTygTi?eWHNJoZ>Ym+z6v0-!mCxV~;5DpWGbSl0kqT(9U^ehEQwLggyfg?fa# z&Ihws4KN#4za>zAxHTOns%(i!Z6(2V@~;RtofD}0UON=Z&d+!DIdQHTPt7UfE1qY) z5J-E}h<$LYaZKx#VItAKTnT;Ou*OBYA%tmMk@Yk!Fd3fS^y(zRw!+-r(ccY+CKO{i z{{~-TQBI#{GhN*yBMSBP3Mb?y$wv4D?F`H|OI&li>|P|ij(M^N&Y_pS1HF%Zb+4hp zlHfrb^h+G>J8m876HfdXExczwz{iMGAzyP1fG$C8G(v3o{9mg z7}i_!z_+uh+G9@IW6Hb9Il5Agn}{s7*w}8Hmn)L3n<1ieqQPc_30v>53u{taBFYel zQ>8rO**~X3C>t}glSVtK%{Do@TRB@6wMynQl1DgIN3x8w_~WiJ@DC;3>3nTU4Vh$9 zYBX*n0J85!qG4D|zM^jMqoPt9?dG?;skMTyrz7XEzQNqa>zZ+cZ6u$3gPW_?;PO&7 z)9%{UDCs3`Rft3apI*G;04!^4if2MMqXDCDA_~VCVXiH`)_Zgz-Syu@=tFR5BGw{8rV<93G2$mWfIzW(hl6Ng|_Lq4>7AG4i#wnd#x8Lf?l60C$I2 z{n9G1V>fSH*9-?NW?I6=c@v_@`NEZ5a#Kt)J2^wkzKto-li(@M{??JR$0U$a1a;Y} zw8%{|H2{LO{9*pFhi-S$DSjt}e*>lXRD80EuE6d=kr7yC&D4ncV#-~vWp&a!9IAtb zG3y)KYn>LwuCVHAmH8TW26$wpNv>;KXDJ@RlQL@IzKn0W&LRwZav9gR0}CxvW^k^4 z0v-;&JcX*;%3}7(Pd>CZaEo%^858i0+R@bmRC6%u zii2G6GzT9l^md);&{hm%RrF9N_HH5cj7BA0EX#Xmm=4!B8Ha)}!ow8|zTwuc-mbOj z&3ZW=VUU7LlLOg0wk}pdX0?Je(5;Gjqx;i*L2pan9HDqDR*Vq}9Ji9Apw3O%DerM3 z)&lF_z*|w8`sC?+f=iU+|5Q)#ko`bP|IToe*4v~FaAnn8_j^|HYxw=1cCnHR&Np6z zi`wYmVF45ScgjF}2N1K7gTo(FZ~q$+0H-DC@vrUYpT|m|E=iCV33wNkk^ORw<+;x* z3?brN%oRVr%c(Q@P!hX86J4h=v*R)sNy7YAFrNbCX|-F@B_R}CtIGsTG;*jsUTx4s z3$fSAQkO!`dCv!{SIi#QRC~0XsvpKkbt#jo4Xs-o)~Ij!x~vn{g6x1agY6@%_PEnz zB|~dh^L8cya!L8wz0`UrE0(riZF~+skrvTYxh01Fh;(!ai2lp8m)-mvzhUaQU8{Vo zasMn%IvU~UfwoM-X{ixYDW0qC%=oS9p~%cav!m>pI5>K;wLQ%S9!wSKrR$b@$%@(Kr%kX80w7+0kw zS$YA7MU1P3+mzhxU){eho(XWHJ?cdE8Qxu zo5zIHdrw@yZw|eBiW!tonTqgvfV+I2SNPAmYKWVM=CfUkfbAONuXg>;9%%YM!v^c@ zk1H_l55tDJ1sgWe?C>osnWLZGwq*w_hh+GodYIzWTpTH@^TQGKd}zEOkqc&z7FxfS z-hvm0^J$cjg>@1`mMMcMl%{31PF2W-zQ<6U$T*G8n*jM~BSY@Zn_0+oz}`u6J*uE3 zvMfa^=@-UXq*%;!IJXX|U>)_8l#&vAik?yXhWKMD?!It_<_(lM^*D+nzt3qOY(mso z$QGw(phO^g@d-Gv+=K>iMCG^Bj&}mcK>0Qt&P>BK)>kQ|h6;+i(b?2S^L5oOLNyq7 zBR3Pfy$>u_MX!&rN$GJBp)HFl^^Ut(GdnMDu>HR7(SATUrs=;07Ie%T(#!d1Cc-KZ zL*7DEX`L^%8_npJi*2M!VC1vBpNU$Fj?z$M%56&W(>w{MkKC=h)9Pps^45<&YFbXL zJS0)`srhZ9L?*o0*@yO20qbn}!HxRJ^XY52+03?Y zsKEro3uXyN#x^}djH~xRZ5`Q$$~J=e5L*1YS!KOK&$yb!S;Wiu;`I#Xe?BjbtVq{% zf(@Gi8vvjNuOWC;#{t~BcQOU4nL0T=_wavxW@R$AwA)u7l3P&56J)PE*#3x$wcwX;+v|{Nov^%iYWMD- zEAJ`eH2-@#a|7`TcF-CI9m`FbnfnFE-;4h`zasoPN__GJmG3Ez4_}X4IUuy&qDnLG zrS^IW#)QeDiMn|iyZvZ=D(4tomA%Qi!g2+J(>e_bJ9guPO3Lk+DEuHYiCrCG=BxKicd~x&qT-dpO7TE(6^v9^jwnZ!K?VVu3#=D(P@M(;!-2(rh zX+!-yT<~sVceb~ndin%yy5xkI&Q%Yw_%yLn>xQqi@Ko~n5#jC|lgoD~b@QF7(tqwl%Bxt^q z{1ep`T*LT-^Z!>D^H+kuwgLYn=nfv!`8&Zsn}WYm`L&kvClwEppHzOWto%yg@7ds= z=m5YC6#(#SZul$q-(&7yvBzltg8i=u{44seJKLWGiWq)!|NpbkDa*rvZ4dxJ2LDlk L&A5{3`RV@vcr%pj literal 0 HcmV?d00001 diff --git a/tests/test_masterfile.py b/tests/test_masterfile.py index 32c38d9c..31cb428c 100644 --- a/tests/test_masterfile.py +++ b/tests/test_masterfile.py @@ -16,3 +16,27 @@ def test_masterfile_read(data_path): assert nested_data["teachers"].shape == (11, 3) assert nested_data["schools"].shape == (10, 4) assert joined_data.shape == (40, 69) # should be the same number of students as in the pupils df + + +def test_masterfile_warns_duplicate_school(data_path, loguru_caplog): + """ + Given a masterfile with two pupils in the same school, but the region is different for each pupil + When the masterfile is parsed + The nested data should have two schools, and there should be a loguru message about the duplicate ID + """ + file_path = data_path / "masterfile_with_school_in_two_regions.xlsx" + nested_data = parse_masterfile(file_path) + # two schools, even though same id + assert nested_data["schools"].shape[0] == 2 + assert "The following School IDs had duplicate information" in loguru_caplog.text + + +def test_masterfile_no_duplicate_school(data_path, caplog): + """ + Given a masterfile with no duplicate school details + When the masterfile is parsed + There should be no logging about duplicate school IDs + """ + file_path = data_path / "example_masterfile.xlsx" + parse_masterfile(file_path) + assert "The following School IDs had duplicate information" not in caplog.text From 28ae423a7b8fdabc475b0b03ac21e060a474f005 Mon Sep 17 00:00:00 2001 From: Stef Piatek Date: Thu, 7 Sep 2023 18:13:48 +0100 Subject: [PATCH 7/8] Log warning when school has duplicate information --- src/rred_reports/reports/schools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rred_reports/reports/schools.py b/src/rred_reports/reports/schools.py index c81fde9f..40871f1b 100644 --- a/src/rred_reports/reports/schools.py +++ b/src/rred_reports/reports/schools.py @@ -240,9 +240,9 @@ def populate_school_tables(school_df: pd.DataFrame, template_path: Path, report_ columns, filter_function = column_and_filter filtered = filter_function(school_df, report_year) table_to_write = filtered[columns] - if index == 0 and (table_to_write.shape[0] != table_to_write.drop_duplicates().shape[0]): + if index == 0 and any(table_to_write.duplicated()): logger.warning( - "Table 1 has duplicate values that will be removed, suggests an issue with the masterfile school or teacher data:\n{school_data}", + "Duplicate students found, this suggests an issue with the masterfile school or teacher data. Table 1 data:\n{school_data}", school_data=table_to_write.to_markdown(), ) template_filler.populate_table(index + 1, table_to_write.drop_duplicates()) From f4c5ea186ca7ce74eb178c0ffd27545534cb5904 Mon Sep 17 00:00:00 2001 From: Stef Piatek Date: Thu, 7 Sep 2023 18:22:44 +0100 Subject: [PATCH 8/8] Add tabulate To make CI happy, seems to be installed locally --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 98729596..72117441 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dependencies = [ "pypdf == 3.8.1", "python-docx == 0.8.11", "numpy == 1.24.2", + "tabulate == 0.9.0", "tomli == 2.0.1", "tomli-w == 1.0.0", "typer == 0.7.0",