diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 37e56d7..6d7787b 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -28,4 +28,4 @@ jobs: run: pip install black flake8 - name: Run linters - run: flake8 --ignore=E114,E115,E116,E126,E127,E128,E201,E202,E203,E222,E225,E226,E231,E241,E251,E261,E262,E265,E301,E302,E303,E306,E501,E502,W291,W292,W293,W391 --exclude=./tests + run: flake8 --ignore=E114,E115,E116,E126,E127,E128,E201,E202,E203,E222,E225,E226,E231,E241,E251,E261,E262,E265,E301,E302,E303,E306,E501,E502,W291,W292,W293,W391,W504 --exclude=./tests diff --git a/README.md b/README.md index bcd9351..2759ed1 100644 --- a/README.md +++ b/README.md @@ -57,8 +57,17 @@ When you see "Password:" in the terminal, enter your comps password and hit ente ## Option 1_Run One Site with Python Scripts ```bash +cd simulations python3 run_sims.py -s {site_name} -n {nSims} -python3 wait_for_experiment.py -s {site_name} +python3 add_suite.py # optional step +python3 run_analyzers.py -s {site_name} +python3 download_wi.py -s {site_name} +``` + +Run the plotting and reporting scripts for the site(s) that you ran: +```bash +Rscript create_plots\run_generate_validation_comparisons_site.R +python3 report\create_pdf_report_3.py ``` ## Option 2_Run all Sites with Snakemake (Recommended) @@ -79,7 +88,19 @@ python3 generate_site_rules.py snakemake -j ``` -- If you want to re-run simulations for certain sites, delete COMPS ID files for those sites that you want to -re-run(/simulations/COMPS_ID/{site_name}_COMPS_ID_submit and _done files) and run "snakemake -j" again. 
+- If you want to re-run the analyzers steps with previous experiments you ran, you can delete the analyzer id files and run: +```bash +snakemake clean_ana clean_download -j +snakemake -j +``` + +- Similar to the previous scenario, if you want to run only the download and plotting steps: +```bash +snakemake clean_download -j +snakemake -j +``` + +- If you want to re-run simulations for certain sites, delete COMPS ID files for those sites that you want to -re-run(/simulations/COMPS_ID/{site_name}_COMPS_ID_exp_submit, _analyzers and _download files) and run "snakemake -j" again. -- If you want to re-run the analyzers and plotting steps for certain sites, delete the {site_name}_COMPS_ID_done files only and re-run "snakemake -j". +- If you want to re-run the analyzers and plotting steps for certain sites, delete the {site_name}_COMPS_ID_analyzers and _download files only and re-run "snakemake -j". diff --git a/create_plots/archive_organization/_20220603/helper_functions_infectiousness.R b/create_plots/archive_organization/_20220603/helper_functions_infectiousness.R index 1bdc240..6c84536 100644 --- a/create_plots/archive_organization/_20220603/helper_functions_infectiousness.R +++ b/create_plots/archive_organization/_20220603/helper_functions_infectiousness.R @@ -1,4 +1,4 @@ -# helper_functions_infectiousness.R +# helper_functions_infectiousness.R library(ggplot2) diff --git a/create_plots/helper_functions_par_dens.py b/create_plots/helper_functions_par_dens.py new file mode 100644 index 0000000..cddc7e2 --- /dev/null +++ b/create_plots/helper_functions_par_dens.py @@ -0,0 +1,179 @@ +from plotnine import ggplot, aes, geom_bar, scale_fill_brewer, facet_grid, geom_line, geom_point, scale_x_continuous, \ + geom_errorbar, theme_bw, xlab, ylab, scale_color_manual +import numpy as np +import pandas as pd +from scipy.stats import beta +from pandas.api.types import CategoricalDtype + + +def get_age_bin_averages(sim_df): + """ + Get average parasite densities in each age bin, 
weighting all ages in bin equally (e.g., not weighted by + population size) + Args: + sim_df (pd.DataFrame): simulation output with columns month, agebin, densitybin, Site, Pop, asexual_par_dens_freq and gametocyte_dens_freq + + Returns: age_agg_sim_df, one row per (month, agebin, densitybin, Site) with the mean frequencies and mean Pop; the group keys are kept as regular columns + + """ + # age_bins = sim_df['agebin'].unique() + # remove rows where there are zero people of the measured age bin in the simulation + sim_df = sim_df[sim_df['Pop'] > 0] + # get average across all years in age bins and across simulation run seeds + age_agg_sim_df = sim_df.groupby(['month', 'agebin', 'densitybin', 'Site'], as_index=False).agg( + asexual_par_dens_freq=('asexual_par_dens_freq', np.mean), + gametocyte_dens_freq=('gametocyte_dens_freq', np.mean), + Pop=('Pop', np.mean) + ) + return age_agg_sim_df + + +def plot_par_dens_ref_sim_comparison(age_agg_sim_df, ref_df): + """ + Plot parasite density comparisons with reference + Stacked barplots of parasite density bins by age + Args: + age_agg_sim_df (pd.DataFrame): averaged simulation output with month, agebin, densitybin and Site as columns + ref_df (pd.DataFrame): reference data; columns read here include month, densitybin, count_asex, bin_total_asex, count_gamet and bin_total_gamet + + Returns: the three ggplot objects gg1 (stacked barplot), gg2 (asexual densities) and gg3 (gametocyte densities) + + """ + months_of_year = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + + # subset simulation output to months in reference dataset + months = sorted(ref_df['month'].unique()) + cur_df = age_agg_sim_df[age_agg_sim_df['month'].isin(months)] + + # if the maximum reference density bin is < (maximum simulation density bin / 1000), + # aggregate all simulation densities >= max ref bin into the max ref bin + # the final bin will be all densities equal to or above that value + max_ref_dens = ref_df['densitybin'].dropna().max() + max_cur_dens = cur_df['densitybin'].dropna().max() + if max_ref_dens < (max_cur_dens / 1000): + # get sum of frequencies within higher bins + all_higher_dens = cur_df[cur_df['densitybin'] >= max_ref_dens] + sim_agg_higher_dens = all_higher_dens.groupby(['month', 'agebin', 'Site'], as_index=False).agg( + densitybin=('densitybin', np.min), + asexual_par_dens_freq=('asexual_par_dens_freq', np.sum), + gametocyte_dens_freq=('gametocyte_dens_freq', np.sum), + Pop=('Pop', np.mean)) + # remove higher density bins from df + cur_df_lower = cur_df[cur_df['densitybin'] < max_ref_dens] + # add 
back in the aggregated frequencies across higher density bins + cur_df = pd.merge(cur_df_lower, sim_agg_higher_dens, how="outer") + + # add zeros for unobserved reference densities up to max_ref_dens + all_zeros_df = cur_df[['month', 'agebin', 'densitybin', 'Site']] + ref_df = pd.merge(ref_df, all_zeros_df, how="outer") + ref_df.fillna(0, inplace=True) + + # combine reference and simulation dataframes + cur_df['source'] = 'simulation' + ref_df['source'] = 'reference' + combined_df0 = pd.concat([cur_df, ref_df], join='outer') + + # = = = = = = = = = # + # stacked barplots + # = = = = = = = = = # + # change type to factors for barplot groupings + combined_df = combined_df0 + convert_dict = {'densitybin': 'category', + 'agebin': 'category'} + combined_df = combined_df.astype(convert_dict) + + # colors + # len_density_bin = len(combined_df['densitybin'].unique()) + # num_colors = len_density_bin + 1 if len_density_bin % 2 == 0 else len_density_bin + # colors = brewer.pal(n=num_colors, name='BrBG') + # names(colors) = sorted(combined_df['densitybin'].unique()) + # plot + gg1 = (ggplot(combined_df, aes(x='agebin', y='asexual_par_dens_freq', fill='densitybin')) + + geom_bar(position="stack", stat="identity") + + scale_fill_brewer(palette="BrBG") + + # scale_fill_manual(values=colors, limits=names(colors)) + + facet_grid('month~source') + ) + + # = = = = = = = = = # + # grid of line plots + # = = = = = = = = = # + + # calculate reference error bounds using Jeffreys interval (Beta(count + 0.5, total - count + 0.5) quantiles) + ci_width = 0.95 + alpha = 1 - ci_width + combined_df0['min_asex'] = np.nan + combined_df0['max_asex'] = np.nan + combined_df0['min_gamet'] = np.nan + combined_df0['max_gamet'] = np.nan + for rr in range(len(combined_df0.index)): + if combined_df0['source'].iloc[rr] == 'reference': + if ((combined_df0['count_asex'].iloc[rr] > 0) & + (combined_df0['count_asex'].iloc[rr] < combined_df0['bin_total_asex'].iloc[rr])): + combined_df0['min_asex'].iloc[rr] = beta.ppf( + q=alpha / 2, + 
a=combined_df0['count_asex'].iloc[rr] + 0.5, + b=combined_df0['bin_total_asex'].iloc[rr] - combined_df0['count_asex'].iloc[rr] + 0.5) + + combined_df0['max_asex'].iloc[rr] = beta.ppf( + q=1 - alpha / 2, + a=combined_df0['count_asex'].iloc[rr] + 0.5, + b=combined_df0['bin_total_asex'].iloc[rr] - combined_df0['count_asex'].iloc[rr] + 0.5) + + if ((combined_df0['count_gamet'].iloc[rr] > 0) & + (combined_df0['count_gamet'].iloc[rr] < combined_df0['bin_total_gamet'].iloc[rr])): + combined_df0['min_gamet'].iloc[rr] = beta.ppf( + q=alpha / 2, + a=combined_df0['count_gamet'].iloc[rr] + 0.5, + b=combined_df0['bin_total_gamet'].iloc[rr] - combined_df0['count_gamet'].iloc[rr] + 0.5) + + combined_df0['max_gamet'].iloc[rr] = beta.ppf( + q=1 - alpha / 2, + a=combined_df0['count_gamet'].iloc[rr] + 0.5, + b=combined_df0['bin_total_gamet'].iloc[rr] - combined_df0['count_gamet'].iloc[rr] + 0.5) + + # change facet values to intuitive labels (assumes month is numbered 1-12, so month 1 maps to 'Jan') + combined_df0['month'] = combined_df0['month'].apply(lambda mm: months_of_year[int(mm) - 1]) + month_cat = CategoricalDtype(categories=months_of_year, ordered=True) + combined_df0['month'] = combined_df0['month'].astype(month_cat) + all_age_bins = sorted(combined_df0['agebin'].unique()) + age_bin_labels = ['<=' + str(all_age_bins[0]) + " years"] + for aa in range(len(all_age_bins) - 1): + age_bin_labels.append(str(all_age_bins[aa]) + '-' + str(all_age_bins[aa + 1]) + ' years') + + combined_df0['agebin_index'] = combined_df0['agebin'].apply(all_age_bins.index) + combined_df0['agebin'] = combined_df0['agebin_index'].apply(lambda ii: age_bin_labels[ii]) + age_bin_labels_cat = CategoricalDtype(categories=age_bin_labels, ordered=True) + combined_df0['agebin'] = combined_df0['agebin'].astype(age_bin_labels_cat) + + # plot asexual densities + gg2 = (ggplot(combined_df0, aes(x="densitybin", y='asexual_par_dens_freq', color='source')) + + geom_line(size=2) + + geom_point() + + scale_x_continuous(trans='log10') + + geom_errorbar(aes(ymin='min_asex', ymax='max_asex'), width=0.2) + + theme_bw() + + 
ylab('fraction of population') + + xlab('asexual parasite density bin') + + scale_color_manual(values={"reference": 'red', + "simulation": 'blue'}) + + facet_grid('agebin~month') + # scale_fill_brewer(palette = "BrBG") + + # scale_fill_manual(values=colors, limits=names(colors)) + + ) + + # plot gametocyte densities + gg3 = (ggplot(combined_df0, aes(x='densitybin', y='gametocyte_dens_freq', color='source')) + + geom_line(size=2) + + geom_point() + + scale_x_continuous(trans='log10') + + geom_errorbar(aes(ymin='min_gamet', ymax='max_gamet'), width=0.2) + + theme_bw() + + ylab('fraction of population') + + xlab('gametocyte density bin') + + scale_color_manual(values={"reference": 'red', + "simulation": 'blue'}) + + facet_grid('agebin~month') + ) + + return [gg1, gg2, gg3] diff --git a/create_plots/plot_sim_sweep.py b/create_plots/plot_sim_sweep.py index 11002e0..71d018d 100644 --- a/create_plots/plot_sim_sweep.py +++ b/create_plots/plot_sim_sweep.py @@ -1,7 +1,7 @@ import os import pandas as pd -import numpy as np -import matplotlib.pyplot as plt +# import numpy as np +# import matplotlib.pyplot as plt import matplotlib from plotnine import ggplot, aes, geom_line, facets @@ -18,10 +18,10 @@ gg = ( - ggplot(df) - + aes(x='Age', y='Incidence', color='EIR') # , linetype="season" - + geom_line() - + facets.facet_grid("season ~ CM") + ggplot(df) + + aes(x='Age', y='Incidence', color='EIR') + # , linetype="season" + geom_line() + + facets.facet_grid("season ~ CM") # + facets.facet_wrap(facets="season", nrow=1) ) diff --git a/report/create_pdf_report.py b/report/create_pdf_report.py index a3affde..b527fbf 100644 --- a/report/create_pdf_report.py +++ b/report/create_pdf_report.py @@ -2,6 +2,7 @@ title = 'Malaria Model Overview' + class PDF(FPDF): def header(self): # Logo @@ -36,6 +37,7 @@ def footer(self): # Page number self.cell(0, 10, 'Page ' + str(self.page_no()) + '/{nb}', 0, 0, 'C') + # Instantiation of inherited class pdf = PDF() pdf.alias_nb_pages() diff --git 
a/report/create_pdf_report_2.py b/report/create_pdf_report_2.py index 036d7fe..37e7db3 100644 --- a/report/create_pdf_report_2.py +++ b/report/create_pdf_report_2.py @@ -9,7 +9,8 @@ pdf_name = f'Malaria_model_validation_output_{date}_{time}.pdf' -def new_section(pdf: FPDF, text: str, image_name: str = None, new_x: int = XPos.LMARGIN, new_y: int = YPos.NEXT, **kwargs): +def new_section(pdf: FPDF, text: str, image_name: str = None, new_x: int = XPos.LMARGIN, new_y: int = YPos.NEXT, + **kwargs): pdf.multi_cell( w=pdf.epw, h=pdf.font_size, @@ -36,7 +37,7 @@ def render_toc(pdf, outline): pdf.set_link(link, page=section.page_number) text = f'{" " * section.level * 2} {section.name}' text += ( - f' {"." * (60 - section.level*2 - len(section.name))} {section.page_number}' + f' {"." * (60 - section.level * 2 - len(section.name))} {section.page_number}' ) pdf.multi_cell( w=pdf.epw, @@ -48,6 +49,7 @@ def render_toc(pdf, outline): link=link, ) + class PDF(FPDF): def header(self): # Logo @@ -82,7 +84,6 @@ def footer(self): # Page number self.cell(0, txt='Page ' + str(self.page_no()) + '/{nb}' + f'\t\t{now_str}') - def new_section(self, section_name, section_content, image_name): pdf.set_font('Times', '', 24) pdf.cell(0, 30, txt=section_name, new_x=XPos.LEFT, new_y=YPos.NEXT) @@ -101,29 +102,29 @@ def new_section(self, section_name, section_content, image_name): # add outlines pdf.set_font("Helvetica") pdf.set_section_title_styles( - # Level 0 titles: - TitleStyle( - font_family="Times", - font_style="B", - font_size_pt=24, - color=128, - underline=True, - t_margin=10, - l_margin=10, - b_margin=0, - ), - # Level 1 subtitles: - TitleStyle( - font_family="Times", - font_style="B", - font_size_pt=20, - color=128, - underline=True, - t_margin=10, - l_margin=20, - b_margin=5, - ), - ) + # Level 0 titles: + TitleStyle( + font_family="Times", + font_style="B", + font_size_pt=24, + color=128, + underline=True, + t_margin=10, + l_margin=10, + b_margin=0, + ), + # Level 1 subtitles: + 
TitleStyle( + font_family="Times", + font_style="B", + font_size_pt=20, + color=128, + underline=True, + t_margin=10, + l_margin=20, + b_margin=5, + ), +) pdf.add_page() pdf.set_y(50) pdf.set_font(size=40) @@ -135,46 +136,52 @@ def new_section(self, section_name, section_content, image_name): section_and_content = {'Vector model overview': {'Vector model overview': ['The EMOD vector model inherits the generic model functionality and introduces vector ' - 'transmission and mosquito population dynamics. Interventions can be deployed within ' - 'simulations for a variety of transmission settings with different transmission ' - 'intensities, vector behaviors, and seasonally-driven ecologies. Climate data is ' - 'necessary to simulate the effect of climatalogical impacts on vector biology. To use ' - 'the vector model, set the configuration parameter Simulation_Type to VECTOR_SIM.\nThe' - ' figure below demonstrates the main components of the vector EMOD simulation type.', - 'malariaSIR.png'], + 'transmission and mosquito population dynamics. Interventions can be deployed within ' + 'simulations for a variety of transmission settings with different transmission ' + 'intensities, vector behaviors, and seasonally-driven ecologies. Climate data is ' + 'necessary to simulate the effect of climatalogical impacts on vector biology. To use ' + 'the vector model, set the configuration parameter Simulation_Type to VECTOR_SIM.\nThe' + ' figure below demonstrates the main components of the vector EMOD simulation type.', + 'malariaSIR.png' + ], 'Model implementation structure': ['There are two categories of possible implementations of the basic model, each with different ' - 'computational efficiencies, resolutions, and flexibilities. The first is an individual model, ' - 'where it simulates every individual mosquito in the population or can utilize a sampled subset' - ' of mosquitoes to represent the population as the whole. 
The second is a modified cohort ' - 'simulation, with or without explicit mosquito ages.', - None]}, + 'computational efficiencies, resolutions, and flexibilities. The first is an individual model, ' + 'where it simulates every individual mosquito in the population or can utilize a sampled subset' + ' of mosquitoes to represent the population as the whole. The second is a modified cohort ' + 'simulation, with or without explicit mosquito ages.', + None + ] + }, 'Malaria model': {'Malaria model': ['The malaria model inherits the functionality of the vector model and introduces human immunity,' - ' within-host parasite dynamics, effects of antimalarial drugs, and other aspects of malaria ' - 'biology to simulate malaria transmission. For example, individuals can have multiple infections' - ' and both innate and adaptive responses to antigens. To use the malaria model, set the ' - 'configuration parameter Simulation_Type to MALARIA_SIM.', - None], + ' within-host parasite dynamics, effects of antimalarial drugs, and other aspects of malaria ' + 'biology to simulate malaria transmission. For example, individuals can have multiple infections' + ' and both innate and adaptive responses to antigens. To use the malaria model, set the ' + 'configuration parameter Simulation_Type to MALARIA_SIM.', + None + ], 'Model components': ['The malaria model is complex, with numerous configurable parameters. ' - 'The following network diagram breaks down the model into various model components, ' - 'and illustrates how they interact with one another. The components on the network ' - 'diagram correspond to the structural components listed below. Note that there is not ' - 'perfect overlap between the labels on the network diagram and the structural ' - 'components; this is because the network is drawn with increased detail in order to ' - 'provide clarity in how the model functions and the components interact. 
The following ' - 'pages will describe in detail how the structural components function.', - 'malaria_network_schematic.png'] - }} + 'The following network diagram breaks down the model into various model components, ' + 'and illustrates how they interact with one another. The components on the network ' + 'diagram correspond to the structural components listed below. Note that there is not ' + 'perfect overlap between the labels on the network diagram and the structural ' + 'components; this is because the network is drawn with increased detail in order to ' + 'provide clarity in how the model functions and the components interact. The following ' + 'pages will describe in detail how the structural components function.', + 'malaria_network_schematic.png' + ] + } + } for i, (section_tile, section_content) in enumerate(section_and_content.items()): pdf.start_section(f"{i + 1}. {section_tile}") subsection_names, text_and_images = section_content.keys(), section_content.values() for j, subsection_name, text_and_images in zip(range(len(subsection_names)), subsection_names, text_and_images): section_text, image_name = text_and_images - pdf.start_section(f'{i+1}.{j+1} ' + subsection_name, level=1) + pdf.start_section(f'{i + 1}.{j + 1} ' + subsection_name, level=1) new_section( pdf, section_text, diff --git a/report/create_pdf_report_3.py b/report/create_pdf_report_3.py index 6e7efc3..4432562 100644 --- a/report/create_pdf_report_3.py +++ b/report/create_pdf_report_3.py @@ -12,7 +12,8 @@ pdf_name = f'Malaria_model_validation_output_{date}_{time}.pdf' -def new_section(pdf: FPDF, text: str, image_list: list = None, table_name: str = None, new_x: int = XPos.LMARGIN, new_y: int = YPos.NEXT, **kwargs): +def new_section(pdf: FPDF, text: str, image_list: list = None, table_name: str = None, new_x: int = XPos.LMARGIN, + new_y: int = YPos.NEXT, **kwargs): pdf.multi_cell( w=pdf.epw, h=pdf.font_size, @@ -42,7 +43,7 @@ def new_section(pdf: FPDF, text: str, image_list: list = None, 
table_name: str = if isinstance(datum, (int, float)): datum = str(round(datum, 2)) pdf.multi_cell(col_width, line_height, datum, border=1, - new_x="RIGHT", new_y="TOP", max_line_height=pdf.font_size) + new_x=XPos.RIGHT, new_y=YPos.TOP, max_line_height=pdf.font_size) pdf.ln(line_height) pdf.set_font(size=12) @@ -61,7 +62,7 @@ def render_toc(pdf, outline): pdf.set_link(link, page=section.page_number) text = f'{" " * section.level * 2} {section.name}' text += ( - f' {"." * (70 - section.level*2 - len(section.name))} {section.page_number}' + f' {"." * (70 - section.level * 2 - len(section.name))} {section.page_number}' ) pdf.multi_cell( w=pdf.epw, @@ -73,6 +74,7 @@ def render_toc(pdf, outline): link=link, ) + class PDF(FPDF): def header(self): # Logo @@ -108,17 +110,6 @@ def footer(self): self.cell(0, txt='Page ' + str(self.page_no()) + '/{nb}' + f'\t\t{now_str}') - # def new_section(self, section_name, section_content, image_name): - # pdf.set_font('Times', '', 16) - # pdf.cell(0, 30, txt=section_name, new_x=XPos.LEFT, new_y=YPos.NEXT) - # pdf.set_font('Times', '', 12) - # a = pdf.epw - 2 * pdf.c_margin - 1 - # pdf.multi_cell(a, 10, - # txt=section_content, - # align='L', new_x=XPos.LEFT) - # pdf.image(image_name, w=150) - - # Instantiation of inherited class pdf = PDF() pdf.alias_nb_pages() @@ -126,29 +117,29 @@ def footer(self): # add outlines pdf.set_font("Helvetica") pdf.set_section_title_styles( - # Level 0 titles: - TitleStyle( - font_family="Helvetica", - font_style="B", - font_size_pt=16, - color=(44, 147, 194), - underline=True, - t_margin=10, - l_margin=10, - b_margin=0, - ), - # Level 1 subtitles: - TitleStyle( - font_family="Helvetica", - font_style="B", - font_size_pt=14, - color=(44, 147, 194), - underline=True, - t_margin=10, - l_margin=10, - b_margin=5, - ), - ) + # Level 0 titles: + TitleStyle( + font_family="Helvetica", + font_style="B", + font_size_pt=16, + color=(44, 147, 194), + underline=True, + t_margin=10, + l_margin=10, + b_margin=0, + ), + # 
Level 1 subtitles: + TitleStyle( + font_family="Helvetica", + font_style="B", + font_size_pt=14, + color=(44, 147, 194), + underline=True, + t_margin=10, + l_margin=10, + b_margin=5, + ), +) pdf.add_page() pdf.set_y(50) pdf.set_font(size=25) @@ -160,150 +151,164 @@ def footer(self): pdf.set_text_color(0, 0, 0) pdf.insert_toc_placeholder(render_toc) - - # define sections and content section_and_content = {'Introduction': - {'Background': - ['The goal of this report is to help users quickly identify whether updated versions of ' - 'the malaria model are still well-calibrated to capture a range of relevant real-world ' - 'malaria observations. \n\n' - 'The figures and tables compare simulation output generated with a particular version of ' - 'the Eradication.exe and of emodpy-malaria with 1) the simulation results generated by ' - 'earlier versions of Eradication.exe and emodpy-malaria (the versions used to calibrate ' - 'the model) and 2) reference datasets from real-world observations. \n\n' - 'This report was generated by running the malaria model validation workflow available at ' - 'https://github.com/InstituteforDiseaseModeling/malaria-model_validation. 
' - 'Additional information on the reference datasets and on the simulation assumptions ' - 'are available from the repo in "Notes on reference datasets and simulation ' - 'assumptions.docx," and instructions on how to re-run the validation comparisons are ' - 'in the README file.', - None, - None]}, - 'Results summary': - {'Validation test performance': - ['The table below shows the number of sites examined in each validation relationship and ' - 'the fraction of sites that passed each of several tests for that validation ' - 'relationship.\n\n' - 'PLACEHOLDER TABLE: These are made up numbers!', - None, - '_plots/placeholder_results_summary_table.csv'], - 'Performance compared to model version from calibration': - ['The number of sites where the current simulation match to the reference data was found ' - 'to be substantially WORSE than the version used in calibration are shown in the table ' - 'below. \n\n' - 'PLACEHOLDER TABLE: These are made up numbers!', - None, - '_plots/placeholder_results_summary_sites_worse_than_calib.csv']}, - 'Visual comparison of reference data and matched simulations': - {'Incidence by age': - ['The plots below compare the age-incidence relationships from reference datasets and matched simulations.', - ['_plots/site_compare_incidence_age.png'], - None], - 'Prevalence by age': - ['The plots below compare the age-prevalence relationships from reference datasets and matched simulations.', - ['_plots/site_compare_prevalence_age.png'], - None], - 'Infectiousness to vectors': - ['Each of the below plot panels corresponds to a site. Within a plot panel, each row corresponds to an age group and each column corresponds to the month when sampling occurred.\n' - 'The x-axis shows the gametocyte density in an infection. The y-axis shows how infectious an individual is to mosquitoes. 
' - 'The dot size shows how often a person of a given age and gametocyte density falls into each of the infectiousness bins ' - "(each column's dot sizes sum to one).\n" - "In the reference datasets, the sample size is sometimes quite small.", - ['_plots/%s' % ff for ff in os.listdir('./_plots') if re.match(r'site_compare_infectiousness.*\.', ff)], - None], - 'Duration of infection - all ages': - ['The plots below compare the duration over which individuals had positive tests in the reference dataset and matched simulations. The sampling design from the reference data was matched in the simulations.' - 'Observed infections are divided into two groups. "Censored" infections refer to infections where the individual was positive at the first or final survey of the study (so the infection may have extended beyond the period observed). ' - '"Start & finish observed" infections refer to infections were the individual was observed to have a negative test at the start and end of the infection. The two types of infection duration records are illustrated in the figure below.', - ['infection_duration_censoring_illustration.png', '_plots/site_compare_infect_duration_navrongo_2000.png'], - None], - 'Duration of infection - by age': - ['The plots below compare the duration over which individuals had positive tests in the reference dataset and matched simulations. The sampling design from the reference data was matched in the simulations.' - 'Observed infections are divided into two groups. "Censored" infections refer to infections where the individual was positive at the first or final survey of the study (so the infection may have extended beyond the period observed). ' - '"Start & finish observed" infections refer to infections were the individual was observed to have a negative test at the start and end of the infection. 
The two types of infection duration records are illustrated in the figure below.\n' - 'In the plot panel below, columns correspond to the age group (in years) and rows correspond to whether or not the start and end of the infection was observed.', - ['_plots/site_compare_infect_duration_age_navrongo_2000.png'], - None], - 'Asexual parasite density by age': - ['The plots below compare the distribution of parasite densities across ages and ' - 'seasons from reference datasets and matched simulations. Each plot panel corresponds ' - 'to a site. Note that some of the reference datasets have small sample sizes, ' - 'especially in the youngest age groups.', - ['_plots/%s' % ff for ff in os.listdir('./_plots') if re.match(r'site_compare_par_dens_age.*\.', ff)], - None], - 'Gametocyte density by age': - ['The plots below compare the distribution of gametocyte densities across ages and ' - 'seasons from reference datasets and matched simulations. Each plot panel corresponds ' - 'to a site. Note that some of the reference datasets have small sample sizes, ' - 'especially in the youngest age groups.', - ['_plots/%s' % ff for ff in os.listdir('./_plots') if re.match(r'site_compare_gamet_dens_age_.*\.', ff)], - None]}, - 'Additional comparisons of reference data and matched simulations': - {'Incidence by age': - ['Below, the plots show the correlation between the simulation and reference incidence ' - 'in each age group (left plot) and the correlation between the simulation and ' - 'reference slopes when moving between the incidence in one age group and the incidence ' - 'in the next oldest age group (right plot).', - ['_plots/scatter_regression_incidence_age.png'], - '_plots/comparison_metric_table_incidence_age.csv'], - 'Prevalence by age': - ['Below, the plots show the correlation between the simulation and reference prevalence ' - 'in each age group (left plot) and the correlation between the simulation and ' - 'reference slopes when moving between the prevalence in one age 
group and the prevalence ' - 'in the next oldest age group (right plot).', - ['_plots/scatter_regression_prevalence_age.png'], - '_plots/comparison_metric_table_prevalence_age.csv'], - # 'Infectiousness to vectors': - # ['...', - # None, - # None], - # 'Duration of infection': - # ['...', - # None, - # None], - # 'Asexual parasite density by age': - # ['...', - # None, - # None], - # 'Gametocyte density by age': - # ['...', - # None, - # None] - }, - # 'Visual comparison of reference data and simulation sweeps': - # {'': - # ['...\n...', - # None, - # None], - # '': - # ['...', - # None, - # None]}, - 'Comparisons from prior EMOD publications': - {'Incidence and prevalence by age': - ['The top plots come from McCarthy et al. 2015 and the bottom plots come from Selvaraj et al. 2018.', - ['_prior_recalibration_published_figures/McCarthy_2015_age_pfpr_incidence.png', - '_prior_recalibration_published_figures/Selvaraj_2018_age_incidence.png'], - None], - 'Infectiousness to vectors': - ['The following plot comes from Selvaraj et al. 2018', - ['_prior_recalibration_published_figures/Selvaraj_2018_infectiousness.png'], - None], - 'Parasite densities': - ['The following plots come from Selvaraj et al. 2018', - ['_prior_recalibration_published_figures/Selvaraj_2018_parasite_densities.png', - '_prior_recalibration_published_figures/Selvaraj_2018_parasite_densities2.png', - '_prior_recalibration_published_figures/Selvaraj_2018_parasite_densities3.png'], - None]}, - } + {'Background': + [ + 'The goal of this report is to help users quickly identify whether updated versions of ' + 'the malaria model are still well-calibrated to capture a range of relevant real-world ' + 'malaria observations. 
\n\n' + 'The figures and tables compare simulation output generated with a particular version of ' + 'the Eradication.exe and of emodpy-malaria with 1) the simulation results generated by ' + 'earlier versions of Eradication.exe and emodpy-malaria (the versions used to calibrate ' + 'the model) and 2) reference datasets from real-world observations. \n\n' + 'This report was generated by running the malaria model validation workflow available at ' + 'https://github.com/InstituteforDiseaseModeling/malaria-model_validation. ' + 'Additional information on the reference datasets and on the simulation assumptions ' + 'are available from the repo in "Notes on reference datasets and simulation ' + 'assumptions.docx," and instructions on how to re-run the validation comparisons are ' + 'in the README file.', + None, + None]}, + 'Results summary': + {'Validation test performance': + [ + 'The table below shows the number of sites examined in each validation relationship and ' + 'the fraction of sites that passed each of several tests for that validation ' + 'relationship.\n\n' + 'PLACEHOLDER TABLE: These are made up numbers!', + None, + None], + # '_plots/placeholder_results_summary_table.csv'], + 'Performance compared to model version from calibration': + [ + 'The number of sites where the current simulation match to the reference data was found ' + 'to be substantially WORSE than the version used in calibration are shown in the table ' + 'below. 
\n\n' + 'PLACEHOLDER TABLE: These are made up numbers!', + None, + None]}, + # '_plots/placeholder_results_summary_sites_worse_than_calib.csv']}, + 'Visual comparison of reference data and matched simulations': + {'Incidence by age': + [ + 'The plots below compare the age-incidence relationships from reference datasets and matched simulations.', + ['_plots/site_compare_incidence_age.png'], + None], + 'Prevalence by age': + [ + 'The plots below compare the age-prevalence relationships from reference datasets and matched simulations.', + ['_plots/site_compare_prevalence_age.png'], + None], + 'Infectiousness to vectors': + [ + 'Each of the below plot panels corresponds to a site. Within a plot panel, each row corresponds to an age group and each column corresponds to the month when sampling occurred.\n' + 'The x-axis shows the gametocyte density in an infection. The y-axis shows how infectious an individual is to mosquitoes. ' + 'The dot size shows how often a person of a given age and gametocyte density falls into each of the infectiousness bins ' + "(each column's dot sizes sum to one).\n" + "In the reference datasets, the sample size is sometimes quite small.", + ['_plots/%s' % ff for ff in os.listdir('./_plots') if + re.match(r'site_compare_infectiousness.*\.', ff)], + None], + 'Duration of infection - all ages': + [ + 'The plots below compare the duration over which individuals had positive tests in the reference dataset and matched simulations. The sampling design from the reference data was matched in the simulations.' + 'Observed infections are divided into two groups. "Censored" infections refer to infections where the individual was positive at the first or final survey of the study (so the infection may have extended beyond the period observed). ' + '"Start & finish observed" infections refer to infections were the individual was observed to have a negative test at the start and end of the infection. 
The two types of infection duration records are illustrated in the figure below.', + ['infection_duration_censoring_illustration.png', + '_plots/site_compare_infect_duration_navrongo_2000.png'], + None], + 'Duration of infection - by age': + [ + 'The plots below compare the duration over which individuals had positive tests in the reference dataset and matched simulations. The sampling design from the reference data was matched in the simulations.' + 'Observed infections are divided into two groups. "Censored" infections refer to infections where the individual was positive at the first or final survey of the study (so the infection may have extended beyond the period observed). ' + '"Start & finish observed" infections refer to infections were the individual was observed to have a negative test at the start and end of the infection. The two types of infection duration records are illustrated in the figure below.\n' + 'In the plot panel below, columns correspond to the age group (in years) and rows correspond to whether or not the start and end of the infection was observed.', + ['_plots/site_compare_age_infect_duration_navrongo_2000.png'], + None], + 'Asexual parasite density by age': + ['The plots below compare the distribution of parasite densities across ages and ' + 'seasons from reference datasets and matched simulations. Each plot panel corresponds ' + 'to a site. Note that some of the reference datasets have small sample sizes, ' + 'especially in the youngest age groups.', + ['_plots/%s' % ff for ff in os.listdir('./_plots') if + re.match(r'site_compare_par_dens_age.*\.', ff)], + None], + 'Gametocyte density by age': + ['The plots below compare the distribution of gametocyte densities across ages and ' + 'seasons from reference datasets and matched simulations. Each plot panel corresponds ' + 'to a site. 
Note that some of the reference datasets have small sample sizes, ' + 'especially in the youngest age groups.', + ['_plots/%s' % ff for ff in os.listdir('./_plots') if + re.match(r'site_compare_gamet_dens_age_.*\.', ff)], + None]}, + 'Additional comparisons of reference data and matched simulations': + {'Incidence by age': + ['Below, the plots show the correlation between the simulation and reference incidence ' + 'in each age group (left plot) and the correlation between the simulation and ' + 'reference slopes when moving between the incidence in one age group and the incidence ' + 'in the next oldest age group (right plot).', + ['_plots/scatter_regression_incidence_age.png'], + '_plots/comparison_metric_table_incidence_age.csv'], + 'Prevalence by age': + [ + 'Below, the plots show the correlation between the simulation and reference prevalence ' + 'in each age group (left plot) and the correlation between the simulation and ' + 'reference slopes when moving between the prevalence in one age group and the prevalence ' + 'in the next oldest age group (right plot).', + ['_plots/scatter_regression_prevalence_age.png'], + '_plots/comparison_metric_table_prevalence_age.csv'], + # 'Infectiousness to vectors': + # ['...', + # None, + # None], + # 'Duration of infection': + # ['...', + # None, + # None], + # 'Asexual parasite density by age': + # ['...', + # None, + # None], + # 'Gametocyte density by age': + # ['...', + # None, + # None] + }, + # 'Visual comparison of reference data and simulation sweeps': + # {'': + # ['...\n...', + # None, + # None], + # '': + # ['...', + # None, + # None]}, + + # 'Comparisons from prior EMOD publications': + # {'Incidence and prevalence by age': + # ['The top plots come from McCarthy et al. 2015 and the bottom plots come from Selvaraj et al. 
2018.', + # ['_prior_recalibration_published_figures/McCarthy_2015_age_pfpr_incidence.png', + # '_prior_recalibration_published_figures/Selvaraj_2018_age_incidence.png'], + # None], + # 'Infectiousness to vectors': + # ['The following plot comes from Selvaraj et al. 2018', + # ['_prior_recalibration_published_figures/Selvaraj_2018_infectiousness.png'], + # None], + # 'Parasite densities': + # ['The following plots come from Selvaraj et al. 2018', + # ['_prior_recalibration_published_figures/Selvaraj_2018_parasite_densities.png', + # '_prior_recalibration_published_figures/Selvaraj_2018_parasite_densities2.png', + # '_prior_recalibration_published_figures/Selvaraj_2018_parasite_densities3.png'], + # None]}, +} for i, (section_tile, section_content) in enumerate(section_and_content.items()): pdf.start_section(f"{i + 1}. {section_tile}") subsection_names, text_and_images = section_content.keys(), section_content.values() for j, subsection_name, text_and_images in zip(range(len(subsection_names)), subsection_names, text_and_images): section_text, image_list, table_name = text_and_images - pdf.start_section(f'{i+1}.{j+1} ' + subsection_name, level=1) + pdf.start_section(f'{i + 1}.{j + 1} ' + subsection_name, level=1) new_section( pdf, section_text, diff --git a/report/create_pdf_report_draft.py b/report/create_pdf_report_draft.py index ec7df7d..2a776e3 100644 --- a/report/create_pdf_report_draft.py +++ b/report/create_pdf_report_draft.py @@ -1,8 +1,6 @@ import os import re -import pandas as pd from fpdf import FPDF -import simulations.manifest as manifest title = 'Malaria Model Validation Results' section_title_size = 18 @@ -11,9 +9,9 @@ paragraph_spacing = 6 figure_width = 160 + class PDF(FPDF): def header(self): - # # Arial bold 15 self.set_font('Arial', 'B', 15) # # Move to the right @@ -36,7 +34,6 @@ def header(self): # # Line break # self.ln(10) - # Page footer def footer(self): # Position at 1.5 cm from bottom @@ -48,14 +45,15 @@ def footer(self): # Logo 
self.image('IDMlogo_small.png', 10, 282, 25) + # Instantiation of inherited class pdf = PDF() pdf.alias_nb_pages() pdf.add_page() # calculate page width -effective_page_width = pdf.w - 2*pdf.l_margin +effective_page_width = pdf.w - 2 * pdf.l_margin -############ title ############ +# _______ title _________ # Move to the right pdf.cell(80) # Title @@ -76,7 +74,7 @@ def footer(self): # Line break pdf.ln(10) -# ############ overview of validation process ############ +#________ overview of validation process ____________ # pdf.set_font('Times', '', section_title_size) # pdf.set_text_color(44, 147, 194) # pdf.cell(0, 10, 'Validation overview', 0, 1) @@ -86,18 +84,19 @@ def footer(self): # # pdf.image('malaria_network_schematic.png', w=figure_width) # pdf.ln(10) -############ brief summary ############ +#__________ brief summary ________________ pdf.set_font('Times', '', section_title_size) pdf.set_text_color(44, 147, 194) pdf.cell(0, 10, 'Results summary', 0, 1) pdf.set_font('Times', '', body_text_size) pdf.set_text_color(0, 0, 0) -pdf.multi_cell(0, paragraph_spacing, 'Include some summary metrics on how the model performed for each of the validation relationships.', 0, 1) +pdf.multi_cell(0, paragraph_spacing, + 'Include some summary metrics on how the model performed for each of the validation relationships.', 0, + 1) pdf.multi_cell(0, paragraph_spacing, 'PLACEHOLDER: Insert a table here.', 0, 1) pdf.ln(10) - -############ age-incidence ############ +#______ age-incidence _________ pdf.set_font('Times', '', section_title_size) pdf.set_text_color(44, 147, 194) pdf.cell(0, 10, 'Incidence by age', 0, 1) @@ -106,13 +105,19 @@ def footer(self): pdf.cell(0, 10, 'Matched-site simulations', 0, 1) pdf.set_font('Times', '', body_text_size) pdf.set_text_color(0, 0, 0) -pdf.multi_cell(0, paragraph_spacing, 'The plots below compare the age-incidence relationships from reference datasets and matched simulations.', 0, 1) +pdf.multi_cell(0, paragraph_spacing, + 'The plots below 
compare the age-incidence relationships from reference datasets and matched simulations.', + 0, 1) pdf.image('_plots/site_compare_incidence_age.png', w=figure_width) pdf.ln(10) -pdf.multi_cell(0, paragraph_spacing, 'Next we look at the correlation between simulation and reference points (left plot) and at the correlation between the slopes of the lines between data points (right plot).', 0, 1) +pdf.multi_cell(0, paragraph_spacing, + 'Next we look at the correlation between simulation and reference points (left plot) and at the correlation between the slopes of the lines between data points (right plot).', + 0, 1) pdf.image('_plots/scatter_regression_incidence_age.png', w=figure_width) pdf.ln(10) -pdf.multi_cell(0, paragraph_spacing, 'The table below shows some summary metrics describing the match between the reference and simulation datasets.', 0, 1) +pdf.multi_cell(0, paragraph_spacing, + 'The table below shows some summary metrics describing the match between the reference and simulation datasets.', + 0, 1) pdf.multi_cell(0, paragraph_spacing, '{PLACEHOLDER: Insert a table with quantitative comparison results here.}', 0, 1) # df = pd.read_csv('_plots/comparison_metric_table_incidence_age.csv') # data = df.to_dict('records') @@ -122,9 +127,9 @@ def footer(self): # for k, v in row.items(): # pdf.multi_cell(col_width, line_height, str(v), border=1, align="L", max_line_height=body_text_size) # ln=3, # pdf.ln(line_height) - # for datum in row: - # pdf.multi_cell(col_width, line_height, datum, border=1) #, new_x="RIGHT", new_y="TOP", max_line_height=body_text_size) - # pdf.ln(line_height) +# for datum in row: +# pdf.multi_cell(col_width, line_height, datum, border=1) #, new_x="RIGHT", new_y="TOP", max_line_height=body_text_size) +# pdf.ln(line_height) # site sweeps pdf.set_text_color(44, 147, 194) @@ -132,15 +137,13 @@ def footer(self): pdf.cell(0, 10, 'Sweep-site simulations', 0, 1) pdf.set_font('Times', '', body_text_size) pdf.set_text_color(0, 0, 0) 
-pdf.multi_cell(0, paragraph_spacing, 'The plots below show a wider range of reference datasets plotted against a sweep of simulation-site characteristics.', 0, 1) +pdf.multi_cell(0, paragraph_spacing, + 'The plots below show a wider range of reference datasets plotted against a sweep of simulation-site characteristics.', + 0, 1) pdf.multi_cell(0, paragraph_spacing, '{PLACEHOLDER: Insert sweep plots here.}', 0, 1) pdf.ln(10) - - - - -############ age-prevalence ############ +#__________ age-prevalence __________ pdf.set_font('Times', '', section_title_size) pdf.set_text_color(44, 147, 194) pdf.cell(0, 10, 'Prevalence by age', 0, 1) @@ -149,13 +152,19 @@ def footer(self): pdf.cell(0, 10, 'Matched-site simulations', 0, 1) pdf.set_font('Times', '', body_text_size) pdf.set_text_color(0, 0, 0) -pdf.multi_cell(0, paragraph_spacing, 'The plots below compare the age-prevalence relationships from reference datasets and matched simulations.', 0, 1) +pdf.multi_cell(0, paragraph_spacing, + 'The plots below compare the age-prevalence relationships from reference datasets and matched simulations.', + 0, 1) pdf.image('_plots/site_compare_prevalence_age.png', w=figure_width) pdf.ln(10) -pdf.multi_cell(0, paragraph_spacing, 'Next we look at the correlation between simulation and reference points (left plot) and at the correlation between the slopes of the lines between data points (right plot).', 0, 1) +pdf.multi_cell(0, paragraph_spacing, + 'Next we look at the correlation between simulation and reference points (left plot) and at the correlation between the slopes of the lines between data points (right plot).', + 0, 1) pdf.image('_plots/scatter_regression_prevalence_age.png', w=figure_width) pdf.ln(10) -pdf.multi_cell(0, paragraph_spacing, 'The table below shows some summary metrics describing the match between the reference and simulation datasets.', 0, 1) +pdf.multi_cell(0, paragraph_spacing, + 'The table below shows some summary metrics describing the match between the 
reference and simulation datasets.', + 0, 1) pdf.multi_cell(0, paragraph_spacing, '{PLACEHOLDER: Insert a table with quantitative comparison results here.}', 0, 1) # site sweeps @@ -164,14 +173,13 @@ def footer(self): pdf.cell(0, 10, 'Sweep-site simulations', 0, 1) pdf.set_font('Times', '', body_text_size) pdf.set_text_color(0, 0, 0) -pdf.multi_cell(0, paragraph_spacing, 'The plots below show a wider range of reference datasets plotted against a sweep of simulation-site characteristics.', 0, 1) +pdf.multi_cell(0, paragraph_spacing, + 'The plots below show a wider range of reference datasets plotted against a sweep of simulation-site characteristics.', + 0, 1) pdf.multi_cell(0, paragraph_spacing, '{PLACEHOLDER: Insert sweep plots here.}', 0, 1) pdf.ln(10) - - - -############ infectiousness ############ +#____________ infectiousness _________ pdf.set_font('Times', '', section_title_size) pdf.set_text_color(44, 147, 194) pdf.cell(0, 10, 'Infectiousness to vectors', 0, 1) @@ -183,14 +191,11 @@ def footer(self): site_name = ff.replace('.png', '') site_name = site_name.replace('site_compare_infectiousness_', '') pdf.multi_cell(0, 10, site_name, 0, 1) - pdf.image('_plots/%s' % ff, w=figure_width*3/4) + pdf.image('_plots/%s' % ff, w=figure_width * 3 / 4) pdf.multi_cell(0, 10, 'Insert a table with quantitative comparison results here.', 0, 1) pdf.ln(10) - - - -############ infection duration ############ +#__________ infection duration __________ pdf.set_font('Times', '', section_title_size) pdf.set_text_color(44, 147, 194) pdf.cell(0, 10, 'Duration of infection', 0, 1) @@ -199,24 +204,26 @@ def footer(self): pdf.cell(0, 10, 'Across individuals of all ages', 0, 1) pdf.set_font('Times', '', body_text_size) pdf.set_text_color(0, 0, 0) -pdf.multi_cell(0, paragraph_spacing, 'The plots below compare the duration over which individuals had positive tests in the reference dataset and matched simulations. The sampling design from the reference data was matched in the simulations.' 
- 'Observed infections are divided into two groups. "Censored" infections refer to infections where the individual was positive at the first or final survey of the study (so the infection may have extended beyond the period observed). ' - '"Start & finish observed" infections refer to infections were the individual was observed to have a negative test at the start and end of the infection.', 0, 1) +pdf.multi_cell(0, paragraph_spacing, + 'The plots below compare the duration over which individuals had positive tests in the reference dataset and matched simulations. The sampling design from the reference data was matched in the simulations.' + 'Observed infections are divided into two groups. "Censored" infections refer to infections where the individual was positive at the first or final survey of the study (so the infection may have extended beyond the period observed). ' + '"Start & finish observed" infections refer to infections were the individual was observed to have a negative test at the start and end of the infection.', + 0, 1) pdf.image('_plots/site_compare_infect_duration_navrongo_2000.png', w=figure_width) pdf.ln(10) # all ages pdf.cell(0, 10, 'Infection duration distribution by age', 0, 1) pdf.set_font('Times', '', body_text_size) pdf.set_text_color(0, 0, 0) -pdf.multi_cell(0, paragraph_spacing, 'The plots below compare the duration over which individuals had positive tests in the reference dataset and matched simulations. The sampling design from the reference data was matched in the simulations.' - 'Observed infections are divided into two groups. "Censored" infections refer to infections where the individual was positive at the first or final survey of the study (so the infection may have extended beyond the period observed). 
' - '"Start & finish observed" infections refer to infections were the individual was observed to have a negative test at the start and end of the infection.', 0, 1) +pdf.multi_cell(0, paragraph_spacing, + 'The plots below compare the duration over which individuals had positive tests in the reference dataset and matched simulations. The sampling design from the reference data was matched in the simulations.' + 'Observed infections are divided into two groups. "Censored" infections refer to infections where the individual was positive at the first or final survey of the study (so the infection may have extended beyond the period observed). ' + '"Start & finish observed" infections refer to infections were the individual was observed to have a negative test at the start and end of the infection.', + 0, 1) pdf.image('_plots/site_compare_age_infect_duration_navrongo_2000.png', w=figure_width) pdf.ln(10) - - -############ age-parasite density ############ +#__________ age-parasite density ___________ pdf.set_font('Times', '', section_title_size) pdf.set_text_color(44, 147, 194) pdf.cell(0, 10, 'Asexual parasite density by age', 0, 1) @@ -228,12 +235,11 @@ def footer(self): site_name = ff.replace('.png', '') site_name = site_name.replace('site_compare_par_dens_age_', '') pdf.multi_cell(0, 10, site_name, 0, 1) - pdf.image('_plots/%s' % ff, w=figure_width*1/2) + pdf.image('_plots/%s' % ff, w=figure_width * 1 / 2) pdf.multi_cell(0, 10, 'Insert a table with quantitative comparison results here.', 0, 1) pdf.ln(10) - -############ age-gametocyte density ############ +#__________ age-gametocyte density ___________ pdf.set_font('Times', '', section_title_size) pdf.set_text_color(44, 147, 194) pdf.cell(0, 10, 'Gametocyte density by age', 0, 1) @@ -245,11 +251,8 @@ def footer(self): site_name = ff.replace('.png', '') site_name = site_name.replace('site_compare_gamet_dens_age_', '') pdf.multi_cell(0, 10, site_name, 0, 1) - pdf.image('_plots/%s' % ff, w=figure_width*1/2) + 
pdf.image('_plots/%s' % ff, w=figure_width * 1 / 2) pdf.multi_cell(0, 10, 'Insert a table with quantitative comparison results here.', 0, 1) pdf.ln(10) - - - -pdf.output('Malaria_model_validation_output2.pdf', 'F') \ No newline at end of file +pdf.output('Malaria_model_validation_output2.pdf', 'F') diff --git a/requirements.txt b/requirements.txt index 20fbe7a..9586903 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ fpdf2 seaborn sklearn scipy -snakemake \ No newline at end of file +snakemake +plotnine \ No newline at end of file diff --git a/setup.py b/setup.py index 46805f4..cd2978a 100644 --- a/setup.py +++ b/setup.py @@ -56,6 +56,8 @@ def load_arguments_from_file(filepath, arguments_list): "Programming Language :: Python :: 3", "Operating System :: OS Independent" ], + package_data={'simulation_inputs': ['*/*.csv', '*.csv'], + 'simulations': ['snakefile_bak']}, include_package_data=True, exclude_package_data={'': ['tests']}, package_dir={}, diff --git a/simulations/analyzers/AnnualSummaryReportAnalyzer.py b/simulations/analyzers/AnnualSummaryReportAnalyzer.py index 054a7ae..8b64b26 100644 --- a/simulations/analyzers/AnnualSummaryReportAnalyzer.py +++ b/simulations/analyzers/AnnualSummaryReportAnalyzer.py @@ -1,4 +1,3 @@ -import json import os import pandas as pd import numpy as np @@ -15,11 +14,11 @@ from idmtools.core import ItemType from idmtools.core.platform_factory import Platform + mpl.use('Agg') class AnnualSummaryReportAnalyzer(BaseAnalyzer): - def __init__(self, expt_name, sweep_variables=None, working_dir="."): super().__init__(filenames=["output\\MalariaSummaryReport_Annual_Report.json"]) self.expt_name = expt_name @@ -88,7 +87,6 @@ def reduce(self, all_data: Dict[Union[IWorkflowItem, Simulation], Any]) -> Any: if __name__ == '__main__': - # Set the platform where you want to run your analysis # In this case we are running in BELEGOST since the Work Item we are analyzing was run on COMPS logger = getLogger() diff --git 
a/simulations/analyzers/InfectiousnessByParDensAgeAnalyzer.py b/simulations/analyzers/InfectiousnessByParDensAgeAnalyzer.py index c136aed..8df0b9b 100644 --- a/simulations/analyzers/InfectiousnessByParDensAgeAnalyzer.py +++ b/simulations/analyzers/InfectiousnessByParDensAgeAnalyzer.py @@ -1,8 +1,5 @@ import os -import warnings import pandas as pd -import numpy as np -from typing import Dict, Any, Union from logging import getLogger from idmtools.core.platform_factory import Platform from idmtools.analysis.analyze_manager import AnalyzeManager @@ -11,11 +8,11 @@ class InfectiousnessByParDensAgeAnalyzer(BaseAnalyzer): - def __init__(self, expt_name, sweep_variables=None, working_dir=".", start_year=0, end_year=65): - super(InfectiousnessByParDensAgeAnalyzer, self).__init__(working_dir=working_dir, - filenames=["output/MalariaSummaryReport_Infectiousness_Monthly_Report_%d.json" % x - for x in range(start_year, end_year)]) + super(InfectiousnessByParDensAgeAnalyzer, self).__init__( + working_dir=working_dir, + filenames=["output/MalariaSummaryReport_Infectiousness_Monthly_Report_%d.json" % x + for x in range(start_year, end_year)]) self.sweep_variables = sweep_variables or ["Run_Number"] self.expt_name = expt_name self.start_year = start_year @@ -30,9 +27,7 @@ def initialize(self): if not os.path.exists(os.path.join(self.working_dir, self.expt_name)): os.mkdir(os.path.join(self.working_dir, self.expt_name)) - def map(self, data, simulation): - agebins = data[self.filenames[0]]['Metadata']['Age Bins'] gam_bins = data[self.filenames[0]]['Metadata']['Gametocytemia Bins'] frac_infected_bins = data[self.filenames[0]]['Metadata']['Infectiousness Bins'] @@ -62,9 +57,7 @@ def map(self, data, simulation): adf[sweep_var] = simulation.tags[sweep_var] return adf - def reduce(self, all_data): - selected = [data for sim, data in all_data.items()] if len(selected) == 0: print("No data have been returned... 
Exiting...") @@ -76,7 +69,6 @@ def reduce(self, all_data): if __name__ == '__main__': - # Set the experiment id you want to analyze experiment_id = 'b7126585-30b6-ec11-a9f6-9440c9be2c51' end_year = 65 diff --git a/simulations/analyzers/InsetChartAnalyzer.py b/simulations/analyzers/InsetChartAnalyzer.py index be3ebc0..80da6c1 100644 --- a/simulations/analyzers/InsetChartAnalyzer.py +++ b/simulations/analyzers/InsetChartAnalyzer.py @@ -1,4 +1,3 @@ -import json import os import pandas as pd import numpy as np @@ -12,18 +11,19 @@ from logging import getLogger from idmtools.analysis.analyze_manager import AnalyzeManager -from idmtools.analysis.csv_analyzer import CSVAnalyzer from idmtools.core import ItemType from idmtools.core.platform_factory import Platform + mpl.use('Agg') class InsetChartAnalyzer(BaseAnalyzer): - - def __init__(self, title='idm', tags=['Baseline', 'Run_Number', 'Larval_Capacity', 'Transmission_To_Human', - 'Infected_Progress']): + def __init__(self, title='idm', tags=None): super().__init__(filenames=["output\\InsetChart.json"]) + if tags is None: + tags = ['Baseline', 'Run_Number', 'Larval_Capacity', 'Transmission_To_Human', + 'Infected_Progress'] self.tags = tags print(title) @@ -103,7 +103,6 @@ def reduce(self, all_data: Dict[Union[IWorkflowItem, Simulation], Any]) -> Any: if __name__ == '__main__': - # Set the platform where you want to run your analysis # In this case we are running in BELEGOST since the Work Item we are analyzing was run on COMPS logger = getLogger() diff --git a/simulations/analyzers/MonthlySummaryReportAnalyzer.py b/simulations/analyzers/MonthlySummaryReportAnalyzer.py index a14b707..a0ec493 100644 --- a/simulations/analyzers/MonthlySummaryReportAnalyzer.py +++ b/simulations/analyzers/MonthlySummaryReportAnalyzer.py @@ -1,8 +1,5 @@ import os -import warnings import pandas as pd -import numpy as np -from typing import Dict, Any, Union from logging import getLogger from idmtools.core.platform_factory import Platform from 
idmtools.analysis.analyze_manager import AnalyzeManager @@ -11,11 +8,11 @@ class MonthlySummaryReportAnalyzer(BaseAnalyzer): - def __init__(self, expt_name, sweep_variables=None, working_dir=".", start_year=0, end_year=65): super(MonthlySummaryReportAnalyzer, self).__init__(working_dir=working_dir, - filenames=["output/MalariaSummaryReport_Monthly_Report_%d.json" % x - for x in range(start_year, end_year)]) + filenames=[ + "output/MalariaSummaryReport_Monthly_Report_%d.json" % x + for x in range(start_year, end_year)]) self.sweep_variables = sweep_variables or ["Run_Number"] self.expt_name = expt_name self.start_year = start_year @@ -30,9 +27,7 @@ def initialize(self): if not os.path.exists(os.path.join(self.working_dir, self.expt_name)): os.mkdir(os.path.join(self.working_dir, self.expt_name)) - def map(self, data, simulation): - agebins = data[self.filenames[0]]['Metadata']['Age Bins'] adf = pd.DataFrame() @@ -62,9 +57,7 @@ def map(self, data, simulation): adf[sweep_var] = simulation.tags[sweep_var] return adf - def reduce(self, all_data): - selected = [data for sim, data in all_data.items()] if len(selected) == 0: print("No data have been returned... 
Exiting...") @@ -76,7 +69,6 @@ def reduce(self, all_data): if __name__ == '__main__': - # Set the experiment id you want to analyze experiment_id = 'b7126585-30b6-ec11-a9f6-9440c9be2c51' end_year = 65 diff --git a/simulations/analyzers/ParDensAgeAnalyzer.py b/simulations/analyzers/ParDensAgeAnalyzer.py index 0ff309f..402f4a6 100644 --- a/simulations/analyzers/ParDensAgeAnalyzer.py +++ b/simulations/analyzers/ParDensAgeAnalyzer.py @@ -1,8 +1,6 @@ import os import warnings import pandas as pd -import numpy as np -from typing import Dict, Any, Union from logging import getLogger from idmtools.core.platform_factory import Platform from idmtools.analysis.analyze_manager import AnalyzeManager @@ -11,7 +9,6 @@ class ParDensAgeAnalyzer(BaseAnalyzer): - def __init__(self, expt_name, sweep_variables=None, working_dir=".", start_year=0, end_year=65): super(ParDensAgeAnalyzer, self).__init__(working_dir=working_dir, filenames=["output/MalariaSummaryReport_Monthly_Report_%d.json" % x diff --git a/simulations/generate_site_rules.py b/simulations/generate_site_rules.py index abacf1c..6db725d 100644 --- a/simulations/generate_site_rules.py +++ b/simulations/generate_site_rules.py @@ -5,7 +5,6 @@ def generate_rule(site, n, script_name="run_sims.py"): - exp_id_file = get_comps_id_filename(site=site) analyzer_id_file = get_comps_id_filename(site=site, level=2) download_id_file = get_comps_id_filename(site=site, level=3) @@ -32,11 +31,11 @@ def generate_rule(site, n, script_name="run_sims.py"): return rule -def run(snakefile='snakefile_bak'): - with open(snakefile, 'r') as file: +def run(snakefile_bak='snakefile_bak', snakefile='snakefile'): + with open(snakefile_bak, 'r') as file: snakefile_str = file.read() snakefile_str = delete_old_rules(snakefile_str) - write_rules(snakefile_str, 'snakefile') + write_rules(snakefile_str, snakefile) def delete_old_rules(snakefile_str): diff --git a/simulations/helpers.py b/simulations/helpers.py index a4efae7..2647247 100644 --- 
a/simulations/helpers.py +++ b/simulations/helpers.py @@ -1,10 +1,8 @@ -import math import os import warnings import pandas as pd import numpy as np from functools import partial -from pathlib import Path import emod_api.demographics.Demographics as Demographics from emodpy_malaria.interventions.diag_survey import add_diagnostic_survey diff --git a/simulations/run_sims.py b/simulations/run_sims.py index 6f5bb21..a567c8b 100644 --- a/simulations/run_sims.py +++ b/simulations/run_sims.py @@ -10,12 +10,14 @@ from emodpy.emod_task import EMODTask from emodpy_malaria.reporters.builtin import add_report_intervention_pop_avg -from simulations.helpers import * +from simulations.helpers import set_param_fn, update_sim_random_seed, set_simulation_scenario_for_characteristic_site, \ + set_simulation_scenario_for_matched_site, get_comps_id_filename + import simulations.params as params from simulations import manifest as manifest -def general_sim(site=None, nSims=1, characteristic=False, priority=manifest.priority): +def submit_sim(site=None, nSims=1, characteristic=False, priority=manifest.priority): """ This function is designed to be a parameterized version of the sequence of things we do every time we run an emod experiment. 
@@ -27,16 +29,15 @@ def general_sim(site=None, nSims=1, characteristic=False, priority=manifest.prio # create EMODTask print("Creating EMODTask (from files)...") - - task = EMODTask.from_default2( - config_path="my_config.json", - eradication_path=str(manifest.eradication_path), - ep4_custom_cb=None, - campaign_builder=None, - schema_path=str(manifest.schema_file), - param_custom_cb=set_param_fn, - demog_builder=None, - ) + + task = EMODTask.from_default2(config_path="my_config.json", + eradication_path=str(manifest.eradication_path), + ep4_custom_cb=None, + campaign_builder=None, + schema_path=str(manifest.schema_file), + param_custom_cb=set_param_fn, + demog_builder=None, + ) # add html intervention-visualizer asset to COMPS add_inter_visualizer = False @@ -47,7 +48,6 @@ def general_sim(site=None, nSims=1, characteristic=False, priority=manifest.prio # Create simulation sweep with builder builder = SimulationBuilder() - exp_name = "validation_" + site # Sweep run number @@ -62,7 +62,7 @@ def general_sim(site=None, nSims=1, characteristic=False, priority=manifest.prio builder.add_sweep_definition(set_simulation_scenario_for_matched_site, [site]) # create experiment from builder - print(f"Prompting for COMPS creds if necessary...") + print("Prompting for COMPS creds if necessary...") experiment = Experiment.from_builder(builder, task, name=exp_name) # The last step is to call run() on the ExperimentManager to run the simulations. 
@@ -83,9 +83,10 @@ def general_sim(site=None, nSims=1, characteristic=False, priority=manifest.prio # print("Retrieving Eradication and schema.json from Bamboo...") # get_model_files( plan, manifest ) # print("...done.") - + parser = argparse.ArgumentParser(description='Process site name') - parser.add_argument('--site', '-s', type=str, help='site name', default="test_site")#params.sites[0]) # todo: not sure if we want to make this required argument + parser.add_argument('--site', '-s', type=str, help='site name', + default="test_site") # params.sites[0]) # todo: not sure if we want to make this required argument parser.add_argument('--nSims', '-n', type=int, help='number of simulations', default=params.nSims) parser.add_argument('--characteristic', '-c', action='store_true', help='site-characteristic sweeps') parser.add_argument('--priority', '-p', type=str, @@ -94,4 +95,4 @@ def general_sim(site=None, nSims=1, characteristic=False, priority=manifest.prio args = parser.parse_args() - general_sim(site=args.site, nSims=args.nSims, characteristic=args.characteristic, priority=args.priority) + submit_sim(site=args.site, nSims=args.nSims, characteristic=args.characteristic, priority=args.priority) diff --git a/simulations/snakefile b/simulations/snakefile index 67b163a..16ef2ca 100644 --- a/simulations/snakefile +++ b/simulations/snakefile @@ -103,7 +103,7 @@ rule chonyi_1999_analyzer: rule chonyi_1999_download: input: 'COMPS_ID/chonyi_1999_analyzers' - output: 'COMPS_ID/chonyi_1999_done' + output: 'COMPS_ID/chonyi_1999_download' run: shell(get_command(script="download_wi.py", site="chonyi_1999")) @@ -122,7 +122,7 @@ rule ngerenya_1999_analyzer: rule ngerenya_1999_download: input: 'COMPS_ID/ngerenya_1999_analyzers' - output: 'COMPS_ID/ngerenya_1999_done' + output: 'COMPS_ID/ngerenya_1999_download' run: shell(get_command(script="download_wi.py", site="ngerenya_1999")) @@ -141,7 +141,7 @@ rule dielmo_1990_analyzer: rule dielmo_1990_download: input: 
'COMPS_ID/dielmo_1990_analyzers' - output: 'COMPS_ID/dielmo_1990_done' + output: 'COMPS_ID/dielmo_1990_download' run: shell(get_command(script="download_wi.py", site="dielmo_1990")) @@ -160,7 +160,7 @@ rule ndiop_1993_analyzer: rule ndiop_1993_download: input: 'COMPS_ID/ndiop_1993_analyzers' - output: 'COMPS_ID/ndiop_1993_done' + output: 'COMPS_ID/ndiop_1993_download' run: shell(get_command(script="download_wi.py", site="ndiop_1993")) @@ -179,7 +179,7 @@ rule ebolakounou_1997_analyzer: rule ebolakounou_1997_download: input: 'COMPS_ID/ebolakounou_1997_analyzers' - output: 'COMPS_ID/ebolakounou_1997_done' + output: 'COMPS_ID/ebolakounou_1997_download' run: shell(get_command(script="download_wi.py", site="ebolakounou_1997")) @@ -198,7 +198,7 @@ rule koundou_1997_analyzer: rule koundou_1997_download: input: 'COMPS_ID/koundou_1997_analyzers' - output: 'COMPS_ID/koundou_1997_done' + output: 'COMPS_ID/koundou_1997_download' run: shell(get_command(script="download_wi.py", site="koundou_1997")) @@ -217,7 +217,7 @@ rule dongubougou_1999_analyzer: rule dongubougou_1999_download: input: 'COMPS_ID/dongubougou_1999_analyzers' - output: 'COMPS_ID/dongubougou_1999_done' + output: 'COMPS_ID/dongubougou_1999_download' run: shell(get_command(script="download_wi.py", site="dongubougou_1999")) @@ -236,7 +236,7 @@ rule sotuba_1999_analyzer: rule sotuba_1999_download: input: 'COMPS_ID/sotuba_1999_analyzers' - output: 'COMPS_ID/sotuba_1999_done' + output: 'COMPS_ID/sotuba_1999_download' run: shell(get_command(script="download_wi.py", site="sotuba_1999")) @@ -255,7 +255,7 @@ rule matsari_1970_analyzer: rule matsari_1970_download: input: 'COMPS_ID/matsari_1970_analyzers' - output: 'COMPS_ID/matsari_1970_done' + output: 'COMPS_ID/matsari_1970_download' run: shell(get_command(script="download_wi.py", site="matsari_1970")) @@ -274,7 +274,7 @@ rule rafin_marke_1970_analyzer: rule rafin_marke_1970_download: input: 'COMPS_ID/rafin_marke_1970_analyzers' - output: 'COMPS_ID/rafin_marke_1970_done' + 
output: 'COMPS_ID/rafin_marke_1970_download' run: shell(get_command(script="download_wi.py", site="rafin_marke_1970")) @@ -293,7 +293,7 @@ rule sugungum_1970_analyzer: rule sugungum_1970_download: input: 'COMPS_ID/sugungum_1970_analyzers' - output: 'COMPS_ID/sugungum_1970_done' + output: 'COMPS_ID/sugungum_1970_download' run: shell(get_command(script="download_wi.py", site="sugungum_1970")) @@ -312,7 +312,7 @@ rule navrongo_2000_analyzer: rule navrongo_2000_download: input: 'COMPS_ID/navrongo_2000_analyzers' - output: 'COMPS_ID/navrongo_2000_done' + output: 'COMPS_ID/navrongo_2000_download' run: shell(get_command(script="download_wi.py", site="navrongo_2000")) @@ -331,7 +331,7 @@ rule laye_2007_analyzer: rule laye_2007_download: input: 'COMPS_ID/laye_2007_analyzers' - output: 'COMPS_ID/laye_2007_done' + output: 'COMPS_ID/laye_2007_download' run: shell(get_command(script="download_wi.py", site="laye_2007")) @@ -350,7 +350,7 @@ rule dapelogo_2007_analyzer: rule dapelogo_2007_download: input: 'COMPS_ID/dapelogo_2007_analyzers' - output: 'COMPS_ID/dapelogo_2007_done' + output: 'COMPS_ID/dapelogo_2007_download' run: shell(get_command(script="download_wi.py", site="dapelogo_2007")) \ No newline at end of file diff --git a/tests/BaseTest.py b/tests/BaseTest.py new file mode 100644 index 0000000..69cb055 --- /dev/null +++ b/tests/BaseTest.py @@ -0,0 +1,9 @@ +import unittest + + +class BaseTest(unittest.TestCase): + def setUp(self) -> None: + super(BaseTest, self).setUp() + print(f"Running test: {self._testMethodName}\n") + pass + diff --git a/tests/test_generate_rules.py b/tests/test_generate_rules.py new file mode 100644 index 0000000..1c78c07 --- /dev/null +++ b/tests/test_generate_rules.py @@ -0,0 +1,28 @@ +from BaseTest import BaseTest +import unittest +from simulations.generate_site_rules import generate_rule, run +import os +import simulations.manifest as manifest + + +class GenerateRuleTest(BaseTest): + def test_generate_rule(self): + site = "test_site_name" + n = 
99 + script_name = "test.py" + test_string = generate_rule(site=site, n=n, script_name=script_name) + rules = [f'rule {site}_run_sim', f'rule {site}_analyzer', f'rule {site}_download'] + for rule in rules: + self.assertIn(rule, test_string) + + def test_run(self): + snakefile_bak = manifest.CURRENT_DIR / 'snakefile_bak' + snakefile = manifest.CURRENT_DIR / 'snakefile_test' + if os.path.isfile(snakefile): + os.remove(snakefile) + run(snakefile_bak=snakefile_bak, snakefile=snakefile) + self.assertTrue(os.path.isfile(snakefile)) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_imports.py b/tests/test_imports.py index a77b172..e9fd0b1 100644 --- a/tests/test_imports.py +++ b/tests/test_imports.py @@ -1,12 +1,20 @@ import unittest +from BaseTest import BaseTest -class IDMTestImportTest(unittest.TestCase): +class MyImportTestClass(object): + def __init__(self): + self.expected_items = None + self.found_items = None + def setUp(self) -> None: + super(MyImportTestClass, self).setUp() self.expected_items = None self.found_items = None pass + +class ImportTest(BaseTest, MyImportTestClass): def verify_expected_items_present(self, namespace): self.found_items = dir(namespace) for item in self.expected_items: @@ -33,7 +41,10 @@ def test_simulations_helpers_import(self): 'add_nmf_hs_from_file', 'ptr_config_builder', 'add_broadcasting_survey', - 'build_demog' + 'build_demog', + 'get_comps_id_filename', + 'load_coordinator_df', + 'get_suite_id' ] import simulations.helpers as helpers self.verify_expected_items_present(namespace=helpers) @@ -89,18 +100,18 @@ def test_AnnualSummaryReportAnalyzer_import(self): def test_run_sims_import(self): self.expected_items = [ - 'general_sim' + 'submit_sim' ] import simulations.run_sims as run_sims self.verify_expected_items_present(namespace=run_sims) pass - def test_run_sims_import(self): + def test_add_suite_import(self): self.expected_items = [ - 'check_experiment' + 'add_suite' ] - import 
simulations.wait_for_experiment as wait_for_experiment - self.verify_expected_items_present(namespace=wait_for_experiment) + import simulations.add_suite as add_suite + self.verify_expected_items_present(namespace=add_suite) pass diff --git a/tests/test_load_inputs.py b/tests/test_load_inputs.py new file mode 100644 index 0000000..a7a2799 --- /dev/null +++ b/tests/test_load_inputs.py @@ -0,0 +1,14 @@ +from BaseTest import BaseTest +import unittest +from simulations.load_inputs import load_sites + + +class LoadInputsTest(BaseTest): + def test_load_sites(self): + sites, nSims, script_names = load_sites() + self.assertEqual(len(sites), len(nSims)) + self.assertEqual(len(sites), len(script_names)) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_run_sims.py b/tests/test_run_sims.py new file mode 100644 index 0000000..75f5d0b --- /dev/null +++ b/tests/test_run_sims.py @@ -0,0 +1,15 @@ +import unittest +from BaseTest import BaseTest +# from simulations.run_sims import submit_sim +# import os +# import simulations.manifest as manifest + + +class RunSimsTest(BaseTest): + def test_submit_sim(self): + # todo: NYI, need to figure out a way to test this function without run the real experiment in Comps. + pass + + +if __name__ == '__main__': + unittest.main()