Skip to content

Commit

Permalink
Merge pull request #15 from YeChen-IDM/plotting_update
Browse files Browse the repository at this point in the history
Python plotting script + misc updates
  • Loading branch information
YeChen-IDM authored Jun 7, 2022
2 parents 2140441 + 0836e44 commit 8ef20d0
Show file tree
Hide file tree
Showing 25 changed files with 643 additions and 370 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ jobs:
run: pip install black flake8

- name: Run linters
run: flake8 --ignore=E114,E115,E116,E126,E127,E128,E201,E202,E203,E222,E225,E226,E231,E241,E251,E261,E262,E265,E301,E302,E303,E306,E501,E502,W291,W292,W293,W391 --exclude=./tests
run: flake8 --ignore=E114,E115,E116,E126,E127,E128,E201,E202,E203,E222,E225,E226,E231,E241,E251,E261,E262,E265,E301,E302,E303,E306,E501,E502,W291,W292,W293,W391,W504 --exclude=./tests
27 changes: 24 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,17 @@ When you see "Password:" in the terminal, enter your comps password and hit ente

## Option 1_Run One Site with Python Scripts
```bash
cd simulations
python3 run_sims.py -s {site_name} -n {nSims}
python3 wait_for_experiment.py -s {site_name}
python3 add_suite.py  # optional step
python3 run_analyzers.py -s {site_name}
python3 download_wi.py -s {site_name}
```

Run the plotting and reporting scripts for the site(s) that you ran:
```bash
Rscript create_plots\run_generate_validation_comparisons_site.R
python3 report\create_pdf_report_3.py
```

## Option 2_Run all Sites with Snakemake (Recommended)
Expand All @@ -79,7 +88,19 @@ python3 generate_site_rules.py
snakemake -j
```

- If you want to re-run simulations for certain sites, delete COMPS ID files for those sites that you want to -re-run(/simulations/COMPS_ID/{site_name}_COMPS_ID_submit and _done files) and run "snakemake -j" again.
- If you want to re-run the analyzer steps for experiments that you ran previously, delete the analyzer ID files and run:
```bash
snakemake clean_ana clean_download -j
snakemake -j
```

- Similar to the previous scenario, if you want to run only the download and plotting steps:
```bash
snakemake clean_download -j
snakemake -j
```

- If you want to re-run simulations for certain sites, delete the COMPS ID files for the sites that you want to re-run (/simulations/COMPS_ID/{site_name}_COMPS_ID_exp_submit, _analyzers and _download files) and run "snakemake -j" again.

- If you want to re-run the analyzers and plotting steps for certain sites, delete the {site_name}_COMPS_ID_done files only and re-run "snakemake -j".
- If you want to re-run the analyzers and plotting steps for certain sites, delete the {site_name}_COMPS_ID_analyzers and _download files only and re-run "snakemake -j".

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# helper_functions_infectiousness.R
# helper_functions_infectiousness.R
library(ggplot2)


Expand Down
179 changes: 179 additions & 0 deletions create_plots/helper_functions_par_dens.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
from plotnine import ggplot, aes, geom_bar, scale_fill_brewer, facet_grid, geom_line, geom_point, scale_x_continuous, \
geom_errorbar, theme_bw, xlab, ylab, scale_color_manual
import numpy as np
import pandas as pd
from scipy.stats import beta
from pandas.api.types import CategoricalDtype


def get_age_bin_averages(sim_df):
    """
    Get average parasite densities in each age bin, weighting all ages in bin equally (e.g., not weighted by
    population size)

    Args:
        sim_df (pd.DataFrame): simulation output. Must contain the columns 'month', 'agebin', 'densitybin',
            'Site', 'asexual_par_dens_freq', 'gametocyte_dens_freq' and 'Pop'.

    Returns:
        age_agg_sim_df (pd.DataFrame): one row per (month, agebin, densitybin, Site) combination holding the
            mean of 'asexual_par_dens_freq', 'gametocyte_dens_freq' and 'Pop' over all rows in that group.
            The grouping keys are returned as regular columns (not as the index).
    """
    # remove rows where there are zero people of the measured age bin in the simulation
    sim_df = sim_df[sim_df['Pop'] > 0]
    # get average across all years in age bins and across simulation run seeds.
    # as_index=False keeps the grouping keys as ordinary columns so that callers can keep selecting
    # and merging on 'month', 'agebin', 'densitybin' and 'Site'.
    age_agg_sim_df = sim_df.groupby(['month', 'agebin', 'densitybin', 'Site'], as_index=False).agg(
        asexual_par_dens_freq=('asexual_par_dens_freq', 'mean'),
        gametocyte_dens_freq=('gametocyte_dens_freq', 'mean'),
        Pop=('Pop', 'mean')
    )
    return age_agg_sim_df


def _jeffreys_bounds(counts, totals, eligible, alpha):
    """Return (lower, upper) Jeffreys-interval bounds as Series; NaN wherever `eligible` is False."""
    lower = pd.Series(np.nan, index=counts.index, dtype=float)
    upper = pd.Series(np.nan, index=counts.index, dtype=float)
    successes = counts[eligible].to_numpy(dtype=float)
    failures = totals[eligible].to_numpy(dtype=float) - successes
    # Jeffreys interval: quantiles of Beta(successes + 0.5, failures + 0.5)
    lower[eligible] = beta.ppf(alpha / 2, successes + 0.5, failures + 0.5)
    upper[eligible] = beta.ppf(1 - alpha / 2, successes + 0.5, failures + 0.5)
    return lower, upper


def _age_label(value):
    """Format an age-bin edge for a facet label, dropping a trailing '.0' on whole numbers."""
    try:
        return format(value, 'g')
    except (TypeError, ValueError):
        # non-numeric age bins (e.g. already strings) are used as-is
        return str(value)


def plot_par_dens_ref_sim_comparison(age_agg_sim_df, ref_df):
    """
    Plot parasite density comparisons with reference
    Stacked barplots of parasite density bins by age, plus grids of line plots (asexual and gametocyte
    densities) with Jeffreys-interval error bars on the reference values.

    Args:
        age_agg_sim_df (pd.DataFrame): aggregated simulation output with columns 'month', 'agebin',
            'densitybin', 'Site', 'asexual_par_dens_freq', 'gametocyte_dens_freq' and 'Pop'.
        ref_df (pd.DataFrame): reference data. Besides the columns it shares with the simulation output, the
            columns 'count_asex', 'bin_total_asex', 'count_gamet' and 'bin_total_gamet' are read to compute
            the error bounds.

    Returns:
        list: [gg1, gg2, gg3] - the stacked barplot, the asexual-density line plot and the
            gametocyte-density line plot.
    """
    months_of_year = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    # subset simulation output to months in reference dataset
    # (copy so that adding columns below never writes into the caller's dataframe)
    months = sorted(ref_df['month'].unique())
    cur_df = age_agg_sim_df[age_agg_sim_df['month'].isin(months)].copy()

    # if the maximum reference density bin is < (maximum simulation density bin / 1000),
    # aggregate all simulation densities >= max ref bin into the max ref bin
    # the final bin will be all densities equal to or above that value
    max_ref_dens = ref_df['densitybin'].dropna().max()
    max_cur_dens = cur_df['densitybin'].dropna().max()
    if max_ref_dens < (max_cur_dens / 1000):
        # get sum of frequencies within higher bins
        all_higher_dens = cur_df[cur_df['densitybin'] >= max_ref_dens]
        sim_agg_higher_dens = all_higher_dens.groupby(['month', 'agebin', 'Site'], as_index=False).agg(
            densitybin=('densitybin', 'min'),
            asexual_par_dens_freq=('asexual_par_dens_freq', 'sum'),
            gametocyte_dens_freq=('gametocyte_dens_freq', 'sum'),
            Pop=('Pop', 'mean'))
        # remove higher density bins from df
        cur_df_lower = cur_df[cur_df['densitybin'] < max_ref_dens]
        # add back in the aggregated frequencies across higher density bins
        # (the two pieces cover disjoint density bins, so stacking them is the full union of rows)
        cur_df = pd.concat([cur_df_lower, sim_agg_higher_dens], join='outer', ignore_index=True)

    # add zeros for unobserved reference densities up to max_ref_dens
    all_zeros_df = cur_df[['month', 'agebin', 'densitybin', 'Site']]
    ref_df = pd.merge(ref_df, all_zeros_df, how="outer")
    ref_df = ref_df.fillna(0)

    # combine reference and simulation dataframes
    cur_df['source'] = 'simulation'
    ref_df['source'] = 'reference'
    # ignore_index gives every row a unique label, which the masked assignments below rely on
    combined_df0 = pd.concat([cur_df, ref_df], join='outer', ignore_index=True)

    # = = = = = = = = = #
    # stacked barplots
    # = = = = = = = = = #
    # change type to factors for barplot groupings (astype returns a new frame; combined_df0 is untouched)
    convert_dict = {'densitybin': 'category',
                    'agebin': 'category'}
    combined_df = combined_df0.astype(convert_dict)

    # plot (colors come straight from the ColorBrewer 'BrBG' palette)
    gg1 = (ggplot(combined_df, aes(x='agebin', y='asexual_par_dens_freq', fill='densitybin')) +
           geom_bar(position="stack", stat="identity") +
           scale_fill_brewer(palette="BrBG") +
           facet_grid('month~source')
           )

    # = = = = = = = = = #
    # grid of line plots
    # = = = = = = = = = #

    # calculate reference error bounds using Jeffreys interval
    ci_width = 0.95
    alpha = 1 - ci_width
    is_reference = combined_df0['source'] == 'reference'
    bound_columns = (('count_asex', 'bin_total_asex', 'min_asex', 'max_asex'),
                     ('count_gamet', 'bin_total_gamet', 'min_gamet', 'max_gamet'))
    for count_col, total_col, min_col, max_col in bound_columns:
        counts = combined_df0[count_col]
        totals = combined_df0[total_col]
        # bounds are only computed for reference rows with 0 < count < total; all other rows stay NaN
        # (comparisons against NaN are False, so simulation rows are excluded either way)
        eligible = is_reference & (counts > 0) & (counts < totals)
        lower, upper = _jeffreys_bounds(counts, totals, eligible, alpha)
        combined_df0[min_col] = lower
        combined_df0[max_col] = upper

    # change facet values to intuitive labels
    # NOTE(review): assumes 'month' holds calendar month numbers 1-12 (Jan == 1) - confirm against the data
    combined_df0['month'] = combined_df0['month'].map(lambda month: months_of_year[int(month) - 1])
    month_cat = CategoricalDtype(categories=months_of_year, ordered=True)
    combined_df0['month'] = combined_df0['month'].astype(month_cat)

    # the lowest bin is labelled '<=x years'; every later bin is labelled 'previous-current years'
    all_age_bins = sorted(combined_df0['agebin'].unique())
    age_bin_labels = ['<=' + _age_label(all_age_bins[0]) + ' years']
    for lower_age, upper_age in zip(all_age_bins, all_age_bins[1:]):
        age_bin_labels.append(_age_label(lower_age) + '-' + _age_label(upper_age) + ' years')

    # 'agebin_index' is the 0-based position of each row's age bin within the sorted unique age bins
    position_by_age_bin = {age_bin: position for position, age_bin in enumerate(all_age_bins)}
    label_by_age_bin = dict(zip(all_age_bins, age_bin_labels))
    combined_df0['agebin_index'] = combined_df0['agebin'].map(position_by_age_bin)
    age_bin_labels_cat = CategoricalDtype(categories=age_bin_labels, ordered=True)
    combined_df0['agebin'] = combined_df0['agebin'].map(label_by_age_bin).astype(age_bin_labels_cat)

    # plot asexual densities
    # (plotnine's ggplot() does not accept an `alpha` keyword, so none is passed here; this matches gg3)
    gg2 = (ggplot(combined_df0, aes(x="densitybin", y='asexual_par_dens_freq', color='source')) +
           geom_line(size=2) +
           geom_point() +
           scale_x_continuous(trans='log10') +
           geom_errorbar(aes(ymin='min_asex', ymax='max_asex'), width=0.2) +
           theme_bw() +
           ylab('fraction of population') +
           xlab('asexual parasite density bin') +
           scale_color_manual(values={"reference": 'red',
                                      "simulation": 'blue'}) +
           facet_grid('agebin~month')
           )

    # plot gametocyte densities
    gg3 = (ggplot(combined_df0, aes(x='densitybin', y='gametocyte_dens_freq', color='source')) +
           geom_line(size=2) +
           geom_point() +
           scale_x_continuous(trans='log10') +
           geom_errorbar(aes(ymin='min_gamet', ymax='max_gamet'), width=0.2) +
           theme_bw() +
           ylab('fraction of population') +
           xlab('gametocyte density bin') +
           scale_color_manual(values={"reference": 'red',
                                      "simulation": 'blue'}) +
           facet_grid('agebin~month')
           )

    return [gg1, gg2, gg3]
12 changes: 6 additions & 6 deletions create_plots/plot_sim_sweep.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# import numpy as np
# import matplotlib.pyplot as plt
import matplotlib
from plotnine import ggplot, aes, geom_line, facets

Expand All @@ -18,10 +18,10 @@


gg = (
ggplot(df)
+ aes(x='Age', y='Incidence', color='EIR') # , linetype="season"
+ geom_line()
+ facets.facet_grid("season ~ CM")
ggplot(df) +
aes(x='Age', y='Incidence', color='EIR') + # , linetype="season"
geom_line() +
facets.facet_grid("season ~ CM")
# + facets.facet_wrap(facets="season", nrow=1)
)

Expand Down
2 changes: 2 additions & 0 deletions report/create_pdf_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

title = 'Malaria Model Overview'


class PDF(FPDF):
def header(self):
# Logo
Expand Down Expand Up @@ -36,6 +37,7 @@ def footer(self):
# Page number
self.cell(0, 10, 'Page ' + str(self.page_no()) + '/{nb}', 0, 0, 'C')


# Instantiation of inherited class
pdf = PDF()
pdf.alias_nb_pages()
Expand Down
Loading

0 comments on commit 8ef20d0

Please sign in to comment.