From 616d6ec53f113c4801d2c8f2eb7e2c4730ac913d Mon Sep 17 00:00:00 2001 From: Jonathan Bloedow Date: Thu, 8 Aug 2024 12:53:56 -0700 Subject: [PATCH] Cleaned up and sped up the eula calculation code. --- src/idmlaser/numpynumba/population.py | 31 ++++++++++++++++++--------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/idmlaser/numpynumba/population.py b/src/idmlaser/numpynumba/population.py index c93be43..18a241a 100644 --- a/src/idmlaser/numpynumba/population.py +++ b/src/idmlaser/numpynumba/population.py @@ -4,9 +4,16 @@ from tqdm import tqdm import numpy as np +import numba as nb import h5py import pdb +@nb.njit(parallel=True) +def accumulate_deaths_parallel(nodeid_filtered, death_year, nodeid_indices_array, total_population_per_year): + for i in nb.prange(len(death_year)): + node_index = nodeid_indices_array[nodeid_filtered[i]] + total_population_per_year[node_index, death_year[i]] += 1 + class Population: """Array-based Agent Based Population Class""" @@ -246,27 +253,31 @@ def expected_pops_over_years(self, eula_age_in_years=5, years=10): # Calculate the year of death for each individual (0-based index for years 1-years) death_year = dod_filtered // 365 - death_year[death_year >= years] = years-1 # Cap deaths at year years + # Let's initially count for all the years, even if we only keep first 10. + # No, too slow.
Let's count 0 through years-1, and put 'everything else' in 'years' + death_year[death_year >= years] = years-1 # Cap deaths at years-1 - # Initialize the total_population_per_year array + # Initialize the expected_new_deaths array unique_nodeids = np.unique(nodeid_filtered) nodeid_indices = {nodeid: i for i, nodeid in enumerate(unique_nodeids)} - self.total_population_per_year = np.zeros((len(unique_nodeids), years), dtype=int) self.expected_new_deaths_per_year = np.zeros((len(unique_nodeids), years), dtype=int) - # Accumulate deaths by year and node - for i in tqdm(range(len(death_year))): - node_index = nodeid_indices[nodeid_filtered[i]] - self.total_population_per_year[node_index, death_year[i]] += 1 + nodeid_indices_array = np.zeros(unique_nodeids.max() + 1, dtype=np.int32) + for i, nodeid in enumerate(unique_nodeids): + nodeid_indices_array[nodeid] = i + accumulate_deaths_parallel(nodeid_filtered, death_year, nodeid_indices_array, self.expected_new_deaths_per_year) # Convert deaths to populations by subtracting cumulative deaths from the initial population - initial_population_counts = np.bincount(nodeid_filtered, minlength=len(unique_nodeids)) - cumulative_deaths = np.cumsum(self.total_population_per_year, axis=1) + cumulative_deaths = np.cumsum(self.expected_new_deaths_per_year, axis=1) # xtra dupe element at end I don't understand yet # Calculate new deaths per year - self.expected_new_deaths_per_year[:, 0] = self.total_population_per_year[:, 0] + self.expected_new_deaths_per_year[:, 0] = self.expected_new_deaths_per_year[:, 0] self.expected_new_deaths_per_year[:, 1:] = np.diff(cumulative_deaths, axis=1) + self.expected_new_deaths_per_year = self.expected_new_deaths_per_year[:, 0:10] # discard extra killall column + # Calculate the initial population counts, though not currently used + initial_population_counts = np.bincount(nodeid_filtered, minlength=len(unique_nodeids)) + # Calculate total_population_per_year at the end, using cumulative deaths 
self.total_population_per_year = initial_population_counts[:, None] - cumulative_deaths # Optional: print the resulting populations