diff --git a/jb/src/idmlaser/schema.json b/jb/src/idmlaser/schema.json new file mode 100644 index 0000000..e14bf59 --- /dev/null +++ b/jb/src/idmlaser/schema.json @@ -0,0 +1,44 @@ +{ + "title": "Agents", + "type": "object", + "properties": { + "id": { + "type": "integer", + "description": "Primary key, auto-incremented" + }, + "node": { + "type": "integer", + "description": "Node identifier" + }, + "age": { + "type": "number", + "description": "Age of the agent" + }, + "infected": { + "type": "boolean", + "description": "Infection status of the agent" + }, + "infection_timer": { + "type": "integer", + "description": "Timer for infection duration" + }, + "incubation_timer": { + "type": "integer", + "description": "Timer for incubation period" + }, + "immunity": { + "type": "boolean", + "description": "Immunity status of the agent" + }, + "immunity_timer": { + "type": "integer", + "description": "Timer for immunity duration" + }, + "expected_lifespan": { + "type": "integer", + "description": "Expected lifespan of the agent" + } + }, + "required": ["id", "node", "age", "infected", "infection_timer", "incubation_timer", "immunity", "immunity_timer", "expected_lifespan"] +} + diff --git a/jb/src/idmlaser/sir_numpy.py b/jb/src/idmlaser/sir_numpy.py index 1418097..5d87c38 100644 --- a/jb/src/idmlaser/sir_numpy.py +++ b/jb/src/idmlaser/sir_numpy.py @@ -10,55 +10,34 @@ from . import report from .model_numpy import eula -def load_births_data(file_path): - # Load data from CSV - births_data = pd.read_csv(file_path) - - # Sort data by ID - births_data.sort_values(by='ID', inplace=True) - - # Group data by Elapsed_Years and transform into dictionary - births_dict = births_data.groupby('Elapsed_Years').apply(lambda x: x['Births'].values).to_dict() - - return births_dict - -# Example usage -#births_data = load_births_data(settings.births_file) - -def load( pop_file ): +def _get_beta_samples(number): """ - Load population from csv file as np arrays. 
Each property column is an np array. + Generate a list of expected lifespans using a beta distribution. + + This function uses a beta distribution to generate a specified number of + samples representing the expected lifespans of individuals. The parameters + of the beta distribution are chosen to produce a plausible distribution of + lifespans centered around a mean value of 75 years, with a maximum possible + lifespan of 110 years. The samples are scaled and shifted to fit within + this range. + + Args: + number (int): The number of lifespan samples to generate. + + Returns: + np.ndarray: An array of lifespan samples. + + Example: + lifespans = _get_beta_samples(100) + print(lifespans) # Array of 100 expected lifespan values. + + Notes: + - The beta distribution is parameterized with alpha = 4 and beta = 2, + which produces a distribution with a higher concentration of values + around the lower end of the scale. + - The generated samples are scaled to fit the range of 1 to 110 years. """ - # Load the entire CSV file into a NumPy array - header_row = np.genfromtxt(pop_file, delimiter=',', dtype=str, max_rows=1) - - # Load the remaining data as numerical values, skipping the header row - data = np.genfromtxt(pop_file, delimiter=',', dtype=float, skip_header=1) - # Extract headers from the header row - headers = header_row - - # Load each column into a separate NumPy array - columns = {header: data[:, i] for i, header in enumerate(headers)} - columns['infected'] = columns['infected'].astype(bool) - columns['immunity'] = columns['immunity'].astype(bool) - columns['node'] = columns['node'].astype(np.uint32) - columns['infection_timer'] = columns['infection_timer'].astype(np.float32) # int better? - columns['incubation_timer'] = columns['incubation_timer'].astype(np.float32) # int better? - columns['immunity_timer'] = columns['immunity_timer'].astype(np.float32) # int better? 
- columns['age'] = columns['age'].astype(np.float32) - columns['expected_lifespan'] = columns['expected_lifespan'].astype(np.float32) - if "mcw" in columns: - columns.pop( "mcw" ) - - settings.pop = len(columns['infected']) - # print( f"Population={settings.pop}" ) - - add_expansion_slots( columns ) - # Pad with a bunch of zeros - return columns - -def get_beta_samples(number): from scipy.stats import beta # Define parameters lifespan_mean = 75 @@ -72,6 +51,51 @@ def get_beta_samples(number): return scaled_samples def add_expansion_slots( columns, num_slots=settings.expansion_slots ): + """ + Adds 'expansion slots' to the agent population for future births. + + This function prepends a specified number of "expansion slots" to the existing + agent population columns. Each expansion slot represents an agent to be born + later and is initialized with default values. The function ensures that the + new slots are contiguous in memory, allowing efficient management of the agent + population as agents are born and the start index of the population array is + decremented. + + Args: + columns (dict): A dictionary where keys are column headers and values are + NumPy arrays representing the agent population attributes. + num_slots (int, optional): The number of expansion slots to add. Defaults + to settings.expansion_slots. + + Returns: + None: The function modifies the 'columns' dictionary in place by appending + new expansion slots to each column. 
+ + Example: + columns = { + 'id': np.array([1, 2, 3]), + 'node': np.array([0, 1, 0]), + 'age': np.array([25.0, 30.0, 22.0]), + 'infected': np.array([False, True, False]), + 'infection_timer': np.array([0, 5, 0]), + 'incubation_timer': np.array([0, 3, 0]), + 'immunity': np.array([True, False, True]), + 'immunity_timer': np.array([120.0, 0.0, 60.0]), + 'expected_lifespan': np.array([80.0, 75.0, 85.0]) + } + add_expansion_slots(columns) + + Notes: + - The function initializes new agents with default values such as -1 for + 'node' and 'age', False for 'infected', and specified values for other + attributes. The goal is that as few properties as possible have to be set + upon birth, but there is still a way to check that an agent is unborn + (e.g., age=-1, node=-1). + - It also updates the 'settings.nodes' and 'settings.num_nodes' based on + the unique nodes in the 'columns'. This may be deprecated soon. + + """ + num_slots = int(num_slots) print( f"Adding {num_slots} expansion slots for future babies." 
) new_ids = [ x for x in range( num_slots ) ] @@ -88,7 +112,7 @@ def add_expansion_slots( columns, num_slots=settings.expansion_slots ): new_infection_timer = np.zeros( num_slots ).astype( np.float32 ) new_incubation_timer = np.zeros( num_slots ).astype( np.float32 ) - lifespan_samples = get_beta_samples( num_slots ) + lifespan_samples = _get_beta_samples( num_slots ) new_expected_lifespan = np.array( lifespan_samples ).astype( dtype=np.float32 ) settings.nodes = [ node for node in np.unique(columns['node']) ] @@ -106,162 +130,36 @@ def add_expansion_slots( columns, num_slots=settings.expansion_slots ): columns['immunity_timer'] = np.concatenate((columns['immunity_timer'], new_immunity_timer)) columns['expected_lifespan'] = np.concatenate((columns['expected_lifespan'], new_expected_lifespan)) -def initialize_database(): - return load( settings.pop_file ) - -def eula_init( df, age_threshold_yrs = 5, eula_strategy=None ): - # Create a boolean mask for elements to keep - def filter_strategy(): - # test out what happens if we render big chunks of the population epi-borrowing - condition = np.logical_and(~columns['infected'], columns['age']>age_threshold_yrs ) - columns['immunity'][condition] = 1 - columns['immunity_timer'][condition] = -1 - - def purge_strategy(): - # Note this is just for testing; for real work we need to keep track of our total pop - mask = (df['age'] <= age_threshold_yrs) | (df['infected'] != 0) - for column in df.keys(): - df[column] = df[column][mask] - - def downsample_strategy(): - # mask = (df['age'] <= age_threshold_yrs) | (df['infected'] != 0) - filter_arr = df['age']>=0 - # For permanently recovereds, we want those over threshold age and not infected - mask = ((df['age'] >= age_threshold_yrs) & (~df['infected']))[filter_arr] - # need this by node - - # For actual removal, we want thsoe not infected and those over threshold age but don't remove those with age == -1 - # So keep the rest - mask = ((df['age'] < age_threshold_yrs) | 
(df['infected'] )) - - eula.init() - - for column in df.keys(): - df[column] = df[column][mask] - - - print( "Ignoring requested strategy; using downsample only for now." ) - downsample_strategy() - return df - -def collect_report( data ): - """ - Report data to file for a given timestep. +def births_from_cbr_fast( node_pops_array, rate=30 ): """ - #print( "Start timestep report." ) - # THIS IS MESSED UP BUT I WASTED AN HOUR ON THE ALTERNATIVE!!! - condition_mask = np.logical_and(~data['infected'], ~data['immunity']) - unique_nodes, counts = np.unique(data['node'][condition_mask], return_counts=True) - - # Display the result - susceptible_counts_db = list(zip(unique_nodes, counts)) - susceptible_counts = {values[0]: values[1] for idx, values in enumerate(susceptible_counts_db)} - for node in settings.nodes: - if node not in susceptible_counts: - susceptible_counts[node] = 0 - - # Because we put dead people in "purgatory"... - if 4294967295 in susceptible_counts.keys(): # uint32(-1) - susceptible_counts.pop(4294967295) - if -1 in susceptible_counts.keys(): - susceptible_counts.pop(-1) - if len(susceptible_counts) > len(settings.nodes): - pdb.set_trace() - raise ValueError( f"Too many susceptible nodes." ) - - unique_nodes, counts = np.unique(data['node'][data['infected']], return_counts=True) - infected_counts_db = list(zip(unique_nodes, counts)) - infected_counts = {values[0]: values[1] for idx, values in enumerate(infected_counts_db)} - for node in settings.nodes: - if node not in infected_counts: - infected_counts[node] = 0 - if len(infected_counts) > len(settings.nodes): - pdb.set_trace() - raise ValueError( f"Too many infected nodes." ) - - def count_recos( node, immunity ): - # Boolean indexing to filter rows where immunity is 1 - filtered_rows = (immunity==1) - - # Use the filtered rows to get the corresponding node - filtered_node = node[filtered_rows] + Calculate the number of births for each node based on the crude birth rate (CBR). 
- # Use numpy.bincount to calculate the sum of Rs for each node - unique_nodes, counts = np.unique(filtered_node, return_counts=True) - counts_by_node = dict(zip(unique_nodes, counts)) + This function computes the expected number of births for each node in a population + using a given crude birth rate (CBR). The computation is performed in a vectorized + manner for high performance. - # Display the result - # Does this code assume there's an entry for each node even if the total is 0? - recovered_counts_eula = eula.get_recovereds_by_node() - for node in range( settings.num_nodes ): - # Now add in eulas - if node not in counts_by_node: - counts_by_node[node] = 0 - #counts_by_node[node] += sum(recovered_counts_eula[node].values()) - counts_by_node[node] += recovered_counts_eula[node] - return counts_by_node + Args: + node_pops_array (np.ndarray): An array containing the population of each node. + rate (float, optional): The crude birth rate per 1,000 individuals per year. + Defaults to 30. - recovered_counts = count_recos( data['node'], data['immunity'] ) + Returns: + np.ndarray: An array containing the number of new births for each node. - #print( "Stop timestep report." ) - return infected_counts, susceptible_counts, recovered_counts + Example: + node_pops = np.array([1000, 2000, 1500]) + births = births_from_cbr_fast(node_pops, rate=30) + print(births) # Array of new births for each node. + Notes: + - The function uses a fertility interval defined in `settings.fertility_interval` + to adjust the birth rate. + - The crude birth rate (CBR) is scaled by the node populations and the number + of days in a year (365). + - Poisson-distributed random numbers are generated to simulate the number of new + births, reflecting the natural variation in birth events. 
-def update_ages( data, totals, timestep ): """ - import numba - @numba.jit(parallel=True,nopython=True) - def update_ages_nb( ages ): - n = len(ages) - for i in range(n): - ages[i] += 1/365 - return ages - #data['age'] += 1/365 - """ - def update_ages_np( ages ): - ages[ages>0] += 1/365 - return ages - - data['age'] = update_ages_np( data['age'] ) - # data = - new_births = 0 - if timestep % settings.fertility_interval == 0: - new_births = births( data, totals ) - #data = - new_deaths = 0 - if timestep % settings.mortality_interval == 0: - new_deaths = deaths( data ) - return ( new_births, new_deaths ) - #return data - -def births_from_lorton_algo( timestep ): - # Calculate the year and day of year - year = timestep // 365 - doy = (timestep % 365) + 1 - - global births_data - if not births_data: - births_data = load_births_data(settings.births_file) - # Filter data for the specified city IDs - city_births_data = np.array(births_data[year]).astype(int) - births_today = np.zeros(len(settings.nodes)).astype(int) - births = (city_births_data * doy // 365) - (city_births_data * (doy - 1) // 365) - - births_today_dict = {node_id: births for node_id, births in zip(settings.nodes, births)} - return births_today_dict - -def births_from_cbr( node_pops, rate=30 ): - # TBD: births = CBR & node_pop / 1000 - # placeholder: just say 10 per node for now to test rest of code path - new_babies = {} - for node in node_pops: - cbr_node = settings.fertility_interval * rate * (node_pops[node]/1000.0)/365.0 - new_babies[node] = np.random.poisson( cbr_node ) - return new_babies - -def births_from_cbr_fast( node_pops_array, rate=30 ): - # Convert node_pops values to a NumPy array - #node_pops_array = np.array(list(node_pops.values())) # Compute the cbr_node for all nodes in a vectorized manner cbr_node_array = settings.fertility_interval * rate * (node_pops_array / 1000.0) / 365.0 @@ -270,283 +168,3 @@ def births_from_cbr_fast( node_pops_array, rate=30 ): new_babies_array = 
np.random.poisson(cbr_node_array) return new_babies_array -def births_from_cbr_var( node_pops, rate=30 ): - # rate can be array now - # TBD: births = CBR & node_pop / 1000 - # placeholder: just say 10 per node for now to test rest of code path - new_babies = {} - for node in node_pops: - act_rate = 17.5 - if node < len(rate): - act_rate = rate[node] - else: - print( f"WARNING: {node} not found in rate array! Defaulting to 17.5." ) - pdb.set_trace() - cbr_node = settings.fertility_interval * act_rate * (node_pops[node]/1000.0)/365.0 - new_babies[node] = np.random.poisson( cbr_node ) - return new_babies - -def births(data,totals_by_node): - # Births - # 1) demographic_dependent_Rate: - # Calculate number of women of child-bearing age: constant across nodes - # Add new babies as percentage of that. - # Can't do demographic_dependent_Rate if downsampling recovereds to N, hence: - # Or CBR - # totals_by_node supports CBR - - # Function to add newborns - def add_newborns( nodes ): - babies = len(nodes) - # Generate newborn data - #last_id = data['id'][-1] - # find an entry with age==-1 to use, or find a bunch - indices = np.where( data['age'] == -1 )[0][:babies] - #new_ids = np.arange(last_id + 1, last_id + 1 + babies) - #new_ids = data['id'][indices][:babies] - new_nodes = np.full(babies, nodes) - new_ages = np.zeros(babies) - new_infected = np.full(babies,False) - new_infection_timer = np.zeros(babies) - new_incubation_timer = np.zeros(babies) - new_immunity = np.full(babies,False) - new_immunity_timer = np.zeros(babies) - new_expected_lifespan = np.random.normal(loc=75, scale=7, size=babies).astype(np.float32) - - def reincarnate( data, indices, new_nodes, new_ages, new_infected, new_infection_timer, new_incubation_timer, new_immunity, new_immunity_timer, new_expected_lifespan ): - # This is memory-smarter option where we recycle agents - # TBD: Make c version - data['node'][indices] = new_nodes - data['age'][indices] = new_ages - data['infected'][indices] = 
new_infected - data['infection_timer'][indices] = new_infection_timer - data['incubation_timer'][indices] = new_incubation_timer - data['immunity'][indices] = new_immunity - data['immunity_timer'][indices] = new_immunity_timer - data['expected_lifespan'][indices] = new_expected_lifespan - # We have selected all the indices of unborn babies (age=-1) to birth. - reincarnate( data, indices, new_nodes, new_ages, new_infected, new_infection_timer, new_incubation_timer, new_immunity, new_immunity_timer, new_expected_lifespan ) - - new_babies = births_from_cbr( totals_by_node, rate=settings.cbr ) - #print( f"New babies by node: {new_babies}" ) - # Iterate over nodes and add newborns - #for node, count in new_babies.items(): - # if count > 0: - # add_newborns(node, count) - keys = np.array(list(new_babies.keys())) - values = np.array(list(new_babies.values())) - - # Create the numpy array by repeating each key according to its corresponding value - result_array = np.repeat(keys, values) - add_newborns( result_array ) - - return data - -def deaths(data): - # Non-disease deaths - # Create a boolean mask for the deletion condition - def old_style(): - delete_mask = (data['age'] >= data['expected_lifespan']) & (data['age']>=0) - if np.count_nonzero( delete_mask ): - - #data['infected'] = np.delete( data['infected'], np.where( delete_mask ) ) - #data[col] = np.delete( data[col], np.where( delete_mask ) ) - #data[col] = data[col][~delete_mask] - data['node'][delete_mask] = -1 - data['age'][delete_mask] = -1 - data['infected'][delete_mask] = 0 - data['immunity'][delete_mask] = 0 - data['infection_timer'][delete_mask] = 0 - data['immunity_timer'][delete_mask] = 0 - data['incubation_timer'][delete_mask] = 0 - data['expected_lifespan'][delete_mask] = -1 - - print( f"{np.count_nonzero(delete_mask)} new deaths." 
) - - def eula_death_by_rate(): - eula.progress_natural_mortality( settings.mortality_interval ) - - eula_death_by_rate() - return data - -def update_births_deaths( data ): - data = deaths(data) - data = births(data) - return data - -def progress_infections( data ): - # Update infected agents - # infection timer: decrement for each infected person - data['infection_timer'][data['infection_timer'] >= 1] -= 1 - data['incubation_timer'][data['incubation_timer'] >= 1] -= 1 - # some people clear - condition = np.logical_and(data['infected'], data['infection_timer'] == 0) - data['infected'][condition] = False - # recovereds gain immunity - data['immunity_timer'][condition] = np.random.randint(10, 41, size=np.sum(condition)) - data['immunity'][condition] = True - - return data - -# Update immune agents -def progress_immunities( data ): - # immunity decays - condition = np.logical_and(data['immunity'], data['immunity_timer'] > 0) - data['immunity_timer'][condition] -= 1 - # Recoverd->Susceptible - condition = np.logical_and(data['immunity'], data['immunity_timer'] == 0) - data['immunity'][condition] = False - - return data - -def calculate_new_infections( data, inf, sus, totals ): - # We are currently only passing in inf and sus fractions and totals. Not passing in incubators. - # We want to count the number of incubators by now all at once not in a for loop. - node_counts_incubators = np.zeros(len(inf)) - node_counts_incubators = np.bincount( data['node'][data['age']>=0], weights=(data['incubation_timer']>=1)[data['age']>=0] ) - #exposed_fraction = {} - for idx in range(len(node_counts_incubators)): - exposed_fraction = node_counts_incubators[idx]/totals[idx] - inf[idx] -= exposed_fraction - #print( f"infectious fraction for node {idx} = {inf[idx]} after subtracting {node_counts_incubators[idx]} incubators." 
) - ret_ni = np.zeros(settings.num_nodes).astype( np.uint32 ) - for node in range(settings.num_nodes): - ret_ni[node] = int(sus[node]*inf[node]*settings.base_infectivity*totals[node]) - #print( f"New_Infections: {new_infections} = {np.array(sorted(sus.values()))} * {np.array(sorted(inf.values()))} * {settings.base_infectivity}" ) - return ret_ni - -def handle_transmission_by_node( data, new_infections, node=0 ): - def handle_new_infections(new_infections): - # print( f"We are doing transmission to {new_infections} in node {node}." ) - # Create a boolean mask based on the conditions in the subquery - subquery_condition = np.logical_and(~data['infected'], ~data['immunity']) - subquery_condition = np.logical_and(subquery_condition, (data['node'] == node)) - - # Get the indices of eligible agents using the boolean mask - eligible_agents_indices = np.where(subquery_condition)[0] - - # Randomly sample 'new_infections' number of indices - selected_indices = np.random.choice(eligible_agents_indices, size=min(new_infections, len(eligible_agents_indices)), replace=False) - - # Update the 'infected' column based on the selected indices - data['infected'][selected_indices] = True - - return selected_indices - - return handle_new_infections(new_infections) - -def handle_transmission( data_in, new_infections_in ): - # We want to do this in parallel; - htbn = partial( handle_transmission_by_node, data_in, new_infections_in ) - with concurrent.futures.ThreadPoolExecutor() as executor: - results = list(executor.map(htbn, settings.nodes)) - return np.concatenate(results).tolist() - -def add_new_infections( data ): - # Actually this just sets the new infection timers (globally) for all the new infections - # New infections themselves are set node-wise - def add_new_infections_np( data ): - condition = np.logical_and(data['infected'], data['infection_timer'] == 0) - min_infection_dur = 11 - max_infection_dur = min_infection_dur + 20 # 20 is a bit long to keep - 
data['incubation_timer'][condition] = min_infection_dur - data['infection_timer'][condition] = np.random.randint(min_infection_dur, max_infection_dur, size=np.sum(condition)) - return data - - data = add_new_infections_np( data ) - return data - -def migrate( data, timestep, num_infected=None ): - # Migrate 1% of infecteds "downstream" every week; coz - if timestep % settings.migration_interval == 0: # every week - infected = np.where( data['infected'] )[0] - fraction = int(len(infected)*0.05) - selected = np.random.choice( infected, fraction ) - #print( f"Migrating {len(selected)} infecteds on day {timestep}." ) - # Update the 'nodes' array based on the specified conditions - data['node'][selected] = np.where(data['node'][selected] == 0, settings.num_nodes - 1, data['node'][selected] - 1 ) - return data - -def distribute_interventions( ctx, timestep ): - def ria_9mo(): - condition_mask = ( - (ctx['age'] > 290/365.0) & - (ctx['age'] < 291/365.0) & - (ctx['immunity'] == 0) & - (ctx['node'] == settings.campaign_node) - ) - - # Apply the update using the boolean mask - ctx['immunity'][condition_mask] = 1 - ctx['immunity_timer'][condition_mask] = 3650 - - def campaign( coverage = 1.0 ): - # Create a boolean mask for the conditions specified in the WHERE clause - condition_mask = ( - (ctx['immunity'] == 0) & - (ctx['age'] < 16) & - (ctx['node'] == settings.campaign_node) - ) - - # Shuffle the array to simulate ORDER BY RANDOM() - #np.random.shuffle(ctx[condition_mask]) - - # Get the indices of elements that satisfy the condition - selected_indices = np.where(condition_mask)[0] - - # Calculate the number of elements to select based on the specified coverage - num_to_select = int(len(selected_indices) * coverage) - - # Randomly select X% of indices - selected_indices_subset = np.random.choice(selected_indices, size=num_to_select, replace=False) - - # Calculate the limit based on the specified coverage - #limit = int(np.sum(condition_mask) * coverage) - - # Apply the 
update to the limited subset - ctx['immunity'][selected_indices_subset] = 1 - ctx['immunity_timer'][selected_indices_subset] = -1 - - ria_9mo() - if timestep == settings.campaign_day: - campaign(settings.campaign_coverage) - return ctx - -# Function to run the simulation for a given number of timesteps -def run_simulation(data, csvwriter, num_timesteps): - currently_infectious, currently_sus, cur_reco = collect_report( data ) - report.write_timestep_report( csvwriter, 0, currently_infectious, currently_sus, cur_reco ) - - for timestep in range(1, num_timesteps + 1): - data = update_ages( data ) - - data = progress_infections( data ) - - data = progress_immunities( data ) - - new_infections = calculate_new_infections( data, currently_infectious, currently_sus ) - - data = handle_transmission( data_in=data, new_infections_in=new_infections ) - - data = add_new_infections( data ) - - data = migrate( data, timestep ) - - currently_infectious, currently_sus, cur_reco = collect_report( data ) - report.write_timestep_report( csvwriter, timestep, currently_infectious, currently_sus, cur_reco ) - - - print("Simulation completed. 
Report saved to 'simulation_report.csv'.") - -# Main simulation -if __name__ == "__main__": - data = initialize_database() - - # Create a CSV file for reporting - csvfile = open('simulation_report.csv', 'w', newline='') - csvwriter = csv.writer(csvfile) - csvwriter.writerow(['Timestep', 'Node', 'Susceptible', 'Infected', 'Recovered']) - - # Run the simulation for 1000 timesteps - run_simulation(data, csvwriter, num_timesteps=settings.duration ) - diff --git a/jb/src/idmlaser/sir_sql.py b/jb/src/idmlaser/sir_sql.py index b317ea7..16ab1a8 100644 --- a/jb/src/idmlaser/sir_sql.py +++ b/jb/src/idmlaser/sir_sql.py @@ -4,9 +4,12 @@ import concurrent.futures import numpy as np # not for modeling from scipy.stats import beta +import json import pdb import sys import os +import importlib.resources as pkg_resources +import idmlaser # seems odd and circular # We'll fix this settings stuff up soon. @@ -162,6 +165,41 @@ def eula_init( cursor, age_threshold_yrs = 5, eula_strategy="from_db" ): raise ValueError( f"Unknown eula strategy: {eula_strategy}." 
# JSON-schema primitive type name -> column type written into the DDL.
# SQLite has no dedicated boolean storage class; per the SQLite datatype
# documentation a column declared BOOLEAN receives NUMERIC affinity.
_JSON_TO_SQLITE_TYPES = {
    'integer': 'INTEGER',
    'number': 'REAL',
    'boolean': 'BOOLEAN',
}


def map_json_type_to_sqlite(json_type):
    """
    Map a JSON-schema primitive type name to a SQLite column type.

    Args:
        json_type (str): One of 'integer', 'number' or 'boolean'.

    Returns:
        str: 'INTEGER', 'REAL' or 'BOOLEAN' respectively.

    Raises:
        ValueError: If json_type is anything else, including non-string
            values such as a list of types.
    """
    # The isinstance guard keeps unhashable inputs (e.g. ["integer", "null"])
    # on the ValueError path instead of failing the dict lookup with TypeError.
    if isinstance(json_type, str) and json_type in _JSON_TO_SQLITE_TYPES:
        return _JSON_TO_SQLITE_TYPES[json_type]
    raise ValueError(f"Unsupported JSON type: {json_type}")


def _require_sql_identifier(name, what):
    """Return name unchanged if it is safe to splice into DDL unquoted, else raise ValueError."""
    import re  # function-scope import so the block does not rely on a file-level one

    # Letter or underscore first, then letters, digits, underscores or '$'.
    # Anything else (spaces, quotes, ';', parentheses, ...) would change the
    # structure of the generated statement.
    if not (isinstance(name, str) and re.fullmatch(r"[^\W\d][\w$]*", name)):
        raise ValueError(f"Invalid SQL identifier for {what}: {name!r}")
    return name


def construct_create_table_sql(schema):
    """
    Build a CREATE TABLE statement from a JSON-schema-like dictionary.

    The table is named after schema['title'] and gets one column per entry of
    schema['properties'], in dictionary order. The column called 'id' becomes
    "PRIMARY KEY AUTOINCREMENT"; every other column listed in
    schema['required'] is declared NOT NULL.

    Args:
        schema (dict): Must contain 'title' (str) and 'properties' (a dict
            mapping column name -> dict with a 'type' key). 'required'
            (a collection of column names) is optional.

    Returns:
        str: The CREATE TABLE statement, terminated by a semicolon.

    Raises:
        KeyError: If 'title', 'properties' or a column's 'type' is missing.
        ValueError: If a type is unsupported, if the table or a column name is
            not a plain identifier, if 'id' does not map to INTEGER, or if
            no columns are defined.
    """
    table_name = _require_sql_identifier(schema['title'], "table name")
    required = schema.get('required', [])

    column_defs = []
    for column_name, column_info in schema['properties'].items():
        _require_sql_identifier(column_name, "column name")
        column_type = map_json_type_to_sqlite(column_info['type'])

        if column_name == 'id':
            # SQLite only accepts AUTOINCREMENT on an INTEGER PRIMARY KEY;
            # fail here with a clear message rather than at execute() time.
            if column_type != 'INTEGER':
                raise ValueError(
                    f"Column 'id' must have JSON type 'integer' to be "
                    f"AUTOINCREMENT, got {column_info['type']!r}"
                )
            column_defs.append(f"{column_name} {column_type} PRIMARY KEY AUTOINCREMENT")
            continue

        column_def = f"{column_name} {column_type}"
        if column_name in required:
            column_def += " NOT NULL"
        column_defs.append(column_def)

    # An empty column list would yield "CREATE TABLE x ( );", which SQLite rejects.
    if not column_defs:
        raise ValueError(f"Schema for table {table_name!r} defines no columns")

    column_defs_str = ",\n ".join(column_defs)
    return f"CREATE TABLE {table_name} (\n {column_defs_str}\n);"
@@ -174,6 +212,20 @@ def initialize_database( conn=None, from_file=True ): conn = sql.connect(":memory:") # Use in-memory database for simplicity cursor = conn.cursor() + # Load schema.json + #with open('schema.json', 'r') as file: + with pkg_resources.open_text(idmlaser, 'schema.json') as file: + schema = json.load(file) + + # Create the table + create_table_sql = construct_create_table_sql(schema) + #print(create_table_sql) # Print the SQL statement for debugging purposes + with conn: + conn.execute(create_table_sql) + + #print("Table created successfully.") + + """ # Create agents table cursor.execute(''' CREATE TABLE agents ( @@ -196,7 +248,7 @@ def initialize_database( conn=None, from_file=True ): #cursor.execute( "CREATE INDEX idx_agents_node ON agents(id, node)" ) #cursor.execute( "CREATE INDEX idx_agents_node_infected ON agents(node, infected)" ) #cursor.execute( "CREATE INDEX idx_agents_node_immunity ON agents(node, immunity)" ) - + """ # Insert 10,000 agents with random age and all initially uninfected #agents_data = [(i, random.randint(0, num_nodes-1), random.randint(0, 100), False, 0, 0, False, 0) for i in range(1, pop)] diff --git a/jb/src/idmlaser/utils/create_pop_as_csv.py b/jb/src/idmlaser/utils/create_pop_as_csv.py index 7a9e356..e8fd111 100644 --- a/jb/src/idmlaser/utils/create_pop_as_csv.py +++ b/jb/src/idmlaser/utils/create_pop_as_csv.py @@ -14,17 +14,20 @@ # 2) Convert the modeled population into a csv file print( f"Writing population file out out to csv: {settings.pop_file}." ) cursor = conn.cursor() -get_all_query = f"SELECT * FROM agents WHERE age<{settings.eula_age} ORDER BY age" +cursor.execute('PRAGMA table_info(agents)') +columns = [col[1] for col in cursor.fetchall()] +get_all_query = f"SELECT * FROM agents WHERE age<{settings.eula_age} ORDER BY age" cursor.execute( get_all_query ) rows = cursor.fetchall() print( f"Modeled population size = {len(rows)}" ) +# Don't hard-code column names. Get them from db source same as data. 
csv_output_file = settings.pop_file.strip( ".gz" ) with open( csv_output_file , "w", newline='' ) as csvfile: csv_writer = csv.writer( csvfile ) - csv_writer.writerow( ['id', 'node', 'age', 'infected', 'infection_timer', 'incubation_timer', 'immunity', 'immunity_timer', 'expected_lifespan' ] ) + csv_writer.writerow( columns ) csv_writer.writerows( rows ) print( f"Wrote uncompressed modeled population file as {csv_output_file}. Compressing..." )