Skip to content

Commit

Permalink
README.rst update for met-types generation
Browse files Browse the repository at this point in the history
  • Loading branch information
yann.roussel committed Dec 10, 2024
1 parent 85b09e5 commit 7b7fddf
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 72 deletions.
12 changes: 12 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,18 @@ The command outputs the density files in the output-dir and a metadata json file
// ...
}
Compute MET-types densities update
-------------------------------------------------

An update to the probabilistic map makes it possible to estimate morphological and electrical type densities of inhibitory
and excitatory neurons in the whole brain using `Verasztó et al. (202X)`_'s pipeline.
This pipeline produces transcriptomic type (T-type) densities and a mapping from T-types to canonical neuronal
ME-types as defined in the paper.

.. code-block:: bash
sbatch atlas_densities/app/process_t_types_job.sh
Subdivide excitatory files into pyramidal subtypes
--------------------------------------------------

Expand Down
183 changes: 115 additions & 68 deletions atlas_densities/app/convert_t_type_nrrds_to_me_type_nrrds.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import argparse
import glob
import os
from concurrent.futures import ProcessPoolExecutor, as_completed
from concurrent.futures import as_completed, ProcessPoolExecutor
from tqdm import tqdm
import pandas as pd
import nrrd
Expand All @@ -12,102 +13,148 @@

def load_p_map(path_to_p_map, max_me_types=None):
    """
    Load and preprocess the P-map, returning a single-row DataFrame keyed by MET-type.

    Invalid T-type/M-type combinations are zeroed out before normalization:
    - T-types containing 'Glut' can only combine with M-types containing 'PC'.
    - T-types containing 'Gaba' can only combine with M-types containing 'IN'.

    Args:
        path_to_p_map (str): Path to the P-map CSV file (T-types as rows,
            ME-types as columns).
        max_me_types (int, optional): If given, keep only the first
            ``max_me_types`` MET columns (backward-compatible cap).

    Returns:
        pd.DataFrame: A single row whose columns are "ME_TYPE|T_TYPE" keys
        holding the row-normalized probabilities; all-zero columns are dropped.
    """
    # Load the P-map
    p_map = pd.read_csv(path_to_p_map, index_col=0)
    print(f"Loaded P-map shape: {p_map.shape}")

    # Missing probabilities are treated as "combination does not occur".
    p_map = p_map.fillna(0)
    print(f"Shape after filling NaNs: {p_map.shape}")

    # Zero out invalid combinations in bulk (vectorized; replaces a per-cell loop).
    glut_rows = [t_type for t_type in p_map.index if "Glut" in t_type]
    gaba_rows = [t_type for t_type in p_map.index if "Gaba" in t_type]
    in_cols = [m_type for m_type in p_map.columns if "IN" in m_type]
    pc_cols = [m_type for m_type in p_map.columns if "PC" in m_type]
    p_map.loc[glut_rows, in_cols] = 0.0
    p_map.loc[gaba_rows, pc_cols] = 0.0

    # Renormalize each row; rows that became all-zero yield NaNs, reset to 0.
    row_sums = p_map.sum(axis=1)
    print(f"Row sums before normalization: {row_sums.describe()}")
    p_map = p_map.div(row_sums, axis=0).fillna(0)
    print(f"Shape after normalization: {p_map.shape}")

    # Flatten to MET keys: one column per (ME-type, T-type) pair.
    stacked_p_map = p_map.stack()
    print(f"Stacked P-map shape: {stacked_p_map.shape}")
    met_types = [f"{me_type}|{t_type}" for t_type, me_type in stacked_p_map.index]
    met_df = pd.DataFrame([stacked_p_map.values], columns=met_types)
    print(f"Initial MET DataFrame shape: {met_df.shape}")

    # Drop MET columns whose probability is zero everywhere.
    met_df = met_df.loc[:, (met_df != 0).any(axis=0)]
    print(f"Filtered MET DataFrame shape: {met_df.shape}")

    # Honor the documented optional cap on the number of MET columns.
    if max_me_types is not None:
        met_df = met_df.iloc[:, :max_me_types]

    return met_df

# Initialize output directory
def setup_output_path(output_path):
    """Create the output directory (and parents) if it doesn't exist.

    Args:
        output_path (str): Directory that will receive the NRRD outputs.
    """
    # exist_ok avoids the check-then-create race of exists()+makedirs().
    os.makedirs(output_path, exist_ok=True)

# Process a single MET-type
def process_met_type(met_type, met_data, t_type_paths, sizes):
    """Compute the density volume for a single MET-type.

    Args:
        met_type (str): MET key; the T-type is taken from the third "|"-field.
        met_data: Probability weight(s) for this MET-type (exposes ``.values``).
        t_type_paths (dict): T-type name -> NRRD file path.
        sizes: Expected voxel grid dimensions of the T-type volume.

    Returns:
        The T-type density array scaled by the MET-type probability, or
        None if the T-type is unknown or any error occurred (best-effort).
    """
    try:
        # NOTE(review): assumes MET keys have >= 3 "|"-separated fields with the
        # T-type third, and that file names use "_" where keys use "-" — confirm
        # against the key format produced by load_p_map.
        t_type = met_type.split("|")[2].replace("-", "_")  # Extract T-type from MET-type
        # Skip gracefully when the T-type has no corresponding NRRD file.
        if t_type not in t_type_paths:
            print(f"Skipping MET-type {met_type}: T-type {t_type} not found in t_type_paths.")
            return None
        t_type_data, _ = nrrd.read(t_type_paths[t_type])
        if t_type_data.shape != tuple(sizes):
            raise ValueError(f"Incorrect dimensions for {t_type}: {t_type_data.shape}")
        # Scale the T-type density by this MET-type's probability.
        return t_type_data * met_data.values
    except Exception as e:
        # Best-effort: log and skip this MET-type rather than aborting the batch.
        print(f"Error processing MET-type {met_type}: {e}")
        return None

# Save a single MET-type result
def save_met_type_result(met_type, data, output_path):
    """Save NRRD file for a single MET-type.

    Args:
        met_type (str): MET key; used as the output file name stem.
        data: Volume array to write.
        output_path (str): Directory that receives "<met_type>.nrrd".
    """
    try:
        file_path = os.path.join(output_path, f"{met_type}.nrrd")
        nrrd.write(file_path, data)
    except Exception as e:
        # Best-effort: report the failure but do not abort the batch.
        print(f"Error saving MET-type {met_type}: {e}")

def process_batch(met_types_batch, p_map_subset, t_type_paths, output_path, sizes):
    """Compute and save density volumes for a batch of MET-types.

    Args:
        met_types_batch (list[str]): MET keys to process.
        p_map_subset (pd.DataFrame): P-map with one column per MET key.
        t_type_paths (dict): T-type name -> NRRD file path.
        output_path (str): Directory that receives the NRRD outputs.
        sizes: Expected voxel grid dimensions of each T-type volume.
    """
    for met_type in tqdm(met_types_batch, desc="Processing MET-types"):
        try:
            data = process_met_type(met_type, p_map_subset[met_type], t_type_paths, sizes)
            # process_met_type returns None on skip/error; only save real results.
            if data is not None:
                save_met_type_result(met_type, data, output_path)
        except Exception as e:
            # Keep going: one bad MET-type must not kill the whole batch.
            print(f"Error processing batch for {met_type}: {e}")

# Main function
def main():
    """CLI entry point: convert a range of T-type NRRDs into MET-type NRRDs.

    ``--start``/``--end`` select an inclusive slice of the sorted T-type
    files; the MET-types whose T-type falls in that slice are processed in
    parallel across ``--workers`` processes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--start", type=int, required=True, help="Start index of T-types")
    parser.add_argument("--end", type=int, required=True, help="End index of T-types")
    parser.add_argument(
        "--workers", type=int, default=1, help="Number of worker processes to use"
    )
    args = parser.parse_args()

    print(f"Processing T-types from index {args.start} to {args.end}...")

    # Validate the requested range against the available T-type files.
    t_type_files = sorted(glob.glob(f"{PATH_TO_T_TYPES_NRRDS}/*.nrrd"))
    if args.start < 0 or args.end >= len(t_type_files):
        raise ValueError(f"Invalid range: {args.start} to {args.end}")

    # The slice is inclusive of --end (matches the sbatch driver's indices).
    t_type_batch = t_type_files[args.start:args.end + 1]
    print(f"Loaded {len(t_type_batch)} T-types for processing.")
    t_type_batch_list = [os.path.basename(f).replace(".nrrd", "") for f in t_type_batch]

    # Load the P-map and make sure the output directory exists.
    p_map = load_p_map(PATH_TO_P_MAP)
    setup_output_path(OUTPUT_PATH)

    # Map every available T-type name to its NRRD path.
    t_type_list = sorted(
        f.replace(".nrrd", "") for f in os.listdir(PATH_TO_T_TYPES_NRRDS) if f.endswith(".nrrd")
    )
    t_type_paths = {t_type: os.path.join(PATH_TO_T_TYPES_NRRDS, f"{t_type}.nrrd") for t_type in t_type_list}

    # Keep only MET-types whose T-type component falls in this batch.
    met_types = list(p_map.columns)
    print("all met-types :", len(met_types))
    met_types_batch = [
        met_type for met_type in met_types
        if met_type.split("|")[2].replace("-", "_") in t_type_batch_list
    ]
    print("batch size :", len(met_types_batch))
    if not met_types_batch:
        print(f"No MET-types to process in range {args.start}-{args.end}.")
        return

    # Read one reference volume to get the expected grid sizes for validation.
    _, header = nrrd.read(t_type_paths[t_type_list[0]])

    # Fan the MET-types out over worker processes in contiguous chunks.
    batch_size = max(1, len(met_types_batch) // args.workers)
    with ProcessPoolExecutor(max_workers=args.workers) as executor:
        futures = [
            executor.submit(
                process_batch,
                met_types_batch[i:i + batch_size],
                p_map,
                t_type_paths,
                OUTPUT_PATH,
                header["sizes"],
            )
            for i in range(0, len(met_types_batch), batch_size)
        ]
        for future in as_completed(futures):
            try:
                future.result()
            except Exception as e:
                # Surface worker failures without tearing down the other futures.
                print(f"Error in parallel execution: {e}")

if __name__ == "__main__":
main()

14 changes: 10 additions & 4 deletions atlas_densities/app/process_t_types_job.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,17 @@ fi

# Calculate start and end indices for this job
START=$((SLURM_ARRAY_TASK_ID * BATCH_SIZE))
END=$((START + BATCH_SIZE))
if [ $END -gt $TOTAL_T_TYPES ]; then
END=$TOTAL_T_TYPES
END=$((START + BATCH_SIZE - 1))
if [ $END -ge $TOTAL_T_TYPES ]; then
END=$((TOTAL_T_TYPES - 1)) # Adjust for the last batch
fi

# Ensure there is at least one T-type to process
if [ $START -gt $END ]; then
echo "No T-types to process for task ID $SLURM_ARRAY_TASK_ID."
exit 0
fi

# Run the Python script
echo "Processing T-types from index $START to $END..."
python convert_t_type_nrrds_to_me_type_nrrds.py --start $START --end $END
python convert_t_type_nrrds_to_me_type_nrrds.py --start $START --end $END --workers 4

0 comments on commit 7b7fddf

Please sign in to comment.