From aacdf7dc4b30297840047a398799146075a4678d Mon Sep 17 00:00:00 2001
From: olapuentesantana
Date: Thu, 5 Sep 2024 15:40:34 +0200
Subject: [PATCH 01/10] set parameter to indicate whether slides are TCGA

---
 run_pipeline.sh                              |  3 +-
 run_scripts/1_extract_histopatho_features.sh | 30 ++++++++++++--------
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/run_pipeline.sh b/run_pipeline.sh
index 6e90915..22dfa5e 100644
--- a/run_pipeline.sh
+++ b/run_pipeline.sh
@@ -33,6 +33,7 @@ slide_type="FF"
 tumor_purity_threshold=80
 class_names="SKCM_T"
 model_name="inception_v4"
+is_tcga=false
 
 echo "Create output directory: ${output_dir}..."
 mkdir -p ${output_dir}
@@ -50,7 +51,7 @@ apptainer exec \
     --cleanenv \
     -c \
     ${spotlight_sif} \
-    bash "/project/run_scripts/1_extract_histopatho_features.sh" ${checkpoint} ${clinical_files_dir} ${slide_type} ${class_names} ${tumor_purity_threshold} ${model_name}
+    bash "/project/run_scripts/1_extract_histopatho_features.sh" ${checkpoint} ${clinical_files_dir} ${slide_type} ${class_names} ${tumor_purity_threshold} ${model_name} ${is_tcga}
 
 echo "Tile level cell type quantification (2 out of 3)"
 apptainer exec \
diff --git a/run_scripts/1_extract_histopatho_features.sh b/run_scripts/1_extract_histopatho_features.sh
index f8c1a27..875ee7d 100755
--- a/run_scripts/1_extract_histopatho_features.sh
+++ b/run_scripts/1_extract_histopatho_features.sh
@@ -39,22 +39,28 @@ slide_type=$3
 class_names=$4
 tumor_purity_threshold=$5
 model_name=$6
+is_tcga=$7 # true or false
 
 # ---------------------------------- #
 # ---- create new clinical file ---- #
 # ---------------------------------- #
 
-python $repo_dir/Python/1_extract_histopathological_features/myslim/create_clinical_file.py \
-    --class_names $class_names \
-    --clinical_files_dir $clinical_files_dir \
-    --tumor_purity_threshold $tumor_purity_threshold \
-    --output_dir $output_dir/1_histopathological_features \
-    --path_codebook ${path_codebook}
-
-clinical_file=$output_dir/1_histopathological_features/generated_clinical_file.txt
-
-ls $slides_dir | tee ${output_dir}/list_images.txt
-awk -v a=81 -v b="${class_names}" -v c=41 'FNR==NR{print; next}{split($1, tmp, "."); OFS="\t"; print tmp[1], tmp[1], $1, a, b, c}' $clinical_file ${output_dir}/list_images.txt > $output_dir/1_histopathological_features/final_clinical_file.txt
+if [ "$is_tcga" = true ]
+then
+    python $repo_dir/Python/1_extract_histopathological_features/myslim/create_clinical_file.py \
+        --class_names $class_names \
+        --clinical_files_dir $clinical_files_dir \
+        --tumor_purity_threshold $tumor_purity_threshold \
+        --output_dir $output_dir/1_histopathological_features \
+        --path_codebook ${path_codebook}
+
+    clinical_file=$output_dir/1_histopathological_features/generated_clinical_file.txt
+else
+    clinical_file=$output_dir/1_histopathological_features/tmp_clinical_file.txt
+    ls $slides_dir | tee ${output_dir}/list_images.txt
+    awk -v a=81 -v b="${class_names}" -v c=41 'FNR==NR{print; next}{split($1, tmp, "."); OFS="\t"; print tmp[1], tmp[1], $1, a, b, c}' $clinical_file ${output_dir}/list_images.txt > $output_dir/1_histopathological_features/final_clinical_file.txt
+    clinical_file=$output_dir/1_histopathological_features/final_clinical_file.txt
+fi
 
 # --------------------------------------------------------- #
 # ---- image tiling and image conversion to TF records ---- #
@@ -63,7 +69,7 @@
 python $repo_dir/Python/1_extract_histopathological_features/pre_processing.py \
--slides_folder $slides_dir \ --output_folder $output_dir/1_histopathological_features \ - --clinical_file_path $output_dir/1_histopathological_features/final_clinical_file.txt + --clinical_file_path $clinical_file # ------------------------------------------------------ # # ---- Compute predictions and bottlenecks features ---- # From 67bd161b2fcdbb2290c67d73f5ab926981b39afb Mon Sep 17 00:00:00 2001 From: olapuentesantana Date: Mon, 9 Sep 2024 11:46:46 +0200 Subject: [PATCH 02/10] working for non-TCGA datasets --- .gitignore | 1 + .../myslim/create_tiles_from_slides.py | 2 +- .../pre_processing.py | 8 +++----- data/tmp_clinical_file.txt | 1 + run_scripts/1_extract_histopatho_features.sh | 4 ++-- run_scripts/2_tile_level_cell_type_quantification.sh | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) create mode 100644 data/tmp_clinical_file.txt diff --git a/.gitignore b/.gitignore index 02a6e52..816a037 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,4 @@ spatial_features_matrix_TCGA.csv TEST-* clincial_file*.tsv data/clinical_file_TCGA_SKCM.tsv +*.tar.gz diff --git a/Python/1_extract_histopathological_features/myslim/create_tiles_from_slides.py b/Python/1_extract_histopathological_features/myslim/create_tiles_from_slides.py index 2e476df..eb0e0bc 100644 --- a/Python/1_extract_histopathological_features/myslim/create_tiles_from_slides.py +++ b/Python/1_extract_histopathological_features/myslim/create_tiles_from_slides.py @@ -52,7 +52,7 @@ def create_tiles_from_slides(slides_folder, output_folder, clinical_file_path): print(len(images_for_tiling), 'images available:') counter=1 for slide_filename in images_for_tiling: - if slide_filename.endswith(('.svs','.ndpi')): + if slide_filename.endswith(('.svs','.ndpi', '.tiff', '.tif')): print(counter, ':', slide_filename) slide = OpenSlide("{}/{}".format(slides_folder, slide_filename)) slide_name = slide_filename.split(".")[0] diff --git a/Python/1_extract_histopathological_features/pre_processing.py b/Python/1_extract_histopathological_features/pre_processing.py index 06e5078..bb1ffbd 100755 --- a/Python/1_extract_histopathological_features/pre_processing.py +++ b/Python/1_extract_histopathological_features/pre_processing.py @@ -7,10 +7,6 @@ from myslim.create_tiles_from_slides import create_tiles_from_slides from myslim.datasets.convert import _convert_dataset -# sys.path.append(f"{os.path.dirname(os.getcwd())}/Python/libs") -# REPO_DIR = os.path.dirname(os.getcwd()) - - def execute_preprocessing(slides_folder, output_folder, clinical_file_path, N_shards=320): """ Execute several pre-processing steps necessary for extracting the histopathological features @@ -39,7 +35,9 @@ def execute_preprocessing(slides_folder, output_folder, clinical_file_path, N_sh # Perform image tiling, only kept images of interest create_tiles_from_slides(slides_folder=slides_folder, - output_folder=output_folder, clinical_file_path=clinical_file_path) + output_folder=output_folder, + clinical_file_path=clinical_file_path + ) # File required for training format_tile_data_structure( diff --git a/data/tmp_clinical_file.txt b/data/tmp_clinical_file.txt new file mode 100644 index 0000000..76aa7ff --- /dev/null +++ b/data/tmp_clinical_file.txt @@ -0,0 +1 @@ +slide_submitter_id sample_submitter_id image_file_name percent_tumor_cells class_name class_id diff --git a/run_scripts/1_extract_histopatho_features.sh b/run_scripts/1_extract_histopatho_features.sh index 875ee7d..8c7398c 100755 --- a/run_scripts/1_extract_histopatho_features.sh +++ 
b/run_scripts/1_extract_histopatho_features.sh
@@ -53,10 +53,10 @@ then
     --tumor_purity_threshold $tumor_purity_threshold \
     --output_dir $output_dir/1_histopathological_features \
     --path_codebook ${path_codebook}
-
+
     clinical_file=$output_dir/1_histopathological_features/generated_clinical_file.txt
 else
-    clinical_file=$output_dir/1_histopathological_features/tmp_clinical_file.txt
+    clinical_file=${repo_dir}/data/tmp_clinical_file.txt
     ls $slides_dir | tee ${output_dir}/list_images.txt
     awk -v a=81 -v b="${class_names}" -v c=41 'FNR==NR{print; next}{split($1, tmp, "."); OFS="\t"; print tmp[1], tmp[1], $1, a, b, c}' $clinical_file ${output_dir}/list_images.txt > $output_dir/1_histopathological_features/final_clinical_file.txt
     clinical_file=$output_dir/1_histopathological_features/final_clinical_file.txt
diff --git a/run_scripts/2_tile_level_cell_type_quantification.sh b/run_scripts/2_tile_level_cell_type_quantification.sh
index 989d19c..3ca9e9a 100644
--- a/run_scripts/2_tile_level_cell_type_quantification.sh
+++ b/run_scripts/2_tile_level_cell_type_quantification.sh
@@ -9,7 +9,7 @@
 # ----------------------------------- #
 
 # General setup
-repo_dir=project
+repo_dir="/project"
 
 # command line arguments
 echo "Slide type: $1";

From 613ec87df670cb0825da60401190fd831bec7871 Mon Sep 17 00:00:00 2001
From: olapuentesantana <42207933+olapuentesantana@users.noreply.github.com>
Date: Mon, 9 Sep 2024 11:49:19 +0200
Subject: [PATCH 03/10] Update README.md

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 27c6f20..723edbc 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,9 @@ tumor_purity_threshold=80
 class_names="SKCM_T"
 model_name="inception_v4"
 
+# Indicate whether the slides arise from the TCGA project
+is_tcga=false
+
 ````
 
 ## Output documentation

From d9ef779678dd35f0a422a11319ee7b5748f7340c Mon Sep 17 00:00:00 2001
From: olapuentesantana <42207933+olapuentesantana@users.noreply.github.com>
Date: Mon, 9 Sep 2024 11:52:09 +0200
Subject: [PATCH 04/10] Update README.md

---
 README.md | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/README.md b/README.md
index 723edbc..7da3705 100644
--- a/README.md
+++ b/README.md
@@ -66,6 +66,35 @@ model_name="inception_v4"
 # Indicate whether the slides arise from the TCGA project
 is_tcga=false
 
+````
+6. Run the pipeline by executing `run_pipeline.sh`
+
+````
+export APPTAINER_BINDPATH=${work_dir}/data/:/project/data:ro,${folder_images}:/project/images:ro,${output_dir}:/project/output:rw,${work_dir}/run_scripts:/project/run_scripts:ro,${work_dir}/Python:/project/Python:ro
+
+echo "Run pipeline..."
+echo "Extract histopathological features (1 out of 3)"
+apptainer exec \
+    --cleanenv \
+    -c \
+    ${spotlight_sif} \
+    bash "/project/run_scripts/1_extract_histopatho_features.sh" ${checkpoint} ${clinical_files_dir} ${slide_type} ${class_names} ${tumor_purity_threshold} ${model_name} ${is_tcga}
+
+echo "Tile level cell type quantification (2 out of 3)"
+apptainer exec \
+    --cleanenv \
+    -c \
+    ${spotlight_sif} \
+    bash "/project/run_scripts/2_tile_level_cell_type_quantification.sh" $slide_type
+
+echo "Compute spatial features (3 out of 3)"
+apptainer exec \
+    --cleanenv \
+    -c \
+    ${spotlight_sif} \
+    bash "/project/run_scripts/3_compute_spatial_features.sh" ${slide_type}
+
+echo "COMPLETED!"
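+
+# Editor's note (a hedged sketch, not part of the original run_pipeline.sh):
+# is_tcga is handed to 1_extract_histopatho_features.sh as a plain string ($7)
+# and compared with [ "$is_tcga" = true ], so a small guard near the top of
+# the script could catch typos such as is_tcga=False early:
+#
+#   case "${is_tcga}" in
+#       true|false) ;;
+#       *) echo "ERROR: is_tcga must be 'true' or 'false', got '${is_tcga}'" >&2; exit 1 ;;
+#   esac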
 ````
 
 ## Output documentation

From b7da2d34d6751de86cefa236724c9af35d764dae Mon Sep 17 00:00:00 2001
From: olapuentesantana <42207933+olapuentesantana@users.noreply.github.com>
Date: Fri, 13 Sep 2024 11:47:35 +0200
Subject: [PATCH 05/10] Update README.md

---
 README.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 7da3705..4bd3467 100644
--- a/README.md
+++ b/README.md
@@ -31,12 +31,12 @@ docker save run_spotlight:vfinal -o {output_dir}/spotlight_docker.tar.gz
 apptainer build {output_dir}/spotlight_apptainer.sif docker-archive:spotlight_docker.tar.gz
 ```
 
-2. Add your FF histopathology slides to a subdirectory in the `spotlight_docker` directory, e.g. `data_example/images`
+2. Add your FF histopathology slides to a subdirectory in the `spotlight_docker` directory, e.g. `data_example/images`.
 * Please rename your image files so that their names use only "-" as a separator, following the TCGA sample coding.
 
 3. Download retrained models to extract the histopathological features, available from Fu et al., Nat Cancer, 2020 ([Retrained_Inception_v4](https://www.ebi.ac.uk/biostudies/bioimages/studies/S-BSST292)). Once you unzip the folder, extract the files to the `data/checkpoint/Retrained_Inception_v4/` folder.
 
-4. If a TCGA dataset is used, please download metadata (i.e. "biospecimen -> TSV", unzip and keep slide.tsv), then rename `slide.tsv` to `clinical_file_TCGA_{cancer_type_abbrev}` such as `clinical_file_TCGA_SKCM.tsv` and copy to `/data`. Example dataset TCGA-SKCM can be downloaded [here](https://portal.gdc.cancer.gov/projects/TCGA-SKCM)
+4. If a TCGA dataset is used, please download metadata (i.e. "biospecimen -> TSV", unzip and keep slide.tsv), then rename `slide.tsv` to `clinical_file_TCGA_{cancer_type_abbrev}` such as `clinical_file_TCGA_SKCM.tsv` and copy to `/data`. Example dataset TCGA-SKCM can be downloaded [here](https://portal.gdc.cancer.gov/projects/TCGA-SKCM). For non-TCGA datasets, please omit this step.
 
 5. Set up your paths and variables in `run_pipeline.sh`
 
@@ -55,7 +55,8 @@ output_dir="/path/to/output_dir"
 # Relative to docker, i.e.
start with /data checkpoint="/data/checkpoint/Retrained_Inception_v4/model.ckpt-100000" -clinical_files_dir="/data/path/to/clinical/TCGA/file.tsv" # TCGA files +# TCGA H&E images +# clinical_files_dir="/data/path/to/clinical/TCGA/file.tsv" +# Non-TCGA H&E images clinical_files_dir=NULL # Non-TCGA files # Remaining parameters (this configuration has been tested) From b96e0e8ffc5b29dc5a7e0bbfbdb636c225bc2ac5 Mon Sep 17 00:00:00 2001 From: olapuentesantana <42207933+olapuentesantana@users.noreply.github.com> Date: Fri, 13 Sep 2024 12:01:32 +0200 Subject: [PATCH 07/10] Update README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 169d8dd..cde4613 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,10 @@ is_tcga=false 6. Run the pipeline by executing `run_pipeline.sh` ```` +echo "Create output directory: ${output_dir}..." +mkdir -p ${output_dir} + +echo "Binding directories..." export APPTAINER_BINDPATH=${work_dir}/data/:/project/data:ro,${folder_images}:/project/images:ro,${output_dir}:/project/output:rw,${work_dir}/run_scripts:/project/run_scripts:ro,${work_dir}/Python:/project/Python:ro echo "Run pipeline..." From f93568f4f9f8cf273f45a6cc8c99aad46bf6e0c6 Mon Sep 17 00:00:00 2001 From: joan-yanqiong Date: Fri, 13 Sep 2024 08:52:14 -0400 Subject: [PATCH 08/10] swap OpenSlide with tiffslide --- .gitignore | 3 + .../myslim/create_file_info_train.py | 23 ++--- .../create_list_avail_img_for_tiling.py | 57 ++++++++++++ .../myslim/create_tiles_from_slides.py | 87 ++++++++++--------- .../inception_preprocessing_dataAug.py | 2 +- env_requirements.txt | 2 +- 6 files changed, 119 insertions(+), 55 deletions(-) create mode 100644 Python/1_extract_histopathological_features/myslim/create_list_avail_img_for_tiling.py diff --git a/.gitignore b/.gitignore index 816a037..4e0d86b 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,6 @@ TEST-* clincial_file*.tsv data/clinical_file_TCGA_SKCM.tsv *.tar.gz + +# Skip data files +output_* diff --git a/Python/1_extract_histopathological_features/myslim/create_file_info_train.py b/Python/1_extract_histopathological_features/myslim/create_file_info_train.py index 9429dbf..2cdc0cd 100644 --- a/Python/1_extract_histopathological_features/myslim/create_file_info_train.py +++ b/Python/1_extract_histopathological_features/myslim/create_file_info_train.py @@ -1,3 +1,5 @@ +import tiffslide as openslide +import DL.utils as utils import argparse import os import os.path @@ -8,10 +10,9 @@ REPO_DIR = os.path.dirname(os.getcwd()) # trunk-ignore(flake8/E402) -import DL.utils as utils # trunk-ignore(flake8/E402) -from openslide import OpenSlide + def format_tile_data_structure(slides_folder, output_folder, clinical_file_path): """ @@ -53,16 +54,16 @@ def format_tile_data_structure(slides_folder, output_folder, clinical_file_path) jpg_tiles_df = pd.merge( jpg_tile_names_df, clinical_file, on=["slide_submitter_id"], how="left" ) - # 4) Determine jpeg_quality of slides slide_quality = [] for slide_name in jpg_tiles_df.image_file_name.unique(): print("{}/{}".format(slides_folder, slide_name)) - img = OpenSlide("{}/{}".format(slides_folder, slide_name)) - #print(img.properties.values) - #image_description = img.properties.values.__self__.get("tiff.ImageDescription").split("|")[0] - #image_description_split = image_description.split(" ") - #jpeg_quality = image_description_split[-1] + # img = openslide.OpenSlide("{}/{}".format(slides_folder, slide_name)) + # TODO have to generalize this for TCGA and non-TCGA cases. 
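+        # Editor's sketch (an assumption, not part of this patch): tiffslide
+        # mirrors OpenSlide's properties mapping, so the TCGA branch of the
+        # TODO above could look roughly like:
+        #     img = openslide.OpenSlide("{}/{}".format(slides_folder, slide_name))
+        #     desc = img.properties.get("tiff.ImageDescription") or ""
+        #     jpeg_quality = desc.split("|")[0].split(" ")[-1] if is_tcga else "80"
+        # keeping "80" as the fallback for non-TCGA slides (is_tcga here is a
+        # hypothetical flag, not a variable defined in this module).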
+        # print(img.properties.values)
+        # image_description = img.properties.values.__self__.get("tiff.ImageDescription").split("|")[0]
+        # image_description_split = image_description.split(" ")
+        # jpeg_quality = image_description_split[-1]
         jpeg_quality = "80"
 
         slide_quality.append([slide_name, "RGB" + jpeg_quality])
@@ -76,9 +77,11 @@
 
     # Create output dataframe
     output = jpg_tiles_df[
-        ["tile_path", "class_name", "class_id", "jpeg_quality", "percent_tumor_cells"]
+        ["tile_path", "class_name", "class_id",
+            "jpeg_quality", "percent_tumor_cells"]
     ]
-    output.to_csv(output_folder + "/file_info_train.txt", index=False, sep="\t")
+    output.to_csv(output_folder + "/file_info_train.txt",
+                  index=False, sep="\t")
 
     print("Finished creating the necessary file for computing the features in the next step")
 
diff --git a/Python/1_extract_histopathological_features/myslim/create_list_avail_img_for_tiling.py b/Python/1_extract_histopathological_features/myslim/create_list_avail_img_for_tiling.py
new file mode 100644
index 0000000..888b229
--- /dev/null
+++ b/Python/1_extract_histopathological_features/myslim/create_list_avail_img_for_tiling.py
@@ -0,0 +1,57 @@
+#!/usr/bin/python
+import argparse
+import DL.image as im
+import tiffslide as openslide
+import os
+import sys
+
+import numpy as np
+import pandas as pd
+from PIL import Image
+
+
+def create_list_avail_img_for_tiling(slides_folder, clinical_file_path, output_folder):
+    """
+    List the slides available for tiling
+    Cross-references the whole slide images found in the slides folder against the entries of the clinical file and writes out the intersection. The tiling itself (512 x 512 pixel tiles with a 50-pixel overlap at 20x magnification, with blurred and non-informative tiles removed via the weighted gradient magnitude) happens downstream.
+
+    Source:
+    Fu, Y., Jung, A. W., Torne, R. V., Gonzalez, S., Vöhringer, H., Shmatko, A., Yates, L. R., Jimenez-Linan, M., Moore, L., & Gerstung, M. (2020). Pan-cancer computational histopathology reveals mutations, tumor composition and prognosis. Nature Cancer, 1(8), 800–810.
https://doi.org/10.1038/s43018-020-0085-8 + + Args: + slides_folder (str): path pointing to folder with all whole slide images (.svs files) + clinical_file_path (str): path pointing to file with clinical file + + Returns: + txt containing list of slides available for tiling + """ + + if not os.path.exists(output_folder): + os.makedirs(output_folder) + + # Subset images of interest (present in generated clinical file) + clinical_file = pd.read_csv(clinical_file_path, sep="\t") + clinical_file.dropna(how="all", inplace=True) + clinical_file.drop_duplicates(inplace=True) + clinical_file.drop_duplicates(subset="slide_submitter_id", inplace=True) + subset_images = clinical_file.image_file_name.tolist() + print(subset_images) + + # Check if slides are among our data + available_images = os.listdir(slides_folder) + images_for_tiling = list(set(subset_images) & set(available_images)) + + pd.DataFrame([[name.split(".")[0], name] for name in images_for_tiling], columns=["slide_id", "slide_filename"]).to_csv( + (f"{output_folder}/avail_slides_for_img.csv"), index=False) + + print("Generated list of available images for tiling...") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--slides_folder", help="Set slides folder") + parser.add_argument("--output_folder", help="Set output folder") + parser.add_argument("--clinical_file_path", help="Set clinical file path") + args = parser.parse_args() + create_list_avail_img_for_tiling(slides_folder=args.slides_folder, + output_folder=args.output_folder, clinical_file_path=args.clinical_file_path) diff --git a/Python/1_extract_histopathological_features/myslim/create_tiles_from_slides.py b/Python/1_extract_histopathological_features/myslim/create_tiles_from_slides.py index eb0e0bc..c3f67ca 100644 --- a/Python/1_extract_histopathological_features/myslim/create_tiles_from_slides.py +++ b/Python/1_extract_histopathological_features/myslim/create_tiles_from_slides.py @@ -1,4 +1,5 @@ #!/usr/bin/python +import tiffslide as openslide import os import sys @@ -9,12 +10,9 @@ sys.path.append(f"{os.path.dirname(os.getcwd())}/Python/libs") REPO_DIR = os.path.dirname(os.getcwd()) -# trunk-ignore(flake8/E402) -from openslide import OpenSlide - -# trunk-ignore(flake8/E402) import DL.image as im + def create_tiles_from_slides(slides_folder, output_folder, clinical_file_path): """ Create tiles from slides @@ -43,52 +41,55 @@ def create_tiles_from_slides(slides_folder, output_folder, clinical_file_path): clinical_file.dropna(how="all", inplace=True) clinical_file.drop_duplicates(inplace=True) clinical_file.drop_duplicates(subset="slide_submitter_id", inplace=True) - subset_images=clinical_file.image_file_name.tolist() + subset_images = clinical_file.image_file_name.tolist() print(subset_images) # Check if slides are among our data - available_images=os.listdir(slides_folder) - images_for_tiling=list(set(subset_images) & set(available_images)) + available_images = os.listdir(slides_folder) + images_for_tiling = list(set(subset_images) & set(available_images)) print(len(images_for_tiling), 'images available:') - counter=1 + counter = 1 for slide_filename in images_for_tiling: - if slide_filename.endswith(('.svs','.ndpi', '.tiff', '.tif')): - print(counter, ':', slide_filename) - slide = OpenSlide("{}/{}".format(slides_folder, slide_filename)) - slide_name = slide_filename.split(".")[0] - if ( - str(slide.properties.values.__self__.get("tiff.ImageDescription")).find( - "AppMag = 40" + # Accept different file types + if 
slide_filename.endswith(('.svs', '.ndpi', '.tif')): + print(counter, ':', slide_filename) + slide = openslide.OpenSlide( + "{}/{}".format(slides_folder, slide_filename)) + slide_name = slide_filename.split(".")[0] + if ( + str(slide.properties["tiff.ImageDescription"]).find( + "AppMag = 40" + ) + != -1 + ): + region_size = 1024 + tile_size = 924 + else: + region_size = 512 + tile_size = 462 + [width, height] = slide.dimensions + for x_coord in range(1, width, tile_size): + for y_coord in range(1, height, tile_size): + slide_region = slide.read_region( + location=(x_coord, y_coord), + level=0, + size=(region_size, region_size), ) - != -1 - ): - region_size = 1024 - tile_size = 924 - else: - region_size = 512 - tile_size = 462 - [width, height] = slide.dimensions - for x_coord in range(1, width, tile_size): - for y_coord in range(1, height, tile_size): - slide_region = slide.read_region( - location=(x_coord, y_coord), - level=0, - size=(region_size, region_size), + slide_region_converted = slide_region.convert("RGB") + tile = slide_region_converted.resize( + (512, 512), Image.ANTIALIAS) + grad = im.getGradientMagnitude(np.array(tile)) + unique, counts = np.unique(grad, return_counts=True) + if counts[np.argwhere(unique <= 20)].sum() < 512 * 512 * 0.6: + tile.save( + "{}/{}_{}_{}.jpg".format( + tiles_folder, slide_name, x_coord, y_coord + ), + "JPEG", + optimize=True, + quality=94, ) - slide_region_converted = slide_region.convert("RGB") - tile = slide_region_converted.resize((512, 512), Image.ANTIALIAS) - grad = im.getGradientMagnitude(np.array(tile)) - unique, counts = np.unique(grad, return_counts=True) - if counts[np.argwhere(unique <= 20)].sum() < 512 * 512 * 0.6: - tile.save( - "{}/{}_{}_{}.jpg".format( - tiles_folder, slide_name, x_coord, y_coord - ), - "JPEG", - optimize=True, - quality=94, - ) - counter=counter+1 + counter = counter + 1 print("Finished creating tiles from the given slides") diff --git a/Python/1_extract_histopathological_features/myslim/preprocessing/inception_preprocessing_dataAug.py b/Python/1_extract_histopathological_features/myslim/preprocessing/inception_preprocessing_dataAug.py index f6310fb..68a517d 100644 --- a/Python/1_extract_histopathological_features/myslim/preprocessing/inception_preprocessing_dataAug.py +++ b/Python/1_extract_histopathological_features/myslim/preprocessing/inception_preprocessing_dataAug.py @@ -246,7 +246,7 @@ def preprocess_for_train(image, height, width, bbox, tf.expand_dims(rotated_image, 0)) # crop image in the center - #rotated_image = tf.image.central_crop(rotated_image, 0.6) + # rotated_image = tf.image.central_crop(rotated_image, 0.6) if add_image_summaries: tf.summary.image('5_centralcropped_image', diff --git a/env_requirements.txt b/env_requirements.txt index 1bc1cb1..0221518 100644 --- a/env_requirements.txt +++ b/env_requirements.txt @@ -8,7 +8,7 @@ six==1.16.0 tensorflow==2.11.0 tf_slim==1.1.0 opencv-python==4.6.0.66 -openslide-python==1.2.0 +tiffslide==2.4.0 tornado==6.2 scikit-learn==1.2.0 dask==2022.12.1 From a88a9ab31553be061f9866e02ce38e85da593224 Mon Sep 17 00:00:00 2001 From: joan-yanqiong Date: Mon, 23 Sep 2024 23:31:33 -0400 Subject: [PATCH 09/10] clean-up + implementation of nextflow pipeline --- .../myslim/__init__.py => .gitattributes | 0 .github/.dockstore.yml | 6 + .github/CONTRIBUTING.md | 125 +++ .github/ISSUE_TEMPLATE/bug_report.yml | 50 ++ .github/ISSUE_TEMPLATE/config.yml | 7 + .github/ISSUE_TEMPLATE/feature_request.yml | 11 + .github/PULL_REQUEST_TEMPLATE.md | 26 + .github/workflows/awsfulltest.yml | 
39 + .github/workflows/awstest.yml | 33 + .github/workflows/branch.yml | 44 + .github/workflows/ci.yml | 46 + .github/workflows/clean-up.yml | 24 + .github/workflows/download_pipeline.yml | 86 ++ .github/workflows/fix-linting.yml | 89 ++ .github/workflows/linting.yml | 68 ++ .github/workflows/linting_comment.yml | 28 + .github/workflows/release-announcements.yml | 75 ++ .gitignore | 32 +- .../__init__.py => CITATIONS.md | 0 Dockerfile | 28 +- .../codebook.txt | 42 - .../myslim/create_clinical_file.py | 154 ---- .../myslim/create_file_info_train.py | 100 --- .../myslim/create_tiles_from_slides.py | 98 -- .../myslim/post_process_features.py | 95 -- .../myslim/post_process_predictions.py | 215 ----- .../myslim/python_test.py | 7 - .../post_processing.py | 49 - .../pre_processing.py | 83 -- Python/2_train_multitask_models/checks.ipynb | 841 ------------------ .../processing_transcriptomics.py | 243 ----- .../run_TF_pipeline.py | 220 ----- .../compute_clustering_features.py | 262 ------ .../compute_network_features.py | 175 ---- .../computing_features.py | 681 -------------- README.md | 159 ++-- Python/libs/DL/__init__.py => assets/NO_FILE | 0 .../TF_models/SKCM_FF/CAFs/outer_models.pkl | Bin .../TF_models/SKCM_FF/CAFs/x_train_scaler.pkl | Bin .../SKCM_FF/T_cells/outer_models.pkl | Bin .../SKCM_FF/T_cells/x_train_scaler.pkl | Bin .../endothelial_cells/outer_models.pkl | Bin .../endothelial_cells/x_train_scaler.pkl | Bin .../SKCM_FF/tumor_purity/outer_models.pkl | Bin .../SKCM_FF/tumor_purity/x_train_scaler.pkl | Bin assets/adaptivecard.json | 67 ++ .../cell_types.txt | 0 assets/email_template.html | 53 ++ assets/email_template.txt | 39 + assets/methods_description_template.yml | 29 + assets/multiqc_config.yml | 15 + assets/samplesheet.csv | 3 + assets/schema_input.json | 33 + assets/sendmail_template.txt | 53 ++ assets/slackreport.json | 34 + .../task_selection_names.pkl | Bin .../tissue_classes.csv | 0 {data => assets}/tmp_clinical_file.txt | 0 .../myslim => bin}/bottleneck_predict.py | 28 +- bin/clustering_schc_individual.py | 143 +++ bin/clustering_schc_simultaneous.py | 138 +++ bin/combine_all_spatial_features.py | 95 ++ bin/combine_clustering_features.py | 76 ++ bin/combine_network_features.py | 86 ++ bin/compute_colocalization.py | 155 ++++ bin/compute_connectedness.py | 153 ++++ bin/compute_frac_high.py | 78 ++ bin/compute_nclusters.py | 115 +++ bin/compute_node_degree_with_es.py | 197 ++++ bin/compute_nshortest_with_max_length.py | 191 ++++ ...mpute_proximity_from_indiv_schc_between.py | 148 +++ ...mpute_proximity_from_indiv_schc_combine.py | 78 ++ ...ompute_proximity_from_indiv_schc_within.py | 159 ++++ ...ompute_proximity_from_simultaneous_schc.py | 150 ++++ bin/create_clinical_file.py | 217 +++++ .../create_list_avail_img_for_tiling.py | 65 +- bin/create_tiles_from_slides.py | 118 +++ bin/format_tile_data_structure.py | 130 +++ bin/generate_graphs.py | 98 ++ bin/post_process_features.py | 132 +++ bin/post_process_predictions.py | 242 +++++ bin/pre_processing.py | 98 ++ .../tile_level_cell_type_quantification.py | 117 ++- conf/base.config | 139 +++ conf/igenomes.config | 440 +++++++++ conf/modules.config | 120 +++ conf/test.config | 29 + conf/test_full.config | 24 + custom.config | 114 +++ docs/README.md | 10 + docs/images/mqc_fastqc_adapter.png | Bin 0 -> 23458 bytes docs/images/mqc_fastqc_counts.png | Bin 0 -> 33918 bytes docs/images/mqc_fastqc_quality.png | Bin 0 -> 55769 bytes docs/output.md | 71 ++ docs/usage.md | 226 +++++ {Python/libs/MFP => lib/DL}/__init__.py | 0 {Python/libs 
=> lib}/DL/image.py | 0 {Python/libs => lib}/DL/utils.py | 0 .../MFP/portraits => lib/MFP}/__init__.py | 0 {Python/libs => lib}/MFP/license.md | 0 .../MFP/portraits}/__init__.py | 0 {Python/libs => lib}/MFP/portraits/utils.py | 0 .../MFP/signatures/gene_signatures.gmt | 0 .../MFP/signatures/gene_signatures_order.tsv | 0 .../libs/model => lib/features}/__init__.py | 0 {Python/libs => lib}/features/clustering.py | 0 {Python/libs => lib}/features/features.py | 8 +- {Python/libs => lib}/features/graphs.py | 0 lib/features/lcc.py | 93 ++ lib/features/tests/test_determine_lcc.py | 9 + {Python/libs => lib}/features/utils.py | 0 {Python/libs => lib}/features/vis.py | 0 lib/model/__init__.py | 0 {Python/libs => lib}/model/constants.py | 4 +- {Python/libs => lib}/model/evaluate.py | 0 {Python/libs => lib}/model/preprocessing.py | 0 {Python/libs => lib}/model/utils.py | 0 lib/myslim/__init__.py | 0 lib/myslim/bottleneck_predict.py | 124 +++ .../myslim/datasets/__init__.py | 0 .../myslim/datasets/convert.py | 0 .../myslim/datasets/dataset_factory.py | 0 .../myslim/datasets/dataset_utils.py | 0 .../myslim/datasets/tumors_all.py | 0 .../myslim/deployment/__init__.py | 0 .../myslim/deployment/model_deploy.py | 0 .../myslim/eval_image_classifier.py | 0 .../myslim/nets/__init__.py | 0 .../myslim/nets/inception.py | 0 .../myslim/nets/inception_alt.py | 0 .../myslim/nets/inception_utils.py | 0 .../myslim/nets/inception_v4.py | 0 .../myslim/nets/inception_v4_alt.py | 0 .../myslim/nets/nets_factory.py | 0 .../myslim/nets/overfeat.py | 0 lib/myslim/post_process_features.py | 134 +++ lib/myslim/post_process_predictions.py | 240 +++++ .../myslim/preprocessing/__init__.py | 0 .../preprocessing/inception_preprocessing.py | 0 .../inception_preprocessing_dataAug.py | 0 .../preprocessing/preprocessing_factory.py | 0 .../myslim/run/bottleneck_predict.sh | 0 .../myslim/run/convert.sh | 0 .../myslim/run/eval.sh | 0 .../myslim/run/load_inception_v4.sh | 0 .../myslim/run/load_inception_v4_alt.sh | 0 .../myslim/train_image_classifier.py | 0 .../myslim/train_image_classifier_jpeg.py | 0 run_pipeline.sh | 70 -- run_scripts/1_extract_histopatho_features.sh | 100 --- .../2_tile_level_cell_type_quantification.sh | 45 - run_scripts/3_compute_spatial_features.sh | 75 -- run_scripts/create_tmp_clinical_file.sh | 5 - run_scripts/requirements.txt | 15 - run_scripts/task_selection_names.pkl | Bin 478 -> 0 bytes tower.yml | 0 156 files changed, 6102 insertions(+), 3767 deletions(-) rename Python/1_extract_histopathological_features/myslim/__init__.py => .gitattributes (100%) create mode 100755 .github/.dockstore.yml create mode 100755 .github/CONTRIBUTING.md create mode 100755 .github/ISSUE_TEMPLATE/bug_report.yml create mode 100755 .github/ISSUE_TEMPLATE/config.yml create mode 100755 .github/ISSUE_TEMPLATE/feature_request.yml create mode 100755 .github/PULL_REQUEST_TEMPLATE.md create mode 100755 .github/workflows/awsfulltest.yml create mode 100755 .github/workflows/awstest.yml create mode 100755 .github/workflows/branch.yml create mode 100755 .github/workflows/ci.yml create mode 100755 .github/workflows/clean-up.yml create mode 100755 .github/workflows/download_pipeline.yml create mode 100755 .github/workflows/fix-linting.yml create mode 100755 .github/workflows/linting.yml create mode 100755 .github/workflows/linting_comment.yml create mode 100755 .github/workflows/release-announcements.yml rename Python/3_spatial_characterization/__init__.py => CITATIONS.md (100%) delete mode 100644 
Python/1_extract_histopathological_features/codebook.txt delete mode 100644 Python/1_extract_histopathological_features/myslim/create_clinical_file.py delete mode 100644 Python/1_extract_histopathological_features/myslim/create_file_info_train.py delete mode 100644 Python/1_extract_histopathological_features/myslim/create_tiles_from_slides.py delete mode 100644 Python/1_extract_histopathological_features/myslim/post_process_features.py delete mode 100644 Python/1_extract_histopathological_features/myslim/post_process_predictions.py delete mode 100644 Python/1_extract_histopathological_features/myslim/python_test.py delete mode 100644 Python/1_extract_histopathological_features/post_processing.py delete mode 100755 Python/1_extract_histopathological_features/pre_processing.py delete mode 100755 Python/2_train_multitask_models/checks.ipynb delete mode 100644 Python/2_train_multitask_models/processing_transcriptomics.py delete mode 100644 Python/2_train_multitask_models/run_TF_pipeline.py delete mode 100644 Python/3_spatial_characterization/compute_clustering_features.py delete mode 100644 Python/3_spatial_characterization/compute_network_features.py delete mode 100755 Python/3_spatial_characterization/computing_features.py rename Python/libs/DL/__init__.py => assets/NO_FILE (100%) mode change 100644 => 100755 rename {data => assets}/TF_models/SKCM_FF/CAFs/outer_models.pkl (100%) mode change 100644 => 100755 rename {data => assets}/TF_models/SKCM_FF/CAFs/x_train_scaler.pkl (100%) mode change 100644 => 100755 rename {data => assets}/TF_models/SKCM_FF/T_cells/outer_models.pkl (100%) mode change 100644 => 100755 rename {data => assets}/TF_models/SKCM_FF/T_cells/x_train_scaler.pkl (100%) mode change 100644 => 100755 rename {data => assets}/TF_models/SKCM_FF/endothelial_cells/outer_models.pkl (100%) mode change 100644 => 100755 rename {data => assets}/TF_models/SKCM_FF/endothelial_cells/x_train_scaler.pkl (100%) mode change 100644 => 100755 rename {data => assets}/TF_models/SKCM_FF/tumor_purity/outer_models.pkl (100%) mode change 100644 => 100755 rename {data => assets}/TF_models/SKCM_FF/tumor_purity/x_train_scaler.pkl (100%) mode change 100644 => 100755 create mode 100755 assets/adaptivecard.json rename {Python/2_train_multitask_models => assets}/cell_types.txt (100%) mode change 100644 => 100755 create mode 100755 assets/email_template.html create mode 100755 assets/email_template.txt create mode 100755 assets/methods_description_template.yml create mode 100755 assets/multiqc_config.yml create mode 100755 assets/samplesheet.csv create mode 100755 assets/schema_input.json create mode 100755 assets/sendmail_template.txt create mode 100755 assets/slackreport.json rename {Python/2_train_multitask_models => assets}/task_selection_names.pkl (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => assets}/tissue_classes.csv (100%) mode change 100644 => 100755 rename {data => assets}/tmp_clinical_file.txt (100%) rename {Python/1_extract_histopathological_features/myslim => bin}/bottleneck_predict.py (85%) mode change 100644 => 100755 create mode 100755 bin/clustering_schc_individual.py create mode 100755 bin/clustering_schc_simultaneous.py create mode 100755 bin/combine_all_spatial_features.py create mode 100755 bin/combine_clustering_features.py create mode 100755 bin/combine_network_features.py create mode 100755 bin/compute_colocalization.py create mode 100755 bin/compute_connectedness.py create mode 100755 bin/compute_frac_high.py create mode 100755 
bin/compute_nclusters.py create mode 100755 bin/compute_node_degree_with_es.py create mode 100755 bin/compute_nshortest_with_max_length.py create mode 100755 bin/compute_proximity_from_indiv_schc_between.py create mode 100755 bin/compute_proximity_from_indiv_schc_combine.py create mode 100755 bin/compute_proximity_from_indiv_schc_within.py create mode 100755 bin/compute_proximity_from_simultaneous_schc.py create mode 100755 bin/create_clinical_file.py rename {Python/1_extract_histopathological_features/myslim => bin}/create_list_avail_img_for_tiling.py (53%) mode change 100644 => 100755 create mode 100755 bin/create_tiles_from_slides.py create mode 100755 bin/format_tile_data_structure.py create mode 100755 bin/generate_graphs.py create mode 100755 bin/post_process_features.py create mode 100755 bin/post_process_predictions.py create mode 100755 bin/pre_processing.py rename {Python/2_train_multitask_models => bin}/tile_level_cell_type_quantification.py (75%) mode change 100644 => 100755 create mode 100755 conf/base.config create mode 100755 conf/igenomes.config create mode 100755 conf/modules.config create mode 100755 conf/test.config create mode 100755 conf/test_full.config create mode 100755 custom.config create mode 100755 docs/README.md create mode 100755 docs/images/mqc_fastqc_adapter.png create mode 100755 docs/images/mqc_fastqc_counts.png create mode 100755 docs/images/mqc_fastqc_quality.png create mode 100755 docs/output.md create mode 100755 docs/usage.md rename {Python/libs/MFP => lib/DL}/__init__.py (100%) mode change 100644 => 100755 rename {Python/libs => lib}/DL/image.py (100%) mode change 100644 => 100755 rename {Python/libs => lib}/DL/utils.py (100%) mode change 100644 => 100755 rename {Python/libs/MFP/portraits => lib/MFP}/__init__.py (100%) mode change 100644 => 100755 rename {Python/libs => lib}/MFP/license.md (100%) mode change 100644 => 100755 rename {Python/libs/features => lib/MFP/portraits}/__init__.py (100%) mode change 100644 => 100755 rename {Python/libs => lib}/MFP/portraits/utils.py (100%) mode change 100644 => 100755 rename {Python/libs => lib}/MFP/signatures/gene_signatures.gmt (100%) mode change 100644 => 100755 rename {Python/libs => lib}/MFP/signatures/gene_signatures_order.tsv (100%) mode change 100644 => 100755 rename {Python/libs/model => lib/features}/__init__.py (100%) mode change 100644 => 100755 rename {Python/libs => lib}/features/clustering.py (100%) mode change 100644 => 100755 rename {Python/libs => lib}/features/features.py (99%) mode change 100644 => 100755 rename {Python/libs => lib}/features/graphs.py (100%) mode change 100644 => 100755 create mode 100755 lib/features/lcc.py create mode 100755 lib/features/tests/test_determine_lcc.py rename {Python/libs => lib}/features/utils.py (100%) mode change 100644 => 100755 rename {Python/libs => lib}/features/vis.py (100%) mode change 100644 => 100755 create mode 100755 lib/model/__init__.py rename {Python/libs => lib}/model/constants.py (89%) mode change 100644 => 100755 rename {Python/libs => lib}/model/evaluate.py (100%) mode change 100644 => 100755 rename {Python/libs => lib}/model/preprocessing.py (100%) mode change 100644 => 100755 rename {Python/libs => lib}/model/utils.py (100%) mode change 100644 => 100755 create mode 100755 lib/myslim/__init__.py create mode 100755 lib/myslim/bottleneck_predict.py rename {Python/1_extract_histopathological_features => lib}/myslim/datasets/__init__.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => 
lib}/myslim/datasets/convert.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/datasets/dataset_factory.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/datasets/dataset_utils.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/datasets/tumors_all.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/deployment/__init__.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/deployment/model_deploy.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/eval_image_classifier.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/nets/__init__.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/nets/inception.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/nets/inception_alt.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/nets/inception_utils.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/nets/inception_v4.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/nets/inception_v4_alt.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/nets/nets_factory.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/nets/overfeat.py (100%) mode change 100644 => 100755 create mode 100755 lib/myslim/post_process_features.py create mode 100755 lib/myslim/post_process_predictions.py rename {Python/1_extract_histopathological_features => lib}/myslim/preprocessing/__init__.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/preprocessing/inception_preprocessing.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/preprocessing/inception_preprocessing_dataAug.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/preprocessing/preprocessing_factory.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/run/bottleneck_predict.sh (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/run/convert.sh (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/run/eval.sh (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/run/load_inception_v4.sh (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/run/load_inception_v4_alt.sh (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/train_image_classifier.py (100%) mode change 100644 => 100755 rename {Python/1_extract_histopathological_features => lib}/myslim/train_image_classifier_jpeg.py (100%) mode change 100644 => 100755 delete mode 100644 run_pipeline.sh delete mode 100755 run_scripts/1_extract_histopatho_features.sh delete mode 100644 
run_scripts/2_tile_level_cell_type_quantification.sh delete mode 100644 run_scripts/3_compute_spatial_features.sh delete mode 100644 run_scripts/create_tmp_clinical_file.sh delete mode 100644 run_scripts/requirements.txt delete mode 100644 run_scripts/task_selection_names.pkl create mode 100644 tower.yml diff --git a/Python/1_extract_histopathological_features/myslim/__init__.py b/.gitattributes similarity index 100% rename from Python/1_extract_histopathological_features/myslim/__init__.py rename to .gitattributes diff --git a/.github/.dockstore.yml b/.github/.dockstore.yml new file mode 100755 index 0000000..191fabd --- /dev/null +++ b/.github/.dockstore.yml @@ -0,0 +1,6 @@ +# Dockstore config version, not pipeline version +version: 1.2 +workflows: + - subclass: nfl + primaryDescriptorPath: /nextflow.config + publish: True diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100755 index 0000000..6576937 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,125 @@ +# nf-core/spotlight: Contributing Guidelines + +Hi there! +Many thanks for taking an interest in improving nf-core/spotlight. + +We try to manage the required tasks for nf-core/spotlight using GitHub issues, you probably came to this page when creating one. +Please use the pre-filled template to save time. + +However, don't be put off by this template - other more general issues and suggestions are welcome! +Contributions to the code are even more welcome ;) + +> [!NOTE] +> If you need help using or modifying nf-core/spotlight then the best place to ask is on the nf-core Slack [#spotlight](https://nfcore.slack.com/channels/spotlight) channel ([join our Slack here](https://nf-co.re/join/slack)). + +## Contribution workflow + +If you'd like to write some code for nf-core/spotlight, the standard workflow is as follows: + +1. Check that there isn't already an issue about your idea in the [nf-core/spotlight issues](https://github.com/nf-core/spotlight/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this +2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/spotlight repository](https://github.com/nf-core/spotlight) to your GitHub account +3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) +4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). +5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged + +If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). + +## Tests + +You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nf-test test --profile debug,test,docker --verbose +``` + +When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. +Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. 
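+
+(Editor's addition, hedged: the test path below is an assumption — point it at
+wherever this pipeline's nf-test files actually live.) While iterating, nf-test
+can also run a single test file instead of the whole suite:
+
+```bash
+nf-test test tests/main.nf.test --profile debug,test,docker --verbose
+```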
+ +There are typically two types of tests that run: + +### Lint tests + +`nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. +To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint ` command. + +If any failures or warnings are encountered, please follow the listed URL for more documentation. + +### Pipeline tests + +Each `nf-core` pipeline should be set up with a minimal set of test-data. +`GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully. +If there are any failures then the automated tests fail. +These tests are run both with the latest available version of `Nextflow` and also the minimum required version that is stated in the pipeline code. + +## Patch + +:warning: Only in the unlikely and regretful event of a release happening with a bug. + +- On your own fork, make a new branch `patch` based on `upstream/master`. +- Fix the bug, and bump version (X.Y.Z+1). +- A PR should be made on `master` from patch to directly this particular bug. + +## Getting help + +For further information/help, please consult the [nf-core/spotlight documentation](https://nf-co.re/spotlight/usage) and don't hesitate to get in touch on the nf-core Slack [#spotlight](https://nfcore.slack.com/channels/spotlight) channel ([join our Slack here](https://nf-co.re/join/slack)). + +## Pipeline contribution conventions + +To make the nf-core/spotlight code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written. + +### Adding a new step + +If you wish to contribute a new step, please use the following coding standards: + +1. Define the corresponding input channel into your new process from the expected previous process channel +2. Write the process block (see below). +3. Define the output channel if needed (see below). +4. Add any new parameters to `nextflow.config` with a default (see below). +5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core schema build` tool). +6. Add sanity checks and validation for all relevant parameters. +7. Perform local tests to validate that the new code works as expected. +8. If applicable, add a new test command in `.github/workflow/ci.yml`. +9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://https://multiqc.info/) module. +10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. + +### Default values + +Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope. + +Once there, use `nf-core schema build` to add to `nextflow_schema.json`. + +### Default processes resource requirements + +Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. 
A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. + +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. + +### Naming schemes + +Please use the following naming schemes, to make it easy to understand what is going where. + +- initial process channel: `ch_output_from_` +- intermediate and terminal channels: `ch__for_` + +### Nextflow version bumping + +If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]` + +### Images and figures + +For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. + +To get started: + +- Open the repo in [Codespaces](https://github.com/nf-core/spotlight/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100755 index 0000000..a75bac4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,50 @@ +name: Bug report +description: Report something that is broken or incorrect +labels: bug +body: + - type: markdown + attributes: + value: | + Before you post this issue, please check the documentation: + + - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) + - [nf-core/spotlight pipeline documentation](https://nf-co.re/spotlight/usage) + + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true + + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal. + render: console + placeholder: | + $ nextflow run ... + + Some output where something broke + + - type: textarea + id: files + attributes: + label: Relevant files + description: | + Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed. + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. + + - type: textarea + id: system + attributes: + label: System information + description: | + * Nextflow version _(eg. 23.04.0)_ + * Hardware _(eg. HPC, Desktop, Cloud)_ + * Executor _(eg. slurm, local, awsbatch)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + * Version of nf-core/spotlight _(eg. 
1.1, 1.5, 1.8.2)_ diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100755 index 0000000..3689217 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,7 @@ +contact_links: + - name: Join nf-core + url: https://nf-co.re/join + about: Please join the nf-core community here + - name: "Slack #spotlight channel" + url: https://nfcore.slack.com/channels/spotlight + about: Discussion about the nf-core/spotlight pipeline diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100755 index 0000000..d2a1c0f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,11 @@ +name: Feature request +description: Suggest an idea for the nf-core/spotlight pipeline +labels: enhancement +body: + - type: textarea + id: description + attributes: + label: Description of feature + description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. + validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100755 index 0000000..e398068 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,26 @@ + + +## PR checklist + +- [ ] This comment contains a description of changes (with reason). +- [ ] If you've fixed a bug or added code that should be tested, add tests! +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/spotlight/tree/master/.github/CONTRIBUTING.md) +- [ ] If necessary, also make a PR on the nf-core/spotlight _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] Make sure your code lints (`nf-core lint`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). +- [ ] Usage Documentation in `docs/usage.md` is updated. +- [ ] Output Documentation in `docs/output.md` is updated. +- [ ] `CHANGELOG.md` is updated. +- [ ] `README.md` is updated (including new tool citations and authors/contributors). diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml new file mode 100755 index 0000000..52f7b86 --- /dev/null +++ b/.github/workflows/awsfulltest.yml @@ -0,0 +1,39 @@ +name: nf-core AWS full size tests +# This workflow is triggered on published releases. +# It can be additionally triggered manually with GitHub actions workflow dispatch button. 
+# It runs the -profile 'test_full' on AWS batch
+
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+jobs:
+  run-platform:
+    name: Run AWS full tests
+    if: github.repository == 'nf-core/spotlight'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Launch workflow via Seqera Platform
+        uses: seqeralabs/action-tower-launch@v2
+        # TODO nf-core: You can customise AWS full pipeline tests as required
+        # Add full size test data (but still relatively small datasets for few samples)
+        # on the `test_full.config` test runs with only one set of parameters
+        with:
+          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
+          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
+          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
+          revision: ${{ github.sha }}
+          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/spotlight/work-${{ github.sha }}
+          parameters: |
+            {
+              "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}",
+              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/spotlight/results-${{ github.sha }}"
+            }
+          profiles: test_full
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: Seqera Platform debug log file
+          path: |
+            seqera_platform_action_*.log
+            seqera_platform_action_*.json
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
new file mode 100755
index 0000000..4b0c80b
--- /dev/null
+++ b/.github/workflows/awstest.yml
@@ -0,0 +1,33 @@
+name: nf-core AWS test
+# This workflow can be triggered manually with the GitHub actions workflow dispatch button.
+# It runs the -profile 'test' on AWS batch
+
+on:
+  workflow_dispatch:
+jobs:
+  run-platform:
+    name: Run AWS tests
+    if: github.repository == 'nf-core/spotlight'
+    runs-on: ubuntu-latest
+    steps:
+      # Launch workflow using Seqera Platform CLI tool action
+      - name: Launch workflow via Seqera Platform
+        uses: seqeralabs/action-tower-launch@v2
+        with:
+          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
+          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
+          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
+          revision: ${{ github.sha }}
+          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/spotlight/work-${{ github.sha }}
+          parameters: |
+            {
+              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/spotlight/results-test-${{ github.sha }}"
+            }
+          profiles: test
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: Seqera Platform debug log file
+          path: |
+            seqera_platform_action_*.log
+            seqera_platform_action_*.json
diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml
new file mode 100755
index 0000000..bb06097
--- /dev/null
+++ b/.github/workflows/branch.yml
@@ -0,0 +1,44 @@
+name: nf-core branch protection
+# This workflow is triggered on PRs to master branch on the repository
+# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev`
+on:
+  pull_request_target:
+    branches: [master]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches
+      - name: Check PRs
+        if: github.repository == 'nf-core/spotlight'
+        run: |
+          { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/spotlight ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]
+
+      # If the above check failed, post a comment on the PR explaining the failure
+      # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets
+      - name: Post PR comment
+        if: failure()
+        uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2
+        with:
+          message: |
+            ## This PR is against the `master` branch :x:
+
+            * Do not close this PR
+            * Click _Edit_ and change the `base` to `dev`
+            * This CI test will remain failed until you push a new commit
+
+            ---
+
+            Hi @${{ github.event.pull_request.user.login }},
+
+            It looks like this pull-request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch.
+            The `master` branch on nf-core repositories should always contain code from the latest release.
+            Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch.
+
+            You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page.
+            Note that even after this, the test will continue to show as failing until you push a new commit.
+
+            Thanks again for your contribution!
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+          allow-repeats: false
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100755
index 0000000..4bec948
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,46 @@
+name: nf-core CI
+# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors
+on:
+  push:
+    branches:
+      - dev
+  pull_request:
+  release:
+    types: [published]
+
+env:
+  NXF_ANSI_LOG: false
+
+concurrency:
+  group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
+  cancel-in-progress: true
+
+jobs:
+  test:
+    name: Run pipeline with test data
+    # Only run on push if this is the nf-core dev branch (merged PRs)
+    if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/spotlight') }}"
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        NXF_VER:
+          - "23.04.0"
+          - "latest-everything"
+    steps:
+      - name: Check out pipeline code
+        uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
+
+      - name: Install Nextflow
+        uses: nf-core/setup-nextflow@v2
+        with:
+          version: "${{ matrix.NXF_VER }}"
+
+      - name: Disk space cleanup
+        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+
+      - name: Run pipeline with test data
+        # TODO nf-core: You can customise CI pipeline run tests as required
+        # For example: adding multiple test runs with different parameters
+        # Remember that you can parallelise this by using strategy.matrix
+        run: |
+          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml
new file mode 100755
index 0000000..0b6b1f2
--- /dev/null
+++ b/.github/workflows/clean-up.yml
@@ -0,0 +1,24 @@
+name: "Close user-tagged issues and PRs"
+on:
+  schedule:
+    - cron: "0 0 * * 0" # Once a week
+
+jobs:
+  clean-up:
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
+    steps:
+      - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9
+        with:
+          stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days."
+          stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful."
+          close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity."
+          days-before-stale: 30
+          days-before-close: 20
+          days-before-pr-close: -1
+          any-of-labels: "awaiting-changes,awaiting-feedback"
+          exempt-issue-labels: "WIP"
+          exempt-pr-labels: "WIP"
+          repo-token: "${{ secrets.GITHUB_TOKEN }}"
diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml
new file mode 100755
index 0000000..2d20d64
--- /dev/null
+++ b/.github/workflows/download_pipeline.yml
@@ -0,0 +1,86 @@
+name: Test successful pipeline download with 'nf-core download'
+
+# Run the workflow when:
+#  - dispatched manually
+#  - when a PR is opened or reopened to master branch
+#  - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev.
+on:
+  workflow_dispatch:
+    inputs:
+      testbranch:
+        description: "The specific branch you wish to utilize for the test execution of nf-core download."
+        required: true
+        default: "dev"
+  pull_request:
+    types:
+      - opened
+      - edited
+      - synchronize
+    branches:
+      - master
+  pull_request_target:
+    branches:
+      - master
+
+env:
+  NXF_ANSI_LOG: false
+
+jobs:
+  download:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Install Nextflow
+        uses: nf-core/setup-nextflow@v2
+
+      - name: Disk space cleanup
+        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+
+      - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
+        with:
+          python-version: "3.12"
+          architecture: "x64"
+      - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7
+        with:
+          singularity-version: 3.8.3
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install git+https://github.com/nf-core/tools.git@dev
+
+      - name: Get the repository name and current branch set as environment variable
+        run: |
+          echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV}
+          echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV}
+          echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV}
+
+      - name: Download the pipeline
+        env:
+          NXF_SINGULARITY_CACHEDIR: ./
+        run: |
+          nf-core download ${{ env.REPO_LOWERCASE }} \
+            --revision ${{ env.REPO_BRANCH }} \
+            --outdir ./${{ env.REPOTITLE_LOWERCASE }} \
+            --compress "none" \
+            --container-system 'singularity' \
+            --container-library "quay.io" -l "docker.io" -l "ghcr.io" \
+            --container-cache-utilisation 'amend' \
+            --download-configuration
+
+      - name: Inspect download
+        run: tree ./${{ env.REPOTITLE_LOWERCASE }}
+
+      - name: Run the downloaded pipeline (stub)
+        id: stub_run_pipeline
+        continue-on-error: true
+        env:
+          NXF_SINGULARITY_CACHEDIR: ./
+          NXF_SINGULARITY_HOME_MOUNT: true
+        run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results
+      - name: Run the downloaded pipeline (stub run not supported)
+        id: run_pipeline
+        if: ${{ steps.stub_run_pipeline.outcome == 'failure' }}
+        env:
+          NXF_SINGULARITY_CACHEDIR: ./
+          NXF_SINGULARITY_HOME_MOUNT: true
+        run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results
diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml
new file mode 100755
index 0000000..7d075b5
--- /dev/null
+++ b/.github/workflows/fix-linting.yml
@@ -0,0 +1,89 @@
+name: Fix linting from a comment
+on:
+  issue_comment:
+    types: [created]
+
+jobs:
+  fix-linting:
+    # Only run if comment is on a PR with the main repo, and if it contains the magic keywords
+    if: >
+      contains(github.event.comment.html_url, '/pull/') &&
+      contains(github.event.comment.body, '@nf-core-bot fix linting') &&
+      github.repository == 'nf-core/spotlight'
+    runs-on: ubuntu-latest
+    steps:
+      # Use the @nf-core-bot token to check out so we can push later
+      - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
+        with:
+          token: ${{ secrets.nf_core_bot_auth_token }}
+
+      # indication that the linting is being fixed
+      - name: React on comment
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: eyes
+
+      # Action runs on the issue comment, so we don't get the PR by default
+      # Use the gh cli to check out the PR
+      - name: Checkout Pull Request
+        run: gh pr checkout ${{ github.event.issue.number }}
+        env:
+          GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }}
+
+      # Install and run pre-commit
+      - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
+        with:
+          python-version: "3.12"
+
+      - name: Install pre-commit
+        run: pip install pre-commit
+
+      - name: Run pre-commit
+        id: pre-commit
+        run: pre-commit run --all-files
+        continue-on-error: true
+
+      # indication that the linting has finished
+      - name: React if linting finished successfully
+        if: steps.pre-commit.outcome == 'success'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: "+1"
+
+      - name: Commit & push changes
+        id: commit-and-push
+        if: steps.pre-commit.outcome == 'failure'
+        run: |
+          git config user.email "core@nf-co.re"
+          git config user.name "nf-core-bot"
+          git config push.default upstream
+          git add .
+          git status
+          git commit -m "[automated] Fix code linting"
+          git push
+
+      - name: React if linting errors were fixed
+        id: react-if-fixed
+        if: steps.commit-and-push.outcome == 'success'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: hooray
+
+      - name: React if linting errors were not fixed
+        if: steps.commit-and-push.outcome == 'failure'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: confused
+
+      - name: Comment if linting errors were not fixed
+        if: steps.commit-and-push.outcome == 'failure'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          issue-number: ${{ github.event.issue.number }}
+          body: |
+            @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually.
+            See [CI log](https://github.com/nf-core/spotlight/actions/runs/${{ github.run_id }}) for more details.
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
new file mode 100755
index 0000000..1fcafe8
--- /dev/null
+++ b/.github/workflows/linting.yml
@@ -0,0 +1,68 @@
+name: nf-core linting
+# This workflow is triggered on pushes and PRs to the repository.
+# It runs the `nf-core lint` and markdown lint tests to ensure
+# that the code meets the nf-core guidelines.
+on:
+  push:
+    branches:
+      - dev
+  pull_request:
+  release:
+    types: [published]
+
+jobs:
+  pre-commit:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
+        with:
+          python-version: "3.12"
+
+      - name: Install pre-commit
+        run: pip install pre-commit
+
+      - name: Run pre-commit
+        run: pre-commit run --all-files
+
+  nf-core:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out pipeline code
+        uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
+
+      - name: Install Nextflow
+        uses: nf-core/setup-nextflow@v2
+
+      - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
+        with:
+          python-version: "3.12"
+          architecture: "x64"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install nf-core
+
+      - name: Run nf-core lint
+        env:
+          GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }}
+        run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md
+
+      - name: Save PR number
+        if: ${{ always() }}
+        run: echo ${{ github.event.pull_request.number }} > PR_number.txt
+
+      - name: Upload linting log file artifact
+        if: ${{ always() }}
+        uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4
+        with:
+          name: linting-logs
+          path: |
+            lint_log.txt
+            lint_results.md
+            PR_number.txt
diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml
new file mode 100755
index 0000000..40acc23
--- /dev/null
+++ b/.github/workflows/linting_comment.yml
@@ -0,0 +1,28 @@
+name: nf-core linting comment
+# This workflow is triggered after the linting action is complete
+# It posts an automated comment to the PR, even if the PR is coming from a fork
+
+on:
+  workflow_run:
+    workflows: ["nf-core linting"]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Download lint results
+        uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3
+        with:
+          workflow: linting.yml
+          workflow_conclusion: completed
+
+      - name: Get PR number
+        id: pr_number
+        run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT
+
+      - name: Post PR comment
+        uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          number: ${{ steps.pr_number.outputs.pr_number }}
+          path: linting-logs/lint_results.md
diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml
new file mode 100755
index 0000000..03ecfcf
--- /dev/null
+++ b/.github/workflows/release-announcements.yml
@@ -0,0 +1,75 @@
+name: release-announcements
+# Automatic release toot and tweet announcements
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+
+jobs:
+  toot:
+    runs-on: ubuntu-latest
+    steps:
+      - name: get topics and convert to hashtags
+        id: get_topics
+        run: |
+          echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" >> $GITHUB_OUTPUT
+
+      - uses: rzr/fediverse-action@master
+        with:
+          access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }}
+          host: "mstdn.science" # custom host if not "mastodon.social" (default)
+          # GitHub event payload
+          # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release
+          message: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+            Please see the changelog: ${{ github.event.release.html_url }}
+
+            ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics
+
+  send-tweet:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
+        with:
+          python-version: "3.10"
+      - name: Install dependencies
+        run: pip install tweepy==4.14.0
+      - name: Send tweet
+        shell: python
+        run: |
+          import os
+          import tweepy
+
+          client = tweepy.Client(
+              access_token=os.getenv("TWITTER_ACCESS_TOKEN"),
+              access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"),
+              consumer_key=os.getenv("TWITTER_CONSUMER_KEY"),
+              consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"),
+          )
+          tweet = os.getenv("TWEET")
+          client.create_tweet(text=tweet)
+        env:
+          TWEET: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+            Please see the changelog: ${{ github.event.release.html_url }}
+          TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }}
+          TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }}
+          TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
+          TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
+
+  bsky-post:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0
+        with:
+          post: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+            Please see the changelog: ${{ github.event.release.html_url }}
+        env:
+          BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }}
+          BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }}
+      #
diff --git a/.gitignore b/.gitignore
index 4e0d86b..19c927e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,11 +11,9 @@ data_example
 .docker_temp*
 .txt
 *.pyc
-data/checkpoint
 .vscode
 slurm_out
 output
-output_example
 # Ignore nextflow files in development
 nf-*
 .nextflow.log*
@@ -33,3 +31,33 @@ data/clinical_file_TCGA_SKCM.tsv
 
 # Skip data files
 output_*
+
+
+.nextflow*
+work/
+data/
+results/
+.DS_Store
+testing/
+testing*
+*.pyc
+
+output/
+assets/checkpoint
+assets/codebook.txt
+asse
+
+spotlight.sif
+plugins/*
+
+
+DUMMY-*
+BACKUP-*
+test-data
+
+
+ARCHIVE
+
+
+gaitilab.config
+nf_run_spotlight.sh
diff --git a/Python/3_spatial_characterization/__init__.py b/CITATIONS.md
similarity index 100%
rename from Python/3_spatial_characterization/__init__.py
rename to CITATIONS.md
diff --git a/Dockerfile b/Dockerfile
index cb0edf7..b786d78 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -7,17 +7,31 @@ RUN apt-get update && \
     apt-get install -y python3-pip && \
     apt-get install -y openslide-tools && \
     apt-get install -y python3-openslide && \
-    apt-get install -y libgl1-mesa-glx
+    apt-get install -y libgl1-mesa-glx && \
+    apt-get install -y python3.8-dev && \
+    apt-get install -y build-essential && \
+    apt-get install -y pkg-config && \
+    apt-get install -y python-dev && \
+    apt-get install -y libhdf5-dev && \
+    apt-get install -y libblosc-dev
+
 # Set up python environment
 # RUN apt install python3.8-venv
-RUN python3 -m venv /spotlight_venv
-RUN . 
spotlight_venv/bin/activate + +# Add nf-bin with all Python/R scripts +ENV VIRTUAL_ENV=/spotlight_venv +RUN python3 -m venv ${VIRTUAL_ENV} +ENV PATH="${VIRTUAL_ENV}/bin:$PATH" + +# RUN . spotlight_venv/bin/activate COPY ./env_requirements.txt ./ -RUN pip3 install -r env_requirements.txt +RUN pip3 install --upgrade pip setuptools wheel +RUN pip3 install --default-timeout=900 -r env_requirements.txt # Set up directories # -RUN mkdir -p /project/Python/libs -WORKDIR / +# WORKDIR /nf-bin + +ENV PATH="nf-bin:${PATH}" + -ENV PYTHONPATH /project/Python/libs diff --git a/Python/1_extract_histopathological_features/codebook.txt b/Python/1_extract_histopathological_features/codebook.txt deleted file mode 100644 index 42ce278..0000000 --- a/Python/1_extract_histopathological_features/codebook.txt +++ /dev/null @@ -1,42 +0,0 @@ -ACC_T 0 -BLCA_T 1 -BRCA_N 2 -BRCA_T 3 -CESC_T 4 -COAD_N 5 -COAD_T 6 -ESCA_N 7 -ESCA_T 8 -GBM_T 9 -HNSC_N 10 -HNSC_T 11 -KICH_N 12 -KICH_T 13 -KIRC_N 14 -KIRC_T 15 -KIRP_N 16 -KIRP_T 17 -LGG_T 18 -LIHC_N 19 -LIHC_T 20 -LUAD_N 21 -LUAD_T 22 -LUSC_N 23 -LUSC_T 24 -MESO_T 25 -OV_N 26 -OV_T 27 -PCPG_T 28 -PRAD_N 29 -PRAD_T 30 -READ_T 31 -SARC_T 32 -STAD_N 33 -STAD_T 34 -TGCT_T 35 -THCA_N 36 -THCA_T 37 -THYM_T 38 -UCEC_T 39 -UVM_T 40 -SKCM_T 41 diff --git a/Python/1_extract_histopathological_features/myslim/create_clinical_file.py b/Python/1_extract_histopathological_features/myslim/create_clinical_file.py deleted file mode 100644 index 12ab064..0000000 --- a/Python/1_extract_histopathological_features/myslim/create_clinical_file.py +++ /dev/null @@ -1,154 +0,0 @@ -import argparse -import os -import os.path -import numpy as np -import pandas as pd -import sys - - -def create_TCGA_clinical_file( - class_names, - clinical_files_dir, - output_dir=None, - tumor_purity_threshold=80, - path_codebook=None -): - """ - Create a clinical file based on the slide metadata downloaded from the GDC data portal - 1. Read the files and add classname and id based on CODEBOOK.txt - 2. Filter tumor purity - 3. Save file - - Args: - class_names (str): single class name e.g. LUAD_T or path to file with class names - clinical_files_dir (str): String with path to folder with subfolders pointing to the raw clinical files (slide.tsv) - output_dir (str): Path to folder where the clinical file should be stored - tumor_purity_threshold (int): default=80 - multi_class_path (str): path to file with class names to be merged into one clinical file - - Returns: - {output_dir}/generated_clinical_file.txt" containing the slide_submitter_id, sample_submitter_id, image_file_name, percent_tumor_cells, class_name, class_id in columns and records (slides) in rows. - - """ - # ---- Setup parameters ---- # - if not os.path.isdir(output_dir): - os.mkdir(output_dir) - - if (os.path.isfile(class_names)): # multi class names - class_names = pd.read_csv( - class_names, header=None).to_numpy().flatten() - else: # single class names - class_name = class_names - - CODEBOOK = pd.read_csv( - path_codebook, - delim_whitespace=True, - header=None, names=["class_name", "value"] - ) - - # ---- 1. 
Constructing a merged clinical file ---- # - # Read clinical files - # a) Single class - if os.path.isfile(clinical_files_dir): - clinical_file = pd.read_csv(clinical_files_dir, sep="\t") - # only keep tissue (remove _T or _N) to check in filename - clinical_file["class_name"] = class_name - clinical_file["class_id"] = int( - CODEBOOK.loc[CODEBOOK["class_name"] == class_name].values[0][1] - ) - print(clinical_file) - print(CODEBOOK) - # b) Multiple classes - elif os.path.isdir(clinical_files_dir) & (len(class_names) > 1): - clinical_file_list = [] - # Combine all clinical raw files based on input - for class_name in class_names: - clinical_file_temp = pd.read_csv( - f"{clinical_files_dir}/clinical_file_TCGA_{class_name[:-2]}.tsv", - sep="\t", - ) - # only keep tissue (remove _T or _N) to check in filename - clinical_file_temp["class_name"] = class_name - clinical_file_temp["class_id"] = int( - CODEBOOK.loc[CODEBOOK["class_name"] == class_name].values[0][1] - ) - clinical_file_list.append(clinical_file_temp) - clinical_file = pd.concat( - clinical_file_list, axis=0).reset_index(drop=True) - - # ---- 2) Filter: Availability of tumor purity (percent_tumor_cells) ---- # - # Remove rows with missing tumor purity - clinical_file["percent_tumor_cells"] = ( - clinical_file["percent_tumor_cells"] - .replace("'--", np.nan, regex=True) - .astype(float) - ) - - # Convert strings to numeric type - clinical_file["percent_tumor_cells"] = pd.to_numeric( - clinical_file["percent_tumor_cells"] - ) - clinical_file = clinical_file.dropna(subset=["percent_tumor_cells"]) - clinical_file = clinical_file.where( - clinical_file["percent_tumor_cells"] >= float(tumor_purity_threshold) - ) - # ---- 3) Formatting and saving ---- # - clinical_file["image_file_name"] = [ - f"{slide_submitter_id}.{str(slide_id).upper()}.svs" - for slide_submitter_id, slide_id in clinical_file[ - ["slide_submitter_id", "slide_id"] - ].to_numpy() - ] - - clinical_file = clinical_file.dropna(how="all") - clinical_file = clinical_file.drop_duplicates() - clinical_file = clinical_file.drop_duplicates(subset="slide_submitter_id") - clinical_file = clinical_file[ - [ - "slide_submitter_id", - "sample_submitter_id", - "image_file_name", - "percent_tumor_cells", - "class_name", - "class_id", - ] - ] - clinical_file = clinical_file.dropna(how="any", axis=0) - clinical_file.to_csv( - f"{output_dir}/generated_clinical_file.txt", - index=False, - sep="\t", - ) - print("\nFinished creating a new clinical file") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--class_names", - help="Either (a) single classname or (b) Path to file with classnames according to codebook.txt (e.g. 
LUAD_T)", required=True - ) - parser.add_argument( - "--clinical_files_dir", - help="Path to folders containing subfolders for multiple tumor types.", required=True - ) - parser.add_argument( - "--tumor_purity_threshold", - help="Integer for filtering tumor purity assessed by pathologists", - default=80, required=False - ) - parser.add_argument( - "--output_dir", help="Path to folder for saving all created files", default=None, required=False - ) - parser.add_argument( - "--path_codebook", help="Path to codebook", default=None, required=False - ) - args = parser.parse_args() - - create_TCGA_clinical_file( - class_names=args.class_names, - tumor_purity_threshold=args.tumor_purity_threshold, - clinical_files_dir=args.clinical_files_dir, - output_dir=args.output_dir, - path_codebook=args.path_codebook - ) diff --git a/Python/1_extract_histopathological_features/myslim/create_file_info_train.py b/Python/1_extract_histopathological_features/myslim/create_file_info_train.py deleted file mode 100644 index 2cdc0cd..0000000 --- a/Python/1_extract_histopathological_features/myslim/create_file_info_train.py +++ /dev/null @@ -1,100 +0,0 @@ -import tiffslide as openslide -import DL.utils as utils -import argparse -import os -import os.path -import sys -import pandas as pd - -sys.path.append(f"{os.path.dirname(os.getcwd())}/Python/libs") -REPO_DIR = os.path.dirname(os.getcwd()) - -# trunk-ignore(flake8/E402) - -# trunk-ignore(flake8/E402) - - -def format_tile_data_structure(slides_folder, output_folder, clinical_file_path): - """ - Specifying the tile data structure required to store tiles as TFRecord files (used in convert.py) - - Args: - slides_folder (str): path pointing to folder with all whole slide images (.svs files) - output_folder (str): path pointing to folder for storing all created files by script - clinical_file_path (str): path pointing to formatted clinical file (either generated or manually formatted) - - Returns: - {output_folder}/file_info_train.txt containing the path to the individual tiles, class name, class id, percent of tumor cells and JPEG quality - - """ - tiles_folder = output_folder + "/tiles" - - clinical_file = pd.read_csv(clinical_file_path, sep="\t") - clinical_file.dropna(how="all", inplace=True) - clinical_file.drop_duplicates(inplace=True) - clinical_file.drop_duplicates(subset="slide_submitter_id", inplace=True) - - # 2) Determine the paths paths of jpg tiles - all_tile_names = os.listdir(tiles_folder) - jpg_tile_names = [] - jpg_tile_paths = [] - - for tile_name in all_tile_names: - if "jpg" in tile_name: - jpg_tile_names.append(tile_name) - jpg_tile_paths.append(tiles_folder + "/" + tile_name) - - # 3) Get corresponding data from the clinical file based on the tile names - jpg_tile_names_stripped = [ - utils.get_slide_submitter_id(jpg_tile_name) for jpg_tile_name in jpg_tile_names - ] - jpg_tile_names_df = pd.DataFrame( - jpg_tile_names_stripped, columns=["slide_submitter_id"] - ) - jpg_tiles_df = pd.merge( - jpg_tile_names_df, clinical_file, on=["slide_submitter_id"], how="left" - ) - # 4) Determine jpeg_quality of slides - slide_quality = [] - for slide_name in jpg_tiles_df.image_file_name.unique(): - print("{}/{}".format(slides_folder, slide_name)) - # img = openslide.OpenSlide("{}/{}".format(slides_folder, slide_name)) - # TODO have to generalize this for TCGA and non-TCGA cases. 
- # print(img.properties.values) - # image_description = img.properties.values.__self__.get("tiff.ImageDescription").split("|")[0] - # image_description_split = image_description.split(" ") - # jpeg_quality = image_description_split[-1] - jpeg_quality = "80" - slide_quality.append([slide_name, "RGB" + jpeg_quality]) - - slide_quality_df = pd.DataFrame( - slide_quality, columns=["image_file_name", "jpeg_quality"] - ) - jpg_tiles_df = pd.merge( - jpg_tiles_df, slide_quality_df, on=["image_file_name"], how="left" - ) - jpg_tiles_df["tile_path"] = jpg_tile_paths - - # Create output dataframe - output = jpg_tiles_df[ - ["tile_path", "class_name", "class_id", - "jpeg_quality", "percent_tumor_cells"] - ] - output.to_csv(output_folder + "/file_info_train.txt", - index=False, sep="\t") - - print("Finished creating the necessary file for computing the features in the next step") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--slides_folder", help="Set slides folder") - parser.add_argument("--output_folder", help="Set output folder") - parser.add_argument("--clin_path", help="Set clinical file path") - args = parser.parse_args() - - format_tile_data_structure( - slides_folder=args.slides_folder, - output_folder=args.output_folder, - clinical_file_path=args.clin_path, - ) diff --git a/Python/1_extract_histopathological_features/myslim/create_tiles_from_slides.py b/Python/1_extract_histopathological_features/myslim/create_tiles_from_slides.py deleted file mode 100644 index c3f67ca..0000000 --- a/Python/1_extract_histopathological_features/myslim/create_tiles_from_slides.py +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/python -import tiffslide as openslide -import os -import sys - -import numpy as np -import pandas as pd -from PIL import Image - -sys.path.append(f"{os.path.dirname(os.getcwd())}/Python/libs") -REPO_DIR = os.path.dirname(os.getcwd()) - -import DL.image as im - - -def create_tiles_from_slides(slides_folder, output_folder, clinical_file_path): - """ - Create tiles from slides - Dividing the whole slide images into tiles with a size of 512 x 512 pixels, with an overlap of 50 pixels at a magnification of 20x. In addition, remove blurred and non-informative tiles by using the weighted gradient magnitude. - - Source: - Fu, Y., Jung, A. W., Torne, R. V., Gonzalez, S., Vöhringer, H., Shmatko, A., Yates, L. R., Jimenez-Linan, M., Moore, L., & Gerstung, M. (2020). Pan-cancer computational histopathology reveals mutations, tumor composition and prognosis. Nature Cancer, 1(8), 800–810. https://doi.org/10.1038/s43018-020-0085-8 - - Args: - slides_folder (str): path pointing to folder with all whole slide images (.svs files) - output_folder (str): path pointing to folder for storing all created files by script (i.e. 
.jpg files for the created tiles) - - Returns: - jpg files for the created tiles in the specified folder {output_folder}/tiles - - """ - - # Create folder for storing the tiles if non-existent - tiles_folder = "{}/tiles".format(output_folder) - if not os.path.exists(tiles_folder): - os.makedirs(tiles_folder) - print(tiles_folder) - - # Subset images of interest (present in generated clinical file) - clinical_file = pd.read_csv(clinical_file_path, sep="\t") - clinical_file.dropna(how="all", inplace=True) - clinical_file.drop_duplicates(inplace=True) - clinical_file.drop_duplicates(subset="slide_submitter_id", inplace=True) - subset_images = clinical_file.image_file_name.tolist() - print(subset_images) - # Check if slides are among our data - available_images = os.listdir(slides_folder) - images_for_tiling = list(set(subset_images) & set(available_images)) - - print(len(images_for_tiling), 'images available:') - counter = 1 - for slide_filename in images_for_tiling: - # Accept different file types - if slide_filename.endswith(('.svs', '.ndpi', '.tif')): - print(counter, ':', slide_filename) - slide = openslide.OpenSlide( - "{}/{}".format(slides_folder, slide_filename)) - slide_name = slide_filename.split(".")[0] - if ( - str(slide.properties["tiff.ImageDescription"]).find( - "AppMag = 40" - ) - != -1 - ): - region_size = 1024 - tile_size = 924 - else: - region_size = 512 - tile_size = 462 - [width, height] = slide.dimensions - for x_coord in range(1, width, tile_size): - for y_coord in range(1, height, tile_size): - slide_region = slide.read_region( - location=(x_coord, y_coord), - level=0, - size=(region_size, region_size), - ) - slide_region_converted = slide_region.convert("RGB") - tile = slide_region_converted.resize( - (512, 512), Image.ANTIALIAS) - grad = im.getGradientMagnitude(np.array(tile)) - unique, counts = np.unique(grad, return_counts=True) - if counts[np.argwhere(unique <= 20)].sum() < 512 * 512 * 0.6: - tile.save( - "{}/{}_{}_{}.jpg".format( - tiles_folder, slide_name, x_coord, y_coord - ), - "JPEG", - optimize=True, - quality=94, - ) - counter = counter + 1 - - print("Finished creating tiles from the given slides") - - -if __name__ == "__main__": - create_tiles_from_slides(sys.argv[1], sys.argv[2]) diff --git a/Python/1_extract_histopathological_features/myslim/post_process_features.py b/Python/1_extract_histopathological_features/myslim/post_process_features.py deleted file mode 100644 index bbc84a0..0000000 --- a/Python/1_extract_histopathological_features/myslim/post_process_features.py +++ /dev/null @@ -1,95 +0,0 @@ -#  Module imports -import argparse -import os -import sys -import dask.dataframe as dd -import pandas as pd - -#  Custom imports -import DL.utils as utils - - -def post_process_features(output_dir, slide_type, data_source="TCGA"): - """ - Format extracted histopathological features from bot.train.txt file generated by myslim/bottleneck_predict.py and extract the 1,536 features, tile names. Extract several variables from tile ID. 
- - Args: - output_dir (str): path pointing to folder for storing all created files by script - - Returns: - {output_dir}/features.txt contains the 1,536 features, followed by the sample_submitter_id, tile_ID, slide_submitter_id, Section, Coord_X and Coord_Y and in the rows the tiles - """ - # Read histopathological computed features - if slide_type == "FF": - features_raw = pd.read_csv( - output_dir + "/bot_train.txt", sep="\t", header=None) - # Extract the DL features (discard: col1 = tile paths, col2 = true class id) - features = features_raw.iloc[:, 2:] - features.columns = list(range(1536)) - # Add new column variables that define each tile - features["tile_ID"] = [utils.get_tile_name( - tile_path) for tile_path in features_raw.iloc[:, 0]] - features["Coord_X"] = [i[-2] - for i in features["tile_ID"].str.split("_")] - features["Coord_Y"] = [i[-1] - for i in features["tile_ID"].str.split("_")] - # FIX add sample_submitter_id and slide_submitter_id depending on data_source - if (data_source == "TCGA"): - features["sample_submitter_id"] = features["tile_ID"].str[0:16] - features["slide_submitter_id"] = features["tile_ID"].str[0:23] - features["Section"] = features["tile_ID"].str[20:23] - else: - features["sample_submitter_id"] = features['tile_ID'].str.split( - '_').str[0] - - #  Save features to .csv file - features.to_csv(output_dir + "/features.txt", sep="\t", header=True) - - elif slide_type == "FFPE": - features_raw = dd.read_csv( - output_dir + "/bot_train.txt", sep="\t", header=None) - features_raw['tile_ID'] = features_raw.iloc[:, 0] - features_raw.tile_ID = features_raw.tile_ID.map( - lambda x: x.split("/")[-1]) - features_raw['tile_ID'] = features_raw['tile_ID'].str.replace( - ".jpg'", "") - features = features_raw.map_partitions( - lambda df: df.drop(columns=[0, 1])) - new_names = list(map(lambda x: str(x), list(range(1536)))) - new_names.append('tile_ID') - features.columns = new_names - # FIX add sample_submitter_id and slide_submitter_id depending on data_source - if (data_source == "TCGA"): - features["sample_submitter_id"] = features["tile_ID"].str[0:16] - features["slide_submitter_id"] = features["tile_ID"].str[0:23] - features["Section"] = features["tile_ID"].str[20:23] - else: - features["sample_submitter_id"] = features['tile_ID'].str.split( - '_').str[0] - features['Coord_X'] = features['tile_ID'].str.split('_').str[1] - features['Coord_Y'] = features['tile_ID'].str.split('_').str[-1] - # Save features using parquet - # TODO TESTING move function to utils and convert to def instead of lambda - # name_function=lambda x: f"features-{x}.parquet" - OUTPUT_PATH = f"{output_dir}/features_format_parquet" - if os.path.exists(OUTPUT_PATH): - print("Folder exists") - else: - os.makedirs(OUTPUT_PATH) - - features.to_parquet(path=OUTPUT_PATH, compression='gzip', - name_function=utils.name_function) - - print("Formatted all features") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--output_dir", help="Set output folder") - parser.add_argument( - "--slide_type", help="Type of tissue slide (FF or FFPE)]") - parser.add_argument( - "--data_source", help="Data source, default='TCGA'") - args = parser.parse_args() - post_process_features(output_dir=args.output_dir, - slide_type=args.slide_type, data_source=args.data_source) diff --git a/Python/1_extract_histopathological_features/myslim/post_process_predictions.py b/Python/1_extract_histopathological_features/myslim/post_process_predictions.py deleted file mode 100644 index e126a8c..0000000 
--- a/Python/1_extract_histopathological_features/myslim/post_process_predictions.py +++ /dev/null @@ -1,215 +0,0 @@ -# Module imports -import argparse -import os -import sys -import dask.dataframe as dd -import pandas as pd - -#  Custom imports -import DL.utils as utils -import numpy as np - - -def post_process_predictions(output_dir, slide_type, path_codebook, path_tissue_classes): - """ - Format predicted tissue classes and derive tumor purity from pred.train.txt file generated by myslim/bottleneck_predict.py and - The pred.train.txt file contains the tile ID, the true class id and the 42 predicted probabilities for the 42 tissue classes. - - Args: - output_dir (str): path pointing to folder for storing all created files by script - - Returns: - {output_dir}/predictions.txt containing the following columns - - tile_ID, - - pred_class_id and true_class_id: class ids defined in codebook.txt) - - pred_class_name and true_class_name: class names e.g. LUAD_T, defined in codebook.txt) - - pred_probability: corresponding probability - - is_correct_pred (boolean): correctly predicted tissue class label - - tumor_label_prob and normal_label_prob: probability for predicting tumor and normal label (regardless of tumor or tissue type) - - is_correct_pred_label (boolean): correctly predicted 'tumor' or 'normal' tissue regardless of tumor or tissue type - In the rows the tiles. - """ - - # Initialize - # path_codebook = f"{os.path.dirname(os.getcwd())}/Python/1_extract_histopathological_features/codebook.txt" - # path_tissue_classes = f"{os.path.dirname(os.getcwd())}/Python/1_extract_histopathological_features/tissue_classes.csv" - codebook = pd.read_csv(path_codebook, delim_whitespace=True, header=None) - codebook.columns = ["class_name", "class_id"] - tissue_classes = pd.read_csv(path_tissue_classes, sep="\t") - - # Read predictions - if slide_type == "FF": - predictions_raw = pd.read_csv( - output_dir + "/pred_train.txt", sep="\t", header=None) - # Extract tile name incl. 
coordinates from path - tile_names = [utils.get_tile_name(tile_path) - for tile_path in predictions_raw[0]] - # Create output dataframe for post-processed data - predictions = pd.DataFrame(tile_names, columns=["tile_ID"]) - # Get predicted probabilities for all 42 classes + rename columns - pred_probabilities = predictions_raw.iloc[:, 2:] - pred_probabilities.columns = codebook["class_id"] - # Get predicted and true class ids - predictions["pred_class_id"] = pred_probabilities.idxmax( - axis="columns") - predictions["true_class_id"] = 41 - # Get corresponding max probabilities to the predicted class - predictions["pred_probability"] = pred_probabilities.max(axis=1) - # Replace class id with class name - predictions["true_class_name"] = predictions["true_class_id"].copy() - predictions["pred_class_name"] = predictions["pred_class_id"].copy() - found_class_ids = set(predictions["true_class_id"]).union( - set(predictions["pred_class_id"])) - for class_id in found_class_ids: - predictions["true_class_name"].replace( - class_id, codebook["class_name"][class_id], inplace=True - ) - predictions["pred_class_name"].replace( - class_id, codebook["class_name"][class_id], inplace=True - ) - - # Define whether prediction was right - predictions["is_correct_pred"] = ( - predictions["true_class_id"] == predictions["pred_class_id"]) - predictions["is_correct_pred"] = predictions["is_correct_pred"].replace( - False, "F") - predictions.is_correct_pred = predictions.is_correct_pred.astype(str) - # Get tumor and tissue ID - # TODO ERROR - temp = pd.DataFrame( - {"tumor_type": predictions["true_class_name"].str[:-2]}) - temp = pd.merge(temp, tissue_classes, on="tumor_type", how="left") - # Set of IDs for normal and tumor (because of using multiple classes) - IDs_tumor = list(set(temp["ID_tumor"])) - if list(set(temp.tumor_type.tolist()))[0] == 'SKCM': - # Probability for predicting tumor and normal label (regardless of tumor [tissue] type) - predictions["tumor_label_prob"] = np.nan - predictions["normal_label_prob"] = np.nan - for ID_tumor in IDs_tumor: - vals = pred_probabilities.loc[temp["ID_tumor"] - == ID_tumor, ID_tumor] - predictions.loc[temp["ID_tumor"] == - ID_tumor, "tumor_label_prob"] = vals - - predictions["is_correct_pred_label"] = np.nan - else: - IDs_normal = list(set(temp["ID_normal"])) - # Probability for predicting tumor and normal label (regardless of tumor [tissue] type) - predictions["tumor_label_prob"] = np.nan - predictions["normal_label_prob"] = np.nan - for ID_tumor in IDs_tumor: - vals = pred_probabilities.loc[temp["ID_tumor"] - == ID_tumor, ID_tumor] - predictions.loc[temp["ID_tumor"] == - ID_tumor, "tumor_label_prob"] = vals - - for ID_normal in IDs_normal: - vals = pred_probabilities.loc[temp["ID_normal"] - == ID_normal, ID_normal] - predictions.loc[temp["ID_normal"] == - ID_normal, "normal_label_prob"] = vals - - # Check if the correct label (tumor/normal) is predicted - temp_probs = predictions[["tumor_label_prob", "normal_label_prob"]] - is_normal_label_prob = ( - temp_probs["normal_label_prob"] > temp_probs["tumor_label_prob"] - ) - is_tumor_label_prob = ( - temp_probs["normal_label_prob"] < temp_probs["tumor_label_prob"] - ) - is_normal_label = predictions["true_class_name"].str.find( - "_N") != -1 - is_tumor_label = predictions["true_class_name"].str.find( - "_T") != -1 - - is_normal = is_normal_label & is_normal_label_prob - is_tumor = is_tumor_label & is_tumor_label_prob - - predictions["is_correct_pred_label"] = is_normal | is_tumor - 
predictions["is_correct_pred_label"].replace( - True, "T", inplace=True) - predictions["is_correct_pred_label"].replace( - False, "F", inplace=True) - - #  Save features to .csv file - predictions.to_csv(output_dir + "/predictions.txt", sep="\t") - - elif slide_type == "FFPE": - predictions_raw = dd.read_csv( - output_dir + "/pred_train.txt", sep="\t", header=None) - predictions_raw['tile_ID'] = predictions_raw.iloc[:, 0] - predictions_raw.tile_ID = predictions_raw.tile_ID.map( - lambda x: x.split("/")[-1]) - predictions_raw['tile_ID'] = predictions_raw['tile_ID'].str.replace( - ".jpg'", "") - predictions = predictions_raw.map_partitions( - lambda df: df.drop(columns=[0, 1])) - new_names = list(map(lambda x: str(x), codebook["class_id"])) - new_names.append('tile_ID') - predictions.columns = new_names - predictions = predictions.map_partitions(lambda x: x.assign( - pred_class_id=x.iloc[:, 0:41].idxmax(axis="columns"))) - predictions["true_class_id"] = 41 - predictions = predictions.map_partitions(lambda x: x.assign( - pred_probability=x.iloc[:, 0:41].max(axis="columns"))) - predictions["true_class_name"] = predictions["true_class_id"].copy() - predictions["pred_class_name"] = predictions["pred_class_id"].copy() - predictions.pred_class_id = predictions.pred_class_id.astype(int) - res = dict(zip(codebook.class_id, codebook.class_name)) - predictions = predictions.map_partitions(lambda x: x.assign( - pred_class_name=x.loc[:, 'pred_class_id'].replace(res))) - predictions = predictions.map_partitions(lambda x: x.assign( - true_class_name=x.loc[:, 'true_class_id'].replace(res))) - predictions["is_correct_pred"] = ( - predictions["true_class_id"] == predictions["pred_class_id"]) - predictions["is_correct_pred"] = predictions["is_correct_pred"].replace( - False, "F") - predictions.is_correct_pred = predictions.is_correct_pred.astype(str) - temp = predictions.map_partitions(lambda x: x.assign( - tumor_type=x["true_class_name"].str[:-2])) - temp = temp.map_partitions(lambda x: pd.merge( - x, tissue_classes, on="tumor_type", how="left")) - if (temp['tumor_type'].compute() == 'SKCM').any(): - # Probability for predicting tumor and normal label (regardless of tumor [tissue] type) - predictions["tumor_label_prob"] = np.nan - predictions["normal_label_prob"] = np.nan - predictions = predictions.map_partitions( - lambda x: x.assign(tumor_label_prob=x.loc[:, '41'])) - predictions["is_correct_pred_label"] = np.nan - else: - # TO DO - predictions["tumor_label_prob"] = np.nan - predictions["normal_label_prob"] = np.nan - # predictions = predictions.map_partitions(lambda x: x.assign(tumor_label_prob=x.loc[:, '41'])) - # predictions = predictions.map_partitions(lambda x: x.assign(tumor_label_prob=x.loc[:, '41'])) - - # Save features using parquet - def name_function(x): return f"predictions-{x}.parquet" - OUTPUT_PATH = f"{output_dir}/predictions_format_parquet" - if os.path.exists(OUTPUT_PATH): - print("Folder exists") - else: - os.makedirs(OUTPUT_PATH) - - predictions.to_parquet( - path=OUTPUT_PATH, compression='gzip', name_function=name_function) - - print("Formatted all predicted tissue labels") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--output_dir", help="Set output folder") - parser.add_argument( - "--slide_type", help="Type of tissue slide (FF or FFPE)]") - parser.add_argument( - "--path_codebook", help="codebook.txt file", required=True, type=str) - parser.add_argument( - "--path_tissue_classes", help="Tissue_classes.csv file", required=True, type=str) 
- - args = parser.parse_args() - post_process_predictions(output_dir=args.output_dir, slide_type=args.slide_type, path_codebook=args.path_codebook, - path_tissue_classes=args.path_tissue_classes) - - -# $cur_dir/codebook.txt $cur_dir/tissue_classes.csv $output_dir diff --git a/Python/1_extract_histopathological_features/myslim/python_test.py b/Python/1_extract_histopathological_features/myslim/python_test.py deleted file mode 100644 index 82f0164..0000000 --- a/Python/1_extract_histopathological_features/myslim/python_test.py +++ /dev/null @@ -1,7 +0,0 @@ -import os -import DL.utils as utils -import sys -print(sys.path) - -print(os.path.dirname(os.getcwd())) -print(os.getcwd()) diff --git a/Python/1_extract_histopathological_features/post_processing.py b/Python/1_extract_histopathological_features/post_processing.py deleted file mode 100644 index c05ef15..0000000 --- a/Python/1_extract_histopathological_features/post_processing.py +++ /dev/null @@ -1,49 +0,0 @@ -import argparse -import os -from myslim.post_process_features import post_process_features -from myslim.post_process_predictions import post_process_predictions - - -def execute_postprocessing(output_dir, slide_type, path_codebook, path_tissue_classes, data_source): - """ - 1. Format extracted histopathological features - 2. Format predictions of the 42 classes - - Args: - output_dir (str): path pointing to folder for storing all created files by script - - Returns: - {output_dir}/features.txt - {output_dir}/predictions.txt - """ - post_process_features(output_dir=output_dir, - slide_type=slide_type, data_source=data_source) - post_process_predictions(output_dir=output_dir, slide_type=slide_type, - path_codebook=path_codebook, path_tissue_classes=path_tissue_classes) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--output_dir", help="Set output folder", type=str) - parser.add_argument( - "--slide_type", help="Type of tissue slide (FF or FFPE)", required=True, type=str) - parser.add_argument( - "--path_codebook", help="codebook.txt file", required=True, type=str) - parser.add_argument( - "--path_tissue_classes", help="Tissue_classes.csv file", required=True, type=str) - parser.add_argument( - "--data_source", help="Data source, default='TCGA'") - args = parser.parse_args() - - if os.path.exists(args.output_dir): - print("Output folder exists") - else: - os.makedirs(args.output_dir) - - execute_postprocessing( - output_dir=args.output_dir, - slide_type=args.slide_type, - path_codebook=args.path_codebook, - path_tissue_classes=args.path_tissue_classes, - data_source=args.data_source - ) diff --git a/Python/1_extract_histopathological_features/pre_processing.py b/Python/1_extract_histopathological_features/pre_processing.py deleted file mode 100755 index bb1ffbd..0000000 --- a/Python/1_extract_histopathological_features/pre_processing.py +++ /dev/null @@ -1,83 +0,0 @@ -import argparse -import os -import pandas as pd -import sys - -from myslim.create_file_info_train import format_tile_data_structure -from myslim.create_tiles_from_slides import create_tiles_from_slides -from myslim.datasets.convert import _convert_dataset - -def execute_preprocessing(slides_folder, output_folder, clinical_file_path, N_shards=320): - """ - Execute several pre-processing steps necessary for extracting the histopathological features - 1. Create tiles from slides - 2. Construct file necessary for the deep learning architecture - 3. 
Convert images of tiles to TF records - - Args: - slides_folder (str): path pointing to folder with all whole slide images (.svs files) - output_folder (str): path pointing to folder for storing all created files by script - clinical_file_path (str): path pointing to formatted clinical file (either generated or manually formatted) - N_shards (int): default: 320 - checkpoint_path (str): path pointing to checkpoint to be used - - Returns: - {output_folder}/tiles/{tile files} - {output_folder}/file_info_train.txt file specifying data structure of the tiles required for inception architecture (to read the TF records) - {output_folder}/process_train/{TFrecord file} files that store the data as a series of binary sequencies - - """ - - # Create an empty folder for TF records if folder doesn't exist - process_train_dir = f"{output_folder}/process_train" - if not os.path.exists(process_train_dir): - os.makedirs(process_train_dir) - - # Perform image tiling, only kept images of interest - create_tiles_from_slides(slides_folder=slides_folder, - output_folder=output_folder, - clinical_file_path=clinical_file_path - ) - - # File required for training - format_tile_data_structure( - slides_folder=slides_folder, - output_folder=output_folder, - clinical_file_path=clinical_file_path - ) - - # Convert tiles from jpg to TF record1 - file_info = pd.read_csv(f"{output_folder}/file_info_train.txt", sep="\t") - training_filenames = list(file_info["tile_path"].values) - training_classids = [int(id) for id in list(file_info["class_id"].values)] - tps = [int(id) for id in list(file_info["percent_tumor_cells"].values)] - Qs = list(file_info["jpeg_quality"].values) - - _convert_dataset( - split_name="train", - filenames=training_filenames, - tps=tps, - Qs=Qs, - classids=training_classids, - output_dir=process_train_dir, - NUM_SHARDS=N_shards, - ) - - print("Finished converting dataset") - print( - f"The converted data is stored in the directory: {process_train_dir}") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--slides_folder", help="Set slides folder") - parser.add_argument("--output_folder", help="Set output folder") - parser.add_argument("--clinical_file_path", help="Set clinical file path") - parser.add_argument("--N_shards", help="Number of shards", default=320) - args = parser.parse_args() - execute_preprocessing( - slides_folder=args.slides_folder, - output_folder=args.output_folder, - clinical_file_path=args.clinical_file_path, - N_shards=args.N_shards, - ) diff --git a/Python/2_train_multitask_models/checks.ipynb b/Python/2_train_multitask_models/checks.ipynb deleted file mode 100755 index d2af40b..0000000 --- a/Python/2_train_multitask_models/checks.ipynb +++ /dev/null @@ -1,841 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\" Compute and combine cell type abundances from different quantification methods necessary for TF learning\n", - "Args: \n", - " clinical_file (str): path pointing to a txt or tsv file\n", - " path_published_data (str): path pointing to a folder containing published computed features\n", - " path_computed_features (str): path pointing to a folder containing the files generated with immunedeconv\n", - " data_path (str): path pointing to a folder where the dataframe containing all features should be stored, stored as .txt file\n", - "\n", - "Returns: \n", - " ./task_selection_names.pkl: pickle file containing variable names. 
\n", - " {data_path}/TCGA_{cancer_type}_ensembled_selected_tasks.csv\" containing the following cell type quantification methods: \n", - " tumor_purity = [\n", - " 'tumor purity (ABSOLUTE)',\n", - " 'tumor purity (ESTIMATE)',\n", - " 'tumor purity (EPIC)'\n", - " ]\n", - "\n", - " T_cells = [\n", - " 'CD8 T cells (Thorsson)', \n", - " 'Cytotoxic cells',\n", - " 'Effector cells',\n", - " 'CD8 T cells (quanTIseq)', \n", - " 'TIL score',\n", - " 'Immune score', \n", - " ]\n", - "\n", - " endothelial_cells = [\n", - " 'Endothelial cells (xCell)',\n", - " 'Endothelial cells (EPIC)', \n", - " 'Endothelium', ]\n", - "\n", - " CAFs = [\n", - " 'Stromal score',\n", - " 'CAFs (MCP counter)', \n", - " 'CAFs (EPIC)',\n", - " 'CAFs (Bagaev)',\n", - " ]\n", - "\n", - "\"\"\"\n", - "\n", - "import os\n", - "import sys\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import pandas as pd\n", - "import seaborn as sns\n", - "\n", - "sys.path.append(f\"{os.path.dirname(os.getcwd())}/libs\")\n", - "import joblib\n", - "import model.preprocessing as preprocessing" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [], - "source": [ - "# Final feature selection\n", - "tumor_purity = [\n", - " 'tumor purity (ABSOLUTE)',\n", - " 'tumor purity (ESTIMATE)',\n", - " 'tumor purity (EPIC)'\n", - "]\n", - "\n", - "T_cells = [\n", - " 'CD8 T cells (Thorsson)', \n", - " 'Cytotoxic cells',\n", - " 'Effector cells',\n", - " 'CD8 T cells (quanTIseq)', \n", - " 'TIL score',\n", - " 'Immune score', \n", - "]\n", - "\n", - "endothelial_cells = [\n", - " 'Endothelial cells (xCell)',\n", - " 'Endothelial cells (EPIC)', \n", - " 'Endothelium', ]\n", - "\n", - "CAFs = [\n", - " 'Stromal score',\n", - " 'CAFs (MCP counter)', \n", - " 'CAFs (EPIC)',\n", - " 'CAFs (Bagaev)',\n", - "]\n", - "\n", - "IDs = ['slide_submitter_id', 'sample_submitter_id',\n", - " ]\n", - "\n", - "tile_vars = ['Section', 'Coord_X', 'Coord_Y', \"tile_ID\"]\n", - "var_dict = {\n", - " \"CAFs\": CAFs, \n", - " \"T_cells\": T_cells,\n", - " \"tumor_purity\": tumor_purity,\n", - " \"endothelial_cells\": endothelial_cells,\n", - " \"IDs\":IDs,\n", - " \"tile_IDs\": tile_vars\n", - "}\n" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [], - "source": [ - "ensembled_tasks = pd.read_csv(\"/Users/joankant/Library/CloudStorage/OneDrive-TUEindhoven/spotlight/data/TCGA_FF_SKCM_ensembled_selected_tasks.csv\", sep=\"\\t\")" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0slide_submitter_idsample_submitter_idStromal scoreCAFs (MCP counter)CAFs (EPIC)CAFs (Bagaev)Endothelial cells (xCell)Endothelial cells (EPIC)EndotheliumCD8 T cells (Thorsson)Cytotoxic cellsEffector cellsCD8 T cells (quanTIseq)TIL scoreImmune scoretumor purity (ABSOLUTE)tumor purity (ESTIMATE)tumor purity (EPIC)
00TCGA-D3-A8GP-06A-01-TSATCGA-D3-A8GP-06A-1344.9512.365915-0.558672-1.405636-9.9657840.398655-0.791017NaN-0.792303-0.322414-1.050480NaN-701.880.960.9539876.554462
11TCGA-D3-A8GP-06A-02-TSBTCGA-D3-A8GP-06A-1344.9512.365915-0.558672-1.405636-9.9657840.398655-0.791017NaN-0.792303-0.322414-1.050480NaN-701.880.960.9539876.554462
22TCGA-RP-A693-06A-01-TSATCGA-RP-A693-06A-111.9614.0268811.4724940.0342851.3815971.4864560.980811NaN0.3173240.6944571.946440NaN1021.120.320.7395196.392896
33TCGA-EE-A3JD-06A-01-TSATCGA-EE-A3JD-06A171.9513.0457820.177346-0.9535390.1729581.3463541.078788NaN0.6439991.8770633.106620NaN2555.570.180.5357876.133479
44TCGA-FS-A4FC-06A-01-TS1TCGA-FS-A4FC-06A-354.1914.8088032.4319911.194521-9.9657841.3193560.237883NaN-0.225290-0.333677-1.005219NaN-60.200.590.8555636.446450
............................................................
378378TCGA-ER-A194-01A-01-TSATCGA-ER-A194-01A-45.3213.4929521.2109050.4335633.5159781.7541012.1887830.95060.4215371.1539370.8964930.3931117.540.600.7231976.495422
379379TCGA-EE-A2MH-06A-01-TSATCGA-EE-A2MH-06A-466.2512.613355-0.395582-0.8359962.2859151.0993731.341252NaN0.3274901.2654091.950049NaN1472.690.430.7298316.422688
380380TCGA-EE-A3AF-06A-01-TSATCGA-EE-A3AF-06A-225.5413.9566271.8711600.340345-9.9657840.202723-0.229261NaN-0.0386720.3622250.714307NaN862.830.780.7657866.490166
381381TCGA-EE-A2MU-06A-02-TSBTCGA-EE-A2MU-06A375.1115.9270674.2094111.266537-9.965784-2.4617480.000569NaN0.5964991.6613152.790765NaN1806.580.290.6016486.178854
382382TCGA-EE-A2MD-06A-01-TSATCGA-EE-A2MD-06A-734.3213.6891842.073051-0.727678-9.965784-0.753840-0.625993NaN-0.330836-0.172356-0.605996NaN98.210.630.8719566.351655
\n", - "

383 rows × 19 columns

\n", - "
" - ], - "text/plain": [ - " Unnamed: 0 slide_submitter_id sample_submitter_id Stromal score \\\n", - "0 0 TCGA-D3-A8GP-06A-01-TSA TCGA-D3-A8GP-06A -1344.95 \n", - "1 1 TCGA-D3-A8GP-06A-02-TSB TCGA-D3-A8GP-06A -1344.95 \n", - "2 2 TCGA-RP-A693-06A-01-TSA TCGA-RP-A693-06A -111.96 \n", - "3 3 TCGA-EE-A3JD-06A-01-TSA TCGA-EE-A3JD-06A 171.95 \n", - "4 4 TCGA-FS-A4FC-06A-01-TS1 TCGA-FS-A4FC-06A -354.19 \n", - ".. ... ... ... ... \n", - "378 378 TCGA-ER-A194-01A-01-TSA TCGA-ER-A194-01A -45.32 \n", - "379 379 TCGA-EE-A2MH-06A-01-TSA TCGA-EE-A2MH-06A -466.25 \n", - "380 380 TCGA-EE-A3AF-06A-01-TSA TCGA-EE-A3AF-06A -225.54 \n", - "381 381 TCGA-EE-A2MU-06A-02-TSB TCGA-EE-A2MU-06A 375.11 \n", - "382 382 TCGA-EE-A2MD-06A-01-TSA TCGA-EE-A2MD-06A -734.32 \n", - "\n", - " CAFs (MCP counter) CAFs (EPIC) CAFs (Bagaev) \\\n", - "0 12.365915 -0.558672 -1.405636 \n", - "1 12.365915 -0.558672 -1.405636 \n", - "2 14.026881 1.472494 0.034285 \n", - "3 13.045782 0.177346 -0.953539 \n", - "4 14.808803 2.431991 1.194521 \n", - ".. ... ... ... \n", - "378 13.492952 1.210905 0.433563 \n", - "379 12.613355 -0.395582 -0.835996 \n", - "380 13.956627 1.871160 0.340345 \n", - "381 15.927067 4.209411 1.266537 \n", - "382 13.689184 2.073051 -0.727678 \n", - "\n", - " Endothelial cells (xCell) Endothelial cells (EPIC) Endothelium \\\n", - "0 -9.965784 0.398655 -0.791017 \n", - "1 -9.965784 0.398655 -0.791017 \n", - "2 1.381597 1.486456 0.980811 \n", - "3 0.172958 1.346354 1.078788 \n", - "4 -9.965784 1.319356 0.237883 \n", - ".. ... ... ... \n", - "378 3.515978 1.754101 2.188783 \n", - "379 2.285915 1.099373 1.341252 \n", - "380 -9.965784 0.202723 -0.229261 \n", - "381 -9.965784 -2.461748 0.000569 \n", - "382 -9.965784 -0.753840 -0.625993 \n", - "\n", - " CD8 T cells (Thorsson) Cytotoxic cells Effector cells \\\n", - "0 NaN -0.792303 -0.322414 \n", - "1 NaN -0.792303 -0.322414 \n", - "2 NaN 0.317324 0.694457 \n", - "3 NaN 0.643999 1.877063 \n", - "4 NaN -0.225290 -0.333677 \n", - ".. ... ... ... \n", - "378 0.9506 0.421537 1.153937 \n", - "379 NaN 0.327490 1.265409 \n", - "380 NaN -0.038672 0.362225 \n", - "381 NaN 0.596499 1.661315 \n", - "382 NaN -0.330836 -0.172356 \n", - "\n", - " CD8 T cells (quanTIseq) TIL score Immune score \\\n", - "0 -1.050480 NaN -701.88 \n", - "1 -1.050480 NaN -701.88 \n", - "2 1.946440 NaN 1021.12 \n", - "3 3.106620 NaN 2555.57 \n", - "4 -1.005219 NaN -60.20 \n", - ".. ... ... ... \n", - "378 0.896493 0.393 1117.54 \n", - "379 1.950049 NaN 1472.69 \n", - "380 0.714307 NaN 862.83 \n", - "381 2.790765 NaN 1806.58 \n", - "382 -0.605996 NaN 98.21 \n", - "\n", - " tumor purity (ABSOLUTE) tumor purity (ESTIMATE) tumor purity (EPIC) \n", - "0 0.96 0.953987 6.554462 \n", - "1 0.96 0.953987 6.554462 \n", - "2 0.32 0.739519 6.392896 \n", - "3 0.18 0.535787 6.133479 \n", - "4 0.59 0.855563 6.446450 \n", - ".. ... ... ... 
\n", - "378 0.60 0.723197 6.495422 \n", - "379 0.43 0.729831 6.422688 \n", - "380 0.78 0.765786 6.490166 \n", - "381 0.29 0.601648 6.178854 \n", - "382 0.63 0.871956 6.351655 \n", - "\n", - "[383 rows x 19 columns]" - ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ensembled_tasks" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAd8AAAEFCAYAAACipe0RAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAABRzUlEQVR4nO2dd7wdVbn+v08SeheQagy9Q+jSi4jYKAoG8KpgQZQici38hEtTFIVrAxEQKSpSBQVEikDoJUgaoRMiIIpwgUAgtOT5/bHWTiaTvfeZfc4+5+xz8n75rE9m1rxrrXdmH2bNas+SbYIgCIIg6DuG9LcDQRAEQTCvEZVvEARBEPQxUfkGQRAEQR8TlW8QBEEQ9DFR+QZBEARBHxOVbxAEQRD0MVH5BkEQBIMeSedK+o+kBxtcl6RfSHpC0gRJm/SmP1H5BkEQBPMC5wO7Nbn+EWCNHA4CftWbzkTlGwRBEAx6bN8GvNTEZA/gt07cAywpaYXe8mdYb2UcDFYeC0m0IAgqsqZ6knqh4ftVft+8+czFXyG1WGucbfvsFopbCXimcP5sjvtXC3lUJirfIAiCoCORqnfO5oq2lcp2ruLqZduD/JoS3c5dIGlEeYBe0vGSvtlfPlWhqo+S/l+eYPCopA/3hW9BEARVEEMqhzbwLPC+wvnKwHPtyLgeUfnOw0haF9gXWI80EeEMSUP716sgCIKENKRyaANXAZ/Ls54/AEy13StdzhCVb4+RNFrSjyTdJ+kxSdvl+AMkXSHpOkmPS/pxIc2vJN0vaZKkEwrxUyT9QNLd+fomkq6X9KSkgwt235I0Jk+HL6Y/Ordg/wasVcH9PYCLbb9l+yngCWCLNjyWIAiCHjNkyNDKoSskXQTcDawl6VlJX5R0cOHdei0wmfQe/DXwtd66L4gx33YxzPYWkj4KHAfskuNHAhsDbwGPSjrN9jPA0bZfyq3MmyRtaHtCTvOM7a0k/ZQ0NX4bYEFgEnCmpF1JU+G3II1RXCVpe+B1Uit2Y9Lv+gDwd4DaH5ftM0t+rwTcUzivTTAIgiDoANrXPrS9XxfXDRzStgK7IFq+XdNowL0Yf0X+9+/AiEL8Tban2n4TeAh4f47/tKQHgLGkLt91C2muyv9OBO61/ZrtF4A3JS0J7JrDWFIFuzapMt4OuNL2G7ZfLeSD7TPrVLxQcYKBpINyS/z+s8++pE6SIAiC9tPH3c59SrR8u+b/gKVKce8Bniqcv5X/ncGcz/StwvEMYJikVYBvApvbflnS+aSWbTnNzFL6mTlvAT+0fVbRIUlH0PrMvEoTDOacRRhLjYIg6BsGYqValcF7Z23C9jTgX5I+CCDpPaTJSXd0M8vFSV3EUyUtR1JVaYXrgS9IWjT7s5Kk9wK3AXtJWkjSYsAnKuR1FbCvpAXyR8EawH0t+hMEQdAr9PFs5z4lWr7V+BzwS0n/m89PsP1kdzKyPV7SWNIY7mTgzhbT3yBpHeBuSQDTgP+y/YCkS4BxwD+A22tpGo352p4k6VJSl/i7wCG2Z3TnvoIgCNrNYG75Ko0xB0FVots5CIKq9Ezhapm1jqj8vnnx0Z/1qKy+Jlq+QRAEQUeiunNCBwdR+QZBEAQdyWDudo7KNwiCIOhIBnPlO6DvLKtLbZaPr83rYMs2faLDLGlaD9Pflf8dIWn/QvzILN7RYyStKOnyBtdmPcsgCIJOYDCv8x14HjfA9kdtv9LffnQX21vnwxHA/oVLI4GWKl9JdXs0bD9ne+/u+BcEQdD3DGkhDCwGjMeSFpH0F0njJT0oaVTp+hRJy+TjuhrHklbLWst/l3S7pLWblLdPLme8pNty3AGSTi/YXCNpx8L5/0p6QNJNkpbNcaMl/VTSbZIelrR51nx+XNL3C2lrLeeTge0kjZP0HeBEYFQ+H5Wfw7lZ23mspD0Kvl0m6Wrghgb3NGuHprwe+OKsD30JsFCXP0IQBEEfMmTIsMphoDFgKl+SsMVztjeyvT5wXT0jSZsyW+P4k8DmhctnA4fZ3pSkMnVGk/KOBT5seyNg9wr+LQI8YHsT4FaSxnONt21vD5wJ/JmkH7o+cICkpUv5HAXcbnuk7R9lPy7J55cARwM3294c2Ak4RdIiOe1WwOdt71zB368Cb9jeEDgJ2LSRYchLBkHQH4TIRmcwEThV0o+Aa2zfnkUmyszSOAaQdFX+d1Fga+CyQroFmpR3J3B+FqG4ooldjZlArWb6fSlNUa95Um2bKkmTSfKO/1ch/xq7ArsXxrEXBIbn4xttv1Qxn+2BXwDYniBpQiPDkJcMgqA/GIhjuVUZMJWv7cdyq/ajwA8l1e1arZnXiRsCvGJ7ZMXyDpa0JfAxYJykkSQVqOJfw4L10tbxoSu95lYQ8Cnbj84RmXx9vcW8oiINgqBjadDAGhQMmM8KSSuSukl/D5wKbNLAtK7Gcd7p5ylJ++T8JGmjJuWtZvte28cCL5JaqFOAkZKGSHofc+59OwSoTWban+5rP78GLNbk/HrgMOW/Skkbd7Oc24DP5DzWBzbsZj5BEAS9wmCe7TxgWr7ABqTxzZnAO6Qxy1PLRs00jkmVza8kHQPMB1wMjG9Q3imS1iC1NG8q2D1F6j5+kLSlX43XgfUk/R2YCswxIawFJgDvShpP2s/3AuAoSeOAHwLfA34GTMgV8BTg490o51fAebm7eRyxoUIQBB3GkPoLNwYFoe0ctEiM+QZBUJWeaTuvsvGPK79vnhr77QHVRz14PyuCIAiCAc1AnMVclXm+8pV0NLBPKfoy2yf1hz/tQNIGwO9K0W/Z3rI//AmCIOgWA3AstyrzROUraTTwTdv3l6/lSrbHFW0W2/im7e6Mv5KlHT9n+/Cc19u2a5KTewKP2X6oSl62J5KUseqVszuwru2T61ybZnvR7vgfBEHQbgbiRKqqzBOV70AgfxjUPg52BKYBd+XzPYFrSJveV0LSMN
vv1innKmavOw6CIOhYYqlRL1BPLlLSsVk28UFJZxeW03Qp0ZilEx+RdEGWTLxc0sJ1yt1V0t1ZBvKyLL7RyMeTJT2U8zs1x50vae+CTXFDhcUlXZnTnKn82SZpmqQfKcla/k3SFvmeJueWKJJ2VJKrHAEcDHxDSVJyB5LC1in5fDU1kMnMvv1E0i3Ajxrc0yyJTEmr5GcxRtL3Kv94QRAEfcAQDascBhr92aavJxd5uu3N8/lCzLmEpopE41rA2Vky8VXga8UClbSfjwF2yTKQ9wNH1nNO0nuAvYD1cn7fr2dXYgvgv0nLolYjyVtCkp4cnWUtX8t5fSjnf2IxA9tT8j3+NEtK3kpqqX4rnz9Jc5nMNfP9/XcFf38O/CpLVf67gn0QBEHfIVUPA4z+rHwnArvkFuF2tqcCO0m6V9JEYGdgvYL9XBKNtt8CahKNAM/YvjMf/x7YtlTmB4B1gTvzutnPA+9v4N+rwJvAOZI+CbxR4Z7usz3Z9gzgokL5bzNbi3oicKvtd/LxiAr5zkJzymSOA84CViiYXJbLr8I22U+Ye4JWsczQdg6CoO8ZvJsa9d+YbwO5yEOAzWw/I+l45pRvrCLRWF4TVj4XSf94vwr+vStpC+CDpI0aDiV9EMySmMzd4vM3Ka92/o5nL6ie5b/tmWqw/V8TupLJbLvEZGg7B0HQLwzAFm1V+nPMt5Fc5Iu5ddedfWeHS9oqH+/H3BKP9wDbSFo9+7CwpDUb+LcosITta4EjmD17eAqzdwDag6SUVWOLPI46hKRw1XaJyVZlMrvgTtKHBWSpySAIgo4hup17hQ2A+3LX6dGkcdBfk7pi/wSM6UaeDwOfz5KJ7yFJKM7C9gvAAcBF2eYeoNGevosB12S7W4Fv5PhfAztIug8ob2ZwN2k/3gdJMpRXduMeAK4m6VOPk7QdSQbzW0r7965Gqii/qCRBOYn0EdAdvg4cImkMsEQ38wiCIOgdBnG386CRl8yzhK/Jk7WCXiO6nYMgqErP5CXX2P6syu+bx2/7yoBq/g68+dlBEATBvMGQAVWftsQAbKzXx/aU7rZ689rccaXw4Xb72JdIOrDOPf2yv/0KgqD3WGj4cf3tQnuJMd9Bz/2kiVO15/EV29dLOqKeUEdvUhPb6Gk+ts/L64KL4ZB2+BgEQdAnqIUwwJjnu53z7OiPA5vYfisLcdSWDx1BWi881xpfSUNbWE/bcaiB/GQQBEHHEN3Og5oVgBezYAe2X7T9nKTDgRWBW7JcY00m8kRJ9wJbSTpSSQrzQUlHZJuazOU5Of5CSbtIulNJDnOLbLeFpLvyDOa7JK3VzElJ60m6L3cfT5C0Ro7/XD4fL+l3Oe79km7K8TdJGp7j55CfVAOZyiAIgo5giKqHAUZUvnAD8D5Jj0k6Q0lLGdu/AJ4DdrK9U7ZdBHgwb803HTiQtNzoA8CXJW2c7VYnSTduSFrKtD9J7eqbwHezzSPA9rY3Bo4FftCFnwcDP8/iGpsBz0paj7RMa2fbG5GWDgGcDvw2y2JeCPyikE9RfrKZTGUQBEH/EpXv4MX2NJJoxkHAC8Alkg5oYD4D+GM+3ha40vbrOY8rgO3ytadsT7Q9k7QO96ascFWUk1yCJBH5IPBT5pTSrMfdwHclfQd4v+3pJMWty22/mO/lpWy7FfCHfPw75pTZvMz2jAoylbMIeckgCPqFGPMd3OSx29HA6Kwr/Xng/DqmbxbGeZv93GX5y6I0Zu2Zfw+4xfZeeY3y6C58/EPu7v4YcL2kL2UfqqyDK9rUREG6kqkslh3ykkEQ9Dlu8yxmSbuReiWHAueU9zWXtARpns9w0rv6VNvntdWJzDzf8pW0Vm38NDMS+Ec+Lss8FrkN2DNLVC5C2qHo9haKXgL4Zz4+oIKfqwKTc3f4VaQu7ZuATyvv6qS0ExOkfYCLspFzyVy2WaYyCIKg/bSx21nSUOCXwEdIG+zsJ2ndktkhwEN5GG9H4H8lzU8vMM9XvsCiwAXK+/aSfpTj87Wzgb/WJlwVsf0AqXV8H3Av6StqbAvl/pi0ocSdpK+wrhgFPJi7iNcmjelOAk4Cbs1Skz/JtocDB+b7+Syzx4LLtEumMgiCoP20t9t5C+CJvPPc2yTZ3vI7z8BikkSqG14ibabTdgaNvGTQV0S3cxB0KgsNP47pT5/Q324U6Jm85Op7/rby++bJP3/+K6S5OzXOzkNmAEjaG9jN9pfy+WeBLW0fWrBZjNSzuDap13OU7b/05B4aEWO+QRAEQWfSQtU959yUyrmVK/cPA+NIk1lXA26UdHsepmsr0e0cBEHQgEEn1zjQaK+85LPA+wrnK5OWkxY5ELjCiSdIu9P1iv5BVL5BEARBZ9LeyncMsIbSnuvzkyalXlWyeRr4YCpaywFrAZPbeEez6KjKV9Lyki6W9GSeAHWtCpvdS/qGpDfzdPBa3I6SphY2D/hbC+WtUNNRzvlY0hcL1zfOcd8sxH0zK1g9mFWlPpfjR0t6NMfd2ZViVW+Q72HrbqTbQNL5veBSEARB92njfr5ZTvdQ4HrS3u+X2p4k6WBJB2ez7wFb5yWnNwHfqekotJuOGfPNs8uuBC6wvW+OGwksBzyWzfYjfb3sxZzrcG+3/fFuFHsk8OvC+UTSrOLf5PN9gfEFHw8GPgRsYfvV/BGwZyH9Z2zfL+kg4BRg92741BN2BKaRlhpVQknjeaKklSUNt/10r3kXBEHQCm1WrrJ9LXBtKe7MwvFzwK5tLbQBndTy3Ql4p/Qgxtm+HUDSaqSp38eQKuGmSNqn0Dq9rYHZp4DrCudPAwtKWi5/DOwG/LVw/bvA12qD77an2r6gTr63kSQmyz6tLulv2acHlLSVJemU7OtESaOy7Ry7G0k6XVl5S9IUSSfkPCZKWjsLdRwMfCP3AGwnaVlJf5Q0JodtcvrjJZ0t6Qbgt7mIq5m9NjgIgqDf8RBVDgONjmn5AusDf29yfT/gIpKQxVqS3mv7P/nadnn9KyT5xJNIeskftv1PSUuWM5O0CvBybUOFApcD+wBjgQfI6lR5Cvpitp+scC+fILWiy1wInGz7SkkLkj5+PkkS9tgIWAYY0+RjociLtjeR9DXgm7a/JOlMYJrtU7PPfwB+avsOpc0VrgfWyek3BbbNMpWQtlU8irT+OAiCoP8ZgPv0VqWTWr5dsS9wcdZLvoJUQda4vbBn7Uk57k7gfElfpr6IxQokLecyl+a8a5V9jSpSjhfmj4BtSBsVzE6cKu+VbF8JYPtN22+QdJcvsj3D9vPArcDmXZQD6RlA+mAZ0cBmF+D07NNVwOLZD4CrChUvwH9IuzjNhULbOQiC/iC0nfuEScDe9S5I2hBYg7TmCtJ+u5NJUmF1sX2wpC1JWsjjJI20/X8Fk+nAgnXS/VvSO6Sx3a+TNh8gj/G+LmlV241mv33G9v0NrjX682gU/y5zfhyVfa212GfQ+HccAmxVqmTJz/D1ku2CpGcyF6HtHARBvzAAu5Or0kkt35uBBXJLFQBJmytt8bcfcLztETmsCKwk6f2NMpO0mu17bR8LvMic67sgT
eIa0SD5saRZbjNK8T8Efilp8VzG4nlyVZfkceJnJe2Z0y4gaWHS+PAoSUMlLQtsT5Ks/AewbrZbgjz9vQvKWtQ3kGb3kcsc2STtmsCDVe4lCIKgT2jvUqOOomMq37zl3l7Ah5SWGk0iaSw/R+pyvrKU5EqaTxA6JU9GepBUwY0vXrT9OvCkpLkmRtm+y/af6uT5K+AW0rjsg6Qu4jcq3F6NzwKHK2ku3wUsn+9jQvbvZuDbtv9t+xlSF/gE0lhxFd3oq4G9ahOuSBrPm0maIOkh0oSsRuwE9IqMWhAEQbcYquphgDFPaztL2gvY1PYx/e1LfyJpAdKHxLZ5LVwTots5mHfoPK3k5nSevz3Tdl7ti5dV13b+zT4DqgbupDHfPifPOl66v/3oAIYDR3Vd8QZBEPQdHlDVaWvM05UvgO1z+tuH/sb248Dj/e1HEATBHAziCVfzfOUbBEEQdCgDcCJVVTpmwtVAQf2vP13MZ5ykXfK1Gfn8QUmX5ZnUSJpWyGvN7O8Tkh6WdKmSmldoOwdB0HkMUfUwwIiWbwtIHaE/3Sif6bZHZp8uJM1s/knB9wVJs5mPtH11jtsJWDa0nYMg6EgG4CzmqkTLtzU6QX+6Crczt7b0/sDdtYo3+36L7dra3tB2DoKgsxjELd+ofFujZf3pwrXtCl3FR+e4mv70RtTZAamB/nQxn3G5wi+mGQZ8hLm1pbvy/X5gu3oXQl4yCIL+wFLlMNCIbuf2si+wl+2Zkmr60zUJzHrdxTX96UuZrdVcpJ7+dKNu54UKm0vczuxtEavSUNs55CWDIOgXBnHzMCrf1ugI/ekGzBrzbeL7Dk2uN9R2DoIg6BcGYHdyVQbxd0Wv0En6063yB2BrSR8rlL+bpA3yaWg7B0HQWQwdUj0MMAaex/1Ih+hPl8d867bE6/g+Hfg4cJikx7PW8wGk7mYIbecgCDqN2FIwqGH7OeDTdS6tUsf2yMLp6DrXP1mhyNNJleQxtkcDS9Qzsr1oV/G2HwF2K9tkbefNgCMq+BMEQdAneBB3O0fl2+H0kf50aDsHQdB5ROUb9Ce9rT8d2s5BEHQkA3AJUVXmyTHfDpKInCDpb6X1wP1Cfh5r9LcfQRAEsxjSQhhgDECXe0ZBInK07dVsrwt8lyQRWaMoEVnkdtsjc9ilhWLrSUSOtL1hLueQlm+k/fwK+HZ/OxEEQTCLmO08qOgYicj8IbAY8HI+30LSXZLG5n/XyvEL500QJki6RNK9kjbL136V1acmSTqhkPemkm6V9HdJ1+fW9zqS7ivYjJA0IZ/eDuySFbKCIAj6n0EsLzkvvmhbloi0XVuOs11BReoy2ycxWyLyn5KWLGfWTCISWBp4ndTyBngE2N72u0q7Ff2AVHF/LeexoaT1gXGFvI62/ZKkocBNWezjYeA0YA/bL0gaBZxk+wuS5pe0qu3JwCjgUoCsyvUEsFEXzycIgqBPGIiykVWZF1u+XbEvcLHtmSTJx30K14rdzifluJpE5JeBoXXyayQROdL2+4DzgB/n+CWAy/K6358C6+X4bYGLAfJGCBMKeX1a0gPA2Gy/LrAW6SPjxlzJHwOsnO0vZfZSqVFAUay5rsRkaDsHQdAvDOIx33mx5dtpEpFXAX/Mx98DbrG9l6QRzF4bXPfzL7eqvwlsbvtlpT15F8z2k2xvVSfZJaQK/orkvouznOtKTIa2cxAE/UK0fAcVnSYRuS3wZD5eAvhnPj6gYHMHubUqaV2gJgm5OKnbeqqk5Ui7GQE8CiwraaucZj5J6wHYfhKYAfwPc7Z6IUlMTmriaxAEQd8RY76DB9uWtBfwM0lHAW8CU0jqTvsyuwKrUZOIvLdBlqfkJToCbqKORGRe0rS67SdydG3MV8BU4Es5/sfABZKOJH0k1Dgjx08gdS9PAKbaflzSWFKFOZnUBY7tt7Ps5C/ycqlhwM+YXbFeApxCQZUrV97Tbf+rwX0GQRD0LUMHXqVaFSW54qA3yZX9praP6Wb6ocB8tt/Ms7FvAta0/XYbffwG8KrtLrYijG7nYN5hoeHHMf3pE7o27BA6z981e1R7Dv/JLZXfN08fuVOf1tSSlgW+Q5pnM2to0fbOVdLPcy3f/qANEpELA7dImo/UWv5qOyvezCvA79qcZxAEQffp7DHfC0m9iB8DDgY+z9yTaxsSlW8f0ROJSNuvkTY+6DVsn9eb+QdBELRMZ4/lLm37N5K+bvtW4FZJt1ZNHJVvEARB0Jl0dN3LO/nffyntk/4cs5d0dkmPZztLmqE595c9qsX0UyQt0+T6kpK+VjjfsaaT3EIZ5+cJSEg6J88YbmY/uqYg1V2yetSD+bglnyUtlNWp6q0bLtptIek2SY9KeiTf28JN7Gf5IekASafn40MlHVjVvyAIgr5g6NDqoR/4fp7Q+t+kJZ/n0MK2rO1o+U63PbIN+TRiSZLC0xntyMz2l7q26ne+AFxhe0Yjgzw7+TJgX9t3Ky1M/hRJrvKNFss7lzRTOrqegyDoGNo95CtpN+DnJEGkc2yfXMdmR9LqkPmAF23v0CC7l21PJa1Y2Smn3aaqL722zje3aE+Q9ICkiZLWzvFLS7pBSb/4LAodC5KOVNJJflDSETn6ZGC13Ko+JcctKuny3Nq7MFc8dfWM6/g1q1WrBrrITe5pcyXN5fGS7pO0mKShkk6RNEZJe/krXeSxQ6GXYKykxeqYfQb4c7bfS2nnIynpMz8maXnSZgwX2L4b0hIq25fbfl7SIpLOzT6NlbRHM59svwFMkbRFV88gCIKgr5BUOVTIayhJMOkjpBnK+5V7QZUkgs8Adre9HnMqHJY5rWJcXdpR+S5U6nYeVbj2ou1NSDvmfDPHHQfcYXtjkrrTcEgVJ3AgsCXwAeDLkjYGjgKezHKM38p5bExq3q8LrApsozQT+DRgb9ubklpzNQnIRhxtezNgQ2AHJYWrukianzSz7eu2NwJ2IalBfZG05nZzYPPs9yqN8snP4ZDcW7AdJUWpXM6qtqdAmikN/JtU2f4aOM72v2muUX00cHP2aSfSWuRFmvgEcH/2Zy4U8pJBEPQDUvVQgS2AJ2xPzqtFLgbKDZP9Sb2OTwMUdP0LPmkrSf9NEjI6shCOp77EcF16u9v5ivzv34FP5uPta8e2/yLp5Ry/LXCl7dcBlOQPtyNV0GXus/1sthtHUpB6hdl6xpAeQleCEZ+WdBDpOaxAqswnNLBdC/iX7THZ91dz+bsCGyqPKZNUqtYgKVvV407gJ5IuJP3Iz5auL5PvpchhwIPAPbYv6uKeAHYFdpdU++BZkPyR04T/AGvXuxDykkEQ9AetdDvnd/lBhaiz87urxkrAM4XzZ0mNvSJrAvNJGk0awvu57d+WbOYn7Xw3LNvUeJUG0sX16O3ZzrWdfGaUyqr3Am+ld7+4Q1At72Z6xnMX1lgXuWESGvt9mO3rS/mPqJeJ7ZMl/QX4KHCPpF1sP1IwqacFvRIwE1hO0pC86cMkYFNy93Qdnz5l+9GST8vVsa1RV9c5CIKgv1ALfbNzNhLqZ1cvWel8GOm9+kFgIeBuSffY
ntWYKiwrOt/2P6p7OCf9oe18G2lME0kfAZYqxO+ptHftIqSN7G8HXmPOr4tGNNQzbkAjXeRGPAKsKGnznP9iSnvfXg98NXd7I2nNZl28SlrQE23/iNTVO0dr0/bLwFBJC2b7YaSJUPuTtgo8MpueDnxeaVOHWt7/lceDrwcOK4yFb9zFvUH64nuwgl0QBEGfMHRI9VCBZ5lTe39l0vKgss11tl+3/SKpXtqoQX5v5Pk+10q6uRaq3ltvjPnONXusxAnA9krb4O0K1PrWHwDOB+4j6SifY3ts3iHoTqVJWKc0yJPch7838CNJ40l73m7dxH48SSd5ErNn+zYk5z8KOC3nfyOptXgO8BDwgNLSorNo3qNwRL6X8aSW5l/r2NxA6oaHtNfv7bZvJ1W8X5K0ju3nSZrTpyotNXqY1E3/Kml3pPmACdmn7zW7t8w2wN8q2AVBEPQJbR7zHQOsIWmVPLdmX+Ye1vwzSXt/mNKyzS1JjZ56XEhqlK1Cqtem5DKq3VtoO3ceuaV6pO3Pdl55MeYbzDt0nlZyczrP355pO6933m2V3zeTDty+y7IkfZS0jGgocK7tkyQdDGD7zGzzLdLk35mkRuDPGuT1d9ubSppge8Mcd2uTpUlzEApXHYjtsZJukTS02VrfNrIMaYvBIAiCjqHKEqJWsH0tcG0p7szS+SmkXd+6okcKV1H5dii2z+3Dsm7sq7KCIAiq0sqEq36gqHB1Gmke0TeqJm7rrSmkJhvl0RapyZzP9NIz/ly2m6IkZjJeScRk+UL8Mvl4eUkXK+0v/FCeKLCmpGUlXdeTewyCIGg3bR7zbSu2r7E91faDtneyvantektj69Lulm9ITbafWVKTuQvmySbPeCfbL0r6AWmi1uG1C3nm85UkVax9c9xIYDnbj0n6l6RtbDedeBYEQdBXDOnQlq+knYBDmb1a5WHgdNujq+bRJ7emkJpslEdLUpMtcBuweiluJ+Cd4viG7XF5FjXAn3JZQRAEHcEQVQ99RR7fPRe4hrQE9DOkceRz84SuSrS75buQkuJUjR/arukRvmh7k9xt/E3gS8yWmjwx39BBMJfUpIB7lfZJPApYv9byUxLA3hhYjzTYfSdJavJeUh/8HrZfUJK8PInUimzE0bZfUtL/vEnShrbrql1pttTkKNtjJC1OSWpS0gKkJVI3UF+cA2ZLTd4paVHgzTrlzJKazKxWesaHFSrQGh8HJpbimslRQlpz/P0m14MgCPqU/uhOrsC3gD3zctUa4yTdT6p3rq2fbE76sts5pCbnpjtSk826nW+RNCP7fUwDm0b8B1ix3gUVZNvOOutEDjpoVD2zIAiCttKhle/ypYoXANsT1FxFcA76crZzSE2W6KbUZDN2yqos9ZhEc93RhvKSoe0cBEF/oL7sT67O6928Ngf9PZwdUpMtSE32kJuBBSR9uVD+5pJqC8JDXjIIgo5iyJDqoQ9ZTdJVdcLVpF32KtHbY77X2W623OgE4CIlqclbKUhN5tbnfdnuHNtjASTdqbRs56/AX+plavvt3PX7C6V1WMNIqiaTGtiPl1STmpxMBanJPI58mqSFSC3GXUhSkyNIUpMCXgD2bJLVEXnW3AySRGUzqcma9GN5zPdc279o5m/22ZL2An6mtATsTZIc2hHZZCcaPM8gCIL+oEO7nZvtj35q1UxCXrLDUR9JTUq6jTRB7eXmltHtHMw7dJ5cY3M6z9+eyUtufcUdld83d31y286sqhsQClcdTl9ITUpaFvhJ1xVvEARB39GhLd+2EJXvAKC3pSZtv0Ba5xsEQdAxdLi8ZI8YxLfWczQI5TKDIAgGCp0sL9lTouXbnJDLDIIg6CeGdOZSIwDy7ObymPRU0sqVs2y/OXeq2UTLtxtoYMhlTivE751nj9dayr/K48iTlSQuz5X0cM0mCIKgE+jwlu9kYBrw6xxeBZ4nLdv8dVeJo+XbnIEsl9mMpYCdgd2Bq4Ftsv9jJI20Pa5J2iAIgj6hw7uTN7a9feH8akm32d5eUt1lrUWi5duc6bZHFsIlhWtFucwR+Xh74PeQ5DKBueQybU/LabdrUOZ9tp+1PRMYl/Nei9lymeNI0pGVN22uw9VOa8wmAs9noY+ZpHXOI8rGkg5S2nTi/rPPvqR8OQiCoFfoxI0VCiwraXjtJB/X5vi83VXiaPl2n46Vy6zjR1khq1bGzFJ5M6nzNxHykkEQ9AcdPOQL8N/AHZKeJL2jVwG+llUNL+gqcbR820unyGUCPC9pHUlDcnlBEAQDiiFy5dDX2L6WtHHOETmsZfsvuYfzZ12lj5ZvcwakXGbmKNJ+k8+QNJsXbWIbBEHQcQzr7JYvwKakobphpB3tsP3bKglDXjJokeh2DuYdOk+usTmd52/P5CU/cePtld83V39ouz6tqiX9DliNNDenpj5o24dXSR8t3yAIgqAj6fAx382Add3NFmxUvkFLLDT8uFnH058+Ya4v7eL1MjW7YpqafS2vRulqacrlN8qj7Fuj9MX4eunLtvX8quJPveuN4srXanT1fBpRxfdiXL1n1yxNzabZs+6unz1N21W6KrR6H/1B0afe8K3Z/xuN7NL5RT0qt8MnJT0ILA/8qzuJo/INgiAIOpIOb/kuAzwk6T4Kq0Zs714lca9VvpKWJ00K2pzk2BTSjLC3gYdJG9IvSJrx+0vbF+R0S5DWyg7P/p1q+7xS3vcCCwDvARYC/pkv7Wl7Sjf9HQFcY3v9LHbxTdsfr5h2IeA64HBmTzEfTpIamwq8CHy/lTz7GkkXA/9j+/H+9iUIggBg6JCOnmJyfE8S90rlmyURrwQusL1vjhsJLEeaffuk7Y1z/KrAFZKG5Er2EOAh25/IW909KulC27MWLdveMqc9ANjM9qG9cR8t8AXgCtvjgZGQZBxJlfnl+XzHnhTQm1sKZn4FfBv4ci+WEQRBUJlO7na2fWtP0vfWve0EvGP7zFqE7XG2by8b2p4MHElqNUISh1gsV+CLAi8B73bHCUmbS7pL0nhJ90laTNJQSadIGiNpgqSvdJHHDoVdjcZKqrcu9zPAnyu41Ei3+YM574lZZ3mBHD9F0rGS7gD2kXS4pIey3xc38k+JU7KO9MQsR1nbNWl0PR9I6453kRRDEUEQdASduM43v4+R9JqkVwvhNUmvVs2nt16065NkF6vyALB2Pj4duIqkbbwYMCpLH7aEpPmBS3L6MZIWB6YDXwSm2t48V3J3SrqB+spUkHSbD7F9p6RFgTl2qsjlrFqxu7uebvP9wPnAB20/Jum3wFdJXfYAb9reNpf1HLCK7bckLdnEv0+SWuAbkcYlxki6rZEPJD3qmZKeyGnm+O0kHUTWqR621GYMW3T1CrcaBEHQMzpxzLf2PrZdRSCpIZ3Sqi8+4g+T1k2tSKpATs8VZ6usBfzL9hgA26/afhfYFfhcFs+4F1iapFLSiDuBn0g6HFgy51FkGeCVij410m1+yvZj2eYCkkZ0jaKY8gTgQkn/xezegHr+bQtcZHuG7edJgh+bN/Ghxn9Iz30ObJ9tezPbm0XFGwRBXzGkhdD
XSFqt0Eu5Y+6ZXLJq+t7yeRJJ+aMqG5MmYUHa/ecKJ54AnmJ2q7gVRGOd5cMKmyWsYvuGRpnYPpm0489CwD3K2wcWmM7c2smNaKTb3IzXC8cfA35JerZ/lzSsgX/N8qznQ40FSfcTBEHQ73T4xgp/BGZIWh34DUnb+Q9VE/dW5XszsICkWZN38vjrDmXDPMv4VNKWeZAkGT+Yry1HahlO7oYPjwArSto857VYHs+8HviqpPly/JpKest1kbRa3vXnR6RNkueofG2/DAyVVLUCrufniPwDAnyW1FIt+zEEeJ/tW0gTo5YkjSHX8+82YFQe316W1JK+r5xnHdakuWRlEARBnzFsiCuHfmBm7mncC/iZ7W8Ac+2z3oheGfO1bUl7AT+TdBRpHHIKaakRpA3kxzJ7qdFpheVE3wPOlzSR1IL7ju0Xu+HD23mi0Wl5KdB0YBfgHFJX6wN5stELwJ5NsjpC0k6kVuJDJA3mMjeQunr/1g0/35R0IHBZ/jgYA5xZx3Qo8HulpVgCfmr7FUnfq+Pf28BWwHhS6//btv9dp9U+i/yhM912txaMB0EQtJtOGRdtwDuS9gM+D3wix81XNXGvzWy1/Rzw6QaXF+oi3a4VyzifNFmp0fUxwAfqXPpuDkWmkiaKYXs0MDofH1bBldNJM7ZnVb62Dyj5MivPfH5o4fgmUtd72f8RheN3SBV82aaRf9/KoZIPwP7AWQ3yCoIg6HP6Y7eiFjgQOBg4yfZTklYh7+dehdhYoU1I+gJpXXNvrsXtNXLr+3d1JpSViI0VgnmHztuooDmd52/PNlb42l23VH7fnLH1Tv02N1rSUqRhwQlV08SazjZh+9z+9qEnlFXEgiAI+ptOXGpUQ9JoYHdSPToOeEHSrbaPrJK+w7vUew9Jy0u6WNKTWbjiWklrNrEfKemjFfLdUdLWPfDr2lamq7eY92hJm+XjKZKW6Y1ygiAI2sEwuXKogqTdJD0q6Yk8H6mR3eaSZijto96IJWy/StJVOM/2pqR5RZWYJyvfPNHqSmC07dVsr0saA16uSbKRQJeVL7Aj0O3K1/ZHbb/S3fRBEASDhXYuNZI0lLRU8yPAusB+ktZtYPcj0sqYZgyTtAJpbtM1Ld7avFn50kT+UtLvJO1Ri88SjLsDJ5KW74yTNErSeyT9SUnq8R5JG+ZlUwcD38h220l6v6Sbst1NkoZLWiJ/fa2Vy7iotiyr2CKV9LmcbrzSxs1zIGlRSecpSUhOkPSpHL+rpLslPSDpMiXlq7pIWkTSX3IZD+YZ4kEQBP1Om0U2tgCesD057xVwMbBHHbvDSGt4/9NFfieSKugnsoriqkDljWnm1THfZvKX5wDfAP6cl/VsTZpKfiyFTRwknQaMtb2npJ2B39oeKelMYJrtU7Pd1fnaBXlS1i9ymkNJS6p+Dixl+9dFJyStBxwNbGP7RUnvqePr/5CkMjfIaZbKFfcxwC62X5f0HdJM7BMb3O9uwHO2P5bzWKKLZxcEQdAntDLmq4IMbuZs22cXzlcibexT41lgy1IeK5HW7e7MbFXAuti+DLiscD4Z+FRVf+fVyrchtm+V9EtJ7yX15f/R9rvSXH8F25IftO2bJS3doOLaKucD8DvgxznNjZL2IXWDbFQn3c7A5bU1zrZfqmOzC7BvwfeXJX2c1KVyZ/Z5fuDuJrc8EThV0o9IuzDNtflF8Y/6rLNO5KCDonEcBEHvoxaWGuWK9uwmJvWq8nIBPyNpS8yo887PPunbtn+cG2BzOWj78DrJ5mJerXwnAc0G0n9H2qloX9J2gfWo8kPWwzBLsWodkvjHe0hfYeX8u8qvno2AG23vV8EX8mYOm5LGs38o6QbbJ5ZsCn/UsdQoCIK+oc2znZ8F3lc4X5m0wUyRzYCLc8W7DPBRSe/a/lPBpiaFfH9PnJlXx3y7kr88n6zGZbsmt/gaaZelGreRKujaXr0v5plvZbu7mN06/QxwRz7+BulH3A84V1nussBNwKclLZ3LqNftfAMwSygjrzW7h7Rb0uo5buEuZnGvCLxh+/ckmc9NGtkGQRD0JW2e7TwGWEPSKkq70e1L2kFvFlnrf0QWOLoc+Fqp4sX21fnfC+qFyvdW1XAw0ZX8pe3nJT0M/KmQ7BbgKKXdkH4IHA+cJ2kC8AZpXBjgauDyPGnrMNI+xedK+hZJyvLAXBl+CdjC9mtK2/0dAxxX8HGSpJOAWyXNAMYCB5Ru5fvALyU9SJKXPMH2FZIOAC5S3nEj5/0Y9dkAOEXSTOAd0naGQRAE/U47W755+PBQ0iSpocC5+T17cL5eT9Z3LiRd1ey67d0r5RMKV3MjaWHSWOgmtqf2tz+dRXQ7B/MOnacY1ZzO87dnClcnjP1b5ffNcRvv0ieSHJJeIE3cuoi0Le0c5dqea2OcesyTLd9mSNoFOBf4SVS8QRAE/cfQ/nagPssDHyINGe4P/IW0f3pLO8JF5VvC9t+A4f3tRxAEwbxOJ26skPX7rwOuy0N7+wGjJZ1o+7TmqWczr064aoqSrNi4Qjgqx28naVKOW0jSKfn8lG6UUd5VqVeRNCKPDdckMFtWZAmCIOhL2qlw1U4kLSDpk6RdjA4BfgFc0Uoe0fKtz3TbI+vEfwY4tbYJgaSvAMvafqsbZXwX+EFVY6W577I9sxtlBUEQDDjm68DmoaQLSEJNfyVNcn2wO/l04K11JpK+RNLwPFZJcvIqYBHgXiW5yWUl/VHSmBy2yenmkoCUdDKwUG5BX5jtjszyjg9KOiLHjZD0sKQzgAeYc41abXnUXUrSkPdJWkzS0NwiH5PL+0oX97VDoYU/VtJizeyDIAj6ig5t+X4WWBP4OnCXpFdzeE3Sq1UziZZvfRbKS4pq/ND2OZK2JalAXQ4gaVqthSzpD8BPbd8haThpOvs61JGAtP1HSYcW0m5K2ph5S9LMuXsl3Qq8DKwFHGj7a0UH8zq1S4BRWVd0cZJgxxdzeZvn8Yg7Jd1AY8GObwKH2L5TSQP6zW4/tSAIgjbSoWO+bWm0RuVbn0bdzs3YBVhXsyXJFs+tyLkkIOuk3Ra40vbrAJKuALYjLQD/h+176qRZC/iX7TE531dz2l2BDTV7K6wlgDVovM73TuAnuQV+he2y0lbISwZB0C908n6+PSUq3/YxBNjK9vRiZB6rrSIT2YjXm6Spl6+Aw2zPsR2W0o5Lc2H7ZEl/IclL3iNpF9uPlGxCXjIIgj6nQ5catYUY820fZanHkQ3il8qH72i2pORtwJ5ZCnIR0q4ac21wUOIRYEVJm+d8F5M0jNTd/dVa3pLWzHnWRdJqtifa/hFJq3TtSncbBEHQywwb4sphoBGVb31qk6Fq4eQKaQ4HNsuTnB4i7esLSQJyqTyRajxpL2FILckJki60/QBJT/o+kmLKObbHNiss70c5Cjgt53sjsCBpS8SHgAfy0qKzaN7DcUTBt+mkGXxBEAT9zlBVDwONkJcMWiS6nYN5h86Ta2xO5/nbM3nJ8x67vvL75sA1PzygquAY8w2CIAg6kphwFQRBEAR9TFS+QRAEQdDHDO3Adb7tok
cTriQtL+liSU9KekjStXl27QhJ07Ni0sNZfenzhXRLSLo6KzNNknRgnbzvzZOdnpb0QmHy04ge+NttfWMlLedbJfXJ7Pdm9y9piqRl2ljWBpLOb1d+QRAE7WCYqoeBRrdbvnn96pXABbb3zXEjgeVIex0+aXvjHL8qcIWkIVkX+RDgIdufkLQs8Gie9ft2LX/bW+a0BwCb2T6U/uULJBGKGX1RWLP7Lwh5tKusiZJWljTc9tNtzTwIgqCbDOZu5560fHcC3rF9Zi3C9jjbc61PtT0ZOJK0HAeSOMRiuQJfFHgJeLc7Tqjv9I0/A/w520vS6bm1/5fc4t87X5vVKpW0maTR+XiL7OfY/O9aOf4ASVdIuk7S45J+3MK9L5LLH5+XC43K8ZvmVvrfJV0vaYVC/HhJd+fnUxQEv5qCElcQBEF/M1SuHAYaPal81wf+3oL9A8wWcDidpHv8HDAR+Hp3duvRbH3jr9veiCTlOIe+MbA58GVJqzTJqqZvPJIk61hWqZofWNX2lBy1F0necQPgy8DWFdx9BNg+9wYcy5w7Go0krdndABgl6X1zJ6/LbsBztjeyvT5pf8n5gNOAvW1vCpwLnJTtzwMOt71VnbzuJ917EARBR9ChGyu0hb4U2Sg+ng8D44AVSRXP6UobA7TKXPrGtt8FdgU+p7Q5wr3A0iR940bU9I0PB5bMeRRZBnilcL49cJHtGbafA26u4OsSwGW5tflTYL3CtZtsT7X9Jkkg4/0V8oP04bKLpB9J2s72VNIzWR+4Md//McDKkpbI93ZrTvu7Ul7/If0ecyHpIEn3S7r/7LMvqehaEARBzxjMlW9PZjtPAvbu0mo2GwMP5+MDgZOdFD6ekPQUqVV8X4s+9JW+8XSSetQcyRr49C6zP2qKab4H3GJ7r+zH6MK14n7AM6j4u9h+TGlHpI8CP1TavehKYFK5dStpySY+13ydXu9CaDsHQdAfDMRKtSo9afneDCwg6cu1iDz+ukPZMFc2p5K6QwGeBj6Yry1Haq1N7oYPfaJvnHciGiqpVpneBuybx5ZXYLZkJMAUYNN8/KlC/BLAP/PxAS3faX2/VwTesP170vPdBHgUWFbSVtlmPknr2X4FmKq0LSKkMewiawLd2hQ6CIKgN5hviCuHgUa3W762LWkv4GeSjiLtAzsFOCKbrCZpLKlF9RpwWp7pDKkVeL6kiaRW6ndsv9gNH97Ok4xOk7QQqeW2C0nfeARJ31jAC8CeTbI6QtJOpFbnQ9TXN76BtPXf30ity51J3b6PAbcW7E4AfiPpu6Qu7xo/Bi6QdCTVuqmrsAFwiqSZwDvAV/Mz2Rv4Re5qHgb8jNRTcSBwrqQ3SB8oRXYC/tImv4IgCHrMYN58ILSdKyJpY+BI25+tc+184Brbl/e5Y90k90ZcY3t9SQuQPiC2rTPeXSK6nYN5h87TSm5O5/nbM23nm5+7tvL7ZucVPzqgOqlD4aoitsdKukXS0L5a69uHDAeO6rriTf9z15j+9Alz/c9evF6mZldMU7Ov5dUoXS1NufxGeZR9a5S+GF8vfdm2nl9V/Kl3vVFc+VqNrp5PI6r4Xoyr9+yapanZNHvW3fWzp2m7SleFVu+jPyj61Bu+Nft/o5FdOr+oR+UOxN2KqhKVbwvYPrdB/AF97EqPycum1s/HjwOP96tDQRAEJYYMwPW7VYnKt5eRtDRwUz5dnjSu/EI+X9P2wsUu4H5wMQiCoCMZNogHfaPy7WVs/x9pLTOSjgem2T41n0/rL78Gafd5EASDiEFc9w7qextUSNonS0iOl3Rbjhsq6VRJE5VkNA/L8R/MMpYTJZ2bJ1TVpC+PlXQHsI+kXbPU5AOSLpO0aD/eYhAEwRxI1cNAIyrfgcOxwIezjObuOe4gYBVgY9sbAhfmtcjnA6Nsb0Dq3fhqIZ83bdeWTB0D7GJ7E9L65iP75E6CIAgqoBbCQCMq34HDnaS10V8Gatsa7gKcWZulbPslkmDJU7YfyzYXkOQwa9T0IT8ArAvcmWUoP08DWcuivOS7055o4y0FQRA0ZjC3fGPMd4Bg+2BJWwIfA8Ypbd9YT16zqz/D1wt2N9rer0LZs+QlFxq+3+CdfhgEQUcxmFuHg/neBhVZAvNe28cCLwLvI6luHZwlNZH0HpLk5ghJq+ekn2VOBa4a9wDb1OwkLSxpzd6+jyAIgqoMkSuHgUZUvp3DWpKeLYR9StdPyROoHiRpS48nyWg+DUyQNB7YP++MdCBpB6WJwEzgzFJe2H6BpDF9kaQJpMp47bJdEARBfxHdzkFbsH186XzR/O8UYL4u0n6yTvS7pElSR5ZsbyLtIlXOY0Tp/GbSfsdBEAQdxwCsUysTLd8gCIKgI2n3fr6SdpP0qKQn8oZA5eufycs2J0i6S9JG7b6nWWXFxgpBa8TGCkHQqQy2jRUmvXxN5ffNekt9vGlZkoaSdqH7EPAsMAbYz/ZDBZutgYdtvyzpI8DxtrfslvNdEN3OQRAEQUfS5rHcLYAnbE9OeetiYA/SNrIA2L6rYH8PsHJbPSgwT3c796e8YxAEQdCcIS2Eoh5BDgeVslsJeKZw/myOa8QXqb+3e1uIlu88Smg7B0HQ6VQdy4U59QgaUC+3ut3aknYiVb7bVvegNebplm8NSTtKulXSpZIek3RyHni/Ly/vWS3bnS/pV3lf38mSdsjayQ9LOr+Q37TC8d61azn9L/JA/mRJexfsviVpTB7on2vQJus4n5/1nSdK+kaOX13S37Lm8wOSVlPilILtqMJ93iLpD8DEnOcphXK/0kuPOAiCoGXaLC/5LEkfocbKwHNzlSltSFrGuUfeGKdXiJbvbDYC1gFeAiYD59jeQtLXgcOAI7LdUsDOJH3lq4FtgC8BYySNtD2ui3JWIH1NrQ1cBVwuaVdgDdKYhICrJG1v+7ZCupHASrVtByUtmeMvBE62fWXWdR4CfDLbbwQsk32r5bUFsL7tp3K3zFTbm+fNF+6UdIPtpyo+syAIgl5D7RXPGAOsIWkV4J/AvsD+c5an4cAVwGcLEr29QrR8ZzPG9r9svwU8SVKPApgIjCjYXe00RXwi8LztibZnApNKdo34k+2ZeYbdcjlu1xzGAg+QKuY1SukmA6tKOk3SbsCrkhYjVchXAth+0/YbpMr9ItszbD9PUriqree9r1C57gp8Lms73wssXafcOcZSzj77kvLlIAiCXqGdLd+sgX8ocD3wMHCp7UmSDpZ0cDY7lvQePEPSOEn3t/N+ikTLdzZvFY5nFs5nMudzequOTdmu+Lm2YJNyVPj3h7bPauRcnvq+EfBh4BDg08xujZdp9rf4euFYwGG2r29iXxpLiaVGQRD0DUPbrLJh+1rg2lLcmYXjL5F6MnudaPn2Ds9LWkfSEGCvCvbXA19Q3k9X0kqS3ls0kLQMMMT2H4H/ATax/SrwrKQ9s80CkhYmyU+OymO6y5J2NbqvQblflTRfTr+mpEW6c8NBEATtJuQlg1Y5CriGNK39QaDpJvW2b5C0DnC30l/RNOC/gP8UzFYCzssVOsD/y/9+FjhL0onAO
8A+wJXAViT9ZwPftv1vSWXt5nNIXeUPKBX8ArBnqzcbBEHQGwzAOrUyoXAVtEh0OwdBpzLYFK6eef3qyu+b9y3yiQFVV0e3czAgWGj4cfNUuUF9uvo9+vr3ald57cqnsyrentPmpUYdRXQ7B0EQBB1JKyIbA42mLV9JS0r6Wl8501tIWlHS5fl4pKSPdiOPPSUdW4obL+miUtz5kp7K09QfkXRc4doXsujFhCyAsUeOl6RjJD2eRT5ukbReId2UPOGqWM600vkBkk6XdHQue5ykGYXjwyUdL+mfhbhx+TfeQAWRkCAIgk5giFw5DDS6avkuCXwNOKP3XUmozbKHkobZfg6oqUmNBDajNN28At8mCWvU8l2H9PGyvaRFbBeX8HzL9uVZ9OIhSb8lTYY6mjRLeWqe2bxstj8E2BrYyPYbWXTjKknr2X6zFSdtnwSclH2cZntkwefjgZ/aPrWU7BVJK0sabvvpVsoLgiDoLQZxw7fLMd+TgdVyC+mULE94Te1ibmkdkI+nSPqBpLuzIMMmkq6X9GRtAXNu4XUpe1h2QtI0Sf+rJJ94U14+g6TRkjbLx8tImpKPD5B0maSrgRskjchlzg+cSFqGM07SqNzarOU3RGmfx3Irc03gLdsvFqL3B35HEuPYnfrU1vi+DrwXeI00kxnb0wpiF98hrbd9I1+7AbgL+EzDX6b9XE1SfAmCIOgIBvNSo64q36OAJ22PtP2tCvk9Y3sr4HbgfFJr8wOkCg/mlD3cBThF0gr52hbA0bbXrZPvIsADtjchqTVVmZ2wFfB52zvXImy/TVIwuSTf0yXA75ldye0CjC9VspAkJB8oxY0CLgEuAvYrXTtFSTXqWeBi2/8hLft5HnhK0nmSPgEgaXFgEdtPlvK4H1iP9vONQpfzLaXytuuF8oIgCLrFYJ5w1e7ZzlflfycC99p+zfYLwJtKWsRVZQ/LzCRVdJAqyyo7Tdxo+6UKducCn8vHXwDOq2OzAmkNLACSNgdesP0P4CZgE0lLFey/lbt7lwc+KGnr3JW+G+mD5DHgp7kbuBGiwY4bTahi/9P84THS9k6F+P8AK9Z1JOQlgyDoB1rZUnCg0eps53eZ8z4bSSc2kl6sKnvYFbVKpuhP2ZdK+dl+RtLzknYGtqR+V+90YInC+X7A2rVubmBx4FMk0Ypi3tMkjSZ9LNyVNaHvA+6TdCNwnu3jJb0uadXaJs+ZWiu/EdMlzZ9b8wDvAcot9lZYkHSfcxHykkEQ9AcaiP3JFenqg+E1YLHC+T+AdZVkDJcAPthieVVlD+v5WZswtT9wRz6eAmyaj/emGuV7glRp/p4ktF1vstfDwOqQxoVJKlIb2h5hewSwB3N3PSNpGKlCf1JpxvUmhcsjSc8T4BTgF5IWyul2IVXYf2hyH7eSVLDI6T4N3NLEvivWJKlxBUEQdARq4b+BRtPKN+9leGeerHSK7WeAS4EJpK3sxrZY3pU57XjgZrLsYYV0rwPrSfo7aTu/2hjyqSRt4rtIW+dV4RbSB8S42oQvUnf5otTvcob00bCx0mfY9sA/bf+zdH3dwvh1bcx3AqkL/gpgPuBUpeVH40hjxl/P9qeRtruaKOlRknbzHraLLdEJkp7N4Sc57SdzXvcAl5W2IGxEccx3nKQROX4n4C8V0gdBEPQJ0pDKYaAxIOQl85KZpvrIPcx/M9JYaMMJR5J+TtpO8G+95Ud/obSX763AtnnbrSb0T7dzf8nmdZ5c37xNV79HX/9e7Spv8P6d9Uxe8pW3/1r5fbPk/B8ZUM3fgfe50GYkHQX8kdkbFTTiB8DCve9RvzAcOKrrijcIgqDvGMzdzgOi5Rt0EjHhKgg6lc5rQfes5Tv17esrv2+WmP/DA6oGbqnlq5CbLOYxS25SjWUbF5Z0YRYUeVDSHZLeX7D5dynd/MqykVkYxJK+VyhzGUnvSDq95Mt4ZZlLSQcW8ns7lz1O0slZfOSFkp/rSlpW0nU9eaZBEATtRhpaOQw0Wl1qtCQhN1ljDrlJ6sg2Svp/wPO2N8jnawH/rkk+5nW+04rpSlPrJwMfJ03AgjTLelKpjLLM5XnkiWN5KdRONdEQJTWyS2wfWr4ZSf+StI3tO6s/giAIgt5jIHYnV6XVMd+Qm6Sh3GQ9VgBmzYq2/ajtt5rYl5kOPFy7J9IM6UtLNlVkLqvwJ/pWzjIIgqApg3nMt9XKN+QmE/XkJotLeGrrbc8FvpM/QL4vaY0Kfpa5GNhX0srADOC50vVmMpf1GKU5u50XyvEhLxkEQYcxeDWuetvjeUJuMjOXbKPtccCqJBGN9wBjcjdxK1wHfIhUsc6h7aiuZS7rcUnBz5GFtcQN5SWDIAj6A0mVw0Cjp5XvoJSbBIpyk3+tYza9Tv6N8ptm+wrbXyN9KLQ0uSu3zv8O/DdpSVSRoszlk8yWuewODeUlFdrOQRD0C4N3a4VWK9+Qm0zMkptshqRtai3RPL68LrMlJVvhf4HvZMWxWt6VZS4r0lBe0vbZtjezvdlBB42qZxIEQdB2xNDKYaDRUuUbcpOzKMpN1iiO+dZkG1cDbpU0kfRs7mfu1muX2J5k+4JSdBWZy3qUx3y3zvEhLxkEQUcxmCdcDUiRDYXcZNuRdBtJT/rl5pYhshEEncpgE9l4c8bdld83Cw7dakDVwANvilgvo3lQbjJ3+f+k64o3CIKgL4kx346iN1u9tk+2/X7bd3Rh97ztq5rZDBRsv2D7T/3tRxB0GgsNr7KKMegtxJDKYaDRqsJVEARBEPQJA3GrwKp05J0pNKSLeVTRkN5R0lRJYyU9LOm4bF9WIPtIXjL0sNK+wqfm+EMlHdieuw6CIGgX0e3c1yxJ0pDuM9RmZW5lDWnbRQ3plitfkoZ0UUu7KOYx0vYrOf522xuTdKr/S9KmxUwkrQ+cDvyX7XWA9Una0ZCERQ7vhm9BEAS9xmDudu5Uj0NDmpY0pGdh+3WSKMdqpUvfBk6y/Ui2e9f2Gfn4DWCKpC2qlhMEQdD7RMu3rwkN6URVDelZSFqadO+TSpfWJ1XKjQht5yAIOorBvM63UyvfVpmnNaQz20kaS9rd6GTb5cq3KxpqO4e8ZBAE/YEGsbbzQJntPCg1pCUVNaTrbec3HViiom+32/54k+uTSNKb4xtcb6jtbPts4Ox0FiIbQRD0DQNRNrIqndryDQ3pRCUN6YqcAnw3jyPXxpmPLFxvqO0cBEHQP8SYb58SGtKzqKoh3SW2JwBHABdJephU0RY1oLcBBoVUZhAEg4N2dztL2k3So3mC61F1rkvSL/L1CZI2aftN1coaiNrOfYXmEQ1pSRsDR9r+bNfW0e0czDt0nlZyczrP355pO5tHK79vxFpNy8rLSR8j7Y/+LDAG2M/2QwWbjwKHkZaFbgn83PaW3XC9Szqy5TsvoM7SkF4G+J9eLiMIgqAl2jzbeQvgCduT8wqUi0nbsBbZA/itE/cAS6r5LnHdx3aECJUDcFC7bSPPyHMw5DnY7qe38uytABxEWjJZCweVru8NnFM4/yxwesnmGmDbwvlNwGa94W+0fINWOagX
bCPPyHMw5DnY7qe38uwVbJ9te7NCOLtkUq95XO7WrmLTFqLyDYIgCOYFngXeVzhfGXiuGzZtISrfIAiCYF5gDLCGpFWy5O++zBZoqnEV8Lk86/kDwFTb/+oNZwaKyEbQOZS7ctphG3lGnoMhz8F2P72VZ79g+11JhwLXA0OBc21Pqu0BYPtM4FrSTOcngDeAXtvtLZYaBUEQBEEfE93OQRAEQdDHROUbBEEQBH1MVL5BEARB0MfEhKugKZLWJqm+rERa7/YccJXth/vVsSAIggFMtHyDhkj6DkmCTaRdoMbk44vqiZJXzHMJSSdLekTS/+XwcI5bsmC3WynNb7LQ+R8kLdcpeQbVycs3tpT0SUl75eOu9HgXlbRJo2ceeXZ+nkED+lsSLELnBpII+Xx14ucHHi/F7VY4XgL4DWknqT8AyxWuXQ98B1i+ELd8jruxEPdA4fgc4PvA+4FvAH8qld1veTZ4bosCmwBL1rkmkmD7J4G98rEq/BYDOk9gV9Lyjb/mZ38OcF2O27Vgd0bheFvgadKOYM8AH408B1aeEZr8v9LfDkTo3AA8Ary/Tvz7gUdLcZUqtnK6Uh6PNshvXMmufN5veebzSi+jqi+3QZrnw8CIOs9yFeDhBr/RLcAm+XhV4P5S2sizw/OM0DjEmG/QjCOAmyQ9TnqZAgwHVgcObZJuM9sj8/FPJX2+cO0fkr4NXGD7eYDc5XtAoQyA90o6ktQCW1ySnP8PZ+7hkv7ME+ADhePvAXvafkDSqqR9qK/N134O7GJ7SjGxpFWyzTqDOM9hJOm+Mv8E5qsTD7C47QcAbE9W2hKuSOTZ+XkGDYjKN2iI7eskrUnaimslUqX1LDDG9oySedWKbRRwFHBrrswMPE+Sdft0we7XwGL5+ALStocvSFoeGFcquzfzHK3Z48H18izT7GXUnZfbYMnzXGCMpIuZ/fHyPpLE328KdmtLmkD6OxohaSnbL0saUqfsenkOJ/127cyzN/wcKHn29HkGDQiFq6AtSDquFHWG7VrF9mPbnyvYbgHY9hhJ6wG7kbq1rqUJkn5bzKcQvyXwiO2pkhYmVZqbAJOAH9iemu0OB660XW65NipvddJ45/uAd0lj4BfV8ivYvUHqkhUwAhheeBlNsL1+tvt/pIq73gvzUts/HKx5Ztt1mD1zvvYhd5Xn3Mz8/aWf4Tnb70haBtje9hWlZ9+dPP9l++0+znNdYPc233tv5Nn23yioT1S+Qa8j6UDb5+Xj44CPkFpXN5Ja1bcCuwDX2z4p25UFzwF2Bm4GsL17If9JwEZO2q1nA68DfwQ+mOM/me2m5mtPAhcBl9l+oYHPhwMfB24jab2OA14mVcZfsz26YFv5RVzlhdmNPLt8YXaCn32BpPfa/k9fljnQaOUZSVra9v/1tk/zJO0aPI4QoVEAni4cTySJmi8MvErqqgRYiNRSqtmNBX4P7AjskP/9Vz7eoZR/3ckg+XxcKc8hpAlFvwFeIE0k+jywWCndRGBoPl4YGJ2PhwNj+/uZtvG3WboDfPhr4Xhx4IfA74D9S3ZnlM7fUydMAZYC3lOwK87EX5IGM/Frfz/AMcCqXfi8GWmy0e9JvQI3AlNJy/E2LtkuAZxMmsD4fzk8nOOWbPUZ5fPlgV8BvwSWBo7P93QpsEKTZ7R0vWeUbU8GlsnHmwKTgceBfxT/nys8o9X6+29nIIdY5xu0BaX1svXCRKC4hvZd2zNsvwE8aftVANvTgZkFu02BvwNHk7b1Gg1Mt32r7VtLxT8oqbb7yHhJm2Wf1gTeKdjZ9kzbN9j+IrAicAap23tynduqzYlYgDxWbPtpSuNaqr7OeHFJP5T0O0n7lfI4o3S+vKRfSfqlpKUlHS9poqRLJa1QsCuvXT5Hjdcun5xbpEjaTNJk4F5J/5C0Q8FuM0m3SPq9pPdJulHSK5LGSNq4lOeikk6UNEnSVEkvSLpH0gElu00ahE2BkQXT80gt7T8C+0r6o6QF8rXi5C6AF0l/I8WwEqlyuL9g94PC8amkj7hPkCrKs0p5LkWqoEdLuk/SNyStyNycAfwY+AtwF3CW7SVIQx5nlGwvJfWa7Gh7adtLAzsBrwCXdeMZAZwPPEQaFrgFmE7qqbkdOLPJM7q/wTMC+JjtFwvPaZTtNYAPAf9b5xnd0sUzCprR37V/hMERSJORRpKWFxXDCNKYUM3uXmDhfDykEL8EpVZrjl+Z9II6nUILumSzBOll9GTO/x1SZXorqdu5Zje2if8Llc6/TmpJnE1qsRyY45cFbivZVl1n/EdS62JP0sStPwIL5GvlFvt1wGGkl/mEnNfwHPfngl0ra5cnFo5vATbPx2tSWCJCElT5CLAf6eW+d47/IHB3Kc8/k2aArwwcCfwPsAZpQtsPCnYzSEMGt9QJ0wt240r5Hw3cSWqxlZ/RN/Nz2qAQ91Sd37aVJWZF2+1IFem/s58H1ftbKv9dlv/OqL4UrtIzqlD+uMJxpWeU4x8BhuXje5r87VR6RhGah353IMLgCKSuvG0bXPtD4XiBBjbLFF8Qda5/rPgyb2CzGLARqdW8XJ3ra7Z4T+sBewNrd2FX9eU6rnStWcUytnDc7OXaSsVS9eXarOyxpfPxpfMx+d8hpElwtfgHgTUaPKNnCscPU/goy3GfJ02e+0edtLWPs5/k339yHZtnSR8G/036KFPh2oSSbb0PwKGk3pHzCnF3k4Yv9iF1y+6Z43dg7vWzNwDfZk6xmeVIH1R/a/UZlZ878P3StfI9dfmMst1h2dedSd3YPwO2B04AftfqM4rQPMRSo6AtOHXjNrq2f+H4rQY2L5K6yBrl8RdSF18zH14Dxje5/liz9HXsJ5Fe+l3xD1VbE7yApCG2Z+b8T5L0LGlS16KlPItDQr8tXSsu4Wll7fIvgWslnQxcJ+lnwBWkFu24gt2bknYl9ShY0p62/5S7pstLzF6XtK3tOyR9Angp39tMaQ5ZwuPr+FPjsMLx1aSX/99qEbYvkPQ8cFo5oe1ngX1y2TeSxufLtLLEbK6/EadlddflUONgUrfzTODDwFclnU9ajvXlUhZVl8IdT7VnBPBnSYvanmb7mFqk0gz9Oe6h4jPC9ml5mOirpN6QYfnfP5F6VGpUfUZBM/q79o8QYaAH0hjYj0gty5dyeDjHLVWw+zFJvKKcfjfmlus8EVi0ju3qwOWF8+NKYdkcvzzw2zrpdwQuIU0+m0gSwTiI3CLONhuRutL/CqxNEt14hfQhsnUpvw1J3dSvAHeQexdI3fOHl2zXJlX0i5bvv6LdR+rczyxb0qS99VvMc7dmeXbh5zot5LkFs7v51yO1wueSYizZrUtqsdeVbKxqW7LbgDRZqkqebfEzQv3Q7w5EiDCYA3msuF12AzVP4HDgUVIragqwR+Fasev8sCp2vZhnJdtc9iMV8zwOuIc0wemHwE3AsaQej6Ob2N1cz64V26pl95afEZr8/9HfDkSIMJgDDSaJddduoOZJamUvmo9H5Bf31/P52FbtBmCeVZbXVbIbSHlGaBxizDcIeoiS1F7dSxSWWVW1G4x5ktZ
MTwOwPUXSjsDlWaRD3bAbSHm+6zQm+oakOZbXSZrZDbuBlGfQgKh8g6DnLEeadPNyKV6kNaCt2g3GPP8taaTtcQC2p0n6OElPeINu2A2kPN+WtLDT2vZNZz0gaQnmXNte1W4g5Rk0or+b3hEiDPRA9WVWlewGaZ4rU1gHXbLbplW7AZZnpeV1Ve0GUp4RGofQdg6CIAiCPibkJYMgCIKgj4nKNwiCIAj6mKh8gyAIgqCPico3CIIgCPqY/w/ERFppM7WbXAAAAABJRU5ErkJggg==", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "sns.heatmap(ensembled_tasks.isna().transpose(),\n", - " cmap=\"YlGnBu\",\n", - " cbar_kws={'label': 'Missing Data'})" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Unnamed: 0 0.000000\n", - "slide_submitter_id 0.000000\n", - "sample_submitter_id 0.000000\n", - "Stromal score 0.007833\n", - "CAFs (MCP counter) 0.013055\n", - "CAFs (EPIC) 0.013055\n", - "CAFs (Bagaev) 0.013055\n", - "Endothelial cells (xCell) 0.013055\n", - "Endothelial cells (EPIC) 0.013055\n", - "Endothelium 0.013055\n", - "CD8 T cells (Thorsson) 0.796345\n", - "Cytotoxic cells 0.010444\n", - "Effector cells 0.013055\n", - "CD8 T cells (quanTIseq) 0.013055\n", - "TIL score 0.796345\n", - "Immune score 0.007833\n", - "tumor purity (ABSOLUTE) 0.023499\n", - "tumor purity (ESTIMATE) 0.007833\n", - "tumor purity (EPIC) 0.013055\n", - "dtype: float64" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "(ensembled_tasks.isna().transpose().sum(axis=1)) / len(ensembled_tasks) #" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "383" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(ensembled_tasks)" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "382\n" - ] - }, - { - "data": { - "text/plain": [ - "tumor purity (ABSOLUTE) 0.020942\n", - "tumor purity (ESTIMATE) 0.005236\n", - "tumor purity (EPIC) 0.010471\n", - "dtype: float64" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "tumor purity (ABSOLUTE) 8\n", - "tumor purity (ESTIMATE) 2\n", - "tumor purity (EPIC) 4\n", - "dtype: int64" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "370\n" - ] - } - ], - "source": [ - "tmp = ensembled_tasks[tumor_purity].dropna(how=\"all\")\n", - "print(len(tmp))\n", - "display((tmp.isna().transpose().sum(axis=1)) / len(tmp))\n", - "display((tmp.isna().transpose().sum(axis=1)))\n", - "print(len(tmp.dropna(how=\"any\")))" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "381\n" - ] - }, - { - "data": { - "text/plain": [ - "CD8 T cells (Thorsson) 0.795276\n", - "Cytotoxic cells 0.005249\n", - "Effector cells 0.007874\n", - "CD8 T cells (quanTIseq) 0.007874\n", - "TIL score 0.795276\n", - "Immune score 0.002625\n", - "dtype: float64" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "CD8 T cells (Thorsson) 303\n", - "Cytotoxic cells 2\n", - "Effector cells 3\n", - "CD8 T cells (quanTIseq) 3\n", - "TIL score 303\n", - "Immune score 1\n", - "dtype: int64" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "77\n" - ] - } - ], - "source": [ - "tmp = ensembled_tasks[T_cells].dropna(how=\"all\")\n", - "print(len(tmp))\n", - "display((tmp.isna().transpose().sum(axis=1)) / len(tmp))\n", - "display((tmp.isna().transpose().sum(axis=1)))\n", - "print(len(tmp.dropna(how=\"any\")))" - ] 
- }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "380\n" - ] - }, - { - "data": { - "text/plain": [ - "Stromal score 0.000000\n", - "CAFs (MCP counter) 0.005263\n", - "CAFs (EPIC) 0.005263\n", - "CAFs (Bagaev) 0.005263\n", - "dtype: float64" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "Stromal score 0\n", - "CAFs (MCP counter) 2\n", - "CAFs (EPIC) 2\n", - "CAFs (Bagaev) 2\n", - "dtype: int64" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "378\n" - ] - } - ], - "source": [ - "tmp = ensembled_tasks[CAFs].dropna(how=\"all\")\n", - "print(len(tmp))\n", - "display((tmp.isna().transpose().sum(axis=1)) / len(tmp))\n", - "display((tmp.isna().transpose().sum(axis=1)))\n", - "print(len(tmp.dropna(how=\"any\")))" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "378\n" - ] - }, - { - "data": { - "text/plain": [ - "Endothelial cells (xCell) 0.0\n", - "Endothelial cells (EPIC) 0.0\n", - "Endothelium 0.0\n", - "dtype: float64" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "Endothelial cells (xCell) 0\n", - "Endothelial cells (EPIC) 0\n", - "Endothelium 0\n", - "dtype: int64" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "378\n" - ] - } - ], - "source": [ - "tmp = ensembled_tasks[endothelial_cells].dropna(how=\"all\")\n", - "print(len(tmp))\n", - "display((tmp.isna().transpose().sum(axis=1)) / len(tmp))\n", - "display((tmp.isna().transpose().sum(axis=1)))\n", - "print(len(tmp.dropna(how=\"any\")))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.9.10 ('base')", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.10 | packaged by conda-forge | (main, Feb 1 2022, 21:27:43) \n[Clang 11.1.0 ]" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "aa98a7cfca054c33f70b1b98f933bb29fd610054927dca4b481403e32f3903ef" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Python/2_train_multitask_models/processing_transcriptomics.py b/Python/2_train_multitask_models/processing_transcriptomics.py deleted file mode 100644 index 70e6c22..0000000 --- a/Python/2_train_multitask_models/processing_transcriptomics.py +++ /dev/null @@ -1,243 +0,0 @@ -# Module imports -import os -import sys -import argparse -import joblib - -import numpy as np -import pandas as pd -import git -REPO_DIR= git.Repo('.', search_parent_directories=True).working_tree_dir -sys.path.append(f"{REPO_DIR}/Python/libs") -import model.preprocessing as preprocessing -from model.constants import TUMOR_PURITY, T_CELLS, ENDOTHELIAL_CELLS, CAFS, IDS, TILE_VARS - -# cancer_type="SKCM" -# slide_type="FFPE" -# # clinical_file = pd.read_csv("../../data/SKCM/slide.tsv", sep="\t") -# clinical_file = pd.read_csv("../../data/FFPE_generated_clinical_file.txt", sep="\t") - -# # Set paths: 1) folder with all published data, 2) folder with all computed data, 3) folder for storing the ensembled 
tasks
-# path_published_data = "../../data/published"
-# path_computed_features = "../../data/"
-# output_dir = "../../data/"
-
-def processing_transcriptomics(cancer_type, slide_type, clinical_file_path, tpm_path, output_dir, path_data=None):
-    """Compute and combine cell type abundances from different quantification methods, as needed for TF learning.
-
-    Args:
-        cancer_type (str): abbreviation of the cancer type
-        slide_type (str): type of slide, either 'FFPE' or 'FF'; used for naming and for merging data
-        clinical_file_path (str): path to the clinical file
-        tpm_path (str): path pointing to the TPM file
-        output_dir (str): folder where the dataframe containing all features is stored (as a .txt file)
-        path_data (str, optional): unused
-
-    Returns:
-        ./task_selection_names.pkl: pickle file containing variable names.
-        {output_dir}/TCGA_{cancer_type}_ensembled_selected_tasks.csv containing the following cell type quantification methods:
-        tumor_purity = [
-            'tumor purity (ABSOLUTE)',
-            'tumor purity (ESTIMATE)',
-            'tumor purity (EPIC)'
-        ]
-        T_cells = [
-            'CD8 T cells (Thorsson)',
-            'Cytotoxic cells',
-            'Effector cells',
-            'CD8 T cells (quanTIseq)',
-            'TIL score',
-            'Immune score',
-        ]
-        endothelial_cells = [
-            'Endothelial cells (xCell)',
-            'Endothelial cells (EPIC)',
-            'Endothelium',
-        ]
-        CAFs = [
-            'Stromal score',
-            'CAFs (MCP counter)',
-            'CAFs (EPIC)',
-            'CAFs (Bagaev)',
-        ]
-    """
-    full_output_dir = f"{output_dir}/2_TF_training"
-    if not os.path.exists(full_output_dir):
-        os.makedirs(full_output_dir)
-
-    var_dict = {
-        "CAFs": CAFS,
-        "T_cells": T_CELLS,
-        "tumor_purity": TUMOR_PURITY,
-        "endothelial_cells": ENDOTHELIAL_CELLS,
-        "IDs": IDS,
-        "tile_IDs": TILE_VARS
-    }
-    joblib.dump(var_dict, "./task_selection_names.pkl")
-    clinical_file = pd.read_csv(clinical_file_path, sep="\t")
-
-    var_IDs = ['sample_submitter_id', 'slide_submitter_id']
-    all_slide_features = clinical_file.loc[:, var_IDs]
-
-    # Published data
-    Thorsson = pd.read_csv(f"{REPO_DIR}/data/published/Thorsson_Scores_160_Signatures.tsv", sep="\t")
-    estimate = pd.read_csv(f"{REPO_DIR}/data/published/Yoshihara_ESTIMATE_{cancer_type}_RNAseqV2.txt", sep="\t")
-    tcga_absolute = pd.read_csv(f"{REPO_DIR}/data/published/TCGA_ABSOLUTE_tumor_purity.txt", sep="\t")
-    gibbons = pd.read_excel(f"{REPO_DIR}/data/published/Gibbons_supp1.xlsx", skiprows=2, sheet_name="DataFileS1 - immune features")
-
-    # Computed data: immunedeconv
-    mcp_counter = pd.read_csv(f"{output_dir}/immunedeconv/mcp_counter.csv", index_col=0, sep=",")
-    quantiseq = pd.read_csv(f"{output_dir}/immunedeconv/quantiseq.csv", index_col=0, sep=",")
-    xCell = pd.read_csv(f"{output_dir}/immunedeconv/xcell.csv", index_col=0, sep=",", header=[0])
-    EPIC = pd.read_csv(f"{output_dir}/immunedeconv/epic.csv", index_col=0, sep=",")
-
-    # (Re)compute Fges scores with TPM
-    Fges_computed = preprocessing.compute_gene_signature_scores(tpm_path)
-    Fges_computed = Fges_computed.loc[:, ["Effector_cells", "Endothelium", "CAF"]]
-    Fges_computed.columns = ["Effector cells", "Endothelium", "CAFs (Bagaev)"]
-
-    Fges_computed = Fges_computed.reset_index()
-    Fges_computed = Fges_computed.rename(columns={"index": "TCGA_sample"})
-
-    # From immunedeconv
-    quantiseq = preprocessing.process_immunedeconv(quantiseq, "quanTIseq")
-    EPIC = preprocessing.process_immunedeconv(EPIC, "EPIC")
-    mcp_counter = preprocessing.process_immunedeconv(mcp_counter, "MCP")
-    xCell = preprocessing.process_immunedeconv(xCell, "xCell")
-
-    # Merge cell fractions
-    cellfrac = pd.merge(xCell, quantiseq, on=["TCGA_sample"])
on=["TCGA_sample"]) - cellfrac = pd.merge(cellfrac, mcp_counter, on=["TCGA_sample"]) - cellfrac = pd.merge(cellfrac, EPIC, on=["TCGA_sample"]) - - # Merge cell fractions - cellfrac = pd.merge(xCell, quantiseq, on=["TCGA_sample"]) - cellfrac = pd.merge(cellfrac, mcp_counter, on=["TCGA_sample"]) - cellfrac = pd.merge(cellfrac, EPIC, on=["TCGA_sample"]) - - # estimate data - estimate = estimate.rename(columns={"ID": "TCGA_sample"}) - estimate = estimate.set_index("TCGA_sample") - estimate.columns = ["Stromal score", "Immune score", "ESTIMATE score"] - - # According the tumor purity formula provided in the paper - estimate["tumor purity (ESTIMATE)"] = np.cos( - 0.6049872018 + .0001467884 * estimate["ESTIMATE score"]) - estimate = estimate.drop(columns=["ESTIMATE score"]) - - # Thorsson data - Thorsson = Thorsson.drop(columns="Source") - Thorsson = Thorsson.set_index("SetName").T - Thorsson = Thorsson.rename_axis(None, axis=1) - Thorsson.index.name="TCGA_aliquot" - Thorsson = Thorsson.loc[:, ["LIexpression_score", "CD8_PCA_16704732"]] - Thorsson.columns = ["TIL score", "CD8 T cells (Thorsson)"] - - # TCGA PanCanAtlas - tcga_absolute = tcga_absolute.rename(columns = {"purity": "tumor purity (ABSOLUTE)", "sample": "TCGA_aliquot"}) - tcga_absolute = tcga_absolute.set_index("TCGA_aliquot") - tcga_absolute = pd.DataFrame(tcga_absolute.loc[:, "tumor purity (ABSOLUTE)"]) - - gibbons = gibbons.rename(columns={'Unnamed: 1': "id"}) - gibbons["slide_submitter_id"] = gibbons["id"].str[0:23] - gibbons["Cytotoxic cells"] = gibbons["Cytotoxic cells"].astype(float) - gibbons = gibbons.set_index("slide_submitter_id") - - all_slide_features["TCGA_sample"] = clinical_file["slide_submitter_id"].str[0:15] - - # add IDs - Thorsson["TCGA_sample"] = Thorsson.index.str[0:15] - tcga_absolute["TCGA_sample"] = tcga_absolute.index.str[0:15] - gibbons["TCGA_sample"] = gibbons.index.str[0:15] - - tcga_absolute_merged = pd.merge(all_slide_features, tcga_absolute, on=["TCGA_sample", ], how="left") - Thorsson_merged = pd.merge(all_slide_features, Thorsson, on=["TCGA_sample",], how="left") - gibbons_merged = pd.merge(all_slide_features, gibbons, on=["TCGA_sample"], how="left") - - cellfrac_merged = pd.merge(all_slide_features, cellfrac, on=["TCGA_sample"], how="left") - estimate_merged = pd.merge(all_slide_features, estimate, on=["TCGA_sample" ], how="left") - Fges_computed_merged = pd.merge(all_slide_features, Fges_computed, on=["TCGA_sample"], how="left") - - # Combine in one dataframe - all_merged = pd.merge(all_slide_features, tcga_absolute_merged, how="left") - all_merged = pd.merge(all_merged, Thorsson_merged ,how="left") - all_merged = pd.merge(all_merged, gibbons_merged,how="left") - all_merged = pd.merge(all_merged, estimate_merged, how="left") - all_merged = pd.merge(all_merged, cellfrac_merged, how="left") - all_merged = pd.merge(all_merged, Fges_computed_merged, how="left") - - # ---- Transform features to get a normal distribution (immunedeconv) ---- # - featuresnames_transform = ["CAFs (MCP counter)", - 'CAFs (EPIC)',] - feature_data = all_merged.loc[:, CAFS].astype(float) - data_log2_transformed = feature_data.copy() - data_log2_transformed[featuresnames_transform] = np.log2(feature_data[featuresnames_transform] * 100 + 0.001) - CAFs_transformed = data_log2_transformed - - featuresnames_transform = ["Endothelial cells (xCell)", - "Endothelial cells (EPIC)",] - feature_data = all_merged.loc[:, ENDOTHELIAL_CELLS].astype(float) - data_log2_transformed = feature_data.copy() - 
-    data_log2_transformed[featuresnames_transform] = np.log2(feature_data[featuresnames_transform] * 100 + 0.001)
-    endothelial_cells_transformed = data_log2_transformed
-
-    feature_data = all_merged.loc[:, T_CELLS].astype(float)
-    featuresnames_transform = ['CD8 T cells (quanTIseq)']
-    data_log2_transformed = feature_data.copy()
-    data_log2_transformed[featuresnames_transform] = np.log2(feature_data[featuresnames_transform] * 100 + 0.001)
-    T_cells_transformed = data_log2_transformed
-
-    feature_data = all_merged.loc[:, TUMOR_PURITY].astype(float)
-    featuresnames_transform = ["tumor purity (EPIC)"]
-    data_log2_transformed = feature_data.copy()
-    data_log2_transformed[featuresnames_transform] = np.log2(feature_data[featuresnames_transform] * 100 + 0.001)
-    tumor_cells_transformed = data_log2_transformed
-
-    # Store processed data
-    IDs = ['slide_submitter_id', 'sample_submitter_id', "TCGA_sample"]
-    metadata = all_merged[IDs]
-    merged = pd.concat([
-        metadata,
-        CAFs_transformed, endothelial_cells_transformed, T_cells_transformed, tumor_cells_transformed], axis=1)
-    merged = merged.fillna(np.nan)
-
-    # Remove slides that have no values at all
-    merged = merged.dropna(axis=0, subset=T_CELLS + CAFS + ENDOTHELIAL_CELLS + TUMOR_PURITY, how="all")
-    merged.to_csv(f"{full_output_dir}/ensembled_selected_tasks.csv", sep="\t")
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description='Process transcriptomics data for use in TF learning')
-    parser.add_argument(
-        "--cancer_type",
-        help="Abbreviation of cancer type for naming of generated files",
-    )
-    parser.add_argument(
-        "--clinical_file_path",
-        help="Full path to clinical file", default=None
-    )
-    parser.add_argument(
-        "--slide_type",
-        help="Type of pathology slides, either 'FF' (fresh frozen) or 'FFPE' (formalin-fixed, paraffin-embedded); 'FF' by default",
-        type=str, required=True
-    )
-    parser.add_argument(
-        "--tpm_path", help="Path to TPM file", type=str, required=True
-    )
-
-    parser.add_argument(
-        "--output_dir", help="Path to folder for generated file")
-    args = parser.parse_args()
-
-    # old_stdout = sys.stdout
-    # log_file = open(f"{REPO_DIR}/logs/processing_transcriptomics.log", "w")
-    # sys.stdout = log_file
-
-    processing_transcriptomics(
-        cancer_type=args.cancer_type,
-        slide_type=args.slide_type,
-        tpm_path=args.tpm_path,
-        clinical_file_path=args.clinical_file_path,
-        output_dir=args.output_dir,
-    )
-
-    # sys.stdout = old_stdout
-    # log_file.close()
diff --git a/Python/2_train_multitask_models/run_TF_pipeline.py b/Python/2_train_multitask_models/run_TF_pipeline.py
deleted file mode 100644
index 8cc891e..0000000
--- a/Python/2_train_multitask_models/run_TF_pipeline.py
+++ /dev/null
@@ -1,220 +0,0 @@
-# Module imports
-import argparse
-import os
-import sys
-import dask.dataframe as dd
-import joblib
-import git
-import numpy as np
-import pandas as pd
-from sklearn import linear_model, metrics
-from sklearn.model_selection import GridSearchCV, GroupKFold
-from sklearn.preprocessing import StandardScaler
-
-# Custom imports
-import model.evaluate as meval
-import model.preprocessing as preprocessing
-import model.utils as utils
-
-def nested_cv_multitask(
-    output_dir,
-    category,
-    alpha_min,
-    alpha_max,
-    n_steps=40,
-    n_outerfolds=5,
-    n_innerfolds=10,
-    n_tiles=50,
-    split_level="sample_submitter_id",
-    slide_type="FF"
-):
-    """
-    Transfer learning to quantify the cell types on a tile level.
-    Use a nested cross-validation strategy to train a multi-task lasso algorithm. 
Tuning and evaluation based on spearman correlation. - - Args: - output_dir (str): Path pointing to folder where models will be stored - category (str): cell type - alpha_min (int): Min. value of hyperparameter alpha - alpha_max (int): Max. value of hyperparameter alpha - n_steps (int): Stepsize for grid [alpha_min, alpha_max] - slide_type (str): slide format (FF or FFPE) - n_outerfolds (int): Number of outer loops - n_innerfolds (int): Number of inner loops - n_tiles (int): Number of tiles to select per slide - split_level (str): Split level of slides for creating splits - - Returns: - {output_dir}/: Pickle files containing the created splits, selected tiles, learned models, scalers, and evaluation of the slides and tiles using the spearman correlation for both train and test sets - """ - # Hyperparameter grid for tuning - alphas = np.logspace(int(alpha_min), int(alpha_max), int(n_steps)) - scoring = meval.custom_spearmanr - N_JOBS = -1 - OUTPUT_PATH = f"{output_dir}/2_TF_training/models/{category}" - - print(slide_type) - - # Load data - var_names = joblib.load("./task_selection_names.pkl") - var_names['T_cells'] = ['Cytotoxic cells', 'Effector cells', 'CD8 T cells (quanTIseq)', 'Immune score'] - target_features = pd.read_csv(f"{output_dir}/2_TF_training/ensembled_selected_tasks.csv", sep="\t", index_col=0) - if slide_type == "FF": - bottleneck_features = pd.read_csv(f"{output_dir}/1_histopathological_features/features.txt", sep="\t", index_col=0) - elif slide_type == "FFPE": - bottleneck_features = dd.read_parquet(f"{output_dir}/1_histopathological_features/features.parquet") - - target_vars = var_names[category] - metadata_colnames = var_names["tile_IDs"] + var_names["IDs"] + var_names[category] - - if os.path.exists(OUTPUT_PATH): - print("Folder exists") - else: - os.makedirs(OUTPUT_PATH) - - # Preprocessing - IDs = ["sample_submitter_id", "slide_submitter_id"] - merged_data = preprocessing.clean_data( - bottleneck_features, target_features.loc[:, target_vars + IDs], slide_type - ) - total_tile_selection = utils.selecting_tiles(merged_data, n_tiles, slide_type) - X, Y = utils.split_in_XY( - total_tile_selection, metadata_colnames, var_names[category] - ) - - # TF learning - ## Create variables for storing - model_learned = dict.fromkeys(range(n_outerfolds)) - x_train_scaler = dict.fromkeys(range(n_outerfolds)) - y_train_scaler = dict.fromkeys(range(n_outerfolds)) - - multi_task_lasso = linear_model.MultiTaskLasso() - - ## Setup nested cv - sample_id = pd.factorize(total_tile_selection[split_level])[0] - cv_outer = GroupKFold(n_splits=n_outerfolds) - cv_inner = GroupKFold(n_splits=n_innerfolds) - cv_outer_splits = list(cv_outer.split(X, Y, groups=sample_id)) - - ## Storing scores - tiles_spearman_train = {} - tiles_spearman_test = {} - slides_spearman_train = {} - slides_spearman_test = {} - - print("Feature matrix dimensions [tiles, features]:", X.shape) - print("Response matrix dimensions:", Y.shape) - - ## Run nested cross-validation - for outerfold in range(n_outerfolds): - print(f"Outerfold {outerfold}") - train_index, test_index = cv_outer_splits[outerfold] - x_train, x_test = X.iloc[train_index], X.iloc[test_index] - y_train, y_test = Y.iloc[train_index], Y.iloc[test_index] - - ### Standardizing predictors - scaler_x = StandardScaler() - scaler_x.fit(x_train) - x_train_z = scaler_x.transform(x_train) - x_test_z = scaler_x.transform(x_test) - - ### Standardizing targets - scaler_y = StandardScaler() - scaler_y.fit(y_train) - y_train_z = scaler_y.transform(y_train) - y_test_z = 
scaler_y.transform(y_test) - grid = GridSearchCV( - estimator=multi_task_lasso, - param_grid=[{"alpha": alphas}], - cv=cv_inner, - scoring=metrics.make_scorer(scoring), - return_train_score=True, - n_jobs=N_JOBS, - ) - grid.fit(x_train_z, y_train_z, groups=sample_id[train_index]) - - ### Evaluate on tile level (spearmanr) - y_train_z_pred = grid.predict(x_train_z) - y_test_z_pred = grid.predict(x_test_z) - - tiles_spearman_train[outerfold] = meval.custom_spearmanr( - y_train_z, y_train_z_pred, in_gridsearch=False - ) - tiles_spearman_test[outerfold] = meval.custom_spearmanr( - y_test_z, y_test_z_pred, in_gridsearch=False - ) - - ### Evaluate on Slide level - #### For aggregation for evaluation in gridsearch for hyper parameter choosing - slide_IDs_train = total_tile_selection["slide_submitter_id"].iloc[train_index] - slide_IDs_test = total_tile_selection["slide_submitter_id"].iloc[test_index] - - ##### 1. Aggregate on slide level (averaging) - Y_train_true_agg, Y_train_pred_agg = meval.compute_aggregated_scores( - y_train_z, y_train_z_pred, target_vars, slide_IDs_train - ) - Y_test_true_agg, Y_test_pred_agg = meval.compute_aggregated_scores( - y_test_z, y_test_z_pred, target_vars, slide_IDs_test - ) - - ###### 2. Compute spearman correlation between ground truth and predictions - slides_spearman_train[outerfold] = meval.custom_spearmanr( - Y_train_true_agg, Y_train_pred_agg, in_gridsearch=False - ) - slides_spearman_test[outerfold] = meval.custom_spearmanr( - Y_test_true_agg, Y_test_pred_agg, in_gridsearch=False - ) - - ###### Store scalers for future predictions/later use - x_train_scaler[outerfold] = scaler_x - y_train_scaler[outerfold] = scaler_y - model_learned[outerfold] = grid - - ## Store for reproduction of outer scores - joblib.dump(cv_outer_splits, f"{OUTPUT_PATH}/cv_outer_splits.pkl") - joblib.dump(total_tile_selection, f"{OUTPUT_PATH}/total_tile_selection.pkl") - joblib.dump(model_learned, f"{OUTPUT_PATH}/outer_models.pkl") - joblib.dump(x_train_scaler, f"{OUTPUT_PATH}/x_train_scaler.pkl") - joblib.dump(y_train_scaler, f"{OUTPUT_PATH}/y_train_scaler.pkl") - joblib.dump(slides_spearman_train, f"{OUTPUT_PATH}/outer_scores_slides_train.pkl") - joblib.dump(slides_spearman_test, f"{OUTPUT_PATH}/outer_scores_slides_test.pkl") - joblib.dump(tiles_spearman_train, f"{OUTPUT_PATH}/outer_scores_tiles_train.pkl") - joblib.dump(tiles_spearman_test, f"{OUTPUT_PATH}/outer_scores_tiles_test.pkl") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--path_target_features", help="Path pointing to file containing the cell type abundances", - ) - parser.add_argument( - "--output_dir", help="Path pointing to folder where models will be stored" - ) - parser.add_argument( - "--path_bottleneck_features", help="Path pointing to file containing the histopathological features", - ) - parser.add_argument("--category", help="Cell type") - parser.add_argument("--alpha_min", help="Min. value of hyperparameter alpha") - parser.add_argument("--alpha_max", help="Max. 
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--path_target_features", help="Path pointing to file containing the cell type abundances",
-    )
-    parser.add_argument(
-        "--output_dir", help="Path pointing to folder where models will be stored"
-    )
-    parser.add_argument(
-        "--path_bottleneck_features", help="Path pointing to file containing the histopathological features",
-    )
-    parser.add_argument("--category", help="Cell type")
-    parser.add_argument("--alpha_min", type=int, help="Exponent of the smallest value of hyperparameter alpha (grid is 10**alpha_min .. 10**alpha_max)")
-    parser.add_argument("--alpha_max", type=int, help="Exponent of the largest value of hyperparameter alpha")
-    parser.add_argument("--n_steps", type=int, help="Number of alpha values in the grid [alpha_min, alpha_max]", default=40)
-    parser.add_argument("--n_innerfolds", type=int, help="Number of inner loops", default=10)
-    parser.add_argument("--n_outerfolds", type=int, help="Number of outer loops", default=5)
-    parser.add_argument("--n_tiles", type=int, help="Number of tiles to select per slide", default=50)
-    parser.add_argument("--split_level", help="Split level of slides for creating splits",
-        default="sample_submitter_id",
-    )
-    parser.add_argument("--slide_type", help="Type of tissue slide (FF or FFPE)")
-
-    args = parser.parse_args()
-    nested_cv_multitask(
-        output_dir=args.output_dir,
-        category=args.category,
-        alpha_min=args.alpha_min,
-        alpha_max=args.alpha_max,
-        n_steps=args.n_steps,
-        n_outerfolds=args.n_outerfolds,
-        n_innerfolds=args.n_innerfolds,
-        n_tiles=args.n_tiles,
-        split_level=args.split_level,
-        slide_type=args.slide_type
-    )
diff --git a/Python/3_spatial_characterization/compute_clustering_features.py b/Python/3_spatial_characterization/compute_clustering_features.py
deleted file mode 100644
index c1635a5..0000000
--- a/Python/3_spatial_characterization/compute_clustering_features.py
+++ /dev/null
@@ -1,262 +0,0 @@
-import multiprocessing
-import sys
-import joblib
-import pandas as pd
-from joblib import Parallel, delayed
-import argparse
-
-import features.clustering as clustering  # trunk-ignore(flake8/E402)
-import features.features as features  # trunk-ignore(flake8/E402)
-import features.graphs as graphs  # trunk-ignore(flake8/E402)
-
-NUM_CORES = multiprocessing.cpu_count()
-
-
-def compute_clustering_features(tile_quantification_path, output_dir, slide_type="FF", cell_types=None, graphs_path=None):
-
-    if cell_types is None:
-        cell_types = ["CAFs", "T_cells", "endothelial_cells", "tumor_purity"]
-
-    predictions = pd.read_csv(tile_quantification_path, sep="\t", index_col=0)
-    slides = predictions[["MFP", "slide_submitter_id"]].drop_duplicates().reset_index(drop=True)
-
-    #####################################
-    # ---- Constructing the graphs ---- #
-    #####################################
-
-    if graphs_path is None:
-        results = Parallel(n_jobs=NUM_CORES)(
-            delayed(graphs.construct_graph)(predictions=predictions,
-                                            slide_submitter_id=slide_submitter_id)
-            for _, slide_submitter_id in slides.to_numpy()
-        )
-        # Extract/format graphs
-        all_graphs = {
-            list(slide_graph.keys())[0]: list(slide_graph.values())[0]
-            for slide_graph in results
-        }
-        joblib.dump(
-            all_graphs, f"{output_dir}/{slide_type}_graphs.pkl")
-    else:
-        all_graphs = joblib.load(graphs_path)
-
-    ######################################################################
-    # ---- Fraction of cell type clusters (simultaneous clustering) ---- #
-    ######################################################################
-
-    # Spatially constrained hierarchical clustering (SCHC) on the quantifications of all cell types at once
-    slide_clusters = Parallel(n_jobs=NUM_CORES)(delayed(clustering.schc_all)(predictions, all_graphs[slide_submitter_id], slide_submitter_id) for subtype, slide_submitter_id in slides.to_numpy())
-    # Combine the tiles labeled with their cluster id for all slides
-    tiles_all_schc = pd.concat(slide_clusters, axis=0)
-
-    # Assign a cell type label based on the mean of all cluster means across all slides
-    all_slide_clusters_characterized = clustering.characterize_clusters(tiles_all_schc)
-
-    # Count the number of clusters per cell type for each slide
-    num_clust_by_slide = features.n_clusters_per_cell_type(all_slide_clusters_characterized)
-
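Note: clustering.schc_all is project code. Conceptually, spatially constrained hierarchical clustering can be sketched with scikit-learn by passing the slide's tile adjacency graph as a connectivity constraint; a minimal sketch with a hypothetical helper, assuming a NetworkX graph whose nodes are the row indices of tile_features:

import networkx as nx
from sklearn.cluster import AgglomerativeClustering

def schc_sketch(tile_features, graph, n_clusters=8):
    # The tile graph's adjacency restricts merges to spatial neighbours,
    # so the resulting clusters stay spatially contiguous on the slide.
    connectivity = nx.to_scipy_sparse_array(graph, nodelist=range(len(tile_features)))
    model = AgglomerativeClustering(n_clusters=n_clusters, linkage="ward",
                                    connectivity=connectivity)
    return model.fit_predict(tile_features)  # one cluster label per tile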
-    ######################################################################################
-    # ---- Fraction of highly abundant cell types (individual cell type clustering) ---- #
-    ######################################################################################
-
-    # Spatially constrained hierarchical clustering, run separately for each individual cell type
-    slide_indiv_clusters = Parallel(n_jobs=NUM_CORES)(delayed(clustering.schc_individual)(predictions, all_graphs[slide_submitter_id], slide_submitter_id) for subtype, slide_submitter_id in slides.to_numpy())
-    all_slide_indiv_clusters = pd.concat(slide_indiv_clusters, axis=0)
-
-    # Add metadata
-    all_slide_indiv_clusters = pd.merge(predictions, all_slide_indiv_clusters, on="tile_ID")
-
-    # Add abundance label 'high' or 'low' based on cluster means
-    slide_indiv_clusters_labeled = clustering.label_cell_type_map_clusters(all_slide_indiv_clusters)
-
-    # Count the fraction of 'high' clusters
-    frac_high = features.n_high_clusters(slide_indiv_clusters_labeled)
-
-    ##################################################################
-    # ---- Compute proximity features (simultaneous clustering) ---- #
-    ##################################################################
-
-    ## Computing proximity for clusters derived with all cell types simultaneously
-    clusters_all_schc_long = all_slide_clusters_characterized.melt(id_vars=["MFP", "slide_submitter_id", "cluster_label"], value_name="is_assigned", var_name="cell_type")
-    # Remove all cell types that are not assigned to the cluster
-    clusters_all_schc_long = clusters_all_schc_long[clusters_all_schc_long["is_assigned"]]
-    clusters_all_schc_long = clusters_all_schc_long.drop(columns="is_assigned")
-
-    results_schc_all = Parallel(n_jobs=NUM_CORES)(delayed(features.compute_proximity_clusters_pairs)(tiles_all_schc, slide_submitter_id, method="all") for _, slide_submitter_id in slides.to_numpy())
-    prox_all_schc = pd.concat(results_schc_all)
-
-    # Label clusters (a number) with the assigned cell types
-    prox_all_schc = pd.merge(prox_all_schc, clusters_all_schc_long, left_on=["slide_submitter_id", "cluster1"], right_on=["slide_submitter_id", "cluster_label"])
-    prox_all_schc = prox_all_schc.rename(columns={"cell_type": "cluster1_label"})
-    prox_all_schc = prox_all_schc.drop(columns=["cluster_label", "MFP"])
-
-    prox_all_schc = pd.merge(prox_all_schc, clusters_all_schc_long, left_on=["slide_submitter_id", "cluster2"], right_on=["slide_submitter_id", "cluster_label"])
-    prox_all_schc = prox_all_schc.rename(columns={"cell_type": "cluster2_label"})
-    # prox_all_schc = prox_all_schc.drop(columns=["cluster_label", "cluster1", "cluster2"])
-
-    # Order doesn't matter: x <-> y
-    prox_all_schc["pair"] = [f"{sorted([i, j])[0]}-{sorted([i, j])[1]}" for i, j in prox_all_schc[["cluster1_label", "cluster2_label"]].to_numpy()]
-    prox_all_schc = prox_all_schc[((prox_all_schc.cluster1 == prox_all_schc.cluster2) & (prox_all_schc.cluster2_label != prox_all_schc.cluster1_label)) | (prox_all_schc.cluster1 != prox_all_schc.cluster2)]
-    # prox_all_schc.to_csv(f"{output_dir}/{slide_type}_features_clust_all_schc_prox.txt", sep="\t")
-
-    slides = prox_all_schc[["MFP", "slide_submitter_id"]].drop_duplicates().to_numpy()
-
-    # Post-processing
-    results_schc_all = Parallel(n_jobs=NUM_CORES)(delayed(features.post_processing_proximity)(prox_df=prox_all_schc, slide_submitter_id=slide_submitter_id, subtype=subtype, method="all") for subtype, slide_submitter_id in slides)
-    all_prox_df = pd.concat(results_schc_all)
-    # Remove rows with a proximity of NaN
-    all_prox_df = all_prox_df.dropna(axis=0)
-
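Note: features.compute_proximity_clusters_pairs is project code. As a rough sketch of the idea, the proximity of two tile clusters on a slide can be expressed as an inverse function of the distance between their closest members; the helper and the Coord_X/Coord_Y column names below are hypothetical, and both clusters are assumed non-empty:

import numpy as np
from scipy.spatial.distance import cdist

def cluster_proximity_sketch(tiles, label_a, label_b):
    # Proximity as the inverse of the minimum tile-to-tile distance
    # between the two clusters (larger value = closer clusters).
    a = tiles.loc[tiles.cluster_label == label_a, ["Coord_X", "Coord_Y"]].to_numpy()
    b = tiles.loc[tiles.cluster_label == label_b, ["Coord_X", "Coord_Y"]].to_numpy()
    min_dist = cdist(a, b).min()
    return 1.0 / (1.0 + min_dist)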
-    ##########################################################################
-    # ---- Compute proximity features (individual cell type clustering) ---- #
-    ##########################################################################
-
-    ## Computing proximity for clusters derived for each cell type individually
-    # Between clusters
-    slides = (
-        predictions[["MFP", "slide_submitter_id"]].drop_duplicates().reset_index(drop=True))
-    results_schc_indiv = Parallel(n_jobs=NUM_CORES)(delayed(features.compute_proximity_clusters_pairs)(all_slide_indiv_clusters, slide_submitter_id, method="individual_between") for _, slide_submitter_id in slides.to_numpy())
-    prox_indiv_schc = pd.concat(results_schc_indiv)
-
-    # Formatting
-    prox_indiv_schc = pd.merge(prox_indiv_schc, slide_indiv_clusters_labeled, left_on=["slide_submitter_id", "cluster1_label", "cluster1"], right_on=["slide_submitter_id", "cell_type_map", "cluster_label"])
-    prox_indiv_schc = prox_indiv_schc.drop(columns=["cell_type_map", "MFP", "cluster_label"])
-    prox_indiv_schc = prox_indiv_schc.rename(columns={"is_high": "cluster1_is_high"})
-    prox_indiv_schc = pd.merge(prox_indiv_schc, slide_indiv_clusters_labeled, left_on=["slide_submitter_id", "cluster2_label", "cluster2"], right_on=["slide_submitter_id", "cell_type_map", "cluster_label"])
-    prox_indiv_schc = prox_indiv_schc.rename(columns={"is_high": "cluster2_is_high"})
-    prox_indiv_schc = prox_indiv_schc.drop(columns=["cell_type_map", "cluster_label"])
-
-    # Order matters
-    prox_indiv_schc["ordered_pair"] = [f"{i}-{j}" for i, j in prox_indiv_schc[["cluster1_label", "cluster2_label"]].to_numpy()]
-    prox_indiv_schc["comparison"] = [f"cluster1={i}-cluster2={j}" for i, j in prox_indiv_schc[["cluster1_is_high", "cluster2_is_high"]].to_numpy()]
-
-    # Post-processing
-    slides = prox_indiv_schc[["MFP", "slide_submitter_id"]].drop_duplicates().to_numpy()
-    results_schc_indiv = pd.concat(Parallel(n_jobs=NUM_CORES)(delayed(features.post_processing_proximity)(prox_df=prox_indiv_schc, slide_submitter_id=slide_submitter_id, subtype=subtype, method="individual_between") for subtype, slide_submitter_id in slides))
-
-    # Within clusters
-    slides = (
-        predictions[["MFP", "slide_submitter_id"]].drop_duplicates().reset_index(drop=True))
-    results_schc_indiv_within = Parallel(n_jobs=NUM_CORES)(delayed(features.compute_proximity_clusters_pairs)(all_slide_indiv_clusters, slide_submitter_id, method="individual_within") for _, slide_submitter_id in slides.to_numpy())
-    prox_indiv_schc_within = pd.concat(results_schc_indiv_within)
-
-    prox_indiv_schc_within = pd.merge(prox_indiv_schc_within, slide_indiv_clusters_labeled, left_on=["slide_submitter_id", "cell_type", "cluster1"], right_on=["slide_submitter_id", "cell_type_map", "cluster_label"])
-    prox_indiv_schc_within = prox_indiv_schc_within.drop(columns=["MFP", "cluster_label"])
-    prox_indiv_schc_within = prox_indiv_schc_within.rename(columns={"is_high": "cluster1_is_high", "cell_type_map": "cell_type_map1"})
-    prox_indiv_schc_within = pd.merge(prox_indiv_schc_within, slide_indiv_clusters_labeled, left_on=["slide_submitter_id", "cell_type", "cluster2"], right_on=["slide_submitter_id", "cell_type_map", "cluster_label"])
-    prox_indiv_schc_within = prox_indiv_schc_within.rename(columns={"is_high": "cluster2_is_high", "cell_type_map": "cell_type_map2"})
-    prox_indiv_schc_within = prox_indiv_schc_within.drop(columns=["cluster_label"])
-
-    # Order doesn't matter (only same cell type combinations)
-    prox_indiv_schc_within["pair"] = [f"{i}-{j}" for i, j in prox_indiv_schc_within[["cell_type_map1", "cell_type_map2"]].to_numpy()]
-    prox_indiv_schc_within["comparison"] = [f"cluster1={sorted([i,j])[0]}-cluster2={sorted([i,j])[1]}" for i, j in prox_indiv_schc_within[["cluster1_is_high", "cluster2_is_high"]].to_numpy()]
-
-    # prox_indiv_schc_within.to_csv(f"{output_dir}/{slide_type}_features_clust_indiv_schc_prox_within.txt", sep="\t")
-    slides = prox_indiv_schc_within[["slide_submitter_id", "MFP"]].drop_duplicates().to_numpy()
-    results_schc_indiv_within = pd.concat(Parallel(n_jobs=NUM_CORES)(delayed(features.post_processing_proximity)(prox_df=prox_indiv_schc_within, slide_submitter_id=slide_submitter_id, subtype=subtype, method="individual_within") for slide_submitter_id, subtype in slides))
-
-    # Concatenate within and between computed proximity values
-    prox_indiv_schc_combined = pd.concat([results_schc_indiv_within, results_schc_indiv])
-
-    # Remove rows with a proximity of NaN
-    prox_indiv_schc_combined = prox_indiv_schc_combined.dropna(axis=0)
-
-    ####################################
-    # ---- Compute shape features ---- #
-    ####################################
-
-    # Compute shape features based on clustering with all cell types simultaneously
-    slides = (
-        predictions[["MFP", "slide_submitter_id"]].drop_duplicates().reset_index(drop=True))
-
-    all_slide_clusters_characterized = all_slide_clusters_characterized.rename(columns=dict(zip(cell_types, [f"is_{cell_type}_cluster" for cell_type in cell_types])))
-    tiles_all_schc = pd.merge(tiles_all_schc, all_slide_clusters_characterized, on=["slide_submitter_id", "MFP", "cluster_label"])
-    res = pd.concat(Parallel(n_jobs=NUM_CORES)(delayed(features.compute_shape_features)(tiles=tiles_all_schc, slide_submitter_id=slide_submitter_id, subtype=subtype) for subtype, slide_submitter_id in slides.to_numpy()))
-    res = res.drop(axis=1, labels=["cluster_label"])
-    shape_feature_means = res.groupby(["slide_submitter_id", "cell_type"]).mean().reset_index()
-
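Note: the formatting block that follows applies the same long-to-wide pattern several times: pivot one value column against a label column, then rename the resulting columns into human-readable feature names. A minimal, self-contained sketch of that pattern with made-up example data:

import pandas as pd

df = pd.DataFrame({
    "slide_submitter_id": ["s1", "s1", "s2"],
    "pair": ["CAFs-T_cells", "CAFs-CAFs", "CAFs-T_cells"],
    "proximity": [0.8, 0.3, 0.5],
})
wide = df.pivot(index="slide_submitter_id", columns="pair")["proximity"]
# Turn pair labels into readable feature names, one column per pair.
wide.columns = [f'prox CC {col.replace("_", " ")} clusters' for col in wide.columns]
wide = wide.reset_index()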
-    ##############################################
-    # ---- Formatting all computed features ---- #
-    ##############################################
-
-    frac_high_sub = frac_high[frac_high["is_high"]].copy()
-    frac_high_sub = frac_high_sub.drop(columns=["is_high", "n_clusters", "n_total_clusters"])
-
-    frac_high_wide = frac_high_sub.pivot(index=["MFP", "slide_submitter_id"], columns=["cell_type_map"])["fraction"]
-    new_cols = [('fraction {0} clusters labeled high'.format(col)) for col in frac_high_wide.columns]
-    frac_high_wide.columns = new_cols
-    frac_high_wide = frac_high_wide.sort_index(axis="columns").reset_index()
-
-    num_clust_by_slide_sub = num_clust_by_slide.copy()
-    num_clust_by_slide_sub = num_clust_by_slide_sub.drop(columns=["is_assigned", "n_clusters"])
-
-    num_clust_slide_wide = num_clust_by_slide_sub.pivot(index=["MFP", "slide_submitter_id"], columns=["cell_type"])["fraction"]
-    new_cols = [('fraction {0} clusters'.format(col)) for col in num_clust_slide_wide.columns]
-    num_clust_slide_wide.columns = new_cols
-    num_clust_slide_wide = num_clust_slide_wide.sort_index(axis="columns").reset_index()
-
-    all_prox_df_wide = all_prox_df.pivot(index=["MFP", "slide_submitter_id"], columns=["pair"])["proximity"]
-    new_cols = [f'prox CC {col.replace("_", " ")} clusters' for col in all_prox_df_wide.columns]
-    all_prox_df_wide.columns = new_cols
-    all_prox_df_wide = all_prox_df_wide.reset_index()
-
-    prox_indiv_schc_combined.comparison = prox_indiv_schc_combined.comparison.replace(dict(zip(['cluster1=True-cluster2=True', 'cluster1=True-cluster2=False', 'cluster1=False-cluster2=True', 'cluster1=False-cluster2=False'], ["high-high", "high-low", "low-high", "low-low"])))
-    prox_indiv_schc_combined["pair (comparison)"] = [f"{pair} ({comp})" for pair, comp in prox_indiv_schc_combined[["pair", "comparison"]].to_numpy()]
-    prox_indiv_schc_combined = prox_indiv_schc_combined.drop(axis=1, labels=["pair", "comparison"])
-    prox_indiv_schc_combined_wide = prox_indiv_schc_combined.pivot(index=["MFP", "slide_submitter_id"], columns=["pair (comparison)"])["proximity"]
-    new_cols = [f'prox CC {col.replace("_", " ")}' for col in prox_indiv_schc_combined_wide.columns]
-    prox_indiv_schc_combined_wide.columns = new_cols
-    prox_indiv_schc_combined_wide = prox_indiv_schc_combined_wide.reset_index()
-
-    shape_feature_means_wide = shape_feature_means.pivot(index=["slide_submitter_id"], columns="cell_type")[["solidity", "roundness"]]
-    shape_feature_means_wide.columns = [f"{i.capitalize()} {j}" for i, j in shape_feature_means_wide.columns]
-    shape_feature_means_wide = shape_feature_means_wide.reset_index()
-
-    # Store features
-    all_features = pd.merge(frac_high_wide, num_clust_slide_wide, on=["MFP", "slide_submitter_id"])
-    all_features = pd.merge(all_features, all_prox_df_wide)
-    all_features = pd.merge(all_features, prox_indiv_schc_combined_wide)
-    all_features = pd.merge(all_features, shape_feature_means_wide)
-
-    tiles_all_schc = tiles_all_schc.drop(axis=1, columns=cell_types)  # drop the predicted probabilities
-    all_slide_indiv_clusters = all_slide_indiv_clusters.drop(axis=1, columns=cell_types)  # drop the predicted probabilities
-
-    ################################
-    # ---- Store all features ---- #
-    ################################
-
-    # tiles_all_schc (DataFrame): dataframe containing the metadata columns and the cluster_label (int)
-    # all_slide_clusters_characterized (DataFrame): dataframe containing the slide_submitter_id and the columns for the cell types filled with booleans (True if the cluster is assigned to that cell type)
-    # all_slide_indiv_clusters (DataFrame): dataframe containing the metadata columns and columns indicating to which cell type cluster each tile belongs
-    # slide_indiv_clusters_labeled (DataFrame): dataframe containing the slide_submitter_id, cell_type_map, cluster_label (int), and is_high (abundance)
-    # all_prox_df (DataFrame): dataframe containing slide_submitter_id, pair, proximity
-    # prox_indiv_schc_combined (DataFrame): dataframe containing slide_submitter_id, comparison (high/low abundance label), pair (cell type pair) and proximity
-    # shape_feature_means (DataFrame): dataframe containing slide_submitter_id, cell_type, solidity, roundness
-    frac_high.to_csv(f"{output_dir}/{slide_type}_num_clusters_per_cell_type_indiv_clustering.csv", sep="\t", index=False)
-    num_clust_by_slide.to_csv(f"{output_dir}/{slide_type}_num_clusters_per_cell_type_all_clustering.csv", sep="\t", index=False)
-    tiles_all_schc.to_csv(f"{output_dir}/{slide_type}_all_schc_tiles.csv", sep="\t", index=False)
-    all_slide_clusters_characterized.to_csv(f"{output_dir}/{slide_type}_all_schc_clusters_labeled.csv", sep="\t", index=False)
-    all_slide_indiv_clusters.to_csv(f"{output_dir}/{slide_type}_indiv_schc_tiles.csv", sep="\t", index=False)
-    slide_indiv_clusters_labeled.to_csv(f"{output_dir}/{slide_type}_indiv_schc_clusters_labeled.csv", sep="\t", index=False)
-    all_prox_df.to_csv(f"{output_dir}/{slide_type}_features_clust_all_schc_prox.csv", sep="\t", index=False)
-    prox_indiv_schc_combined.to_csv(f"{output_dir}/{slide_type}_features_clust_indiv_schc_prox.csv", sep="\t", index=False)
-    shape_feature_means.to_csv(f"{output_dir}/{slide_type}_features_clust_shapes.csv", sep="\t", index=False)
-    all_features.to_csv(f"{output_dir}/{slide_type}_clustering_features.csv", sep="\t", index=False)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Compute clustering features")
-    parser.add_argument("--tile_quantification_path", help="Path to csv file with tile-level quantification (predictions)")
-    parser.add_argument("--output_dir", help="Path to output folder to store generated files")
-    parser.add_argument("--slide_type", help="Type of slides 'FFPE' or 'FF' used for naming generated files (by default='FF')", default="FF")
-    parser.add_argument("--cell_types", help="List of cell types", default=None)  # TODO: adapt to external file with the cell types, easier for parsing
-    parser.add_argument("--graphs_path", help="Path to pkl with generated graphs in case this was done before (OPTIONAL); if not specified, graphs will be generated", default=None)
-
-    args = parser.parse_args()
-
-    compute_clustering_features(
-        tile_quantification_path=args.tile_quantification_path,
-        output_dir=args.output_dir,
-        slide_type=args.slide_type,
-        cell_types=args.cell_types,
-        graphs_path=args.graphs_path)
diff --git a/Python/3_spatial_characterization/compute_network_features.py b/Python/3_spatial_characterization/compute_network_features.py
deleted file mode 100644
index 9888206..0000000
--- a/Python/3_spatial_characterization/compute_network_features.py
+++ /dev/null
@@ -1,175 +0,0 @@
-import multiprocessing
-import sys
-import argparse
-import joblib
-from joblib import Parallel, delayed
-import pandas as pd
-
-
-import features.features as features  # trunk-ignore(flake8/E402)
-import features.graphs as graphs  # trunk-ignore(flake8/E402)
-import features.utils as utils  # trunk-ignore(flake8/E402)
-
-
-def compute_network_features(tile_quantification_path, output_dir, slide_type="FF", cell_types=None, graphs_path=None):
-    NUM_CORES = multiprocessing.cpu_count()
-
-    if cell_types is None:
-        cell_types = ["CAFs", "T_cells", "endothelial_cells", "tumor_purity"]
-
-    predictions = pd.read_csv(tile_quantification_path, sep="\t", index_col=0)
-    slides = predictions[["MFP", "slide_submitter_id"]].drop_duplicates().reset_index(drop=True)
-
-    #####################################
-    # ---- Constructing the graphs ---- #
-    #####################################
-
-    if graphs_path is None:
-        results = Parallel(n_jobs=NUM_CORES)(
-            delayed(graphs.construct_graph)(predictions=predictions, slide_submitter_id=slide_submitter_id)
-            for _, slide_submitter_id in slides.to_numpy()
-        )
-        # Extract/format graphs
-        all_graphs = {
-            list(slide_graph.keys())[0]: list(slide_graph.values())[0]
-            for slide_graph in results
-        }
-        joblib.dump(
-            all_graphs, f"{output_dir}/{slide_type}_graphs.pkl")
-    else:
-        all_graphs = joblib.load(graphs_path)
-
-    #######################################################
-    # ---- Compute connectedness and co-localization ---- #
-    #######################################################
-
-    all_largest_cc_sizes = []
-    all_dual_nodes_frac = []
-    for _, slide_submitter_id in slides.to_numpy():
-        slide_data = utils.get_slide_data(predictions, slide_submitter_id)
-        node_cell_types = utils.assign_cell_types(slide_data)
-        lcc = features.determine_lcc(
-            graph=all_graphs[slide_submitter_id], cell_type_assignments=node_cell_types
-        )
-        lcc["slide_submitter_id"] = slide_submitter_id
-        all_largest_cc_sizes.append(lcc)
-
-        dual_nodes_frac = features.compute_dual_node_fractions(node_cell_types)
-        dual_nodes_frac["slide_submitter_id"] = slide_submitter_id
-        all_dual_nodes_frac.append(dual_nodes_frac)
-
-    all_largest_cc_sizes = pd.concat(all_largest_cc_sizes, axis=0)
-    all_dual_nodes_frac = pd.concat(all_dual_nodes_frac, axis=0)
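Note: features.determine_lcc is project code. The underlying quantity, the fraction of a cell type's tiles that fall in that type's largest connected component, can be sketched with NetworkX; the helper is hypothetical and assumes cell_type_assignments maps each node to the collection of cell types assigned to it:

import networkx as nx

def lcc_fraction_sketch(graph, cell_type_assignments, cell_type):
    # Keep only nodes assigned to this cell type, then measure how much of
    # that subgraph sits in its largest connected component.
    nodes = [n for n in graph.nodes if cell_type in cell_type_assignments[n]]
    sub = graph.subgraph(nodes)
    if sub.number_of_nodes() == 0:
        return 0.0
    largest = max(nx.connected_components(sub), key=len)
    return len(largest) / sub.number_of_nodes()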
lcc["slide_submitter_id"] = slide_submitter_id - all_largest_cc_sizes.append(lcc) - - dual_nodes_frac = features.compute_dual_node_fractions(node_cell_types) - dual_nodes_frac["slide_submitter_id"] = slide_submitter_id - all_dual_nodes_frac.append(dual_nodes_frac) - - all_largest_cc_sizes = pd.concat(all_largest_cc_sizes, axis=0) - all_dual_nodes_frac = pd.concat(all_dual_nodes_frac, axis=0) - - ####################################################### - # ---- Compute N shortest paths with max. length ---- # - ####################################################### - - results = Parallel(n_jobs=NUM_CORES)( - delayed(features.compute_n_shortest_paths_max_length)( - predictions=predictions, slide_submitter_id=slide_submitter_id, graph=all_graphs[slide_submitter_id] - ) - for _, slide_submitter_id in slides.to_numpy() - ) - # Formatting and count the number of shortest paths with max length - all_shortest_paths_thresholded = pd.concat(results, axis=0) - all_shortest_paths_thresholded["n_paths"] = 1 - proximity_graphs = ( - all_shortest_paths_thresholded.groupby( - ["slide_submitter_id", "source", "target"] - ) - .sum() - .reset_index() - ) - # Post-processing - proximity_graphs["pair"] = [f"{source}-{target}" for source, target in proximity_graphs[["source", "target"]].to_numpy()] - proximity_graphs = proximity_graphs.drop(columns=["path_length"]) - - ############################################### - # ---- Compute ES based on ND difference ---- # - ############################################### - - nd_results= Parallel(n_jobs=NUM_CORES)(delayed(features.node_degree_wrapper)(all_graphs[slide_submitter_id], predictions, slide_submitter_id) for _, slide_submitter_id in slides.to_numpy()) - - # Format results - all_sims_nd = [] - all_mean_nd_df = [] - example_simulations = {} - for sim_assignments, sim, mean_nd_df in nd_results: - all_mean_nd_df.append(mean_nd_df) - all_sims_nd.append(sim) - example_simulations.update(sim_assignments) - - all_sims_nd = pd.concat(all_sims_nd, axis=0).reset_index() - all_mean_nd_df =pd.concat(all_mean_nd_df).reset_index(drop=True) - - # Testing normality - # shapiro_tests = Parallel(n_jobs=NUM_CORES)(delayed(utils.test_normality)(sims_nd=all_sims_nd, slide_submitter_id=slide_submitter_id) for slide_submitter_id in all_sims_nd.slide_submitter_id.unique()) - # all_shapiro_tests = pd.concat(shapiro_tests, axis=0) - # print(f"Number of samples from normal distribution { len(all_shapiro_tests) - all_shapiro_tests.is_not_normal.sum()}/{len(all_shapiro_tests)}") - - # Computing Cohen's d effect size and perform t-test - effect_sizes = Parallel(n_jobs=NUM_CORES)(delayed(features.compute_effect_size)(all_mean_nd_df, all_sims_nd, slide_submitter_id) for slide_submitter_id in all_sims_nd.slide_submitter_id.unique()) - all_effect_sizes = pd.concat(effect_sizes, axis=0) - all_effect_sizes["pair"] = [f"{c}-{n}" for c, n in all_effect_sizes[["center", "neighbor"]].to_numpy()] - - ######################## - # ---- Formatting ---- # - ######################## - all_largest_cc_sizes = all_largest_cc_sizes.reset_index(drop=True) - all_largest_cc_sizes_wide = all_largest_cc_sizes.pivot(index=["slide_submitter_id"], columns="cell_type")["type_spec_frac"] - new_cols = [f'LCC {col.replace("_", " ")} clusters' for col in all_largest_cc_sizes_wide.columns] - all_largest_cc_sizes_wide.columns = new_cols - all_largest_cc_sizes_wide = all_largest_cc_sizes_wide.reset_index() - - shortest_paths_wide = proximity_graphs.pivot(index=["slide_submitter_id"], columns="pair")["n_paths"] - new_cols = 
[f'Prox graph {col.replace("_", " ")} clusters' for col in shortest_paths_wide.columns] - shortest_paths_wide.columns = new_cols - shortest_paths_wide = shortest_paths_wide.reset_index() - - colocalization_wide = all_dual_nodes_frac.pivot(index=["slide_submitter_id"], columns="pair")["frac"] - new_cols = [f'Co-loc {col.replace("_", " ")} clusters' for col in colocalization_wide.columns] - colocalization_wide.columns = new_cols - colocalization_wide = colocalization_wide.reset_index() - - all_features = pd.merge(all_largest_cc_sizes_wide, shortest_paths_wide) - all_features = pd.merge(all_features, colocalization_wide) - - ################################ - # ---- Store all features ---- # - ################################ - - # all_effect_sizes (DataFrame): dataframe containing the slide_submitter_id, center, neighbor, effect_size (Cohen's d), Tstat, pval, and the pair (string of center and neighbor) - # all_sims_nd (DataFrame): dataframe containing slide_submitter_id, center, neighbor, simulation_nr and degree (node degree) - # all_mean_nd_df (DataFrame): dataframe containing slide_submitter_id, center, neighbor, mean_sim (mean node degree across the N simulations), mean_obs - # all_largest_cc_sizes (DataFrame): dataframe containing slide_submitter_id, cell type and type_spec_frac (fraction of LCC w.r.t. all tiles for cell type) - # shortest_paths_slide (DataFrame): dataframe containing slide_submitter_id, source, target, pair and n_paths (number of shortest paths for a pair) - # all_dual_nodes_frac (DataFrame): dataframe containing slide_submitter_id, pair, counts (absolute) and frac - - all_effect_sizes.to_csv( - f"{output_dir}/{slide_type}_features_ND_ES.csv", sep="\t", index=False) - all_sims_nd.to_csv( - f"{output_dir}/{slide_type}_features_ND_sims.csv", sep="\t", index=False) - all_mean_nd_df.to_csv( - f"{output_dir}/{slide_type}_features_ND.csv", sep="\t", index=False) - joblib.dump(example_simulations, - f"{output_dir}/{slide_type}_features_ND_sim_assignments.pkl") - - all_largest_cc_sizes_wide.to_csv(f"{output_dir}/{slide_type}_features_lcc_fraction.csv", sep="\t", index=False) - proximity_graphs.to_csv(f"{output_dir}/{slide_type}_features_shortest_paths_thresholded.csv", sep="\t", index=False) - all_dual_nodes_frac.to_csv(f"{output_dir}/{slide_type}_features_coloc_fraction.csv", sep="\t", index=False) - - all_features.to_csv(f"{output_dir}/{slide_type}_all_graph_features.csv", sep="\t", index=False) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Compute network features") - parser.add_argument("--tile_quantification_path", help="Path to csv file with tile-level quantification (predictions)") - parser.add_argument("--output_dir", help="Path to output folder to store generated files") - parser.add_argument("--slide_type", help="Type of slides 'FFPE' or 'FF' used for naming generated files (by default='FF')", default="FF") - parser.add_argument("--cell_types", help="List of cell types", default=None) - parser.add_argument("--graphs_path", help="Path to pkl with generated graphs in case this was done before (OPTIONAL) if not specified, graphs will be generated", default=None) - - args=parser.parse_args() - compute_network_features( - tile_quantification_path=args.tile_quantification_path, - output_dir=args.output_dir, - slide_type=args.slide_type, - cell_types=args.cell_types, - graphs_path=args.graphs_path) diff --git a/Python/3_spatial_characterization/computing_features.py b/Python/3_spatial_characterization/computing_features.py deleted file mode 
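Note: features.compute_n_shortest_paths_max_length is project code. Counting source-to-target shortest paths whose length stays under a cutoff can be sketched with NetworkX; the helper is hypothetical and assumes graph nodes carry a "cell_type" attribute:

import networkx as nx

def count_short_paths_sketch(graph, source_type, target_type, cutoff=2):
    # Count node pairs of the two cell types whose shortest path is <= cutoff.
    sources = [n for n, d in graph.nodes(data=True) if d.get("cell_type") == source_type]
    targets = {n for n, d in graph.nodes(data=True) if d.get("cell_type") == target_type}
    n_paths = 0
    for s in sources:
        lengths = nx.single_source_shortest_path_length(graph, s, cutoff=cutoff)
        n_paths += sum(1 for n in lengths if n in targets and n != s)
    return n_paths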
diff --git a/Python/3_spatial_characterization/computing_features.py b/Python/3_spatial_characterization/computing_features.py
deleted file mode 100755
index 2318296..0000000
--- a/Python/3_spatial_characterization/computing_features.py
+++ /dev/null
@@ -1,681 +0,0 @@
-import os
-import sys
-import joblib
-import pandas as pd
-from joblib import Parallel, delayed
-import argparse
-from os import path
-
-# Own modules
-import features.clustering as clustering
-import features.features as features
-import features.graphs as graphs
-import features.utils as utils
-from model.constants import DEFAULT_SLIDE_TYPE, DEFAULT_CELL_TYPES, NUM_CORES, METADATA_COLS
-
-
-def compute_network_features(tile_quantification_path, output_dir, slide_type=DEFAULT_SLIDE_TYPE, cell_types=None, graphs_path=None,
-                             abundance_threshold=0.5, shapiro_alpha=0.05, cutoff_path_length=2):
-    """
-    Compute network features:
-    1. effect sizes based on the difference in node degree between simulated slides and the actual slide
-    2. fraction of the largest connected component
-    3. number of shortest paths with a max. length
-
-    Args:
-        tile_quantification_path (str)
-        output_dir (str)
-        slide_type (str): type of slide, either 'FF' or 'FFPE'
-        cell_types (list): list of cell types
-        graphs_path (str): path to pkl file with generated graphs [optional]
-        abundance_threshold (float): threshold for assigning cell types to tiles based on the predicted probability (default=0.5)
-        shapiro_alpha (float): significance level for Shapiro tests for normality (default=0.05)
-        cutoff_path_length (int): max. length of shortest paths (default=2)
-
-    Returns:
-        all_effect_sizes (DataFrame): dataframe containing the slide_submitter_id, center, neighbor, effect_size (Cohen's d), Tstat, pval, and the pair (string of center and neighbor)
-        all_sims_nd (DataFrame): dataframe containing slide_submitter_id, center, neighbor, simulation_nr and degree (node degree)
-        all_mean_nd_df (DataFrame): dataframe containing slide_submitter_id, center, neighbor, mean_sim (mean node degree across the N simulations), mean_obs
-        all_largest_cc_sizes (DataFrame): dataframe containing slide_submitter_id, cell type and type_spec_frac (fraction of the LCC w.r.t. all tiles for the cell type)
-        shortest_paths_slide (DataFrame): dataframe containing slide_submitter_id, source, target, pair and n_paths (number of shortest paths for a pair)
-        all_dual_nodes_frac (DataFrame): dataframe containing slide_submitter_id, pair, counts (absolute) and frac
-
-    """
-    if cell_types is None:
-        cell_types = DEFAULT_CELL_TYPES
-
-    predictions = pd.read_csv(tile_quantification_path, sep="\t")
-    slide_submitter_ids = list(set(predictions.slide_submitter_id))
-
-    #####################################
-    # ---- Constructing the graphs ---- #
-    #####################################
-
-    if graphs_path is None:
-        results = Parallel(n_jobs=NUM_CORES)(
-            delayed(graphs.construct_graph)(
-                predictions=predictions, slide_submitter_id=id)
-            for id in slide_submitter_ids
-        )
-        # Extract/format graphs
-        all_graphs = {
-            list(slide_graph.keys())[0]: list(slide_graph.values())[0]
-            for slide_graph in results
-        }
-        joblib.dump(all_graphs, f"{output_dir}/{slide_type}_graphs.pkl")
-    else:
-        all_graphs = joblib.load(graphs_path)
-
-    #######################################################
-    # ---- Compute connectedness and co-localization ---- #
-    #######################################################
-
-    all_largest_cc_sizes = []
-    all_dual_nodes_frac = []
-    for id in slide_submitter_ids:
-        slide_data = utils.get_slide_data(predictions, id)
-        node_cell_types = utils.assign_cell_types(
-            slide_data=slide_data, cell_types=cell_types, threshold=abundance_threshold)
-        lcc = features.determine_lcc(
-            graph=all_graphs[id], cell_type_assignments=node_cell_types, cell_types=cell_types
-        )
-        lcc["slide_submitter_id"] = id
-        all_largest_cc_sizes.append(lcc)
-
-        dual_nodes_frac = features.compute_dual_node_fractions(
-            node_cell_types, cell_types)
-        dual_nodes_frac["slide_submitter_id"] = id
-        all_dual_nodes_frac.append(dual_nodes_frac)
-
-    all_largest_cc_sizes = pd.concat(all_largest_cc_sizes, axis=0)
-    all_dual_nodes_frac = pd.concat(all_dual_nodes_frac, axis=0)
-
-    #######################################################
-    # ---- Compute N shortest paths with max. length ---- #
-    #######################################################
-
-    results = Parallel(n_jobs=NUM_CORES)(
-        delayed(features.compute_n_shortest_paths_max_length)(
-            predictions=predictions, slide_submitter_id=id, graph=all_graphs[id], cutoff=cutoff_path_length
-        )
-        for id in slide_submitter_ids
-    )
-    # Format and count the number of shortest paths with max. length
-    all_shortest_paths_thresholded = pd.concat(results, axis=0)
-    all_shortest_paths_thresholded["n_paths"] = 1
-    proximity_graphs = (
-        all_shortest_paths_thresholded.groupby(
-            ["slide_submitter_id", "source", "target"]
-        )
-        .sum(numeric_only=True)
-        .reset_index()
-    )
-    # Post-processing
-    proximity_graphs["pair"] = [f"{source}-{target}" for source,
-                                target in proximity_graphs[["source", "target"]].to_numpy()]
-    proximity_graphs = proximity_graphs.drop(columns=["path_length"])
-
-    # ---- Formatting ---- #
-    all_largest_cc_sizes = all_largest_cc_sizes.reset_index(drop=True)
-    all_largest_cc_sizes_wide = all_largest_cc_sizes.pivot(
-        index=["slide_submitter_id"], columns="cell_type")["type_spec_frac"]
-    new_cols = [
-        f'LCC {col.replace("_", " ")} clusters' for col in all_largest_cc_sizes_wide.columns]
-    all_largest_cc_sizes_wide.columns = new_cols
-    all_largest_cc_sizes_wide = all_largest_cc_sizes_wide.reset_index()
-
-    shortest_paths_wide = proximity_graphs.pivot(
-        index=["slide_submitter_id"], columns="pair")["n_paths"]
-    new_cols = [
-        f'Prox graph {col.replace("_", " ")} clusters' for col in shortest_paths_wide.columns]
-    shortest_paths_wide.columns = new_cols
-    shortest_paths_wide = shortest_paths_wide.reset_index()
-
-    colocalization_wide = all_dual_nodes_frac.pivot(
-        index=["slide_submitter_id"], columns="pair")["frac"]
-    new_cols = [
-        f'Co-loc {col.replace("_", " ")} clusters' for col in colocalization_wide.columns]
-    colocalization_wide.columns = new_cols
-    colocalization_wide = colocalization_wide.reset_index()
-
-    all_features = pd.merge(all_largest_cc_sizes_wide, shortest_paths_wide)
-    all_features = pd.merge(all_features, colocalization_wide)
-
-    # ---- Save to file ---- #
-    all_largest_cc_sizes_wide.to_csv(
-        f"{output_dir}/{slide_type}_features_lcc_fraction.csv", sep="\t", index=False)
-    proximity_graphs.to_csv(
-        f"{output_dir}/{slide_type}_features_shortest_paths_thresholded.csv", sep="\t", index=False)
-    all_dual_nodes_frac.to_csv(
-        f"{output_dir}/{slide_type}_features_coloc_fraction.csv", sep="\t", index=False)
-    all_features.to_csv(
-        f"{output_dir}/{slide_type}_all_graph_features.csv", sep="\t", index=False)
-
-    ###############################################
-    # ---- Compute ES based on ND difference ---- #
-    ###############################################
-    # Remove one slide for which node degree could not be resolved (no node with 8 neighbours)
-    # problematic_slide = 'TCGA-D3-A2JE-06A-01-TS1'  # just 63 tiles
-    # filtered_slides = list(filter(lambda id: id != problematic_slide, slide_submitter_ids))
-    nd_results = Parallel(n_jobs=NUM_CORES)(delayed(features.node_degree_wrapper)(
-        all_graphs[id], predictions, id) for id in slide_submitter_ids)
-    nd_results = list(filter(lambda res: res is not None, nd_results))
-
-    # Format results
-    all_sims_nd = []
-    all_mean_nd_df = []
-    example_simulations = {}
-
-    for sim_assignments, sim, mean_nd_df in nd_results:
-        all_mean_nd_df.append(mean_nd_df)
-        all_sims_nd.append(sim)
-        example_simulations.update(sim_assignments)
-
-    all_sims_nd = pd.concat(all_sims_nd, axis=0).reset_index()
-    all_mean_nd_df = pd.concat(all_mean_nd_df).reset_index(drop=True)
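Note: features.compute_effect_size is project code. Cohen's d for the observed mean node degree against the simulated node-degree distribution, plus a one-sample t-test, can be sketched as follows (hypothetical helper):

import numpy as np
from scipy import stats

def effect_size_sketch(mean_obs, sim_degrees):
    # Cohen's d: standardized difference between the observed mean node
    # degree and the mean of the simulated node degrees.
    sim_degrees = np.asarray(sim_degrees, dtype=float)
    d = (mean_obs - sim_degrees.mean()) / sim_degrees.std(ddof=1)
    tstat, pval = stats.ttest_1samp(sim_degrees, popmean=mean_obs)
    return d, tstat, pval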
-
-    # Testing normality
-    shapiro_tests = Parallel(n_jobs=NUM_CORES)(delayed(utils.test_normality)(sims_nd=all_sims_nd, slide_submitter_id=id,
-                                                                             alpha=shapiro_alpha, cell_types=cell_types) for id in all_sims_nd.slide_submitter_id.unique())
-    all_shapiro_tests = pd.concat(shapiro_tests, axis=0)
-
-    # Computing Cohen's d effect size and performing a t-test
-    effect_sizes = Parallel(n_jobs=NUM_CORES)(delayed(features.compute_effect_size)(
-        all_mean_nd_df, all_sims_nd, slide_submitter_id) for slide_submitter_id in all_sims_nd.slide_submitter_id.unique())
-    all_effect_sizes = pd.concat(effect_sizes, axis=0)
-    all_effect_sizes["pair"] = [
-        f"{c}-{n}" for c, n in all_effect_sizes[["center", "neighbor"]].to_numpy()]
-
-    # ---- Save to file ---- #
-    all_effect_sizes.to_csv(
-        f"{output_dir}/{slide_type}_features_ND_ES.csv", sep="\t", index=False)
-    all_sims_nd.to_csv(
-        f"{output_dir}/{slide_type}_features_ND_sims.csv", sep="\t", index=False)
-    all_mean_nd_df.to_csv(
-        f"{output_dir}/{slide_type}_features_ND.csv", sep="\t", index=False)
-    joblib.dump(example_simulations,
-                f"{output_dir}/{slide_type}_features_ND_sim_assignments.pkl")
-    all_shapiro_tests.to_csv(
-        f"{output_dir}/{slide_type}_shapiro_tests.csv", index=False, sep="\t")
-
-
-def compute_clustering_features(
-        tile_quantification_path, output_dir, slide_type=DEFAULT_SLIDE_TYPE, cell_types=None, graphs_path=None, n_clusters=8, max_dist=None, max_n_tiles_threshold=2, tile_size=512, overlap=50):
-
-    if cell_types is None:
-        cell_types = DEFAULT_CELL_TYPES
-
-    predictions = pd.read_csv(tile_quantification_path, sep="\t")
-    slide_submitter_ids = list(set(predictions.slide_submitter_id))
-
-    #####################################
-    # ---- Constructing the graphs ---- #
-    #####################################
-
-    if graphs_path is None:
-        results = Parallel(n_jobs=NUM_CORES)(
-            delayed(graphs.construct_graph)(
-                predictions=predictions, slide_submitter_id=id)
-            for id in slide_submitter_ids
-        )
-        # Extract/format graphs
-        all_graphs = {
-            list(slide_graph.keys())[0]: list(slide_graph.values())[0]
-            for slide_graph in results
-        }
-        joblib.dump(
-            all_graphs, f"{output_dir}/{slide_type}_graphs.pkl")
-    else:
-        all_graphs = joblib.load(graphs_path)
-
-    ######################################################################
-    # ---- Fraction of cell type clusters (simultaneous clustering) ---- #
-    ######################################################################
-
-    # Spatially constrained hierarchical clustering (SCHC) on the quantifications of all cell types at once
-    slide_clusters = Parallel(n_jobs=NUM_CORES)(delayed(clustering.schc_all)(
-        predictions, all_graphs[id], id) for id in slide_submitter_ids)
-    # Combine the tiles labeled with their cluster id for all slides
-    tiles_all_schc = pd.concat(slide_clusters, axis=0)
-
-    # Assign a cell type label based on the mean of all cluster means across all slides
-    all_slide_clusters_characterized = clustering.characterize_clusters(
-        tiles_all_schc)
-
-    # Count the number of clusters per cell type for each slide
-    num_clust_by_slide = features.n_clusters_per_cell_type(
-        all_slide_clusters_characterized, cell_types=cell_types)
-
-    ######################################################################################
-    # ---- Fraction of highly abundant cell types (individual cell type clustering) ---- #
-    ######################################################################################
-
-    # Spatially constrained hierarchical clustering, run separately for each individual cell type
-    slide_indiv_clusters = Parallel(n_jobs=NUM_CORES)(delayed(clustering.schc_individual)(
-        predictions, all_graphs[id], id) for id in slide_submitter_ids)
-    all_slide_indiv_clusters = pd.concat(slide_indiv_clusters, axis=0)
-
-    # Add metadata
-    all_slide_indiv_clusters = pd.merge(
-        predictions, all_slide_indiv_clusters, on="tile_ID")
-
-    # Add abundance label 'high' or 'low' based on cluster means
-    slide_indiv_clusters_labeled = clustering.label_cell_type_map_clusters(
-        all_slide_indiv_clusters)
-
-    # Count the fraction of 'high' clusters
-    frac_high = features.n_high_clusters(slide_indiv_clusters_labeled)
-
-    ##################################################################
-    # ---- Compute proximity features (simultaneous clustering) ---- #
-    ##################################################################
-
-    # Computing proximity for clusters derived with all cell types simultaneously
-    clusters_all_schc_long = all_slide_clusters_characterized.melt(
-        id_vars=["slide_submitter_id", "cluster_label"], value_name="is_assigned", var_name="cell_type")
-    # Remove all cell types that are not assigned to the cluster
-    clusters_all_schc_long = clusters_all_schc_long[clusters_all_schc_long["is_assigned"]]
-    clusters_all_schc_long = clusters_all_schc_long.drop(columns="is_assigned")
-
-    results_schc_all = Parallel(n_jobs=NUM_CORES)(delayed(features.compute_proximity_clusters_pairs)(
-        tiles=tiles_all_schc, slide_submitter_id=id, n_clusters=n_clusters, cell_types=cell_types, max_dist=max_dist, max_n_tiles_threshold=max_n_tiles_threshold, tile_size=tile_size, overlap=overlap, method="all") for id in slide_submitter_ids)
-    prox_all_schc = pd.concat(results_schc_all)
-
-    # Label clusters (a number) with the assigned cell types
-    prox_all_schc = pd.merge(prox_all_schc, clusters_all_schc_long, left_on=[
-        "slide_submitter_id", "cluster1"], right_on=["slide_submitter_id", "cluster_label"])
-    prox_all_schc = prox_all_schc.rename(
-        columns={"cell_type": "cluster1_label"})
-    prox_all_schc = prox_all_schc.drop(columns=["cluster_label"])
-
-    prox_all_schc = pd.merge(prox_all_schc, clusters_all_schc_long, left_on=[
-        "slide_submitter_id", "cluster2"], right_on=["slide_submitter_id", "cluster_label"])
-    prox_all_schc = prox_all_schc.rename(
-        columns={"cell_type": "cluster2_label"})
-
-    # Order doesn't matter: x <-> y
-    prox_all_schc["pair"] = [f"{sorted([i, j])[0]}-{sorted([i, j])[1]}" for i,
-                             j in prox_all_schc[["cluster1_label", "cluster2_label"]].to_numpy()]
-    prox_all_schc = prox_all_schc[((prox_all_schc.cluster1 == prox_all_schc.cluster2) & (
-        prox_all_schc.cluster2_label != prox_all_schc.cluster1_label)) | (prox_all_schc.cluster1 != prox_all_schc.cluster2)]
-
-    # slides = prox_all_schc[["MFP", "slide_submitter_id"]].drop_duplicates().to_numpy()
-    slide_submitter_ids = list(set(prox_all_schc.slide_submitter_id))
-
-    # Post-processing
-    results_schc_all = Parallel(n_jobs=NUM_CORES)(delayed(features.post_processing_proximity)(
-        prox_df=prox_all_schc, slide_submitter_id=id, method="all") for id in slide_submitter_ids)
-    all_prox_df = pd.concat(results_schc_all)
-    # Remove rows with a proximity of NaN
-    all_prox_df = all_prox_df.dropna(axis=0)
-
-    ##########################################################################
-    # ---- Compute proximity features (individual cell type clustering) ---- #
-    ##########################################################################
-
-    # Computing proximity for clusters derived for each cell type individually
-    # Between clusters
-    slide_submitter_ids = list(set(predictions.slide_submitter_id))
-    results_schc_indiv = Parallel(n_jobs=NUM_CORES)(delayed(features.compute_proximity_clusters_pairs)(all_slide_indiv_clusters, slide_submitter_id=id, method="individual_between",
-                                                                                                       n_clusters=n_clusters, cell_types=cell_types, max_dist=max_dist, max_n_tiles_threshold=max_n_tiles_threshold, tile_size=tile_size, overlap=overlap) for id in slide_submitter_ids)
-    prox_indiv_schc = pd.concat(results_schc_indiv)
-
-    # Formatting
-    prox_indiv_schc = pd.merge(prox_indiv_schc, slide_indiv_clusters_labeled, left_on=[
-        "slide_submitter_id", "cluster1_label", "cluster1"], right_on=["slide_submitter_id", "cell_type_map", "cluster_label"])
-    prox_indiv_schc = prox_indiv_schc.drop(
-        columns=["cell_type_map", "cluster_label"])
-    prox_indiv_schc = prox_indiv_schc.rename(
-        columns={"is_high": "cluster1_is_high"})
-    prox_indiv_schc = pd.merge(prox_indiv_schc, slide_indiv_clusters_labeled, left_on=[
-        "slide_submitter_id", "cluster2_label", "cluster2"], right_on=["slide_submitter_id", "cell_type_map", "cluster_label"])
-    prox_indiv_schc = prox_indiv_schc.rename(
-        columns={"is_high": "cluster2_is_high"})
-    prox_indiv_schc = prox_indiv_schc.drop(
-        columns=["cell_type_map", "cluster_label"])
-
-    # Order matters
-    prox_indiv_schc["ordered_pair"] = [
-        f"{i}-{j}" for i, j in prox_indiv_schc[["cluster1_label", "cluster2_label"]].to_numpy()]
-    prox_indiv_schc["comparison"] = [
-        f"cluster1={i}-cluster2={j}" for i, j in prox_indiv_schc[["cluster1_is_high", "cluster2_is_high"]].to_numpy()]
-
-    # Post-processing
-    slide_submitter_ids = list(set(predictions.slide_submitter_id))
-    results_schc_indiv = pd.concat(Parallel(n_jobs=NUM_CORES)(delayed(features.post_processing_proximity)(
-        prox_df=prox_indiv_schc, slide_submitter_id=id, method="individual_between") for id in slide_submitter_ids))
-
-    # Within clusters
-    slide_submitter_ids = list(set(predictions.slide_submitter_id))
-    results_schc_indiv_within = Parallel(n_jobs=NUM_CORES)(delayed(features.compute_proximity_clusters_pairs)(all_slide_indiv_clusters, slide_submitter_id=id, method="individual_within",
-                                                                                                              n_clusters=n_clusters, cell_types=cell_types, max_dist=max_dist, max_n_tiles_threshold=max_n_tiles_threshold, tile_size=tile_size, overlap=overlap) for id in slide_submitter_ids)
-    prox_indiv_schc_within = pd.concat(results_schc_indiv_within)
-
-    prox_indiv_schc_within = pd.merge(prox_indiv_schc_within, slide_indiv_clusters_labeled, left_on=[
-        "slide_submitter_id", "cell_type", "cluster1"], right_on=["slide_submitter_id", "cell_type_map", "cluster_label"])
-    prox_indiv_schc_within = prox_indiv_schc_within.drop(
-        columns=["cluster_label"])
-    prox_indiv_schc_within = prox_indiv_schc_within.rename(
-        columns={"is_high": "cluster1_is_high", "cell_type_map": "cell_type_map1"})
-    prox_indiv_schc_within = pd.merge(prox_indiv_schc_within, slide_indiv_clusters_labeled, left_on=[
-        "slide_submitter_id", "cell_type", "cluster2"], right_on=["slide_submitter_id", "cell_type_map", "cluster_label"])
-    prox_indiv_schc_within = prox_indiv_schc_within.rename(
-        columns={"is_high": "cluster2_is_high", "cell_type_map": "cell_type_map2"})
-    prox_indiv_schc_within = prox_indiv_schc_within.drop(
-        columns=["cluster_label"])
-
-    # Order doesn't matter (only same cell type combinations)
-    prox_indiv_schc_within["pair"] = [
-        f"{i}-{j}" for i, j in prox_indiv_schc_within[["cell_type_map1", "cell_type_map2"]].to_numpy()]
-    prox_indiv_schc_within["comparison"] = [
-        f"cluster1={sorted([i,j])[0]}-cluster2={sorted([i,j])[1]}" for i, j in prox_indiv_schc_within[["cluster1_is_high", "cluster2_is_high"]].to_numpy()]
-
-    # Post-processing
-    slide_submitter_ids = list(set(prox_indiv_schc_within.slide_submitter_id))
-    results_schc_indiv_within = pd.concat(Parallel(n_jobs=NUM_CORES)(delayed(features.post_processing_proximity)(
-        prox_df=prox_indiv_schc_within, slide_submitter_id=id, method="individual_within") for id in slide_submitter_ids))
-
-    # Concatenate within and between computed proximity values
-    prox_indiv_schc_combined = pd.concat(
-        [results_schc_indiv_within, results_schc_indiv])
-
-    # Remove rows with a proximity of NaN
-    prox_indiv_schc_combined = prox_indiv_schc_combined.dropna(axis=0)
-
-    ####################################
-    # ---- Compute shape features ---- #
-    ####################################
-
-    # Compute shape features based on clustering with all cell types simultaneously
-    # slide_submitter_ids = list(set(predictions.slide_submitter_id))
-    # all_slide_clusters_characterized = all_slide_clusters_characterized.rename(columns=dict(zip(cell_types, [f"is_{cell_type}_cluster" for cell_type in cell_types])))
-    # tiles_all_schc = pd.merge(tiles_all_schc, all_slide_clusters_characterized, on=["slide_submitter_id", "cluster_label"])
-    # res = pd.concat(Parallel(n_jobs=NUM_CORES)(delayed(features.compute_shape_features)(tiles=tiles_all_schc, slide_submitter_id=id, tile_size=tile_size, overlap=overlap, cell_types=cell_types) for id in slide_submitter_ids))
-    # res = res.drop(axis=1, labels=["cluster_label"])
-    # shape_feature_means = res.groupby(["slide_submitter_id", "cell_type"]).mean().reset_index()
-
-    ##############################################
-    # ---- Formatting all computed features ---- #
-    ##############################################
-
-    frac_high_sub = frac_high[frac_high["is_high"]].copy()
-    frac_high_sub = frac_high_sub.drop(
-        columns=["is_high", "n_clusters", "n_total_clusters"])
-
-    frac_high_wide = frac_high_sub.pivot(index=["slide_submitter_id"], columns=[
-        "cell_type_map"])["fraction"]
-    new_cols = [('fraction {0} clusters labeled high'.format(col))
-                for col in frac_high_wide.columns]
-    frac_high_wide.columns = new_cols
-    frac_high_wide = frac_high_wide.sort_index(axis="columns").reset_index()
-
-    num_clust_by_slide_sub = num_clust_by_slide.copy()
-    num_clust_by_slide_sub = num_clust_by_slide_sub.drop(
-        columns=["is_assigned", "n_clusters"])
-
-    num_clust_slide_wide = num_clust_by_slide_sub.pivot(
-        index=["slide_submitter_id"], columns=["cell_type"])["fraction"]
-    new_cols = [('fraction {0} clusters'.format(col))
-                for col in num_clust_slide_wide.columns]
-    num_clust_slide_wide.columns = new_cols
-    num_clust_slide_wide = num_clust_slide_wide.sort_index(
-        axis="columns").reset_index()
-
-    all_prox_df_wide = all_prox_df.pivot(
-        index=["slide_submitter_id"], columns=["pair"])["proximity"]
-    new_cols = [
-        f'prox CC {col.replace("_", " ")} clusters' for col in all_prox_df_wide.columns]
-    all_prox_df_wide.columns = new_cols
-    all_prox_df_wide = all_prox_df_wide.reset_index()
-
-    prox_indiv_schc_combined.comparison = prox_indiv_schc_combined.comparison.replace(dict(zip(['cluster1=True-cluster2=True', 'cluster1=True-cluster2=False', 'cluster1=False-cluster2=True', 'cluster1=False-cluster2=False'], ["high-high", "high-low", "low-high", "low-low"])))
-    prox_indiv_schc_combined["pair (comparison)"] = [
-        f"{pair} ({comp})" for pair, comp in prox_indiv_schc_combined[["pair", "comparison"]].to_numpy()]
-    prox_indiv_schc_combined = prox_indiv_schc_combined.drop(
-        axis=1, labels=["pair", "comparison"])
"comparison"]) - prox_indiv_schc_combined_wide = prox_indiv_schc_combined.pivot( - index=["slide_submitter_id"], columns=["pair (comparison)"])["proximity"] - new_cols = [ - f'prox CC {col.replace("_", " ")}' for col in prox_indiv_schc_combined_wide.columns] - prox_indiv_schc_combined_wide.columns = new_cols - prox_indiv_schc_combined_wide = prox_indiv_schc_combined_wide.reset_index() - - # shape_feature_means_wide = shape_feature_means.pivot(index=["slide_submitter_id"], columns="cell_type")[["solidity", "roundness"]] - # new_cols = [f'prox CC {col.replace("_", " ")}' for col in prox_indiv_schc_combined_wide.columns] - # shape_feature_means_wide.columns = [f"{i.capitalize()} {j}" for i, j in shape_feature_means_wide.columns] - # shape_feature_means_wide = shape_feature_means_wide.reset_index() - - # Store features - all_features = pd.merge(frac_high_wide, num_clust_slide_wide, on=[ - "slide_submitter_id"]) - all_features = pd.merge(all_features, all_prox_df_wide) - all_features = pd.merge(all_features, prox_indiv_schc_combined_wide) - # all_features = pd.merge(all_features, shape_feature_means_wide) - - # drop the predicted probabilities - tiles_all_schc = tiles_all_schc.drop(axis=1, columns=cell_types) - all_slide_indiv_clusters = all_slide_indiv_clusters.drop( - axis=1, columns=cell_types) # drop the predicted probabilities - - ################################ - # ---- Store all features ---- # - ################################ - - # tiles_all_schc (DataFrame): dataframe containing the metadata columns and the cluster_label (int) - # all_slide_clusters_characterized (DataFrame): dataframe containing the slide_submitter_id, and the the columns for the cell types filled with booleans (True if the cluster is assigned with that cell type) - # all_slide_indiv_clusters (DataFrame): dataframe containing the metadata columns and columns with to which cell type cluster the tile belongs to - # slide_indiv_clusters_labeled (DataFrame): dataframe containing the slide_submitter_id, cell_type_map, cluster_label (int), and is_high (abundance) - # all_prox_df (DataFrame): dataframe containing slide_submitter_id, pair, proximity - # prox_indiv_schc_combined (DataFrame): dataframe containing slide_submitter_id, comparison (high/low abundance label), pair (cell type pair) and proximity - # shape_features_mean (DataFrame): dataframe containing slide_submitter_id, cell_type, slide_submitter_id, solidity, roundness - tiles_all_schc.to_csv( - f"{output_dir}/{slide_type}_all_schc_tiles.csv", sep="\t", index=False) - all_slide_clusters_characterized.to_csv( - f"{output_dir}/{slide_type}_all_schc_clusters_labeled.csv", sep="\t", index=False) - all_slide_indiv_clusters.to_csv( - f"{output_dir}/{slide_type}_indiv_schc_tiles.csv", sep="\t", index=False) - slide_indiv_clusters_labeled.to_csv( - f"{output_dir}/{slide_type}_indiv_schc_clusters_labeled.csv", sep="\t", index=False) - all_prox_df.to_csv( - f"{output_dir}/{slide_type}_features_clust_all_schc_prox.csv", sep="\t", index=False) - prox_indiv_schc_combined.to_csv( - f"{output_dir}/{slide_type}_features_clust_indiv_schc_prox.csv", sep="\t", index=False) - # shape_feature_means.to_csv(f"{output_dir}/{slide_type}_features_clust_shapes.csv", sep="\t", index=False) - all_features.to_csv( - f"{output_dir}/{slide_type}_clustering_features.csv", sep="\t", index=False) - - -def post_processing(output_dir, slide_type="FF", metadata_path="", is_TCGA=False, merge_var="slide_submitter_id", sheet_name=None): - """ - Combine network and clustering features into a single 
file. If metadata_path is not None, add the metadata as well, based on variable slide_submitter_id - - Args: - output_dir (str): directory containing the graph and clustering features - slide_type (str): slide type to identify correct files for merging, either "FF" or "FFPE" (default="FF") - metadata_path (str): path to file containing metadata - is_TCGA (bool): whether data is from TCGA - merge_var (str): variable on which to merge (default: slide_submitter_id) - - """ - all_features_graph = pd.read_csv( - f"{output_dir}/{slide_type}_all_graph_features.csv", sep="\t") - all_features_clustering = pd.read_csv( - f"{output_dir}/{slide_type}_clustering_features.csv", sep="\t") - - all_features_combined = pd.merge( - all_features_graph, all_features_clustering) - - # Add additional identifiers for TCGA - if is_TCGA: - all_features_combined["TCGA_patient_ID"] = all_features_combined.slide_submitter_id.str[0:12] - all_features_combined["TCGA_sample_ID"] = all_features_combined.slide_submitter_id.str[0:15] - all_features_combined["sample_submitter_id"] = all_features_combined.slide_submitter_id.str[0:16] - - if path.isfile(metadata_path): - file_extension = metadata_path.split(".")[-1] - if (file_extension.startswith("xls")): - if sheet_name is None: - metadata = pd.read_excel(metadata_path) - elif (file_extension == "txt") or (file_extension == "csv"): - metadata = pd.read_csv(metadata_path, sep="\t") - all_features_combined = pd.merge( - all_features_combined, metadata, on=merge_var, how="left") - all_features_combined.to_csv( - f"{output_dir}/all_features_combined.csv", sep="\t", index=False) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Derive spatial features") - parser.add_argument("--workflow_mode", type=int, - help="Choose which steps to execute: all = 1, graph-based = 2, clustering-based = 3, combining features = 4 (default: 1)", default=1) - parser.add_argument("--tile_quantification_path", type=str, - help="Path to csv file with tile-level quantification (predictions)", required=True) - parser.add_argument("--output_dir", type=str, - help="Path to output folder to store generated files", required=True) - parser.add_argument("--slide_type", type=str, - help="Type of slides 'FFPE' or 'FF' used for naming generated files (default: 'FF')", default="FF") - parser.add_argument("--cell_types_path", type=str, - help="Path to file with list of cell types (default: CAFs, endothelial_cells, T_cells, tumor_purity)", default="") - parser.add_argument("--graphs_path", type=str, - help="Path to pkl with generated graphs in case this was done before (OPTIONAL) if not specified, graphs will be generated", default=None) - - parser.add_argument("--cutoff_path_length", type=int, - help="Max path length for proximity based on graphs", default=2, required=False) - parser.add_argument("--shapiro_alpha", type=float, - help="Choose significance level alpha (default: 0.05)", default=0.05, required=False) - parser.add_argument("--abundance_threshold", type=float, - help="Threshold for assigning cell types based on predicted probability (default: 0.5)", default=0.5, required=False) - - parser.add_argument("--n_clusters", type=int, - help="Number of clusters for SCHC (default: 8)", required=False, default=8) - parser.add_argument("--max_dist", type=int, - help="Maximum distance between clusters", required=False, default=None) - parser.add_argument("--max_n_tiles_threshold", type=int, - help="Number of tiles for computing max. 
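Note on the CLI below: the original script parsed --is_TCGA with type=bool, which is a known argparse pitfall, since any non-empty string (including "False") is truthy, so --is_TCGA False would still yield True. The argument below uses an inline converter instead; a named helper version of the same idea (not part of the original script) could look like:

import argparse

def str2bool(value):
    # argparse passes the raw string to type=; bool("False") would be True,
    # so map the usual spellings explicitly.
    if str(value).lower() in ("true", "t", "1", "yes"):
        return True
    if str(value).lower() in ("false", "f", "0", "no"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean, got {value!r}")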
distance between two points in two different clusters", default=2, required=False) - parser.add_argument("--tile_size", type=int, - help="Size of tile (default: 512)", default=512, required=False) - parser.add_argument("--overlap", type=int, - help="Overlap of tiles (default: 50)", default=50, required=False) - - parser.add_argument("--metadata_path", type=str, - help="Path to tab-separated file with metadata", default="") - parser.add_argument("--is_TCGA", type=bool, - help="dataset is from TCGA (default: True)", default=True, required=False) - parser.add_argument("--merge_var", type=str, - help="Variable to merge metadata and computed features on", default=None) - parser.add_argument("--sheet_name", type=str, - help="Name of sheet for merging in case a path to xls(x) file is given for metadata_path", default=None) - - args = parser.parse_args() - - # Common variables - workflow_mode = args.workflow_mode - tile_quantification_path = args.tile_quantification_path - slide_type = args.slide_type - cell_types_path = args.cell_types_path - graphs_path = args.graphs_path - output_dir = args.output_dir - - # Variables for network/graph based features - cutoff_path_length = args.cutoff_path_length - shapiro_alpha = args.shapiro_alpha - abundance_threshold = args.abundance_threshold - - # Variables clustering - n_clusters = args.n_clusters - max_dist = args.max_dist - max_n_tiles_threshold = args.max_n_tiles_threshold - tile_size = args.tile_size - overlap = args.overlap - - # Variables post-processing - metadata_path = args.metadata_path - merge_var = args.merge_var - sheet_name = args.sheet_name - - # FIX create output directory for saving - full_output_dir = f"{output_dir}" - print(full_output_dir) - if not os.path.isdir(full_output_dir): - os.makedirs(full_output_dir) - - if path.isfile(cell_types_path): - cell_types = pd.read_csv( - cell_types_path, header=None).to_numpy().flatten() - else: - cell_types = DEFAULT_CELL_TYPES - - if (workflow_mode in [1, 2, 3]) & (graphs_path is None): - predictions = pd.read_csv(tile_quantification_path, sep="\t") - slide_submitter_ids = list(set(predictions.slide_submitter_id)) - results = Parallel(n_jobs=NUM_CORES)( - delayed(graphs.construct_graph)( - predictions=predictions, slide_submitter_id=id) - for id in slide_submitter_ids - ) - # Extract/format graphs - all_graphs = { - list(slide_graph.keys())[0]: list(slide_graph.values())[0] - for slide_graph in results - } - joblib.dump( - all_graphs, f"{output_dir}/{slide_type}_graphs.pkl") - - graphs_path = f"{output_dir}/{slide_type}_graphs.pkl" - - if workflow_mode == 1: - print("Workflow mode: all steps") - - print("Compute network features...") - compute_network_features( - tile_quantification_path=tile_quantification_path, - output_dir=output_dir, - slide_type=slide_type, - cell_types=cell_types, - graphs_path=graphs_path, cutoff_path_length=cutoff_path_length, shapiro_alpha=shapiro_alpha, abundance_threshold=abundance_threshold) - - print("Compute clustering features...") - compute_clustering_features( - tile_quantification_path=tile_quantification_path, - output_dir=output_dir, - slide_type=slide_type, - cell_types=cell_types, - graphs_path=graphs_path, n_clusters=n_clusters, max_dist=max_dist, max_n_tiles_threshold=max_n_tiles_threshold, tile_size=tile_size, overlap=overlap) - - print("Post-processing: combining all features") - post_processing(output_dir=output_dir, slide_type=slide_type, metadata_path=metadata_path, - is_TCGA=False, merge_var=merge_var, sheet_name=sheet_name) - - print("Finished with 
all steps.") - elif workflow_mode == 2: - print("Compute network features...") - compute_network_features( - tile_quantification_path=tile_quantification_path, - output_dir=output_dir, - slide_type=slide_type, - cell_types=cell_types, - graphs_path=graphs_path, cutoff_path_length=cutoff_path_length, shapiro_alpha=shapiro_alpha, abundance_threshold=abundance_threshold) - print("Finished.") - elif workflow_mode == 3: - print("Compute clustering features...") - compute_clustering_features( - tile_quantification_path=tile_quantification_path, - output_dir=output_dir, - slide_type=slide_type, - cell_types=cell_types, - graphs_path=graphs_path, n_clusters=n_clusters, max_dist=max_dist, max_n_tiles_threshold=max_n_tiles_threshold, tile_size=tile_size, overlap=overlap) - print("Finished.") - - elif workflow_mode == 4: - print("Post-processing: combining all features") - - post_processing(output_dir=output_dir, slide_type=slide_type, metadata_path=metadata_path, - is_TCGA=False, merge_var=merge_var, sheet_name=sheet_name) - print("Finished.") - else: - raise Exception( - "Invalid workflow mode, please choose one of the following (int): all = 1, graph-based = 2, clustering-based = 3, combining features = 4 (default: 1)") diff --git a/README.md b/README.md index cde4613..690f618 100644 --- a/README.md +++ b/README.md @@ -16,92 +16,34 @@ See also the figures below. ## Run SPoTLIghT -1. Build the docker image as follows: +1. Pull the Docker container: ```bash -docker build -t run_spotlight:vfinal . --platform linux/amd64 +docker pull joank23/spotlight:latest ``` Alternatively you can use Singularity/Apptainer (HPCs): ```bash # 1. save docker as tar or tar.gz (compressed) -docker save run_spotlight:vfinal -o {output_dir}/spotlight_docker.tar.gz +docker save joank23/spotlight -o spotlight.tar.gz # 2. build apptainer (.sif) from docker (.tar) -apptainer build {output_dir}/spotlight_apptainer.sif docker-archive:spotlight_docker.tar.gz +apptainer build spotlight.sif docker-archive:spotlight.tar.gz ``` -2. Add your FF histopathology slides to a subdirectory in the `spotlight_docker` directory, e.g. `data_example/images`. * Please rename your images file names, so they only include "-", to follow the same sample coding used by the TCGA. +> Please rename your images file names, so they only include "-", to follow the same sample coding used by the TCGA. -3. Download retrained models to extract the histopathological features, available from Fu et al., Nat Cancer, 2020 ([Retrained_Inception_v4](https://www.ebi.ac.uk/biostudies/bioimages/studies/S-BSST292)). +1. Download retrained models to extract the histopathological features, available from Fu et al., Nat Cancer, 2020 ([Retrained_Inception_v4](https://www.ebi.ac.uk/biostudies/bioimages/studies/S-BSST292)). Once you unzip the folder, extract the files to the `data/checkpoint/Retrained_Inception_v4/` folder. - -4. If a TCGA dataset is used, please download metadata (i.e. "biospecimen -> TSV", unzip and keep slide.tsv), then rename `slide.tsv` to `clinical_file_TCGA_{cancer_type_abbrev}` such as `clinical_file_TCGA_SKCM.tsv` and copy to `/data`. Example dataset TCGA-SKCM can be downloaded [here](https://portal.gdc.cancer.gov/projects/TCGA-SKCM). For non-TCGA datasets, please omit this step. - -5. Setup your paths and variables in `run_pipeline.sh` +2. If a TCGA dataset is used, please download metadata (i.e. 
"biospecimen -> TSV", unzip and keep slide.tsv), then rename `slide.tsv` to `clinical_file_TCGA_{cancer_type_abbrev}` such as `clinical_file_TCGA_SKCM.tsv` and copy to `/data`. Example dataset TCGA-SKCM can be downloaded [here](https://portal.gdc.cancer.gov/projects/TCGA-SKCM). For non-TCGA datasets, please omit this step. +3. Setup your paths and variables in `run_pipeline.sh` +4. Set a config ensuring compatibility with available resources, you can use `custom.config` as a template. (see `nextflow.config` for all parameters, if a parameter is 'assets/NO_FILE' or 'dummy', they are optional parameters, if not used please leave as is) +5. Run the Nextflow Pipeline as follows: ```bash -# Directory 'spotlight_docker' - -work_dir="/path/to/spotlight_docker" -spotlight_sif="path/to/spotlight_sif" - -# Define directories/files in container (mounted) - -folder_images="/path/to/images_dir" -output_dir="/path/to/output_dir" - -# Relative to docker, i.e. start with /data - -checkpoint="/data/checkpoint/Retrained_Inception_v4/model.ckpt-100000" -# TCGA H&E images -# clinical_files_dir="/data/path/to/clinical/TCGA/file.tsv" -# Non-TCGA H&E images -clinical_files_dir=NULL # Non-TCGA files - -# Remaining parameters (this configuration has been tested) -slide_type="FF" -tumor_purity_threshold=80 -class_names="SKCM_T" -model_name="inception_v4" - -# Indicate whether the slides arise from the TCGA project -is_tcga=false +nextflow run . -profile apptainer -c "${your_config_file}" -```` -6. Run the pipeline by executing `run_pipeline.sh` - -```` -echo "Create output directory: ${output_dir}..." -mkdir -p ${output_dir} - -echo "Binding directories..." -export APPTAINER_BINDPATH=${work_dir}/data/:/project/data:ro,${folder_images}:/project/images:ro,${output_dir}:/project/output:rw,${work_dir}/run_scripts:/project/run_scripts:ro,${work_dir}/Python:/project/Python:ro - -echo "Run pipeline..." -echo "Extract histopathological features (1 out of 3)" -apptainer exec \ - --cleanenv \ - -c \ - ${spotlight_sif} \ - bash "/project/run_scripts/1_extract_histopatho_features.sh" ${checkpoint} ${clinical_files_dir} ${slide_type} ${class_names} ${tumor_purity_threshold} ${model_name} ${is_tcga} - -echo "Tile level cell type quanitification (2 out of 3)" -apptainer exec \ - --cleanenv \ - -c \ - ${spotlight_sif} \ - bash "/project/run_scripts/2_tile_level_cell_type_quantification.sh" $slide_type - -echo "Compute spatial features (3 out of 3)" -apptainer exec \ - --cleanenv \ - -c \ - ${spotlight_sif} \ - bash "/project/run_scripts/3_compute_spatial_features.sh" ${slide_type} - -echo "COMPLETED!" ```` ## Output documentation @@ -109,42 +51,63 @@ echo "COMPLETED!" 
SPoTLIghT generates the following output directory structure: ```bash -{output_dir} -├── 1_histopathological_features -│ ├── bot_train.txt -│ ├── features.txt +{outdir} +├── 1_extract_histopatho_features +│ ├── avail_slides_for_img.csv +│ ├── features-0.parquet │ ├── file_info_train.txt -│ ├── final_clinical_file.txt │ ├── generated_clinical_file.txt -│ ├── pred_train.txt -│ ├── predictions.txt +│ ├── ok.txt +│ ├── predictions-0.parquet │ ├── process_train │ │ ├── images_train_00001-of-00320.tfrecord -│ │ └── images_train_00002-of-00320.tfrecord +│ │ ├── images_train_00002-of-00320.tfrecord +│ │ ├── images_train_00004-of-00320.tfrecord │ └── tiles -│ ├── TCGA-EB-A3XC-01Z-00-DX1_2773_15709.jpg -│ └── TCGA-EE-A3JE-01Z-00-DX1_25873_12013.jpg +│ ├── xenium-skin-panel_10165_10165.jpg +│ ├── xenium-skin-panel_10165_10627.jpg +│ ├── xenium-skin-panel_10165_11089.jpg ├── 2_tile_level_quantification │ ├── test_tile_predictions_proba.csv │ └── test_tile_predictions_zscores.csv ├── 3_spatial_features -│ ├── FF_all_graph_features.csv -│ ├── FF_all_schc_clusters_labeled.csv -│ ├── FF_all_schc_tiles.csv -│ ├── FF_clustering_features.csv -│ ├── FF_features_ND.csv -│ ├── FF_features_ND_ES.csv -│ ├── FF_features_ND_sim_assignments.pkl -│ ├── FF_features_ND_sims.csv -│ ├── FF_features_clust_all_schc_prox.csv -│ ├── FF_features_clust_indiv_schc_prox.csv -│ ├── FF_features_coloc_fraction.csv -│ ├── FF_features_lcc_fraction.csv -│ ├── FF_features_shortest_paths_thresholded.csv -│ ├── FF_graphs.pkl -│ ├── FF_indiv_schc_clusters_labeled.csv -│ ├── FF_indiv_schc_tiles.csv -│ ├── FF_shapiro_tests.csv -│ └── all_features_combined.csv -└── list_images.txt +│ ├── clustering_features +│ │ ├── FFPE_all_schc_clusters_labeled.csv +│ │ ├── FFPE_all_schc_tiles.csv +│ │ ├── FFPE_all_schc_tiles_raw.csv +│ │ ├── FFPE_features_clust_all_schc_prox_wide.csv +│ │ ├── FFPE_features_clust_indiv_schc_prox_between.csv +│ │ ├── FFPE_features_clust_indiv_schc_prox.csv +│ │ ├── FFPE_features_clust_indiv_schc_prox_within.csv +│ │ ├── FFPE_frac_high_wide.csv +│ │ ├── FFPE_graphs.pkl +│ │ ├── FFPE_indiv_schc_clusters_labeled.csv +│ │ ├── FFPE_indiv_schc_tiles.csv +│ │ ├── FFPE_indiv_schc_tiles_raw.csv +│ │ └── FFPE_nclusters_wide.csv +│ ├── FFPE_all_features_combined.csv +│ ├── FFPE_all_graph_features.csv +│ ├── FFPE_clustering_features.csv +│ ├── FFPE_graphs.pkl +│ └── network_features +│ ├── FFPE_features_coloc_fraction.csv +│ ├── FFPE_features_coloc_fraction_wide.csv +│ ├── FFPE_features_lcc_fraction_wide.csv +│ ├── FFPE_features_ND.csv +│ ├── FFPE_features_ND_ES.csv +│ ├── FFPE_features_ND_sim_assignments.pkl +│ ├── FFPE_features_ND_sims.csv +│ ├── FFPE_features_shortest_paths_thresholded.csv +│ ├── FFPE_features_shortest_paths_thresholded_wide.csv +│ ├── FFPE_graphs.pkl +│ └── FFPE_shapiro_tests.csv +├── bottleneck +│ ├── bot_train.txt +│ ├── ok.txt +│ └── pred_train.txt +└── pipeline_info + ├── execution_report_2024-09-23_21-07-41.html + ├── execution_timeline_2024-09-23_21-07-41.html + ├── execution_trace_2024-09-23_21-07-41.txt + └── pipeline_dag_2024-09-23_21-07-41.html ``` diff --git a/Python/libs/DL/__init__.py b/assets/NO_FILE old mode 100644 new mode 100755 similarity index 100% rename from Python/libs/DL/__init__.py rename to assets/NO_FILE diff --git a/data/TF_models/SKCM_FF/CAFs/outer_models.pkl b/assets/TF_models/SKCM_FF/CAFs/outer_models.pkl old mode 100644 new mode 100755 similarity index 100% rename from data/TF_models/SKCM_FF/CAFs/outer_models.pkl rename to assets/TF_models/SKCM_FF/CAFs/outer_models.pkl diff --git 
a/data/TF_models/SKCM_FF/CAFs/x_train_scaler.pkl b/assets/TF_models/SKCM_FF/CAFs/x_train_scaler.pkl old mode 100644 new mode 100755 similarity index 100% rename from data/TF_models/SKCM_FF/CAFs/x_train_scaler.pkl rename to assets/TF_models/SKCM_FF/CAFs/x_train_scaler.pkl diff --git a/data/TF_models/SKCM_FF/T_cells/outer_models.pkl b/assets/TF_models/SKCM_FF/T_cells/outer_models.pkl old mode 100644 new mode 100755 similarity index 100% rename from data/TF_models/SKCM_FF/T_cells/outer_models.pkl rename to assets/TF_models/SKCM_FF/T_cells/outer_models.pkl diff --git a/data/TF_models/SKCM_FF/T_cells/x_train_scaler.pkl b/assets/TF_models/SKCM_FF/T_cells/x_train_scaler.pkl old mode 100644 new mode 100755 similarity index 100% rename from data/TF_models/SKCM_FF/T_cells/x_train_scaler.pkl rename to assets/TF_models/SKCM_FF/T_cells/x_train_scaler.pkl diff --git a/data/TF_models/SKCM_FF/endothelial_cells/outer_models.pkl b/assets/TF_models/SKCM_FF/endothelial_cells/outer_models.pkl old mode 100644 new mode 100755 similarity index 100% rename from data/TF_models/SKCM_FF/endothelial_cells/outer_models.pkl rename to assets/TF_models/SKCM_FF/endothelial_cells/outer_models.pkl diff --git a/data/TF_models/SKCM_FF/endothelial_cells/x_train_scaler.pkl b/assets/TF_models/SKCM_FF/endothelial_cells/x_train_scaler.pkl old mode 100644 new mode 100755 similarity index 100% rename from data/TF_models/SKCM_FF/endothelial_cells/x_train_scaler.pkl rename to assets/TF_models/SKCM_FF/endothelial_cells/x_train_scaler.pkl diff --git a/data/TF_models/SKCM_FF/tumor_purity/outer_models.pkl b/assets/TF_models/SKCM_FF/tumor_purity/outer_models.pkl old mode 100644 new mode 100755 similarity index 100% rename from data/TF_models/SKCM_FF/tumor_purity/outer_models.pkl rename to assets/TF_models/SKCM_FF/tumor_purity/outer_models.pkl diff --git a/data/TF_models/SKCM_FF/tumor_purity/x_train_scaler.pkl b/assets/TF_models/SKCM_FF/tumor_purity/x_train_scaler.pkl old mode 100644 new mode 100755 similarity index 100% rename from data/TF_models/SKCM_FF/tumor_purity/x_train_scaler.pkl rename to assets/TF_models/SKCM_FF/tumor_purity/x_train_scaler.pkl diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100755 index 0000000..e8b1a9f --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/spotlight v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. 
The full error message was: ${errorReport}.<% } %>",
+            "wrap": true
+          },
+          {
+            "type": "TextBlock",
+            "text": "The command used to launch the workflow was as follows:",
+            "wrap": true
+          },
+          {
+            "type": "TextBlock",
+            "text": "${commandLine}",
+            "isSubtle": true,
+            "wrap": true
+          }
+        ],
+        "actions": [
+          {
+            "type": "Action.ShowCard",
+            "title": "Pipeline Configuration",
+            "card": {
+              "type": "AdaptiveCard",
+              "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json",
+              "body": [
+                {
+                  "type": "FactSet",
+                  "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %>
+                  ]
+                }
+              ]
+            }
+          }
+        ]
+      }
+    }
+  ]
+}
diff --git a/Python/2_train_multitask_models/cell_types.txt b/assets/cell_types.txt
old mode 100644
new mode 100755
similarity index 100%
rename from Python/2_train_multitask_models/cell_types.txt
rename to assets/cell_types.txt
diff --git a/assets/email_template.html b/assets/email_template.html
new file mode 100755
index 0000000..a1c2f76
--- /dev/null
+++ b/assets/email_template.html
@@ -0,0 +1,53 @@
+<html>
+<head>
+<meta charset="utf-8">
+<meta http-equiv="X-UA-Compatible" content="IE=edge">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>nf-core/spotlight Pipeline Report</title>
+</head>
+<body>
+<div style="font-family: Helvetica, Arial, sans-serif; padding: 30px; max-width: 800px; margin: 0 auto;">
+
+<img src="cid:nfcorepipelinelogo">
+
+<h1>nf-core/spotlight ${version}</h1>
+<h2>Run Name: $runName</h2>
+
+<% if (!success){
+    out << """
+    <div style="color: #a94442; background-color: #f2dede; border: 1px solid #ebccd1; padding: 15px; border-radius: 4px; margin-bottom: 15px;">
+        <h4 style="margin-top: 0; color: inherit;">nf-core/spotlight execution completed unsuccessfully!</h4>
+        <p>The exit status of the task that caused the workflow execution to fail was: $exitStatus.</p>
+        <p>The full error message was:</p>
+        <pre style="white-space: pre-wrap; overflow: visible;">${errorReport}</pre>
+    </div>
+    """
+} else {
+    out << """
+    <div style="color: #3c763d; background-color: #dff0d8; border: 1px solid #d6e9c6; padding: 15px; border-radius: 4px; margin-bottom: 15px;">
+        nf-core/spotlight execution completed successfully!
+    </div>
+    """
+}
+%>
+
+<p>The workflow was completed at $dateComplete (duration: $duration)</p>
+
+<p>The command used to launch the workflow was as follows:</p>
+<pre style="white-space: pre-wrap; overflow: visible;">$commandLine</pre>
+
+<h3>Pipeline Configuration:</h3>
+<table style="width: 100%; border-collapse: collapse;">
+    <% out << summary.collect{ k,v ->
+        "<tr><td style='text-align: left; padding: 8px 0;'>$k</td><td style='text-align: left; padding: 8px 0;'>$v</td></tr>"
+    }.join("\n") %>
+</table>
+
+<p>nf-core/spotlight</p>
+<p><a href="https://github.com/nf-core/spotlight">https://github.com/nf-core/spotlight</a></p>
+
+</div>
+</body>
+</html>
diff --git a/assets/email_template.txt b/assets/email_template.txt
new file mode 100755
index 0000000..dfac538
--- /dev/null
+++ b/assets/email_template.txt
@@ -0,0 +1,39 @@
+----------------------------------------------------
+                                        ,--./,-.
+        ___     __   __   __   ___     /,-._.--~\\
+  |\\ | |__  __ /  ` /  \\ |__) |__         }  {
+  | \\| |       \\__, \\__/ |  \\ |___     \\`-._,-`-,
+                                        `._,._,'
+  nf-core/spotlight ${version}
+----------------------------------------------------
+Run Name: $runName
+
+<% if (success){
+    out << "## nf-core/spotlight execution completed successfully! ##"
+} else {
+    out << """####################################################
+## nf-core/spotlight execution completed unsuccessfully! ##
+####################################################
+The exit status of the task that caused the workflow execution to fail was: $exitStatus.
+The full error message was:
+
+${errorReport}
+"""
+} %>
+
+
+The workflow was completed at $dateComplete (duration: $duration)
+
+The command used to launch the workflow was as follows:
+
+  $commandLine
+
+
+
+Pipeline Configuration:
+-----------------------
+<% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %>
+
+--
+nf-core/spotlight
+https://github.com/nf-core/spotlight
diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml
new file mode 100755
index 0000000..3bc853e
--- /dev/null
+++ b/assets/methods_description_template.yml
@@ -0,0 +1,29 @@
+id: "nf-core-spotlight-methods-description"
+description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication."
+section_name: "nf-core/spotlight Methods Description"
+section_href: "https://github.com/nf-core/spotlight"
+plot_type: "html"
+## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline
+## You inject any metadata in the Nextflow '${workflow}' object
+data: |
+  <h4>Methods</h4>
+  <p>Data was processed using nf-core/spotlight v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.</p>
+  <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:</p>
+  <pre><code>${workflow.commandLine}</code></pre>
+  <p>${tool_citations}</p>
+  <h4>References</h4>
+  <ul>
+    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820</li>
+    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x</li>
+    <li>Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7</li>
+    <li>da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192</li>
+    ${tool_bibliography}
+  </ul>
+  <div class="alert alert-info">
+    <h5>Notes:</h5>
+    <ul>
+      ${nodoi_text}
+      <li>The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!</li>
+      <li>You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.</li>
+    </ul>
+  </div>
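The notification assets in this patch (the plain-text and HTML e-mail templates, the Teams adaptive card, and the Slack report further below) are filled in by the standard nf-core completion handlers. As a hedged sketch, assuming this pipeline keeps the stock nf-core notification parameters (they are not shown in this patch), a run that exercises them might look like:

```bash
# Sketch only: --email and --hook_url are standard nf-core parameters,
# assumed (not confirmed by this patch) to be wired up in nextflow.config.
nextflow run . -profile apptainer -c custom.config \
    --outdir results \
    --email user@example.org \
    --hook_url "https://example.webhook.office.com/..."
```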
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml new file mode 100755 index 0000000..81edd73 --- /dev/null +++ b/assets/multiqc_config.yml @@ -0,0 +1,15 @@ +report_comment: > + This report has been generated by the nf-core/spotlight + analysis pipeline. For information about how to interpret these results, please see the + documentation. +report_section_order: + "nf-core-spotlight-methods-description": + order: -1000 + software_versions: + order: -1001 + "nf-core-spotlight-summary": + order: -1002 + +export_plots: true + +disable_version_detection: true diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv new file mode 100755 index 0000000..5f653ab --- /dev/null +++ b/assets/samplesheet.csv @@ -0,0 +1,3 @@ +sample,fastq_1,fastq_2 +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, diff --git a/assets/schema_input.json b/assets/schema_input.json new file mode 100755 index 0000000..772ae5d --- /dev/null +++ b/assets/schema_input.json @@ -0,0 +1,33 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/spotlight/master/assets/schema_input.json", + "title": "nf-core/spotlight pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "sample": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"] + }, + "fastq_1": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "fastq_2": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + } + }, + "required": ["sample", "fastq_1"] + } +} diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt new file mode 100755 index 0000000..a7c115c --- /dev/null +++ b/assets/sendmail_template.txt @@ -0,0 +1,53 @@ +To: $email +Subject: $subject +Mime-Version: 1.0 +Content-Type: multipart/related;boundary="nfcoremimeboundary" + +--nfcoremimeboundary +Content-Type: text/html; charset=utf-8 + +$email_html + +--nfcoremimeboundary +Content-Type: image/png;name="nf-core-spotlight_logo.png" +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: inline; filename="nf-core-spotlight_logo_light.png" + +<% out << new File("$projectDir/assets/nf-core-spotlight_logo_light.png"). + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' ) %> + +<% +if (mqcFile){ +def mqcFileObj = new File("$mqcFile") +if (mqcFileObj.length() < mqcMaxSize){ +out << """ +--nfcoremimeboundary +Content-Type: text/html; name=\"multiqc_report\" +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" + +${mqcFileObj. + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). 
+ join( '\n' )} +""" +}} +%> + +--nfcoremimeboundary-- diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100755 index 0000000..9d76280 --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "nf-core/spotlight ${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? ("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/Python/2_train_multitask_models/task_selection_names.pkl b/assets/task_selection_names.pkl old mode 100644 new mode 100755 similarity index 100% rename from Python/2_train_multitask_models/task_selection_names.pkl rename to assets/task_selection_names.pkl diff --git a/Python/1_extract_histopathological_features/tissue_classes.csv b/assets/tissue_classes.csv old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/tissue_classes.csv rename to assets/tissue_classes.csv diff --git a/data/tmp_clinical_file.txt b/assets/tmp_clinical_file.txt similarity index 100% rename from data/tmp_clinical_file.txt rename to assets/tmp_clinical_file.txt diff --git a/Python/1_extract_histopathological_features/myslim/bottleneck_predict.py b/bin/bottleneck_predict.py old mode 100644 new mode 100755 similarity index 85% rename from Python/1_extract_histopathological_features/myslim/bottleneck_predict.py rename to bin/bottleneck_predict.py index ae46b59..35e847c --- a/Python/1_extract_histopathological_features/myslim/bottleneck_predict.py +++ b/bin/bottleneck_predict.py @@ -6,19 +6,15 @@ sys.path.append(os.getcwd()) -# trunk-ignore(flake8/E402) import tf_slim as slim - -# trunk-ignore(flake8/E402) from nets import nets_factory - -# trunk-ignore(flake8/E402) from preprocessing import preprocessing_factory tf.compat.v1.disable_eager_execution() tf.app.flags.DEFINE_integer("num_classes", 42, "The number of classes.") -tf.app.flags.DEFINE_string("bot_out", None, "Output file for bottleneck features.") +tf.app.flags.DEFINE_string( + "bot_out", None, "Output file for bottleneck features.") tf.app.flags.DEFINE_string("pred_out", None, "Output file for predictions.") tf.app.flags.DEFINE_string( "model_name", "inception_v4", "The name of the architecture to evaluate.") @@ -26,8 +22,10 @@ "checkpoint_path", None, "The directory where the model was written to.") tf.app.flags.DEFINE_integer("eval_image_size", 299, "Eval image size.") 
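+# "file_dir" must point at the folder of images_train_*.tfrecord shards
+# produced by pre-processing; "bot_out" and "pred_out" each receive one
+# line per tile (bottleneck features and class predictions, respectively).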
tf.app.flags.DEFINE_string("file_dir", "../Output/process_train/", "") + FLAGS = tf.app.flags.FLAGS + def main(_): model_name_to_variables = { "inception_v3": "InceptionV3", @@ -37,13 +35,16 @@ def main(_): "inception_v4": "InceptionV4/Logits/AvgPool_1a/AvgPool:0", "inception_v3": "InceptionV3/Logits/AvgPool_1a_8x8/AvgPool:0", } - bottleneck_tensor_name = model_name_to_bottleneck_tensor_name.get(FLAGS.model_name) + bottleneck_tensor_name = model_name_to_bottleneck_tensor_name.get( + FLAGS.model_name) preprocessing_name = FLAGS.model_name eval_image_size = FLAGS.eval_image_size model_variables = model_name_to_variables.get(FLAGS.model_name) if model_variables is None: - tf.logging.error("Unknown model_name provided `%s`." % FLAGS.model_name) + tf.logging.error("Unknown model_name provided `%s`." % + FLAGS.model_name) sys.exit(-1) + # Either specify a checkpoint_path directly or find the path if tf.gfile.IsDirectory(FLAGS.checkpoint_path): checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) print(checkpoint_path) @@ -62,7 +63,8 @@ def main(_): network_fn = nets_factory.get_network_fn( FLAGS.model_name, FLAGS.num_classes, is_training=False ) - processed_image = image_preprocessing_fn(image, eval_image_size, eval_image_size) + processed_image = image_preprocessing_fn( + image, eval_image_size, eval_image_size) processed_images = tf.expand_dims(processed_image, 0) logits, _ = network_fn(processed_images) @@ -72,14 +74,15 @@ def main(_): ) print(FLAGS.bot_out) - + sess = tf.Session() init_fn(sess) fto_bot = open(FLAGS.bot_out, "w") fto_pred = open(FLAGS.pred_out, "w") - filelist = os.listdir(FLAGS.file_dir) + filelist = [file_path for file_path in os.listdir( + FLAGS.file_dir) if (file_path.startswith("images_train") & file_path.endswith(".tfrecord"))] for i in range(len(filelist)): file = filelist[i] fls = tf.python_io.tf_record_iterator(FLAGS.file_dir + "/" + file) @@ -97,7 +100,8 @@ def main(_): example.features.feature["image/class/label"].int64_list.value[0] ) preds = sess.run(probabilities, feed_dict={image_string: x}) - bottleneck_values = sess.run(bottleneck_tensor_name, {image_string: x}) + bottleneck_values = sess.run( + bottleneck_tensor_name, {image_string: x}) fto_pred.write(filenames + "\t" + label) fto_bot.write(filenames + "\t" + label) for p in range(len(preds[0])): diff --git a/bin/clustering_schc_individual.py b/bin/clustering_schc_individual.py new file mode 100755 index 0000000..a7f1ba5 --- /dev/null +++ b/bin/clustering_schc_individual.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +import os +import joblib +import pandas as pd +from joblib import Parallel, delayed +import argparse + +# Own modules +import features.clustering as clustering +import features.graphs as graphs +from model.constants import DEFAULT_CELL_TYPES + +import multiprocessing + +from argparse import ArgumentParser as AP +from os.path import abspath +import time +from pathlib import Path + + +def get_args(): + # Script description + description = """Spatial Clustering with SCHC (individual)""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--tile_quantification_path", type=str, + help="Path to csv file with tile-level quantification (predictions)", required=True) + parser.add_argument("--output_dir", type=str, + help="Path to output folder to store generated files", required=False, default = "") + parser.add_argument("--slide_type", type=str, + help="Type of slides 'FFPE' or 'FF' used for naming generated 
files (default: 'FF')", default="FF") + parser.add_argument("--cell_types_path", type=str, + help="Path to file with list of cell types (default: CAFs, endothelial_cells, T_cells, tumor_purity)", default=None) + parser.add_argument("--graphs_path", type=str, + help="Path to pkl with generated graphs in case this was done before (OPTIONAL) if not specified, graphs will be generated", default=None) + parser.add_argument("--prefix", type=str, + help="Prefix for output file", default="") + parser.add_argument("--cutoff_path_length", type=int, + help="Max path length for proximity based on graphs", default=2, required=False) + parser.add_argument("--shapiro_alpha", type=float, + help="Choose significance level alpha (default: 0.05)", default=0.05, required=False) + parser.add_argument("--abundance_threshold", type=float, + help="Threshold for assigning cell types based on predicted probability (default: 0.5)", default=0.5, required=False) + + parser.add_argument("--n_clusters", type=int, + help="Number of clusters for SCHC (default: 8)", required=False, default=8) + parser.add_argument("--n_cores", type = int, help = "Number of cores to use (parallelization)") + + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + arg.output_dir = abspath(arg.output_dir) + + if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): + # Create an empty folder for TF records if folder doesn't exist + arg.output_dir = Path(arg.output_dir,"process_train") + os.mkdir(arg.output_dir) + + if arg.n_cores is None: + arg.n_cores = multiprocessing.cpu_count() + return arg + + +def clustering_schc_individual( + tile_quantification_path, + cell_types=None, graphs_path=None, n_cores = multiprocessing.cpu_count): + + if cell_types is None: + cell_types = DEFAULT_CELL_TYPES + + predictions = pd.read_csv(tile_quantification_path, sep="\t") + slide_submitter_ids = list(set(predictions.slide_submitter_id)) + + ##################################### + # ---- Constructing the graphs ---- # + ##################################### + + if graphs_path is None: + results = Parallel(n_jobs=n_cores)( + delayed(graphs.construct_graph)( + predictions=predictions, slide_submitter_id=id) + for id in slide_submitter_ids + ) + # Extract/format graphs + all_graphs = { + list(slide_graph.keys())[0]: list(slide_graph.values())[0] + for slide_graph in results + } + + else: + all_graphs = joblib.load(graphs_path) + + ###################################################################################### + # ---- Fraction of highly abundant cell types (individual cell type clustering) ---- # + ###################################################################################### + + # Spatially Hierarchical Constrained Clustering with all quantification of all cell types for each individual cell type + slide_indiv_clusters = Parallel(n_jobs=n_cores)(delayed(clustering.schc_individual)( + predictions, all_graphs[id], id) for id in slide_submitter_ids) + all_slide_indiv_clusters = pd.concat(slide_indiv_clusters, axis=0) + + # Add metadata + all_slide_indiv_clusters = pd.merge( + predictions, all_slide_indiv_clusters, on="tile_ID") + + # Add abundance label 'high' or 'low' based on cluster means + slide_indiv_clusters_labeled = clustering.label_cell_type_map_clusters( + all_slide_indiv_clusters) + + all_slide_indiv_clusters_final = all_slide_indiv_clusters.drop( + axis=1, columns=cell_types) # drop the predicted probabilities + + + return all_slide_indiv_clusters, slide_indiv_clusters_labeled, 
all_slide_indiv_clusters_final, all_graphs + + +def main(args): + all_slide_indiv_clusters, slide_indiv_clusters_labeled, all_slide_indiv_clusters_final,all_graphs = clustering_schc_individual( + tile_quantification_path = args.tile_quantification_path, + cell_types=args.cell_types_path, + graphs_path=args.graphs_path, + n_cores = args.n_cores) + + all_slide_indiv_clusters.to_csv( + Path(args.output_dir, f"{args.prefix}_indiv_schc_tiles_raw.csv", index = False)) + + all_slide_indiv_clusters_final.to_csv(Path(args.output_dir, f"{args.prefix}_indiv_schc_tiles.csv"), index = False) + + slide_indiv_clusters_labeled.to_csv( + Path(args.output_dir,f"{args.prefix}_indiv_schc_clusters_labeled.csv", index= False )) + + if (args.graphs_path is None): + joblib.dump(all_graphs, + Path(args.output_dir, + f"{args.prefix}_graphs.pkl")) + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/clustering_schc_simultaneous.py b/bin/clustering_schc_simultaneous.py new file mode 100755 index 0000000..0f9f3a1 --- /dev/null +++ b/bin/clustering_schc_simultaneous.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +import os +import joblib +import pandas as pd +from joblib import Parallel, delayed +import argparse + +# Own modules +import features.clustering as clustering +import features.graphs as graphs +from model.constants import DEFAULT_CELL_TYPES +import multiprocessing + +from argparse import ArgumentParser as AP +from os.path import abspath +import time +from pathlib import Path + +def get_args(): + # Script description + description = """Compute Spatial Network Features: Compute Connectedness""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--tile_quantification_path", type=str, + help="Path to csv file with tile-level quantification (predictions)", required=True) + parser.add_argument("--output_dir", type=str, + help="Path to output folder to store generated files", required=False, default = "") + parser.add_argument("--slide_type", type=str, + help="Type of slides 'FFPE' or 'FF' used for naming generated files (default: 'FF')", default="FF") + parser.add_argument("--cell_types_path", type=str, + help="Path to file with list of cell types (default: CAFs, endothelial_cells, T_cells, tumor_purity)", default=None) + parser.add_argument("--graphs_path", type=str, + help="Path to pkl with generated graphs in case this was done before (OPTIONAL) if not specified, graphs will be generated", default=None) + parser.add_argument("--prefix", type=str, + help="Prefix for output file", default="") + parser.add_argument("--cutoff_path_length", type=int, + help="Max path length for proximity based on graphs", default=2, required=False) + parser.add_argument("--shapiro_alpha", type=float, + help="Choose significance level alpha (default: 0.05)", default=0.05, required=False) + parser.add_argument("--abundance_threshold", type=float, + help="Threshold for assigning cell types based on predicted probability (default: 0.5)", default=0.5, required=False) + + parser.add_argument("--n_clusters", type=int, + help="Number of clusters for SCHC (default: 8)", required=False, default=8) + parser.add_argument("--n_cores", type = int, help = "Number of cores to use (parallelization)") + + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + arg.output_dir = abspath(arg.output_dir) 
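+    # NOTE: if the requested output directory does not exist, the block below
+    # redirects it to a fresh "process_train" subfolder before creating it;
+    # the "TF records" wording in its comment appears to be inherited from
+    # the tiling/TF-record pre-processing script.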
+ + if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): + # Create an empty folder for TF records if folder doesn't exist + arg.output_dir = Path(arg.output_dir,"process_train") + os.mkdir(arg.output_dir) + + if arg.n_cores is None: + arg.n_cores = multiprocessing.cpu_count() + return arg + + +def clustering_schc_simultaneous( + tile_quantification_path, cell_types=None, graphs_path=None, n_cores = multiprocessing.cpu_count()): + + if cell_types is None: + cell_types = DEFAULT_CELL_TYPES + print(cell_types) + + predictions = pd.read_csv(tile_quantification_path, sep="\t") + slide_submitter_ids = list(set(predictions.slide_submitter_id)) + + ##################################### + # ---- Constructing the graphs ---- # + ##################################### + + if graphs_path is None: + results = Parallel(n_jobs=n_cores)( + delayed(graphs.construct_graph)( + predictions=predictions, slide_submitter_id=id) + for id in slide_submitter_ids + ) + # Extract/format graphs + all_graphs = { + list(slide_graph.keys())[0]: list(slide_graph.values())[0] + for slide_graph in results + } + else: + all_graphs = joblib.load(graphs_path) + + ###################################################################### + # ---- Fraction of cell type clusters (simultaneous clustering) ---- # + ###################################################################### + + # Spatially Hierarchical Constrained Clustering with all quantification of all cell types + slide_clusters = Parallel(n_jobs=n_cores)(delayed(clustering.schc_all)( + predictions, all_graphs[id], id) for id in slide_submitter_ids) + # Combine the tiles labeled with their cluster id for all slides + tiles_all_schc = pd.concat(slide_clusters, axis=0) + + # Assign a cell type label based on the mean of all cluster means across all slides + all_slide_clusters_characterized = clustering.characterize_clusters( + tiles_all_schc) + + formatted_tiles_all_schc = tiles_all_schc.drop(axis=1, columns=cell_types) + # drop the predicted probabilities + return tiles_all_schc, all_slide_clusters_characterized, formatted_tiles_all_schc, all_graphs + + +def main(args): + tiles_all_schc, all_slide_clusters_characterized,formatted_tiles_all_schc, all_graphs = clustering_schc_simultaneous( + tile_quantification_path = args.tile_quantification_path, + cell_types=args.cell_types_path, + graphs_path=args.graphs_path, + n_cores = args.n_cores) + + tiles_all_schc.to_csv( + Path(args.output_dir, f"{args.prefix}_all_schc_tiles_raw.csv", index = False)) + + formatted_tiles_all_schc.to_csv( + Path(args.output_dir, f"{args.prefix}_all_schc_tiles.csv", index = False) + ) + + all_slide_clusters_characterized.to_csv( + Path(args.output_dir,f"{args.prefix}_all_schc_clusters_labeled.csv", index= False )) + + + if (args.graphs_path is None): + joblib.dump(all_graphs, + Path(args.output_dir, + f"{args.prefix}_graphs.pkl")) + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/combine_all_spatial_features.py b/bin/combine_all_spatial_features.py new file mode 100755 index 0000000..c25806c --- /dev/null +++ b/bin/combine_all_spatial_features.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +import os +import pandas as pd +import argparse +from os import path +from argparse import ArgumentParser as AP +from os.path import abspath +import time +from pathlib import Path + + +def get_args(): + # Script description + description = """Combining all computed 
spatial features""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--output_dir", type=str, + help="Path to output folder to store generated files", required=False, default = "") + parser.add_argument("--prefix", type=str, + help="Prefix for output file", default="") + parser.add_argument("--graph_features", type=str, + help="Path to tab-separated file with the graph features", default="") + + parser.add_argument("--clustering_features", type=str, + help="Path to tab-separated file with the graph features", default="") + parser.add_argument("--metadata_path", type=str, + help="Path to tab-separated file with metadata", default="") + parser.add_argument("--is_tcga", type=int, + help="dataset is from TCGA (default: True)", default=True, required=False) + parser.add_argument("--merge_var", type=str, + help="Variable to merge metadata and computed features on", default="slide_submitter_id") + parser.add_argument("--sheet_name", type=str, + help="Name of sheet for merging in case a path to xls(x) file is given for metadata_path", default=None) + + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + arg.output_dir = abspath(arg.output_dir) + + if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): + # Create an empty folder for TF records if folder doesn't exist + arg.output_dir = Path(arg.output_dir,"process_train") + os.mkdir(arg.output_dir) + return arg + + +def combine_all_spatial_features(graph_features, clustering_features, metadata_path="", is_TCGA=False, merge_var="slide_submitter_id", sheet_name=None): + """ + Combine network and clustering features into a single file. If metadata_path is not None, add the metadata as well, based on variable slide_submitter_id + + Args: + output_dir (str): directory containing the graph and clustering features + slide_type (str): slide type to identify correct files for merging, either "FF" or "FFPE" (default="FF") + metadata_path (str): path to file containing metadata + is_TCGA (bool): whether data is from TCGA + merge_var (str): variable on which to merge (default: slide_submitter_id) + + """ + all_features_graph = pd.read_csv(graph_features, sep=",", index_col=False) + all_features_clustering = pd.read_csv(clustering_features, sep=",", index_col=False) + + all_features_combined = pd.merge( + all_features_graph, all_features_clustering) + + # Add additional identifiers for TCGA + if is_TCGA: + all_features_combined["TCGA_patient_ID"] = all_features_combined.slide_submitter_id.str[0:12] + all_features_combined["TCGA_sample_ID"] = all_features_combined.slide_submitter_id.str[0:15] + all_features_combined["sample_submitter_id"] = all_features_combined.slide_submitter_id.str[0:16] + + # Add metadata if available + if path.isfile(metadata_path): + file_extension = metadata_path.split(".")[-1] + if (file_extension.startswith("xls")): + if sheet_name is None: + metadata = pd.read_excel(metadata_path) + elif (file_extension == "txt") or (file_extension == "csv"): + metadata = pd.read_csv(metadata_path, sep="\t") + all_features_combined = pd.merge( + all_features_combined, metadata, on=merge_var, how="left") + return all_features_combined + +def main(args): + print("Post-processing: combining all features") + combine_all_spatial_features(args.graph_features, args.clustering_features, args.metadata_path, args.is_tcga, args.merge_var, args.sheet_name).to_csv( + Path(args.output_dir, 
f"{args.prefix}_all_features_combined.csv"), sep="\t", index=False) + + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/combine_clustering_features.py b/bin/combine_clustering_features.py new file mode 100755 index 0000000..163fd71 --- /dev/null +++ b/bin/combine_clustering_features.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 +import os +import pandas as pd +import argparse +from argparse import ArgumentParser as AP +from os.path import abspath +import time +from pathlib import Path + +def get_args(): + # Script description + description = """Combine all clustering features""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + + + parser.add_argument("--output_dir", type=str, + help="Path to output folder to store generated files", required=False, default = "") + parser.add_argument("--prefix", type=str, + help="Prefix for output file", default="") + + parser.add_argument("--frac_high_wide", type=str, + help="Path to csv", default="") + + parser.add_argument("--num_clust_slide_wide", type=str, + help="Path to csv", default="") + + parser.add_argument("--all_prox_df_wide", type=str, + help="Path to csv", default="") + parser.add_argument("--prox_indiv_schc_combined_wide", type=str, + help="Path to csv", default="") + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + arg.output_dir = abspath(arg.output_dir) + + if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): + # Create an empty folder for TF records if folder doesn't exist + arg.output_dir = Path(arg.output_dir,"process_train") + os.mkdir(arg.output_dir) + return arg + +def combine_clustering_features(frac_high_wide, + num_clust_slide_wide, + all_prox_df_wide, + prox_indiv_schc_combined_wide): + + frac_high_wide = pd.read_csv(frac_high_wide, index_col = False, header = 0) + num_clust_slide_wide = pd.read_csv(num_clust_slide_wide, index_col = False, header = 0) + + all_prox_df_wide = pd.read_csv(all_prox_df_wide, index_col = False, header =0) + prox_indiv_schc_combined_wide = pd.read_csv(prox_indiv_schc_combined_wide, index_col = False, header = 0, sep = "\t") + + + # Store features + all_features = pd.merge(frac_high_wide, num_clust_slide_wide, on=[ + "slide_submitter_id"]) + all_features = pd.merge(all_features, all_prox_df_wide) + all_features = pd.merge(all_features, prox_indiv_schc_combined_wide) + # all_features = pd.merge(all_features, shape_feature_means_wide) + + return all_features + +def main(args): + + combine_clustering_features(frac_high_wide = args.frac_high_wide, + num_clust_slide_wide = args.num_clust_slide_wide, all_prox_df_wide = args.all_prox_df_wide, prox_indiv_schc_combined_wide = args.prox_indiv_schc_combined_wide).to_csv( + Path(args.output_dir, f"{args.prefix}_clustering_features.csv"), index=False) + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/combine_network_features.py b/bin/combine_network_features.py new file mode 100755 index 0000000..40d2a74 --- /dev/null +++ b/bin/combine_network_features.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +import argparse +from argparse import ArgumentParser as AP +import os +import pandas as pd +from os.path import abspath +import time +from pathlib import Path + +def get_args(): + # 
Script description + description = """Combine all network Features""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--all_largest_cc_sizes_wide", type=str, + help="Path to csv file ", required=True) + parser.add_argument("--shortest_paths_wide", type=str, + help="Path to csv file ", required=True) + parser.add_argument("--colocalization_wide", type=str, + help="Path to csv file ", required=True) + + parser.add_argument("--output_dir", type=str, + help="Path to output folder to store generated files", required=False, default = "") + + parser.add_argument("--prefix", type=str, + help="Prefix for output file", default="") + + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + arg.output_dir = abspath(arg.output_dir) + + if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): + # Create an empty folder for TF records if folder doesn't exist + arg.output_dir = Path(arg.output_dir,"process_train") + os.mkdir(arg.output_dir) + + return arg + +def combine_network_features(all_largest_cc_sizes_wide, shortest_paths_wide, colocalization_wide): + """ + Compute network features + 1. effect sizes based on difference in node degree between simulated slides and actual slide + 2. fraction largest connected component + 3. number of shortest paths with a max length. + + Args: + tile_quantification_path (str) + output_dir (str) + slide_type (str): type of slide either 'FF' or 'FFPE' + cell_types (list): list of cell types + graphs_path (str): path to pkl file with generated graphs [optional] + abundance_threshold (float): threshold for assigning cell types to tiles based on the predicted probability (default=0.5) + shapiro_alpha (float): significance level for shapiro tests for normality (default=0.05) + cutoff_path_length (int): max. length of shortest paths (default=2) + + Returns: + all_effect_sizes (DataFrame): dataframe containing the slide_submitter_id, center, neighbor, effect_size (Cohen's d), Tstat, pval, and the pair (string of center and neighbor) + all_sims_nd (DataFrame): dataframe containing slide_submitter_id, center, neighbor, simulation_nr and degree (node degree) + all_mean_nd_df (DataFrame): dataframe containing slide_submitter_id, center, neighbor, mean_sim (mean node degree across the N simulations), mean_obs + all_largest_cc_sizes (DataFrame): dataframe containing slide_submitter_id, cell type and type_spec_frac (fraction of LCC w.r.t. 
all tiles for cell type) + shortest_paths_slide (DataFrame): dataframe containing slide_submitter_id, source, target, pair and n_paths (number of shortest paths for a pair) + all_dual_nodes_frac (DataFrame): dataframe containing slide_submitter_id, pair, counts (absolute) and frac + + """ + all_largest_cc_sizes_wide = pd.read_csv(all_largest_cc_sizes_wide, index_col = False, sep = "\t") + shortest_paths_wide = pd.read_csv(shortest_paths_wide, index_col = False, sep = "\t") + colocalization_wide = pd.read_csv(colocalization_wide, index_col = False, sep = "\t") + + + all_features = pd.merge(all_largest_cc_sizes_wide, shortest_paths_wide) + all_features = pd.merge(all_features, colocalization_wide) + return (all_features) + +def main(args): + combine_network_features(args.all_largest_cc_sizes_wide, args.shortest_paths_wide, args.colocalization_wide + ).to_csv(Path(args.output_dir, f"{args.prefix}_all_graph_features.csv"), index = False) + + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/compute_colocalization.py b/bin/compute_colocalization.py new file mode 100755 index 0000000..3348742 --- /dev/null +++ b/bin/compute_colocalization.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 +import argparse +from argparse import ArgumentParser as AP +import os +import joblib +import pandas as pd +from joblib import Parallel, delayed +import argparse +import multiprocessing +# Own modules +import features.features as features +import features.graphs as graphs +import features.utils as utils +from model.constants import DEFAULT_CELL_TYPES + +from os.path import abspath +import time +from pathlib import Path + + +def get_args(): + # Script description + description = """Compute Spatial Network Features: Compute co-localization""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--tile_quantification_path", type=str, + help="Path to csv file with tile-level quantification (predictions)", required=True) + parser.add_argument("--output_dir", type=str, + help="Path to output folder to store generated files", required=False, default="") + parser.add_argument("--cell_types_path", type=str, + help="Path to file with list of cell types (default: CAFs, endothelial_cells, T_cells, tumor_purity)", default=None) + parser.add_argument("--graphs_path", type=str, + help="Path to pkl with generated graphs in case this was done before (OPTIONAL) if not specified, graphs will be generated", default=None) + parser.add_argument("--prefix", type=str, + help="Prefix for output file", default="") + parser.add_argument("--abundance_threshold", type=float, + help="Threshold for assigning cell types based on predicted probability (default: 0.5)", default=0.5, required=False) + + parser.add_argument("--n_cores", type=int, + help="Number of cores to use (parallelization)") + + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + arg.output_dir = abspath(arg.output_dir) + + if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): + # Create an empty folder for TF records if folder doesn't exist + arg.output_dir = Path(arg.output_dir, "process_train") + os.mkdir(arg.output_dir) + + if arg.n_cores is None: + arg.n_cores = multiprocessing.cpu_count() + return arg + + +def compute_colocalization(tile_quantification_path, + cell_types=None, + graphs_path=None, + 
abundance_threshold=0.5, n_cores=multiprocessing.cpu_count()): + """ + Compute network features: co-localization + + Args: + tile_quantification_path (str) + output_dir (str) + cell_types (list): list of cell types + graphs_path (str): path to pkl file with generated graphs [optional] + abundance_threshold (float): threshold for assigning cell types to tiles based on the predicted probability (default=0.5) + + Returns: + all_dual_nodes_frac (DataFrame): dataframe containing slide_submitter_id, pair, counts (absolute) and frac + + """ + if cell_types is None: + cell_types = DEFAULT_CELL_TYPES + + predictions = pd.read_csv(tile_quantification_path, sep="\t") + slide_submitter_ids = list(set(predictions.slide_submitter_id)) + + ##################################### + # ---- Constructing the graphs ---- # + ##################################### + + # TODO use 'generate_graphs.py' for this to replace + if graphs_path is None: + results = Parallel(n_jobs=n_cores)( + delayed(graphs.construct_graph)( + predictions=predictions, slide_submitter_id=id) + for id in slide_submitter_ids + ) + # Extract/format graphs + all_graphs = { + list(slide_graph.keys())[0]: list(slide_graph.values())[0] + for slide_graph in results + } + else: + all_graphs = joblib.load(graphs_path) + + ####################################################### + # ---- Compute connectedness and co-localization ---- # + ####################################################### + + all_dual_nodes_frac = [] + for id in slide_submitter_ids: + slide_data = utils.get_slide_data(predictions, id) + node_cell_types = utils.assign_cell_types( + slide_data=slide_data, cell_types=cell_types, threshold=abundance_threshold) + + dual_nodes_frac = features.compute_dual_node_fractions( + node_cell_types, cell_types) + dual_nodes_frac["slide_submitter_id"] = id + all_dual_nodes_frac.append(dual_nodes_frac) + + all_dual_nodes_frac = pd.concat(all_dual_nodes_frac, axis=0) + + colocalization_wide = all_dual_nodes_frac.pivot( + index=["slide_submitter_id"], columns="pair")["frac"] + new_cols = [ + f'Co-loc {col.replace("_", " ")} clusters' for col in colocalization_wide.columns] + colocalization_wide.columns = new_cols + colocalization_wide = colocalization_wide.reset_index() + + return (all_dual_nodes_frac, colocalization_wide, all_graphs) + + +def main(args): + all_dual_nodes_frac, colocalization_wide, all_graphs = compute_colocalization( + tile_quantification_path=args.tile_quantification_path, + cell_types=args.cell_types_path, + graphs_path=args.graphs_path, + abundance_threshold=args.abundance_threshold, + n_cores=args.n_cores) + + all_dual_nodes_frac.to_csv( + Path(args.output_dir, + f"{args.prefix}_features_coloc_fraction.csv"), sep="\t", index=False) + + colocalization_wide.to_csv( + Path(args.output_dir, + f"{args.prefix}_features_coloc_fraction_wide.csv"), sep="\t", index=False) + + if (args.graphs_path is None): + joblib.dump(all_graphs, + Path(args.output_dir, + f"{args.prefix}_graphs.pkl")) + + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/compute_connectedness.py b/bin/compute_connectedness.py new file mode 100755 index 0000000..5caf7cd --- /dev/null +++ b/bin/compute_connectedness.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +import argparse +from argparse import ArgumentParser as AP +import os +import sys +import joblib +import pandas as pd +from joblib import Parallel, delayed +import argparse +from os 
import path +import multiprocessing +# Own modules +import features.clustering as clustering +import features.features as features +import features.graphs as graphs +import features.utils as utils +from model.constants import DEFAULT_SLIDE_TYPE, DEFAULT_CELL_TYPES, NUM_CORES, METADATA_COLS + +from os.path import abspath +import time +from pathlib import Path + +def get_args(): + # Script description + description = """Compute Spatial Network Features: Compute LCC""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--tile_quantification_path", type=str, + help="Path to csv file with tile-level quantification (predictions)", required=True) + parser.add_argument("--output_dir", type=str, + help="Path to output folder to store generated files", required=False, default = "") + parser.add_argument("--cell_types_path", type=str, + help="Path to file with list of cell types (default: CAFs, endothelial_cells, T_cells, tumor_purity)", default=None) + parser.add_argument("--graphs_path", type=str, + help="Path to pkl with generated graphs in case this was done before (OPTIONAL) if not specified, graphs will be generated", default=None) + parser.add_argument("--prefix", type=str, + help="Prefix for output file", default="") + parser.add_argument("--abundance_threshold", type=float, + help="Threshold for assigning cell types based on predicted probability (default: 0.5)", default=0.5, required=False) + parser.add_argument("--n_cores", type = int, help = "Number of cores to use (parallelization)") + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + arg.output_dir = abspath(arg.output_dir) + + if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): + # Create an empty folder for TF records if folder doesn't exist + arg.output_dir = Path(arg.output_dir,"process_train") + os.mkdir(arg.output_dir) + + if arg.n_cores is None: + arg.n_cores = multiprocessing.cpu_count() + return arg + + + +def compute_connectedness(tile_quantification_path, + cell_types=None, + graphs_path=None, + abundance_threshold=0.5, + n_cores = multiprocessing.cpu_count()): + """ + Compute network features: LCC + Normalized size of the largest connected component (LCC) for cell type A. This is defined as the largest set of nodes of cell type A + connected with at least one path between every pair of nodes, + divided by the total number of nodes of cell type A. Nodes are assigned a cell type label as described above. + + Args: + tile_quantification_path (str) + cell_types (list): list of cell types + graphs_path (str): path to pkl file with generated graphs [optional] + abundance_threshold (float): threshold for assigning cell types to tiles based on the predicted probability (default=0.5) + + Returns: + all_largest_cc_sizes (DataFrame): dataframe containing slide_submitter_id, cell type and type_spec_frac (fraction of LCC w.r.t. 
all tiles for cell type) + all_graphs (dict): dictionary with graphs [only if not existing] + + """ + if cell_types is None: + cell_types = DEFAULT_CELL_TYPES + + predictions = pd.read_csv(tile_quantification_path, sep="\t") + slide_submitter_ids = list(set(predictions.slide_submitter_id)) + + ##################################### + # ---- Constructing the graphs ---- # + ##################################### + + if graphs_path is None: + results = Parallel(n_jobs=n_cores)( + delayed(graphs.construct_graph)( + predictions=predictions, slide_submitter_id=id) + for id in slide_submitter_ids + ) + # Extract/format graphs + all_graphs = { + list(slide_graph.keys())[0]: list(slide_graph.values())[0] + for slide_graph in results + } + else: + all_graphs = joblib.load(graphs_path) + + ####################################################### + # ---- Compute connectedness and co-localization ---- # + ####################################################### + + all_largest_cc_sizes = [] + for id in slide_submitter_ids: + slide_data = utils.get_slide_data(predictions, id) + node_cell_types = utils.assign_cell_types( + slide_data=slide_data, cell_types=cell_types, threshold=abundance_threshold) + lcc = features.determine_lcc( + graph=all_graphs[id], cell_type_assignments=node_cell_types, cell_types=cell_types + ) + lcc["slide_submitter_id"] = id + all_largest_cc_sizes.append(lcc) + + + all_largest_cc_sizes = pd.concat(all_largest_cc_sizes, axis=0) + all_largest_cc_sizes = all_largest_cc_sizes.reset_index(drop=True) + all_largest_cc_sizes_wide = all_largest_cc_sizes.pivot( + index=["slide_submitter_id"], columns="cell_type")["type_spec_frac"] + new_cols = [ + f'LCC {col.replace("_", " ")} clusters' for col in all_largest_cc_sizes_wide.columns] + all_largest_cc_sizes_wide.columns = new_cols + all_largest_cc_sizes_wide = all_largest_cc_sizes_wide.reset_index() + + return(all_largest_cc_sizes_wide, all_graphs) + +def main(args): + all_largest_cc_sizes_wide, all_graphs = compute_connectedness( + tile_quantification_path=args.tile_quantification_path, + cell_types=args.cell_types_path, + graphs_path=args.graphs_path, + abundance_threshold=args.abundance_threshold, n_cores=args.n_cores) + all_largest_cc_sizes_wide.to_csv( + Path(args.output_dir, + f"{args.prefix}_features_lcc_fraction_wide.csv"), + sep="\t", + index=False) + + if (args.graphs_path is None): + joblib.dump(all_graphs, + Path(args.output_dir, + f"{args.prefix}_graphs.pkl")) + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/compute_frac_high.py b/bin/compute_frac_high.py new file mode 100755 index 0000000..110f658 --- /dev/null +++ b/bin/compute_frac_high.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +import os +import pandas as pd +import argparse + +# Own modules +import features.features as features + + +from argparse import ArgumentParser as AP +from os.path import abspath +import time +from pathlib import Path + +def get_args(): + # Script description + description = """Compute Clustering Features""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--slide_indiv_clusters_labeled", type=str, + help="Path to csv file", required=True) + parser.add_argument("--output_dir", type=str, + help="Path to output folder to store generated files", required=False, default = "") + parser.add_argument("--prefix", type=str, + help="Prefix for 
output file", default="")
+    parser.add_argument("--version", action="version", version="0.1.0")
+    arg = parser.parse_args()
+    arg.output_dir = abspath(arg.output_dir)
+
+    if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))):
+        # Create an empty folder for TF records if folder doesn't exist
+        arg.output_dir = Path(arg.output_dir, "process_train")
+        os.mkdir(arg.output_dir)
+
+    return arg
+
+def compute_frac_high(slide_indiv_clusters_labeled):
+    """
+    Compute clustering features: fraction of clusters labeled 'high' per cell type
+
+    Args:
+        slide_indiv_clusters_labeled: dataframe with the labeled clusters based on individual cell type SCHC
+
+    Returns:
+        frac_high_wide (DataFrame)
+
+    """
+
+    slide_indiv_clusters_labeled = pd.read_csv(slide_indiv_clusters_labeled)
+
+    # Count the fraction of 'high' clusters
+    frac_high = features.n_high_clusters(slide_indiv_clusters_labeled)
+
+    frac_high_sub = frac_high[frac_high["is_high"]].copy()
+    frac_high_sub = frac_high_sub.drop(
+        columns=["is_high", "n_clusters", "n_total_clusters"])
+
+    frac_high_wide = frac_high_sub.pivot(index=["slide_submitter_id"], columns=[
+        "cell_type_map"])["fraction"]
+    new_cols = [('fraction {0} clusters labeled high'.format(col))
+                for col in frac_high_wide.columns]
+    frac_high_wide.columns = new_cols
+    frac_high_wide = frac_high_wide.sort_index(axis="columns").reset_index()
+    return frac_high_wide
+
+def main(args):
+    # Note: index=False is a to_csv argument; passing it to Path raises a TypeError
+    compute_frac_high(
+        slide_indiv_clusters_labeled=args.slide_indiv_clusters_labeled).to_csv(
+        Path(args.output_dir, f"{args.prefix}_frac_high_wide.csv"), index=False)
+
+if __name__ == "__main__":
+    args = get_args()
+    st = time.time()
+    main(args)
+    rt = time.time() - st
+    print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s")
diff --git a/bin/compute_nclusters.py b/bin/compute_nclusters.py
new file mode 100755
index 0000000..25c33cf
--- /dev/null
+++ b/bin/compute_nclusters.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+import os
+import sys
+import joblib
+import pandas as pd
+from joblib import Parallel, delayed
+import argparse
+from os import path
+
+# Own modules
+import features.clustering as clustering
+import features.features as features
+import features.graphs as graphs
+import features.utils as utils
+from model.constants import DEFAULT_SLIDE_TYPE, DEFAULT_CELL_TYPES, METADATA_COLS
+
+import multiprocessing
+
+from argparse import ArgumentParser as AP
+from os.path import abspath
+import time
+from pathlib import Path
+
+
+def get_args():
+    # Script description
+    description = """Compute Clustering Features: Compute number of clusters per cell type"""
+
+    # Add parser
+    parser = AP(description=description,
+                formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument("--all_slide_clusters_characterized", type=str,
+                        help="Path to csv file", required=True)
+    parser.add_argument("--output_dir", type=str,
+                        help="Path to output folder to store generated files", required=False, default="")
+    parser.add_argument("--slide_type", type=str,
+                        help="Type of slides 'FFPE' or 'FF' used for naming generated files (default: 'FF')", default="FF")
+    parser.add_argument("--cell_types_path", type=str,
+                        help="Path to file with list of cell types (default: CAFs, endothelial_cells, T_cells, tumor_purity)", default=None)
+    parser.add_argument("--graphs_path", type=str,
+                        help="Path to pkl with generated graphs in case this was done before (OPTIONAL) if not specified, graphs will be generated", default=None)
+    parser.add_argument("--prefix", type=str,
+                        help="Prefix for output file", default="")
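# --- Illustrative aside (not part of the pipeline code): the wide "fraction ... clusters
# labeled high" table built by compute_frac_high above can be reproduced on a toy frame.
# Column names and values here are assumptions for demonstration only.
import pandas as pd

toy = pd.DataFrame({
    "slide_submitter_id": ["s1", "s1", "s2"],
    "cell_type_map": ["T_cells", "CAFs", "T_cells"],
    "fraction": [0.25, 0.50, 0.75],
})
wide = toy.pivot(index="slide_submitter_id", columns="cell_type_map")["fraction"]
wide.columns = [f"fraction {c} clusters labeled high" for c in wide.columns]
print(wide.sort_index(axis="columns").reset_index())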
parser.add_argument("--cutoff_path_length", type=int, + help="Max path length for proximity based on graphs", default=2, required=False) + parser.add_argument("--shapiro_alpha", type=float, + help="Choose significance level alpha (default: 0.05)", default=0.05, required=False) + parser.add_argument("--abundance_threshold", type=float, + help="Threshold for assigning cell types based on predicted probability (default: 0.5)", default=0.5, required=False) + + parser.add_argument("--n_clusters", type=int, + help="Number of clusters for SCHC (default: 8)", required=False, default=8) + parser.add_argument("--n_cores", type = int, help = "Number of cores to use (parallelization)") + + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + arg.output_dir = abspath(arg.output_dir) + + if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): + # Create an empty folder for TF records if folder doesn't exist + arg.output_dir = Path(arg.output_dir,"process_train") + os.mkdir(arg.output_dir) + + if arg.n_cores is None: + arg.n_cores = multiprocessing.cpu_count() + return arg + + + +def compute_nclusters(all_slide_clusters_characterized, cell_types = DEFAULT_CELL_TYPES): + + if cell_types is None: + cell_types = DEFAULT_CELL_TYPES + + # # Assign a cell type label based on the mean of all cluster means across all slides + # all_slide_clusters_characterized = clustering.characterize_clusters( + # tiles_all_schc) + all_slide_clusters_characterized = pd.read_csv(all_slide_clusters_characterized, + sep = ",", header = 0) + + + # Count the number of clusters per cell type for each slide + num_clust_by_slide = features.n_clusters_per_cell_type( + all_slide_clusters_characterized, cell_types=cell_types) + + num_clust_by_slide_sub = num_clust_by_slide.copy() + num_clust_by_slide_sub = num_clust_by_slide_sub.drop( + columns=["is_assigned", "n_clusters"]) + + num_clust_slide_wide = num_clust_by_slide_sub.pivot( + index=["slide_submitter_id"], columns=["cell_type"])["fraction"] + new_cols = [('fraction {0} clusters'.format(col)) + for col in num_clust_slide_wide.columns] + num_clust_slide_wide.columns = new_cols + num_clust_slide_wide = num_clust_slide_wide.sort_index( + axis="columns").reset_index() + return num_clust_slide_wide + + +def main(args): + num_clust_slide_wide = compute_nclusters( + all_slide_clusters_characterized = args.all_slide_clusters_characterized, + cell_types=args.cell_types_path) + + num_clust_slide_wide.to_csv( + Path(args.output_dir, f"{args.prefix}_nclusters_wide.csv", sep = "\t", index = False)) + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/compute_node_degree_with_es.py b/bin/compute_node_degree_with_es.py new file mode 100755 index 0000000..7c9ff0a --- /dev/null +++ b/bin/compute_node_degree_with_es.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python3 +import argparse +from argparse import ArgumentParser as AP +import os +import joblib +import pandas as pd +from joblib import Parallel, delayed +import multiprocessing +# Own modules +import features.features as features +import features.graphs as graphs +import features.utils as utils +from model.constants import DEFAULT_CELL_TYPES + +from os.path import abspath +import time +from pathlib import Path + + +def get_args(): + # Script description + description = """Spatial Network (graph) features: Compute 'mean_ND' and 'ND_effsize' features""" + + # Add parser 
+ parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--tile_quantification_path", type=str, + help="Path to csv file with tile-level quantification (predictions)", required=True) + parser.add_argument("--output_dir", type=str, + help="Path to output folder to store generated files", required=False, default="") + parser.add_argument("--slide_type", type=str, + help="Type of slides 'FFPE' or 'FF' used for naming generated files (default: 'FF')", default="FF") + parser.add_argument("--cell_types_path", type=str, + help="Path to file with list of cell types (default: CAFs, endothelial_cells, T_cells, tumor_purity)", default=None) + parser.add_argument("--graphs_path", type=str, + help="Path to pkl with generated graphs in case this was done before (OPTIONAL) if not specified, graphs will be generated", default=None) + parser.add_argument("--prefix", type=str, + help="Prefix for output file", default="") + parser.add_argument("--cutoff_path_length", type=int, + help="Max path length for proximity based on graphs", default=2, required=False) + parser.add_argument("--shapiro_alpha", type=float, + help="Choose significance level alpha (default: 0.05)", default=0.05, required=False) + parser.add_argument("--abundance_threshold", type=float, + help="Threshold for assigning cell types based on predicted probability (default: 0.5)", default=0.5, required=False) + + parser.add_argument("--n_clusters", type=int, + help="Number of clusters for SCHC (default: 8)", required=False, default=8) + parser.add_argument("--max_dist", type=int, + help="Maximum distance between clusters", required=False, default=None) + parser.add_argument("--max_n_tiles_threshold", type=int, + help="Number of tiles for computing max. distance between two points in two different clusters", default=2, required=False) + parser.add_argument("--tile_size", type=int, + help="Size of tile (default: 512)", default=512, required=False) + parser.add_argument("--overlap", type=int, + help="Overlap of tiles (default: 50)", default=50, required=False) + + parser.add_argument("--metadata_path", type=str, + help="Path to tab-separated file with metadata", default="") + parser.add_argument("--is_TCGA", type=bool, + help="dataset is from TCGA (default: True)", default=True, required=False) + parser.add_argument("--merge_var", type=str, + help="Variable to merge metadata and computed features on", default=None) + parser.add_argument("--sheet_name", type=str, + help="Name of sheet for merging in case a path to xls(x) file is given for metadata_path", default=None) + parser.add_argument("--n_cores", type=int, + help="Number of cores to use (parallelization)") + + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + arg.output_dir = abspath(arg.output_dir) + + if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): + # Create an empty folder for TF records if folder doesn't exist + arg.output_dir = Path(arg.output_dir, "process_train") + os.mkdir(arg.output_dir) + + if arg.n_cores is None: + arg.n_cores = multiprocessing.cpu_count() + return arg + + +def compute_node_degree_with_es(tile_quantification_path, + cell_types=None, + graphs_path=None, + shapiro_alpha=0.05, + n_cores=multiprocessing.cpu_count()): + """ + Compute network features + 'mean_ND': Average number of neighbor nodes of cell type B surrounding nodes of cell type A. 
Nodes are assigned a cell type label if the predicted probability of that node (tile) for the given cell type is higher than 0.5. + 'ND_effsize': Cohen's d measure of effect size computed comparing the mean_ND(A,B) with the null distribution obtained by recomputing the mean_ND randomly assigning the A or B cell type label to each node preserving the total number of cell type A and B nodes in the network. For a negative effect size, the true average mean_ND(A,B) is larger than the simulated average mean_ND(A,B) meaning that the two cell types in the actual slide are closer together compared to a random distribution of these two cell types. Vice versa for a positive effect size. Nodes are assigned a cell type label as described above. + + + Args: + tile_quantification_path (str): path to tile quantification path (csv) + cell_types (list): list of cell types (or path to csv file with cell types) + graphs_path (str): path to pkl file with generated graphs [optional] + shapiro_alpha (float): significance level for shapiro tests for normality (default=0.05) + + Returns: + all_effect_sizes (DataFrame): dataframe containing the slide_submitter_id, center, neighbor, effect_size (Cohen's d), Tstat, pval, and the pair (string of center and neighbor) + all_sims_nd (DataFrame): dataframe containing slide_submitter_id, center, neighbor, simulation_nr and degree (node degree) + all_mean_nd_df (DataFrame): dataframe containing slide_submitter_id, center, neighbor, mean_sim (mean node degree across the N simulations), mean_obs + + """ + if cell_types is None: + cell_types = DEFAULT_CELL_TYPES + + predictions = pd.read_csv(tile_quantification_path, sep="\t") + slide_submitter_ids = list(set(predictions.slide_submitter_id)) + + ##################################### + # ---- Constructing the graphs ---- # + ##################################### + # TODO use 'generate_graphs.py' for this to replace + if graphs_path is None: + results = Parallel(n_jobs=n_cores)( + delayed(graphs.construct_graph)( + predictions=predictions, slide_submitter_id=id) + for id in slide_submitter_ids + ) + # Extract/format graphs + all_graphs = { + list(slide_graph.keys())[0]: list(slide_graph.values())[0] + for slide_graph in results + } + else: + all_graphs = joblib.load(graphs_path) + + ############################################### + # ---- Compute ES based on ND difference ---- # + ############################################### + nd_results = Parallel(n_jobs=n_cores)(delayed(features.node_degree_wrapper)( + all_graphs[id], predictions, id) for id in slide_submitter_ids) + nd_results = list(filter(lambda id: id is not None, nd_results)) + + # Format results + all_sims_nd = [] + all_mean_nd_df = [] + example_simulations = {} + + for sim_assignments, sim, mean_nd_df in nd_results: + all_mean_nd_df.append(mean_nd_df) + all_sims_nd.append(sim) + example_simulations.update(sim_assignments) + + all_sims_nd = pd.concat(all_sims_nd, axis=0).reset_index() + all_mean_nd_df = pd.concat(all_mean_nd_df).reset_index(drop=True) + + # Testing normality + shapiro_tests = Parallel(n_jobs=n_cores)(delayed(utils.test_normality)( + sims_nd=all_sims_nd, + slide_submitter_id=id, + alpha=shapiro_alpha, + cell_types=cell_types) for id in all_sims_nd.slide_submitter_id.unique()) + all_shapiro_tests = pd.concat(shapiro_tests, axis=0) + + # Computing Cohen's d effect size and perform t-test + effect_sizes = Parallel(n_jobs=n_cores)(delayed(features.compute_effect_size)( + all_mean_nd_df, all_sims_nd, slide_submitter_id) for slide_submitter_id in 
all_sims_nd.slide_submitter_id.unique()) + all_effect_sizes = pd.concat(effect_sizes, axis=0) + all_effect_sizes["pair"] = [ + f"{c}-{n}" for c, n in all_effect_sizes[["center", "neighbor"]].to_numpy()] + + return all_effect_sizes, all_sims_nd, all_mean_nd_df, example_simulations, all_shapiro_tests, all_graphs + + +def main(args): + all_effect_sizes, all_sims_nd, all_mean_nd_df, example_simulations, all_shapiro_tests, all_graphs = compute_node_degree_with_es( + tile_quantification_path=args.tile_quantification_path, + cell_types=args.cell_types_path, + graphs_path=args.graphs_path, + shapiro_alpha=args.shapiro_alpha, + n_cores=args.n_cores + ) + + all_effect_sizes.to_csv( + Path(args.output_dir, f"{args.prefix}_features_ND_ES.csv"), sep="\t", index=False) + all_sims_nd.to_csv( + Path(args.output_dir, f"{args.prefix}_features_ND_sims.csv"), sep="\t", index=False) + all_mean_nd_df.to_csv( + Path(args.output_dir, f"{args.prefix}_features_ND.csv"), sep="\t", index=False) + joblib.dump(example_simulations, + Path(args.output_dir, f"{args.prefix}_features_ND_sim_assignments.pkl")) + all_shapiro_tests.to_csv( + Path(args.output_dir, f"{args.prefix}_shapiro_tests.csv"), index=False, sep="\t") + + if (args.graphs_path is None): + joblib.dump(all_graphs, + Path(args.output_dir, + f"{args.prefix}_graphs.pkl")) + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/compute_nshortest_with_max_length.py b/bin/compute_nshortest_with_max_length.py new file mode 100755 index 0000000..78ef8ef --- /dev/null +++ b/bin/compute_nshortest_with_max_length.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +import argparse +from argparse import ArgumentParser as AP +import os +import sys +import joblib +import pandas as pd +from joblib import Parallel, delayed +import argparse +from os import path +import multiprocessing +# Own modules +import features.clustering as clustering +import features.features as features +import features.graphs as graphs +import features.utils as utils +from model.constants import DEFAULT_SLIDE_TYPE, DEFAULT_CELL_TYPES, NUM_CORES, METADATA_COLS + +from os.path import abspath +import time +from pathlib import Path + + +def get_args(): + # Script description + description = """Compute Spatial Network Features: Compute Connectedness""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--tile_quantification_path", type=str, + help="Path to csv file with tile-level quantification (predictions)", required=True) + parser.add_argument("--output_dir", type=str, + help="Path to output folder to store generated files", required=False, default="") + parser.add_argument("--slide_type", type=str, + help="Type of slides 'FFPE' or 'FF' used for naming generated files (default: 'FF')", default="FF") + parser.add_argument("--cell_types_path", type=str, + help="Path to file with list of cell types (default: CAFs, endothelial_cells, T_cells, tumor_purity)", default=None) + parser.add_argument("--graphs_path", type=str, + help="Path to pkl with generated graphs in case this was done before (OPTIONAL) if not specified, graphs will be generated", default=None) + parser.add_argument("--prefix", type=str, + help="Prefix for output file", default="") + parser.add_argument("--cutoff_path_length", type=int, + help="Max path length for proximity based on graphs", default=2, required=False) + 
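# --- Illustrative aside (not part of the pipeline code): the 'ND_effsize' described above
# is Cohen's d of the observed mean node degree against a simulated null. This sketch
# assumes the null is a vector of mean_ND values from random label assignments; the
# numbers are made up.
import numpy as np

def cohens_d_vs_null(observed_mean, null_samples):
    null = np.asarray(null_samples, dtype=float)
    # negative d => observed mean_ND larger than the null, i.e. the two cell types are
    # closer together than expected at random (matching the docstring above)
    return (null.mean() - observed_mean) / null.std(ddof=1)

rng = np.random.default_rng(0)
null_demo = rng.normal(loc=2.0, scale=0.3, size=100)  # simulated mean_ND values
print(cohens_d_vs_null(2.6, null_demo))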
parser.add_argument("--shapiro_alpha", type=float, + help="Choose significance level alpha (default: 0.05)", default=0.05, required=False) + parser.add_argument("--abundance_threshold", type=float, + help="Threshold for assigning cell types based on predicted probability (default: 0.5)", default=0.5, required=False) + + parser.add_argument("--n_clusters", type=int, + help="Number of clusters for SCHC (default: 8)", required=False, default=8) + parser.add_argument("--n_cores", type=int, + help="Number of cores to use (parallelization)") + + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + arg.output_dir = abspath(arg.output_dir) + + if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): + # Create an empty folder for TF records if folder doesn't exist + arg.output_dir = Path(arg.output_dir, "process_train") + os.mkdir(arg.output_dir) + + if arg.n_cores is None: + arg.n_cores = multiprocessing.cpu_count() + return arg + + +def compute_n_shortest_paths_with_max_length(tile_quantification_path, + cell_types=None, + graphs_path=None, + cutoff_path_length=2, + n_cores=multiprocessing.cpu_count()): + """ + Compute network features + 1. effect sizes based on difference in node degree between simulated slides and actual slide + 2. fraction largest connected component + 3. number of shortest paths with a max length. + + Args: + tile_quantification_path (str) + output_dir (str) + slide_type (str): type of slide either 'FF' or 'FFPE' + cell_types (list): list of cell types + graphs_path (str): path to pkl file with generated graphs [optional] + abundance_threshold (float): threshold for assigning cell types to tiles based on the predicted probability (default=0.5) + shapiro_alpha (float): significance level for shapiro tests for normality (default=0.05) + cutoff_path_length (int): max. length of shortest paths (default=2) + + Returns: + all_effect_sizes (DataFrame): dataframe containing the slide_submitter_id, center, neighbor, effect_size (Cohen's d), Tstat, pval, and the pair (string of center and neighbor) + all_sims_nd (DataFrame): dataframe containing slide_submitter_id, center, neighbor, simulation_nr and degree (node degree) + all_mean_nd_df (DataFrame): dataframe containing slide_submitter_id, center, neighbor, mean_sim (mean node degree across the N simulations), mean_obs + all_largest_cc_sizes (DataFrame): dataframe containing slide_submitter_id, cell type and type_spec_frac (fraction of LCC w.r.t. 
all tiles for cell type) + shortest_paths_slide (DataFrame): dataframe containing slide_submitter_id, source, target, pair and n_paths (number of shortest paths for a pair) + all_dual_nodes_frac (DataFrame): dataframe containing slide_submitter_id, pair, counts (absolute) and frac + + """ + if cell_types is None: + cell_types = DEFAULT_CELL_TYPES + + predictions = pd.read_csv(tile_quantification_path, sep="\t") + slide_submitter_ids = list(set(predictions.slide_submitter_id)) + + ##################################### + # ---- Constructing the graphs ---- # + ##################################### + + # TODO use 'generate_graphs.py' for this to replace + if graphs_path is None: + results = Parallel(n_jobs=n_cores)( + delayed(graphs.construct_graph)( + predictions=predictions, slide_submitter_id=id) + for id in slide_submitter_ids + ) + # Extract/format graphs + all_graphs = { + list(slide_graph.keys())[0]: list(slide_graph.values())[0] + for slide_graph in results + } + else: + all_graphs = joblib.load(graphs_path) + + ####################################################### + # ---- Compute N shortest paths with max. length ---- # + ####################################################### + + results = Parallel(n_jobs=n_cores)( + delayed(features.compute_n_shortest_paths_max_length)( + predictions=predictions, slide_submitter_id=id, graph=all_graphs[ + id], cutoff=cutoff_path_length + ) + for id in slide_submitter_ids + ) + # Formatting and count the number of shortest paths with max length + all_shortest_paths_thresholded = pd.concat(results, axis=0) + all_shortest_paths_thresholded["n_paths"] = 1 + proximity_graphs = ( + all_shortest_paths_thresholded.groupby( + ["slide_submitter_id", "source", "target"] + ) + .sum(numeric_only=True) + .reset_index() + ) + # Post-processing + proximity_graphs["pair"] = [f"{source}-{target}" for source, + target in proximity_graphs[["source", "target"]].to_numpy()] + proximity_graphs = proximity_graphs.drop(columns=["path_length"]) + + # Additional formatting + shortest_paths_wide = proximity_graphs.pivot( + index=["slide_submitter_id"], columns="pair")["n_paths"] + new_cols = [ + f'Prox graph {col.replace("_", " ")} clusters' for col in shortest_paths_wide.columns] + shortest_paths_wide.columns = new_cols + shortest_paths_wide = shortest_paths_wide.reset_index() + + return (proximity_graphs, shortest_paths_wide, all_graphs) + + +def main(args): + proximity_graphs, shortest_paths_wide, all_graphs = compute_n_shortest_paths_with_max_length( + tile_quantification_path=args.tile_quantification_path, + cell_types=args.cell_types_path, + graphs_path=args.graphs_path, cutoff_path_length=args.cutoff_path_length, + n_cores=args.n_cores) + + proximity_graphs.to_csv( + Path(args.output_dir, + f"{args.prefix}_features_shortest_paths_thresholded.csv"), + sep="\t", + index=False) + + shortest_paths_wide.to_csv( + Path(args.output_dir, + f"{args.prefix}_features_shortest_paths_thresholded_wide.csv"), + sep="\t", + index=False) + + if (args.graphs_path is None): + joblib.dump(all_graphs, + Path(args.output_dir, + f"{args.prefix}_graphs.pkl")) + + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/compute_proximity_from_indiv_schc_between.py b/bin/compute_proximity_from_indiv_schc_between.py new file mode 100755 index 0000000..74216dc --- /dev/null +++ b/bin/compute_proximity_from_indiv_schc_between.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 
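# --- Illustrative aside (not part of the pipeline code): counting shortest paths up to a
# maximum length, the quantity tallied by compute_n_shortest_paths_max_length in the
# script above (its internals live in features.features). Toy graph; networkx's cutoff
# argument does the length thresholding.
import networkx as nx

G_demo = nx.path_graph(5)  # 0-1-2-3-4, a stand-in for a slide's tile graph
cutoff = 2
n_pairs_within_cutoff = sum(
    1
    for _, lengths in nx.all_pairs_shortest_path_length(G_demo, cutoff=cutoff)
    for dist in lengths.values()
    if 0 < dist <= cutoff
)
print(n_pairs_within_cutoff)  # ordered node pairs reachable within `cutoff` hops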
+import os
+import pandas as pd
+from joblib import Parallel, delayed
+import argparse
+import features.features as features
+from model.constants import DEFAULT_CELL_TYPES
+
+import multiprocessing
+from argparse import ArgumentParser as AP
+from os.path import abspath
+import time
+from pathlib import Path
+
+def get_args():
+    # Script description
+    description = """Compute Clustering Features: Compute proximity (between clusters)"""
+
+    # Add parser
+    parser = AP(description=description,
+                formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument("--slide_clusters", type=str,
+                        help="Path to csv file", required=True)
+    parser.add_argument("--tiles_schc", type=str,
+                        help="Path to csv file", required=True)
+    parser.add_argument("--output_dir", type=str,
+                        help="Path to output folder to store generated files", required=False, default="")
+    parser.add_argument("--slide_type", type=str,
+                        help="Type of slides 'FFPE' or 'FF' used for naming generated files (default: 'FF')", default="FF")
+    parser.add_argument("--cell_types_path", type=str,
+                        help="Path to file with list of cell types (default: CAFs, endothelial_cells, T_cells, tumor_purity)", default=None)
+    parser.add_argument("--graphs_path", type=str,
+                        help="Path to pkl with generated graphs in case this was done before (OPTIONAL) if not specified, graphs will be generated", default=None)
+    parser.add_argument("--prefix", type=str,
+                        help="Prefix for output file", default="")
+    parser.add_argument("--cutoff_path_length", type=int,
+                        help="Max path length for proximity based on graphs", default=2, required=False)
+    parser.add_argument("--shapiro_alpha", type=float,
+                        help="Choose significance level alpha (default: 0.05)", default=0.05, required=False)
+    parser.add_argument("--max_dist", type=int,
+                        help="Maximum distance between clusters", required=False, default=None)
+
+    parser.add_argument("--max_n_tiles_threshold", type=int,
+                        help="Number of tiles for computing max. distance between two points in two different clusters (default: 2)", required=False, default=2)
+    parser.add_argument("--tile_size", type=int,
+                        help="Size of tile (default: 512)", required=False, default=512)
+    parser.add_argument("--overlap", type=int,
+                        help="Overlap of tiles (default: 50)", required=False, default=50)
+
+    parser.add_argument("--n_clusters", type=int,
+                        help="Number of clusters for SCHC (default: 8)", required=False, default=8)
+    parser.add_argument("--n_cores", type=int, help="Number of cores to use (parallelization)")
+
+    parser.add_argument("--version", action="version", version="0.1.0")
+    arg = parser.parse_args()
+    arg.output_dir = abspath(arg.output_dir)
+
+    if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))):
+        # Create an empty folder for TF records if folder doesn't exist
+        arg.output_dir = Path(arg.output_dir, "process_train")
+        os.mkdir(arg.output_dir)
+
+    if arg.n_cores is None:
+        arg.n_cores = multiprocessing.cpu_count()
+    return arg
+
+
+def compute_proximity_from_indiv_schc_between(
+        slide_clusters, tiles_schc,
+        cell_types=None,
+        n_clusters=8, max_dist=None,
+        max_n_tiles_threshold=2,
+        tile_size=512,
+        overlap=50,
+        n_cores=multiprocessing.cpu_count()):

+    if cell_types is None:
+        cell_types = DEFAULT_CELL_TYPES
+
+    all_slide_indiv_clusters = pd.read_csv(slide_clusters, sep=",", header=0)
+    slide_submitter_ids = list(set(all_slide_indiv_clusters.slide_submitter_id))
+
+    slide_indiv_clusters_labeled = pd.read_csv(tiles_schc, sep=",", header=0)
+
+    ##########################################################################
+    # ---- Compute proximity features (individual cell type clustering) ---- #
+
########################################################################## + + # Computing proximity for clusters derived for each cell type individually + # Between clusters + slide_submitter_ids = list(set(slide_indiv_clusters_labeled.slide_submitter_id)) + results_schc_indiv = Parallel(n_jobs=n_cores)(delayed(features.compute_proximity_clusters_pairs)(all_slide_indiv_clusters, + slide_submitter_id=id, method="individual_between", + n_clusters=n_clusters, + cell_types=cell_types, + max_dist=max_dist, + max_n_tiles_threshold=max_n_tiles_threshold, + tile_size=tile_size, + overlap=overlap) for id in slide_submitter_ids) + prox_indiv_schc = pd.concat(results_schc_indiv) + + # Formatting + prox_indiv_schc = pd.merge(prox_indiv_schc, slide_indiv_clusters_labeled, left_on=[ + "slide_submitter_id", "cluster1_label", "cluster1"], right_on=["slide_submitter_id", "cell_type_map", "cluster_label"]) + prox_indiv_schc = prox_indiv_schc.drop( + columns=["cell_type_map", "cluster_label"]) + prox_indiv_schc = prox_indiv_schc.rename( + columns={"is_high": "cluster1_is_high"}) + prox_indiv_schc = pd.merge(prox_indiv_schc, slide_indiv_clusters_labeled, left_on=[ + "slide_submitter_id", "cluster2_label", "cluster2"], right_on=["slide_submitter_id", "cell_type_map", "cluster_label"]) + prox_indiv_schc = prox_indiv_schc.rename( + columns={"is_high": "cluster2_is_high"}) + prox_indiv_schc = prox_indiv_schc.drop( + columns=["cell_type_map", "cluster_label"]) + + # Order matters + prox_indiv_schc["ordered_pair"] = [ + f"{i}-{j}" for i, j in prox_indiv_schc[["cluster1_label", "cluster2_label"]].to_numpy()] + prox_indiv_schc["comparison"] = [ + f"cluster1={i}-cluster2={j}" for i, j in prox_indiv_schc[["cluster1_is_high", "cluster2_is_high"]].to_numpy()] + + # Post-processing + results_schc_indiv = pd.concat(Parallel(n_jobs=n_cores)(delayed(features.post_processing_proximity)( + prox_df=prox_indiv_schc, slide_submitter_id=id, method="individual_between") for id in slide_submitter_ids)) + + return results_schc_indiv + +def main(args): + compute_proximity_from_indiv_schc_between( + slide_clusters = args.slide_clusters, + tiles_schc = args.tiles_schc, + cell_types=args.cell_types_path, + n_cores = args.n_cores, + n_clusters=args.n_clusters, + max_dist=args.max_dist, + max_n_tiles_threshold=args.max_n_tiles_threshold, + tile_size=args.tile_size, + overlap=args.overlap).to_csv( + Path(args.output_dir, f"{args.prefix}_features_clust_indiv_schc_prox_between.csv"),index=False) + + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/compute_proximity_from_indiv_schc_combine.py b/bin/compute_proximity_from_indiv_schc_combine.py new file mode 100755 index 0000000..73d3953 --- /dev/null +++ b/bin/compute_proximity_from_indiv_schc_combine.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +import os +import pandas as pd +import argparse +import multiprocessing +from argparse import ArgumentParser as AP +from os.path import abspath +import time +from pathlib import Path + +def get_args(): + # Script description + description = """Compute Clustering Features: Proximity""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--prox_within", type=str, + help="Path to csv file", required=True) + parser.add_argument("--prox_between", type=str, + help="Path to csv file", required=True) + parser.add_argument("--prefix", type=str, + 
help="Prefix for output file", default="") + parser.add_argument("--output_dir", type=str, + help="Path to output folder to store generated files", + required=False, default = "") + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + arg.output_dir = abspath(arg.output_dir) + + if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): + # Create an empty folder for TF records if folder doesn't exist + arg.output_dir = Path(arg.output_dir,"process_train") + os.mkdir(arg.output_dir) + + return arg + + +def compute_proximity_from_indiv_schc_combine(prox_within, prox_between): + + results_schc_indiv_within = pd.read_csv(prox_within, header = 0, index_col=False) + results_schc_indiv_between = pd.read_csv(prox_between, header = 0, index_col = False) + + # Concatenate within and between computed proximity values + prox_indiv_schc_combined = pd.concat( + [results_schc_indiv_within, results_schc_indiv_between]) + + # Remove rows with a proximity of NaN + prox_indiv_schc_combined = prox_indiv_schc_combined.dropna(axis=0) + + prox_indiv_schc_combined.comparison = prox_indiv_schc_combined.comparison.replace(dict(zip(['cluster1=True-cluster2=True', 'cluster1=True-cluster2=False', + 'cluster1=False-cluster2=True', 'cluster1=False-cluster2=False'], ["high-high", "high-low", "low-high", "low-low"]))) + prox_indiv_schc_combined["pair (comparison)"] = [ + f"{pair} ({comp})" for pair, comp in prox_indiv_schc_combined[["pair", "comparison"]].to_numpy()] + prox_indiv_schc_combined = prox_indiv_schc_combined.drop( + axis=1, labels=["pair", "comparison"]) + + + prox_indiv_schc_combined_wide = prox_indiv_schc_combined.pivot( + index=["slide_submitter_id"], columns=["pair (comparison)"])["proximity"] + new_cols = [ + f'prox CC {col.replace("_", " ")}' for col in prox_indiv_schc_combined_wide.columns] + prox_indiv_schc_combined_wide.columns = new_cols + prox_indiv_schc_combined_wide = prox_indiv_schc_combined_wide.reset_index() + return prox_indiv_schc_combined_wide + +def main(args): + compute_proximity_from_indiv_schc_combine(prox_within=args.prox_within, prox_between= args.prox_between).to_csv( + Path(args.output_dir, + f"{args.prefix}_features_clust_indiv_schc_prox.csv"), sep="\t", + index=False) + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/compute_proximity_from_indiv_schc_within.py b/bin/compute_proximity_from_indiv_schc_within.py new file mode 100755 index 0000000..f2481ed --- /dev/null +++ b/bin/compute_proximity_from_indiv_schc_within.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 +import os +import sys +import pandas as pd +from joblib import Parallel, delayed +import argparse + +# Own modules +import features.features as features +from model.constants import DEFAULT_CELL_TYPES + + +import multiprocessing + +from argparse import ArgumentParser as AP +from os.path import abspath +import time +from pathlib import Path + +def get_args(): + # Script description + description = """Compute Clustering Features: Proximity (within clusters)""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--slide_clusters", type=str, + help="Path to csv file", required=True) + parser.add_argument("--tiles_schc", type=str, + help="Path to csv file", required=True) + parser.add_argument("--output_dir", type=str, + help="Path to output folder to store 
generated files", required=False, default="")
+    parser.add_argument("--slide_type", type=str,
+                        help="Type of slides 'FFPE' or 'FF' used for naming generated files (default: 'FF')", default="FF")
+    parser.add_argument("--cell_types_path", type=str,
+                        help="Path to file with list of cell types (default: CAFs, endothelial_cells, T_cells, tumor_purity)", default=None)
+    parser.add_argument("--graphs_path", type=str,
+                        help="Path to pkl with generated graphs in case this was done before (OPTIONAL) if not specified, graphs will be generated", default=None)
+    parser.add_argument("--prefix", type=str,
+                        help="Prefix for output file", default="")
+    parser.add_argument("--cutoff_path_length", type=int,
+                        help="Max path length for proximity based on graphs", default=2, required=False)
+    parser.add_argument("--shapiro_alpha", type=float,
+                        help="Choose significance level alpha (default: 0.05)", default=0.05, required=False)
+    parser.add_argument("--abundance_threshold", type=float,
+                        help="Threshold for assigning cell types based on predicted probability (default: 0.5)", default=0.5, required=False)
+    parser.add_argument("--max_dist", type=int,
+                        help="Maximum distance between clusters", required=False, default=None)
+
+    parser.add_argument("--max_n_tiles_threshold", type=int,
+                        help="Number of tiles for computing max. distance between two points in two different clusters (default: 2)", required=False, default=2)
+    parser.add_argument("--tile_size", type=int,
+                        help="Size of tile (default: 512)", required=False, default=512)
+    parser.add_argument("--overlap", type=int,
+                        help="Overlap of tiles (default: 50)", required=False, default=50)
+
+    parser.add_argument("--n_clusters", type=int,
+                        help="Number of clusters for SCHC (default: 8)", required=False, default=8)
+    parser.add_argument("--n_cores", type=int, help="Number of cores to use (parallelization)")
+
+    parser.add_argument("--version", action="version", version="0.1.0")
+    arg = parser.parse_args()
+    arg.output_dir = abspath(arg.output_dir)
+
+    if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))):
+        # Create an empty folder for TF records if folder doesn't exist
+        arg.output_dir = Path(arg.output_dir, "process_train")
+        os.mkdir(arg.output_dir)
+
+    if arg.n_cores is None:
+        arg.n_cores = multiprocessing.cpu_count()
+    return arg
+
+
+def compute_proximity_from_indiv_schc_within(
+        slide_clusters, tiles_schc,
+        cell_types=None,
+        n_clusters=8, max_dist=None,
+        max_n_tiles_threshold=2,
+        tile_size=512,
+        overlap=50,
+        n_cores=multiprocessing.cpu_count()):

+    if cell_types is None:
+        cell_types = DEFAULT_CELL_TYPES
+
+    all_slide_indiv_clusters = pd.read_csv(slide_clusters, sep=",", header=0, index_col=False)
+    slide_submitter_ids = list(set(all_slide_indiv_clusters.slide_submitter_id))
+
+    slide_indiv_clusters_labeled = pd.read_csv(tiles_schc, sep=",", header=0, index_col=False)
+
+    ##########################################################################
+    # ---- Compute proximity features (individual cell type clustering) ---- #
+    ##########################################################################
+
+    # Computing proximity for clusters derived for each cell type individually
+    # Within clusters
+    slide_submitter_ids = list(set(slide_indiv_clusters_labeled.slide_submitter_id))
+
+    results_schc_indiv_within = Parallel(n_jobs=n_cores)(delayed(features.compute_proximity_clusters_pairs)(all_slide_indiv_clusters,
+                                                         slide_submitter_id=id,
+                                                         method="individual_within",
+                                                         n_clusters=n_clusters,
+                                                         cell_types=cell_types,
+                                                         max_dist=max_dist,
+ 
max_n_tiles_threshold=max_n_tiles_threshold, + tile_size=tile_size, overlap=overlap,) for id in slide_submitter_ids) + prox_indiv_schc_within = pd.concat(results_schc_indiv_within) + + prox_indiv_schc_within = pd.merge(prox_indiv_schc_within, slide_indiv_clusters_labeled, left_on=[ + "slide_submitter_id", "cell_type", "cluster1"], right_on=["slide_submitter_id", "cell_type_map", "cluster_label"]) + prox_indiv_schc_within = prox_indiv_schc_within.drop( + columns=["cluster_label"]) + prox_indiv_schc_within = prox_indiv_schc_within.rename( + columns={"is_high": "cluster1_is_high", "cell_type_map": "cell_type_map1"}) + prox_indiv_schc_within = pd.merge(prox_indiv_schc_within, slide_indiv_clusters_labeled, left_on=[ + "slide_submitter_id", "cell_type", "cluster2"], right_on=["slide_submitter_id", "cell_type_map", "cluster_label"]) + prox_indiv_schc_within = prox_indiv_schc_within.rename( + columns={"is_high": "cluster2_is_high", "cell_type_map": "cell_type_map2"}) + prox_indiv_schc_within = prox_indiv_schc_within.drop( + columns=["cluster_label"]) + + # Order doesn't matter (only same cell type combinations) + prox_indiv_schc_within["pair"] = [ + f"{i}-{j}" for i, j in prox_indiv_schc_within[["cell_type_map1", "cell_type_map2"]].to_numpy()] + prox_indiv_schc_within["comparison"] = [ + f"cluster1={sorted([i,j])[0]}-cluster2={sorted([i,j])[1]}" for i, j in prox_indiv_schc_within[["cluster1_is_high", "cluster2_is_high"]].to_numpy()] + + # Post-processing + slide_submitter_ids = list(set(prox_indiv_schc_within.slide_submitter_id)) + results_schc_indiv_within = pd.concat(Parallel(n_jobs=n_cores)(delayed(features.post_processing_proximity)( + prox_df=prox_indiv_schc_within, slide_submitter_id=id, method="individual_within") for id in slide_submitter_ids)) + return results_schc_indiv_within + +def main(args): + compute_proximity_from_indiv_schc_within( + slide_clusters = args.slide_clusters, + tiles_schc = args.tiles_schc, + cell_types=args.cell_types_path, + n_cores = args.n_cores, + n_clusters=args.n_clusters, + max_dist=args.max_dist, max_n_tiles_threshold=args.max_n_tiles_threshold, + tile_size=args.tile_size, + overlap=args.overlap).to_csv( + Path(args.output_dir, f"{args.prefix}_features_clust_indiv_schc_prox_within.csv"),index=False) + + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/compute_proximity_from_simultaneous_schc.py b/bin/compute_proximity_from_simultaneous_schc.py new file mode 100755 index 0000000..a4311c3 --- /dev/null +++ b/bin/compute_proximity_from_simultaneous_schc.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +import os +import pandas as pd +from joblib import Parallel, delayed +import argparse +import features.features as features +from model.constants import DEFAULT_CELL_TYPES + +import multiprocessing + +from argparse import ArgumentParser as AP +from os.path import abspath +import time +from pathlib import Path + +def get_args(): + # Script description + description = """Compute Spatial Network Features: Compute Connectedness""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--slide_clusters_characterized", type=str, + help="Path to csv file", required=True) + parser.add_argument("--tiles_schc", type=str, + help="Path to csv file", required=True) + parser.add_argument("--output_dir", type=str, + help="Path to output folder to store generated files", 
required=False, default="")
+    parser.add_argument("--slide_type", type=str,
+                        help="Type of slides 'FFPE' or 'FF' used for naming generated files (default: 'FF')", default="FF")
+    parser.add_argument("--cell_types_path", type=str,
+                        help="Path to file with list of cell types (default: CAFs, endothelial_cells, T_cells, tumor_purity)", default=None)
+    parser.add_argument("--graphs_path", type=str,
+                        help="Path to pkl with generated graphs in case this was done before (OPTIONAL) if not specified, graphs will be generated", default=None)
+    parser.add_argument("--prefix", type=str,
+                        help="Prefix for output file", default="")
+    parser.add_argument("--cutoff_path_length", type=int,
+                        help="Max path length for proximity based on graphs", default=2, required=False)
+    parser.add_argument("--shapiro_alpha", type=float,
+                        help="Choose significance level alpha (default: 0.05)", default=0.05, required=False)
+    parser.add_argument("--abundance_threshold", type=float,
+                        help="Threshold for assigning cell types based on predicted probability (default: 0.5)", default=0.5, required=False)
+    parser.add_argument("--max_dist", type=int,
+                        help="Maximum distance between clusters", required=False, default=None)
+    parser.add_argument("--max_n_tiles_threshold", type=int,
+                        help="Number of tiles for computing max. distance between two points in two different clusters (default: 2)", required=False, default=2)
+    parser.add_argument("--tile_size", type=int,
+                        help="Size of tile (default: 512)", required=False, default=512)
+    parser.add_argument("--overlap", type=int,
+                        help="Overlap of tiles (default: 50)", required=False, default=50)
+    parser.add_argument("--n_clusters", type=int,
+                        help="Number of clusters for SCHC (default: 8)", required=False, default=8)
+    parser.add_argument("--n_cores", type=int, help="Number of cores to use (parallelization)")
+
+    parser.add_argument("--version", action="version", version="0.1.0")
+    arg = parser.parse_args()
+    arg.output_dir = abspath(arg.output_dir)
+
+    if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))):
+        # Create an empty folder for TF records if folder doesn't exist
+        arg.output_dir = Path(arg.output_dir, "process_train")
+        os.mkdir(arg.output_dir)
+
+    if arg.n_cores is None:
+        arg.n_cores = multiprocessing.cpu_count()
+    return arg
+
+def compute_proximity_from_simultaneous_schc(slide_clusters_characterized, tiles_schc,
+                                             cell_types=None,
+                                             n_clusters=8,
+                                             max_dist=None,
+                                             max_n_tiles_threshold=2,
+                                             tile_size=512,
+                                             overlap=50,
+                                             n_cores=multiprocessing.cpu_count()):

+    # Fall back to the default cell types when none are given (mirrors the other scripts)
+    if cell_types is None:
+        cell_types = DEFAULT_CELL_TYPES
+
+    all_slide_clusters_characterized = pd.read_csv(slide_clusters_characterized,
+                                                   sep=",", header=0, index_col=0)
+
+    slide_submitter_ids = list(set(all_slide_clusters_characterized.slide_submitter_id))
+
+    tiles_all_schc = pd.read_csv(tiles_schc, sep=",", header=0, index_col=0)
+
+    # Computing proximity for clusters derived with all cell types simultaneously
+    clusters_all_schc_long = all_slide_clusters_characterized.melt(
+        id_vars=["slide_submitter_id", "cluster_label"], value_name="is_assigned", var_name="cell_type")
+    # remove all cell types that are not assigned to the cluster
+    clusters_all_schc_long = clusters_all_schc_long[clusters_all_schc_long["is_assigned"]]
+    clusters_all_schc_long = clusters_all_schc_long.drop(columns="is_assigned")
+
+    results_schc_all = Parallel(n_jobs=n_cores)(delayed(features.compute_proximity_clusters_pairs)(
+        tiles=tiles_all_schc, slide_submitter_id=id, n_clusters=n_clusters, cell_types=cell_types, max_dist=max_dist, max_n_tiles_threshold=max_n_tiles_threshold, tile_size=tile_size, overlap=overlap, method="all") for id in slide_submitter_ids)
+    prox_all_schc = 
pd.concat(results_schc_all) + + # Label clusters (a number) with the assigned cell types + prox_all_schc = pd.merge(prox_all_schc, clusters_all_schc_long, left_on=[ + "slide_submitter_id", "cluster1"], right_on=["slide_submitter_id", "cluster_label"]) + prox_all_schc = prox_all_schc.rename( + columns={"cell_type": "cluster1_label"}) + prox_all_schc = prox_all_schc.drop(columns=["cluster_label"]) + + prox_all_schc = pd.merge(prox_all_schc, clusters_all_schc_long, left_on=[ + "slide_submitter_id", "cluster2"], right_on=["slide_submitter_id", "cluster_label"]) + prox_all_schc = prox_all_schc.rename( + columns={"cell_type": "cluster2_label"}) + + # Order doesn't matter: x <-> + prox_all_schc["pair"] = [f"{sorted([i, j])[0]}-{sorted([i, j])[1]}" for i, + j in prox_all_schc[["cluster1_label", "cluster2_label"]].to_numpy()] + prox_all_schc = prox_all_schc[((prox_all_schc.cluster1 == prox_all_schc.cluster2) & ( + prox_all_schc.cluster2_label != prox_all_schc.cluster1_label)) | (prox_all_schc.cluster1 != prox_all_schc.cluster2)] + + # slides = prox_all_schc[["MFP", "slide_submitter_id"]].drop_duplicates().to_numpy() + slide_submitter_ids = list(set(prox_all_schc.slide_submitter_id)) + + # Post Processing + results_schc_all = Parallel(n_jobs=n_cores)(delayed(features.post_processing_proximity)( + prox_df=prox_all_schc, slide_submitter_id=id, method="all") for id in slide_submitter_ids) + all_prox_df = pd.concat(results_schc_all) + # Remove rows with a proximity of NaN + all_prox_df = all_prox_df.dropna(axis=0) + + + all_prox_df_wide = all_prox_df.pivot( + index=["slide_submitter_id"], columns=["pair"])["proximity"] + new_cols = [ + f'prox CC {col.replace("_", " ")} clusters' for col in all_prox_df_wide.columns] + all_prox_df_wide.columns = new_cols + all_prox_df_wide = all_prox_df_wide.reset_index() + + return all_prox_df_wide + +def main(args): + compute_proximity_from_simultaneous_schc( + slide_clusters_characterized = args.slide_clusters_characterized, + tiles_schc = args.tiles_schc, + cell_types=args.cell_types_path, + n_cores = args.n_cores, + n_clusters=args.n_clusters, + max_dist=args.max_dist, max_n_tiles_threshold=args.max_n_tiles_threshold, + tile_size=args.tile_size, + overlap=args.overlap).to_csv(Path(args.output_dir, f"{args.prefix}_features_clust_all_schc_prox_wide.csv"), index = False) + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/create_clinical_file.py b/bin/create_clinical_file.py new file mode 100755 index 0000000..8052786 --- /dev/null +++ b/bin/create_clinical_file.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 +import argparse +from argparse import ArgumentParser as AP +import os +import os.path +import numpy as np +import pandas as pd + +from os.path import abspath +import time +from pathlib import Path + + +def get_args(): + # Script description + description = """Creating a clinical file for TCGA dataset(s)""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + + # Sections + parser.add_argument( + "--clinical_files_input", + help="Path to either a folder for multiple cancer types or single txt file.", required=False, + default=None + ) + parser.add_argument("--out_file", type=str, required=False, default="generated_clinical_file.txt", + help="Output filename with .txt extension (default='generated_clinical_file.txt')") + parser.add_argument( + "--path_codebook", + help="Path to 
codebook", + default=None, required=False, type=str + ) + parser.add_argument( + "--output_dir", help="Path to folder for saving all created files", default="", required=False, type=str + ) + parser.add_argument( + "--class_name", + help="Single classname or (b) Path to file with classnames according to codebook.txt (e.g. LUAD_T)", default=None, + type=str + ) + parser.add_argument("--class_names_path", + type=str, + help="Path to file with classnames according to codebook.txt", + default=None) + parser.add_argument( + "--tumor_purity_threshold", + help="Integer for filtering tumor purity assessed by pathologists", + default=80, required=False, type=int + ) + + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + arg.output_dir = abspath(arg.output_dir) + + if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): + os.mkdir(arg.output_dir) + return arg + +def handle_single_class(input, class_name, codebook): + input = pd.read_csv(input, sep="\t") + # only keep tissue (remove _T or _N) to check in filename + input["class_name"] = class_name + input["class_id"] = int( + codebook.loc[codebook["class_name"] + == class_name].values[0][1] + ) + return (input) + +def handle_multi_class(input, class_names, codebook): + clinical_file_list = [] + # Combine all clinical raw files based on input + for class_name in class_names: + clinical_file_temp = pd.read_csv( + f"{input}/clinical_file_TCGA_{class_name[:-2]}.tsv", + sep="\t", + ) + # only keep tissue (remove _T or _N) to check in filename + clinical_file_temp["class_name"] = class_name + clinical_file_temp["class_id"] = int( + codebook.loc[codebook["class_name"] + == class_name].values[0][1] + ) + clinical_file_list.append(clinical_file_temp) + clinical_file = pd.concat( + clinical_file_list, axis=0).reset_index(drop=True) + return (clinical_file) + + +def filter_tumor_purity(df, threshold = 80): + # ---- 2) Filter: Availability of tumor purity (percent_tumor_cells) ---- # + # Remove rows with missing tumor purity + df["percent_tumor_cells"] = ( + df["percent_tumor_cells"] + .replace("'--", np.nan, regex=True) + .astype(float) + ) + + # Convert strings to numeric type + df["percent_tumor_cells"] = pd.to_numeric( + df["percent_tumor_cells"] + ) + df = df.dropna(subset=["percent_tumor_cells"]) + df = df.where( + df["percent_tumor_cells"] >= float( + threshold) + ) + return(df) + +def is_valid_class_name_input (input, codebook): + res = None + if (input is not None): + if (input in codebook["class_name"].values): + res = "single" + elif(Path(input)): + res = "multi" + return(res) + + +def create_TCGA_clinical_file( + class_name, + class_names_path, + clinical_files_input, + tumor_purity_threshold=80, + path_codebook=None +): + """ + Create a clinical file based on the slide metadata downloaded from the GDC data portal + 1. Read the files and add classname and id based on codebook_df.txt + 2. Filter tumor purity + 3. Save file + + Args: + class_names (str): single class name e.g. LUAD_T or path to file with class names + clinical_files_input (str): String with path to folder with subfolders pointing to the raw clinical files (slide.tsv) + tumor_purity_threshold (int): default=80 + multi_class_path (str): path to file with class names to be merged into one clinical file + + Returns: + {output_dir}/generated_clinical_file.txt" containing the slide_submitter_id, sample_submitter_id, image_file_name, percent_tumor_cells, class_name, class_id in columns and records (slides) in rows. 
+ + """ + codebook_df = pd.read_csv( + path_codebook, + delim_whitespace=True, + header=None, names=["class_name", "value"] + ) + init_check_single_class =is_valid_class_name_input(input = class_name, codebook= codebook_df) + init_check_multi_class = is_valid_class_name_input(input = class_names_path, codebook = codebook_df) + is_single_class = init_check_single_class == "single" + is_multi_class = init_check_multi_class == "multi" + + passes_input_check = (is_single_class| is_multi_class) & (clinical_files_input is not None) + + if passes_input_check: + if (is_multi_class): # multi class names + class_names = pd.read_csv( + class_names_path, header=None).to_numpy().flatten() + if os.path.isdir(clinical_files_input) & (len(class_names) > 1): + clinical_file = handle_multi_class(input = clinical_files_input, class_names = class_names, codebook=codebook_df) + elif (is_single_class): # single class names + # a) Single class + if os.path.isfile(clinical_files_input): + clinical_file = handle_single_class(input = clinical_files_input, class_name=class_name, codebook = codebook_df) + + clinical_file = filter_tumor_purity(df = clinical_file, threshold= tumor_purity_threshold) + + # ---- 3) Formatting ---- # + clinical_file["image_file_name"] = [ + f"{slide_submitter_id}.{str(slide_id).upper()}.svs" + for slide_submitter_id, slide_id in clinical_file[ + ["slide_submitter_id", "slide_id"] + ].to_numpy() + ] + + clinical_file = clinical_file.dropna(how="all") + clinical_file = clinical_file.drop_duplicates() + clinical_file = clinical_file.drop_duplicates( + subset="slide_submitter_id") + clinical_file = clinical_file[ + [ + "slide_submitter_id", + "sample_submitter_id", + "image_file_name", + "percent_tumor_cells", + "class_name", + "class_id", + ] + ] + clinical_file = clinical_file.dropna(how="any", axis=0) + return clinical_file + + +def main(args): + # Generate clinical file + clinical_file = create_TCGA_clinical_file( + class_name=args.class_name, + class_names_path = args.class_names_path, + tumor_purity_threshold=args.tumor_purity_threshold, + clinical_files_input=args.clinical_files_input, + path_codebook=args.path_codebook, + ) + # Save file + clinical_file.to_csv( + Path(args.output_dir, args.out_file), + index=False, + sep="\t", + ) + + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/Python/1_extract_histopathological_features/myslim/create_list_avail_img_for_tiling.py b/bin/create_list_avail_img_for_tiling.py old mode 100644 new mode 100755 similarity index 53% rename from Python/1_extract_histopathological_features/myslim/create_list_avail_img_for_tiling.py rename to bin/create_list_avail_img_for_tiling.py index 888b229..eafbc6b --- a/Python/1_extract_histopathological_features/myslim/create_list_avail_img_for_tiling.py +++ b/bin/create_list_avail_img_for_tiling.py @@ -1,16 +1,35 @@ -#!/usr/bin/python +#!/usr/bin/env python3 import argparse -import DL.image as im -import tiffslide as openslide import os -import sys - -import numpy as np import pandas as pd -from PIL import Image +from argparse import ArgumentParser as AP +from os.path import abspath +import time +from pathlib import Path + +def get_args(): + # Script description + description = """Creating list of available slide images that have to be tiled""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + + parser = 
argparse.ArgumentParser() + parser.add_argument("--slides_folder", help="Set slides folder", default = None) + parser.add_argument("--output_dir", help="Set output folder", default = "") + parser.add_argument("--clinical_file_path", help="Set clinical file path") + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + arg.output_dir = abspath(arg.output_dir) + arg.slides_folder = abspath(arg.slides_folder) + + if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): + os.mkdir(arg.output_dir) + return arg -def create_list_avail_img_for_tiling(slides_folder, clinical_file_path, output_folder): +def create_list_avail_img_for_tiling(slides_folder, clinical_file_path): """ Create tiles from slides Dividing the whole slide images into tiles with a size of 512 x 512 pixels, with an overlap of 50 pixels at a magnification of 20x. In addition, remove blurred and non-informative tiles by using the weighted gradient magnitude. @@ -26,11 +45,9 @@ def create_list_avail_img_for_tiling(slides_folder, clinical_file_path, output_f txt containing list of slides available for tiling """ - if not os.path.exists(output_folder): - os.makedirs(output_folder) - # Subset images of interest (present in generated clinical file) - clinical_file = pd.read_csv(clinical_file_path, sep="\t") + clinical_file = pd.read_csv(clinical_file_path, sep="\t", index_col=False) + print(clinical_file) clinical_file.dropna(how="all", inplace=True) clinical_file.drop_duplicates(inplace=True) clinical_file.drop_duplicates(subset="slide_submitter_id", inplace=True) @@ -39,19 +56,25 @@ def create_list_avail_img_for_tiling(slides_folder, clinical_file_path, output_f # Check if slides are among our data available_images = os.listdir(slides_folder) + print(available_images) images_for_tiling = list(set(subset_images) & set(available_images)) - pd.DataFrame([[name.split(".")[0], name] for name in images_for_tiling], columns=["slide_id", "slide_filename"]).to_csv( - (f"{output_folder}/avail_slides_for_img.csv"), index=False) + return(pd.DataFrame([[name.split(".")[0], name] for name in images_for_tiling], columns=["slide_id", "slide_filename"])) + +def main(args): + list_avail_img = create_list_avail_img_for_tiling(slides_folder=args.slides_folder, + clinical_file_path=args.clinical_file_path) + + list_avail_img.to_csv(Path(args.output_dir, "avail_slides_for_img.csv"), index=False) print("Generated list of available images for tiling...") + + if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--slides_folder", help="Set slides folder") - parser.add_argument("--output_folder", help="Set output folder") - parser.add_argument("--clinical_file_path", help="Set clinical file path") - args = parser.parse_args() - create_list_avail_img_for_tiling(slides_folder=args.slides_folder, - output_folder=args.output_folder, clinical_file_path=args.clinical_file_path) + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/create_tiles_from_slides.py b/bin/create_tiles_from_slides.py new file mode 100755 index 0000000..0c29369 --- /dev/null +++ b/bin/create_tiles_from_slides.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 +import tiffslide as openslide +import os +import argparse +import glob +import numpy as np +from PIL import Image + +from argparse import ArgumentParser as AP +from os.path import abspath +import time +from pathlib import Path + +import DL.image as im + 
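+# Tiles are saved by main() below as "<slide_name>_<x>_<y>.jpg"; downstream
+# scripts recover the grid position by splitting the file stem on "_",
+# e.g. (hypothetical stem):
+#   *_, x, y = "TCGA-3N-A9WB-06A-01-TSA_925_1849".split("_")
+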
+def get_args(): + # Script description + description = """Creating tiles from a slide""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + + # Sections + parser = argparse.ArgumentParser() + parser.add_argument("--filename_slide", help="Name of slide", default = "") + parser.add_argument("--slides_folder", help="Set slides folder", default = None) + parser.add_argument("--slide_path", help="Path to individual slide", default = None) + parser.add_argument("--output_dir", help="Set output folder", default = "") + parser.add_argument("--clin_path", help="Set clinical file path", default = None) + parser.add_argument("--gradient_mag_filter", help = "Threshold for filtering", default = 20) + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + arg.output_dir = abspath(arg.output_dir) + + if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): + arg.output_dir = Path(arg.output_dir, "tiles") + os.mkdir(arg.output_dir) + return arg + +def create_tiles_from_slide(slide_filename, slides_folder, gradient_mag_filter = 20, slide_path = None): + """ + Create tiles from a single slide + Dividing the whole slide images into tiles with a size of 512 x 512 pixels, with an overlap of 50 pixels at a magnification of 20x. In addition, remove blurred and non-informative tiles by using the weighted gradient magnitude. + + Source: + Fu, Y., Jung, A. W., Torne, R. V., Gonzalez, S., Vöhringer, H., Shmatko, A., Yates, L. R., Jimenez-Linan, M., Moore, L., & Gerstung, M. (2020). Pan-cancer computational histopathology reveals mutations, tumor composition and prognosis. Nature Cancer, 1(8), 800–810. https://doi.org/10.1038/s43018-020-0085-8 + + Args: + slide_filename (str): name of slide to use for creating tiles + slides_folder (str): path pointing to folder with all whole slide images (.svs files) + tiles (str): path pointing to folder for storing all created files by script (i.e. 
.jpg files for the created tiles)
+        gradient_mag_filter (int): remove tiles that are blurred or non-informative based on weighted gradient magnitude (default=20)
+
+    Returns:
+        list of (tile, slide_name, x_coord, y_coord) tuples; main() saves the tiles as jpg files in the specified folder {output_dir}/tiles
+
+    """
+    list_of_tiles = []
+    # Accept different file types
+    if slide_filename.endswith(('.svs', '.ndpi', '.tiff', '.tif')):
+        if (slide_path is not None):
+            slide = openslide.OpenSlide(slide_path)
+        else:
+            slide = openslide.OpenSlide(
+                "{}/{}".format(slides_folder, slide_filename))
+        slide_name = slide_filename.split(".")[0]
+        # Slides scanned at 40x are read at double size and downsampled to reach 20x
+        if (
+            str(slide.properties["tiff.ImageDescription"]).find(
+                "AppMag = 40"
+            )
+            != -1
+        ):
+            region_size = 1024
+            tile_size = 924
+        else:
+            region_size = 512
+            tile_size = 462
+        [width, height] = slide.dimensions
+
+        for x_coord in range(1, width, tile_size):
+            for y_coord in range(1, height, tile_size):
+                slide_region = slide.read_region(
+                    location=(x_coord, y_coord),
+                    level=0,
+                    size=(region_size, region_size),
+                )
+                slide_region_converted = slide_region.convert("RGB")
+                # LANCZOS is the same filter as the former ANTIALIAS (removed in Pillow 10)
+                tile = slide_region_converted.resize(
+                    (512, 512), Image.LANCZOS)
+                grad = im.getGradientMagnitude(np.array(tile))
+                unique, counts = np.unique(grad, return_counts=True)
+                # Keep the tile only if fewer than 60% of its pixels are low-gradient (blurred/background)
+                if counts[np.argwhere(unique <= int(gradient_mag_filter))].sum() < 512 * 512 * 0.6:
+                    list_of_tiles.append((tile, slide_name, x_coord, y_coord))
+    return (list_of_tiles)
+
+def main(args):
+    list_of_tiles = create_tiles_from_slide(slides_folder=args.slides_folder, gradient_mag_filter=args.gradient_mag_filter, slide_filename=args.filename_slide, slide_path=args.slide_path)
+    n_tiles = len(list_of_tiles)
+    for tile in list_of_tiles:
+        tile[0].save(
+            "{}/{}_{}_{}.jpg".format(
+                args.output_dir, tile[1], tile[2], tile[3]
+            ),
+            "JPEG",
+            optimize=True,
+            quality=94,
+        )
+    # Check if all tiles were saved
+    assert len(glob.glob1(Path(args.output_dir), "*.jpg")) == n_tiles
+
+
+if __name__ == "__main__":
+    args = get_args()
+    st = time.time()
+    main(args)
+    rt = time.time() - st
+    print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s")
diff --git a/bin/format_tile_data_structure.py b/bin/format_tile_data_structure.py
new file mode 100755
index 0000000..a69267e
--- /dev/null
+++ b/bin/format_tile_data_structure.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+
+import tiffslide as openslide
+import DL.utils as utils
+import glob
+import argparse
+import os
+import os.path
+import pandas as pd
+
+from os.path import abspath
+import time
+from pathlib import Path
+from argparse import ArgumentParser as AP
+
+def get_args():
+    # Script description
+    description = """Format tile data structure"""
+
+    # Add parser
+    parser = AP(description=description,
+                formatter_class=argparse.RawDescriptionHelpFormatter)
+
+    # Sections
+    parser.add_argument("--slides_folder", help="Set slides folder", default="")
+    parser.add_argument("--tiles_folder", help="Directory with the tiles", default="")
+    parser.add_argument("--output_dir", help="Set output folder", default="")
+    parser.add_argument("--clin_path", help="Set clinical file path", default="")
+    parser.add_argument("--is_tcga", help="Whether the slides are from TCGA (1) or not (0)", type=int)
+
+    parser.add_argument("--version", action="version", version="0.1.0")
+    arg = parser.parse_args()
+    arg.output_dir = abspath(arg.output_dir)
+
+    if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))):
+        os.mkdir(arg.output_dir)
+    return arg
+
+def assess_tcga_slide_quality(slide_name, slides_folder):
+    print("{}/{}".format(slides_folder, slide_name))
+
+    img = openslide.OpenSlide(
+        "{}/{}".format(slides_folder, slide_name))
+    image_description = str(img.properties["tiff.ImageDescription"]).split("|")[0]
+    image_description_split = image_description.split(" ")
+    jpeg_quality = image_description_split[-1]
+    return([slide_name, "RGB" + jpeg_quality])
+
+
+def format_tile_data_structure(slides_folder, tiles_folder, output_dir, clinical_file_path, is_tcga=True):
+    """
+    Specifying the tile data structure required to store tiles as TFRecord files (used in convert.py)
+
+    Args:
+        slides_folder (str): path pointing to folder with all whole slide images (.svs files)
+        tiles_folder (str): directory with the jpg tiles (falls back to output_dir when empty)
+        output_dir (str): path pointing to folder for storing all created files by script
+        clinical_file_path (str): path pointing to formatted clinical file (either generated or manually formatted)
+        is_tcga (bool): whether the slides come from TCGA; if so, the jpeg quality is read from the slide metadata (default=True)
+
+    Returns:
+        {output_dir}/file_info_train.txt containing the path to the individual tiles, class name, class id, percent of tumor cells and JPEG quality
+
+    """
+    if (tiles_folder == ""):
+        tiles_folder = output_dir
+
+    clinical_file = pd.read_csv(clinical_file_path, sep="\t")
+    clinical_file.dropna(how="all", inplace=True)
+    clinical_file.drop_duplicates(inplace=True)
+    clinical_file.drop_duplicates(subset="slide_submitter_id", inplace=True)
+
+    # 2) Determine the paths of the jpg tiles
+    jpg_tile_names = glob.glob1(Path(tiles_folder), "*.jpg")
+    jpg_tile_paths = [Path(tiles_folder, tile_name) for tile_name in jpg_tile_names]
+
+    # 3) Get corresponding data from the clinical file based on the tile names
+    jpg_tile_names_stripped = [
+        utils.get_slide_submitter_id(jpg_tile_name) for jpg_tile_name in jpg_tile_names
+    ]
+    jpg_tile_names_df = pd.DataFrame(
+        jpg_tile_names_stripped, columns=["slide_submitter_id"]
+    )
+    jpg_tiles_df = pd.merge(
+        jpg_tile_names_df, clinical_file, on=["slide_submitter_id"], how="left"
+    )
+    # 4) Determine jpeg_quality of slides
+    slide_quality = []
+    if is_tcga:
+        for slide_name in jpg_tiles_df.image_file_name.unique():
+            slide_quality.append(assess_tcga_slide_quality(slide_name=slide_name, slides_folder=slides_folder))
+    else:
+        jpeg_quality = 100  # assuming no loss
+        slide_quality = [[slide_name, f"RGB{jpeg_quality}"] for slide_name in jpg_tiles_df.image_file_name.unique()]
+
+    slide_quality_df = pd.DataFrame(
+        slide_quality, columns=["image_file_name", "jpeg_quality"]
+    )
+    jpg_tiles_df = pd.merge(
+        jpg_tiles_df, slide_quality_df, on=["image_file_name"], how="left"
+    )
+    jpg_tiles_df["tile_path"] = jpg_tile_paths
+
+    # Create output dataframe
+    output = jpg_tiles_df[
+        ["tile_path", "class_name", "class_id",
+         "jpeg_quality", "percent_tumor_cells"]
+    ]
+    return (output)
+
+
+def main(args):
+    output = format_tile_data_structure(
+        slides_folder=args.slides_folder,
+        tiles_folder=args.tiles_folder,
+        output_dir=args.output_dir,
+        clinical_file_path=args.clin_path,
+        is_tcga=args.is_tcga
+    )
+    output.to_csv(Path(args.output_dir, "file_info_train.txt"),
+                  index=False, sep="\t")
+
+    print("Finished creating the necessary file for computing the features in the next step")
+
+
+if __name__ == "__main__":
+    args = get_args()
+    st = time.time()
+    main(args)
+    rt = time.time() - st
+    print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s")
diff --git a/bin/generate_graphs.py b/bin/generate_graphs.py
new file mode 100755
index 0000000..fbc33ef
--- /dev/null
+++ b/bin/generate_graphs.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+import argparse
+import multiprocessing
+from argparse import ArgumentParser as AP
+import os
+import 
joblib +import pandas as pd +from joblib import Parallel, delayed +import argparse + +# Own modules +import features.graphs as graphs +from model.constants import DEFAULT_CELL_TYPES + +from os.path import abspath +import time +from pathlib import Path + +def get_args(): + # Script description + description = """Generating graphs for computing spatial network features""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--tile_quantification_path", type=str, + help="Path to csv file with tile-level quantification (predictions)", required=True) + parser.add_argument("--output_dir", type=str, + help="Path to output folder to store generated files", default = "") + parser.add_argument("--slide_type", type=str, + help="Type of slides 'FFPE' or 'FF' used for naming generated files (default: 'FF')", default="FF") + parser.add_argument("--cell_types_path", type=str, + help="Path to file with list of cell types (default: CAFs, endothelial_cells, T_cells, tumor_purity)", default=None) + parser.add_argument("--prefix", type=str, + help="Prefix for output file", default="") + parser.add_argument("--n_cores", type = int, help = "Number of cores to use (parallelization)") + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + arg.output_dir = abspath(arg.output_dir) + + if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): + os.mkdir(arg.output_dir) + + if arg.n_cores is None: + arg.n_cores = multiprocessing.cpu_count() + return arg + +def generate_graphs(tile_quantification_path, cell_types=None, n_cores = multiprocessing.cpu_count()): + """ + Generating graphs + + Args: + tile_quantification_path (str) + cell_types (list): list of cell types + n_cores (int): Number of cores to use (parallelization) + + Returns: + Graphs for all slides (dict) + + """ + if cell_types is None: + cell_types = DEFAULT_CELL_TYPES + + predictions = pd.read_csv(tile_quantification_path, sep="\t") + slide_submitter_ids = list(set(predictions.slide_submitter_id)) + + ##################################### + # ---- Constructing the graphs ---- # + ##################################### + + results = Parallel(n_jobs=n_cores)( + delayed(graphs.construct_graph)( + predictions=predictions, slide_submitter_id=id) + for id in slide_submitter_ids + ) + # Extract/format graphs + all_graphs = { + list(slide_graph.keys())[0]: list(slide_graph.values())[0] + for slide_graph in results + } + return all_graphs + +def main(args): + all_graphs = generate_graphs( + tile_quantification_path = args.tile_quantification_path, + n_cores=args.n_cores) + out_filepath = Path(args.output_dir, + f"{args.prefix}_graphs.pkl") + + joblib.dump(all_graphs, out_filepath) + print(f"Generated all graphs and stored in: {out_filepath}") + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/post_process_features.py b/bin/post_process_features.py new file mode 100755 index 0000000..cc3bf54 --- /dev/null +++ b/bin/post_process_features.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +#  Module imports +import argparse +from argparse import ArgumentParser as AP +import os +import dask.dataframe as dd +import pandas as pd + +#  Custom imports +import DL.utils as utils +from os.path import abspath +import time +from pathlib import Path + + +def get_args(): + # Script description + description = """Post 
processing features""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + + # Sections + parser.add_argument("--output_dir", help="Set output folder (default='.')", default = ".") + parser.add_argument("--create_parquet_subdir", help = "Whether to create a subdirectory called 'features_format_parquet' if slide_type == 'FFPE', default=False", default = False) + parser.add_argument( + "--slide_type", help="Type of tissue slide (FF or FFPE)") + parser.add_argument( + "--is_tcga", help="Is TCGA dataset, default=False", type = int, default = 0) + parser.add_argument("--bot_train_file", type = str, default = None, help = "Txt file") + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + + if arg.bot_train_file is None: + arg.bot_train_file = Path(arg.output_dir, "bot_train.txt") + + if (arg.create_parquet_subdir): + arg.output_dir = abspath(Path(arg.output_dir, "features_format_parquet")) + + if not os.path.isdir(arg.output_dir): + os.mkdir(arg.output_dir) + + return arg + + +def handle_ff_slides(bot_train_file, is_tcga): + features_raw = pd.read_csv(bot_train_file, sep="\t", header=None) + # Extract the DL features (discard: col1 = tile paths, col2 = true class id) + features = features_raw.iloc[:, 2:] + features.columns = list(range(1536)) + # Add new column variables that define each tile + features["tile_ID"] = [utils.get_tile_name( + tile_path) for tile_path in features_raw.iloc[:, 0]] + features["Coord_X"] = [i[-2] + for i in features["tile_ID"].str.split("_")] + features["Coord_Y"] = [i[-1] + for i in features["tile_ID"].str.split("_")] + # FIX add sample_submitter_id and slide_submitter_id depending on is_tcga + if is_tcga: + features["sample_submitter_id"] = features["tile_ID"].str[0:16] + features["slide_submitter_id"] = features["tile_ID"].str[0:23] + features["Section"] = features["tile_ID"].str[20:23] + else: + features["sample_submitter_id"] = features['tile_ID'].str.split( + '_').str[0] + return(features) + +def handle_ffpe_slides(bot_train_file, is_tcga): + features_raw = dd.read_csv(bot_train_file, sep="\t", header=None) + features_raw['tile_ID'] = features_raw.iloc[:, 0] + features_raw.tile_ID = features_raw.tile_ID.map( + lambda x: x.split("/")[-1]) + features_raw['tile_ID'] = features_raw['tile_ID'].str.replace( + ".jpg'", "") + features = features_raw.map_partitions( + lambda df: df.drop(columns=[0, 1])) + new_names = list(map(lambda x: str(x), list(range(1536)))) + new_names.append('tile_ID') + features.columns = new_names + # FIX add sample_submitter_id and slide_submitter_id depending on is_tcga + if is_tcga: + features["sample_submitter_id"] = features["tile_ID"].str[0:16] + features["slide_submitter_id"] = features["tile_ID"].str[0:23] + features["Section"] = features["tile_ID"].str[20:23] + else: + features["sample_submitter_id"] = features['tile_ID'].str.split( + '_').str[0] + features['Coord_X'] = features['tile_ID'].str.split('_').str[1] + features['Coord_Y'] = features['tile_ID'].str.split('_').str[-1] + return(features) + +def post_process_features(bot_train_file, slide_type = "FF", is_tcga="TCGA"): + """ + Format extracted histopathological features from bot.train.txt file generated by myslim/bottleneck_predict.py and extract the 1,536 features, tile names. Extract several variables from tile ID. 
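+
+    For TCGA slides the tile_ID is assumed to look like
+    "TCGA-3N-A9WB-06A-01-TSA_19045_51921" (a hypothetical example), so
+    characters 0:16 give the sample_submitter_id, 0:23 the slide_submitter_id
+    and 20:23 the section; for non-TCGA slides the sample id is taken as
+    everything before the first "_".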
+ + Args: + bot_train_file (txt) + slide_type (str) + is_tcga (bool) + + Returns: + features (dataframe) contains the 1,536 features, followed by the sample_submitter_id, tile_ID, slide_submitter_id, Section, Coord_X and Coord_Y and in the rows the tiles + """ + # Read histopathological computed features + if slide_type == "FF": + return(handle_ff_slides(bot_train_file=bot_train_file, is_tcga=is_tcga)) + elif slide_type == "FFPE": + return(handle_ffpe_slides(bot_train_file=bot_train_file, is_tcga=is_tcga)) + else: + raise Exception("Invalid `slide_type`, please choose 'FF' or 'FFPE' ") + +def main(args): + features = post_process_features( + bot_train_file=args.bot_train_file, + slide_type=args.slide_type, + is_tcga=args.is_tcga) + if (args.slide_type == "FF"): + #  Save features to .csv file + features.to_csv(Path(args.output_dir, "features.txt"), sep="\t", header=True) + elif (args.slide_type == "FFPE"): + features.to_parquet(path= args.output_dir, compression='gzip', + name_function=utils.name_function) + print("Finished post-processing of features...") + + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/post_process_predictions.py b/bin/post_process_predictions.py new file mode 100755 index 0000000..7973c92 --- /dev/null +++ b/bin/post_process_predictions.py @@ -0,0 +1,242 @@ +#!/usr/bin/env python + +# Module imports +import argparse +from argparse import ArgumentParser as AP +import os +import dask.dataframe as dd +import pandas as pd + +#  Custom imports +import DL.utils as utils +import numpy as np +from os.path import abspath +import time +from pathlib import Path + +def get_args(): + # Script description + description = """Post-processing predictions""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--output_dir", help="Set output folder", default = ".") + parser.add_argument("--create_parquet_subdir", help = "Whether to create a subdirectory called 'predictions_format_parquet' if slide_type == 'FFPE', default=False", default = False) + parser.add_argument( + "--slide_type", help="Type of tissue slide (FF or FFPE) (default='FF')", type = str, default = "FF") + parser.add_argument( + "--path_codebook", help="codebook.txt file", required=True, type=str) + parser.add_argument( + "--path_tissue_classes", help="Tissue_classes.csv file", required=True, type=str) + parser.add_argument("--cancer_type", help = "Cancer type", required = True, type =str) + parser.add_argument("--pred_train_file", help = "", type = str, default = None) + arg = parser.parse_args() + + if arg.pred_train_file is None: + arg.pred_train_file = Path(arg.output_dir, "pred_train.txt") + + if (arg.create_parquet_subdir): + arg.output_dir = abspath(Path(arg.output_dir, "predictions_format_parquet")) + + if not os.path.isdir(arg.output_dir): + os.mkdir(arg.output_dir) + + return arg + + + +def handle_ff_slides(pred_train_file, codebook, tissue_classes, cancer_type): + predictions_raw = pd.read_csv(pred_train_file, sep="\t", header=None) + # Extract tile name incl. 
coordinates from path + tile_names = [utils.get_tile_name(tile_path) + for tile_path in predictions_raw[0]] + # Create output dataframe for post-processed data + predictions = pd.DataFrame(tile_names, columns=["tile_ID"]) + # Get predicted probabilities for all 42 classes + rename columns + pred_probabilities = predictions_raw.iloc[:, 2:] + pred_probabilities.columns = codebook["class_id"] + # Get predicted and true class ids + predictions["pred_class_id"] = pred_probabilities.idxmax( + axis="columns") + predictions["true_class_id"] = 41 + # Get corresponding max probabilities to the predicted class + predictions["pred_probability"] = pred_probabilities.max(axis=1) + # Replace class id with class name + predictions["true_class_name"] = predictions["true_class_id"].copy() + predictions["pred_class_name"] = predictions["pred_class_id"].copy() + found_class_ids = set(predictions["true_class_id"]).union( + set(predictions["pred_class_id"])) + for class_id in found_class_ids: + predictions["true_class_name"].replace( + class_id, codebook["class_name"][class_id], inplace=True + ) + predictions["pred_class_name"].replace( + class_id, codebook["class_name"][class_id], inplace=True + ) + + # Define whether prediction was right + predictions["is_correct_pred"] = ( + predictions["true_class_id"] == predictions["pred_class_id"]) + predictions["is_correct_pred"] = predictions["is_correct_pred"].replace( + False, "F") + predictions.is_correct_pred = predictions.is_correct_pred.astype(str) + # Get tumor and tissue ID + temp = pd.DataFrame( + {"tumor_type": predictions["true_class_name"].str[:-2]}) + temp = pd.merge(temp, tissue_classes, on="tumor_type", how="left") + # Set of IDs for normal and tumor (because of using multiple classes) + IDs_tumor = list(set(temp["ID_tumor"])) + if list(set(temp.tumor_type.tolist()))[0] == cancer_type: + # Probability for predicting tumor and normal label (regardless of tumor [tissue] type) + predictions["tumor_label_prob"] = np.nan + predictions["normal_label_prob"] = np.nan + for ID_tumor in IDs_tumor: + vals = pred_probabilities.loc[temp["ID_tumor"] + == ID_tumor, ID_tumor] + predictions.loc[temp["ID_tumor"] == + ID_tumor, "tumor_label_prob"] = vals + + predictions["is_correct_pred_label"] = np.nan + else: + IDs_normal = list(set(temp["ID_normal"])) + # Probability for predicting tumor and normal label (regardless of tumor [tissue] type) + predictions["tumor_label_prob"] = np.nan + predictions["normal_label_prob"] = np.nan + for ID_tumor in IDs_tumor: + vals = pred_probabilities.loc[temp["ID_tumor"] + == ID_tumor, ID_tumor] + predictions.loc[temp["ID_tumor"] == + ID_tumor, "tumor_label_prob"] = vals + + for ID_normal in IDs_normal: + vals = pred_probabilities.loc[temp["ID_normal"] + == ID_normal, ID_normal] + predictions.loc[temp["ID_normal"] == + ID_normal, "normal_label_prob"] = vals + + # Check if the correct label (tumor/normal) is predicted + temp_probs = predictions[["tumor_label_prob", "normal_label_prob"]] + is_normal_label_prob = ( + temp_probs["normal_label_prob"] > temp_probs["tumor_label_prob"] + ) + is_tumor_label_prob = ( + temp_probs["normal_label_prob"] < temp_probs["tumor_label_prob"] + ) + is_normal_label = predictions["true_class_name"].str.find( + "_N") != -1 + is_tumor_label = predictions["true_class_name"].str.find( + "_T") != -1 + + is_normal = is_normal_label & is_normal_label_prob + is_tumor = is_tumor_label & is_tumor_label_prob + + predictions["is_correct_pred_label"] = is_normal | is_tumor + predictions["is_correct_pred_label"].replace( + 
True, "T", inplace=True) + predictions["is_correct_pred_label"].replace( + False, "F", inplace=True) + return(predictions) + +def handle_ffpe_slides(pred_train_file, codebook, tissue_classes, cancer_type): + predictions_raw = dd.read_csv(pred_train_file, sep="\t", header=None) + predictions_raw['tile_ID'] = predictions_raw.iloc[:, 0] + predictions_raw.tile_ID = predictions_raw.tile_ID.map( + lambda x: x.split("/")[-1]) + predictions_raw['tile_ID'] = predictions_raw['tile_ID'].str.replace( + ".jpg'", "") + predictions = predictions_raw.map_partitions( + lambda df: df.drop(columns=[0, 1])) + new_names = list(map(lambda x: str(x), codebook["class_id"])) + new_names.append('tile_ID') + predictions.columns = new_names + predictions = predictions.map_partitions(lambda x: x.assign( + pred_class_id=x.iloc[:, 0:41].idxmax(axis="columns"))) + predictions["true_class_id"] = 41 + predictions = predictions.map_partitions(lambda x: x.assign( + pred_probability=x.iloc[:, 0:41].max(axis="columns"))) + predictions["true_class_name"] = predictions["true_class_id"].copy() + predictions["pred_class_name"] = predictions["pred_class_id"].copy() + predictions.pred_class_id = predictions.pred_class_id.astype(int) + res = dict(zip(codebook.class_id, codebook.class_name)) + predictions = predictions.map_partitions(lambda x: x.assign( + pred_class_name=x.loc[:, 'pred_class_id'].replace(res))) + predictions = predictions.map_partitions(lambda x: x.assign( + true_class_name=x.loc[:, 'true_class_id'].replace(res))) + predictions["is_correct_pred"] = ( + predictions["true_class_id"] == predictions["pred_class_id"]) + predictions["is_correct_pred"] = predictions["is_correct_pred"].replace( + False, "F") + predictions.is_correct_pred = predictions.is_correct_pred.astype(str) + temp = predictions.map_partitions(lambda x: x.assign( + tumor_type=x["true_class_name"].str[:-2])) + temp = temp.map_partitions(lambda x: pd.merge( + x, tissue_classes, on="tumor_type", how="left")) + if (temp['tumor_type'].compute() == cancer_type).any(): + # Probability for predicting tumor and normal label (regardless of tumor [tissue] type) + predictions["tumor_label_prob"] = np.nan + predictions["normal_label_prob"] = np.nan + predictions = predictions.map_partitions( + lambda x: x.assign(tumor_label_prob=x.loc[:, '41'])) + predictions["is_correct_pred_label"] = np.nan + else: + # TO DO + predictions["tumor_label_prob"] = np.nan + predictions["normal_label_prob"] = np.nan + # predictions = predictions.map_partitions(lambda x: x.assign(tumor_label_prob=x.loc[:, '41'])) + # predictions = predictions.map_partitions(lambda x: x.assign(tumor_label_prob=x.loc[:, '41'])) + return predictions + +def post_process_predictions(pred_train_file, slide_type, path_codebook, path_tissue_classes, cancer_type): + """ + Format predicted tissue classes and derive tumor purity from pred.train.txt file generated by myslim/bottleneck_predict.py and + The pred.train.txt file contains the tile ID, the true class id and the 42 predicted probabilities for the 42 tissue classes. + + Args: + output_dir (str): path pointing to folder for storing all created files by script + + Returns: + {output_dir}/predictions.txt containing the following columns + - tile_ID, + - pred_class_id and true_class_id: class ids defined in codebook.txt) + - pred_class_name and true_class_name: class names e.g. 
LUAD_T, defined in codebook.txt) + - pred_probability: corresponding probability + - is_correct_pred (boolean): correctly predicted tissue class label + - tumor_label_prob and normal_label_prob: probability for predicting tumor and normal label (regardless of tumor or tissue type) + - is_correct_pred_label (boolean): correctly predicted 'tumor' or 'normal' tissue regardless of tumor or tissue type + In the rows the tiles. + """ + + # Initialize + codebook = pd.read_csv(path_codebook, delim_whitespace=True, header=None) + codebook.columns = ["class_name", "class_id"] + tissue_classes = pd.read_csv(path_tissue_classes, sep="\t") + + # Read predictions + if slide_type == "FF": + return(handle_ff_slides(pred_train_file=pred_train_file, codebook=codebook, tissue_classes=tissue_classes, cancer_type = cancer_type)) + #  Save features to .csv file + elif slide_type == "FFPE": + return(handle_ffpe_slides(pred_train_file=pred_train_file, codebook=codebook, tissue_classes= tissue_classes, cancer_type=cancer_type)) + else: + raise Exception("Invalid `slide_type`, please choose 'FF' or 'FFPE' ") + +def main(args): + predictions = post_process_predictions(pred_train_file = args.pred_train_file, slide_type=args.slide_type, path_codebook=args.path_codebook, + path_tissue_classes=args.path_tissue_classes, cancer_type=args.cancer_type) + if (args.slide_type == "FF"): + predictions.to_csv(Path(args.output_dir, "predictions.txt"), sep="\t") + elif (args.slide_type == "FFPE"): + # Save features using parquet + def name_function(x): return f"predictions-{x}.parquet" + predictions.to_parquet( + path=args.output_dir, compression='gzip', name_function=name_function) + print("Finished post-processing of predictions...") + + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/pre_processing.py b/bin/pre_processing.py new file mode 100755 index 0000000..cc71baa --- /dev/null +++ b/bin/pre_processing.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 + +import argparse +from argparse import ArgumentParser as AP +import os +import pandas as pd +import sys + +import glob +from myslim.datasets.convert import _convert_dataset + +from os.path import abspath +import time +from pathlib import Path + +def get_args(): + # Script description + description = """Convert tiles to TFrecords""" + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + + # Sections + parser.add_argument("--slides_folder", help="Set slides folder", default = "") + parser.add_argument("--output_dir", help="Set output folder", default = "") + parser.add_argument("--file_info_train", + help="Set to path to 'file_info_train.txt' generated by create_file_info_train.py") + parser.add_argument( + "--N_shards", help="Number of shards", default=320, type=int) + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + arg.output_dir = abspath(arg.output_dir) + + if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): + # Create an empty folder for TF records if folder doesn't exist + arg.output_dir = Path(arg.output_dir,"process_train") + os.mkdir(arg.output_dir) + return arg + +def execute_preprocessing(file_info_train, output_dir, N_shards=320): + """ + Execute several pre-processing steps necessary for extracting the histopathological features + 1. Create tiles from slides + 2. 
Construct file necessary for the deep learning architecture
+    3. Convert images of tiles to TF records
+       (steps 1 and 2 are performed by separate scripts in this pipeline; this script only runs the conversion)
+
+    Args:
+        file_info_train (str): path to the 'file_info_train.txt' produced by format_tile_data_structure.py
+        output_dir (str): path pointing to folder for storing all created files by script
+        N_shards (int): default: 320
+
+    Returns:
+        {output_dir}/process_train/{TFrecord files} that store the data as a series of binary sequences
+
+    """
+    # Convert tiles from jpg to TF records
+    file_info = pd.read_csv(file_info_train, sep="\t")
+    training_filenames = list(file_info["tile_path"].values)
+    training_classids = [int(id) for id in list(file_info["class_id"].values)]
+    tps = [int(id) for id in list(file_info["percent_tumor_cells"].values)]
+    Qs = list(file_info["jpeg_quality"].values)
+
+    _convert_dataset(
+        split_name="train",
+        filenames=training_filenames,
+        tps=tps,
+        Qs=Qs,
+        classids=training_classids,
+        output_dir=output_dir,
+        NUM_SHARDS=N_shards,
+    )
+
+def main(args):
+    execute_preprocessing(
+        output_dir=args.output_dir,
+        file_info_train=args.file_info_train,
+        N_shards=args.N_shards
+    )
+
+    out_files = glob.glob1(Path(args.output_dir), "*.tfrecord")
+    print(f"Created {len(out_files)} TFRecord shards")
+
+    assert len(out_files) == args.N_shards
+
+    print("Finished converting dataset")
+    print(
+        f"The converted data is stored in the directory: {args.output_dir}")
+
+
+if __name__ == "__main__":
+    args = get_args()
+    st = time.time()
+    main(args)
+    rt = time.time() - st
+    print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s")
diff --git a/Python/2_train_multitask_models/tile_level_cell_type_quantification.py b/bin/tile_level_cell_type_quantification.py
old mode 100644
new mode 100755
similarity index 75%
rename from Python/2_train_multitask_models/tile_level_cell_type_quantification.py
rename to bin/tile_level_cell_type_quantification.py
index 4134cc0..5feeb60
--- a/Python/2_train_multitask_models/tile_level_cell_type_quantification.py
+++ b/bin/tile_level_cell_type_quantification.py
@@ -1,16 +1,61 @@
+#!/usr/bin/env python3
+
 import os
-import sys
 import pandas as pd
 import dask.dataframe as dd
 import argparse
 import joblib
 import scipy.stats as stats
-
+from pathlib import Path
 from model.constants import DEFAULT_CELL_TYPES
 from model.evaluate import compute_tile_predictions
+import time
+from argparse import ArgumentParser as AP
+
+def get_args():
+    # Script description
+    description = """Tile-level cell type quantification"""
+
+    # Add parser
+    parser = AP(description=description,
+                formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument("--models_dir", type=str,
+                        help="Path to models directory", required=True)
+    parser.add_argument("--output_dir", type=str,
+                        help="Path to output directory", required=False, default="")
+    parser.add_argument("--histopatho_features_dir", type=str,
+                        help="Path to histopathological features file", required=False, default="")
+    parser.add_argument("--var_names_path", type=str,
+                        help="Path to variable names pkl file", required=True)
+    parser.add_argument("--features_input", type=str, default=None,
+                        help="Path to features.txt (FF) or the features parquet folder (FFPE); derived from --histopatho_features_dir when omitted")
+    parser.add_argument("--prediction_mode", type=str,
+                        help="Choose prediction mode 'performance' or 'all' (default='all')", default="all", required=False)
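+    # Note: --cell_types_path (below) expects a headerless, one-column file,
+    # one cell type per line, e.g. T_cells / CAFs / tumor_purity / endothelial_cells.
+    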
parser.add_argument("--n_outerfolds", type=int, default=5, + help="Number of outer folds (default=5)", required=False) + parser.add_argument("--cell_types_path", type=str, default="", + help="List of cell types by default=['T_cells','CAFs', 'tumor_purity','endothelial_cells']", required=False) + parser.add_argument( + "--slide_type", help="Type of tissue slide (FF or FFPE)", type=str, required=True) + + arg = parser.parse_args() + + if (arg.features_input is None): + if arg.slide_type == "FF": + arg.features_input = Path(arg.histopatho_features_dir, "features.txt") + elif arg.slide_type == "FFPE": + arg.features_input = Path(arg.histopatho_features_dir, "features_format_parquet") -def tile_level_quantification(models_dir, output_dir, var_names_path, histopatho_features_dir, prediction_mode="all", n_outerfolds=5, cell_types_path="", slide_type="FF"): + if (not Path(arg.features_input).exists()): + raise Exception("Invalid argument, please check `features_input` or `histopatho_features_dir`") + + if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): + # Create an empty folder for TF records if folder doesn't exist + os.mkdir(arg.output_dir) + return arg + + +def tile_level_quantification(features_input, models_dir, var_names_path, histopatho_features_dir="", prediction_mode="all", n_outerfolds=5, cell_types_path="", slide_type="FF"): """ Quantify the cell type abundances for the different tiles. Creates three files: (1) z-scores and @@ -29,34 +74,26 @@ def tile_level_quantification(models_dir, output_dir, var_names_path, histopatho """ # Read data + cell_types = DEFAULT_CELL_TYPES if os.path.isfile(cell_types_path): cell_types = pd.read_csv( cell_types_path, header=None).to_numpy().flatten() - else: - cell_types = DEFAULT_CELL_TYPES - print(cell_types) - full_output_dir = f"{output_dir}" - print(full_output_dir) - if not os.path.isdir(full_output_dir): - os.makedirs(full_output_dir) - var_names = joblib.load(var_names_path) print(var_names) if slide_type == "FF": - FEATURES_PATH = f"{histopatho_features_dir}/features.txt" - histopatho_features = pd.read_csv(FEATURES_PATH, sep="\t", index_col=0) + histopatho_features = pd.read_csv( + features_input, sep="\t", index_col=0) elif slide_type == "FFPE": - FEATURES_PATH = f"{histopatho_features_dir}/features_format_parquet" - histopatho_features = dd.read_parquet(FEATURES_PATH) + histopatho_features = dd.read_parquet(features_input) print(histopatho_features.head()) # Compute predictions based on bottleneck features tile_predictions = pd.DataFrame() - bottleneck_features = histopatho_features.loc[:, [ + bottleneck_features = histopatho_features.loc[:, [ str(i) for i in range(1536)]] bottleneck_features.index = histopatho_features.tile_ID var_names['IDs'] = 'sample_submitter_id' @@ -132,42 +169,30 @@ def tile_level_quantification(models_dir, output_dir, var_names_path, histopatho columns={'sample_submitter_id': 'slide_submitter_id'}) pred_proba = pred_proba.rename( columns={'sample_submitter_id': 'slide_submitter_id'}) + return (tile_predictions, pred_proba) - tile_predictions.to_csv( - f"{full_output_dir}/{prediction_mode}_tile_predictions_zscores.csv", sep="\t", index=False) - pred_proba.to_csv( - f"{full_output_dir}/{prediction_mode}_tile_predictions_proba.csv", sep="\t", index=False) - - -if __name__ == "__main__": - - parser = argparse.ArgumentParser( - description="Predict cell type abundances for the tiles") - parser.add_argument("--models_dir", type=str, - help="Path to models directory", required=True) - 
parser.add_argument("--output_dir", type=str, - help="Path to output directory", required=True) - parser.add_argument("--histopatho_features_dir", type=str, - help="Path to histopathological features file", required=True) - parser.add_argument("--var_names_path", type=str, - help="Path to variable names pkl file", required=True) - - parser.add_argument("--prediction_mode", type=str, - help="Choose prediction mode 'performance' or 'all' (default='all')", default="all", required=False) - parser.add_argument("--n_outerfolds", type=int, default=5, - help="Number of outer folds (default=5)", required=False) - parser.add_argument("--cell_types_path", type=str, default="", - help="List of cell types by default=['T_cells','CAFs', 'tumor_purity','endothelial_cells']", required=False) - parser.add_argument( - "--slide_type", help="Type of tissue slide (FF or FFPE)", type=str, required=True) - args = parser.parse_args() - tile_level_quantification( +def main(args): + tile_predictions, pred_proba = tile_level_quantification( + features_input=args.features_input, models_dir=args.models_dir, - output_dir=args.output_dir, histopatho_features_dir=args.histopatho_features_dir, prediction_mode=args.prediction_mode, n_outerfolds=args.n_outerfolds, cell_types_path=args.cell_types_path, var_names_path=args.var_names_path, slide_type=args.slide_type) + + + tile_predictions.to_csv( + Path(args.output_dir, f"{args.prediction_mode}_tile_predictions_zscores.csv"), sep="\t", index=False) + pred_proba.to_csv( + Path(args.output_dir, f"{args.prediction_mode}_tile_predictions_proba.csv"), sep="\t", index=False) + print("Finished tile predictions...") + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/conf/base.config b/conf/base.config new file mode 100755 index 0000000..4ce8036 --- /dev/null +++ b/conf/base.config @@ -0,0 +1,139 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/spotlight Nextflow base config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. +---------------------------------------------------------------------------------------- +*/ + +process { + + // TODO nf-core: Check the defaults for all processes + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' + + // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. + // TODO nf-core: Customise requirements for specific processes. 
+    // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
+    withLabel:process_single {
+        cpus   = { check_max( 1                  , 'cpus'    ) }
+        memory = { check_max( 6.GB * task.attempt, 'memory'  ) }
+        time   = { check_max( 4.h  * task.attempt, 'time'    ) }
+    }
+    withLabel:process_low {
+        cpus   = { check_max( 2     * task.attempt, 'cpus'    ) }
+        memory = { check_max( 12.GB * task.attempt, 'memory'  ) }
+        time   = { check_max( 4.h   * task.attempt, 'time'    ) }
+    }
+    withLabel:process_medium {
+        cpus   = { check_max( 6     * task.attempt, 'cpus'    ) }
+        memory = { check_max( 36.GB * task.attempt, 'memory'  ) }
+        time   = { check_max( 8.h   * task.attempt, 'time'    ) }
+    }
+    withLabel:process_high {
+        cpus   = { check_max( 12    * task.attempt, 'cpus'    ) }
+        memory = { check_max( 72.GB * task.attempt, 'memory'  ) }
+        time   = { check_max( 16.h  * task.attempt, 'time'    ) }
+    }
+    withLabel:process_long {
+        time   = { check_max( 20.h  * task.attempt, 'time'    ) }
+    }
+    withLabel:process_high_memory {
+        memory = { check_max( 200.GB * task.attempt, 'memory' ) }
+    }
+    withLabel:error_ignore {
+        errorStrategy = 'ignore'
+    }
+    withLabel:error_retry {
+        errorStrategy = 'retry'
+        maxRetries    = 2
+    }
+
+    // ---- NEW LABELS for SPOTLIGHT ---- //
+    // Memory labels
+    withLabel:mem_4G {
+        memory = { check_max( 4.GB * task.attempt, 'memory' ) }
+        // queue = { assign_queue ( 4.GB * task.attempt )}
+    }
+
+    withLabel:mem_8G {
+        memory = { check_max( 8.GB * task.attempt, 'memory' ) }
+        // queue = { assign_queue ( 8.GB * task.attempt )}
+    }
+
+    withLabel:mem_16G {
+        memory = { check_max( 16.GB * task.attempt, 'memory' ) }
+        // queue = { assign_queue ( 16.GB * task.attempt )}
+    }
+
+    withLabel:mem_32G {
+        memory = { check_max( 32.GB * task.attempt, 'memory' ) }
+        // queue = { assign_queue ( 32.GB * task.attempt )}
+    }
+
+    withLabel:mem_64G {
+        memory = { check_max( 64.GB * task.attempt, 'memory' ) }
+        // queue = { assign_queue ( 64.GB * task.attempt )}
+    }
+
+    withLabel:mem_128G {
+        memory = { check_max( 128.GB * task.attempt, 'memory' ) }
+        // queue = { assign_queue ( 128.GB * task.attempt )}
+    }
+
+    // Time labels
+    withLabel:time_10m {
+        time = { check_max( 10.m * task.attempt, 'time' ) }
+    }
+
+    withLabel:time_30m {
+        time = { check_max( 30.m * task.attempt, 'time' ) }
+    }
+
+    withLabel:time_1h {
+        time = { check_max( 1.h * task.attempt, 'time' ) }
+    }
+
+    withLabel:time_2h {
+        time = { check_max( 2.h * task.attempt, 'time' ) }
+    }
+
+    withLabel:time_4h {
+        time = { check_max( 4.h * task.attempt, 'time' ) }
+    }
+
+    withLabel:time_8h {
+        time = { check_max( 8.h * task.attempt, 'time' ) }
+    }
+
+    withLabel:time_12h {
+        time = { check_max( 12.h * task.attempt, 'time' ) }
+    }
+
+    withLabel:time_24h {
+        time = { check_max( 1.d * task.attempt, 'time' ) }
+    }
+
+    withLabel:time_48h {
+        time = { check_max( 2.d * task.attempt, 'time' ) }
+    }
+
+}
+
+
diff --git a/conf/igenomes.config b/conf/igenomes.config
new file mode 100755
index 0000000..3f11437
--- /dev/null
+++ b/conf/igenomes.config
@@ -0,0 +1,440 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for iGenomes paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines reference genomes using iGenome paths.
+ Can be used by any config that customises the base path using: + $params.igenomes_base / --igenomes_base +---------------------------------------------------------------------------------------- +*/ + +params { + // illumina iGenomes reference file paths + genomes { + 'GRCh37' { + fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" + } + 'GRCh38' { + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" + } + 'GRCm38' { + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "1.87e9" + blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" + } + 'TAIR10' { + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + 
star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" + mito_name = "Mt" + } + 'EB2' { + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" + } + 'UMD3.1' { + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" + mito_name = "MT" + } + 'WBcel235' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" + mito_name = "MtDNA" + macs_gsize = "9e7" + } + 'CanFam3.1' { + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" + readme = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" + mito_name = "MT" + } + 'GRCz10' { + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'BDGP6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" + mito_name = "M" + macs_gsize = "1.2e8" + } + 'EquCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" + mito_name = "MT" + } + 'EB1' { + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + } + 'Galgal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Gm01' { + fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" + } + 'Mmul_1' { + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" + mito_name = "MT" + } + 'IRGSP-1.0' { + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'CHIMP2.1.4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" + mito_name = "MT" + } + 'Rnor_5.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Rnor_6.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'R64-1-1' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = "1.2e7" + } + 'EF2' { + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "1.21e7" + } + 'Sbi1' { + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" + } + 'Sscrofa10.2' { + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" + mito_name = "MT" + } + 'AGPv3' { + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'hg38' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + } + 'hg19' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" + } + 'mm10' { + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.87e9" + blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" + } + 'bosTau8' { + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'ce10' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "9e7" + } + 'canFam3' { + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" + mito_name = "chrM" + } + 'danRer10' { + fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.37e9" + } + 'dm6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.2e8" + } + 'equCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" + mito_name = "chrM" + } + 'galGal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" + mito_name = "chrM" + } + 'panTro4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" + mito_name = "chrM" + } + 'rn6' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 
'sacCer3' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" + readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.2e7" + } + 'susScr3' { + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" + mito_name = "chrM" + } + } +} diff --git a/conf/modules.config b/conf/modules.config new file mode 100755 index 0000000..437f64d --- /dev/null +++ b/conf/modules.config @@ -0,0 +1,120 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + + + + + // Setting defaults + publishDir = [ + path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + + // The three main subworkflows + withLabel: 'extract_histo_patho_features' { + publishDir = [ + path: { "${params.outdir}/1_extract_histopatho_features" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withLabel: 'tf_learning_celltyp_quant' { + publishDir = [ + path: { "${params.outdir}/2_tile_level_quantification" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + // Type of spatial features + withLabel: 'spatial_clustering_features' { + publishDir = [ + path: { "${params.outdir}/3_spatial_features/clustering_features" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + + } + + withLabel: 'spatial_features' { + publishDir = [ + path: { "${params.outdir}/3_spatial_features" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }
+        ]
+    }
+
+    withLabel: 'spatial_network_features' {
+        publishDir = [
+            path: { "${params.outdir}/3_spatial_features/network_features" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    // Individual modules (processes)
+    withName: 'CREATE_CLINICAL_FILE' {
+        ext.prefix = { "generated_clinical_file" }
+    }
+
+    withName: 'TILING_SINGLE_SLIDE' {
+        publishDir = [
+            path: { "${params.outdir}/1_extract_histopatho_features/tiles" },
+            mode: "symlink",
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: 'PREPROCESSING_SLIDES' {
+        publishDir = [
+            path: { "${params.outdir}/1_extract_histopatho_features/process_train" },
+            mode: "symlink",
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withLabel: 'compute_spatial_features' {
+        cpus   = { check_max( 16, 'cpus' ) }
+        time   = { check_max( 8.h * task.attempt, 'time' ) }
+        memory = { check_max( 32.GB * task.attempt, 'memory' ) }
+    }
+
+    withName: 'COMPUTE_NETWORK_FEATURES' {
+        publishDir = [
+            path: { "${params.outdir}/3_spatial_features/network_features" },
+            mode: "copy",
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: 'FASTQC' {
+        ext.args = '--quiet'
+    }
+
+    withName: 'MULTIQC' {
+        ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
+        publishDir = [
+            path: { "${params.outdir}/multiqc" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+}
diff --git a/conf/test.config b/conf/test.config
new file mode 100755
index 0000000..db52cbd
--- /dev/null
+++ b/conf/test.config
@@ -0,0 +1,29 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/spotlight -profile test,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name        = 'Test profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus   = 2
+    max_memory = '6.GB'
+    max_time   = '6.h'
+
+    // Input data
+    // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
+    // TODO nf-core: Give any required params for the test so that command line flags are not needed
+    input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
+
+    // Genome references
+    genome = 'R64-1-1'
+}
diff --git a/conf/test_full.config b/conf/test_full.config
new file mode 100755
index 0000000..227b4ca
--- /dev/null
+++ b/conf/test_full.config
@@ -0,0 +1,24 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running full-size tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a full size pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/spotlight -profile test_full,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name        = 'Full test profile'
+    config_profile_description = 'Full test dataset to check pipeline function'
+
+    // Input data for full size test
+    // TODO nf-core: Specify the paths to your full test data (on nf-core/test-datasets or directly in repositories, e.g. SRA)
+    // TODO nf-core: Give any required params for the test so that command line flags are not needed
+    input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'
+
+    // Genome references
+    genome = 'R64-1-1'
+}
diff --git a/custom.config b/custom.config
new file mode 100755
index 0000000..90a3cf8
--- /dev/null
+++ b/custom.config
@@ -0,0 +1,114 @@
+params {
+    config_profile_description = 'GaitiLab cluster profile'
+
+    max_cpus = 24
+    max_memory = 184.GB
+    // 7.d for himem, but not sure yet how to handle this
+    max_time = 5.d
+    maxRetries = 10
+
+    clinical_files_input = "${projectDir}/assets/codebook.txt"
+    path_codebook = 'assets/NO_FILE'
+    class_name = 'SKCM'
+    clinical_file_out_file = 'generated_clinical_file'
+    tumor_purity_threshold = 80
+    is_tcga = false
+    image_dir = "${projectDir}/data_example/tiny_xenium_set"
+    gradient_mag_filter = 10
+    n_shards = 320
+    bot_out = 'bot_train'
+    pred_out = 'pred_train'
+    model_name = 'inception_v4'
+
+    checkpoint_path = "${projectDir}/assets/checkpoint/Retrained_Inception_v4/model.ckpt-100000"
+    slide_type = 'FFPE'
+    path_tissue_classes = "${projectDir}/assets/tissue_classes.csv"
+
+    celltype_models = "${projectDir}/assets/TF_models/SKCM_FF"
+    var_names_path = "${projectDir}/assets/task_selection_names.pkl"
+    prediction_mode = 'test'
+
+    cell_types_path = 'assets/NO_FILE'
+    n_outerfolds = 5
+
+    // Prefix for spatial features output filenames, else 'slide_type' is used
+    out_prefix = 'dummy'
+
+    // Spatial features parameters
+    graphs_path = 'assets/NO_FILE'
+    abundance_threshold = 0.5
+    shapiro_alpha = 0.05
+    cutoff_path_length = 2
+
+    n_clusters = 8
+    max_dist = 'dummy'
+    max_n_tiles_threshold = 2
+    tile_size = 512
+    overlap = 50
+
+    metadata_path = 'assets/NO_FILE'
+    merge_var = "slide_submitter_id"
+    sheet_name = 'dummy'
+
+    outdir = "output"
+}
+nextflow.enable.moduleBinaries = true
+
+process {
+    executor = "local"
+}
+
+// Perform work directory cleanup after a successful run
+cleanup = true
+// env.PYTHONPATH = "${projectDir}/lib:${projectDir}/lib/myslim"
+
+// Profile to deactivate automatic cleanup of the work directory after a successful run. Overwrites the cleanup option.
+profiles {
+    debug {
+        cleanup = false
+    }
+    slurm {
+        process {
+            executor = "slurm"
+            jobName = { "$task.hash" }
+            // Select the right queue based on the requested memory
+            queue = { assign_queue( task.memory * task.attempt ) }
+        }
+    }
+    h4h {
+        // When on the cluster, ensure apptainer and java/18 are loaded
+        process {
+            beforeScript = """module load apptainer""".stripIndent()
+        }
+    }
+    apptainer {
+        process.container = "${projectDir}/spotlight.sif"
+    }
+}
+
+def assign_queue(mem) {
+    def queue = ""
+    switch ( mem ) {
+        case { it > 185.GB }:
+            queue = 'superhimem'
+            break
+        case { it > 61.4.GB }:
+            queue = 'veryhimem'
+            break
+        case { it > 30.72.GB }:
+            queue = 'himem'
+            break
+        default:
+            queue = 'all'
+            break
+    }
+    return queue
+}
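+
+// Illustrative only -- these sample values are not part of the original config:
+// with the thresholds above, assign_queue(16.GB) returns 'all',
+// assign_queue(32.GB) returns 'himem', assign_queue(64.GB) returns 'veryhimem',
+// and assign_queue(200.GB) returns 'superhimem' (cases are checked top-down).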
diff --git a/docs/README.md b/docs/README.md
new file mode 100755
index 0000000..0a970e6
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,10 @@
+# nf-core/spotlight: Documentation
+
+The nf-core/spotlight documentation is split into the following pages:
+
+- [Usage](usage.md)
+  - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags.
+- [Output](output.md)
+  - An overview of the different results produced by the pipeline and how to interpret them.
+
+You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re)
diff --git a/docs/images/mqc_fastqc_adapter.png b/docs/images/mqc_fastqc_adapter.png
new file mode 100755
index 0000000000000000000000000000000000000000..361d0e47acfb424dea1f326590d1eb2f6dfa26b5
GIT binary patch
literal 23458
[base85-encoded binary image data omitted]
z$>r%KIZ-I3PZ}Bm!Q#}d$##p4_|J~8xGT$(l(aiTeGJQ`=l@vfn_jb#F&cHx#281d zTV%aw&vzZvj?=#Pz9;X6=dy%dptg@S3bVx_!D5ioU43vZt5prXDPW-JTi^nY1 zduhn)cB})E7hrmc9eMY`%JodPjoov$CC*+P+7*}y&>@`DE7s{&`FQyYe25|qj*sh9 z`FJE?gKs#H-I-fS?fs&SLeXwLh5ls;$cD%L*3U**Whf>~YD1+`W=9V*;xM(IzwO*e z5MUNS69f8NQ{#1e#Q3Xh6%5qWu9#MPj#Ad)f=maFvUlyYhEMJz?Iq`e5U>r05PT={ zY;$ziZ&6YieT26!PTJ8DTg}E9DJf`ZDi)aZ|ImzJ-&8H8OCe&{N{F(&_|`l68AV9K z`~xF-A~F}$=&>=4Ma;DphRLhaC{9z&_a8s{jIhivFePR;dFWJ_8IM9Zz|%DwRQ82> zCe+sOMnYGIms+(lz9Zl|Sa;r}br;K=ZJ0JD-|iR3+2yX$xlGI`GTSN8mrKM~RL|3X zG_wFXTFzjlE>t6VXMfQK`6U;3x__y~qE~{gTXQ!hR#rM?njmwN_Z2jIP4C2BjheDf zalH&D&klP1KAXgJF~~+CJg&m&o}=_;*qPijdrEQ7hcGCywgBAV$TK6Sw>h7P=gNk% z#D$2sT8pYK`jcq*lw`tuvb?1HFJMKX*X<@bK2UUBR@ee3AC=bTM_FA2tCz0^D~h8n zsy7B*rI`Q5Y|MjxWxFU%rvEqlmp#5&#T3nOLuCGlU_i;MYLE!O`|@%;cLx>55t=*F z+@g(5+4YKAzx8%8V?-)@s_?{a?dL(3TLtE+C1+^cG50=E0P$`2?F%HXIh1-29v^_q zj9;xJ(r~x;A_M8}__gSs*rOSlQn#wL2)l6EuZJJqaCQs}m^$LnQyPn6@6YLprz!j< za9!FrVMslV2|VmfHJ*7mA}bAvQj!Ffw$~> z+aXTVb@q9_-aO<6ux|$DeWb~l;!U;xqWp%Qmg{M48sE^Bb!>@J1j0( znVzA#l=qu0x16mf!IOJL2%$BYL0u9h^BQ-RcTXNbY{Pokw}^jmrd{%i+D;ioXf6as zeF*`8h>S;x7i0qNZ0&Y*sA!Z2-$70HnrdRKelU?9)CqTQaP-o)kaPj?`n$1??|{_* zOkn+g^jmK&{duW1DX6-u<$$m5@lp(vzdVKw=p6S*o}D;aAgjr-;;Zedm*W?oavRyS zkxd4}w%V0#mO$C&k|hZk>BpO`iZ^Preg+8VGqsXjpc#<!dv!hWLF=PxZdsvP zxxdjp(oJ3Btv>~>HJNW8_X1;AW_8enh_2;GL)Qg_}dl$aoik?y6oCZzkgwBS*tGN zWq+e*&En@~`5T(W>VhE4hw~R=61r!`UueU#prxGCMG;es6dM89yOkjb&yJZH7VozX zVLHwAe~4XeGZPTi^}Wh17IOhOGCjMjKw)u&4C%B{QR?7qyNcjq6a!|;a;*%xrrnoE z1R+Y;N?E#XR^d2E!kOh_OiW#%WJ2jY=zV-3Pk?Y)SxRfFw#Qd8OgD#7X&simU$O}k ztavikwkFOkJb}D(UL+LR{l9Tfa<9Xskn%CEpK<|yb z%cMqs@~)iOIKvItCbOF!ze=7RLYtlAbcCqF6C_>QTRWvKC+4o)xaId{{bn_ZG!=^P zQXiZ4>vslir3*HSg}h)<98;`<#-iudnoVrEV}&l}KBd$H)By4W%;gCtY2xILTO{(G z9V!@4%}`SUgPL-~&e%&+$%f&=yG0(qIrl{3NbXKur)g?Kp-3=zf>Z9a=H_d(DS zW{09il11yfqvVbxD5jM)p55zRGO=cs@-E$WRZAkyq?Qj)jt)IJ23P}UGJhzH4yw0n zFTkb~RtJjie>}l_V9)#iXa|Ts%no$j^;Rcysx-s_n7VHaF)|0PPY_l2Cx4I&vp#G{p!F-iaeM|p}i^0f+VJ;eAR^MA{7~hUf+n)w> zh%sR>=|pTNdh`MV6sAw#d=>!&pErXCTY{uBricm=D+SU5939lkdQBS;liLVrnqB$~ zzKbZf-|0#iTIkJ|ml#9Ku;9lgs3Jh!{H34?MzMCMmKb@AaslO7un~1lx=N72_QfSF-e(t>6VS4+W?n1q(M(FE1yW)@S&9g@Z(#V-pv60ZT`MAxOH1}X9w(ma~ltK zkz#Rj)1Mh_edt51gJ#ui4Qe}LO7xfO^nbb8e|5bktt7}8veHbS7PmFrPDwMYzg#oD z{Lwx7k}B9bM2~mY!bil`bjC!SAJR1_Dk+ZHH)|V*jx}sXbcqXgjzbeuA6Y9<>z#z+ z7MqccdbWm3uQA?w{w!jxr?2)TC@k+@Q$y0t3O?O=FdV#OyJ8_AAnBj9XV8gf_yQd@ z%R_=3DvPA=X_y+F`_&ig=$vy}g}w=g!@oUhZ<;9NF6$rY)g8RbvX5A=)2Uuc{bJ)| z3R4)pNbC2EX-CC2v$4V$QHj`DHBOdY4wP0&XB&K^m@Lrevl@k5ZUhYnzRMnI_(uU_ z@tD_)%qc|;D#R?BLMOi&*m64}_$~f?P?)!mPk2_=r-6aW%F3{tgnpmdy~IoCj9N^lB3VLA*FFw0(l*lnVV+3&PuyJ2b3Y6J5D3U-^fXYjp#seSEaJ3C4sJw-vVrNw4Te&sQ3yZO^Uu;)9 zAkoki_0WebPq)Mm zw+dv!g$ix$!6Ns)bY*BcT7ZM_{lF+b{i`78Eb8@*2I$7x&9J_L``(FQCsZ~pt=&-8 zG3lSxqc|&->?wL5IhbRcDU0iflJtJaQj!lH%($2=@U{waSqxXb4(*mqoC)0Kv$IT_ zH42b{pfk^m2oIPrpCCrr%~aU;QZ;NEUyZo=Q;d*}OY7w|xnBguX2i_6SF^j4cVcUC zv0Jt5!Qceh(W-p@r{;o=&uqS_n}>nW4lJtR_ALgm8xVgJ41(Ks+NeR zFZ%UML6MR>1F+!~eh~zeOWoDxRGOcFEhzbap?;!mA_I)N(-f*5Wa#spDGU z3Fh>CdOyuNEHay*mGr@ibE_<_HH|RnnIE%xeQVGbp`_E%d85PA&_le>1J6Q4qFrlO z!Jy`liFaRU{Z2CxW_RXVTxvObOq4^VXYFw!B#RgsBjQ~TIFn&jR?QX;zqz@Wl1F1YlWBeEWsWBJj=nNkCOvK(k4cYPWYD_ot+aYV;7X+7 zI7P6x_gGy+_g3`nI=j7Lw=`%1U8VKSmuoph_9!QjQ8bFKc-wOX<~lSTM5Q+9W4wZ7mwpdC{~$5n#h%3)AK*U6)o} zdv&9DlP<~!DQE7Cq`u!{4>sRzV+;O50eO70dc@yf?>A4@&M&v|J)0Wz{s=8dMZ5Sli6wZCTqbg1 z?BgTW7>b_5IMlM(w#gCOTmjKko*bhE9Ko4htrr(dK@$AH!&{6=he+0th5;bg-KOZ98*t1i7d(5%nP=ag3FOAMZl+T8U$4nc->{a?L;C>flNRi zplitg`cJtJq_-!%{+56LU%uB5P9$3L+j40a9^aH9M%4`By43^kv@=3>r~GEIdz;(n zz;r8t0AeUIenpCf&ek_ zno^0AIi3)fg&{*e~y@EJqFwi!ipU__DEJ#qQ-16{S 
z|DA|a*G?q5O0iV7i(~(D6kl4E{cEYy_BBE@==cV8lj#gjFUXbf@>n=b zEJMbnZqy}v!6f+6%(8<2Y$UwDAFi~=Q&>wt8FfXri$1iOoABPdws zqp4Fuq@c@$;J8b5){re~y#^Ji-qxefjCD`a#-j2dMgkCus)7Z(^5Cq6TAati zYguGLr0DXY_ihR{LPF?m(?y&>3v5>+k&z4QeFnt0fC_ghUBafT%Md?QuNKo zai}G~GY-WHamRcpCBiEB4Trm4q!Nr~*^ zn{_>80{RM3`+JWeo5c%fb2krHP5;I@y)#h8>^)rSvV5H%^C7XhAmhoBj5M!dO?hl$ zBhL6Wfz5breR5*QV5vhDWmnw!$bGnYcIl3ZV_e{T-vLP3{=%$yj=& z!hNZ)8~fzwbtamRjIC`6b?s-EeiS)RguQhYmDf~jz_070-W;*v0~f)4uGx0kp^UC( zaV1p7ZL9Avn-3J>yfU*yk<412vaUdwZ9eQmInrKOwXeEw=uU<1nQMO#CX6;7sFxUt z)8iQE_Z#0y9AJzaDR?kku5*h$-zv*Ogs2TwOZ{9C6Ukjz7SmxEw^}zuoBQPlZl9PuT?ut@#>I4jtKjOCkMqHdziOPd>sSE(3jidh}P9 z&>ODr9aGYG!0lOlqs;yTgX-HLYii(20Dr>&;*%fYezh literal 0 HcmV?d00001 diff --git a/docs/images/mqc_fastqc_quality.png b/docs/images/mqc_fastqc_quality.png new file mode 100755 index 0000000000000000000000000000000000000000..a4b89bf56ab2ba88cab87841916eb680a816deae GIT binary patch literal 55769 zcmeFZRal$t)-Fn+z*nS{Vx>rm6qiDAOL2F1cMtAuDNvx0;#Q!zyE_zjcbDMqmSlzR zn{)pEI@tSUUwdu2)&Y>bJb7fuJ?=5a1EER^lGqq;F_4guu%)HMRFIHRN0E?_z5hZ+ zJaJ}X&O!Wm=At4gf>b&}x`%l4+)`Lx7zwEYjQMDcig^FRNlM!V3F)=#)7P^V3xFpQ z(!7JTn6R3s!6EcTteK|QPPjx@DDOv5T2*CXB}Z%z@|SP-DsObzPh`FaVcdV&m0)j; zcZ>LN@}*RhsyUw6to^1IV&KrBgSL*D84<+V=b92tLUGmkCzrla{Dr!*h^X~IGAQjM zyD9lfz=>mTe@ql{QdCq_QdAt=(BA&2YBUsY=dfzD{{p(Xxaz)h;YCF8?Ul%1e}5}@ zO@0yZuh)nND%kn8|Na%lH#NLM=KqYOnC|MbCw}whr}=*yP7H-Y`-r9qwQ2rq9Dz|0 zBdN65Kl4A$DgS>m=QkV7|7=EzGh^Yu&HaDh$NCi3wnS$c$@$FVUp#HFss7?l0LJ~{ z!`SL7tNPPP=8^Kq8)3(i@(qbit!IaRj$Duu3h(VXaI4Sdu3~_@H&ak|A1shtFJP;$ z&Ff|ziaT$FS{aiU@Te#m;Cp!+I*IbJ@XxAqIeeeH<$>FQ&-YdyTH@a_&X?%>7*prF zp2!e%;=M(CLssc(k6U1h(+Z6N7fk4b1$pU zx+k}@k}uu*?&UWT+g}Y#gV?3_XQkIe!hs%Suq9Q))|Tlh`Wr-J#)v6)bNt9IQZ-?zd%Hw*=ZrCzD^f-D3r^0KBi$+ip$`A6Mk<3rtrZFNxAf zKk90T99Gb#t7ndaGJ(*jcpaOR-2zFV|0MH`0H4>cX|8kH-A>yB@PzO5QPgAAeG<9~ z(7IdVikhJ^RFhx&6*~Cd*30U>;FKs>ES%nYuI$%8RM=1({ChUX}X7!Wu zAA=&In$O5ezi+pM8LtJ8`oW`oa28+E!&*f>9{W97;k4XXkIS^H4+UAGvZx7D{UOIK zH$}ZEkpj2NC%)GxA>My-R{)`xdTyO1fcg{J)!T^@lJhkw=vrQzj&$^Qa(I7Cu2xl- zg5af(2k=sEQGeBmBNF1c9B_MFCIG7eR|`T^)>Jws({-d$>S9rNoIs$o1qKW1U(s7gPai5(qrX(&Um zwy;AI@AZ}{%d9#&PBP>zwc8=%jgWWGH2jQp`DWYPw4k^T`^Nvelzg_m4tOygvshAx zSic)*_56B2$iwR{sdtKA-$NW8Cffewvz4#abf1JwCg*y2X*Lu~6edkmydt&um&!Yh;0Fgz!I z8S zXW#cIlDgIR7Kgd*mV>IL1+VdR*KujmVe6Bnrwi2`nyj5h(N`umHB#h26X zt}BBFa)TAfq5C^R?mPC5nk4!GljuO$+PG#|*B4a_2>^!?m-qb{I`I10^!40&Ah?Xo z5pt;rAZdrM_}>Q86li@(J8)D#f?(9Br`@U}FA1>Jx%%}~}bmH|q8K|Y!jaNAu?dYM~6 zRZJc^eBV;Y!Mnx?kn&2<<#2q|Pp)+P>ZBPmqA2KkX?Et2s&9LqBzZimIWVsmGYatA zRXt~RY=fjB;A5x~rSrZ2e#S!_7>vCGqC{9lj*|V8LTb}g!H@mpp{+Rn_v>x&(6H+J z7}nKf@B4Ld%Z-a7|M0=og<;D>XSx@Y&lV$4Ekin}o2SXK^<>^M{r+%K-I&?XE$nJSn(xJK4qrH|bnqfPU>4jm=e=x!oc#?Jke&g(g- zUucQtw<$SVY?d~P}!t-c2Lo8mx6d`@70 zvP5TBSUX%%C7-WOwciMN4WbKqP5B%ow3f{Z-jx6kgNKYV|^tpbL^<*qZ-A^30n?FBY*Hn_q~jp%0Mg-<>UCF!!;rL{!Y{b z*3Cv>f1?;licgf`G`bG-zLl-3R|wc#Q538g0z$S#C86oCbHSjNy?ANChiOIVH2rMI zG5nGlT3Axtm$CYA3AoOV^jpuMy|ROZ?T(T^1UI_*!$t2I@DM>^@!2%tQ*2Px;zGGh z02fo5-BK-N3cz|cST76mXYkO_egPK}#MwY7cUixalk{5k7n=LGIBj3hTJKhyeXzl~ zGo3fkBcT7$3Q6oSx65M@pbZ+YC;(b=HY>1%!!mZp6Fqznq0rpI#0pXZU|dVnIlk9-%u>~`h}VhYjz zmPod{6t5ndj-zKD=!WOo(!>9dq!*2ld8_8dca!LG1x9m|yPCUXkoxbbV)V`B^QlP* z2QLUMxOI2m3%(x6c>7K);Oa-%C(!K#N~N9Ef%3qRq9J)~x4KpV>itdW?%7A43LDIa z8X^^jrZk!ojDyDSMXww70zLApJntoe%=xcBD#D>RDy64nfaU_M6Z)d7V4v3O7+UfM zI23&xL2-PqOi$oj<6nQBorePGYWBHH+x}3PF;m>1({p~`Te}(*tYP8JcKw|ZaIa3W z5|KeaW+a1}*~V9jOh9(L$~YKYYcNd}*`l$FOU6yA(HR-(cSZ&9*~&v1R}oErionDF zkmE|SIb~(H=VJ$DZ4b&-CQ)fO@a_a4)*zSnmv493+6k&S(%z0p_QJ>psX^O_V9lhrb>BAr9 z#!w93wGILaXkvaRP39@H;n)|GB8ih{1e-l>kB{FBn1qGHL%+#NzbvY3$Xf&5Ir5z2 
zPG9!I*3-qPiSN%$8O#PHBV)1VD}P1)O~7Dhj2?72@pBcduzphsN8H)`k=p3Wh%;_$ zOeXLMp7o@Qaw@rwstN}`?{)X08s5C`DQlRw*eDrX7{@P}7d8#NUz6uvKJSkcQF?Ne z6pViyWiT|=e=Doa?LjcWpUG)555Bnx)chgcgWJ97&2EQZf!xal z)p2nI02nbGF^RF>u>$hlk&33=WQ-^JoI>Si0u8 zV07Zbz#>r^qAXD{lBu!00RKml^p=Cv64=~UMF`M+kogAK za9tvbFb_5Czmu~*!Wcf7X4}nlOhFn>z@2UYs5e8zXiDYQ=Ox))S3>&zy2o(u2h5!JvYvSsLq$lAJ%%c;J%Lb@e5mEkCW z?eZ|Dux0i&Si?wGLD+e^#G`KKbCx{u6gsr?6jUM?pE*3wAGiPuHc1MIvY4|WVosn|)%172v_ zuJ9qyLTdW=-$|n#8!G@V$$7Z3oifYzxs!m`vv;S}RV*&e|L#YrvkJalcR(jP&|ivp zdX?VXKmoSP&tSH<4&P*Xc=vJz77}8-1B8!d0cW#BxWLd8o=iJfUfU`0+(QVsx$4{8 zM%dD+!cq1`U^-K(q~!|)T~eLAZia5FB+I+)`mCM=ATeKEa>FyeeU0P0N(2$?H5_a% z1c?1K;t}s!d86fx%Dsml&FIN>)%>u!tJSay-_BD*KV3b8rOY0MRDF}8&W3rMO8Cvd zq4No{`UQOiAyeW&=;8TZg&{D6<%2^Z z!|qE6iY8+BPguq9y#O>n~H+h-giBAsF%%~f&;2z zHSJ9+elB|j$&@GebI=dtreMMQ&ghri{%!G?7SS%=%2G0KqHH#RkD(za3ny=Hi$(=p zLGvS3B|d!WGOoC}J8#If=~Y0uQMxBB0Dao47Ri8W79ysyRyY66Fcmx+Tm-DB zhy25cx=95+#qc?ToUlOnSSf2{HM2o=*VzYQSjU+-RrVoQq-g{FF4Zg zE~D2d*8doXY~?Q)$%+d%R^R5T*Ja|j(efj$qMbfNU$|`D4f(?#^kdi{t)k*vJRUdL zlxcwb4m#}66CTp`2n9CPSQhv#x;!Mn5l~6yO6GGaT9+UCvj-#Cg^PfUgy(9?6bFXL zpNb`ZMW&HB#=RloUUl{4T*WAYN0#{>9S=giO>#Fy+5dV^K*r~FnE~_`y9;cG`R|Z< zoOm=C`0i!|j9q)!?A~%82Uz7BM!4{L-9s2&lDz;lp6G%f*Hh2|EjuF*ZTdWkb~fij z6_P^E5528|&KH1y9o-vpP$5xCn_I}+iK{MC;6&BY+8Fs=m!-n;b%SD?b{UHjMD=vl z=|HehRp36=l!l{Nb=j)%E)c-p>$yu+7f<0NCv?~F0Cqtaf)`7bVV&u>BhZse9N&i(A3$x{)K4e9C)`q;|M{`52%Ol-Fg#F@RhIVC{{nI!7gqddBASWD!btp-(BBw zy3b`l5s_nR2<)6q^Y+vd*eWbZ{zSIO{;S}l*pU8|lJn$|PvBuKUqx7+=-R09e`&ej zfx{|HP3Z%AGj5jsR!`dCO19@yQ~>yvW;*!(X7#4zWHpB}1(BEfJf?t!{10!5-z-JJ zQX-eGqE>l9_7%!}cZXT{YORv&H@6?!P^VBI%uu6V6=U2bfK z-nUhXzIRgAtSRD^1sRqBr@J>`*yP8cp7G0o-9a4q`1%ZFqkHR25(W(nc!>F8Rev?+ z2p#E#0X>$-*t{U__3WWm|LRC(^ku5R)_I#q+`)twhDXu$zH2tK)}SV;F#zE0@2 zg?0JR?v@D90Hrb{11&%10Dztc$r&o2>~^QX>Hg!vk;( z#!o$oW+d2aJ3E!HTRLmi#ku04&fiTkl>~TQ=DSMO6nU&V@0^f&T|`G#xX*^A`Jd~q zJ}%Ne)$q(Ccl0IwAN0|Wt_{zb<)PfG{R#-xbxpIXTB^TSg|zin6u zSh5q{v1O+fzBxjo@#?QW1SARF$04v2_)CFv*=aWK_yOuc#x(QJ=Ett;&FUqs;sfxq zCIB|&O^N=5HrZJJV02Sr(xjsQLk19jeTIiI@V|PQ~{$B-zwT*x3pGviT$60%8 zCF!>divF-$D){m87X$&aRcy6G_WdbycC+L(o9?%>1B5-W24q|AHU&J)RiTV0+o^D# zT@WW6EHpXfOd)pp&5q{s?`;3C`S)0Y*FJT?+vbC9;6s04-B?QK(}F_(bAgv9`a9z3 z6M28iWc~@r|2+7AU-9?vZT>GSHUD2*%^6Xwe{?i5`rX!MSZEWDhZAtQj+cwo7%6a? 
zSLc=zv`#AoZy(3i_dRGaga;nDKI!IPS|BN(j!XSr`)E`qYOKB0Wf*X2oba7V#{I5) zk=%1laIo%)G5j-l9>dPfyf>2it=GmbYZG{h1;(^o*K*Rh-V5gQHTu_th|#qnsfD#z z@N=S0eaEKKL8ivW8}}v!0nvu1qUJx#E)FXw=}JTjohk=?^dIb7E2n>IU)7z^yXKN5>F_agCUG}=!;#J&CZeBX*c`T6-#zh=YC zndemokzv74zo3(!G~OKC6xP?%!8h!~ZNg_vh8nM8JRn4`F)hCQXDep(R~_D}48xI{ zy4B6+;dRhGlsf5MLde2Kp_-kt&0xj4>3R zhquhEz2pj?@1^q#2>W9fj)Lo|e>Qu;f1NoyY^u>Q{MwRUOwH>_4=8z=h;cgr9=^=* z?xGoVzo&BQKig6XySlGE%#IRELH|3M`R8%$1||7_>z7ob{BH;Pi(>l!kOxD5aw~vz80WD^z{{}CSKKBaMsdz*X zg6)>mlPEl1p-B3iKpQu{PzB-uPdhWO{u5Cs7TY70bf2c^q^bito#+l%nrww;wH*q9 z9^AY$9%^s&xgT$p@9X{}TC>IZXEuYUIBot@Zd+L=dt8Ib>xM9s`UCq}w*sdfH-c>$0J>4`lZ*J!KJWf!Y{KJ18 zO*eu+eRMMb1qB7s`&Lme!UCS%p^vnj9Q2HvZ-t@@!T%j}87W(a>}+UdXigJcB$4Fw!o$e+tk>*3^i~SJOF4C(3^hQo`+k zUHc7b-*l>D~O}$@DWtwNsB+WB=I-1wY3B z)aL(26^f6bcMLQ!gU#$v8OoT`dO;}%ZkQ@+oL)F*{Gtk~zA0_h*@O(Wo!zyFkK)04I`B2uMsXC_I zU!z7c!RhYhJk8D~`gE!0=iP>pQ1&?a zB!)_?vR+2ekCH#{3X(;%F)T=$KuNw;e-z^P__rCKy7~zHo4Nd6PA>hsiCK;Rkg$~!x* z1oZ}mhF_&o*#{n_Gl6O4`E5MaZ`8*?L(y-2KH65;x&P}1M}c~Nt(r)Z&EUbuGWgb` zq7h*-WJ2sQ%Gao%mg#yU&%gCFZGLyHw3wSiqxS1=ra7 zhfVM<(E_q=xL(ERoMH|F6v6KtK8Lk~#`=qi2h8)gZN zpyUxJ+PA&F!GFW~&t>#~6y)_7(HpW8GA#0Jj)JnO8cp|o$d$>=w7`eLBf~3W4w@?I z3W{(h>8dd`6ru&FGa6{(H&J8WF#<6i9@Pa!~XE?j?N_|er(s~ zoQnPL+2qvYPfp!VWX_=|XJ`LT_K`)B)Hpg6`5Jj1h*XuWGaakV^^5GAL8 z1<+W`_)7+Y9;rgWz7UMAb3^H0$qF~P}9YX$|(l68N)eOTs+-Qe#c_pox#H>9Hd=PVCb?037 zc_zYv+uwJQsXssy&e|r6osX(3gtZO%F+;}1ED_{DN(OKVGEW(OEgOHy`z;Y7edqUg zys_WA|GWh3p==edvj;U(>@0s)K za$RXeodzH`gT9(d)4eY`^}kKtGx+twpn!(!VK&>E+`yXpuh(v|Wpi(xTH=d7h;v5M zR!OVLI0!YPL@|EdV)~92GWb13R$pt`GEOT?Qb3x8FL#*Qs?^3PjDp30bwiH;|K&TnmI{XS_VTuIA^Xnk) zsnw>~BEwGBj$xwjGp_8r=GxpTbLY>4v$JC!E~~?Hz8N?^Ndu^6cq%-o7f>+JKkXTPIu#nTp1%Bf8oJEn+~#k zN$lGfo=h(}gTm<=NmRx#HWubhurWa9!z_j0mirhQKozcX)o-MCKS+U+)JmbYr=O&@ zqxm_+j`#c2m5$2FzBZCB1j*|si#Xvy3^!Fg04#vUxMh?he_JB87X1Pu^@Js}Al%lvRC}tTS?07wM`*eC|2fyacbu0nu1^PZ>k4AuS6p2pa8h}3!lXb z7r_gjW1#8@siJi4P7|_X)OLVfrXKQ1D=O4MjItz#=B=8o?40SD-1vq-P6EOgSr>U~Z9S?C>u(HvJCbLw4qC ztop8mY8GXcZ~_~n((s%NJy11JVUEbad`sQH;>i#eZ%GutbswFi`1%Pt)KH$zcr%DNDbV>DfG#DbOi8HOuFJpN&gT2;Iw>eOv}O#o z4R?4w{O&%K5Vb8@eB}{yeS>?T6RABQWkJM`{;QZIfGnGhyGq@IV*-6knvpw|-p9>L z8_Al3s`00QS`2aOB3S!KJ6PoClJHk*^e<9Ad|2h$i@?&-W7MU;?%kal^yz-r<+G^1 z3ePEaFu4kt4B8S>_b4Tog*3~bz8YIp2aKD9eM`&~kMoKBWiRy9>3*ex{3JikcJ}Fb z%F|>X-1Il#2ykyN?PknmKS5VQ>R)oG6|@i!HKt@e_*{`e6InENts%!y^}F{k;`8W< zOrqN3znhy>Y9D=`Y^b~%VAL%YTfa)04G_FL@T75=u?EDHHkKYcahGyN8oqe$#fkN- zL8ZX;gEHG~1>0NUj1-Y$rY3Fo=O%*5W=W@_?&iwRXu`HWXo{>Xyp@Hhxe!iZ?z&aD z4#nffwZ_Qzzrns#X;7I)Zjo{zoMhLa+xqy$Lg_DE<4d}V4`)a2&!Cd8UrIb`$7hQ~ z=rk3pL_>uShe-#nDQLLow4nimpL(^LXX95){J{Vs+#}lAx7hhMZKMAmM z@F@}Uj3|<`r$;{V-DHE@vA-qpGrh)EZ5nLHWL(KsXXqLi6M2tSeldQ*-*^A#+2(TN zh$e0D&p8p<0o2}CZ?Hhg*9_EEM8poNPOG1Aa2MN4ah2O+F;TTtw>uGr!H)Gh>J2rH zXFLlZh85r9yE4=+UxGnHePi3;6^A7(&UUa7E_@yVU?4Y_-Fl<@d%Quv-C`T%DQ|3``&(L^MPUn-q&sCZ zIsW1CvgOQcUB>3?@6N76^$4n~f@AH|@$r9Ikk}0E6n$%+>4bIhw}NC?o0k^zHGQCq zxp%a2gBW2V&eD+hK-KcNgv_rD{9j9$3M3nTudV&qOyVhqdTQ*bNTlgAZR#YREPi=I zfkqQU1+uZ!r~ zapTZw$fVK7r9vJg-B@Ml62+w5DO-4xdbOHw%~CT+&0R2hKK6+*aN;}#xCcXC8`-rj z#;6lm-Bt>#;*zI)V_WakvCNkFRBe|M;i6nIt8_Sqf)GD$y4Ebet;_EQ-h36+-}Hwi z*G}Fgdp~G<3==(#xp-|EIBy&Mupf-xtXVY1eM0f9a^eqffibJ*| zFeh(6S1byR5ldEw}h82UX3!s5W0g3eUd%q+f2x+?Q9?AJ$OF(NzRM^O0ul)+F&srRw4rpP9NNM zC+6g5Exi}AgJU;t`_6WH(mrCoZ3b*c%ri})d9Ihd2^NoS7gwNk za5jd{cQ*6X&O$wBl|Mpu%G zfG|V3AiCEMp;(0hIdu;xI$DRF-Q+5CzoEklgGPL8%wa`qXo-C(ae{e2;oprIn(;Y@Rg$=FML#BVB8#k+Rsl+tItuyeq~L*%@f2v&d2@{8TD zM4U=vKs?;y0D1T4AlMAjt@pZ4y~b5b@2%c%N=e{S-}#nshr*)&pdIT`hWpYx&!zQe zjQd!}?*!y1TmKrsOhSFkV0&vQpSUeJ3^??Yn_vhJE!C@OqdrT8p(8U?oK 
zh4%j8J@{vmM&n5g*a{t_Z9=H#&%@^O?8k?dY_{BgDp+AGs7eel>=}gdqYj%0RVi$( zsT+LAc6Q%axVf$PzQhzC+57B3hfK@;tUU~41cfVo{!Kj}NUffe)J3ZeQ!*z(w z>Yf&dPaI1$fq6}(4-q#NuR(Tjuk+8QT?>!Z%}?WO-j#B?w@`gzPQ`$y$X_?XzFGTR zq4hP-)!S%(Z9A9kK-iSIk7=8q-+i=TuFWi-ym*_>eUoPt=U@$W&Du0xolIbxFcuds z4|Sb9PnETL$71WkID^fx}bZ->Qs>AzZ!# z)c%0bGRnt2(({R^w`7S zQ7`JPVihS~JElzLcg&Jdd}{iZFO;O*+4PfZg117qLHd0iCL@#g)Gf`g%DXKUr@=Yy zaQwqceMb;fi5;K|T|B z`ANT$P7xM#`E`EtzTje-z>i*~rOcq&w0y=+5+UNB=7_ZR+xavh$!gMiy9+D2V)I5) zXmTO4S339dDqho((|)vpY7L~`^o1fNL?K(C>SAW7+0tP}5O6WnD~RdrArPuwYBrFn z0t9YDTYbmUanM0m#&K`|H1tT-76<{b^1V|*ZWLDqsJ;U0k+kIi?txp3rqAApczcKB zo-dSweIHV#%4W#2=aTn${B1Sv+UK<<0kN}qKR$ZB4bCuBx0k6_9x~vVoKV+ z&(}WQ=Jfd5nXXxN3SCvQlpXd}JoI-|b2eC!WgJd}PGeu$0!A_7d^#zIInYxi2_?*Ae@&^G z$PDnH`PPs*7BM*M79tWQTA8;<+CjnjahNS z)TAw}dr@;mwFV9luiSC7%1XKG3xtoE5sB2~ygqfPHmK?D`3S&-UbuAZDCpu%&f(5$ zZ=tm6>C+h!4NRlD7~_9!xK|Rw7kh7$EdN8&O|Q*;*ZCaD z4jJd=S~Xv{DiBm!zi9n!b0}i$`%OoeZgb9z_M07f<{%w$=I`(F7_&6GM`$zITB8MB8N6Ln8`vU|&v^H% zzlI7CK3Iehb#r8caRv?DU*F)1A3F@2*T^{A{zQd`>S=|uUQsZ&KA$%6(}JuU$Osz{88r^rp+Wi2e{`0T9QV1?p4 za~L#5T~1-Vhe|5^Tiu~ICc2J`73V*Tefm#B~4=bveHUwyMjMBL|;cX%8)=8 zoFo#i&)!T+)w-21=sR3;km9s1*flcnP%RDC*F=Tm+O94aEg_pD%leF8vta2*Az+P5 zADCIRacf?WQ5yN&B7R1q%5=w5DPM1NI*8FkNSjOkOD-biO1n=>Yb5tgEnr6RP3U8p z5Y3K}dS=;@c)-P$KCeSaK>{xIyvtA`@hFg}FUHmS*FTS48)2aw_y`Ge$ znPdOp^4YsOOpB;eHiXpO*`L}sIyT{J3b~>{{`Hm*>q&-6fwqLN*}Hm*SJZr0npYDr z?=PMOu;BO2GP-?w@jR;0&XjsqFWugHNL(Ya_7gUH7>j4_c5%P9E#H1=OZjV-#{l0u_)~I>-0fUVyiYkdf9XWUa zM1Xd3e6i;hJ1jx+30m4J7u2Est`0T%J8*(f$K%%KjgCZsHvMO3bvqCnPh3H|?xQma z4rSbdWu=z(`9a-Vy*y?Xf&ekh=h1@{dte9L4d-_~uQ60YMb*`Oc8Afv+%Yp?VF6=U zBVxaZSM8}7nHB{T5Ec5;B(df4+%q?_-G3OE5S=3EkUl8VV4L_ckv;LF(c9jrKJ0u# zcUAY~BU|YBk+VVlfiscRFj_~_Mj8R6yWmfL^BTYEytrmUr|}&luY{yq2gBhj`^c5Z z^S(cSkrU0?2?&(}>)0c{^rSVWrQMSY%$yc?UR!hrcSNmq+0&B!svJ0?5C~GA8}c>6 zj3N{*t4OCfKpu_^evK+tV7fprL3p;sL9(|iBI7Pia)v6MwpCc}&x=Mz?g403Xl<e;viOll%5G z0F13z2bFa2Hzg%Djq*8s(f={4DAR z_VYbC*mT3k8^YwXI%jshm2GBx>{5ieUdx1_gq9OvdT$5b@dmgLq=((RU{ZK6<-f+T zm}DK>i(S6*_7hf2xOTX|1-7HO4%Lop@E&^79{! 
z@9zg?%&B$Nbb{u$4&`iUl7ECne{W^Zt*<`qAxIkdiPu5@9OKNSobC�)v~C(0C)c zgd3@mu<_@wnt>uVJydQ~oz|jKOy0;^`Z?+o2D0^+hp!@j_=nH5zG^AYBuV|wimv<8 zJ-BGiO^XI}T+0%OK+mPa+&L+!)PYa5H}wL${$XzJBCc;XV=Co{g^!)F^tz?jpNo4b zH_VuCMYaCaZVyd48bC?#x#Q0K4CK%<=X&Zv)V@IQ!g5ZVK?zTp+C(vj*rq zre0*ZTR%sn9`4BUqa`iQwuwP$!iTu9y z*^Aa8nvPt{NV`}cy5l$vTGknczicBgdPa#+$B~_lxB0^l39bW-wL`u?WXo>LbCrxs zHO}TPn@o1wSYvVPGZi62B3}9ADk9<9rEQFD-?ViCJHyk~ulRlQ*z07+ zmqT0+dAd*&o$#ah@3U!@BqPvJ}Ns=MjBuIqf9PCEedGznEA@4tG^@#xdHP z5}hhW*p9vTm8p^F2zoA2iJy%YoUT99TiNM^!6xPDkXY%@^R6F7n4GGx+4V!RemOu` z=Bso5M|O}5LA6BSOdLB#UmR7s1}UL!yoSsl_4aP{66T2X(LM*|9)bk2fjUQG@;XV5 za7g2iD)Klhxr?NUp}g%l7S(du@pSRzjsod24a*3J?<_x#8}8QdV|kf7grum zMHRS^M;MRa{Q64RKHpz0W`#~YUyQ#oG(l?D10Z|E)=~C)c9e1bRQzl_KE8L*d#S4H zGq*7)2eRPeh6YhjH3bvBj1tQl|SyY`C6lvas01T(9PNZJK6 zP3wxPDqmT-KbA4>ntJkBD=r{uh>P2dKe_5iem*i@&Qi7(JIJESfjBKGU&VlMgWXOZ z+grrgAg-ko&vt-qp3qk_{Jyj{S5C8tp_aWI-lcFeqdCorB>t+{;r}X*a{YZ_D7jsx@3ZLF5~Y0 zEmA^FHl-=O@oYTk=b{3)f#6wrVMR^aAFkWt`K!X;*hkOEJ}h?qih1@jUzl5Auc6L~ zxmKdYX`}A(wIiw@Nvhre3EN-J<9T?KI85Pa#lXhN0pxf~!g)YyRJC$%aOPVO z1|N}Vm(EBijEx+5zwlamO7S~iGl_`D(3_AYNv=Tp-B zLfLb!LWW&-P|dCrm$Sp?uU4-Z9Z(L)Y`Z^8vKv;BwSQutkP{9P7Ks==4@J%CYWj*9 zM}5&B_xX$_jmo8fH#TZaygRjP#vD;JIFLu_3CL=zp!gk|koyVmeEXBMat*taN>zb& zg&Kq-YKy~J*#7QCz^h^O!Y`}mn!;bvx)sw2>M`%V$C^-PmWPOs%LdR>R9a zjk<;fPnjUHaeQF}hq2MN56#UAxS3c@3Q9#gOvfR69IJ)f)#IIsnP!H1MzFJ+M~v3H zm2atRwZuz(u=p#QW$W$iOXDKnfSyYt`5~>Wm|Mz|({I|E$#NdL=fer>#3u1y5dSj4 zhbTlcNm<$ZXDm5+&{w;^Vnmq)aShdk!HJ)q1*3!J?c7eue z4Ayl-cd=DH3Kr87G6hlUw+4yt%YStriba0x#%6h8yWB{-wpg`bEXk>vAuT`8CMCZ= z-ET)=GS~U_weHAuj!N8$QxriRCC_$2*OZ)z1s7+y0Y=tKL9QtIwdQO;E))*V`;X)q z!yVh(pIlUb7qE?K#Tiudee6%#>#9!n7viM7$pyuCMEsl%le^k_Q@40@a~s%d)S`(E zEoa4Rt!`>1A*l{oFdqaZ%8$Gp!HH!0fyIoqj-0fBJZJCd=cuTUbI%~>YWI-?Xf_iU z;p(r4yd|!ntJP(HtQYRCvJmF3CM-fcN?4UOu~xNlO#K4l9UutOL;i*TcD40HZNfNZ z48=KpV`9#O&p~l1lqXnxeu_{R(_Fy18x?Do2vyIpfsMNi==h3*DeaW9KFeGKVIEUk zFA=1Sbsa>aOw&?cN(-LAsQGLQI*QKv_J(QxZW9@`w79A$t3iTm_8RU}= zPk1~jn1_ubHVP*Y=ty%DSKZCk_LL+S4BZt3ps?hcWV7U@v&+g|tce!uuT zoaf$auXWTi2^OKA6T^5VDK+&=LRZ zh}nwN4f|Wi2H;M29qxDsS1;ds?$L2%vs&=*`}(}x?fu@t5*h?7mkz7o7{o ziz|$({9mgQP|Q^QNr%LsNmqXDY%h(Z4D5=5G#s8mXc;bGXjqNhviHGjue>Uo%4SRF z*bqwj7Nod}m)P&L4UmIEG5T06`^F6ydHyGsz7w|bSdf}FmmV{OAIoAn zvSLZ+%SiQOM*3+%Bp+W1Lg$l}=r{Uk#**4isDECH=%jX5K&c!$Byp5BG?w8J;=YkIeXoqkj znKUFjOl-m^nECRn!;La!Lg$gJIgh_m;Fm}zxFr*;hzA!C9k~v(P>w8rpF(hXh1ovr zzA%Rm`6u4?vDUSNLT~;c9KJVF;WP;$)M+Y!vNGWDe8gda@!UuX;bF}B<-Nf*2T4sj z3>#r!`)cWpK08bL@-hHE@LQROyQGIdK{mv!k;3mAV~Y*& zSx9%5c6=H`R2c<5TZom~S)T3I8*R!KE9Z zGy!Hum?_Ifj#-ah^FhR$lt)QpLd z4Z=r(dZzP@l^;2su|VZMmnmOEH~2N&6&pO_5y1FY{2%~AEy}vnB0qX?;I+BeKcB&f z|5-n=5l=bT!BIq+;RyxX6beD)7x>UAtobc61SA?P_ozwGiB-Aj_c@!Lx0)r0&$Q*; z7-Q3p>Q8fJ@t8ETi=ab%YjAt}qA~>G@Vs;N-`I%rADs}msjm0>eWY*01Gn@It7Gr) zvfk|JHY~V9eI(H5^?}anqY4?%?)Xku8F<& z>_)a|3WD-J7>6{IyHJ7Ny`sr%kPEeFA5=8sz8I;*LW|uf$ijVCB$3K8y`x{FJORg-`CT zC}*oRScJZ^5!az4e_~k*L8Kie5o|%0U=n+}6MSoXJV^q{avZhx_N7Rh6~0qzf$Y&r zdu6)*)REIY#^T(0%7wuvlqQEMvE;#rG+58^o-`ukh`jLP##HQy1~6-E4c@rB3Pqh8 zDUnBX7mjDFaBO-{#bn&eWY$}&K#}-hW>rwhHS7<%)64c=7yoZj1-pKq1+iGlPBJuV zKWWI?fcdcbKl5WJrm2fffh~(~uvkVjp*vVr(~|$L=|8=URvWRpUf6Lsh5vzbQvm?> zx`zl(i*xr!4lxhdG3~Y`Q1gGiOqdro9<4s_DQ8>s)cb318F(RE9jSx=U_oa)!&<@6 zW>xI-V$Y4~$-l&cpIC)?eD<+JdcA$LeW$*9XCE(FnjzJSg_7=*jN^W1@WeUBcjDH4 zDPL7o!srDPfz9aXRG;qPXHjo@CM^=WfXt`E4qzoma*pJ40+uSL4biBj23qPqe)@#A-O+O882J9sS zx^ICqC-ENXg873a)hiL?Yz@}dc-2eO3P(wUqi2Mlig-`}Xn^2<>c-!c)nYA2ANpSM zuX$`hTok?gLtX^Ds38~f)saMV)hGjY49J#-6JXcd)fmPuT>MU&!;gXb^H(>&Zpei{ zD6$?;nhRf>Cl)J|l?%H+@7`H_THjT#q2NZFv}4$jI?{y^AFw)t(<3NOQOC{@uK$`a 
zoPZm>!1K=HBz(h-CC8)qCeFF)q=Y?4W0+Y>aYM_;Ck3GXj6bx#QiT@aGiN1BTVkl{ z$_soMv^o*z|IS*ibD=5ke1x4mH+90p^=6jL+vCqdmy>bpw>AThce8)=@3y`C^n)S` z2As*5mQq-ZofZMgl3aFv4EY~!kc=DVgPk4%_|XB9(t z&pkSvEgC-Fd2cJ<#I~D^+)wy<2|Dc}KteTsyumg~<4T`RTwO73uT1x6b7?Nz2m-zv zqyOe#?uynui^nat&s)saS#K051fD3HM8_dfRsv_4@!qD$rGwLBE5@Z2j9$ta(Iy%Q zyI?(ek&`*!o}zI)2_mMe+s^6{Ncvh8eAY-1@6{vYFcn>k8*Sfm zy$cr$g*55TbyE3$Y-}MsJmS0A>(>=$`3LA|Pq1!y36T*z%Y;3sBPxQ9<3LzLbMRC2 z^lI6cc)`I^f-xhbbhyc!6GZwVIRv`9)wSdf+(mLG-yGJyMG40l%UHu-3#%X;qlpQ4 zI#_zNF=lp0{;4(>6BbnpqPK82Py0fT!H1JSM(`6+d>88_BgyPd;`e|gGv!)&v8f|h zKFe}=GlJEsk%FxPR7!jXRBNR>!wcL`rav1Gca&M6@ZFqE% z`4Mh^%VfTB>88(OnS}XjA%!~1TgzdO3p7|7|926;mpc4??7wq26+B<|^nJ2fDzywu zFo?l1EdtXHOpk5ff@z1DS-<$rG(ZFiXuFs|}Y34Kpxiz9w9v)SYh`Qlsa!LK_OFPk$W_-wQcU; zqnMAG5Q$Prs$WQkS8`znPLX==kuQ7CiAW{Rl1k9zUL&)gL2Ky%RI6%ljx`3Lym78HOG_r#NWZ`h;UmT; z8Q;NB(OjT-ypxw`C{7rz=Ah6?Ilf*d)0!r@p+-^-rj8xi z_6SQ&${Rp@207;QK;#<376gviKcGm_O;|y6$pBqF&Tj(sX+L)PBhju%zN5&)Py{q84S1 z!u8GCK6^gp(|xu;h?PPKnUh7Lmhp+RzfjWm!UtOhw9(KveIW^uIn_ z_4XfElclN`*ZUd3r=6|g_*_mCYn{^noi)emliSaY^fz<49-|%;zdlvkVbJWlK+ewK zY*{HA(P$@!lXVkSTpg#-w&~WQVm=nA@QV~tjbwOd-7zb2C?(IOw{6?D(sBB$ncUFf zOE(5xIKJ9Pt&il#NG9BsH`1^QjnQt{9LJsje&!xuc&TL(@ zAuXdsJ#S?ulhXa4ohB~W21ju2HEmn9;Ale><}Dj~ZAt1pw2jd+HpPP}W)J-w1RDseHl7A;l`H-f zBR?QsBau>#e*U!E>9Dp@ArRa{F&#eiGa?C9X0D*u+HD^SnppyBly#h5H*jF%%7=!sw59c9vD zehhfcSO<-^K!2XtS}}-6ld)lbeq<@ttMA$#^BVn6O>T$3LxpcObE-NtEn)SH3DAgsjf%Hy@L@o z>)9|}Njhf6u=~m;LtCH0meC4`1j`X@*Usz5Oj(WAi)jVKP9?vMg6!#`W_aJeyzA9E z8Et=&jhAK;rplBlx~kENNni)V)@4o#6iK~r3DI>TTeDky--t|0k4HK@%pgO9xQ%UD zyh!gX7B7xtM3{)5K!6}U%CGpooZ#bwfJBA8TNJ|w2h=#+HMy)2qAkKu)x~cv^MTR5 zgRFZprT~ARVEa$0VJl_teYh6S_m})2e(B2S7D%gA2}!UY_BEL%&Tpl&tiC2nrB;xd z>BKo49MIQG#xbHH@XVM6HDxXHxI_x8HLWh^aO2<0Q|I4KOH9SCksvdzy{{R;Q_qkt zt6QqxbuiwIc%>4LsbH_z77CuZ(N3Eh{Hjl*tq**sjUxsbL00hB%O`K$_t@x|s{n4T zNd=a$$ae5z7;Rcbu!eQO`0qOBG$j8>tyuBKRunfzdwqI*M)DkXw4BTY9#k;h5lpSc zQ`n|Bngm4zP!!TzK$%?Z-G;AmCHO7HG zJ4a(MJnx8jrjb>P`5nQ+l}d5)GCk*Icu;gi*^oOINvafMb|ZIakvKmN9Bc9!zuX@| z8c!6fcJBtgI}cj%Z*hu}cIGcMT*eEDaRt3viG8Pz`YPlFCsx%E3 ze|0qp+oBM@_a-zIsY9^~(nq26QCP#uvzBLITT-Fz1pxTVGcnL9>X6Hfuvh0pCi`ERa%Md2+UxG~gfM-;9Wc)ekf>K{tXe9Mtf!(RFbeqz0o?=Tkh6Nvrj3gQ`mk*o^N zm!-*o=#C|``9cYa3e9*JN%R@qkelPrEPd#e)szjS?u45l-g~tSiv;RefFk~@$ll69Yelw0B?`5LzC;tmCJSyx_+HqT%Gc-2 zhqa7V;q8X$f6QtH%hylOT@X$Mzo#h71A{SUK$?cZ-d!_6boCTtWx6T|zRb+Ik5lZx zC5dG%G$-g=G*YM6F_`aAlH>GIDIqE;_y7oJh498JT}+&LXR4d;+c`H(r3h&!=?z9x z4Q9TKSxmY$n+qmpaZ(L5^RA7HmY@KNAqINP#5>dVozR%cDNn*ch4az#C??EvxggEz zsSOE4zWxw3&F#htFngbgdsT{RM~3V7uK!%; zSN!T%2CcRzG~5cBOfItKldRJy+p^9QA@i?}dZ znE+cDmfM=j?ciR(FH$XL?toJf-0P#?``x(7+V%+5_T&Q}4ryu>>On>|O2>w&hEpt* z5)Q%Yc&uncx(~56ht=CiOPu^_jEY%zk8Kpx8pu5Vbwy1^yuRo6Z{#hTke{V6p)&Tv=g`ZHv@IDp| z9-YRIOoK7?Vhu_H48|kcl8_9){<@Y7i_RF`qbV6-7s>n$_Pk7Q+O8Ny@3HclM47Ac z6zq|t>*>*jzQ1Q3l^j2@k0ZK+I`N0qp{^YV!oBYzZE5 zSvR>;F(^9oMiSA@_%a>wFdl#lN12STlFn`{Qmaf}rDn#9RS6j!Q3~}X zj=UMxLXAIWT*~kt-mDJCc)Cpz=ibFBQnyK#3pFG)Am4l|0PbQn#eT`Vij|AEU5G%h z$?8@IdZ=eNwR^{eh9<;Pjkqg_&CZ`Hvor z^fGvd$l6WXOdtBDp6J#m__((+#YK7r9MVZZf^jwc^VldYv>MnCwxEHmjCA-@!jTj?aPs5l^liizJ(^&FE1FpZ{Ym2#`r~ z3$WnCaEA?+aPxO%`B{1|`gSd*Ka{eb%NZ?ZKVE^@Xr40xBKY^cL=YK*9#^7FK>)h( zQSI76fgkV{B@bpHxC!faVCy9_0+fD8)Zyl>Oz5wZTeI&x21V>$btPM->8wm90k^yf zdoyGD<+a&Jz#pF3h!1alyPUX(tHDr~S87UyD+l>$24NU?oQO9D4|DnM<<{P-5v z0EfE~)@KAjemmaKTCM0`k3tG8krF!R2_~LbrBR2%teCVPh=veVmQB9mWCw` zRBgo9P5Zjdo9INN96~`85TLimeAWEwn27-7gW?#U5e%o(cE$*1-b}L?*H}@0i!8#D z>Uo|PP&r6F`v|C&?si$#j^150fj%x~5ONvfry{1>s%V^z?BIVI6%;awoqIAAE+1r% zr%okZN!tCI+p9joS~>M{6SzZ;3?!2Dhs9X!)6EG?W`;1=K2r-_=(Wi~M!Bb|OgmT_ 
z`2VC)SopD@PttM9_!%^JN0ir>nt%q^UFnwBe^6%XTT+3YDSb?Ycreb%B%%D&Nya3+ z2w8xJsD7FRj?pAvgW`tTb`Y4^yWJDg1&-?3wn>%6BsC2_CNkshL&e|3s0g6 zCp}stZhun&7%~}K)l7`s*HIU=ZT@Ig^~ciyxVAo{|#log(TGcqhFz2n>YD}PfA{!SqL*%27i3L zVt~5xwo(|dpyWNbTT%Xq90l-OjX0{cQ19gm4a+43;MeNTZ=^*pQErF466HVSl3n+B>}KhjI4M{vNuAyFoXS1WABDQ=ro#C9LHsinW@c$u zat7*s0VfDf|5M;;M0)rQl0tU8yk)AY$&F5i9w5cuIvS^~N4`8Er&8j=LloSD zIB@a!n7j^ZL*-A|ES~z_uESM3XAG>{e-s_b5@Y`0H<8?2V(vtNLcG>P#L70QDc=)3S59YTUZanCyxMgJ9IkJd@Js*GAR@QbFvEkyRt*ihX00jFbI`A{T@Hi7a>$ z9dv>9Zj5Nb)QrZRk2L02K06WlI?fU!y<7-R6wIRSDQm0??g)lKHj%zN!@_9%(a0V@-q0Y8JIgQw0k zW7KL3JY)7Dk5n5?r)jU5j0mN7vF}HdGu<)aLXMCHNd@t)OBd>dOcSQhVqu3=2eTsJ zgNs889adQocnYQEJQ%-no23VQ4pIz4bPKzPwc4-DLBR#uam?%N00hJ1njr|mOjTE{ zuR*ca{PW6n35vM9iK!*t8#DOOToBZaHj4?8k)~387a3NBLhj#R<;uK?z!bpJAS{wMPPYv6QFvJ; z1pm(5kCd0#WeWoFpwEhy?MR{TpwFJvXUtWgmeSGOP~>%i;$uC8L4s7CRaGSMz)fV7 zUH@X6>SJwD$y@wy2ft<@D9oe0{#fa=1O4+V;?Bu0XBj9@M&lTPmY1jKr%$u)t-%0H z3-xW%={G`|GW$M+@#1R2?cK`Es+e7a%3W&Y1={ajI{pp38a*BZf*cLMk@lcca%YXg zlb1((z53>tdl)5ewLO~{@W(aPGbV;*m_@yq z!qTY3JAN1dwSq6%J#P}Te0+5klVk5cW$!ppnl4pN5rBxnk}NjD;mr^O8WxI(tuyk`0_N-ZINriG=?|u0V*1~khV8VY1|dGfHsb!! z+(Ui-?Et=|dkl0Y1P6cph=LaS8TfA9T!yz?PpqW;y^36HLg)!o#r+qiEHMP~Vi977 z$7(}MP96Xy$AJ4j@)5S$ z2snd)MC1dM)y=FAI%aa~((I9!l;V~J2~%)Ps1pnWdtN_h)#4y1#Z|)Fy9R6MzFoTe zsG`5SF9Og>19#F$6A!2U5?$CmJUloKIWH2K!Pd!8Gl`-1B`tWbEj% zwiRkjD6ZDTM|sd?csJIOZSX&P3A_*kqq5%5i_x!yzuk!p2uJdXg!FMp@@_6aB7IoK zTfZ~n1_C0XsCgX-MJnqGCJnx&_GY%K+A@wwo}wu?zoJ5#%SCTshjddm*NlVOA60_o!t^8= zI0W__5IW`8Nk&UmI_i37>*#cFxlw+_lofMOq0LpPidbt%JRf+;51US0iZ2wkzhXBU z{sXo$ZRM!4y-fB)6GIa>mYK;(pHg%hKn`sr{vXS;Aw-_P)O1OwGV)Fmp4(3wz9Z;JL^LazLgBqs3c>31Ete zkvJ1G`mg2RFVoXBnbHFFXWG}DO5nA2ddz$^Q8rNcLw=sroH}ESu(vXg%7D4dr20c9 zVNbh2>kz^V5OkSK&mtMk#;7y~;;>bHPfBU~h1=K)Dez%9_oT_M9oq@hXPaCI-KAEa zu{h^qo^D~8_;yJU*(bQ2%Oy5pYPXS<8wW+^w*v_EnVFo=7Mxz0CO69%AvIkDua;ml zz0U!d&tone{&(zC2X!Ary4j(iv_c8}woL+hqX_34lAb%E5GR|RK3+PiU)tc&EO!lKt<)6Q?q{01?$TSpi z38`d+Wo9~JQFS7;L2m6=S4)!eGXEzn&)k-^*? 
zd1y`4oT}4%G%!z%}xCXHc>M$mhmTVAT336kckoBel%Bj z)&g8&jvAf@O!Xhv1y`%@vuHDzBU2eIKJHE-d^ihaG#+dinEZ??qTvKcSlIFl81&S% zoHEM=3Op{yn%GAlOe-^MQu7mA{UvC{^itXKzvVGn(In#i#7D#%-g`5-t%^txqr;ss zRa0U@3P+4G!CJk))@m4Yv!C;=t6-d2%gT=&k-LlU|HZLBjegiyu>*aHJ!<&T@twR$ z^k4HAr3$u8`D~&vUEwT~q%_-kU^k{QgYV^l6xU@aP~?)2R7Ni$;PRB>bq>wO4x z2Q47emNCk?Js?qGe-5jolGaEsMPNIPaN$dtXL$dp|N+K@#;;e$!}L;e9} z9|)HU8%z}N04-t!fy*cV-| z&}2yI^chFepYwSOh4h{7N6VIfD{fU8et0cv8q!pPWz}4dDhN9|6I4wEbU6S->l0aK z?`%!J%XqGI<%f9I^uH^v<41c29XWsR#SV7|oO?9xCy>;&NqxDJX*3)v0PF5mQe}Es z@{;McY=s=QsWN-j8l0i~VYxwu_RW_Ls(MO$M{F8D_^*6~WTdgNv!&mSpEEAgV7HKY zTz%Wg9D9(mFuZm&NL&x$k&5rqgW!Yx@a3u(zOIv;Ue;XgsP!R%QYvY);a(757zH9- zc4Ud;32BE97bj;-a`!?>KVi0llNL>XV{9ku{Qmt2^8w^JR*d2BdNFU}#jr1+?>tXidnE0BuK=S-> z=h>P=fbRnz5T;}T#2o|*n;igrz#sHq*Bq9%ys)H0F?pyPCv1_YM@pkxZGk0jT@WbQ z5KDokY=z2KTuDMU4aqZi^4=l86&mO^S~CWqFJ#i%2anIL^fydaUH znXJV@%IYSNofgsOQP}Cg&4d09K3VJd-5y#GZ}o0}XOvHnK&sdphlZ&~#{|6}+ePr)l?$_|NKwLRKN(BdZ3 zo#DJ@U=>sU752Y!1jPp&lbVL#t1ET51sA7t1e0$u;%X|Ct*=X&mew+NwOB)Prz=`#`&@WnIu3xwe)a~C4 zL3v7x3@n3V8V#$U@_G!`_`vmnCMluP{oO7rK%lLl3x8yU+u<%d=vI7RcD(rIYmub< zT~sKdn`Pe^#RKp{qrZlIH+Iz?rGH+&5V9Psbt{^s~I1Ml@4D2Us9a; zf4SJtwo@OBo~(qNojBF^%Gy!d?!UHHei#89mXzm%#QE2`WDj{{{~$+0LOqi*%6P%0 z%3*@i?u*OGyVk3B*A@ywsLuGBl2XYGDBy!kJtwQF*UaS`^K4pW=iof1FET}khs3Pk z`NJ&y!b>98;h~${_Too$)x{x$R6!8lWcpKg1iM0@TPL@5L~j{1C5nuVnU4R5xHDw3 zqy^a<2LKeQ&$;g-_YXS^u5A2l7-&=BGi7NvGn(RPbh&U4IM@v9x)hMm*~+kBFCBdP zu4W6LX$?j_MX-4Jo@9aOZxENUak7i;55J?NPMBy`KM7T5ki?o8-nY?+u$qaWER8=g zX0`0P5AGVR99*~Hw`{`*p!!-^knJK}Mz1=QZU%3}(R)yvgcrj?|fbhq#uk$67 zMp4}MhtDq#SrBar_6ynA{zL$l`8iMX#AmJRP2+R3}^5MRaqpmbj8GW4!Z$hLkza1`zr z@k1u&zx9zVlB`!`#B2Lg5tCAMDrTA+UfcW6Nk5kMr}E;uAB)ID3+Z}V$xKiXWLCGu zb&@@Pb=!WfDCLy2e{fUTg0SW%7c@zmHGmJkn5=1dILIl&6ZLKPV0MRz{m^T^tnU0UCMJ`aMmWMX6AQLqmL;?q?P zsbsx@f@LdX-&7D>Q*qjpw6tK(m1T$qYAVZXr#d;VCrG*3N1uYBJ$*>h8d-xGYpn=o zUXj?>QLCMN@Z(K7T^8!Pfq%bg=|gHJDV*VtQ|Rre}=?E(~;cSh>N0a!&!`UV$bA_ zrNERQ=kmQr#)YKfW1eZN?^ZaROvEf+Yg$8b;+I~$(Pc$u*9{X-G#3IEkEt*`$QSVIog6J# zA`y-Qp5M6VpbaKYFu}LMRK3jUvBOu0mF2z1`>m?1rp5!TB?KT<)b`${2^}{Z=Kap0 z{@V3UP2Cu&xngy8UO?MRAL3Ui;OO2=NV3gbgfYwkP86@NxCxSNd?D*Z;Zxl1p2TPq zrfV*YYx>zPG-*J6HTk{i<}%v5b&p^5)+`-ncA=7+ncNZE0?ZkE3V~-}!vX1E{LVMpgh3KmU##d}~-$~?0L z!|)PA9W6o#giPgsU|Bd3WY?@A&mz2kBdC8gH59E4D;y?C1g*@8X)44>)LvUB+KSRrZn=Pa@>glXfFN%iKv9F#NG)hABKjwmrQf`7$ zE^WH##}=w5_T5xu{lMbWSxb-&^K6pkh!Q&d0xdri^MFOgdH#*LE+|n)iWM|pweW{VTV9CFXr9w? 
zT@lQL5&`5YX#i=(c#8(v!80ed^u*m4}!_GKMeCmXy@wwvgds+K#6l{NU|Do5{(O1B!Z{bv(e>!|OAEauS zFeCzQ!T5<^)IA>Yesp68z2Lp{xE_t0@12s0l`&0uW2#aSd@}jt+iIPR$@|wAI{##s zO~&Eqz$0ku7AcgPbRy%=czUPh9_h?#Y7j1-_uwi+$vayFT~X+LPFx#MV3UgN7xq*W zdRE@0<>|@hX2qG>alJKa2Lf$fQ{-%T4DfS`J5Uf9P!LYt8I`KK-+Y^67+c?upqH?A zbu+jCX>IsTy&Mr$c#Z{Qw{IN)7_C$@ll$C^JjFaM4UaBV3d+sjB%0sMUs6dF*N}-xms`V{CaT%m*h#p@O z>BQbq6`f=qyyS0ry8-B=tf6jBpPis4XrLe+l{eb)ECZnKA49`I8v$CsCnT;z#CU*a z3rJ6pN9ZOU#7HD0wcJsit~-$nq-<+5xq1!z^C_`6szx(sQ!bfJfwoLDM^!hV!6YSJ z+0L#W|7eCMNd}#2)Rrn)R4P|t<_mHSDlSf8mDcyxcR%pilbomaJVaG_erwu*dH6n; zqfkc$7&t{y139)h%fUV|pyCnKR07)+)&mzNl~E!yFB_feQ(|~4lV8CVewB`IK~pJV z&M*5ev^{b(giYFsq`_n9ZtN>{C@9!j#P?p^RxU&>uHm3yb=kO%=F>&qmOf-m(WdU_ z|GyTDdlZ_dFE9Y<2rhwQ#LPA(L4NcFlH`}C(gvI9b*L6E0yhqi4ydqdDEI}QbYJ#w z6s3BOr4oJ1EEBU=s*~`r&>xDG?ao@fK z-5cUhSAgf=s%@m1wL)&1?g>1;v`GxC45skT;j)yN7-vDMotdI z3OSDKnsivlGMbhGKdZ2B)r5|NC4od58dXW%bW&>Fm^=Eey|!iZb?s;alW-ume{ME6 z^-@gBV6DY|joezuIF0uoWhvV7FGr*jd;7XXF#8r@)E{3E0EdqiKw}A+tfszOT1xAM zI@Yp=1WjEk8mu1Q_};EU1QG6i8p@7^)KpTH<|>_KzF@VKS?)}5?*^>Muh{Dbomv}C zZ)MM%Wl3xss_PQ69Hptk8=e64H@5$<)w6K{ka$v-q*jkReP%Hpze^vX@;;S^oiF#p zP^ZC<|BZbn$a_rk_ND!%!^nzsbP&HxMfr4&>`&zRfbmN4n7}mH0brX_P`(N#XNl#< zmlf3~Eab19m+!$p{M;v`C0hYbGa_hx+LXnSpxzr-XRM%bQN=*EL!~-s>=JoHgqoiD zmVUtXU2Q0#koE<;u(ea_d7+7=)KNo`nZe3H+js%Zapby%dzMdg8Q?dPc>0LC=XW%$ zA&94IY=F+HD-W#y=xdOp2alN6y9Fl0=p-sQ1-ZEslOzb)HC zFhk+y8%GUGuIY{$8=Ly=tk*N+t09D{jR&g)Q+MN9*#U%VFjBCoYKH{i_rn4lrfa>o z|Ip`>IH&N+O+v3&tywmNYXlqo#0uK=MYXTRWm&c7fih5AWF1K^{7`h}&tQ%WMSXlH zROqnOkl9@Ep_(hq0c+Lm%78cqD5!7Hhd0}Sm(MfNEQPfILeGVu3nP>A1{j(9C!*9% ze%Y-f92R*nz*5!ps^FtUL*f%R2QFQZ?qg>85EhKo2PkKZ?fG5MUQ(OS#3l1T7ru+F zj{*hHy1JjQSmy((?D|kgxB4pGy3VpoV$y(Rb%Ou@QQXk+LK+jk1>2b~=1%HZh4Dy`vziB=x^Yls~C#>020lv-;?LpQ~-2kH;EQQ~}+TdG)vi3@3};f$5i3CQ3^ zYuR*OoV=rykE7K;8F2*>kUmk|ppqG+Wg5r&D9;dTq!bzT=#>%e^-IZIqXezVLBrT& z@UWkNe@2~93z#=99oN6=eT_z!x91M{2FA`8&61U;EHu_+{`Z+zQ}A4Ix8FtM{{Ptf z%BU*4w@*+36#)eWk$R*XrKLqWr8}j&J5&UuyG!Xt>KwYeI}aeufkSuCMxXyXGi%M4 zS!>pOdOykWu6^(O>iAtNOJpgMtw<0u=ihwTrl^KTyoGbW!|`F5VD^;|{;*Ck`6BwK z;R!>C7GoQZuIm}L!o>aW6XTd5)NV}ssjS7%Bne6|c$O3=(!|DcO2obc5h<%vtQa7IKA^Y(eaz^nI_J}jXD6Qbc0+zw*m zGAIlpF_r2+duF^JU?lZXDB#CXv2-iSNV9zV=2n^iF}4MD^%w0|x+=}D5%*+(Z+p)n zGcHG)kIj}gk@-va5Iz_UmCi7B(sM-TG9gZ}QMBu+aG7*L>S^TK`ae}ldtf4`t3`*4 zS+Go=c!Y$kP>Ok=f!pk;I~OzWHnjn_M&IKy?9^)CuV?9YyHgdXu4(;7Bd5 zQBNYajdS@nDLd2>L`LZ_uqL%P^s?e#6x`!(UOu7E#8ZB2dT(B!9;#i)q>$wuuwA^h z1As!TH~iTQ%?dE+i+}q5Ts+rXiQ4Zbt;Os7rw1K@bJs%jRGxR}QP$xyB(hl|UGzI{ z_&}Bl{<|`5m=#psfJY=E?{IQ)LLo3%Td_LJuKal7>!>LA_aF(-0WAGk`b#2n8oQuR zBXSrK%_V)B-RXe|Lo6jl_-`$PR(VcOtlCKd8NuQV~m%VsU#5A;sxAif^%f2W!v zV6na%<#KXl>0(A?!t>d|Xs6GdrDS?=5%hQbgnWqO&}rE3oN3R2{281Vn#d2EoVz@B zFNsQTDcvkO^}5C)G@p3%M-UpQ=)qV!vgOej0_~u zxVm?()qPlQu+IR^jSYtx)EOOxcHyV4N>Mx8W1m86nCC2Aq}jL3u;Zzt0>tq%$*_Zg z&GV8S1T?JU?YpbxzgXO#7f|@|2zNjV06!N&KF*F8sq|(Fg7m&tlTDpz=v;hi6_F}?!{@{|?Ly{}xL_P%Q^5Mf!3Uv<6(a-(z0BoMwi+9SaqTkg#>?mqAtcx z7Vh2pH*2+T)_C~?zp_=^DTZ1|e#lm#W1_Vlgs`z7dTFc5)y!=)yBXI-q93sE$jN)W zci(K*?77VK`%s(xh#R+Q~3K z_SwGZ*lrDT=#Mw+#TV5Lh&{A|&l%X$hAv(%Jbc;)oh`WA`CHg`HO0zn^yJ?xXia%> zY$BfiLyFS#=9dCN5Pa)_=e%*kN9L;KaGTbp9fi%{(1NmOTlM$WOpd2na~su$2FzP8YrqpiD@lmitMf1)uah)UIlDowLgx;4CIVWA`=~L--eODx>>w0 zq42Eoza~BAJ$%bJ8Q@=ev~=X5hW6KsUuq+grCk-ylG{ChyStG|2W^?vp5IkS1!|R| zJSPJ+XDyG$!`L6Bm17Q=bH6bt)CN0vhdsU=$w}W%*ORs^itINANY8Cb2CVGrJspQ` zb)d7%O^4T_1pw(B^m`ENeE5N!-7XZc0m)L83yNq5Ii!L#^uAxITrXC#pbdEI`eu*v z#E0BJaTx@Uo~e9t8hIOS_`46)_Yv|b{mzas8ou{kUhRy)ro0!yLl7r4i6TRolRV}n zz-b$y`%$$Iokcs&O|=MfK(P&vM=x10xL%c2mnubaFlTN1%ctRr)FX*W-I!^U`wo+i 
zI-^egAkap=9LUdqa}}h(l>NB8Yf;Z7cl&ARwr@Ayo=ud*FQ^{V<~}t`@2c&7K7)kz zyBVdYim}v8y6~A}!9RB7>w@1h#(aCtmq=hdK;2j1FUGnr_YR@HWSDx=ZKq)<6Hr6Q_OlXKN8P8$@+TzJM)aIEAUWv3 zRqdt7&kapo0e$O~MVW5fCL9lD+K$`%mK__~j;r%g3SKioa1-)p~6CIl7WCx&<1X52k`&E#vUN_LjxZ=#tYs}e7C}f@Xbwd?wN6I)TQcH2O z@5phbWfo`MPTKAqrfOkfq9=v|)5=zU=+cfCgud1f%5fmbfuHk`W((P-W)v1iwI)-# zTTw^evY{)a)4mqLo2YoA7YM3Gxm#068=i-tQ=<$RvO;o68E$ctQBJ1Sa@yiRVIdk} zL=b9xV0Un+?$XP$2Q1o(0S4>|1Npxj?(l%Ge|wek#Dct)dyLE%#oYoGJE@PoZ|C<; z@)J&;GVmBE7WbN<@i=`{Eg{7Dbq{hzio)Y-6WX=!z)WCDZV)D?Ctnk;_MI}L>ZwtX zq3*g$rM9E=EZfxURP~agWyVx(C)$<#uvSu-H&`7L~=IWbY`erWU!GmxK~32z&7iUb+4*)M{62<(fbyUL}X z;gLm}Me|4C>eTss;;XQP>xoXUeV5lBizj>0%{g1R)I0IYWtBK63}X;0EhH7hLQ8V% z&Om<@Nl(RSGmZ4NM3d2HhT)ech{7#I(Uv79d#if5Ql5nb4U;ciMlm(CS+y)@o4N&_ z{#9|!`p$5O@O?)9JeGu3iqbtzYq7Wpi&>&;f(%-8*3}2kD_Px)daZ;a znk{{2M~%;IcIhlz@B$u?f|ir$Ee}Uwu6A6X!*;bG+>FQSp%Jg5dz~>OjdfER!Hgc2 zT^048Zs#3gx&VRG(F35LS%gfHvX}iqLC+*XDfZHS&(dK__!}bD{u5%5pkn z7n#LZcQwzs7b~;B)y6MFzNeECGlF>$ce|L_o+43@7eQsrt6(qxD|?McH8|!+ zi~&PUPFv{vaG(@l1+Ui{n-B=zCyWgUsRQv~->GuKGC1xZjYvO^bI=im)K{aT(C@qA z#}k2~RC=rwBn4zh)Cy?h$VQQ>9B05SnMGgDWEh*k-}&|hnc&GufLcy76!=D+pO()y zOV6e(>{dC4K*$4dzk9CM>Y`JxWx|WBFFz^D&<{W;$)#;>9HC)^Y0^bktoQ4W>w!j6(8#7d2(>HFoYbWxPa;=9VaWbohWgh0wIqJUyA;R;LdJ;Q%B>TbjyysI8lR36tBt z*F(=XO&(Q%$)4OFQXseJpCeeXN$>+qW61gL^>!B8eBL!fr#{c7gZUD!vgLgBYtI!S zXjja|Ll6cT2_qA}pijQTowea`BG`{%3k?X@5@b$NY`xD?3ST+0FjMxUZ$JJg8^G?S zw~Ia13HUvWu(o;x88d}GgT)xtGEhbJ3XN_Og2@`3`$~T3kNiRX{E+Q^ne~<{-`lqr z{HS=iS}K7}2@P4>3@Yq8rqv9HtLpvr)HJtwVkF;*rWtefVj9t?7M#iwaZ`?h@=sv4 zwfFU}Ei5Trm~;xVn}N$)fwy;pv`aaXfTUMiW{s*NVx5xmAPT3tJHUh9NSUd%+&HY# zxTMlL&3Kp3e3wt5wzgX|WBPF24sXDiDOohs$f4-v{q{2Yiuo^+g*TFgl8lZVV-vqJ z7Tfl^6QX?fo4Z#GSaGz9l`X#EdP{n1-QLt(U$$Iw`J@aC(U!xf4@(c%m)9e7zU!zC z4}7VdAlTeSKR)(VGCPJQzMyDAKe6#Rvp^scd|8b3jk6U-jeLDjbz0~5vRKWi&9lSw=8yHd5Ypk-r=N=*>&*L`*@5vnFxto1Bx7H98)pfdGR2n=eWjXGX?eq@pEG%q4pLag@G(l6N7amC4vea^al|i&J zo8DR}R@#f7i!z1mpj9l$6W7y3u_#7*Ctk;1O@MHwe38G#PD zXK4WD6J!+7$M8do`F=p4;H%MORtoN>AL4I6m)cIUrudR*Z*#v^Lk%)SC<6O8lf z=qF5psNO-g+DoF4qNl#1s1Lt+F2)K-O6F$0n}TiVFnd0FZQuw7DND&}`x&?2VW+be zzom_~X4GoV_&^Em=ntJ`SqcO3YRfQCKr@#(V3pLi*Rls#8-&yhpP@}JOnGZ{I=Vbv zd}nWmSOJEUkv$!{Z0u}J-TA?XZU4QlmL)iRbc%RTHQM_$e?g0-YfP9o(q!~+csQI$ zK)aoBALEJpAlRWN8Ja5%5zs;@9Z@%L=!8y9IRmRQ-hL{9+*0rKv)e7a!eJVPt$%h8 zvxlwXPV%n=toc+k6kgGB)4uzZ16)oi(Els1D|9?|dNg+I;Kvyr2u66}yDMNz{W9!-8T&0< z9`tLV5LKyQC`jb%NvOiU<7S9Zx%z-+2|nS_vTw@MU-zVdrvN5Yxqn*2m`yO0H5hc< zo?Mjk8+8TMg;C2?Dz5B1Aqd_vuUx41yZq#^ROedQSyiDr%6|oXUUOqQldf`eBe+=* z1TPO#@lWWV%VIh;asl>;g0>-AZY#M92GUD^P`#CM{+3l=v?B??h9y~ zMbgEK3L|ktg{6D<(H}cSKkutKzK<>;y{_P=omYFkncFbMmzW3essXsRB-@|bErFiYvPPVZ!)vc1PQ;Jo_0&@kl0D?z9*FXtQcPj ztMzyy*Xeb2Z>yFNa}rRlp@L4rW1|zNHFNrboj@s2ULkLv-tte{ciH$CTWz48mk9vt z>3;gh*>45~RB=G?or>l4@9C)bya_rZli4?X!4%^{8G0Xra}r?vb}LqHx4`-lEfi1u z*B0crsH33Mi*5^f(#Zkxv0M=zRWJ)NKuSM`p!~TuZ)JF-ZpEN_Mx$H@R^oUJwq&PF zXqpF@7wo>n&Vy0BRkahDEeT^h_1*B*3BF1nqd!9mt0btk=9%&sqL0g78^dK&I$Un0 z)}&%VO>sHP=(L831;_M%{%hVcQo`WDr-<*=OcL+ER{NuA&u}OEo}J0LFz=b4z>`&#jB*MLq2J&h!&9@o{VO zwYu({G*vbgPE=Qxu5zJ}!VmFiJOnOx$?15~i*MoiUoSoRKq;xb{iFVkFColaGzrqN z@>(D)dGes>A7c6{*LM4&*F#VDg(nJR*}x2?IR?4DvV@+1ON zfuGxXg4k8DO-p573F@$PwK^6%qc6$Ol*>RS%d^KeDH`{ncFrpoa#ww_LfVm-dbo)! 
zN}KX_*Qg-eJhvCZzLrP|Y|~@X&Xq*6>Jb)Mo#-kBQwo)OzFd&Ne^R?l_YJ8F!jZ!` z7u8U~7G8(S~@urM;F z7b4B;``hMIlP^ua4Uc16d>O9n8Jv5w0y1}`4c~8jHO&SJHBd24L8k6Hn4Rr{AV|=S3HYCloaak< z`wC}VdCjdWA7_6SXq0pqgE?Y@A$+F?N4>(LU#-ufDpwli9}@v=&6tBABSl$mx6eSm zYym_5K>|URD$7U9KPr9aJq8;WH-ac_UusZI!9EqfaS+c$7YR^V5$QyFWeg$jR{B*H z4a?hwrRGJqS|j>0NanjXQn4K*Pu6f{_|1i_xjrH?!!ws9Lj9w`_=A z@pXIADP9D)JMFL(*+HgIoweJ3Hw*{pgB4)VKkK zdwNC9X6lE|b^zGsSGab(>>#KT*`tn^kqRQ~OSE#1W7Bc^u#Qo{gLZI!WnNyALdg9t z=FQ>IVr*mnYCcH#iPx>m$foh}*%2;;9_(sg*SPIRPiq)yx{(?5Y%xorkii72G zv$3bKYY4;r{q~+Yw0drlXJiJaPo;(TrJ7Pe-(pJ?vLR0#;$v0IykGro{+7<-2}dv8m)YC4 zsesa{czQQjDu9Ldmh99J%9}1_5ulTe#mTnV;5*2{f=w9Wn*A+_xGPUfk`r4GB;`aEQkpd)ZSj8EYN`#wd6z05IlD;7Z|)jhM^WA ztus>Vv$o>r%7U#>)(htR(8rRRcRmV^{mk*()>Zd;3{J*--*OC~DdMH*YW91nUu$@P zY3I@%DnXG!TGKa7Q{{)wyDpS`Z@6vP-JITVZ3N>4f7*HIjIf4zi!W0YT*=5h%tP6G zevw9YYww^pMsHrTRb!24C}pXeA&L8W{u3Av1j!`P!q8dIANx%jT=QRzea8yLL-H7O zg)YnEQE+IX6Mv1Rr)9RV=|VQvMQ)BwUXCSh{`?g`#N!jE`E{jFp(jq8Z$-5dcG%X>nL1+YPd`8n>(p}-c@!<}9T(=L#1zT=fIv`13~G>80;F0BH6%20Ep=KO z0GZ3ZQBrTNe&fA}fKA)muLqLW{dQM!iR-v7NV5DEzKtTAdi(B*e^7KV$q>Wpkf7E| zb50UPwrE`>jhn@}gT7YNGlI_}pRK~_pY0h14X1m5V~>LQq1Za8oiPYIDa-f;sd#Y zcDUVzqhptwmjsumY>2I*T{fjxgzSjoa(m+-%2-VIR*7s=SYwXYpqp_z#WxF#s#Rd< zcmwlq{S(??Ak?uDAm$*K*I~PSOeW-Zb-SpbcjKMsE~&Ebf96|>O94G0T`GR?Co%9X zoT16tY0BM7k%kE`yzlA7YUZW8;uPL99k*HO?e?$6l$-oT9@^m_*(*^F_^g*M=v=>eI2o^n9%Pr5?lmlmp>E{s5Nj~x!};_dDqpH0koFDG0kXL zOWPnD#(!R|Bc>!zdfifZ0}bhnRv_su>9P?TJUn@xx&A&>MiT@u~uqLW{da5j3+G9YU>3JeCn1OS>p0UCopmL8 z3)Va5{Yq;o;M3uCTO0t}RY&%wMoh~Sh?-)n+8XMApiyATWal=`dP8w(gb=MsFVnoT zyPj>(f0(eoiiNac<1>?3RvTWUwe8gK{6LVn$3CVkXcye|KCU}O{9@BW9FhXOr@k92 z$DPX>kV3QT=cdV|v-k;`e6-VCJzeysOfh3f5$LtUOm+$KsZ4Lu_Fgr*(a(bkX&MW& z3X`J>3-`@I8^j(6nA*G)9+5S!viDxTQ!GibBAY}ZA^OYq_C2zqW>#B`MNA`9hJs>6 zU#L0`aR$>~az_kgNyiXVAFZ8m=*&88qt1<*S&_>P2MZ-82E|DJjZ|l5+vKpI>~DZ=Kxi@a-b-h5%ME5J4XTS`&6 zZoq&RFO}Z-dwWjt-9z>F7N3>6E$oEZazGU>9TTV+`7({1d45!fbtSnpsc-`1EC1JqGzR>|7byEk!PP2vt36DJ<{bj?GRJu-Ds4qfdx1-m^^NoE`-XN2CT6~CW{)68e>}wpg-DpXx=y;3)#Prr zT?F!FlC3wq&qTT@3`8Rb*LA=^E4-!hi~CT z-&zk1$K0(dGS9I03{T=eGr=1MEJS;SNgMh)qtDWPFfIo|U5w&fjHgyMTYI*0Nyn<)KQ&tm=LitCT53i%K7fgfu<3Wf@sP2)f1t* zMJYz^w2-9yd&E#<*)YPk4EL-j=I2 zp{YK3I)Bny-&{u7csL1VgBG)wR{T;j>y`KvU}i=5tm*Iwk>8Vs|k+7eXO0ndvY&uPPR?yvQV4#3s%v-inRcYoC_suE5G3pt*+;hn$H zUP&!JAzC@W8O-vFiXzLSiHW3@U7<~Gdgub%`9&4qzrIwxBv2PSJ4#?u0{uE{apj@^ zwyKYp7pg^U6s;-fMC;QXaLcvNuN{V!VA$VW)3C7H&`%$o-Qa4SnWgNZG4^B#^g0ut zjn39cPK=@ctIinZ5ArI+us~YqRc}Z!Az|An>^FQ%xd;7#SBo)ivT$l~WqmCManNy& zX!1q)K2z9gBHGiqbT7K^UU)55pY62%CMtnMS~}=~&pi<2&`+t-D*n-#X1^L0nkQw! 
zb=}{k;epXO=~*xa0J<2L;R#e!Vf_5JeritDJ6o3mvOmV@qkm+B$RL*Y(Z+oG&ktt0 z!_{P!Yjgjmtqh!X+v1vsVJO?@%x~+zt_O8)!%dXRBz58{{hr&O1_%#~T7aO2s(yX8a?l*)v6m#lqT zDX6HNHn|CZ(<7;KDvZ5H5jTh#YJi3sGuS)bd?jf66en(W8*X(PcwqNqP^(eFCnh*6 zTPHBZ-E|Qrpidq*m@tD~HB2F8`%H3BJbFCsI-{NhaRA*g6YSdgN)|x-^{*HH5P+?C zXp^t?t{mAd&k{X0TNMs_H#56kT>DZ#d#!^qWye=gyiIiR@haS)Jc=Ys#TFSR^5OQGeh)Gwp3p0MdYBY7OnJZB0jKGQeSC zNcN<0+8LknO^1iTe#OM*nFr4bb`@uxjKvZm|JCkK%VZ7$6i>!k;5rTAu5d?%tWw6g zt=b*h-Jd>Ijf09>^zqdp15Zd-73lirKx>XCbE{klcSS4ZxEBN8*+EP7Xz5`_o~eRT z)AET}A0FWCGV}k10K~FZJ_Q_g$1yj0=ygBu&-E{Ra{O+|K_d|j^yd7TjDFJYZ+ZGBG0$k9r!7sDI7{D8-G?mk-p+JcU(&G z!QapOtm(dwXu}N}8*Y{FzXUM-rn)=fsJwB2=TzUyXh3n%mz(fN+kMD+E(Qn=vw@_b zXUSDXb-Ch|af_yA;SXyiT;Uchm29$HX|4?HE?iDGljz24%o1`JV+~l9myD4}yx+nd z3^ zuvtE%$N_pOfkL z=U^?Ts`-NT6!z?2f>=qXit4W0OMHwt*u>A-_zk#3%QUpP9B zBT#hpp_x_2jrPJ%Ivy?Vj&@(IL-Bd{tf1qKqMf7lFrp{%Jwb`WtE+t|Ig?=_Ia$M_v!=(6YVI{W z?lmyvMz!}3U(ZU12zQTf2GZc!o@_f~#$m^Qs6{*?l}_b&u{r5$SpyXz%DuVOtz1u%iCx0XpHy*s>u=Yz`Y6ztlGP zP#8gf893Kf%1AwWn}P%>vHCu zf@Snh=Wv6Gv{AYLHTxA6XNW|G2x z!x&&kMEPoT@6`rN#ph?aBoag)jEutJ!t;w(!SOHfcwJSjB!YlIEXNbE`;bA0>S0?w zmkKe;k~(&RCoiGD&g>b>y(^pHzu03^`gwVRM(iSMDcq&>pS!aOSh?_U^TZM)bYX_9 z`gI(lzb)6N*|GVE!V2F$a&T6yCrUlRE!W2jPl_MF2r(QCGZ@6m2$wA;Z}@KiG||L5 z%-EXa@g2MvZ5HJiZdOs%&h-UJylPb|zsK({o#+u7W(qbx|D=>b9xu$p;Wal;s)DK1 zi;ir~>SVR`rtMQ8_t*}^^4_Er)l$#wv?)5-up0B+2|^fO+AEt1Xy?qV<@T1X=w{zz z!G|K`@y($20XwMgiMTG{06`lW;-NzRlTDCNpm0 zYznetu>CM{(X4iP63P%pvt??2qFrEsXCB6xzDvohwz_BMMV@mMw+LGa&U5})TF}quF=FDk_9~}1H!*++63B)oqR6uKBMi^jtx;&0q5a!%L z)9^DTb;1vsL&x<&$PVTpN%3d5SJEldB#gCP80E0I$Lq3$t1l%fxT~ZboJi5zGZUeG|2~}-vVCAX*hvN3qS~h zMehJS4r3iR-s>y6={U6H#IM{Nr`onn?#G4`FVHx@ib%H?`4M6CT8L&(tUjK*zC9s^ zwL9Uwu6>!$@Z$YnKjs^P`2g;4vWiSmTX*Efw`#Mx=T;xLd#G(+eVQ)`dwpR`U1scG zw(e)=^Qjr@s>FmuLGt0WG$?y~_#a_58QE>5?L~HYMVAn#ql2w9xm=2gi0BT6MQ|yI zgEfP3OaJw>a0~Xs9(?euGxeL>h57pS4#)LVWd6DhtC?7aX_j;;joJpwIz}gf5`+;> z#v?nL4Iu}1VYv+PFA(Z(l)#gp+mdqM$bJZa{2}YQfjOR&ju{}8v_6cVtk+#RUx zmRN|<8#@_jD9!>gkYu-1!;2iXH^TJ)AW=cFD%=0_=v)A4&~UBK=7x*KzTxWD`<96@ zli-t<++b7ad?)edwFZ{6HJd224P7Ke6VDVK38^B%b87=}>u!J2pT-!Vm7eR~$y?8V z_`9Z)I2dn48VUM2G>0K(#3V10vBUt*Bdqq1B{I_I-u_AB1y?5c_CW{t@nBqE1gzfD ze0LeE^VaQRSDFJER#(hs3AZY~kAy@&IX8Z}cb~xfP{r!fd1034;B=DrxTtuRo#V7G zjn95x7Axhl{`TbD`-%yV^44PK+RUCCsZ@zrT#+WE;bNsttbk0i&TFH)(9t3QK6?)d zNyT_)V}E)wO!J~!<5-qYl7r1*!PR|ccJ+n`PWd^hz4F8oPJJdnfu!98X-05cRc5OB&^lXja+EC#W7c^H>wi%$U2Lz zfGaZBsW6t2p|r&a2}u_N4sUdBExCckdLM^Duadl9F;zUS>PtI6TDm>oufDzF=f9jA z@xAtDc0O{6KFUF>@+~x*i6rP!>Rm{)AZS)g@z^hr*Z}WrE^!Je+VbAd>%U!sT3{Z%lE!-mbJ#Mc^u55O4I@4XN(QPDEuWK0M`aec5DA4mo z$*M35&fy{omtLyG4rY@Rd1iWTd^X4$DG^)I$k@xZ<;yjFBoCC78yy1+T7-n_86kmYk+H5-72Z}ir-B<=&(2iZeqiNL;rD)B-+blaxpsISMKVzDcrX(p0r{mq0s9yb;o}a5Mf_L1wG4rdzcyi#FUt{Vlsj=)l?Y4FH=DHDf zP;%Ryy+Eve8zg(|wY;U}3^|T$WaW0Qb28ne!t1%c)P$e%U#2WvUOAt7?(5wCZn?c^ zEVr&>xgDN9GD6~jZHAIx>~%KYQmv<+abt;!YI~hWiF#iL6n8IqyPcOe8{baru2Ftr zk9>%PRF-Gno4w<{v*T%_I|pqjy;)EDetXP!AmDskKL=fy7@yO+UGiY%U#K&@zVba+ zFkTBKPP^`Hjl*nkg8x23M4YbipHT-|ms@E~W{31AA!`;$g^-(tQm9YFQSjG6Iin?2 z%38!ok&sj~HjmF0NCs78+0aP(mG}$257cVR^NOVjYMtk2N7Jsh<`cFWwhEY%krK-| z?mJkPacaxZtujhUMZfz)LTco^nxWoroJr3)yz3w%;pxR8TeZ8rr-(iZHaB0UrnsK} z(D`plC4O()8zIZ$h(-^!voco&S#RvxOkN$xeCiHTm+H(&VidL3Amg3Xg}sX0TXnfR zlYFtaGcA)lR-z>?MH~_NjcK2M5gj(e90RG4y-K$Hvjz%^*3fxtUnY{iG_}_r(-o!b zUv5Gcu2+j^ttB~-p^?EMHJD*0AQAx&!@c%%qqMl{<;rs$aM?NQ-0&|r z^yG-|#-`>TOoEvs(quYV2xGbcO!o$ok1^^S(=JtMFYI!>*s-4A7L=b%9A{sC*66Ox zW|-@DL_$J}h0j!!o-U$I+_pp|-3*r#q+PPfq1(jt0Sp>z@JdL(?s)=kM?&I)qbhbY zsEo$oI^O;M%tof*sgWPG(8yy3o`h7DP;`+jB)4`^su^%c&`3>>na817dn>v%55O;* 
zAk{hAYTt;`T*c(VtOD>qNF4RQ$pRvWKg2k=Qsl1y34~D5uTSj#CsNe0LX)^6~hn zT=`cFp75@pEvn27)RKMTcgrvQhs+-PZZ)uUZe}|)=6`VEXYMy5$dAzdJCNd7sGqZC3$#y8`^$&>> zX274XAfxfY6wHQgOk7}rA^PRHOC4YzKlQ+8#C-z5)t@nYy<%Y5naWm{vZZHI>g3Qe z>k5bTdXt?40?j11`ipsUI5Rj;AW0fJXTJ`)9Epjk9Eqt6hm27MEw93+gbKb&7P|dV zO`fTbhiJmtCw09VE}GH)y=XpY9lCHkUfTUiLPL3@BC?H6q4pHlKQT)qQbTx>2tw|u zftiT>3Ou0d>ntkj1*%m({tw9**xttKvX9+|R-f^M8zU{)=1NeEviRM%`i$A*vJjiu z+cOg2_t=t1H9u;(-OfHWy}2|XqVfGy`d@BaI z{-KzM;&=KC>1kvI3i#(A@;_$@h~4oV(&z9yMnXb*E&hk71tTGMzrK>RQ)@v5_Dg`ufZviPSX%1&>B?v&`<+Pgu47RqDZjZR`I_<_;2tLBUS2mlH#ZK3hD8pBMcE7? zE{0~O^GhGg!Gvj6^}u3o3-OWINo~ovJ7G6tQL~=Py<5wqr8Yeys}YI+g8;c#tgeXb zUFwko4WGSlKzfNpy*97Qo4+@=pKTIYXcDL?D^sp1^Vtl{k`}7^?@>F3bN>xf-KNc6W!Fa|*OeI{8D1d27rki`TN*e*RIUS}^Wt z>*C43`W0|&crRQ2;N$}5fnJSZtY*Hmv*>YZ@rpOi^jnSH&?Ez`Nsk&Cqqc2qsEq7n z9W}3cU6SF1Ca)LM)`4HFv`n%^;A|FMpj!&tG!93%W<9r6V%3+f#Et-k-DAJlx8=uG z;>9QCP1%malZ{T+e>qcmG*+aJxzgR*Hdn1C3s^hClLQcP$w;BT}X=w$Mm+Z%xTLvOmRww&?h!p7Y38yLZ8p60diT$X}+62y(V7n-P9fWSb zuNGAtMPY1Y1hqh@?Y4Et4>rUHmAvAxK4SaF-e`R*&4b!1nD?5w#xnY)1J3l`h3sIPwc+dzEWS7j zpCpA>hxfXjg9Mfc7U}J{vYc{iRlRkB0q2_D+u4_$JU)TN%|?PV*9Qh0T#pb?;_6x| zxR(%w@ZAY~Erj>_l+(5>%k2Wzw;o5_a2x8t`|VE7WmL9^*`5iRvdYn)h6SkKkrTb@ zC{e<}2X`uYajZXf%>awV6L8@F&K42Oc64^kl584>&(<+&kxEXSUNrR=A8%F2h*)Ya zL@^?(bWS35g%-Qj6W?;W9c>hA)g~r^ryx}+7dZ&e2>K~vJrBAp*cbG=GyWQ?OYyo`5ss3_VGD*ZV_mbtXwQTA6Jy zd#YnjpXy=ivEqzLKi5xNKz!y^ARGx%H3^Q-h8J#r*$?pTP@Q1iFOJy1Ki*-d!D8z} zu`XPAJvPKjY+b+6y*{us z4ptt$GOq2iidT{HUNXtFdy@^SK&SQgV*;W;ra`rP7vG99sA=_2eL5c|o@(-t1)X9{%$!Bf5wnAB<&)?;)41Iew<|Ie(j}@j>7L}M2>34Yp7#VrO%BV9;4+se zC*-d>V?i1`S5fWcR+T1?QslWOHougZmSvWeD5_m)mJlXd-A=>|o{Em=1!5f%&^0(| z)={ecFlCkmi#Rr5=-FmuEfI(v0*~W;Be!E+Ut*dVDye-ak;j?f!D0SDZ;<^^LV8pW zNIV_Hl>lG9Qk2mMEB?sC_8C6sNTYm0GtC}y6;_`h@2RC4v)A(F4 zPW?Se;W38>;0=uSn}ZFL!x9Y#?Zd&wNyU#L1Qh%gP}dQu;N!TUB1yM0-5Q6D+5Qe1 z%yrtV6VBi#-%DO*@MgdtJ}mnQoGZ@C+ISC+g4j;cppHxfp$uJHNAFU6VvEU%g|G~`=rPM9as(*y&Vi++ENO&a$J#4ne8d41GsHj$DnvW2UN78N5gd-+ue zbL^3Y^v#JpEUIKDP3&eT-Ly=1aaXUjl&EtFRZJc1tN2K1u2#mnoRw%@>9Ag-)=0^! 
z+W~N>65{9(14=pB8giZ^)5VrmWE_IW0=A3Gbs^c^#Vt`j+iVVz|Ijzq+H9vi(@cX{ ztCpS}yyeiexEf={&oHFP*s$ULJ^k^Kl!tq)<`fd@4%-P50%>_(L#KNl-HA0 z+K)U(%AGBC1tD&nBE}b)okXFDO{ao;`FI4k%v$`*My6GlKFvp~?*_?E$7T9yZvnei zcFPwG+Q@TzzTKup;19^gjeZf9?8zV1OQhs}<(rEu>1m#b8PvGM82ipddp2j($s}<= za&t*%5sNl4yZqID&r&dZ$kIRPlY!uZM4V!V=RAOXBMDv+Yi_)pKZBX}SJpVxY z2tL|0A5|)uTqY3>Bc7`?SFy)&P|RXYjE>b*-u)r>HuHR;{w-!%X?srG^VwQI(?l6{kK>ZP3$Q+O^AzCBPCPjUZzLBo znE2u`)HHD*UmCZw7kyzQ*6Z02Ys%P(mD4$gf%NFJ?q2O$1WJiaC|+;>p852;j61iM zlkLT-Iy~^NZ~IxfM*pu*@c-Gp70?~OpVh5i_Hmkni;GXq(xT2RW~4!)<{?s{G;p;4 z(a1*&%#e&O=6BDP?&wtCztL$ptpP$Y?~5R#R;`oo;>|&B6AIGAoeLlS-nTR$yHrq- zM$7&*90iEg<);`iBO50B0<#gZ2#hRw+Ht=|j%Znx649H4#TEw|k0%e1VAOZd>3!Vl zejvB4`bl%()kofs#Vby?7+ermibluP_O1SSq|Y)@z{58e{e&3&N|C}p(@DbMq^m|q zr%1!*rF=@oA!+@~gIsRp-0*#=noE}H&nt;7RJvpCJmu{C^EuyDA`RTMlO;U@Sx&xz zB_9Y0YaN3V^==&$s(GSm0g;w_s6MDwlHhxk?rGzv~s}vT<7f6k#!$Pyr zN@9W*!bAxCi3kc~J7>dQ@tYjR?~|?3WkJ4E0WUGX)4>Y)bLE|{YM=t*$mzMfrltuFev!U8<`6GHijVw!)&De8So2^o7;`?4a>x1fhe|5@$d?j?;mO z+|(~{x8RSL$wDewZ$|2DD|z_bSftW43ntQgQ7Mp-%)bGeR>fi5vKWcaGcgsPA1L{*R_Z=pk5kU7ucPZ%>U!a{-r#U1D<447=)Na`FF~eFg%5S|*TatjGp@5B*BEU9R7%jwSX9z3V@IDVlbo(R76 zyC787atv<4HhaNH#YoC#_sodKJtXshyG4=NeQ2+5mHYH~UDdSa4Z9qn+1fMHggBux z&!4p0^5;KyG1kpj&u)SggqX~p7pBOBDZofDcI!9gq%0%HjHdhgeLiIj3mxXJnw08W zeb7V9`oF48Y?RqTrdz!pH?q`4(q-7ppWNCH%McCQnW-$OeuVUSO9kY~IDfG!Re#<5 zqMw1f_kuLVU@~AaAi^BW9qDtZSr**|AixJoFX?vpAervHm3h&^3`oB^?tJNcz5Fb( zn6@>Cn9<%fd{|L>w+|9iyYPe@eGpX#*UuC99Objq6NG-bPg zb=>|e%QL1(JTo?C4}-(3v|N*s*83bU`NuDj+Q%o^?< zncUo8ASQ_u0kymrgVYxoJ!9Xz6Bb^9t(SE8pJudq-Hr zd)39HpZH#qG+Nt}d7HqNeHeVO*svOZ!MDRQf`*9}zVD7tC4b-5 z_TrzMiiB-$uVoOX!cH@)n``I2ZW?b5=6-(|9`WZqJ#nxc%e9NBQvOavW;pF$ILz&U=hg#^G!(p`jrmEV7o+YyB(~ zLIp*<)@QL+jLhLYI0}u5p*yCiKFkxmIFcbL?0e#|y;&1%AxpAe8?sQp`nY6#PUF&O zpiPwjYNxy5l0+@>M3d!Dv=?^d^nBza8NQGGL5%1B*hcZV`7b0aukwwq0Er}f<#pt=s&-;&I!&RFpNhjn=13e}f^lf1lE%(44X zb1U%a%egOgr+NQsTe5Cd!kcfqC)X)0x9fUW|Ky_Er=lN^XUfL!o>g79(p~@AV&=?R~j!`T6hP`EI3K;1p0={86)cK~BzX=kN3X zf8?K(wPoXyS8o@W$5vFox|;I$(pzi0s`OQXOUiElVXy!Acx4*r?Z$TYbN>GWtNM@K zJIlPYRkyg-+HUWTOwXxzj%?fcDqiMhz>ljx949-=-i-Kh_1KBUKX&esw4a``^RJ>* zXwhtT%ei{n#FzEH|C;yZ>+$!u_x#*+`=L8{b9SH^9&27u3G_Gxqxe`L2UJtdxghk z&-wzDFvLvW{chK5u3{n6GSKKy!P&C6w^IFpbD0bcp^A{{2lcLh_DXj@ybtYvc^;(2 M)78&qol`;+0Fu7JivR!s literal 0 HcmV?d00001 diff --git a/docs/output.md b/docs/output.md new file mode 100755 index 0000000..c9b0705 --- /dev/null +++ b/docs/output.md @@ -0,0 +1,71 @@ +# nf-core/spotlight: Output + +## Introduction + +This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. + +The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. + + + +## Pipeline overview + +The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: + +- [FastQC](#fastqc) - Raw read QC +- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline +- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution + +### FastQC + +
+Output files + +- `fastqc/` + - `*_fastqc.html`: FastQC report containing quality metrics. + - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. + +
+ +[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). + +![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) + +![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png) + +![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) + +:::note +The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. +::: + +### MultiQC + +
+Output files + +- `multiqc/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + +
+ +[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. + +Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . + +### Pipeline information + +
+Output files + +- `pipeline_info/` + - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. + - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`. + +
+ +[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/usage.md b/docs/usage.md new file mode 100755 index 0000000..83a672f --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,226 @@ +# nf-core/spotlight: Usage + +## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/spotlight/usage](https://nf-co.re/spotlight/usage) + +> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ + +## Introduction + + + +## Samplesheet input + +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. + +```bash +--input '[path to samplesheet file]' +``` + +### Multiple runs of the same sample + +The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: + +```csv title="samplesheet.csv" +sample,fastq_1,fastq_2 +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz +CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz +CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz +``` + +### Full samplesheet + +The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. + +A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. + +```csv title="samplesheet.csv" +sample,fastq_1,fastq_2 +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz +CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz +CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz +TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, +TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, +TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, +TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +``` + +| Column | Description | +| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". 
| + +An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. + +## Running the pipeline + +The typical command for running the pipeline is as follows: + +```bash +nextflow run nf-core/spotlight --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker +``` + +This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. + +Note that the pipeline will create the following files in your working directory: + +```bash +work # Directory containing the nextflow working files + # Finished results in specified location (defined with --outdir) +.nextflow_log # Log file from Nextflow +# Other nextflow hidden files, eg. history of pipeline runs and old logs. +``` + +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. + +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. + +:::warning +Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +::: + +The above pipeline run specified with a params file in yaml format: + +```bash +nextflow run nf-core/spotlight -profile docker -params-file params.yaml +``` + +with `params.yaml` containing: + +```yaml +input: './samplesheet.csv' +outdir: './results/' +genome: 'GRCh37' +<...> +``` + +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). + +### Updating the pipeline + +When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: + +```bash +nextflow pull nf-core/spotlight +``` + +### Reproducibility + +It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. + +First, go to the [nf-core/spotlight releases page](https://github.com/nf-core/spotlight/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. + +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. + +To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. 

:::tip
If you wish to share such a profile (e.g. to upload as supplementary material for an academic publication), make sure NOT to include cluster-specific paths to files or institution-specific profiles.
:::

## Core Nextflow arguments

:::note
These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen).
:::

### `-profile`

Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments.

Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below.

:::info
We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported.
:::

The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation).

Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important!
They are loaded in sequence, so later profiles can overwrite earlier profiles.

If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment.

- `test`
  - A profile with a complete configuration for automated testing
  - Includes links to test data so needs no other parameters
- `docker`
  - A generic configuration profile to be used with [Docker](https://docker.com/)
- `singularity`
  - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/)
- `podman`
  - A generic configuration profile to be used with [Podman](https://podman.io/)
- `shifter`
  - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/)
- `charliecloud`
  - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/)
- `apptainer`
  - A generic configuration profile to be used with [Apptainer](https://apptainer.org/)
- `wave`
  - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow `24.03.0-edge` or later).
- `conda`
  - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer.

### `-resume`

Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html).

You can also supply a run name to resume a specific run: `-resume [run-name]`.
Use the `nextflow log` command to show previous run names.

### `-c`

Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information.

## Custom configuration

### Resource requests

Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped.

To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website.

### Custom Containers

In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version may be out of date.

To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website.

### Custom Tool Arguments

A pipeline might not always support every possible argument or option of a particular tool used in the pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default.

To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website.

### nf-core/configs

In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly, it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this, please test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile.

See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files.
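
As a minimal sketch of such a custom config (the process name `EXTRACT_HISTOPATHO_FEATURES` is hypothetical and the memory value arbitrary), you could override the resources of a single step and pass the file with `-c`:

```bash
# Write a one-off config that raises memory for one (hypothetical) process name
cat > custom.config <<'EOF'
process {
    withName: 'EXTRACT_HISTOPATHO_FEATURES' {
        memory = 64.GB
    }
}
EOF

nextflow run nf-core/spotlight -profile docker -c custom.config --input ./samplesheet.csv --outdir ./results
```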

If you have any questions or issues, please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs).

## Azure Resource Requests

Use the `azurebatch` profile by specifying `-profile azurebatch`.
We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default, but these options can be changed if required.

Note that the choice of VM size depends on your quota and the overall workload during the analysis.
For a thorough list, please refer to the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes).

## Running in the background

Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished.

The Nextflow `-bg` flag launches Nextflow in the background, detached from your terminal so that the workflow does not stop if you log out of your session. The logs are saved to a file.

Alternatively, you can use `screen` / `tmux` or a similar tool to create a detached session which you can log back into at a later time.
Some HPC setups also allow you to run Nextflow within a cluster job submitted to your job scheduler (from where it submits more jobs).

## Nextflow memory requirements

In some cases, the Nextflow Java virtual machines can start to request a large amount of memory.
We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~/.bash_profile`):

```bash
NXF_OPTS='-Xms1g -Xmx4g'
```
diff --git a/Python/libs/MFP/__init__.py b/lib/DL/__init__.py
old mode 100644
new mode 100755
similarity index 100%
rename from Python/libs/MFP/__init__.py
rename to lib/DL/__init__.py
diff --git a/Python/libs/DL/image.py b/lib/DL/image.py
old mode 100644
new mode 100755
similarity index 100%
rename from Python/libs/DL/image.py
rename to lib/DL/image.py
diff --git a/Python/libs/DL/utils.py b/lib/DL/utils.py
old mode 100644
new mode 100755
similarity index 100%
rename from Python/libs/DL/utils.py
rename to lib/DL/utils.py
diff --git a/Python/libs/MFP/portraits/__init__.py b/lib/MFP/__init__.py
old mode 100644
new mode 100755
similarity index 100%
rename from Python/libs/MFP/portraits/__init__.py
rename to lib/MFP/__init__.py
diff --git a/Python/libs/MFP/license.md b/lib/MFP/license.md
old mode 100644
new mode 100755
similarity index 100%
rename from Python/libs/MFP/license.md
rename to lib/MFP/license.md
diff --git a/Python/libs/features/__init__.py b/lib/MFP/portraits/__init__.py
old mode 100644
new mode 100755
similarity index 100%
rename from Python/libs/features/__init__.py
rename to lib/MFP/portraits/__init__.py
diff --git a/Python/libs/MFP/portraits/utils.py b/lib/MFP/portraits/utils.py
old mode 100644
new mode 100755
similarity index 100%
rename from Python/libs/MFP/portraits/utils.py
rename to lib/MFP/portraits/utils.py
diff --git a/Python/libs/MFP/signatures/gene_signatures.gmt b/lib/MFP/signatures/gene_signatures.gmt
old mode 100644
new mode 100755
similarity index 100%
rename from Python/libs/MFP/signatures/gene_signatures.gmt
rename to lib/MFP/signatures/gene_signatures.gmt
diff --git a/Python/libs/MFP/signatures/gene_signatures_order.tsv b/lib/MFP/signatures/gene_signatures_order.tsv
old mode 100644
new mode 100755
similarity index 100%
rename from Python/libs/MFP/signatures/gene_signatures_order.tsv
rename to lib/MFP/signatures/gene_signatures_order.tsv
diff --git a/Python/libs/model/__init__.py
b/lib/features/__init__.py old mode 100644 new mode 100755 similarity index 100% rename from Python/libs/model/__init__.py rename to lib/features/__init__.py diff --git a/Python/libs/features/clustering.py b/lib/features/clustering.py old mode 100644 new mode 100755 similarity index 100% rename from Python/libs/features/clustering.py rename to lib/features/clustering.py diff --git a/Python/libs/features/features.py b/lib/features/features.py old mode 100644 new mode 100755 similarity index 99% rename from Python/libs/features/features.py rename to lib/features/features.py index 5436730..9cf38ba --- a/Python/libs/features/features.py +++ b/lib/features/features.py @@ -16,9 +16,6 @@ from scipy.spatial import ConvexHull from sklearn.metrics import pairwise_distances_argmin_min -# Point to folder with custom imports -sys.path.append(f"{os.path.dirname(os.getcwd())}/Python/libs") - # Own modules from model.constants import * import features.utils as utils @@ -34,6 +31,7 @@ def determine_lcc(graph, cell_type_assignments, cell_types=None): Args: graph (Networkx Graph): graph representing the slide constructed with Networkx cell_type_assignments (DataFrame): Dataframe containing the cell type labels of the individual tiles indicated with booleans based on P > threshold + slide_submitter_id (str): string with slide submitter ID (default='Slide_1') cell_types (list): list of cell types """ if cell_types is None: @@ -52,7 +50,8 @@ def determine_lcc(graph, cell_type_assignments, cell_types=None): graph_temp.nodes() ) lcc.append([cell_type, lcc_frac]) - return pd.DataFrame(lcc, columns=["cell_type", "type_spec_frac"]) + lcc = pd.DataFrame(lcc, columns=["cell_type", "type_spec_frac"]) + return lcc def compute_dual_node_fractions(cell_type_assignments, cell_types=None): @@ -398,6 +397,7 @@ def _individual_between( cluster_pairs = list( itertools.product(list(range(n_clusters)), list(range(n_clusters))) ) + print(tiles.head()) for cell_type1, cell_type2 in cell_type_pairs: for i, j in cluster_pairs: cluster1_tiles = tiles.loc[ diff --git a/Python/libs/features/graphs.py b/lib/features/graphs.py old mode 100644 new mode 100755 similarity index 100% rename from Python/libs/features/graphs.py rename to lib/features/graphs.py diff --git a/lib/features/lcc.py b/lib/features/lcc.py new file mode 100755 index 0000000..9924454 --- /dev/null +++ b/lib/features/lcc.py @@ -0,0 +1,93 @@ +import sys +import os +import argparse +from argparse import ArgumentParser as AP +import networkx as nx +import pandas as pd +import time +from os.path import abspath + +# Own modules +import features.features as features +import features.utils as utils +from model.constants import DEFAULT_CELL_TYPES + +# Point to folder with custom imports +sys.path.append(f"{os.path.dirname(os.getcwd())}/Python/libs") + +# def get_args(): +# # Script description +# description = """Computing LCC""" + +# # Add parser +# parser = AP(description=description, +# formatter_class=argparse.RawDescriptionHelpFormatter) + +# # Sections +# parser.add_argument( +# "--clinical_files_input", +# help="Path to either a folder for multiple cancer types or single txt file.", required=False, +# default=None +# ) +# # TODO add arguments +# parser.add_argument("--version", action="version", version="0.1.0") +# arg = parser.parse_args() +# arg.output = abspath(arg.output) +# return arg + +def determine_lcc(graph, cell_type_assignments, cell_types=None): + """ Determine the fraction of the largest connected component (LCC) of a + cell type w.r.t. 
all nodes (tiles) of that cell type.
+    1. Determine the number of nodes N in the LCC for the probability map of a
+    cell type.
+    2. Determine the total number of nodes (tiles) T for that cell type
+    3. Determine the fraction of nodes that are connected: N/T
+
+    Args:
+        graph (Networkx Graph): graph representing the slide constructed with Networkx
+        cell_type_assignments (DataFrame): Dataframe containing the cell type labels of the individual tiles indicated with booleans based on P > threshold
+        cell_types (list): list of cell types
+    """
+    if cell_types is None:
+        cell_types = DEFAULT_CELL_TYPES
+
+    lcc = []
+    for cell_type in cell_types:
+        graph_temp = graph.copy()
+        graph_temp.remove_nodes_from(
+            list(cell_type_assignments[~cell_type_assignments[cell_type]].index)
+        )
+        if len(graph_temp.nodes()) > 0:
+            # Get largest component
+            # include only cell type specific tiles
+            lcc_frac = len(max(nx.connected_components(graph_temp), key=len)) / len(
+                graph_temp.nodes()
+            )
+            lcc.append([cell_type, lcc_frac])
+    lcc = pd.DataFrame(lcc, columns=["cell_type", "type_spec_frac"])
+    return lcc
+
+
+def lcc_wrapper(id, slide_data, predictions, graph, cell_types, abundance_threshold):
+    # NOTE: slide_data is recomputed from the predictions for the given id
+    slide_data = utils.get_slide_data(predictions, id)
+    node_cell_types = utils.assign_cell_types(
+        slide_data=slide_data, cell_types=cell_types, threshold=abundance_threshold)
+    lcc = features.determine_lcc(
+        graph=graph, cell_type_assignments=node_cell_types, cell_types=cell_types
+    )
+    lcc["slide_submitter_id"] = id
+    return lcc
+
+
+# def main(args):
+#     if not os.path.isdir(args.output_dir):
+#         os.mkdir(args.output_dir)
+#     lcc_wrapper(args.id, args.slide_data, args.predictions, args.graph, args.cell_types, args.abundance_threshold)
+
+# if __name__ == "__main__":
+#     args = get_args()
+#     st = time.time()
+#     main(args)
+#     rt = time.time() - st
+#     print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s")
+
diff --git a/lib/features/tests/test_determine_lcc.py b/lib/features/tests/test_determine_lcc.py
new file mode 100755
index 0000000..c568423
--- /dev/null
+++ b/lib/features/tests/test_determine_lcc.py
@@ -0,0 +1,20 @@
+import networkx as nx
+import pandas as pd
+
+import features.features as features
+
+
+def test_determine_lcc():
+    # Minimal sketch with toy data: a path graph 0-1-2 in which only nodes
+    # 0 and 1 carry the cell type, so the expected LCC fraction is 2/2 = 1.0
+    graph = nx.path_graph(3)
+    cell_type_assignments = pd.DataFrame(
+        {"T_cells": [True, True, False]}, index=[0, 1, 2]
+    )
+
+    lcc = features.determine_lcc(
+        graph=graph,
+        cell_type_assignments=cell_type_assignments,
+        cell_types=["T_cells"],
+    )
+    assert lcc.loc[lcc.cell_type == "T_cells", "type_spec_frac"].item() == 1.0
diff --git a/Python/libs/features/utils.py b/lib/features/utils.py
old mode 100644
new mode 100755
similarity index 100%
rename from Python/libs/features/utils.py
rename to lib/features/utils.py
diff --git a/Python/libs/features/vis.py b/lib/features/vis.py
old mode 100644
new mode 100755
similarity index 100%
rename from Python/libs/features/vis.py
rename to lib/features/vis.py
diff --git a/lib/model/__init__.py b/lib/model/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/Python/libs/model/constants.py b/lib/model/constants.py
old mode 100644
new mode 100755
similarity index 89%
rename from Python/libs/model/constants.py
rename to lib/model/constants.py
index 29ec09e..bdac420
--- a/Python/libs/model/constants.py
+++ b/lib/model/constants.py
@@ -1,7 +1,7 @@
 import multiprocessing
 import sys
 
-NUM_CORES = multiprocessing.cpu_count() - 4
+NUM_CORES = multiprocessing.cpu_count()
 
 METADATA_COLS = ['tile_ID', 'slide_submitter_id', 'Section', 'Coord_X', 'Coord_Y', 'TCGA_patient_ID', ]
 DEFAULT_CELL_TYPES = ["CAFs", "T_cells", "endothelial_cells", "tumor_purity"]
@@ -38,4 +38,4 @@
 ]
 IDS = ['slide_submitter_id', 'sample_submitter_id']
 
-TILE_VARS = ['Section', 'Coord_X',
'Coord_Y', "tile_ID"] \ No newline at end of file +TILE_VARS = ['Section', 'Coord_X', 'Coord_Y', "tile_ID"] diff --git a/Python/libs/model/evaluate.py b/lib/model/evaluate.py old mode 100644 new mode 100755 similarity index 100% rename from Python/libs/model/evaluate.py rename to lib/model/evaluate.py diff --git a/Python/libs/model/preprocessing.py b/lib/model/preprocessing.py old mode 100644 new mode 100755 similarity index 100% rename from Python/libs/model/preprocessing.py rename to lib/model/preprocessing.py diff --git a/Python/libs/model/utils.py b/lib/model/utils.py old mode 100644 new mode 100755 similarity index 100% rename from Python/libs/model/utils.py rename to lib/model/utils.py diff --git a/lib/myslim/__init__.py b/lib/myslim/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/lib/myslim/bottleneck_predict.py b/lib/myslim/bottleneck_predict.py new file mode 100755 index 0000000..35e847c --- /dev/null +++ b/lib/myslim/bottleneck_predict.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python +import os +import sys +import time +import tensorflow.compat.v1 as tf + +sys.path.append(os.getcwd()) + +import tf_slim as slim +from nets import nets_factory +from preprocessing import preprocessing_factory + +tf.compat.v1.disable_eager_execution() + +tf.app.flags.DEFINE_integer("num_classes", 42, "The number of classes.") +tf.app.flags.DEFINE_string( + "bot_out", None, "Output file for bottleneck features.") +tf.app.flags.DEFINE_string("pred_out", None, "Output file for predictions.") +tf.app.flags.DEFINE_string( + "model_name", "inception_v4", "The name of the architecture to evaluate.") +tf.app.flags.DEFINE_string( + "checkpoint_path", None, "The directory where the model was written to.") +tf.app.flags.DEFINE_integer("eval_image_size", 299, "Eval image size.") +tf.app.flags.DEFINE_string("file_dir", "../Output/process_train/", "") + +FLAGS = tf.app.flags.FLAGS + + +def main(_): + model_name_to_variables = { + "inception_v3": "InceptionV3", + "inception_v4": "InceptionV4", + } + model_name_to_bottleneck_tensor_name = { + "inception_v4": "InceptionV4/Logits/AvgPool_1a/AvgPool:0", + "inception_v3": "InceptionV3/Logits/AvgPool_1a_8x8/AvgPool:0", + } + bottleneck_tensor_name = model_name_to_bottleneck_tensor_name.get( + FLAGS.model_name) + preprocessing_name = FLAGS.model_name + eval_image_size = FLAGS.eval_image_size + model_variables = model_name_to_variables.get(FLAGS.model_name) + if model_variables is None: + tf.logging.error("Unknown model_name provided `%s`." 
% + FLAGS.model_name) + sys.exit(-1) + # Either specify a checkpoint_path directly or find the path + if tf.gfile.IsDirectory(FLAGS.checkpoint_path): + checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) + print(checkpoint_path) + if checkpoint_path is None: + sys.exit(-1) + else: + checkpoint_path = FLAGS.checkpoint_path + + image_string = tf.placeholder(tf.string) + image = tf.image.decode_jpeg( + image_string, channels=3, try_recover_truncated=True, acceptable_fraction=0.3 + ) + image_preprocessing_fn = preprocessing_factory.get_preprocessing( + preprocessing_name, is_training=False + ) + network_fn = nets_factory.get_network_fn( + FLAGS.model_name, FLAGS.num_classes, is_training=False + ) + processed_image = image_preprocessing_fn( + image, eval_image_size, eval_image_size) + processed_images = tf.expand_dims(processed_image, 0) + + logits, _ = network_fn(processed_images) + probabilities = tf.nn.softmax(logits) + init_fn = slim.assign_from_checkpoint_fn( + checkpoint_path, slim.get_model_variables(model_variables) + ) + + print(FLAGS.bot_out) + + sess = tf.Session() + init_fn(sess) + + fto_bot = open(FLAGS.bot_out, "w") + fto_pred = open(FLAGS.pred_out, "w") + + filelist = [file_path for file_path in os.listdir( + FLAGS.file_dir) if (file_path.startswith("images_train") & file_path.endswith(".tfrecord"))] + for i in range(len(filelist)): + file = filelist[i] + fls = tf.python_io.tf_record_iterator(FLAGS.file_dir + "/" + file) + tf.logging.info("reading from: %s" % file) + start_time = time.time() + c = 0 + for fl in fls: + example = tf.train.Example() + example.ParseFromString(fl) + x = example.features.feature["image/encoded"].bytes_list.value[0] + filenames = str( + example.features.feature["image/filename"].bytes_list.value[0] + ) + label = str( + example.features.feature["image/class/label"].int64_list.value[0] + ) + preds = sess.run(probabilities, feed_dict={image_string: x}) + bottleneck_values = sess.run( + bottleneck_tensor_name, {image_string: x}) + fto_pred.write(filenames + "\t" + label) + fto_bot.write(filenames + "\t" + label) + for p in range(len(preds[0])): + fto_pred.write("\t" + str(preds[0][p])) + fto_pred.write("\n") + for p in range(len(bottleneck_values[0][0][0])): + fto_bot.write("\t" + str(bottleneck_values[0][0][0][p])) + fto_bot.write("\n") + c += 1 + used_time = time.time() - start_time + tf.logging.info("processed images: %s" % c) + tf.logging.info("used time: %s" % used_time) + + fto_bot.close() + fto_pred.close() + sess.close() + + +if __name__ == "__main__": + tf.app.run() diff --git a/Python/1_extract_histopathological_features/myslim/datasets/__init__.py b/lib/myslim/datasets/__init__.py old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/datasets/__init__.py rename to lib/myslim/datasets/__init__.py diff --git a/Python/1_extract_histopathological_features/myslim/datasets/convert.py b/lib/myslim/datasets/convert.py old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/datasets/convert.py rename to lib/myslim/datasets/convert.py diff --git a/Python/1_extract_histopathological_features/myslim/datasets/dataset_factory.py b/lib/myslim/datasets/dataset_factory.py old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/datasets/dataset_factory.py rename to lib/myslim/datasets/dataset_factory.py diff --git 
a/Python/1_extract_histopathological_features/myslim/datasets/dataset_utils.py b/lib/myslim/datasets/dataset_utils.py old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/datasets/dataset_utils.py rename to lib/myslim/datasets/dataset_utils.py diff --git a/Python/1_extract_histopathological_features/myslim/datasets/tumors_all.py b/lib/myslim/datasets/tumors_all.py old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/datasets/tumors_all.py rename to lib/myslim/datasets/tumors_all.py diff --git a/Python/1_extract_histopathological_features/myslim/deployment/__init__.py b/lib/myslim/deployment/__init__.py old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/deployment/__init__.py rename to lib/myslim/deployment/__init__.py diff --git a/Python/1_extract_histopathological_features/myslim/deployment/model_deploy.py b/lib/myslim/deployment/model_deploy.py old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/deployment/model_deploy.py rename to lib/myslim/deployment/model_deploy.py diff --git a/Python/1_extract_histopathological_features/myslim/eval_image_classifier.py b/lib/myslim/eval_image_classifier.py old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/eval_image_classifier.py rename to lib/myslim/eval_image_classifier.py diff --git a/Python/1_extract_histopathological_features/myslim/nets/__init__.py b/lib/myslim/nets/__init__.py old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/nets/__init__.py rename to lib/myslim/nets/__init__.py diff --git a/Python/1_extract_histopathological_features/myslim/nets/inception.py b/lib/myslim/nets/inception.py old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/nets/inception.py rename to lib/myslim/nets/inception.py diff --git a/Python/1_extract_histopathological_features/myslim/nets/inception_alt.py b/lib/myslim/nets/inception_alt.py old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/nets/inception_alt.py rename to lib/myslim/nets/inception_alt.py diff --git a/Python/1_extract_histopathological_features/myslim/nets/inception_utils.py b/lib/myslim/nets/inception_utils.py old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/nets/inception_utils.py rename to lib/myslim/nets/inception_utils.py diff --git a/Python/1_extract_histopathological_features/myslim/nets/inception_v4.py b/lib/myslim/nets/inception_v4.py old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/nets/inception_v4.py rename to lib/myslim/nets/inception_v4.py diff --git a/Python/1_extract_histopathological_features/myslim/nets/inception_v4_alt.py b/lib/myslim/nets/inception_v4_alt.py old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/nets/inception_v4_alt.py rename to lib/myslim/nets/inception_v4_alt.py diff --git a/Python/1_extract_histopathological_features/myslim/nets/nets_factory.py b/lib/myslim/nets/nets_factory.py old mode 100644 new mode 100755 similarity index 100% rename from 
Python/1_extract_histopathological_features/myslim/nets/nets_factory.py rename to lib/myslim/nets/nets_factory.py diff --git a/Python/1_extract_histopathological_features/myslim/nets/overfeat.py b/lib/myslim/nets/overfeat.py old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/nets/overfeat.py rename to lib/myslim/nets/overfeat.py diff --git a/lib/myslim/post_process_features.py b/lib/myslim/post_process_features.py new file mode 100755 index 0000000..84204c6 --- /dev/null +++ b/lib/myslim/post_process_features.py @@ -0,0 +1,134 @@ +#  Module imports +import argparse +from argparse import ArgumentParser as AP +import os +import dask.dataframe as dd +import pandas as pd + +#  Custom imports +import DL.utils as utils +from os.path import abspath +import time +from pathlib import Path + + +def get_args(): + # Script description + description = """Post processing features""" + + # Add parser + parser = AP(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + + # Sections + parser.add_argument("--output_dir", help="Set output folder (default='.')", default = ".") + parser.add_argument("--create_parquet_subdir", help = "Whether to create a subdirectory called 'features_format_parquet' if slide_type == 'FFPE', default=False", default = False) + parser.add_argument( + "--slide_type", help="Type of tissue slide (FF or FFPE)]") + parser.add_argument( + "--is_tcga", help="Is TCGA dataset, default=False", type = bool, default = False) + # TODO add more details + parser.add_argument("--bot_train_file", type = str, default = None, help = "Txt file") + parser.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + + if arg.bot_train_file is None: + arg.bot_train_file = Path(arg.output_dir, "bot_train.txt") + + if (arg.create_parquet_subdir): + arg.output_dir = abspath(Path(arg.output_dir, "features_format_parquet")) + + if not os.path.isdir(arg.output_dir): + os.mkdir(arg.output_dir) + + return arg + + +def handle_ff_slides(bot_train_file, is_tcga): + features_raw = pd.read_csv(bot_train_file, sep="\t", header=None) + # Extract the DL features (discard: col1 = tile paths, col2 = true class id) + features = features_raw.iloc[:, 2:] + features.columns = list(range(1536)) + # Add new column variables that define each tile + features["tile_ID"] = [utils.get_tile_name( + tile_path) for tile_path in features_raw.iloc[:, 0]] + features["Coord_X"] = [i[-2] + for i in features["tile_ID"].str.split("_")] + features["Coord_Y"] = [i[-1] + for i in features["tile_ID"].str.split("_")] + # FIX add sample_submitter_id and slide_submitter_id depending on is_tcga + if is_tcga: + features["sample_submitter_id"] = features["tile_ID"].str[0:16] + features["slide_submitter_id"] = features["tile_ID"].str[0:23] + features["Section"] = features["tile_ID"].str[20:23] + else: + features["sample_submitter_id"] = features['tile_ID'].str.split( + '_').str[0] + return(features) + +def handle_ffpe_slides(bot_train_file, is_tcga): + features_raw = dd.read_csv(bot_train_file, sep="\t", header=None) + features_raw['tile_ID'] = features_raw.iloc[:, 0] + features_raw.tile_ID = features_raw.tile_ID.map( + lambda x: x.split("/")[-1]) + features_raw['tile_ID'] = features_raw['tile_ID'].str.replace( + ".jpg'", "") + features = features_raw.map_partitions( + lambda df: df.drop(columns=[0, 1])) + new_names = list(map(lambda x: str(x), list(range(1536)))) + new_names.append('tile_ID') + features.columns = new_names + # 
FIX add sample_submitter_id and slide_submitter_id depending on is_tcga
+    if is_tcga:
+        features["sample_submitter_id"] = features["tile_ID"].str[0:16]
+        features["slide_submitter_id"] = features["tile_ID"].str[0:23]
+        features["Section"] = features["tile_ID"].str[20:23]
+    else:
+        features["sample_submitter_id"] = features['tile_ID'].str.split(
+            '_').str[0]
+    features['Coord_X'] = features['tile_ID'].str.split('_').str[1]
+    features['Coord_Y'] = features['tile_ID'].str.split('_').str[-1]
+    return features
+
+
+def post_process_features(bot_train_file, slide_type="FF", is_tcga=False):
+    """
+    Format the histopathological features extracted into the bot_train.txt file by myslim/bottleneck_predict.py: extract the 1,536 bottleneck features and the tile names, and derive several variables from the tile ID.
+
+    Args:
+        bot_train_file (str): path to bot_train.txt
+        slide_type (str): type of tissue slide, "FF" or "FFPE"
+        is_tcga (bool): whether the slides are from TCGA
+
+    Returns:
+        features (dataframe): the 1,536 features, followed by sample_submitter_id, tile_ID, slide_submitter_id, Section, Coord_X and Coord_Y, with the tiles in the rows
+    """
+    # Read histopathological computed features
+    if slide_type == "FF":
+        return handle_ff_slides(bot_train_file=bot_train_file, is_tcga=is_tcga)
+    elif slide_type == "FFPE":
+        return handle_ffpe_slides(bot_train_file=bot_train_file, is_tcga=is_tcga)
+    else:
+        raise Exception("Invalid `slide_type`, please choose 'FF' or 'FFPE'")
+
+
+def main(args):
+    features = post_process_features(
+        bot_train_file=args.bot_train_file,
+        slide_type=args.slide_type,
+        is_tcga=args.is_tcga)
+    if (args.slide_type == "FF"):
+        #  Save features to .csv file
+        features.to_csv(Path(args.output_dir, "features.txt"), sep="\t", header=True)
+    elif (args.slide_type == "FFPE"):
+        features.to_parquet(path=args.output_dir, compression='gzip',
+                            name_function=utils.name_function)
+    print("Finished post-processing of features...")
+
+
+if __name__ == "__main__":
+    args = get_args()
+    st = time.time()
+    main(args)
+    rt = time.time() - st
+    print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s")
diff --git a/lib/myslim/post_process_predictions.py b/lib/myslim/post_process_predictions.py
new file mode 100755
index 0000000..9730749
--- /dev/null
+++ b/lib/myslim/post_process_predictions.py
@@ -0,0 +1,240 @@
+# Module imports
+import argparse
+from argparse import ArgumentParser as AP
+import os
+import dask.dataframe as dd
+import pandas as pd
+
+#  Custom imports
+import DL.utils as utils
+import numpy as np
+from os.path import abspath
+import time
+from pathlib import Path
+
+def get_args():
+    # Script description
+    description = """Post-processing predictions"""
+
+    # Add parser
+    parser = AP(description=description,
+                formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument("--output_dir", help="Set output folder", default=".")
+    parser.add_argument("--create_parquet_subdir", help="Whether to create a subdirectory called 'predictions_format_parquet' if slide_type == 'FFPE', default=False", default=False)
+    parser.add_argument(
+        "--slide_type", help="Type of tissue slide (FF or FFPE) (default='FF')", type=str, default="FF")
+    parser.add_argument(
+        "--path_codebook", help="codebook.txt file", required=True, type=str)
+    parser.add_argument(
+        "--path_tissue_classes", help="Tissue_classes.csv file", required=True, type=str)
+    parser.add_argument("--cancer_type", help="Cancer type", required=True, type=str)
+    parser.add_argument("--pred_train_file", help="Txt file with tile-level predictions (default: <output_dir>/pred_train.txt)", type=str, default=None)
+    arg = parser.parse_args()
+
+    if arg.pred_train_file is
None: + arg.pred_train_file = Path(arg.output_dir, "pred_train.txt") + + if (arg.create_parquet_subdir): + arg.output_dir = abspath(Path(arg.output_dir, "predictions_format_parquet")) + + if not os.path.isdir(arg.output_dir): + os.mkdir(arg.output_dir) + + return arg + + + +def handle_ff_slides(pred_train_file, codebook, tissue_classes, cancer_type): + predictions_raw = pd.read_csv(pred_train_file, sep="\t", header=None) + # Extract tile name incl. coordinates from path + tile_names = [utils.get_tile_name(tile_path) + for tile_path in predictions_raw[0]] + # Create output dataframe for post-processed data + predictions = pd.DataFrame(tile_names, columns=["tile_ID"]) + # Get predicted probabilities for all 42 classes + rename columns + pred_probabilities = predictions_raw.iloc[:, 2:] + pred_probabilities.columns = codebook["class_id"] + # Get predicted and true class ids + predictions["pred_class_id"] = pred_probabilities.idxmax( + axis="columns") + predictions["true_class_id"] = 41 + # Get corresponding max probabilities to the predicted class + predictions["pred_probability"] = pred_probabilities.max(axis=1) + # Replace class id with class name + predictions["true_class_name"] = predictions["true_class_id"].copy() + predictions["pred_class_name"] = predictions["pred_class_id"].copy() + found_class_ids = set(predictions["true_class_id"]).union( + set(predictions["pred_class_id"])) + for class_id in found_class_ids: + predictions["true_class_name"].replace( + class_id, codebook["class_name"][class_id], inplace=True + ) + predictions["pred_class_name"].replace( + class_id, codebook["class_name"][class_id], inplace=True + ) + + # Define whether prediction was right + predictions["is_correct_pred"] = ( + predictions["true_class_id"] == predictions["pred_class_id"]) + predictions["is_correct_pred"] = predictions["is_correct_pred"].replace( + False, "F") + predictions.is_correct_pred = predictions.is_correct_pred.astype(str) + # Get tumor and tissue ID + # TODO ERROR + temp = pd.DataFrame( + {"tumor_type": predictions["true_class_name"].str[:-2]}) + temp = pd.merge(temp, tissue_classes, on="tumor_type", how="left") + # Set of IDs for normal and tumor (because of using multiple classes) + IDs_tumor = list(set(temp["ID_tumor"])) + if list(set(temp.tumor_type.tolist()))[0] == cancer_type: + # Probability for predicting tumor and normal label (regardless of tumor [tissue] type) + predictions["tumor_label_prob"] = np.nan + predictions["normal_label_prob"] = np.nan + for ID_tumor in IDs_tumor: + vals = pred_probabilities.loc[temp["ID_tumor"] + == ID_tumor, ID_tumor] + predictions.loc[temp["ID_tumor"] == + ID_tumor, "tumor_label_prob"] = vals + + predictions["is_correct_pred_label"] = np.nan + else: + IDs_normal = list(set(temp["ID_normal"])) + # Probability for predicting tumor and normal label (regardless of tumor [tissue] type) + predictions["tumor_label_prob"] = np.nan + predictions["normal_label_prob"] = np.nan + for ID_tumor in IDs_tumor: + vals = pred_probabilities.loc[temp["ID_tumor"] + == ID_tumor, ID_tumor] + predictions.loc[temp["ID_tumor"] == + ID_tumor, "tumor_label_prob"] = vals + + for ID_normal in IDs_normal: + vals = pred_probabilities.loc[temp["ID_normal"] + == ID_normal, ID_normal] + predictions.loc[temp["ID_normal"] == + ID_normal, "normal_label_prob"] = vals + + # Check if the correct label (tumor/normal) is predicted + temp_probs = predictions[["tumor_label_prob", "normal_label_prob"]] + is_normal_label_prob = ( + temp_probs["normal_label_prob"] > temp_probs["tumor_label_prob"] + 
)
+        is_tumor_label_prob = (
+            temp_probs["normal_label_prob"] < temp_probs["tumor_label_prob"]
+        )
+        is_normal_label = predictions["true_class_name"].str.find(
+            "_N") != -1
+        is_tumor_label = predictions["true_class_name"].str.find(
+            "_T") != -1
+
+        is_normal = is_normal_label & is_normal_label_prob
+        is_tumor = is_tumor_label & is_tumor_label_prob
+
+        predictions["is_correct_pred_label"] = is_normal | is_tumor
+        predictions["is_correct_pred_label"].replace(
+            True, "T", inplace=True)
+        predictions["is_correct_pred_label"].replace(
+            False, "F", inplace=True)
+    return predictions
+
+def handle_ffpe_slides(pred_train_file, codebook, tissue_classes, cancer_type):
+    predictions_raw = dd.read_csv(pred_train_file, sep="\t", header=None)
+    predictions_raw['tile_ID'] = predictions_raw.iloc[:, 0]
+    predictions_raw.tile_ID = predictions_raw.tile_ID.map(
+        lambda x: x.split("/")[-1])
+    predictions_raw['tile_ID'] = predictions_raw['tile_ID'].str.replace(
+        ".jpg'", "")
+    predictions = predictions_raw.map_partitions(
+        lambda df: df.drop(columns=[0, 1]))
+    new_names = list(map(lambda x: str(x), codebook["class_id"]))
+    new_names.append('tile_ID')
+    predictions.columns = new_names
+    predictions = predictions.map_partitions(lambda x: x.assign(
+        pred_class_id=x.iloc[:, 0:41].idxmax(axis="columns")))
+    predictions["true_class_id"] = 41
+    predictions = predictions.map_partitions(lambda x: x.assign(
+        pred_probability=x.iloc[:, 0:41].max(axis="columns")))
+    predictions["true_class_name"] = predictions["true_class_id"].copy()
+    predictions["pred_class_name"] = predictions["pred_class_id"].copy()
+    predictions.pred_class_id = predictions.pred_class_id.astype(int)
+    res = dict(zip(codebook.class_id, codebook.class_name))
+    predictions = predictions.map_partitions(lambda x: x.assign(
+        pred_class_name=x.loc[:, 'pred_class_id'].replace(res)))
+    predictions = predictions.map_partitions(lambda x: x.assign(
+        true_class_name=x.loc[:, 'true_class_id'].replace(res)))
+    predictions["is_correct_pred"] = (
+        predictions["true_class_id"] == predictions["pred_class_id"])
+    predictions["is_correct_pred"] = predictions["is_correct_pred"].replace(
+        False, "F")
+    predictions.is_correct_pred = predictions.is_correct_pred.astype(str)
+    temp = predictions.map_partitions(lambda x: x.assign(
+        tumor_type=x["true_class_name"].str[:-2]))
+    temp = temp.map_partitions(lambda x: pd.merge(
+        x, tissue_classes, on="tumor_type", how="left"))
+    if (temp['tumor_type'].compute() == cancer_type).any():
+        # Probability for predicting tumor and normal label (regardless of tumor [tissue] type)
+        predictions["tumor_label_prob"] = np.nan
+        predictions["normal_label_prob"] = np.nan
+        predictions = predictions.map_partitions(
+            lambda x: x.assign(tumor_label_prob=x.loc[:, '41']))
+        predictions["is_correct_pred_label"] = np.nan
+    else:
+        # TO DO
+        predictions["tumor_label_prob"] = np.nan
+        predictions["normal_label_prob"] = np.nan
+        # predictions = predictions.map_partitions(lambda x: x.assign(tumor_label_prob=x.loc[:, '41']))
+        # predictions = predictions.map_partitions(lambda x: x.assign(tumor_label_prob=x.loc[:, '41']))
+    return predictions
+
+def post_process_predictions(pred_train_file, slide_type, path_codebook, path_tissue_classes, cancer_type):
+    """
+    Format the predicted tissue classes and derive tumor purity from the pred_train.txt file generated by myslim/bottleneck_predict.py.
+    The pred_train.txt file contains the tile ID, the true class id and the 42 predicted probabilities for the 42 tissue classes.
+
+    Args:
+        pred_train_file (str): path to the pred_train.txt file generated by myslim/bottleneck_predict.py
+        slide_type (str): type of tissue slide, "FF" or "FFPE"
+        path_codebook (str): path to the codebook.txt file
+        path_tissue_classes (str): path to the Tissue_classes.csv file
+        cancer_type (str): cancer type abbreviation, e.g. SKCM
+
+    Returns:
+        A dataframe (FF) or dask dataframe (FFPE) with one row per tile and the following columns:
+        - tile_ID
+        - pred_class_id and true_class_id: class ids defined in codebook.txt
+        - pred_class_name and true_class_name: class names (e.g. LUAD_T) defined in codebook.txt
+        - pred_probability: corresponding probability
+        - is_correct_pred (boolean): correctly predicted tissue class label
+        - tumor_label_prob and normal_label_prob: probability for predicting tumor and normal label (regardless of tumor or tissue type)
+        - is_correct_pred_label (boolean): correctly predicted 'tumor' or 'normal' tissue regardless of tumor or tissue type
+    """
+
+    # Initialize
+    codebook = pd.read_csv(path_codebook, delim_whitespace=True, header=None)
+    codebook.columns = ["class_name", "class_id"]
+    tissue_classes = pd.read_csv(path_tissue_classes, sep="\t")
+
+    # Read predictions
+    if slide_type == "FF":
+        return handle_ff_slides(pred_train_file=pred_train_file, codebook=codebook, tissue_classes=tissue_classes, cancer_type=cancer_type)
+    elif slide_type == "FFPE":
+        return handle_ffpe_slides(pred_train_file=pred_train_file, codebook=codebook, tissue_classes=tissue_classes, cancer_type=cancer_type)
+    else:
+        raise Exception("Invalid `slide_type`, please choose 'FF' or 'FFPE'")
+
+def main(args):
+    predictions = post_process_predictions(pred_train_file=args.pred_train_file, slide_type=args.slide_type, path_codebook=args.path_codebook,
+                                           path_tissue_classes=args.path_tissue_classes, cancer_type=args.cancer_type)
+    if (args.slide_type == "FF"):
+        # Save predictions to a tab-separated .txt file
+        predictions.to_csv(Path(args.output_dir, "predictions.txt"), sep="\t")
+    elif (args.slide_type == "FFPE"):
+        # Save predictions using parquet
+        def name_function(x): return f"predictions-{x}.parquet"
+        predictions.to_parquet(
+            path=args.output_dir, compression='gzip', name_function=name_function)
+    print("Finished post-processing of predictions...")
+
+
+if __name__ == "__main__":
+    args = get_args()
+    st = time.time()
+    main(args)
+    rt = time.time() - st
+    print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s")
diff --git a/Python/1_extract_histopathological_features/myslim/preprocessing/__init__.py b/lib/myslim/preprocessing/__init__.py
old mode 100644
new mode 100755
similarity index 100%
rename from Python/1_extract_histopathological_features/myslim/preprocessing/__init__.py
rename to lib/myslim/preprocessing/__init__.py
diff --git a/Python/1_extract_histopathological_features/myslim/preprocessing/inception_preprocessing.py b/lib/myslim/preprocessing/inception_preprocessing.py
old mode 100644
new mode 100755
similarity index 100%
rename from Python/1_extract_histopathological_features/myslim/preprocessing/inception_preprocessing.py
rename to lib/myslim/preprocessing/inception_preprocessing.py
diff --git a/Python/1_extract_histopathological_features/myslim/preprocessing/inception_preprocessing_dataAug.py b/lib/myslim/preprocessing/inception_preprocessing_dataAug.py
old mode 100644
new mode 100755
similarity index 100%
rename from Python/1_extract_histopathological_features/myslim/preprocessing/inception_preprocessing_dataAug.py
rename to lib/myslim/preprocessing/inception_preprocessing_dataAug.py
diff --git a/Python/1_extract_histopathological_features/myslim/preprocessing/preprocessing_factory.py b/lib/myslim/preprocessing/preprocessing_factory.py
old mode 100644
new mode 100755
similarity index 100%
rename from
Python/1_extract_histopathological_features/myslim/preprocessing/preprocessing_factory.py rename to lib/myslim/preprocessing/preprocessing_factory.py diff --git a/Python/1_extract_histopathological_features/myslim/run/bottleneck_predict.sh b/lib/myslim/run/bottleneck_predict.sh old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/run/bottleneck_predict.sh rename to lib/myslim/run/bottleneck_predict.sh diff --git a/Python/1_extract_histopathological_features/myslim/run/convert.sh b/lib/myslim/run/convert.sh old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/run/convert.sh rename to lib/myslim/run/convert.sh diff --git a/Python/1_extract_histopathological_features/myslim/run/eval.sh b/lib/myslim/run/eval.sh old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/run/eval.sh rename to lib/myslim/run/eval.sh diff --git a/Python/1_extract_histopathological_features/myslim/run/load_inception_v4.sh b/lib/myslim/run/load_inception_v4.sh old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/run/load_inception_v4.sh rename to lib/myslim/run/load_inception_v4.sh diff --git a/Python/1_extract_histopathological_features/myslim/run/load_inception_v4_alt.sh b/lib/myslim/run/load_inception_v4_alt.sh old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/run/load_inception_v4_alt.sh rename to lib/myslim/run/load_inception_v4_alt.sh diff --git a/Python/1_extract_histopathological_features/myslim/train_image_classifier.py b/lib/myslim/train_image_classifier.py old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/train_image_classifier.py rename to lib/myslim/train_image_classifier.py diff --git a/Python/1_extract_histopathological_features/myslim/train_image_classifier_jpeg.py b/lib/myslim/train_image_classifier_jpeg.py old mode 100644 new mode 100755 similarity index 100% rename from Python/1_extract_histopathological_features/myslim/train_image_classifier_jpeg.py rename to lib/myslim/train_image_classifier_jpeg.py diff --git a/run_pipeline.sh b/run_pipeline.sh deleted file mode 100644 index 22dfa5e..0000000 --- a/run_pipeline.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env bash -#SBATCH -J spotlight-docker -#SBATCH --mail-type=END,FAIL -#SBATCH --mail-user=JohnDoe@mail.com -#SBATCH --partition=veryhimem -#SBATCH --ntasks=1 -#SBATCH --nodes=1 -#SBATCH --cpus-per-task=1 -#SBATCH --mem=64G -#SBATCH --time=01:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --error=slurm_out/%x_%j.out - -module load apptainer - -# Directory 'spotlight_docker' - -work_dir="/path/to/spotlight_docker" -spotlight_sif="path/to/spotlight_sif" - -# Define directories/files in container (mounted) - -folder_images="/path/to/images_dir" -output_dir="/path/to/output_dir" - -# Relative to docker, i.e. start with /data - -checkpoint="/data/checkpoint/Retrained_Inception_v4/model.ckpt-100000" -clinical_files_dir="/data/path/to/clinical/TCGA/file.tsv" - -# Remaining parameters (this configuration has been tested) -slide_type="FF" -tumor_purity_threshold=80 -class_names="SKCM_T" -model_name="inception_v4" -is_tcga=false - -echo "Create output directory: ${output_dir}..." -mkdir -p ${output_dir} - -echo "Binding directories..." 
-# Bind directories + give r/o/w access (do not touch) -# Automatically binds the following -# - Included in repository: /data, /Python -# - Defined by used: {folder_images}, {output_dir} -export APPTAINER_BINDPATH=${work_dir}/data/:/project/data:ro,${folder_images}:/project/images:ro,${output_dir}:/project/output:rw,${work_dir}/run_scripts:/project/run_scripts:ro,${work_dir}/Python:/project/Python:ro - -echo "Run pipeline..." -echo "Extract histopathological features (1 out of 3)" -apptainer exec \ - --cleanenv \ - -c \ - ${spotlight_sif} \ - bash "/project/run_scripts/1_extract_histopatho_features.sh" ${checkpoint} ${clinical_files_dir} ${slide_type} ${class_names} ${tumor_purity_threshold} ${model_name} ${is_tcga} - -echo "Tile level cell type quanitification (2 out of 3)" -apptainer exec \ - --cleanenv \ - -c \ - ${spotlight_sif} \ - bash "/project/run_scripts/2_tile_level_cell_type_quantification.sh" $slide_type - -echo "Compute spatial features (3 out of 3)" -apptainer exec \ - --cleanenv \ - -c \ - ${spotlight_sif} \ - bash "/project/run_scripts/3_compute_spatial_features.sh" ${slide_type} - -echo "COMPLETED!" \ No newline at end of file diff --git a/run_scripts/1_extract_histopatho_features.sh b/run_scripts/1_extract_histopatho_features.sh deleted file mode 100755 index 8c7398c..0000000 --- a/run_scripts/1_extract_histopatho_features.sh +++ /dev/null @@ -1,100 +0,0 @@ -#!/bin/bash - -# Adjusted pipeline from PC-CHiP workflow: -# Fu, Y., Jung, A.W., Torne, R.V. et al. Pan-cancer computational histopathology reveals mutations, tumor composition and prognosis. Nat Cancer 1, 800–810 (2020).> - -# To execute the pipeline, define: input_dir, output_dir, cancer_type, class_name, checkpoint_path and TCGA_clinical_files. -# cancertype = TCGA_abbreviation_tumor/normal e.g. TCGA_COAD_tumor (see https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/tcga-study-abbreviations) -# class_name = e.g. 
COAD_T (see codebook.txt) - -# Define type of slide (Fresh-Frozen [FF] vs Formalin-Fixed Paraffin-Embedded [FFPE]) - -# General setup -repo_dir="/project" - -echo "REPO_DIR: ${repo_dir}" -echo "Model checkpoints: $1"; -echo "Dir w/ clinical files: $2"; - -echo "Slide type: $3"; -echo "Class names: $4"; -echo "Tumor purity threshold: $5"; -echo "Model name: $6"; - -# ---- Relative to /project ---- # -# User input -checkpoint_path=${repo_dir}/$1 -clinical_files_dir=${repo_dir}/$2 - -# Fixed dir -slides_dir=${repo_dir}/images -output_dir=${repo_dir}/output - -# Fixed files -path_codebook=${repo_dir}/Python/1_extract_histopathological_features/codebook.txt -path_tissue_classes=${repo_dir}/Python/1_extract_histopathological_features/tissue_classes.csv - -# ---- Parameters ---- # -slide_type=$3 -class_names=$4 -tumor_purity_threshold=$5 -model_name=$6 -is_tcga=$7 # true or false - -# ---------------------------------- # -# ---- create new clinical file ---- # -# ---------------------------------- # - -if [ "$is_tcga" = true ] -then - python $repo_dir/Python/1_extract_histopathological_features/myslim/create_clinical_file.py \ - --class_names $class_names \ - --clinical_files_dir $clinical_files_dir \ - --tumor_purity_threshold $tumor_purity_threshold \ - --output_dir $output_dir/1_histopathological_features \ - --path_codebook ${path_codebook} - - clinical_file=$output_dir/1_histopathological_features/generated_clinical_file.txt -else - clinical_file=${repo_dir}/data/tmp_clinical_file.txt - ls $slides_dir | tee ${output_dir}/list_images.txt - awk -v a=81 -v b="${class_names}" -v c=41 'FNR==NR{print; next}{split($1, tmp, "."); OFS="\t"; print tmp[1], tmp[1], $1, a, b, c}' $clinical_file ${output_dir}/list_images.txt > $output_dir/1_histopathological_features/final_clinical_file.txt - clinical_file=$output_dir/1_histopathological_features/final_clinical_file.txt -fi - -# --------------------------------------------------------- # -# ---- image tiling and image conversion to TF records ---- # -# --------------------------------------------------------- # - -python $repo_dir/Python/1_extract_histopathological_features/pre_processing.py \ - --slides_folder $slides_dir \ - --output_folder $output_dir/1_histopathological_features \ - --clinical_file_path $clinical_file - -# ------------------------------------------------------ # -# ---- Compute predictions and bottlenecks features ---- # -# ------------------------------------------------------ # - -# Compute predictions and bottlenecks features using the Retrained_Inception_v4 checkpoints -python $repo_dir/Python/1_extract_histopathological_features/myslim/bottleneck_predict.py \ - --num_classes=42 \ - --bot_out=$output_dir/1_histopathological_features/bot_train.txt \ - --pred_out=$output_dir/1_histopathological_features/pred_train.txt \ - --model_name $model_name \ - --checkpoint_path $checkpoint_path \ - --file_dir $output_dir/1_histopathological_features/process_train - -# ----------------------------------------------------- # -# ---- Post-processing of predictions and futures ----- # -# ----------------------------------------------------- # - -# Transform bottleneck features, add dummy variable for tissue type for each tile, save predictions in seperate files -# (= input for pipeline part 2) - -python $repo_dir/Python/1_extract_histopathological_features/post_processing.py \ - --output_dir $output_dir/1_histopathological_features \ - --slide_type $slide_type \ - --path_codebook $path_codebook \ - --path_tissue_classes $path_tissue_classes \ - 
-# # outputs two files: $output_dir/features $output_dir/predictions diff --git a/run_scripts/2_tile_level_cell_type_quantification.sh b/run_scripts/2_tile_level_cell_type_quantification.sh deleted file mode 100644 index 3ca9e9a..0000000 --- a/run_scripts/2_tile_level_cell_type_quantification.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash - -##################################################################### -## Compute cell-type quantification from transfer learning models ## -##################################################################### - -# ----------------------------------- # -# --------- Setup file paths -------- # -# ----------------------------------- # - -# General setup -repo_dir="/project" - -# command line arguments -echo "Slide type: $1"; - -# Define type of slide -slide_type=$1 - -# Fixed dir -output_dir=${repo_dir}/output -histopatho_features_dir=${output_dir}/1_histopathological_features - -# Transfer Learning trained models directory (default: use of FF here) -models_dir=${repo_dir}/data/TF_models/SKCM_FF -var_names_path=${repo_dir}/Python/2_train_multitask_models/task_selection_names.pkl - -# Compute predictions using models learned from unseen folds -prediction_mode="test" # (tcga_validation, tcga_train_validation) - -echo "Prediction mode: $prediction_mode" - -# ---------------------------------------------------- # -# ---- Predict cell type abundances on tile level ---- # -# ---------------------------------------------------- # - -# For now, we use models trained on FF slides - -python ${repo_dir}/Python/2_train_multitask_models/tile_level_cell_type_quantification.py \ - --models_dir $models_dir \ - --output_dir "$output_dir/2_tile_level_quantification" \ - --histopatho_features_dir $histopatho_features_dir \ - --prediction_mode $prediction_mode \ - --var_names_path $var_names_path \ - --slide_type $slide_type diff --git a/run_scripts/3_compute_spatial_features.sh b/run_scripts/3_compute_spatial_features.sh deleted file mode 100644 index 3f27244..0000000 --- a/run_scripts/3_compute_spatial_features.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash - -############################### -## Compute spatial features ## -############################### - -# ----------------------------------- # -# --------- Setup file paths -------- # -# ----------------------------------- # - -# General setup -repo_dir="/project" - -# command line rguments -echo "Slide type: $1"; - - -# Fixed dir -output_dir=${repo_dir}/output - -# Fixed files -tile_quantification_path="${output_dir}/2_tile_level_quantification/test_tile_predictions_proba.csv" - -# Define type of slide -slide_type=$1 - -# ---------------------------------- # -# ---- Compute all features -------- # -# ---------------------------------- # -run_mode=1 -python $repo_dir/Python/3_spatial_characterization/computing_features.py \ - --workflow_mode $run_mode \ - --tile_quantification_path $tile_quantification_path \ - --output_dir $output_dir/3_spatial_features \ - --metadata_path $output_dir/3_spatial_features/metadata.csv \ - --slide_type $slide_type # OPTIONAL BY DEFAULT FF - # --cell_types=$cell_types \ # OPTIONAL - #--graphs_path=$graphs_path # OPTIONAL - -# # ---------------------------------- # -# # ---- Compute network features ---- # -# # ---------------------------------- # -# workflow=2 -# python $repo_path/Python/computing_features.py \ -# --workflow=$workflow \ -# --tile_quantification_path=$tile_quantification_path \ -# --output_dir=$output_dir -# # --slide_type=$slide_type \ # OPTIONAL BY DEFAULT FF -# # 
diff --git a/run_scripts/3_compute_spatial_features.sh b/run_scripts/3_compute_spatial_features.sh
deleted file mode 100644
index 3f27244..0000000
--- a/run_scripts/3_compute_spatial_features.sh
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/bin/bash
-
-###############################
-## Compute spatial features ##
-###############################
-
-# ----------------------------------- #
-# --------- Setup file paths -------- #
-# ----------------------------------- #
-
-# General setup
-repo_dir="/project"
-
-# command line arguments
-echo "Slide type: $1";
-
-# Fixed dir
-output_dir=${repo_dir}/output
-
-# Fixed files
-tile_quantification_path="${output_dir}/2_tile_level_quantification/test_tile_predictions_proba.csv"
-
-# Define type of slide
-slide_type=$1
-
-# ---------------------------------- #
-# ---- Compute all features -------- #
-# ---------------------------------- #
-run_mode=1
-python $repo_dir/Python/3_spatial_characterization/computing_features.py \
-    --workflow_mode $run_mode \
-    --tile_quantification_path $tile_quantification_path \
-    --output_dir $output_dir/3_spatial_features \
-    --metadata_path $output_dir/3_spatial_features/metadata.csv \
-    --slide_type $slide_type # OPTIONAL BY DEFAULT FF
-    # --cell_types=$cell_types \ # OPTIONAL
-    # --graphs_path=$graphs_path # OPTIONAL
-
-# # ---------------------------------- #
-# # ---- Compute network features ---- #
-# # ---------------------------------- #
-# workflow=2
-# python $repo_path/Python/computing_features.py \
-#     --workflow=$workflow \
-#     --tile_quantification_path=$tile_quantification_path \
-#     --output_dir=$output_dir
-#     # --slide_type=$slide_type \ # OPTIONAL BY DEFAULT FF
-#     # --cell_types=$cell_types \ # OPTIONAL
-#     # --graphs_path=$graphs_path \ # OPTIONAL
-
-# # ------------------------------------- #
-# # ---- Compute clustering features ---- #
-# # ------------------------------------- #
-# workflow=3
-# python $repo_path/Python/computing_features.py \
-#     --workflow=$workflow \
-#     --tile_quantification_path=$tile_quantification_path \
-#     --output_dir=$output_dir \
-#     # --slide_type=$slide_type \ # OPTIONAL BY DEFAULT FF
-#     # --cell_types=$cell_types \ # OPTIONAL
-#     # --graphs_path=$graphs_path \ # OPTIONAL
-
-# # -------------------------- #
-# # ---- Combine features ---- #
-# # -------------------------- #
-# workflow=4
-# python $repo_path/Python/computing_features.py \
-#     --workflow=$workflow \
-#     --tile_quantification_path=$tile_quantification_path \
-#     --output_dir=$output_dir \
-#     # --slide_type=$slide_type \ # OPTIONAL BY DEFAULT FF
-#     # --cell_types=$cell_types \ # OPTIONAL
-#     # --graphs_path=$graphs_path \ # OPTIONAL
diff --git a/run_scripts/create_tmp_clinical_file.sh b/run_scripts/create_tmp_clinical_file.sh
deleted file mode 100644
index f9e692c..0000000
--- a/run_scripts/create_tmp_clinical_file.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/bash
-clinical_file="$data_example"
-ls $slides_dir | tee list_images.txt
-awk -v a=81 -v b="SKCM_T" -v c=41 'FNR==NR{print; next}{split($1, tmp, "."); print tmp[1], tmp[1], $1, a, b, c}' $clinical_file list_images.txt > $clinical_file/final_clinical_file.txt
-
diff --git a/run_scripts/requirements.txt b/run_scripts/requirements.txt
deleted file mode 100644
index 79d7ac6..0000000
--- a/run_scripts/requirements.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-caffeine==0.5
-GitPython==3.1.30
-joblib==1.1.1
-matplotlib==3.6.2
-networkx==3.0
-numpy==1.24.1
-pandas==1.5.2
-Pillow==9.4.0
-scikit_learn==1.2.0
-scipy==1.10.0
-seaborn==0.12.2
-six==1.16.0
-tensorflow==2.11.0
-tf_slim==1.1.0
-openpyxl
\ No newline at end of file
diff --git a/run_scripts/task_selection_names.pkl b/run_scripts/task_selection_names.pkl
deleted file mode 100644
index f1e9f6763a162148daead088d8861f00080f5519..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 478
[478-byte base85-encoded pickle payload truncated in source]
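Patch 10 below stages loose *.parquet feature files into a features_format_parquet/ directory via glob.glob1("", "*.parquet"). glob.glob1 is an undocumented helper inside the glob module, so a sketch of the same staging step using only the documented API may be useful; the target directory name matches the patch, everything else is an equivalent rewrite rather than the patch's own code.

# Sketch: the parquet-staging step with the public glob/pathlib API.
import glob
import os
from pathlib import Path

def stage_parquet_files(target_dir="features_format_parquet"):
    parquet_files = glob.glob("*.parquet")   # matches in the working directory
    if parquet_files:
        os.makedirs(target_dir, exist_ok=True)  # no-op if it already exists
        for parquet_file in parquet_files:
            # os.replace overwrites atomically on the same filesystem
            os.replace(parquet_file, Path(target_dir, parquet_file))
    return Path(target_dir)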
parser.add_argument("--histopatho_features_dir", type=str, - help="Path to histopathological features file", required=False, default = "") + help="Path to histopathological features file", required=False, default="") parser.add_argument("--var_names_path", type=str, help="Path to variable names pkl file", required=True) - parser.add_argument("--features_input", type = str, default = None) + parser.add_argument("--features_input", type=str, default=None) parser.add_argument("--prediction_mode", type=str, help="Choose prediction mode 'performance' or 'all' (default='all')", default="all", required=False) parser.add_argument("--n_outerfolds", type=int, default=5, help="Number of outer folds (default=5)", required=False) - parser.add_argument("--cell_types_path", type=str, default="", + parser.add_argument("--cell_types", type=str, default=None, help="List of cell types by default=['T_cells','CAFs', 'tumor_purity','endothelial_cells']", required=False) parser.add_argument( "--slide_type", help="Type of tissue slide (FF or FFPE)", type=str, required=True) @@ -41,13 +43,24 @@ def get_args(): if (arg.features_input is None): if arg.slide_type == "FF": - arg.features_input = Path(arg.histopatho_features_dir, "features.txt") + arg.features_input = Path( + arg.histopatho_features_dir, "features.txt") elif arg.slide_type == "FFPE": - arg.features_input = Path(arg.histopatho_features_dir, "features_format_parquet") + parquet_files = glob.glob1("", "*.parquet") + if (len(parquet_files) > 0): + if not (os.path.isdir("features_format_parquet")): + os.mkdir("features_format_parquet") + for parquet_file in parquet_files: + os.replace(parquet_file, Path( + "features_format_parquet", parquet_file)) + + arg.features_input = Path( + arg.histopatho_features_dir, "features_format_parquet") if (not Path(arg.features_input).exists()): - raise Exception("Invalid argument, please check `features_input` or `histopatho_features_dir`") + raise Exception( + "Invalid argument, please check `features_input` or `histopatho_features_dir`") if ((arg.output_dir != "") & (not os.path.isdir(arg.output_dir))): # Create an empty folder for TF records if folder doesn't exist @@ -55,7 +68,7 @@ def get_args(): return arg -def tile_level_quantification(features_input, models_dir, var_names_path, histopatho_features_dir="", prediction_mode="all", n_outerfolds=5, cell_types_path="", slide_type="FF"): +def tile_level_quantification(features_input, models_dir, var_names_path, prediction_mode="all", n_outerfolds=5, cell_types="", slide_type="FF"): """ Quantify the cell type abundances for the different tiles. 
     Creates three files: (1) z-scores and
@@ -74,11 +87,8 @@ def tile_level_quantification(features_input, models_dir, var_names_path, histop
     """
     # Read data
-    cell_types = DEFAULT_CELL_TYPES
-    if os.path.isfile(cell_types_path):
-        cell_types = pd.read_csv(
-            cell_types_path, header=None).to_numpy().flatten()
-    print(cell_types)
+    if cell_types is None:
+        cell_types = DEFAULT_CELL_TYPES
 
     var_names = joblib.load(var_names_path)
     print(var_names)
@@ -104,7 +114,6 @@ def tile_level_quantification(features_input, models_dir, var_names_path, histop
         metadata = metadata.compute()
 
     print("Computing tile predictions for each cell type...")
-    ##############################################################################
     # If predicting on all FFPE slides, we do this by chunks:  #
     if any([prediction_mode == item for item in ['tcga_train_validation', 'test']]):
@@ -176,20 +185,19 @@ def main(args):
     tile_predictions, pred_proba = tile_level_quantification(
         features_input=args.features_input,
         models_dir=args.models_dir,
-        histopatho_features_dir=args.histopatho_features_dir,
         prediction_mode=args.prediction_mode,
         n_outerfolds=args.n_outerfolds,
-        cell_types_path=args.cell_types_path,
+        cell_types=args.cell_types,
         var_names_path=args.var_names_path,
         slide_type=args.slide_type)
-
     tile_predictions.to_csv(
-            Path(args.output_dir, f"{args.prediction_mode}_tile_predictions_zscores.csv"), sep="\t", index=False)
+        Path(args.output_dir, f"{args.prediction_mode}_tile_predictions_zscores.csv"), sep="\t", index=False)
     pred_proba.to_csv(
         Path(args.output_dir, f"{args.prediction_mode}_tile_predictions_proba.csv"), sep="\t", index=False)
     print("Finished tile predictions...")
 
+
 if __name__ == "__main__":
     args = get_args()
     st = time.time()
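One caveat worth flagging in the diff above: the reworked --cell_types flag is declared as a single str, and the function only falls back to DEFAULT_CELL_TYPES (a list) when the flag is omitted, so a caller passing several cell types would hand the downstream code one long string. A defensive comma-separated parse along the following lines would close that gap; this is a suggested hardening, not part of the patch, and the DEFAULT_CELL_TYPES list is copied from the argument's help text.

# Sketch: parse a comma-separated --cell_types value into a list.
DEFAULT_CELL_TYPES = ["T_cells", "CAFs", "tumor_purity", "endothelial_cells"]

def parse_cell_types(raw=None):
    if raw is None:                 # flag omitted -> pipeline defaults
        return DEFAULT_CELL_TYPES
    return [ct.strip() for ct in raw.split(",") if ct.strip()]

assert parse_cell_types(None) == DEFAULT_CELL_TYPES
assert parse_cell_types("T_cells, CAFs") == ["T_cells", "CAFs"]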
diff --git a/conf/modules.config b/conf/modules.config
index 437f64d..61d4b67 100755
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -11,30 +11,29 @@
 */
 
 process {
-
-
-
-    // Setting defaults
     publishDir = [
         path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
         mode: params.publish_dir_mode,
-        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        saveAs: { filename ->
+            if (filename.equals('versions.yml') || filename == "ok.txt") { null }
+            else { filename }
+        }
     ]
 
     // The three main subworkflows
-    withLabel: 'extract_histo_patho_features' {
+    withName: 'BOTTLENECK_PREDICT' {
         publishDir = [
             path: { "${params.outdir}/1_extract_histopatho_features" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            // mode: params.publish_dir_mode,
+            // saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
 
     withLabel: 'tf_learning_celltyp_quant' {
         publishDir = [
             path: { "${params.outdir}/2_tile_level_quantification" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            // mode: params.publish_dir_mode,
+            // saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
 
@@ -42,8 +41,8 @@ process {
     withLabel: 'spatial_clustering_features' {
         publishDir = [
             path: { "${params.outdir}/3_spatial_features/clustering_features" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            // mode: params.publish_dir_mode,
+            // saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
 
@@ -51,8 +50,8 @@ process {
     withLabel: 'spatial_features' {
         publishDir = [
             path: { "${params.outdir}/3_spatial_features" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            // mode: params.publish_dir_mode,
+            // saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
 
@@ -60,8 +59,8 @@ process {
     withLabel: 'spatial_network_features' {
         publishDir = [
             path: { "${params.outdir}/3_spatial_features/network_features" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            // mode: params.publish_dir_mode,
+            // saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
 
@@ -76,7 +75,7 @@ process {
         publishDir = [
             path: { "${params.outdir}/1_extract_histopatho_features/tiles" },
             mode: "symlink",
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            // saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
 
@@ -84,7 +83,7 @@ process {
         publishDir = [
             path: { "${params.outdir}/1_extract_histopatho_features/process_train" },
             mode: "symlink",
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            // saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
 
@@ -99,7 +98,7 @@ process {
         publishDir = [
             path: { "${params.outdir}/3_spatial_features/network_features" },
             mode: "copy",
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            // saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
 
@@ -112,8 +111,8 @@ process {
         ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
         publishDir = [
            path: { "${params.outdir}/multiqc" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            // mode: params.publish_dir_mode,
+            // saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
        ]
    }