From ac815cb1ba5f1cdab39b7cd3305e21c9bb0585aa Mon Sep 17 00:00:00 2001 From: Ilya Flyamer Date: Thu, 18 Feb 2021 19:11:43 +0000 Subject: [PATCH] Merge loopability across runs with simple CV filtering --- config/config.yaml | 1 + workflow/Snakefile | 35 ++++++++++++++++++++++++++--------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index e387182..5b50f5b 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -105,6 +105,7 @@ pileups: loopability: do: True n_random_samples: 1 + CV_quantile_filter: 0.995 resolutions: - 1000 - 10000 diff --git a/workflow/Snakefile b/workflow/Snakefile index 6cf4407..da909b5 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -211,10 +211,10 @@ if config["loopability"]["do"]: loopability_shifts = config["loopability"]["shifts"] if loopability_shifts > 0: loopability_norms.append(f"{loopability_shifts}-shifts") - if config["loopability"]["expected"]: - loopability_norms.append("expected") - if len(loopability_norms) == 0: - raise ValueError("Please use expected or shifts to normalize loopability") + if config['loopability']['expected']: + loopability_norms.append('expected') + if len(loopability_norms)==0: + raise ValueError('Please use expected or shifts to normalize loopability') if config["insulation"]["do"]: @@ -369,10 +369,9 @@ if config["loopability"]["do"]: ): continue loopability_tables += expand( - f"{loopability_folder}/{sample}-{{resolution}}_over_{bedname}_{{norm}}_loopability_seed{{seed}}_{{args}}.tsv", + f"{loopability_folder}/merged/{sample}-{{resolution}}_over_{bedname}_{{norm}}_loopability_{{args}}.tsv", resolution=config["loopability"]["resolutions"], norm=loopability_norms, - seed=np.arange(config["loopability"]["n_random_samples"]), args=list(loopability_args.keys()), ) saddles = ( @@ -488,6 +487,25 @@ rule make_pileups_distance: f"coolpup.py {{input.cooler}}::resolutions/{{wildcards.resolution}} {{input.bedfile}} --basetype {{params.format}} --basetype {{params.format}} --n_proc {{threads}} --expected {{input.expected}} --mindist {{wildcards.mindist}} --maxdist {{wildcards.maxdist}} --outdir {{params.outdir}} --outname {{params.outname}}" +rule average_loopability: + input: + lambda wildcards: [f"{loopability_folder}/{{sample}}-{{resolution}}_over_{{bedname}}_{{norm}}_loopability_seed{seed}_{{args}}.tsv" + for seed in np.arange(config['loopability']['n_random_samples'])] + output: + f"{loopability_folder}/merged/{{sample}}-{{resolution,[0-9]+}}_over_{{bedname}}_{{norm}}_loopability_{{args}}.tsv" + threads: 1 + params: + CV_quantile_filter=lambda wildcards: config['loopability'].get('CV_quantile_filter', 0.995) + resources: + mem_mb=1024, + runtime=5 + run: + dfs = [pd.read_csv(f, sep='\t').dropna() for f in input] + df = pd.concat([df[(df['CV3']