Skip to content

Commit

Permalink
Merge branches 'development' and 'master' of https://github.com/kircherlab/MPRAsnakeflow into development
Browse files Browse the repository at this point in the history
  • Loading branch information
Max Schubach committed Dec 5, 2024
2 parents 02076d9 + 061d78a commit f1944b8
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 88 deletions.
21 changes: 4 additions & 17 deletions workflow/rules/assigned_counts.smk
Original file line number Diff line number Diff line change
Expand Up @@ -211,23 +211,10 @@ rule assigned_counts_combine_replicates_barcode_output:
thresh=lambda wc: config["experiments"][wc.project]["configs"][wc.config][
"filter"
]["bc_threshold"],
replicates=lambda wc: " ".join(
[
"--replicate %s" % r
for r in getReplicatesOfCondition(wc.project, wc.condition)
]
),
bc_counts=lambda wc: " ".join(
[
"--counts %s" % c
for c in expand(
"results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_{replicate}_barcode_assigned_counts.tsv.gz",
replicate=getReplicatesOfCondition(wc.project, wc.condition),
project=wc.project,
condition=wc.condition,
assignment=wc.assignment,
config=wc.config,
)
"--counts %s results/experiments/%s/assigned_counts/%s/%s/%s_%s_barcode_assigned_counts.tsv.gz" % (rep, wc.project, wc.assignment, wc.config, wc.condition, rep)
for rep in getReplicatesOfCondition(wc.project, wc.condition)
]
),
log:
Expand All @@ -236,9 +223,9 @@ rule assigned_counts_combine_replicates_barcode_output:
),
shell:
"""
python {input.script} {params.bc_counts} \
python {input.script} \
{params.bc_counts} \
--threshold {params.thresh} \
{params.replicates} \
--output-threshold {output.bc_merged_thresh} \
--output {output.bc_merged_all} &> {log}
"""
Expand Down
4 changes: 2 additions & 2 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -536,9 +536,9 @@ def withoutZeros(project, conf):


def getSplitNumber():
splits = []
splits = [1]

for assignment in config["assignments"]:
for assignment in getAssignments():
splits += [config["assignments"][assignment]["alignment_tool"]["split_number"]]

return max(splits)
Expand Down
64 changes: 32 additions & 32 deletions workflow/rules/qc_report.smk
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,18 @@ rule qc_report_assoc:
cp {input.quarto_script} {output.quarto_file};
cd `dirname {output.quarto_file}`;
quarto render `basename {output.quarto_file}` --output `basename {output.assi_file}` \
-P assignment:{wildcards.assignment} \
-P bc_length:{params.bc_length} \
-P fw:{params.fw} \
-P rev:{params.rev} \
-P bc:{params.bc} \
-P workdir:{params.workdir} \
-P design_file:{input.design_file} \
-P design_file_checked:{input.design_file_checked} \
-P configs:{wildcards.assignment_config} \
-P plot_file:{input.plot} \
-P statistic_filter_file:{input.statistic_filter} \
-P statistic_all_file:{input.statistic_all}
-P "assignment:{wildcards.assignment}" \
-P "bc_length:{params.bc_length}" \
-P "fw:{params.fw}" \
-P "rev:{params.rev}" \
-P "bc:{params.bc}" \
-P "workdir:{params.workdir}" \
-P "design_file:{input.design_file}" \
-P "design_file_checked:{input.design_file_checked}" \
-P "configs:{wildcards.assignment_config}" \
-P "plot_file:{input.plot}" \
-P "statistic_filter_file:{input.statistic_filter}" \
-P "statistic_all_file:{input.statistic_all}"
) &> {log}
"""

Expand Down Expand Up @@ -110,25 +110,25 @@ rule qc_report_count:
cp {input.quarto_script} {output.quarto_file};
cd `dirname {output.quarto_file}`;
quarto render `basename {output.quarto_file}` --output `basename {output.count_file}` \
-P assignment:{wildcards.assignment} \
-P project:{wildcards.project} \
-P dna_over_rna_plot:{input.dna_over_rna} \
-P dna_over_rna_thresh_plot:{input.dna_over_rna_thresh} \
-P dna_oligo_coor_min_thre_plot:{input.dna_oligo_coor_min_thre_plot} \
-P rna_oligo_coor_min_thre_plot:{input.rna_oligo_coor_min_thre_plot} \
-P dna_oligo_coor_plot:{input.dna_oligo_coor_plot} \
-P rna_oligo_coor_plot:{input.rna_oligo_coor_plot} \
-P ratio_oligo_coor_plot:{input.ratio_oligo_coor_plot} \
-P ratio_oligo_min_thre_plot:{input.ratio_oligo_min_thre_plot} \
-P statistics_all_merged:{input.statistics_all_merged} \
-P counts_per_oligo_dna:{input.counts_per_oligo_dna} \
-P counts_per_oligo_rna:{input.counts_per_oligo_rna} \
-P statistics_all_single:{input.statistics_all_single} \
-P activity_all:{input.activity_all} \
-P activity_thresh:{input.activity_thresh} \
-P statistics_all_oligo_cor_all:{input.statistics_all_oligo_cor_all} \
-P statistics_all_oligo_cor_thresh:{input.statistics_all_oligo_cor_thresh} \
-P thresh:{params.thresh} \
-P workdir:{params.workdir}
-P "assignment:{wildcards.assignment}" \
-P "project:{wildcards.project}" \
-P "dna_over_rna_plot:{input.dna_over_rna}" \
-P "dna_over_rna_thresh_plot:{input.dna_over_rna_thresh}" \
-P "dna_oligo_coor_min_thre_plot:{input.dna_oligo_coor_min_thre_plot}" \
-P "rna_oligo_coor_min_thre_plot:{input.rna_oligo_coor_min_thre_plot}" \
-P "dna_oligo_coor_plot:{input.dna_oligo_coor_plot}" \
-P "rna_oligo_coor_plot:{input.rna_oligo_coor_plot}" \
-P "ratio_oligo_coor_plot:{input.ratio_oligo_coor_plot}" \
-P "ratio_oligo_min_thre_plot:{input.ratio_oligo_min_thre_plot}" \
-P "statistics_all_merged:{input.statistics_all_merged}" \
-P "counts_per_oligo_dna:{input.counts_per_oligo_dna}" \
-P "counts_per_oligo_rna:{input.counts_per_oligo_rna}" \
-P "statistics_all_single:{input.statistics_all_single}" \
-P "activity_all:{input.activity_all}" \
-P "activity_thresh:{input.activity_thresh}" \
-P "statistics_all_oligo_cor_all:{input.statistics_all_oligo_cor_all}" \
-P "statistics_all_oligo_cor_thresh:{input.statistics_all_oligo_cor_thresh}" \
-P "thresh:{params.thresh}" \
-P "workdir:{params.workdir}"
) &> {log}
"""
43 changes: 7 additions & 36 deletions workflow/scripts/count/merge_replicates_barcode_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
"counts_files",
required=True,
multiple=True,
type=click.Path(exists=True, readable=True),
help="Assigned barcode count file",
type=(str,click.Path(exists=True, readable=True)),
help="Replicate name and assigned barcode count file",
)
@click.option(
"--threshold",
Expand All @@ -19,14 +19,6 @@
type=int,
help="Number of required barcodes (default 10)",
)
@click.option(
"--replicate",
"replicates",
multiple=True,
type=str,
help="replicate name",
required=True,
)
@click.option(
"--output",
"output_threshold_file",
Expand All @@ -41,39 +33,18 @@
type=click.Path(writable=True),
help="Output file.",
)
def cli(counts_files, bc_thresh, replicates, output_threshold_file, output_file):
def cli(counts_files, bc_thresh, output_threshold_file, output_file):
"""
Merge the associated barcode count files of all replicates.
"""

# ensure there are as many replicates as there are files
if len(replicates) != len(counts_files):
raise (
click.BadParameter(
"Number of replicates ({}) doesn't equal the number of files ({}).".format(
len(replicates), len(counts_files)
)
)
)

# check if every file exists
for file in counts_files:
if not os.path.exists(file):
raise (click.BadParameter("{}: file not found".format(file)))

all_reps = []
for file in counts_files:
curr_rep = -1
# find the replicate name of the current file
for rep in replicates:
if rep in os.path.basename(file).split("_")[1]:
curr_rep = rep
break
if curr_rep == -1:
raise (click.BadParameter("{}: incorrect file".format(file)))
replicates = []
for rep, file in counts_files:
df = pd.read_csv(file, sep="\t")
df['replicate'] = curr_rep
df['replicate'] = rep
all_reps.append(df)
replicates.append(rep)

df = pd.concat(all_reps)
df = df[df["oligo_name"] != "no_BC"]
Expand Down
2 changes: 1 addition & 1 deletion workflow/scripts/variants/correlateVariantTables.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def filterOnThreshold(variants, threshold):
variants_2 = filterOnThreshold(variants_2, bc_threshold)

click.echo("Join variants file...")
variants_join = variants_1.join(variants_2, how="inner", lsuffix='_A', rsuffix='_B')[["log2_expression_A", "log2_expression_B"]]
variants_join = variants_1.join(variants_2, how="inner", lsuffix='_A', rsuffix='_B')[["log2FoldChange_expression_A", "log2FoldChange_expression_B"]]

output = pd.concat([output, pd.DataFrame([[condition, rep_1, rep_2, variants_join.shape[0], bc_threshold, variants_join.corr(method="pearson").iloc[0,1],variants_join.corr(method="spearman").iloc[0,1]]])], ignore_index=True)

Expand Down

0 comments on commit f1944b8

Please sign in to comment.