generated from CCBR/CCBR_NextflowTemplate
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
14 changed files
with
396 additions
and
57 deletions.
There are no files selected for viewing
File renamed without changes.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
#!/usr/bin/env python3 | ||
|
||
""" | ||
adapted from https://github.com/nf-core/chipseq/blob/51eba00b32885c4d0bec60db3cb0a45eb61e34c5/bin/check_samplesheet.py | ||
""" | ||
|
||
import os | ||
import sys | ||
import errno | ||
import argparse | ||
|
||
|
||
def parse_args(args=None): | ||
Description = "Reformat samplesheet file and check its contents." | ||
Epilog = "Example usage: python check_samplesheet.py <FILE_IN> <FILE_OUT>" | ||
|
||
parser = argparse.ArgumentParser(description=Description, epilog=Epilog) | ||
parser.add_argument("FILE_IN", help="Input samplesheet file.") | ||
parser.add_argument("FILE_OUT", help="Output file.") | ||
return parser.parse_args(args) | ||
|
||
|
||
def make_dir(path): | ||
if len(path) > 0: | ||
try: | ||
os.makedirs(path) | ||
except OSError as exception: | ||
if exception.errno != errno.EEXIST: | ||
raise exception | ||
|
||
|
||
def print_error(error, context="Line", context_str=""): | ||
error_str = "ERROR: Please check samplesheet -> {}".format(error) | ||
if context != "" and context_str != "": | ||
error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format( | ||
error, context.strip(), context_str.strip() | ||
) | ||
print(error_str) | ||
sys.exit(1) | ||
|
||
|
||
def check_samplesheet(file_in, file_out): | ||
""" | ||
This function checks that the samplesheet follows the following structure: | ||
sample,fastq_1,fastq_2 | ||
SPT5_T0_REP1,SRR1822153_1.fastq.gz,SRR1822153_2.fastq.gz | ||
SPT5_T0_REP2,SRR1822154_1.fastq.gz,SRR1822154_2.fastq.gz | ||
""" | ||
|
||
sample_mapping_dict = {} | ||
with open(file_in, "r", encoding="utf-8-sig") as fin: | ||
## Check header | ||
MIN_COLS = 2 | ||
HEADER = ["sample", "fastq_1", "fastq_2"] | ||
header = [x.strip('"') for x in fin.readline().strip().split(",")] | ||
if header[: len(HEADER)] != HEADER: | ||
print( | ||
f"ERROR: Please check samplesheet header -> {','.join(header)} != {','.join(HEADER)}" | ||
) | ||
sys.exit(1) | ||
|
||
## Check sample entries | ||
for line in fin: | ||
lspl = [x.strip().strip('"') for x in line.strip().split(",")] | ||
|
||
# Check valid number of columns per row | ||
if len(lspl) < len(HEADER): | ||
print_error( | ||
"Invalid number of columns (minimum = {})!".format(len(HEADER)), | ||
"Line", | ||
line, | ||
) | ||
num_cols = len([x for x in lspl if x]) | ||
if num_cols < MIN_COLS: | ||
print_error( | ||
"Invalid number of populated columns (minimum = {})!".format( | ||
MIN_COLS | ||
), | ||
"Line", | ||
line, | ||
) | ||
|
||
## Check sample name entries | ||
sample, fastq_1, fastq_2 = lspl[: len(HEADER)] | ||
if sample.find(" ") != -1: | ||
print( | ||
f"WARNING: Spaces have been replaced by underscores for sample: {sample}" | ||
) | ||
sample = sample.replace(" ", "_") | ||
if not sample: | ||
print_error("Sample entry has not been specified!", "Line", line) | ||
|
||
## Check FastQ file extension | ||
for fastq in [fastq_1, fastq_2]: | ||
if fastq: | ||
if fastq.find(" ") != -1: | ||
print_error("FastQ file contains spaces!", "Line", line) | ||
if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"): | ||
print_error( | ||
"FastQ file does not have extension '.fastq.gz' or '.fq.gz'!", | ||
"Line", | ||
line, | ||
) | ||
|
||
## Auto-detect paired-end/single-end | ||
sample_info = [] ## [single_end, fastq_1, fastq_2] | ||
if sample and fastq_1 and fastq_2: ## Paired-end short reads | ||
sample_info = ["0", fastq_1, fastq_2] | ||
elif sample and fastq_1 and not fastq_2: ## Single-end short reads | ||
sample_info = ["1", fastq_1, fastq_2] | ||
else: | ||
print_error("Invalid combination of columns provided!", "Line", line) | ||
|
||
## Create sample mapping dictionary = {sample: [[ single_end, fastq_1, fastq_2,]]} | ||
if sample not in sample_mapping_dict: | ||
sample_mapping_dict[sample] = [sample_info] | ||
else: | ||
if sample_info in sample_mapping_dict[sample]: | ||
print_error("Samplesheet contains duplicate rows!", "Line", line) | ||
else: | ||
sample_mapping_dict[sample].append(sample_info) | ||
|
||
## Write validated samplesheet with appropriate columns | ||
if len(sample_mapping_dict) > 0: | ||
out_dir = os.path.dirname(file_out) | ||
make_dir(out_dir) | ||
with open(file_out, "w") as fout: | ||
fout.write( | ||
",".join( | ||
[ | ||
"sample", | ||
"single_end", | ||
"fastq_1", | ||
"fastq_2", | ||
] | ||
) | ||
+ "\n" | ||
) | ||
for sample in sorted(sample_mapping_dict.keys()): | ||
## Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end | ||
if not all( | ||
x[0] == sample_mapping_dict[sample][0][0] | ||
for x in sample_mapping_dict[sample] | ||
): | ||
print_error( | ||
f"Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end!", | ||
"Sample", | ||
sample, | ||
) | ||
|
||
for idx, val in enumerate(sample_mapping_dict[sample]): | ||
plus_T = ( | ||
f"_T{idx+1}" if len(sample_mapping_dict[sample]) > 1 else "" | ||
) # do not append _T{idx} if not needed | ||
fout.write(",".join([f"{sample}{plus_T}"] + val) + "\n") | ||
else: | ||
print_error(f"No entries to process!", "Samplesheet: {file_in}") | ||
|
||
|
||
def main(args=None): | ||
args = parse_args(args) | ||
check_samplesheet(args.FILE_IN, args.FILE_OUT) | ||
|
||
|
||
if __name__ == "__main__": | ||
sys.exit(main()) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
params { | ||
config_profile_name = 'Test dataset' | ||
config_profile_description = 'Minimal test dataset for pipeline functionality' | ||
|
||
input = 'assets/samplesheet_test.csv' | ||
outdir = 'results/test' | ||
genome = null | ||
|
||
max_cpus = 32 // for GitHub Actions https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources | ||
max_memory = '120.GB' | ||
max_time = '12.h' | ||
|
||
publish_dir_mode = 'symlink' | ||
} | ||
dag { | ||
enabled = true | ||
overwrite = true | ||
file = 'assets/dag.png' | ||
} | ||
report { | ||
enabled = true | ||
overwrite = true | ||
file = "${params.outdir}/pipeline_info/execution_report.html" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
|
||
process BAGEL { | ||
output: | ||
path("output.txt") | ||
|
||
script: | ||
""" | ||
uname -a >> output.txt | ||
which BAGEL.py >> output.txt | ||
""" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
|
||
process DRUGZ { | ||
output: | ||
path("output.txt") | ||
|
||
script: | ||
""" | ||
uname -a >> output.txt | ||
which drugz.py >> output.txt | ||
""" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
|
||
process MAGECK_COUNT { | ||
|
||
} | ||
|
||
process MAGECK { | ||
output: | ||
path("output.txt") | ||
|
||
script: | ||
""" | ||
uname -a >> output.txt | ||
which mageck >> output.txt | ||
""" | ||
} | ||
|
||
|
||
process VISPR { | ||
output: | ||
path("output.txt") | ||
|
||
script: | ||
""" | ||
uname -a >> output.txt | ||
which vispr >> output.txt | ||
""" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
// adapted from https://github.com/nf-core/chipseq/blob/51eba00b32885c4d0bec60db3cb0a45eb61e34c5/modules/local/samplesheet_check.nf | ||
process SAMPLESHEET_CHECK { | ||
tag "$samplesheet" | ||
|
||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
'https://depot.galaxyproject.org/singularity/python:3.8.3' : | ||
'quay.io/biocontainers/python:3.8.3' }" | ||
|
||
input: | ||
path samplesheet | ||
|
||
output: | ||
path '*.csv' , emit: csv | ||
path "versions.yml", emit: versions | ||
|
||
script: | ||
""" | ||
check_samplesheet.py \\ | ||
$samplesheet \\ | ||
samplesheet.valid.csv | ||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
python: \$(python --version | sed 's/Python //g') | ||
END_VERSIONS | ||
""" | ||
} |
Oops, something went wrong.