diff --git a/README.md b/README.md
index 6a7c1b9..abd48e8 100644
--- a/README.md
+++ b/README.md
@@ -66,6 +66,11 @@ There are two required files for running breakpoint analysis:
 * `metadata` which is a tab separated file containing `sample`, band for `region1` and band for `region2`
 * `cytobands` which contains the genomic regions and their corresponding cytogenetic bands
 
+The cytobands file that is downloaded from the UCSC site requires an
+additional column to work with the analysis pipeline. The script
+`workflow/scripts/fix_cytoband_file.py` can be used to append the
+additional column.
+
 ### Run the workflow
 The basecaller uses `guppy` and a GPU to convert FAST5 files to FASTQ files.
 ```
diff --git a/workflow/scripts/createivf_breakpoints.py b/workflow/scripts/createivf_breakpoints.py
old mode 100644
new mode 100755
diff --git a/workflow/scripts/fix_cytoband_file.py b/workflow/scripts/fix_cytoband_file.py
new file mode 100755
index 0000000..3c2735e
--- /dev/null
+++ b/workflow/scripts/fix_cytoband_file.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+
+"""
+Append a column to the UCSC cytoband file
+
+Reads the tab separated file named with -f/--file and prints every row to
+standard output followed by one extra column: the literal prefix 'chr'
+joined to the value of the row's fourth (band) column.
+"""
+
+import os
+import sys
+import argparse
+import csv
+
+
+def init_args():
+    """
+    Initialize command line arguments
+
+    Returns the argparse namespace; its 'file' attribute is the required
+    path given with -f/--file.
+    """
+    description = 'Append chromosome identifier column to UCSC cytoband file'
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument('-f', '--file', required=True,
+                        help='full path to UCSC cytoband file')
+    return parser.parse_args()
+
+
+def main():
+    """
+    Main program
+
+    Print each input row as six tab separated columns. Exits with an error
+    message when a row has fewer than five columns; columns beyond the
+    fifth are not written.
+    """
+    args = init_args()
+    fieldnames = ['chrom', 'start', 'end', 'band', 'desc']
+    # The csv module asks for newline='' so it can handle line endings itself
+    with open(args.file, 'r', newline='') as ifh:
+        reader = csv.DictReader(ifh, delimiter='\t', fieldnames=fieldnames)
+        for line in reader:
+            # DictReader fills the columns a short row lacks with None,
+            # which would otherwise surface as a TypeError in str.join
+            if any(line[name] is None for name in fieldnames):
+                sys.exit('{}: line {}: expected {} tab separated columns'
+                         .format(args.file, reader.line_num,
+                                 len(fieldnames)))
+            # NOTE(review): the prefix is joined to the band column, not to
+            # chrom - confirm this is the identifier the pipeline expects
+            chr_cyto = ''.join(['chr', line['band']])
+            print('\t'.join([
+                line['chrom'],
+                line['start'],
+                line['end'],
+                line['band'],
+                line['desc'],
+                chr_cyto
+            ]))
+
+
+if __name__ == '__main__':
+    main()
+
+
+#__END__
\ No newline at end of file