diff --git a/wisecondorX/convert_tools.py b/wisecondorX/convert_tools.py index 5b8dfcd..452a183 100644 --- a/wisecondorX/convert_tools.py +++ b/wisecondorX/convert_tools.py @@ -7,12 +7,12 @@ import sys ''' -Converts bam file to numpy array by transforming +Converts reads file to numpy array by transforming individual reads to counts per bin. ''' -def convert_bam(args): +def convert_reads(args): bins_per_chr = dict() for chr in range(1, 25): bins_per_chr[str(chr)] = None @@ -20,11 +20,11 @@ def convert_bam(args): logging.info('Importing data ...') if args.infile.endswith(".sam"): - bam_file = pysam.AlignmentFile(args.infile, 'r') + reads_file = pysam.AlignmentFile(args.infile, 'r') elif args.infile.endswith(".bam"): - bam_file = pysam.AlignmentFile(args.infile, 'rb') + reads_file = pysam.AlignmentFile(args.infile, 'rb') elif args.infile.endswith(".cram"): - bam_file = pysam.AlignmentFile(args.infile, 'rc') + reads_file = pysam.AlignmentFile(args.infile, 'rc') else: logging.error( "Unsupported input file type. Make sure your input filename has a correct extension (sam/bam/cram)") @@ -38,9 +38,9 @@ def convert_bam(args): larp = -1 larp2 = -1 - logging.info('Converting bam ... This might take a while ...') + logging.info('Converting reads ... This might take a while ...') - for index, chr in enumerate(bam_file.references): + for index, chr in enumerate(reads_file.references): chr_name = chr if chr_name[:3].lower() == 'chr': @@ -49,9 +49,9 @@ def convert_bam(args): continue logging.info('Working at {}; processing {} bins' - .format(chr, int(bam_file.lengths[index] / float(args.binsize) + 1))) - counts = np.zeros(int(bam_file.lengths[index] / float(args.binsize) + 1), dtype=np.int32) - bam_chr = bam_file.fetch(chr) + .format(chr, int(reads_file.lengths[index] / float(args.binsize) + 1))) + counts = np.zeros(int(reads_file.lengths[index] / float(args.binsize) + 1), dtype=np.int32) + bam_chr = reads_file.fetch(chr) if chr_name == 'X': chr_name = '23' @@ -91,9 +91,9 @@ def convert_bam(args): bins_per_chr[chr_name] = counts reads_kept += sum(counts) - qual_info = {'mapped': bam_file.mapped, - 'unmapped': bam_file.unmapped, - 'no_coordinate': bam_file.nocoordinate, + qual_info = {'mapped': reads_file.mapped, + 'unmapped': reads_file.unmapped, + 'no_coordinate': reads_file.nocoordinate, 'filter_rmdup': reads_rmdup, 'filter_mapq': reads_mapq, 'pre_retro': reads_seen, diff --git a/wisecondorX/main.py b/wisecondorX/main.py index 49888d6..01183a0 100755 --- a/wisecondorX/main.py +++ b/wisecondorX/main.py @@ -8,7 +8,7 @@ import numpy as np -from wisecondorX.convert_tools import convert_bam +from wisecondorX.convert_tools import convert_reads from wisecondorX.newref_control import tool_newref_prep, tool_newref_main, tool_newref_merge from wisecondorX.newref_tools import train_gender_model, get_mask from wisecondorX.overall_tools import gender_correct, scale_sample @@ -20,7 +20,7 @@ def tool_convert(args): logging.info('Starting conversion') - sample, qual_info = convert_bam(args) + sample, qual_info = convert_reads(args) np.savez_compressed(args.outfile, binsize=args.binsize, sample=sample,