Skip to content

Commit

Permalink
refactor bam references to reads
Browse files: browse the repository at this point in the history
  • Loading branch information
matthdsm committed Mar 24, 2021
1 parent e8009fe commit 2d65a51
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 15 deletions.
26 changes: 13 additions & 13 deletions wisecondorX/convert_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,24 @@
import sys

'''
Converts bam file to numpy array by transforming
Converts reads file to numpy array by transforming
individual reads to counts per bin.
'''


def convert_bam(args):
def convert_reads(args):
bins_per_chr = dict()
for chr in range(1, 25):
bins_per_chr[str(chr)] = None

logging.info('Importing data ...')

if args.infile.endswith(".sam"):
bam_file = pysam.AlignmentFile(args.infile, 'r')
reads_file = pysam.AlignmentFile(args.infile, 'r')
elif args.infile.endswith(".bam"):
bam_file = pysam.AlignmentFile(args.infile, 'rb')
reads_file = pysam.AlignmentFile(args.infile, 'rb')
elif args.infile.endswith(".cram"):
bam_file = pysam.AlignmentFile(args.infile, 'rc')
reads_file = pysam.AlignmentFile(args.infile, 'rc')
else:
logging.error(
"Unsupported input file type. Make sure your input filename has a correct extension (sam/bam/cram)")
Expand All @@ -38,9 +38,9 @@ def convert_bam(args):
larp = -1
larp2 = -1

logging.info('Converting bam ... This might take a while ...')
logging.info('Converting reads ... This might take a while ...')

for index, chr in enumerate(bam_file.references):
for index, chr in enumerate(reads_file.references):

chr_name = chr
if chr_name[:3].lower() == 'chr':
Expand All @@ -49,9 +49,9 @@ def convert_bam(args):
continue

logging.info('Working at {}; processing {} bins'
.format(chr, int(bam_file.lengths[index] / float(args.binsize) + 1)))
counts = np.zeros(int(bam_file.lengths[index] / float(args.binsize) + 1), dtype=np.int32)
bam_chr = bam_file.fetch(chr)
.format(chr, int(reads_file.lengths[index] / float(args.binsize) + 1)))
counts = np.zeros(int(reads_file.lengths[index] / float(args.binsize) + 1), dtype=np.int32)
bam_chr = reads_file.fetch(chr)

if chr_name == 'X':
chr_name = '23'
Expand Down Expand Up @@ -91,9 +91,9 @@ def convert_bam(args):
bins_per_chr[chr_name] = counts
reads_kept += sum(counts)

qual_info = {'mapped': bam_file.mapped,
'unmapped': bam_file.unmapped,
'no_coordinate': bam_file.nocoordinate,
qual_info = {'mapped': reads_file.mapped,
'unmapped': reads_file.unmapped,
'no_coordinate': reads_file.nocoordinate,
'filter_rmdup': reads_rmdup,
'filter_mapq': reads_mapq,
'pre_retro': reads_seen,
Expand Down
4 changes: 2 additions & 2 deletions wisecondorX/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import numpy as np

from wisecondorX.convert_tools import convert_bam
from wisecondorX.convert_tools import convert_reads
from wisecondorX.newref_control import tool_newref_prep, tool_newref_main, tool_newref_merge
from wisecondorX.newref_tools import train_gender_model, get_mask
from wisecondorX.overall_tools import gender_correct, scale_sample
Expand All @@ -20,7 +20,7 @@
def tool_convert(args):
logging.info('Starting conversion')

sample, qual_info = convert_bam(args)
sample, qual_info = convert_reads(args)
np.savez_compressed(args.outfile,
binsize=args.binsize,
sample=sample,
Expand Down

0 comments on commit 2d65a51

Please sign in to comment.