diff --git a/README.md b/README.md index a7e6ec4..cebc9f6 100755 --- a/README.md +++ b/README.md @@ -171,9 +171,9 @@ technique (the test case vs the reference cases). This file contains aberrant segments, defined by the [`--beta`](#stage-3-predict-copy-number-alterations) or [`--zscore`](#stage-3-predict-copy-number-alterations) parameters. -### ID_chr_statistics.bed +### ID_statistics.bed -This file contains some interesting statistics for each chromosome. The definition of the Z-scores matches the one from +This file contains some interesting statistics (per chromosome and overall). The definition of the Z-scores matches the one from the 'ID_segments.bed'. Particularly interesting for NIPT. # Dependencies diff --git a/setup.py b/setup.py index 8f0c863..4e2aeb3 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #! /usr/bin/env python from setuptools import setup, find_packages -version = '1.2.0' +version = '1.2.1' dl_version = 'master' if 'dev' in version else '{}'.format(version) setup( diff --git a/wisecondorX/main.py b/wisecondorX/main.py index e4f69cd..49888d6 100755 --- a/wisecondorX/main.py +++ b/wisecondorX/main.py @@ -152,37 +152,36 @@ def tool_test(args): sample = scale_sample(sample, int( sample_file['binsize'].item()), int(ref_file['binsize'])) + gender = predict_gender(sample, ref_file['trained_cutoff']) if not ref_file['is_nipt']: - actual_gender = predict_gender(sample, ref_file['trained_cutoff']) if args.gender: - actual_gender = args.gender - sample = gender_correct(sample, actual_gender) + gender = args.gender + sample = gender_correct(sample, gender) + ref_gender = gender else: - actual_gender = 'F' - - if args.gender: - actual_gender = args.gender - - ref_gender = actual_gender + if args.gender: + gender = args.gender + ref_gender = 'F' logging.info('Normalizing autosomes ...') results_r, results_z, results_w, ref_sizes, m_lr, m_z = normalize( args, sample, ref_file, 'A') - if not ref_file['has_male'] and actual_gender == 'M': - logging.warning('This sample is male, whilst the reference is created with fewer than 5 males. ' - 'The female gonosomal reference will be used for X predictions. Note that these might ' - 'not be accurate. If the latter is desired, create a new reference and include more ' - 'male samples.') - ref_gender = 'F' - - elif not ref_file['has_female'] and actual_gender == 'F': - logging.warning('This sample is female, whilst the reference is created with fewer than 5 females. ' - 'The male gonosomal reference will be used for XY predictions. Note that these might ' - 'not be accurate. If the latter is desired, create a new reference and include more ' - 'female samples.') - ref_gender = 'M' + if not ref_file['is_nipt']: + if not ref_file['has_male'] and gender == 'M': + logging.warning('This sample is male, whilst the reference is created with fewer than 5 males. ' + 'The female gonosomal reference will be used for X predictions. Note that these might ' + 'not be accurate. If the latter is desired, create a new reference and include more ' + 'male samples.') + ref_gender = 'F' + + elif not ref_file['has_female'] and gender == 'F': + logging.warning('This sample is female, whilst the reference is created with fewer than 5 females. ' + 'The male gonosomal reference will be used for XY predictions. Note that these might ' + 'not be accurate. If the latter is desired, create a new reference and include more ' + 'female samples.') + ref_gender = 'M' logging.info('Normalizing gonosomes ...') @@ -198,7 +197,7 @@ def tool_test(args): 'binsize': int(ref_file['binsize']), 'n_reads': n_reads, 'ref_gender': ref_gender, - 'actual_gender': actual_gender, + 'gender': gender, 'mask': ref_file['mask.{}'.format(ref_gender)], 'bins_per_chr': ref_file['bins_per_chr.{}'.format(ref_gender)], 'masked_bins_per_chr': ref_file['masked_bins_per_chr.{}'.format(ref_gender)], diff --git a/wisecondorX/newref_tools.py b/wisecondorX/newref_tools.py index 90898b0..4f03567 100644 --- a/wisecondorX/newref_tools.py +++ b/wisecondorX/newref_tools.py @@ -33,7 +33,7 @@ def train_gender_model(args, samples): if args.plotyfrac is not None: import matplotlib.pyplot as plt fig, ax = plt.subplots(figsize=(16, 6)) - ax.hist(y_fractions, bins=100, normed=True) + ax.hist(y_fractions, bins=100, density=True) ax.plot(gmm_x, gmm_y, 'r-', label='Gaussian mixture fit') ax.set_xlim([0, 0.02]) ax.legend(loc='best') diff --git a/wisecondorX/predict_output.py b/wisecondorX/predict_output.py index 4a5ef9f..17153de 100644 --- a/wisecondorX/predict_output.py +++ b/wisecondorX/predict_output.py @@ -88,9 +88,9 @@ def _generate_segments_and_aberrations_bed(rem_input, results): int(segment[2] * rem_input['binsize']), segment[4], segment[3]] segments_file.write('{}\n'.format('\t'.join([str(x) for x in row]))) - ploidy = 2 - if (chr_name == 'X' or chr_name == 'Y') and rem_input['actual_gender'] == 'M': + ploidy = 2 + if (chr_name == 'X' or chr_name == 'Y') and rem_input['ref_gender'] == 'M': ploidy = 1 if rem_input['args'].beta is not None: if float(segment[4]) > __get_aberration_cutoff(rem_input['args'].beta, ploidy)[1]: @@ -146,7 +146,7 @@ def _generate_chr_statistics_file(rem_input, results): stats_file.write('\t'.join([str(x) for x in row]) + '\n') stats_file.write('Gender based on --yfrac (or manually overridden by --gender): {}\n' - .format(str(rem_input['actual_gender']))) + .format(str(rem_input['gender']))) stats_file.write('Number of reads: {}\n' .format(str(rem_input['n_reads'])))