Skip to content

Commit

Permalink
Work on read overlap local realignment
Browse files Browse the repository at this point in the history
  • Loading branch information
jonperdomo committed Oct 3, 2024
1 parent 72a7f75 commit 1791529
Show file tree
Hide file tree
Showing 6 changed files with 167 additions and 88 deletions.
26 changes: 3 additions & 23 deletions __main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,24 +153,6 @@ def main():
required=False,
help=argparse.SUPPRESS
)

# Turn off CIGAR string SV detection (split-read only)
parser.add_argument(
"--disable-cigar",
required=False,
action="store_true",
default=False,
help=argparse.SUPPRESS
)

# Turn off SNP-based CNV predictions for SV classification.
parser.add_argument(
"--disable-snp-cnv",
required=False,
action="store_true",
default=False,
help=argparse.SUPPRESS
)

# ----------------------------------------------------------------------- #

Expand All @@ -195,8 +177,8 @@ def main():
log.warning("Short read alignment file not provided. Using long read alignment file in its place.")
args.short_read = args.long_read

# SNPs file is required unless SNP-based CNV predictions are disabled.
if (args.snps is None and not args.disable_snp_cnv):
# SNPs file is required
if (args.snps is None):
log.error("Please provide the SNPs file.")
arg_error = True

Expand Down Expand Up @@ -231,8 +213,6 @@ def main():
input_data.setAlleleFreqFilepaths(args.pfb)
input_data.setHMMFilepath(args.hmm)
input_data.setOutputDir(args.output)
input_data.setDisableCIGAR(args.disable_cigar)
input_data.setDisableSNPCNV(args.disable_snp_cnv)
input_data.saveCNVData(args.save_cnv)
input_data.setWindowSize(args.window_size)

Expand All @@ -246,7 +226,7 @@ def main():
# cnv_data_path = os.path.join(args.output, "cnv_data.tsv")

# Generate python-based CNV plots if SNP-based CNV predictions are enabled
if (args.save_cnv and not args.disable_snp_cnv):
if (args.save_cnv):
log.info("Generating CNV plots...")

# Find all TSV files in the output directory
Expand Down
11 changes: 0 additions & 11 deletions include/input_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,6 @@ class InputData {
void setThreadCount(int thread_count);
int getThreadCount();

// Disable CIGAR string SV calling. This is useful for testing.
void setDisableCIGAR(bool disable_cigar);
bool getDisableCIGAR();

// Disable SNP-based CNV calling. This is useful for calling SVs on
// assemblies since alignment calling is sufficient.
void setDisableSNPCNV(bool disable_snp_cnv);
bool getDisableSNPCNV();

// Set the whole genome flag to true if the entire genome is being
// analyzed.
void setWholeGenome(bool whole_genome);
Expand Down Expand Up @@ -137,8 +128,6 @@ class InputData {
std::map<std::string, double> chr_cov; // Map of pre-calculated mean coverage values for each chromosome
int thread_count;
std::string hmm_filepath;
bool disable_cigar;
bool disable_snp_cnv;
std::string cnv_filepath;
bool whole_genome; // True if the entire genome is being analyzed
bool verbose; // True if verbose output is enabled
Expand Down
10 changes: 6 additions & 4 deletions include/sv_caller.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
#include <future>
/// @endcond

// SV candidate alignment data (chr, start, end, sequence)
using AlignmentData = std::tuple<std::string, int64_t, int64_t, std::string>;
// SV candidate alignment data (chr, start, end, sequence, query start, query
// end, mismatch rate)
using AlignmentData = std::tuple<std::string, int64_t, int64_t, std::string, int32_t, int32_t, double>;
using AlignmentVector = std::vector<AlignmentData>;

// Query map (query name, alignment vector)
Expand All @@ -31,8 +32,9 @@ class SVCaller {
InputData* input_data;
std::mutex sv_mtx; // Mutex for locking the SV data

// Detect SVs from long read alignments in the CIGAR string
void detectSVsFromCIGAR(bam_hdr_t* header, bam1_t* alignment, SVData& sv_calls);
// Detect SVs from the CIGAR string of a read alignment, and return the
// mismatch rate, and the start and end positions of the query sequence
std::tuple<double, int32_t, int32_t> detectSVsFromCIGAR(bam_hdr_t* header, bam1_t* alignment, SVData& sv_calls, bool is_primary);

// Detect SVs at a region from long read alignments. This is used for
// whole genome analysis running in parallel.
Expand Down
13 changes: 5 additions & 8 deletions src/contextsv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,11 @@ int ContextSV::run()
SVCaller sv_caller(*this->input_data);
SVData sv_calls = sv_caller.run();

// Classify SVs based on SNP CNV predictions if enabled
if (this->input_data->getDisableSNPCNV() == false) {
// Call CNVs at SNP positions
std::cout << "Running CNV predictions..." << std::endl;
CNVCaller cnv_caller(*this->input_data);
cnv_caller.run(sv_calls);
std::cout << "CNV predictions complete." << std::endl;
}
// Classify SVs based on copy number predictions
std::cout << "Running copy number predictions..." << std::endl;
CNVCaller cnv_caller(*this->input_data);
cnv_caller.run(sv_calls);
std::cout << "Copy number predictions complete." << std::endl;

// Print the total number of SVs called
std::cout << "Total SVs called: " << sv_calls.totalCalls() << std::endl;
Expand Down
22 changes: 0 additions & 22 deletions src/input_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ InputData::InputData()
this->thread_count = 1;
this->hmm_filepath = "data/wgs.hmm";
this->whole_genome = true;
this->disable_cigar = false;
this->disable_snp_cnv = false;
this->verbose = false;
this->save_cnv_data = false;
}
Expand Down Expand Up @@ -458,26 +456,6 @@ void InputData::setHMMFilepath(std::string filepath)
}
}

void InputData::setDisableCIGAR(bool disable_cigar)
{
this->disable_cigar = disable_cigar;
}

bool InputData::getDisableCIGAR()
{
return this->disable_cigar;
}

void InputData::setDisableSNPCNV(bool disable_snp_cnv)
{
this->disable_snp_cnv = disable_snp_cnv;
}

bool InputData::getDisableSNPCNV()
{
return this->disable_snp_cnv;
}

void InputData::setWholeGenome(bool whole_genome)
{
this->whole_genome = whole_genome;
Expand Down
Loading

0 comments on commit 1791529

Please sign in to comment.