From bc5fd3930c03b38eb38b686a2b5fe6815d007ebe Mon Sep 17 00:00:00 2001 From: arangrhie Date: Thu, 24 Jun 2021 15:09:55 -0400 Subject: [PATCH] accept phased GT --- src/merfin/merfin-globals.C | 2 +- src/merfin/merfin-variants.C | 21 ++++++++++----------- src/merfin/vcf.C | 8 +++++++- src/merfin/vcfRecord.H | 10 +++++----- 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/merfin/merfin-globals.C b/src/merfin/merfin-globals.C index cc593be..d0dd0b8 100644 --- a/src/merfin/merfin-globals.C +++ b/src/merfin/merfin-globals.C @@ -39,7 +39,7 @@ merfinGlobal::load_Kmetric(void) { char *line = nullptr; while (AS_UTL_readLine(line, lineLen, lineMax, F.file())) { - splitToWords S(line, splitLetter, ','); + splitToWords S(line, ','); if (S.numWords() == 2) { uint32 k = S.touint32(0); diff --git a/src/merfin/merfin-variants.C b/src/merfin/merfin-variants.C index c861246..2a1393c 100644 --- a/src/merfin/merfin-variants.C +++ b/src/merfin/merfin-variants.C @@ -53,8 +53,9 @@ traverse(uint32 idx, replaced = candidate; replaced.replace(refIdxList[idx], refLenList[idx], hap); - //fprintf(stderr, "REPLACE candidate '%s' ->\n", candidate.c_str()); - //fprintf(stderr, "REPLACE replaced '%s' by change %u-%u to '%s'\n", replaced.c_str(), refIdxList[idx], refLenList[idx], hap); + // Debug traversed variant we are testing + // fprintf(stderr, "REPLACE candidate '%s' ->\n", candidate.c_str()); + // fprintf(stderr, "REPLACE replaced '%s' by change %u-%u to '%s'\n", replaced.c_str(), refIdxList[idx], refLenList[idx], hap); // Apply to the rest of the positions, after skipping overlaps // refIdx in overlaps should remain as they were as we are using ref allele at these sites anyway @@ -181,12 +182,12 @@ processVariants(void *G, void *T, void *S) { // Debug report the mapPosHap - //fprintf(stderr, "\n"); - //fprintf(stderr, "[ DEBUG ] :: %s : %u - %u\n", s->seq.ident(), rStart, rEnd); - //fprintf(stderr, "[ DEBUG ] :: gts.size = %lu | ", gts.size()); - //for (uint32 i = 0; i < gts.size(); i++) - // fprintf(stderr, "gt->_pos = %u ", gts[i]->_pos); - //fprintf(stderr, "\n"); + // fprintf(stderr, "\n"); + // fprintf(stderr, "[ DEBUG ] :: %s : %u - %u\n", s->seq.ident(), rStart, rEnd); + // fprintf(stderr, "[ DEBUG ] :: gts.size = %lu | ", gts.size()); + // for (uint32 i = 0; i < gts.size(); i++) + // fprintf(stderr, "gt->_pos = %u ", gts[i]->_pos); + // fprintf(stderr, "\n"); // Load mapPosHap @@ -197,7 +198,6 @@ processVariants(void *G, void *T, void *S) { refLenList.push_back(gt->_refLen); // add alleles. alleles[0] is always the ref allele -#warning is this copying the whole vector? mapPosHap[i] = gt->_alleles; } @@ -216,7 +216,7 @@ processVariants(void *G, void *T, void *S) { continue; } - //fprintf(stderr, "%s\n", refTemplate); + // fprintf(stderr, "%s\n", refTemplate); // @@ -250,7 +250,6 @@ processVariants(void *G, void *T, void *S) { seqMer->getMaxAbsK(idx), seqMer->getMedAbsK(idx), seqMer->getAvgAbsK(idx), - //seqMer->getAvgAbsdK(idx, RefAvgK), seqMer->getTotdK(idx)); // new vcf records diff --git a/src/merfin/vcf.C b/src/merfin/vcf.C index 3b14cbe..efe0213 100644 --- a/src/merfin/vcf.C +++ b/src/merfin/vcf.C @@ -38,13 +38,19 @@ gtAllele::gtAllele(vcfRecord *r) { return; } - splitToWords GT(_record->_arr_samples[0], splitLetter, '/'); + // Make this accept both '/' and '|' as delimiters + + splitToWords GT; + GT.split(_record->_arr_samples[0], "|/"); _alleles.push_back(_record->get_ref()); // _alleles[0] is ALWAYS the reference allele. // Add alternate alleles to the list, as long as they aren't already there. for (uint32 ii=0; ii