From 8874e31ddda89d0039b9f92c78a08d27dc51dfdf Mon Sep 17 00:00:00 2001 From: Martin Steinegger Date: Thu, 5 Dec 2019 15:15:00 -0500 Subject: [PATCH] Fix warnings --- lib/flash/combine_reads.cpp | 11 +- lib/flash/util.cpp | 2 + src/LocalCommandDeclarations.h | 1 - src/assembler/CMakeLists.txt | 1 - src/assembler/assembleresult.cpp | 10 +- src/assembler/correctreads.cpp | 260 ------------------------- src/assembler/hybridassembleresult.cpp | 10 +- src/plass.cpp | 7 - 8 files changed, 17 insertions(+), 285 deletions(-) delete mode 100644 src/assembler/correctreads.cpp diff --git a/lib/flash/combine_reads.cpp b/lib/flash/combine_reads.cpp index 6ecac85e..fffe1af9 100644 --- a/lib/flash/combine_reads.cpp +++ b/lib/flash/combine_reads.cpp @@ -54,16 +54,15 @@ # define __noreturn __attribute__((noreturn)) # define __format(type, format_str, args_start) \ __attribute__((format(type, format_str, args_start))) -# define max(a,b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); _a > _b ? _a : _b; }) -# define min(a,b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); _a < _b ? _a : _b; }) -# define inline inline __attribute__((always_inline)) + #else # define __noreturn # define __cold # define __format(type, format_str, args_start) +#endif + # define max(a,b) (((a) > (b)) ? (a) : (b)) # define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif /* Sum the values an 8 x 8 bit vector and return a 32-bit result. */ static inline uint32_t @@ -360,12 +359,12 @@ generate_combined_read(const struct read *read_1, char * combined_seq; char * combined_qual; - if (combined_read->seq_bufsz < combined_seq_len) { + if (combined_read->seq_bufsz < static_cast(combined_seq_len)) { combined_read->seq = (char *)xrealloc(combined_read->seq, combined_seq_len); combined_read->seq_bufsz = combined_seq_len; } - if (combined_read->qual_bufsz < combined_seq_len) { + if (combined_read->qual_bufsz < static_cast(combined_seq_len)) { combined_read->qual = (char *)xrealloc(combined_read->qual, combined_seq_len); combined_read->qual_bufsz = combined_seq_len; diff --git a/lib/flash/util.cpp b/lib/flash/util.cpp index 6ba3c8f7..ca639397 100644 --- a/lib/flash/util.cpp +++ b/lib/flash/util.cpp @@ -60,6 +60,7 @@ xmalloc(size_t size) return p; } fprintf(stderr, "Out of memory: tried to allocate %zu bytes", size); + return NULL; } @@ -78,5 +79,6 @@ xrealloc(void *ptr, size_t size) return p; } fprintf(stderr, "Out of memory: tried to reallocate %zu bytes", size); + return NULL; } diff --git a/src/LocalCommandDeclarations.h b/src/LocalCommandDeclarations.h index ef808252..e27e1e03 100644 --- a/src/LocalCommandDeclarations.h +++ b/src/LocalCommandDeclarations.h @@ -11,7 +11,6 @@ extern int hybridassembleresults(int argc, const char** argv, const Command &com extern int filternoncoding(int argc, const char** argv, const Command &command); extern int mergereads(int argc, const char** argv, const Command &command); extern int findassemblystart(int argc, const char** argv, const Command &command); -extern int correctreads(int argc, const char** argv, const Command &command); extern int cyclecheck(int argc, const char** argv, const Command &command); extern int createhdb(int argc, const char** argv, const Command &command); #endif diff --git a/src/assembler/CMakeLists.txt b/src/assembler/CMakeLists.txt index 9986ce07..a1e07d08 100644 --- a/src/assembler/CMakeLists.txt +++ b/src/assembler/CMakeLists.txt @@ -1,7 +1,6 @@ set(assembler_source_files assembler/assembleresult.cpp assembler/hybridassembleresult.cpp - assembler/correctreads.cpp assembler/findassemblystart.cpp assembler/filternoncoding.cpp assembler/mergereads.cpp diff --git a/src/assembler/assembleresult.cpp b/src/assembler/assembleresult.cpp index 47a97fd9..0f333fe1 100644 --- a/src/assembler/assembleresult.cpp +++ b/src/assembler/assembleresult.cpp @@ -45,8 +45,8 @@ Matcher::result_t selectFragmentToExtend(QueueByScore &alignments, alignments.pop(); size_t dbKey = res.dbKey; const bool notRightStartAndLeftStart = !(res.dbStartPos == 0 && res.qStartPos == 0 ); - const bool rightStart = res.dbStartPos == 0 && (res.dbEndPos != res.dbLen-1); - const bool leftStart = res.qStartPos == 0 && (res.qEndPos != res.qLen-1); + const bool rightStart = res.dbStartPos == 0 && (res.dbEndPos != static_cast(res.dbLen)-1); + const bool leftStart = res.qStartPos == 0 && (res.qEndPos != static_cast(res.qLen)-1); const bool isNotIdentity = (dbKey != queryKey); if ((rightStart || leftStart) && notRightStartAndLeftStart && isNotIdentity){ @@ -205,7 +205,7 @@ int doassembly(LocalParameters &par) { continue; } } else if (besttHitToExtend.qStartPos == 0) { - if (besttHitToExtend.dbStartPos <= leftQueryOffsetToUse) { + if (besttHitToExtend.dbStartPos <= static_cast(leftQueryOffsetToUse)) { continue; } } @@ -236,7 +236,7 @@ int doassembly(LocalParameters &par) { } // check right extension or reverse left - if (dbStartPos == 0 && qEndPos == (querySeqLen - 1) ) { + if (dbStartPos == 0 && qEndPos == (static_cast(querySeqLen) - 1) ) { if((!isReverse && queryCouldBeExtendedRight == true) || (isReverse && queryCouldBeExtendedLeft == true)) { float alnLen = qEndPos - qStartPos; float scorePerCol = static_cast(score) / (alnLen+0.5); @@ -278,7 +278,7 @@ int doassembly(LocalParameters &par) { } //check left extension - } else if (qStartPos == 0 && dbEndPos == (targetSeqLen - 1)) { + } else if (qStartPos == 0 && dbEndPos == (static_cast(targetSeqLen) - 1)) { if ((!isReverse && queryCouldBeExtendedLeft == true)|| (isReverse && queryCouldBeExtendedRight == true)) { float alnLen = qEndPos - qStartPos; float scorePerCol = static_cast(score) / (alnLen+0.5); diff --git a/src/assembler/correctreads.cpp b/src/assembler/correctreads.cpp deleted file mode 100644 index 8a5e188e..00000000 --- a/src/assembler/correctreads.cpp +++ /dev/null @@ -1,260 +0,0 @@ -#include "NucleotideMatrix.h" -#include "Sequence.h" -#include "Debug.h" -#include "DBReader.h" -#include "DBWriter.h" -#include "LocalParameters.h" -#include "simd.h" - -#ifdef OPENMP -#include -#endif - -#include -#include - -#define HI_NIBBLE(b) (((b) >> 4) & 0x0F) -#define LO_NIBBLE(b) ((b) & 0x0F) - -struct Kmer { - struct TwoLetters { - unsigned int first : 4; - unsigned int last : 4; - }; - - uint64_t kmer; - unsigned int id; - TwoLetters firstAndLastLetter; - short pos; - bool isReverse; - Kmer() {} - - static bool compareRepSequenceAndIdAndPos(const Kmer &first, const Kmer &second) { - if (first.kmer < second.kmer) - return true; - if (second.kmer < first.kmer) - return false; - if (first.id < second.id) - return true; - if (second.id < first.id) - return false; - if (first.pos < second.pos) - return true; - if (second.pos < first.pos) - return false; - return false; - } -}; - -std::pair getSubstituion(const char lastLetter, const size_t currKmerIndex, const bool isReverse, - const char * reverseLookup, Kmer *pKmer, const size_t currPos, const size_t maxSize); - - -void printKmer(size_t idx, int size); - -int correctreads(int argc, const char **argv, const Command& command) { - LocalParameters& par = LocalParameters::getLocalInstance(); - par.kmerSize = 5; - par.parseParameters(argc, argv, command, true, 0, 0); - - Debug(Debug::INFO) << "Sequence database: " << par.db1 << "\n"; - DBReader seqDb (par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_DATA|DBReader::USE_INDEX); - seqDb.open(DBReader::NOSORT); - NucleotideMatrix subMat(par.scoringMatrixFile.nucleotides, 1.0, 0.0); - Debug(Debug::INFO) << "Output database: " << par.db2 << "\n"; - DBWriter dbw(par.db2.c_str(), par.db2Index.c_str(), static_cast(par.threads), par.compressed, Parameters::DBTYPE_NUCLEOTIDES); - dbw.open(); - const unsigned int BUFFER_SIZE = 1024; - - //ACTG 0123 TGAC - char reverseLookup[4] = {2, 3, 0, 1}; - Timer timer; - timer.reset(); - size_t offset = 0; - Kmer * allKmers = new Kmer[seqDb.getAminoAcidDBSize()*2]; - // Create a 1D Tensor on length 20 for input data. - Debug(Debug::INFO) << "Extract kmers\n"; - - Debug::Progress progress(seqDb.getSize()); -#pragma omp parallel - { - unsigned int thread_idx = 0; -#ifdef OPENMP - thread_idx = static_cast(omp_get_thread_num()); -#endif - - Kmer* threadKmerBuffer = new Kmer[BUFFER_SIZE]; - size_t bufferPos = 0; - Sequence seq(par.maxSeqLen, seqDb.getDbtype(), &subMat, par.kmerSize, false, false); - -#pragma omp for schedule(static) - for (size_t id = 0; id < seqDb.getSize(); id++) { - progress.updateProgress(); - - char *seqData = seqDb.getData(id, thread_idx); - unsigned int seqLen = seqDb.getSeqLen(id); - - unsigned int dbKey = seqDb.getDbKey(id); - unsigned int pos = 0; - while (pos < (seqLen - (par.kmerSize -1))) { - const char *kmer = seqData+pos; - uint64_t kmerIdx = 0; - for(size_t kmerPos = 0; kmerPos < par.kmerSize; kmerPos++){ - kmerIdx = kmerIdx << 2; - kmerIdx = kmerIdx | (kmer[kmerPos]>>1)&3; - } - threadKmerBuffer[bufferPos].kmer = kmerIdx; - threadKmerBuffer[bufferPos].pos = static_cast(pos); - char firstLetter = static_cast((kmer[0]>>1)&3); - char lastLetter = static_cast((kmer[par.kmerSize-1]>>1)&3); - threadKmerBuffer[bufferPos].firstAndLastLetter.first = firstLetter; - threadKmerBuffer[bufferPos].firstAndLastLetter.last = lastLetter; - threadKmerBuffer[bufferPos].isReverse=false; - bufferPos++; - - size_t revKmerIdx = Util::revComplement(kmerIdx, par.kmerSize); -// printKmer(kmerIdx, par.kmerSize); -// printKmer(revKmerIdx, par.kmerSize); - threadKmerBuffer[bufferPos].kmer = revKmerIdx; - threadKmerBuffer[bufferPos].firstAndLastLetter.first=reverseLookup[lastLetter]; - threadKmerBuffer[bufferPos].firstAndLastLetter.last =reverseLookup[firstLetter]; - threadKmerBuffer[bufferPos].isReverse=true; - - if (bufferPos+1 >= BUFFER_SIZE) { - size_t writeOffset = __sync_fetch_and_add(&offset, bufferPos); - memcpy(allKmers + writeOffset, threadKmerBuffer, sizeof(Kmer) * bufferPos); - bufferPos = 0; - } - bufferPos++; - pos++; - } - } - if(bufferPos > 0){ - size_t writeOffset = __sync_fetch_and_add(&offset, bufferPos); - memcpy(allKmers + writeOffset, threadKmerBuffer, sizeof(Kmer) * bufferPos); - } - - delete[] threadKmerBuffer; - } - Debug(Debug::INFO) << "Time for extracting kmers: " << timer.lap() << "\n"; - timer.reset(); - - Debug(Debug::INFO) << "Sort kmer ... "; - omptl::sort(allKmers, allKmers + offset, Kmer::compareRepSequenceAndIdAndPos); - Debug(Debug::INFO) << "Time for sort: " << timer.lap() << "\n"; - - if(allKmers[0].kmer != allKmers[1].kmer){ - std::pair ret = getSubstituion(allKmers[0].firstAndLastLetter.last, allKmers[0].kmer, - allKmers[0].isReverse, reverseLookup, allKmers, 0, offset); - allKmers[0].kmer = ret.first; - } - for(size_t pos = 1; pos < offset; pos++){ - Kmer & currKmer = allKmers[pos]; - - // correct if it is a singleton - if(allKmers[pos-1].kmer != currKmer.kmer && - allKmers[pos+1].kmer != currKmer.kmer){ - std::pair ret =getSubstituion(allKmers[pos].firstAndLastLetter.last, allKmers[pos].kmer, - allKmers[pos].isReverse, reverseLookup, allKmers, pos, offset); - allKmers[pos].kmer = ret.first; - } - } - - dbw.close(); - seqDb.close(); - - return EXIT_SUCCESS; -} - -void printKmer(size_t idx, int size) { - char output[32]; - char nuclCode[4] = {'A','C','T','G'}; - int temp = idx; - for (int i=size-1; i>=0; i--) - { - output[i] = nuclCode[ idx&3 ]; - idx = idx>>2; - } - output[size]='\0'; - std::cout << output << std::endl; -} - -std::pair getSubstituion(const char lastLetter, const size_t currKmerIndex, const bool isReverse, - const char * reverseLookup, Kmer *kmerArray, const size_t currPos, size_t maxSize) { - // ATTGA 0 - // ATTGT 3 - // ATTGT 3 - // ATTTA <- A + 3 - // ATTTC <- C + 2, C - 1 - // ATTTG <- C + 1, C - 2 - // ATTTG - // ATTTT <- T - 3 - // ATTTT - - - - - size_t startIndex = 0; - size_t endIndex = 0; - switch (lastLetter){ - case 0: - startIndex = currKmerIndex + 1; - endIndex = currKmerIndex + 3; - break; - case 1: - startIndex = currKmerIndex - 1; - endIndex = currKmerIndex + 2; - break; - case 2: - startIndex = currKmerIndex - 2; - endIndex = currKmerIndex + 1; - break; - case 3: - startIndex = currKmerIndex - 3; - endIndex = currKmerIndex - 1; - break; - default: - std::cout << "this should not happen" << std::endl; - break; - } - // backward search - size_t pos = (currPos != 0) ? currPos - 1 : 0; - bool foundKmer = false; - char corrLastLetter=-1; - size_t prevKmer = SIZE_MAX; - size_t corrKmer= SIZE_MAX; - while(pos > 0 && kmerArray[pos].kmer >= startIndex ){ - if(prevKmer == kmerArray[pos].kmer){ - corrKmer = kmerArray[pos].kmer; - corrLastLetter = kmerArray[pos].firstAndLastLetter.last; - foundKmer = true; - break; - } - prevKmer = kmerArray[pos].kmer; - pos--; - } - - prevKmer = SIZE_MAX; - pos = currPos + 1; - while(foundKmer == false && pos < maxSize && kmerArray[pos].kmer <= endIndex ){ - if(prevKmer == kmerArray[pos].kmer){ - corrKmer = kmerArray[pos].kmer; - corrLastLetter = kmerArray[pos].firstAndLastLetter.last; - foundKmer = true; - break; - } - prevKmer = kmerArray[pos].kmer; - pos++; - } - - if(foundKmer==true){ - - char replaceChar = (isReverse) ? reverseLookup[corrLastLetter] : corrLastLetter; - return std::make_pair(corrKmer, replaceChar); - } - - return std::make_pair(currKmerIndex, lastLetter); -} - -#undef HI_NIBBLE -#undef LO_NIBBLE diff --git a/src/assembler/hybridassembleresult.cpp b/src/assembler/hybridassembleresult.cpp index ea3d1a60..45f1aa85 100644 --- a/src/assembler/hybridassembleresult.cpp +++ b/src/assembler/hybridassembleresult.cpp @@ -49,8 +49,8 @@ Matcher::result_t selectBestFragmentToExtend(QueueBySeqId &alignments, alignments.pop(); size_t dbKey = res.dbKey; const bool notRightStartAndLeftStart = !(res.dbStartPos == 0 && res.qStartPos == 0); - const bool rightStart = res.dbStartPos == 0 && (res.dbEndPos != res.dbLen-1); - const bool leftStart = res.qStartPos == 0 && (res.qEndPos != res.qLen-1); + const bool rightStart = res.dbStartPos == 0 && (res.dbEndPos != static_cast(res.dbLen)-1); + const bool leftStart = res.qStartPos == 0 && (res.qEndPos != static_cast(res.qLen)-1); const bool isNotIdentity = (dbKey != queryKey); if ((rightStart || leftStart) && notRightStartAndLeftStart && isNotIdentity){ return res; @@ -160,7 +160,7 @@ int dohybridassembleresult(LocalParameters &par) { continue; } } else if (nuclBesttHitToExtend.qStartPos == 0) { - if ((nuclBesttHitToExtend.dbStartPos <= nuclLeftQueryOffset) || excludeLeftExtension || + if ((nuclBesttHitToExtend.dbStartPos <= static_cast(nuclLeftQueryOffset)) || excludeLeftExtension || aaTargetSeq[aaTargetSeqLen-1] == '*') { continue; } @@ -189,7 +189,7 @@ int dohybridassembleresult(LocalParameters &par) { nuclDbEndPos = alignment.endPos + dist; } - if (nuclDbStartPos == 0 && qEndPos == (nuclQuerySeqLen - 1) ) { + if (nuclDbStartPos == 0 && qEndPos == (static_cast(nuclQuerySeqLen) - 1) ) { if(queryCouldBeExtendedRight == true) { tmpNuclAlignments.push_back(nuclBesttHitToExtend); continue; @@ -213,7 +213,7 @@ int dohybridassembleresult(LocalParameters &par) { nuclRightQueryOffset += nuclDbFragLen; - } else if (qStartPos == 0 && nuclDbEndPos == (nuclTargetSeqLen - 1)) { + } else if (qStartPos == 0 && nuclDbEndPos == (static_cast(nuclTargetSeqLen) - 1)) { if (queryCouldBeExtendedLeft == true) { tmpNuclAlignments.push_back(nuclBesttHitToExtend); continue; diff --git a/src/plass.cpp b/src/plass.cpp index 93a95d85..26c1cc2d 100644 --- a/src/plass.cpp +++ b/src/plass.cpp @@ -65,13 +65,6 @@ std::vector commands = { " ", CITATION_PLASS, {{"sequenceDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::sequenceDb }, {"sequenceDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::sequenceDb }}}, - {"correctreads", correctreads, &localPar.onlythreads, COMMAND_HIDDEN, - "Simple read corrector", - NULL, - "Martin Steinegger ", - " ", - CITATION_PLASS, {{"sequenceDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::sequenceDb }, - {"sequenceDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::sequenceDb }}}, {"mergereads", mergereads, &localPar.onlythreads, COMMAND_HIDDEN, "Merge paired-end reads from FASTQ file (powered by FLASH)", NULL,