From b360cd3bde12ad46adf5fe4f76c716f4726dc366 Mon Sep 17 00:00:00 2001 From: Victoria Carr Date: Tue, 8 Feb 2022 15:58:24 +0000 Subject: [PATCH] Include maximum length --- README.md | 10 ++++++---- file.h | 2 ++ pal-mem.cpp | 22 +++++++++++++++++----- pal-mem.h | 23 +++++++++++++---------- 4 files changed, 38 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 29bcc2b..599ef67 100755 --- a/README.md +++ b/README.md @@ -55,11 +55,11 @@ make ### USAGE ``` -pal-mem -f1 -f2 -o [options] +pal-mem -f1 -f2 -l -m -o [options] ``` OR ``` -pal-mem -fu -o [options] +pal-mem -fu -l -m -o [options] ``` Type *pal-mem -h* for a list of options. @@ -92,6 +92,8 @@ The program can be run in both serial and parallel mode. The parallel mode has a -l set the minimum length of a match. Default: 24 +-m set the maximum length of a match. Default: 100 + -k set the k-mer length. Default: 15 -t number of threads. Default: 1. The option -t is used for running the program in parallel mode. The default value is set to 1, which means serial mode. This option with value > 1 will reduce overall running time of the program. @@ -99,7 +101,7 @@ The program can be run in both serial and parallel mode. The parallel mode has a -h show possible options ## EXAMPLE -To get ITRs with a minimum length of 30 from paired-end metagenomic fasta files with a k-mer length of 18 on a machine with 8 threads: +To get ITRs with a minimum length of 30 and maximum length of 50 from paired-end metagenomic fasta files with a k-mer length of 18 on a machine with 8 threads: ``` -pal-mem -f1 example_1.fasta -f2 example_1.fasta -o example -l 30 -k 18 -t 8 +pal-mem -f1 example_1.fasta -f2 example_1.fasta -o example -l 30 -m 50 -k 18 -t 8 ``` diff --git a/file.h b/file.h index 5798104..16912f5 100755 --- a/file.h +++ b/file.h @@ -36,6 +36,7 @@ using namespace std; class commonData { public: static int32_t minMemLen; + static int32_t maxMemLen; static int32_t d; static int32_t numThreads; static int32_t kmerSize; @@ -44,6 +45,7 @@ class commonData { }; int32_t commonData::minMemLen=48; // 2 bit representation +int32_t commonData::maxMemLen=200; int32_t commonData::lenBuffer=46; int32_t commonData::d=1; int32_t commonData::numThreads=1; diff --git a/pal-mem.cpp b/pal-mem.cpp index 5f31b37..3d6578c 100755 --- a/pal-mem.cpp +++ b/pal-mem.cpp @@ -294,7 +294,7 @@ void helperReportMem(uint64_t &currRPos, uint64_t &currQPos, uint64_t totalRBits /* if current rRef/rQue plus matchSize smaller than minMEMLength, then simply return. * Note that one less character is compared due to a mismatch */ - if (rRef-lRef < static_cast(commonData::minMemLen)) { + if ((rRef-lRef < static_cast(commonData::minMemLen)) || (rRef-lRef > static_cast(commonData::maxMemLen))) { if ((lRef?(lRef - RefNpos.left <= commonData::lenBuffer):!lRef) || ((RefNpos.right - rRef) <= commonData::lenBuffer)) { rRMEM = RefNpos.right; lRMEM = RefNpos.left; @@ -470,7 +470,7 @@ void checkCommandLineOptions(uint32_t &options) } } - if (IS_LENGTH_DEF(options)){ + if (IS_MIN_LENGTH_DEF(options)){ if (commonData::minMemLen <= commonData::kmerSize){ cout << "ERROR: -l cannot be less than or equal to the k-mer length 15!" << endl; exit(EXIT_FAILURE); @@ -578,11 +578,11 @@ int main (int argc, char *argv[]) outPrefix = argv[n+1]; n+=2; }else if(boost::equals(argv[n],"-l")){ - if (IS_LENGTH_DEF(options)) { - cout << "ERROR: Length argument passed multiple times!" << endl; + if (IS_MIN_LENGTH_DEF(options)) { + cout << "ERROR: Minimum length argument passed multiple times!" << endl; exit(EXIT_FAILURE); } - SET_LENGTH(options); + SET_MIN_LENGTH(options); if (!argv[n+1] || !is_numeric(argv[n+1])){ cout << "ERROR: Invalid value for -l option!" << endl; exit(EXIT_FAILURE); @@ -590,6 +590,18 @@ int main (int argc, char *argv[]) commonData::minMemLen = 2*std::stoi(argv[n+1]); commonData::lenBuffer = commonData::minMemLen - 2; n+=2; + }else if(boost::equals(argv[n],"-m")){ + if (IS_MAX_LENGTH_DEF(options)) { + cout << "ERROR: Maximum length argument passed multiple times!" << endl; + exit(EXIT_FAILURE); + } + SET_MAX_LENGTH(options); + if (!argv[n+1] || !is_numeric(argv[n+1])){ + cout << "ERROR: Invalid value for -m option!" << endl; + exit(EXIT_FAILURE); + } + commonData::maxMemLen = 2*std::stoi(argv[n+1]); + n+=2; }else if (boost::equals(argv[n],"-t")){ if (IS_NUM_THREADS_DEF(options)) { cout << "ERROR: Number of threads argument passed multiple times!" << endl; diff --git a/pal-mem.h b/pal-mem.h index d735093..34e2847 100755 --- a/pal-mem.h +++ b/pal-mem.h @@ -34,20 +34,22 @@ #define FASTA2 0x00000002 #define FASTAU 0x00000004 #define OUT_FILE 0x00000008 -#define LENGTH 0x00000010 -#define REF_FILE 0x00000020 -#define QUERY_FILE 0x00000040 -#define KMER_SIZE 0x0000080 -#define NUM_THREADS 0x00000100 -#define REL_REV_QUEPOS 0x00000200 -#define FOUR_COL_OUTPUT 0x00000400 -#define LEN_IN_HEADER 0x00000800 +#define MIN_LENGTH 0x00000010 +#define MAX_LENGTH 0x00000020 +#define REF_FILE 0x00000040 +#define QUERY_FILE 0x00000080 +#define KMER_SIZE 0x0000100 +#define NUM_THREADS 0x00000200 +#define REL_REV_QUEPOS 0x00000400 +#define FOUR_COL_OUTPUT 0x00000800 +#define LEN_IN_HEADER 0x00001000 #define IS_FASTA1_DEF(x) (x & FASTA1) #define IS_FASTA2_DEF(x) (x & FASTA2) #define IS_FASTAU_DEF(x) (x & FASTAU) #define IS_OUT_FILE_DEF(x) (x & OUT_FILE) -#define IS_LENGTH_DEF(x) (x & LENGTH) +#define IS_MIN_LENGTH_DEF(x) (x & MIN_LENGTH) +#define IS_MAX_LENGTH_DEF(x) (x & MAX_LENGTH) #define IS_KMER_SIZE_DEF(x) (x & KMER_SIZE) #define IS_NUM_THREADS_DEF(x) (x & NUM_THREADS) @@ -55,7 +57,8 @@ #define SET_FASTA2(x) (x |= FASTA2) #define SET_FASTAU(x) (x |= FASTAU) #define SET_OUT_FILE(x) (x |= OUT_FILE) -#define SET_LENGTH(x) (x |= LENGTH) +#define SET_MIN_LENGTH(x) (x |= MIN_LENGTH) +#define SET_MAX_LENGTH(x) (x |= MAX_LENGTH) #define SET_KMER_SIZE(x) (x |= KMER_SIZE) #define SET_NUM_THREADS(x) (x |= NUM_THREADS)