From ddea6cf254f378db51d22c6eb21af775fa9e1f77 Mon Sep 17 00:00:00 2001 From: thegenemyers Date: Wed, 23 Oct 2024 11:51:05 +0200 Subject: [PATCH] Logex space consumption issue fixed --- LICENSE | 12 ++++++------ Logex.c | 45 ++++++++++++++++++++++++++++----------------- Profex.c | 23 +++++++++++++++++++---- libfastk.c | 7 +++++++ 4 files changed, 60 insertions(+), 27 deletions(-) diff --git a/LICENSE b/LICENSE index 4bfd99e..b83efed 100644 --- a/LICENSE +++ b/LICENSE @@ -4,9 +4,9 @@ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - · Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - + · Redistributions of source code or derivative codes must retain the above + copyright notice, this list of conditions and the following disclaimer. + · Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. @@ -27,8 +27,8 @@ For any issues regarding this software and its use, contact EWM at: Eugene W. Myers Jr. - Bautzner Str. 122e - 01099 Dresden - GERMANY + 6 Sugar Creek Dr. + Austin, TX 78749 + USA Email: gene.myers@gmail.com diff --git a/Logex.c b/Logex.c index 2839e69..5cb92c4 100644 --- a/Logex.c +++ b/Logex.c @@ -927,7 +927,7 @@ typedef struct static int GC[256]; static int GCR[256]; -#define IB_OUT 3 +static int IB_OUT; static void gc_setup(int kmer) { static int isgc[4] = { 0, 100, 100, 0 }; @@ -1118,7 +1118,12 @@ static void *merge_thread(void *args) { if (DO_TABLE) { fwrite(bst+IB_OUT,hbyte,1,out[i]); fwrite(&one,sizeof(short),1,out[i]); - x = (bst[0] << 16) | (bst[1] << 8) | bst[2]; + if (IB_OUT == 3) + x = (bst[0] << 16) | (bst[1] << 8) | bst[2]; + else if (IB_OUT == 2) + x = (bst[0] << 8) | bst[1]; + else + x = bst[0]; prefx[i][x] += 1; nels[i] += 1; } @@ -1140,7 +1145,12 @@ static void *merge_thread(void *args) else sho = c; fwrite(&sho,sizeof(short),1,out[i]); - x = (bst[0] << 16) | (bst[1] << 8) | bst[2]; + if (IB_OUT == 3) + x = (bst[0] << 16) | (bst[1] << 8) | bst[2]; + else if (IB_OUT == 2) + x = (bst[0] << 8) | bst[1]; + else + x = bst[0]; prefx[i][x] += 1; nels[i] += 1; } @@ -1404,9 +1414,21 @@ int main(int argc, char *argv[]) int t, a, i; int64 p; + for (a = 0; a < narg; a++) + { range[0][a] = 0; + range[NTHREADS][a] = S[a]->nels; + } + + pivot = 0; + for (a = 1; a < narg; a++) + if (S[a]->nels > S[pivot]->nels) + pivot = a; + + IB_OUT = S[pivot]->ibyte; + ixlen = (0x1 << (8*IB_OUT)); + if (DO_TABLE) - { ixlen = 0x1000000; - prefx[0] = Malloc(sizeof(int64)*ixlen*nass,"Allocating prefix tables"); + { prefx[0] = Malloc(sizeof(int64)*ixlen*nass,"Allocating prefix tables"); bzero(prefx[0],sizeof(int64)*ixlen*nass); for (a = 1; a < nass; a++) prefx[a] = prefx[a-1] + ixlen; @@ -1420,16 +1442,6 @@ int main(int argc, char *argv[]) Numbered_Suffix(".ktab.",t+1,"")),"w"); } - for (a = 0; a < narg; a++) - { range[0][a] = 0; - range[NTHREADS][a] = S[a]->nels; - } - - pivot = 0; - for (a = 1; a < narg; a++) - if (S[a]->nels > S[pivot]->nels) - pivot = a; - seq = Current_Kmer(S[0],NULL); ent = Current_Entry(S[0],NULL); for (t = 1; t < NTHREADS; t++) @@ -1493,7 +1505,6 @@ int main(int argc, char *argv[]) if (DO_TABLE) { int minval; - int three = 3; int mins[narg]; for (a = 0; a < narg; a++) @@ -1508,7 +1519,7 @@ int main(int argc, char *argv[]) fwrite(&kmer,sizeof(int),1,f); fwrite(&NTHREADS,sizeof(int),1,f); fwrite(&minval,sizeof(int),1,f); - fwrite(&three,sizeof(int),1,f); + fwrite(&IB_OUT,sizeof(int),1,f); for (i = 1; i < ixlen; i++) prf[i] += prf[i-1]; diff --git a/Profex.c b/Profex.c index 797191c..f3ab87a 100644 --- a/Profex.c +++ b/Profex.c @@ -17,7 +17,7 @@ #include "libfastk.h" #include "ONElib.h" -static char *Usage = "[-1] [.prof] [ [-(|#)] ... ]"; +static char *Usage = "[-1z] [.prof] [ [-(|#)] ... ]"; static char *One_Schema = "P 3 prf This is a 1-code fiel for profiles\n" @@ -33,6 +33,7 @@ int main(int argc, char *argv[]) { Profile_Index *P; char *command; int ONE_CODE; + int ZFLAG; // Process options and capture command line for provenance @@ -68,7 +69,7 @@ int main(int argc, char *argv[]) if (argv[i][0] == '-') switch (argv[i][1]) { default: - ARG_FLAGS("1") + ARG_FLAGS("1z") break; } else @@ -76,11 +77,13 @@ int main(int argc, char *argv[]) argc = j; ONE_CODE = flags['1']; + ZFLAG = flags['z']; if (argc < 2) { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage); fprintf(stderr,"\n"); fprintf(stderr," -1: Produce 1-code as output.\n"); + fprintf(stderr," -z: Compress runs and ignore zeros.\n"); exit (1); } } @@ -169,8 +172,20 @@ int main(int argc, char *argv[]) } else { printf("\nRead %d:\n",p); - for (i = 0; i < plen; i++) - printf(" %5d: %5d\n",i,profile[i]); + if (ZFLAG) + { int last = 0; + for (i = 0; i < plen; i++) + if (profile[i] != last) + { if (last != 0) + printf(" - %5d (%d)\n",i+P->kmer-1,last); + if (profile[i] != 0) + printf(" %5d",i); + last = profile[i]; + } + } + else + for (i = 0; i < plen; i++) + printf(" %5d: %5d\n",i,profile[i]); } } } diff --git a/libfastk.c b/libfastk.c index 54797e8..e525776 100644 --- a/libfastk.c +++ b/libfastk.c @@ -1280,6 +1280,13 @@ inline void GoTo_Kmer_Index(Kmer_Stream *_S, int64 i) S->cidx = i; + if (i >= S->nels) + { S->csuf = NULL; + S->cpre = S->ixlen; + S->part = S->nthr+1; + return; + } + p = S->inver[i>>S->shift]; while (index[p] <= i) p += 1;