From db8f62063cbb626ac5cc0b1713e2d8a2620efb5b Mon Sep 17 00:00:00 2001 From: "Brian P. Walenz" Date: Wed, 3 Nov 2021 07:19:29 -0400 Subject: [PATCH] Update submodules to fix crash loading meryl data; memory estimate is too high. --- src/Makefile | 2 +- src/merfin/merfin-globals.C | 41 ++++++++++++++++--------------------- src/meryl | 2 +- src/utility | 2 +- 4 files changed, 21 insertions(+), 26 deletions(-) diff --git a/src/Makefile b/src/Makefile index 0931cf6..2d99a4e 100644 --- a/src/Makefile +++ b/src/Makefile @@ -436,7 +436,7 @@ ifeq ($(origin CXXFLAGS), undefined) CXXFLAGS += -Wno-unused-variable CXXFLAGS += -Wno-deprecated-declarations CXXFLAGS += -Wno-format-truncation - CXXFLAGS += -std=c++11 + CXXFLAGS += -std=c++17 CFLAGS += -Wall -Wextra -Wformat CFLAGS += -Wno-char-subscripts diff --git a/src/merfin/merfin-globals.C b/src/merfin/merfin-globals.C index 7d69e82..e41cbd0 100644 --- a/src/merfin/merfin-globals.C +++ b/src/merfin/merfin-globals.C @@ -113,11 +113,7 @@ merfinGlobal::getK(kmer fmer, void merfinGlobal::load_Kmers(void) { - - double minMem, minMemTotal = 0; - double optMem, optMemTotal = 0; - bool useOpt = false; - bool useMin = false; + double reqMemory = 0.0; // Make readDB first so we know the k size merylFileReader* readDB = new merylFileReader(readDBname); @@ -125,32 +121,31 @@ merfinGlobal::load_Kmers(void) { // Open sequence and build seqDBname if not provided load_Sequence(); + // Since estimateMemoryUsage() is now including space for temporary + // buffers that are used only when loading, this estimate is significantly + // too large for small datasets. If table1 and table2 need only 5 GB + // memory (each), the estimate for each will also include several GB for + // buffers (based on the number of threads); 16 threads = 8 GB buffers. + // So while the data needs 10 GB memory, meryl claims it needs 2x 13 GB = + // 26 GB memory. Since the tables are loaded sequentially, it really only + // needs 13 - 8 + 13 = 18 GB peak, 10 GB final. 
+#warning estimate is too high + fprintf(stderr, "-- Estimating required space for loading '%s'\n", readDBname); readLookup = new merylExactLookup(); - readLookup->estimateMemoryUsage(readDB, maxMemory, minMem, optMem, minV, maxV); - minMemTotal += minMem; - optMemTotal += optMem; + reqMemory += readLookup->estimateMemoryUsage(readDB, maxMemory, 0, minV, maxV); fprintf(stderr, "-- Estimating required space for loading '%s'\n", seqDBname); merylFileReader* asmDB = new merylFileReader(seqDBname); asmLookup = new merylExactLookup(); - asmLookup->estimateMemoryUsage(asmDB, maxMemory, minMem, optMem, minV, maxV); - minMemTotal += minMem; - optMemTotal += optMem; - - if (optMemTotal <= maxMemory) - useOpt = true; - else if (minMemTotal <= maxMemory) - useMin = true; + reqMemory += asmLookup->estimateMemoryUsage(asmDB, maxMemory, 0); fprintf(stderr, "--\n"); - fprintf(stderr, "-- Minimal memory needed: %.3f GB%s\n", minMemTotal, (useMin) ? " enabled" : ""); - fprintf(stderr, "-- Optimal memory needed: %.3f GB%s\n", optMemTotal, (useOpt) ? " enabled" : ""); - fprintf(stderr, "-- Memory limit %.3f GB\n", maxMemory); + fprintf(stderr, "-- Memory needed: %.3f GB\n", reqMemory); + fprintf(stderr, "-- Memory limit: %.3f GB\n", maxMemory); fprintf(stderr, "--\n"); - if ((useMin == false) && - (useOpt == false)) { + if (reqMemory > maxMemory) { fprintf(stderr, "\n"); fprintf(stderr, "Not enough memory to load databases. Increase -memory.\n"); fprintf(stderr, "\n"); @@ -158,10 +153,10 @@ merfinGlobal::load_Kmers(void) { } fprintf(stderr, "-- Loading kmers from '%s' into lookup table.\n", readDBname); - readLookup->load(readDB, maxMemory, useMin, useOpt, minV, maxV); + readLookup->load(readDB, maxMemory, 0, minV, maxV); fprintf(stderr, "-- Loading kmers from '%s' into lookup table.\n", seqDBname); - asmLookup-> load(asmDB, maxMemory, useMin, useOpt); + asmLookup-> load(asmDB, maxMemory, 0); delete readDB; // Not needed anymore. 
delete asmDB; diff --git a/src/meryl b/src/meryl index 1d45750..51fad4b 160000 --- a/src/meryl +++ b/src/meryl @@ -1 +1 @@ -Subproject commit 1d45750d917fd469b5f950f1daf3c4d6d3fa44e3 +Subproject commit 51fad4b236a7e22450cd23d405d83921572f5a50 diff --git a/src/utility b/src/utility index 5d74875..c58d0a3 160000 --- a/src/utility +++ b/src/utility @@ -1 +1 @@ -Subproject commit 5d74875b0441dc8f921840e87aa20aded228a5c3 +Subproject commit c58d0a3635175b35711508c8bf567c9c096ad1e8