Skip to content

Commit

Permalink
Update submodules to fix crash loading meryl data; memory estimate is…
Browse files Browse the repository at this point in the history
… too high.
  • Loading branch information
brianwalenz committed Nov 3, 2021
1 parent dc2c96b commit db8f620
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 26 deletions.
2 changes: 1 addition & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ ifeq ($(origin CXXFLAGS), undefined)
CXXFLAGS += -Wno-unused-variable
CXXFLAGS += -Wno-deprecated-declarations
CXXFLAGS += -Wno-format-truncation
CXXFLAGS += -std=c++11
CXXFLAGS += -std=c++17

CFLAGS += -Wall -Wextra -Wformat
CFLAGS += -Wno-char-subscripts
Expand Down
41 changes: 18 additions & 23 deletions src/merfin/merfin-globals.C
Original file line number Diff line number Diff line change
Expand Up @@ -113,55 +113,50 @@ merfinGlobal::getK(kmer fmer,

void
merfinGlobal::load_Kmers(void) {

double minMem, minMemTotal = 0;
double optMem, optMemTotal = 0;
bool useOpt = false;
bool useMin = false;
double reqMemory = 0.0;

// Make readDB first so we know the k size
merylFileReader* readDB = new merylFileReader(readDBname);

// Open sequence and build seqDBname if not provided
load_Sequence();

// Since estimateMemoryUsage() is now including space for temporary
// buffers that are used only when loading, this estimate is significantly
// too large for small datasets. If table1 and table2 need only 5 GB
// memory (each), the estimate for each will also include several GB for
// buffers (based on the number of threads); 16 threads = 8 GB buffers.
// So while the data needs 10 GB memory, meryl claims it needs 2x 13 GB =
// 26 GB memory. Since the tables are loaded sequentially, it really only
// needs (13 - 8) + 13 = 18 GB peak, and 18 - 8 = 10 GB final.
#warning estimate is too high

fprintf(stderr, "-- Estimating required space for loading '%s'\n", readDBname);
readLookup = new merylExactLookup();
readLookup->estimateMemoryUsage(readDB, maxMemory, minMem, optMem, minV, maxV);
minMemTotal += minMem;
optMemTotal += optMem;
reqMemory += readLookup->estimateMemoryUsage(readDB, maxMemory, 0, minV, maxV);

fprintf(stderr, "-- Estimating required space for loading '%s'\n", seqDBname);
merylFileReader* asmDB = new merylFileReader(seqDBname);
asmLookup = new merylExactLookup();
asmLookup->estimateMemoryUsage(asmDB, maxMemory, minMem, optMem, minV, maxV);
minMemTotal += minMem;
optMemTotal += optMem;

if (optMemTotal <= maxMemory)
useOpt = true;
else if (minMemTotal <= maxMemory)
useMin = true;
reqMemory += asmLookup->estimateMemoryUsage(asmDB, maxMemory, 0);

fprintf(stderr, "--\n");
fprintf(stderr, "-- Minimal memory needed: %.3f GB%s\n", minMemTotal, (useMin) ? " enabled" : "");
fprintf(stderr, "-- Optimal memory needed: %.3f GB%s\n", optMemTotal, (useOpt) ? " enabled" : "");
fprintf(stderr, "-- Memory limit %.3f GB\n", maxMemory);
fprintf(stderr, "-- Memory needed: %.3f GB\n", reqMemory);
fprintf(stderr, "-- Memory limit: %.3f GB\n", maxMemory);
fprintf(stderr, "--\n");

if ((useMin == false) &&
(useOpt == false)) {
if (reqMemory > maxMemory) {
fprintf(stderr, "\n");
fprintf(stderr, "Not enough memory to load databases. Increase -memory.\n");
fprintf(stderr, "\n");
exit(1);
}

fprintf(stderr, "-- Loading kmers from '%s' into lookup table.\n", readDBname);
readLookup->load(readDB, maxMemory, useMin, useOpt, minV, maxV);
readLookup->load(readDB, maxMemory, 0, minV, maxV);

fprintf(stderr, "-- Loading kmers from '%s' into lookup table.\n", seqDBname);
asmLookup-> load(asmDB, maxMemory, useMin, useOpt);
asmLookup-> load(asmDB, maxMemory, 0);

delete readDB; // Not needed anymore.
delete asmDB;
Expand Down
2 changes: 1 addition & 1 deletion src/meryl

0 comments on commit db8f620

Please sign in to comment.