From 6d0d52088bc5d75f59c34a128d2e5e4fc7ad9ad5 Mon Sep 17 00:00:00 2001 From: thesamovar Date: Fri, 9 Jan 2015 19:52:21 +0000 Subject: [PATCH 1/4] Change RAM reporting from available physical memory to total physical memory --- klustakwik.cpp | 8 +------- memorytracking.cpp | 13 +++++-------- memorytracking.h | 4 ++-- 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/klustakwik.cpp b/klustakwik.cpp index 9565119..f10cb16 100644 --- a/klustakwik.cpp +++ b/klustakwik.cpp @@ -1816,14 +1816,8 @@ int main(int argc, char **argv) SetupParams((integer)argc, argv); // This function is defined in parameters.cpp if (RamLimitGB == 0.0) { - RamLimitGB = (1.0*available_physical_memory()) / (1024.0*1024.0*1024.0); -#ifdef __APPLE__ + RamLimitGB = (1.0*total_physical_memory()) / (1024.0*1024.0*1024.0); Output("Setting RAM limit to total physical memory, %.2f GB.\n", (double)RamLimitGB); - Output("WARNING: Not all physical memory will be available, but on Macs it is not possible\n"); - Output(" to get the available physical memory.\n"); -#else - Output("Setting RAM limit to available physical memory, %.2f GB.\n", (double)RamLimitGB); -#endif } else if (RamLimitGB < 0.0) { diff --git a/memorytracking.cpp b/memorytracking.cpp index f36dcfa..cc1729d 100644 --- a/memorytracking.cpp +++ b/memorytracking.cpp @@ -7,9 +7,9 @@ // Unix way #include -size_t available_physical_memory() +size_t total_physical_memory() { - long pages = sysconf(_SC_AVPHYS_PAGES); + long pages = sysconf(_SC_PHYS_PAGES); long page_size = sysconf(_SC_PAGE_SIZE); return pages * page_size; } @@ -17,15 +17,12 @@ size_t available_physical_memory() #endif #ifdef __APPLE__ -// Mac way only returns total, not available physical memory because of the -// way the Mac uses memory to cache some data meaning that almost all memory -// is used at all times #include #include #include #include -size_t available_physical_memory() +size_t total_physical_memory() { int mib [] = { CTL_HW, HW_MEMSIZE }; uint64_t value = 0; @@ -45,12 +42,12 @@ size_t available_physical_memory() #include -size_t available_physical_memory() +size_t total_physical_memory() { MEMORYSTATUSEX status; status.dwLength = sizeof(status); GlobalMemoryStatusEx(&status); - return status.ullAvailPhys; + return status.ullTotalPhys; } #endif diff --git a/memorytracking.h b/memorytracking.h index 935823e..ab23914 100644 --- a/memorytracking.h +++ b/memorytracking.h @@ -6,7 +6,7 @@ Memory tracking utilities: used to warn the user that the memory they are reques #include "numerics.h" -size_t available_physical_memory(); +size_t total_physical_memory(); class KKMemoryRequest; @@ -32,4 +32,4 @@ class KKMemoryRequest integer num_bytes; }; -#endif \ No newline at end of file +#endif From 249f5bfa930ea5c8fe397c44439135f8063947fd Mon Sep 17 00:00:00 2001 From: thesamovar Date: Fri, 9 Jan 2015 20:16:20 +0000 Subject: [PATCH 2/4] Removed memory tracking during the run --- io.cpp | 2 +- klustakwik.cpp | 11 ++++------- klustakwik.h | 2 -- memorytracking.cpp | 21 --------------------- memorytracking.h | 24 ------------------------ 5 files changed, 5 insertions(+), 55 deletions(-) diff --git a/io.cpp b/io.cpp index b657e81..6fca701 100644 --- a/io.cpp +++ b/io.cpp @@ -39,7 +39,7 @@ void KK::LoadData(char *FileBase, integer ElecNo, char *UseFeatures) scalar val; //int maskval; // use int rather than integer because it is read as %d integer UseLen; - scalar max, min; + //scalar max, min; //bool usemasks = (UseDistributional && !UseFloatMasks); // open file diff --git a/klustakwik.cpp b/klustakwik.cpp index f10cb16..270408f 100644 --- a/klustakwik.cpp +++ b/klustakwik.cpp @@ -26,9 +26,10 @@ void KK::MemoryCheck() { integer num_bytes_required = 3 * NumBytesRequired(); scalar memory_required = (num_bytes_required*1.0) / (1024.0*1024.0*1024.0); - if (memory_required > memory_tracker.limit_gb) + + if (memory_required > RamLimitGB) { - Error("Running KlustaKwik on this data will use between %.2f and %.2f GB of RAM, and the limit is set at %.2f.\n", (double)(memory_required*2.0 / 3.0), (double)memory_required, (double)memory_tracker.limit_gb); + Error("Running KlustaKwik on this data will use between %.2f and %.2f GB of RAM, and the limit is set at %.2f.\n", (double)(memory_required*2.0 / 3.0), (double)memory_required, (double)RamLimitGB); Error("Possible candidates are:\n"); Error("- nPoints = %d\n", (int)nPoints); Error("- nDims = %d\n", (int)nDims); @@ -112,9 +113,6 @@ void KK::AllocateArrays() { nDims2 = nDims*nDims; NoisePoint = 1; // Ensures that the mixture weight for the noise cluster never gets to zero - integer num_bytes_allocated = NumBytesRequired(); - mem.add(num_bytes_allocated); - // Set sizes for arrays resize_and_fill_with_zeros(Data, nPoints * nDims); //SNK @@ -191,7 +189,7 @@ scalar KK::Penalty(integer n) // Penalties for Masked CEM void KK::ComputeClassPenalties() { - if(!((bool)UseDistributional)) // This function must only be called in Use Distributional mode + if(UseDistributional==0) // This function must only be called in Use Distributional mode { // Output("Caught in ComputeClassPenalties"); return; @@ -1824,7 +1822,6 @@ int main(int argc, char **argv) RamLimitGB = 1e20; Output("WARNING: You have chosen not to set a RAM limit, this may cause problems.\n"); } - memory_tracker.limit_gb = RamLimitGB; //clock_t Clock0 = clock(); Clock0 = clock(); diff --git a/klustakwik.h b/klustakwik.h index 41d1f57..7e59b62 100644 --- a/klustakwik.h +++ b/klustakwik.h @@ -213,8 +213,6 @@ class KK { // debugging info integer numiterations; integer init_type; - // memory tracking - KKMemoryRequest mem; }; #endif /* MASKED_KLUSTA_KWIK_2_H_ */ diff --git a/memorytracking.cpp b/memorytracking.cpp index cc1729d..c4982b0 100644 --- a/memorytracking.cpp +++ b/memorytracking.cpp @@ -51,24 +51,3 @@ size_t total_physical_memory() } #endif - - -void KKMemoryTracker::request(integer num_bytes) -{ - double reqsize = (double)num_bytes / (1024.0*1024.0*1024.0); - num_bytes_allocated += num_bytes; - double totalsize = (double)num_bytes_allocated / (1024.0*1024.0*1024.0); - Output("Memory request: %.2f GB would take us to total of %.2f GB.\n", reqsize, totalsize); - if (totalsize >= limit_gb) - { - Error("Memory request exceeds limit.\n"); - exit(EXIT_FAILURE); - } -}; - -void KKMemoryTracker::free(integer num_bytes) -{ - num_bytes_allocated -= num_bytes; -}; - -KKMemoryTracker memory_tracker; diff --git a/memorytracking.h b/memorytracking.h index ab23914..15bc653 100644 --- a/memorytracking.h +++ b/memorytracking.h @@ -8,28 +8,4 @@ Memory tracking utilities: used to warn the user that the memory they are reques size_t total_physical_memory(); -class KKMemoryRequest; - -class KKMemoryTracker -{ -public: - integer num_bytes_allocated; - scalar limit_gb; - KKMemoryTracker() { num_bytes_allocated = 0; limit_gb = 0.0; }; - void request(integer num_bytes); - void free(integer num_bytes); -}; - -extern KKMemoryTracker memory_tracker; - -class KKMemoryRequest -{ -public: - KKMemoryRequest() { num_bytes = 0; }; - KKMemoryRequest(integer N) : num_bytes(N) {}; - ~KKMemoryRequest() { memory_tracker.free(num_bytes); } - void add(integer N) { num_bytes += N; memory_tracker.request(N); } - integer num_bytes; -}; - #endif From 06c04dddc5361cfc1e1d1899da4fd0e60da283fb Mon Sep 17 00:00:00 2001 From: thesamovar Date: Fri, 9 Jan 2015 21:30:45 +0000 Subject: [PATCH 3/4] Display information about which arrays are using how much memory --- klustakwik.cpp | 69 ++++++++++++++-------------------------------- klustakwik.h | 1 - memorytracking.cpp | 36 ++++++++++++++++++++++++ memorytracking.h | 26 +++++++++++++++++ 4 files changed, 82 insertions(+), 50 deletions(-) diff --git a/klustakwik.cpp b/klustakwik.cpp index 270408f..25f6cf4 100644 --- a/klustakwik.cpp +++ b/klustakwik.cpp @@ -24,67 +24,38 @@ scalar timesofar; // Does a memory check (should only be called for first instance of KK) void KK::MemoryCheck() { - integer num_bytes_required = 3 * NumBytesRequired(); - scalar memory_required = (num_bytes_required*1.0) / (1024.0*1024.0*1024.0); - - if (memory_required > RamLimitGB) - { - Error("Running KlustaKwik on this data will use between %.2f and %.2f GB of RAM, and the limit is set at %.2f.\n", (double)(memory_required*2.0 / 3.0), (double)memory_required, (double)RamLimitGB); - Error("Possible candidates are:\n"); - Error("- nPoints = %d\n", (int)nPoints); - Error("- nDims = %d\n", (int)nDims); - Error("- MaxPossibleClusters = %d\n", (int)MaxPossibleClusters); - exit(EXIT_FAILURE); - } - Output("This run is expected to use between %.2f and %.2f GB of RAM.\n", (double)(memory_required*2.0 / 3.0), (double)memory_required); -} - -integer KK::NumBytesRequired() -{ - // we don't allocate any memory if we have already allocated memory to this - // (i.e. if we are in TrySplits) - if (Data.size()) - return 0; - nDims2 = nDims*nDims; - // Compute required memory and check if it exceeds the limit set - integer num_bytes_allocated = + long long NP = (long long)nPoints; + long long MPC = (long long)MaxPossibleClusters; + long long ND = (long long)nDims; + vector usages; #ifdef STORE_DATA_AS_INTEGER - sizeof(data_int)*nPoints*nDims + // Data + usages.push_back(MemoryUsage("Data", "data_int", sizeof(data_int), NP*ND, "nPoints*nDims", 2, 3)); #else - sizeof(scalar)*nPoints*nDims + // Data + usages.push_back(MemoryUsage("Data", "scalar", sizeof(scalar), NP*ND, "nPoints*nDims", 2, 3)); #endif #ifdef COMPUTED_BINARY_MASK - (!UseDistributional)*sizeof(char)*nPoints*nDims + // Masks + if (!UseDistributional) + usages.push_back(MemoryUsage("Masks", "char", sizeof(char), NP*ND, "nPoints*nDims", 2, 3)); #else - sizeof(char)*nPoints*nDims + // Masks + usages.push_back(MemoryUsage("Masks", "char", sizeof(char), NP*ND, "nPoints*nDims", 2, 3)); #endif #ifdef STORE_FLOAT_MASK_AS_CHAR - sizeof(char)*nPoints*nDims + // CharFloatMasks + usages.push_back(MemoryUsage("CharFloatMasks", "char", sizeof(char), NP*ND, "nPoints*nDims", 2, 3)); #else - sizeof(scalar)*nPoints*nDims + // FloatMasks + usages.push_back(MemoryUsage("FloatMasks", "scalar", sizeof(scalar), NP*ND, "nPoints*nDims", 2, 3)); #endif - sizeof(scalar)*nPoints + // UnMaskDims - sizeof(scalar)*MaxPossibleClusters + // Weight - sizeof(scalar)*MaxPossibleClusters*nDims + // Mean - (1 - UseDistributional)*sizeof(scalar)*MaxPossibleClusters*nDims2 + // Cov - sizeof(scalar)*MaxPossibleClusters*nPoints + // LogP - sizeof(integer)*nPoints + // Class - sizeof(integer)*nPoints + // OldClass - sizeof(integer)*nPoints + // Class2 - sizeof(integer)*nPoints + // BestClass - sizeof(integer)*MaxPossibleClusters + // ClassAlive - sizeof(integer)*MaxPossibleClusters + // AliveIndex - sizeof(scalar)*MaxPossibleClusters + // ClassPenalty - sizeof(integer)*MaxPossibleClusters + // nClassMembers - sizeof(scalar)*nPoints*nDims + // AllVector2Mean - // UseDistributional only + if (UseDistributional) + usages.push_back(MemoryUsage("Cov", "scalar", sizeof(scalar), MPC*ND*ND, "MaxPossibleClusters*nDims*nDims", 0, 3)); + else + usages.push_back(MemoryUsage("Cov", "scalar", sizeof(scalar), MPC*ND*ND, "MaxPossibleClusters*nDims*nDims", 2, 3)); + usages.push_back(MemoryUsage("LogP", "scalar", sizeof(scalar), MPC*NP, "MaxPossibleClusters*nPoints", 2, 3)); + usages.push_back(MemoryUsage("AllVector2Mean", "scalar", sizeof(scalar), NP*ND, "nPoints*nDims", 2, 3)); #ifndef COMPUTED_CORRECTION_TERM - UseDistributional*sizeof(scalar)*nPoints*nDims + // CorrectionTerm + if (UseDistributional) + usages.push_back(MemoryUsage("CorrectionTerm", "scalar", sizeof(scalar), NP*ND, "nPoints*nDims", 2, 3)); #endif - sizeof(scalar)*(UseDistributional*MaxPossibleClusters*nDims) + // ClusterMask (vector) - UseDistributional*sizeof(integer)*MaxPossibleClusters*nDims; // ClusterUnmaskedFeatures + ClusterMaskedFeatures - return num_bytes_allocated; + check_memory_usage(usages, RamLimitGB, nPoints, nDims, MaxPossibleClusters); } template diff --git a/klustakwik.h b/klustakwik.h index 7e59b62..0a35426 100644 --- a/klustakwik.h +++ b/klustakwik.h @@ -39,7 +39,6 @@ class KK { ~KK(); /////////////// FUNCTIONS ////////////////////////////////////////////////// void MemoryCheck(); - integer NumBytesRequired(); void AllocateArrays(); void Reindex(); // Random initial starting conditions functions diff --git a/memorytracking.cpp b/memorytracking.cpp index c4982b0..88f3a28 100644 --- a/memorytracking.cpp +++ b/memorytracking.cpp @@ -1,5 +1,6 @@ #include "memorytracking.h" #include "log.h" +#include // Platform independent way to get available memory @@ -51,3 +52,38 @@ size_t total_physical_memory() } #endif + +bool memory_usage_comparison(const MemoryUsage &lhs, const MemoryUsage &rhs) +{ + return lhs.num_bytes > rhs.num_bytes; +} + +void check_memory_usage(vector &usages, scalar limit_gb, integer nPoints, integer nDims, integer MaxPossibleClusters) +{ + sort(usages.begin(), usages.end(), memory_usage_comparison); + double total_min = 0.0, total_max = 0.0; + Output("\nExpected memory usage by array (largest first):\n\n"); + for (int i = 0; i < usages.size(); i++) + { + MemoryUsage &m = usages[i]; + double base_usage = (double)m.num_bytes / (1024.0*1024.0*1024.0); + double min_usage = m.min_multiplier*base_usage; + double max_usage = m.max_multiplier*base_usage; + total_min += min_usage; + total_max += max_usage; + Output("Array %s uses %lld bytes per element (%s) and has %lld elements when full. Total usage will be between %.2f and %.2f GB. Memory usage scales as %s.\n", + m.name_of_array, m.bytes_per_element, m.name_of_type, m.num_elements, min_usage, max_usage, m.expr); + } + Output("\nNote that nPoints=%d, nDims=%d, MaxPossibleClusters=%d", (int)nPoints, (int)nDims, (int)MaxPossibleClusters); + Output("\nTotal memory usage will be between %.2f and %.2f GB.\n", total_min, total_max); + Output("RAM limit is set at %.2f GB.\n", limit_gb); + if (total_min > limit_gb) + { + Error("The RAM limit does not cover the minimum possible memory usage: KlustaKwik definitely cannot run this.\n"); + exit(EXIT_FAILURE); + } else if(total_max > limit_gb) + { + Error("The RAM limit covers the minimum but not maximum possible memory usage, so it cannot be guaranteed not to crash. Call KlustaKwik with -RamLimitGB -1 to force it to run.\n"); + exit(EXIT_FAILURE); + } +} diff --git a/memorytracking.h b/memorytracking.h index 15bc653..5d9a173 100644 --- a/memorytracking.h +++ b/memorytracking.h @@ -5,7 +5,33 @@ Memory tracking utilities: used to warn the user that the memory they are reques #define _MEMORY_TRACKING_H #include "numerics.h" +#include size_t total_physical_memory(); +class MemoryUsage +{ +public: + long long num_bytes; + long long bytes_per_element; + long long num_elements; + char *name_of_type; + char *name_of_array; + char *expr; + double min_multiplier, max_multiplier; + MemoryUsage(char *_name_of_array, char *_name_of_type, long long _bytes_per_element, long long _num_elements, char *_expr, double _min_multiplier, double _max_multiplier) + { + name_of_array = _name_of_array; + name_of_type = _name_of_type; + bytes_per_element = _bytes_per_element; + num_elements = _num_elements; + expr = _expr; + min_multiplier = _min_multiplier; + max_multiplier = _max_multiplier; + num_bytes = bytes_per_element*num_elements; + } +}; + +void check_memory_usage(vector &usages, scalar limit_gb, integer nPoints, integer nDims, integer MaxPossibleClusters); + #endif From ff4ccecb3ae38f2c863ceddb61785a2dd741a4b8 Mon Sep 17 00:00:00 2001 From: thesamovar Date: Wed, 14 Jan 2015 09:59:40 +0000 Subject: [PATCH 4/4] Slightly clearer logging of memory issues --- memorytracking.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/memorytracking.cpp b/memorytracking.cpp index 88f3a28..3b7404a 100644 --- a/memorytracking.cpp +++ b/memorytracking.cpp @@ -71,19 +71,20 @@ void check_memory_usage(vector &usages, scalar limit_gb, integer nP double max_usage = m.max_multiplier*base_usage; total_min += min_usage; total_max += max_usage; - Output("Array %s uses %lld bytes per element (%s) and has %lld elements when full. Total usage will be between %.2f and %.2f GB. Memory usage scales as %s.\n", - m.name_of_array, m.bytes_per_element, m.name_of_type, m.num_elements, min_usage, max_usage, m.expr); + Output("Array %s will use between %.2f and %.2f GB.\n", m.name_of_array, min_usage, max_usage); + Output("- %lld bytes per element (%s), %lld elements when full.\n", m.bytes_per_element, m.name_of_type, m.num_elements); + Output("- Memory usage scales as %s.\n", m.expr); } Output("\nNote that nPoints=%d, nDims=%d, MaxPossibleClusters=%d", (int)nPoints, (int)nDims, (int)MaxPossibleClusters); Output("\nTotal memory usage will be between %.2f and %.2f GB.\n", total_min, total_max); Output("RAM limit is set at %.2f GB.\n", limit_gb); if (total_min > limit_gb) { - Error("The RAM limit does not cover the minimum possible memory usage: KlustaKwik definitely cannot run this.\n"); + Error("The RAM limit does not cover the minimum possible memory usage.\nKlustaKwik definitely cannot run this.\nOptions include: buying more RAM; reducing the key parameters above.\n"); exit(EXIT_FAILURE); } else if(total_max > limit_gb) { - Error("The RAM limit covers the minimum but not maximum possible memory usage, so it cannot be guaranteed not to crash. Call KlustaKwik with -RamLimitGB -1 to force it to run.\n"); + Error("The RAM limit covers the minimum but not maximum possible memory usage, so it\ncannot be guaranteed not to crash. Call KlustaKwik with -RamLimitGB -1 to\nforce it to run.\n"); exit(EXIT_FAILURE); } }