Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new memory runLog info to memoryProfiler.py #1970

Merged
merged 9 commits into from
Oct 25, 2024
Merged
45 changes: 45 additions & 0 deletions armi/bookkeeping/memoryProfiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@
https://pythonhosted.org/psutil/
https://docs.python.org/3/library/gc.html#gc.garbage
"""
from os import cpu_count
from typing import Optional
import gc
import sys
from math import floor
albeanth marked this conversation as resolved.
Show resolved Hide resolved

from armi import context
from armi import interfaces
Expand Down Expand Up @@ -68,6 +70,40 @@ def describeInterfaces(cs):
return (MemoryProfiler, {})


def getTotalJobMemory(nTasksPerNode):
    """
    Calculate the total memory allocated to a job on this node, in GB.

    The node's physical RAM is split evenly across its CPUs, and the job is credited with the
    share of the CPUs its tasks occupy. This is a constant during a simulation.

    Parameters
    ----------
    nTasksPerNode : int
        Number of tasks running on the node. Zero is treated as one task per CPU.

    Returns
    -------
    float
        Memory allocated to the job, in GB.
    """
    # os.cpu_count() returns None when the CPU count cannot be determined; fall back to a single
    # CPU instead of failing on the division below.
    cpuPerNode = cpu_count() or 1
    ramPerCpuGB = psutil.virtual_memory().total / (1024**3) / cpuPerNode
    if nTasksPerNode == 0:
        nTasksPerNode = cpuPerNode

    # With more tasks than CPUs the integer division yields zero, which would report a job with
    # no memory at all. Give every task at least one CPU, and never credit the job with more
    # CPUs (and therefore more RAM) than the node actually has.
    cpusPerTask = max(1, cpuPerNode // nTasksPerNode)
    cpusInUse = min(nTasksPerNode * cpusPerTask, cpuPerNode)
    return cpusInUse * ramPerCpuGB


def getCurrentMemoryUsage():
    """
    Return the current virtual memory usage, in MB, summed over all gathered results.

    A ``PrintSystemMemoryUsageAction`` is broadcast and its ``SystemAndProcessMemoryUsage``
    results are gathered (presumably one per MPI process -- confirm against the action).
    """
    usageAction = PrintSystemMemoryUsageAction()
    usageAction.broadcast()
    gathered = usageAction.gather(SystemAndProcessMemoryUsage())
    # Virtual memory is summed rather than physical memory: the two differ by a large amount,
    # and the virtual figure is the conservative choice.
    return sum(usage.processVirtualMemoryInMB for usage in gathered)


def printCurrentMemoryState(nTasksPerNode):
    """
    Log the memory in use, the memory remaining, and the total allocation, all in GB.

    Parameters
    ----------
    nTasksPerNode : int
        Number of tasks per node, forwarded to ``getTotalJobMemory``.
    """
    allocatedGB = getTotalJobMemory(nTasksPerNode)
    # getCurrentMemoryUsage() reports MB; convert to GB to match the allocation.
    usedGB = getCurrentMemoryUsage() / 1024
    remainingGB = allocatedGB - usedGB
    message = (
        f"Currently using {usedGB} GB of memory. "
        f"There is {remainingGB} GB of memory left. "
        f"There is a total allocation of {allocatedGB} GB."
    )
    runLog.info(message)


class MemoryProfiler(interfaces.Interface):

name = "memoryProfiler"
Expand All @@ -78,6 +114,11 @@ def __init__(self, r, cs):

def interactBOL(self):
interfaces.Interface.interactBOL(self)

# prints the bottom line memory state...
# the code after this is somewhat confusing to interpret
printCurrentMemoryState(self.cs["mpiTasksPerNode"])
albeanth marked this conversation as resolved.
Show resolved Hide resolved

mpiAction = PrintSystemMemoryUsageAction()
mpiAction.broadcast().invoke(self.o, self.r, self.cs)
mpiAction.printUsage("BOL SYS_MEM")
Expand All @@ -88,6 +129,10 @@ def interactBOL(self):
mpiAction.broadcast().invoke(self.o, self.r, self.cs)

def interactEveryNode(self, cycle, node):
# prints the bottom line memory state...
# the code after this is somewhat confusing to interpret
printCurrentMemoryState(self.cs["mpiTasksPerNode"])

mp = PrintSystemMemoryUsageAction()
mp.broadcast()
mp.invoke(self.o, self.r, self.cs)
Expand Down
61 changes: 61 additions & 0 deletions armi/bookkeeping/tests/test_memoryProfiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,17 @@
# limitations under the License.

"""Tests for memoryProfiler."""
from unittest.mock import MagicMock, patch
import logging
import unittest

from armi import runLog
from armi.bookkeeping import memoryProfiler
from armi.bookkeeping.memoryProfiler import (
getCurrentMemoryUsage,
getTotalJobMemory,
printCurrentMemoryState,
)
from armi.reactor.tests import test_reactors
from armi.tests import mockRunLogs, TEST_ROOT

Expand Down Expand Up @@ -123,6 +129,61 @@ def test_profileMemoryUsageAction(self):
pmua = memoryProfiler.ProfileMemoryUsageAction("timeDesc")
self.assertEqual(pmua.timeDescription, "timeDesc")

@patch("psutil.virtual_memory")
@patch("armi.bookkeeping.memoryProfiler.cpu_count")
def test_getTotalJobMemory(self, mockCpuCount, mockVMem):
    """Check the job memory for several task counts on a node with 50 GB of RAM and 10 CPUs."""
    mockVMem.return_value = MagicMock(total=50 * 1024**3)
    mockCpuCount.return_value = 10

    # (tasks per node, expected job memory in GB)
    tasksAndMemory = [(0, 50), (1, 50), (2, 50), (3, 45), (4, 40), (5, 50)]
    for nTasks, expectedGB in tasksAndMemory:
        self.assertEqual(getTotalJobMemory(nTasks), expectedGB)

@patch("armi.bookkeeping.memoryProfiler.PrintSystemMemoryUsageAction")
@patch("armi.bookkeeping.memoryProfiler.SystemAndProcessMemoryUsage")
def test_getCurrentMemoryUsage(
    self, mockSysAndProcMemUse, mockPrintSysMemUseAction
):
    """Mock three gathered usages and check that they total 6 * 1024 MB."""
    self._setMemUseMock(mockPrintSysMemUseAction)
    expectedTotalMB = 6 * 1024
    self.assertAlmostEqual(getCurrentMemoryUsage(), expectedTotalMB)

@patch("armi.bookkeeping.memoryProfiler.PrintSystemMemoryUsageAction")
@patch("armi.bookkeeping.memoryProfiler.SystemAndProcessMemoryUsage")
@patch("psutil.virtual_memory")
@patch("armi.bookkeeping.memoryProfiler.cpu_count")
def test_printCurrentMemoryState(
    self, mockCpuCount, mockVMem, mock1, mockPrintSysMemUseAction
):
    """Check the logged memory state for a 50 GB, 10 CPU node with 6 GB in use."""
    # mock1 is the patched SystemAndProcessMemoryUsage; it only needs to exist.
    self._setMemUseMock(mockPrintSysMemUseAction)
    mockVMem.return_value = MagicMock(total=50 * 1024**3)
    mockCpuCount.return_value = 10

    expectedPhrases = (
        "Currently using 6.0 GB of memory.",
        "There is 44.0 GB of memory left.",
        "There is a total allocation of 50.0 GB",
    )
    with mockRunLogs.BufferLog() as logBuffer:
        printCurrentMemoryState(2)
        logged = logBuffer.getStdout()
        for phrase in expectedPhrases:
            self.assertIn(phrase, logged)

def _setMemUseMock(self, mockPrintSysMemUseAction):
    """Make the patched action's ``gather`` return three usages of 1024, 2048 and 3072 MB."""

    class _FakeMemUse:
        """Minimal stand-in exposing only ``processVirtualMemoryInMB``."""

        def __init__(self, mem: float):
            self.processVirtualMemoryInMB = mem

    gatherMock = mockPrintSysMemUseAction.return_value.gather
    gatherMock.return_value = [_FakeMemUse(gb * 1024) for gb in (1, 2, 3)]


class KlassCounterTests(unittest.TestCase):
def get_containers(self):
Expand Down
Loading