Skip to content

Commit

Permalink
add memory usage/estimation quick parser utility (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
shnizzedy authored Feb 25, 2022
2 parents bd83ca2 + b469efe commit e6bdbe4
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 4 deletions.
6 changes: 3 additions & 3 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
=========
Changelog
=========
`Version 0.4.1`
===============
*
`Unreleased`
================================================================================================
* 🧮 Evaluates memory usage for specific nodes from `callback.log` files

`Version 0.4.0: Goodbye Singularity Hub <https://github.com/FCP-INDI/cpac/releases/tag/v0.4.0>`_
================================================================================================
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ setuptools
spython >= 0.0.81
tabulate >= 0.8.6
tornado
websocket-client
websocket-client
rich
93 changes: 93 additions & 0 deletions src/cpac/helpers/cpac_parse_resources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/usr/bin/env python
'''cpac_parse_resources.py
`cpac_parse resources` is intended to be run outside a C-PAC container
'''

from rich.console import Console
from rich.table import Table

from argparse import ArgumentParser
import pandas as pd
import numpy as np
import json


# JSON keys read from each callback.log entry.
runti = 'runtime_memory_gb'
estim = 'estimated_memory_gb'

# Translates the short names accepted by ``--filter_field`` into the
# DataFrame column that the results are sorted by.
field = dict(runtime=runti, estimate=estim, efficiency='efficiency')


def display(df):
    '''Print a table of per-node memory usage to the terminal.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per node. The columns ``id``, ``runtime_memory_gb`` and
        ``estimated_memory_gb`` are read; any other column is ignored.

    Notes
    -----
    The efficiency cell is computed here as ``100 * used / estimated``.
    A row whose estimate is zero (or otherwise falsy) is shown as
    ``n/a`` instead of aborting the whole table with a division error.
    '''
    console = Console()

    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("Task ID", style="dim", width=40)
    table.add_column("Memory Used")
    table.add_column("Memory Estimated")
    table.add_column("Memory Efficiency")

    for _, row in df.iterrows():
        used = row[runti]
        estimated = row[estim]

        if estimated:
            efficiency = "{0:.2f} %".format(100 * used / estimated)
        else:
            # No meaningful ratio exists without a non-zero estimate.
            efficiency = "n/a"

        cells = [row['id'], used, estimated, efficiency]
        # Floats get a fixed four-decimal rendering; everything else
        # (ids, ints, the pre-formatted efficiency) is stringified as-is.
        table.add_row(*["{0:.4f}".format(cell) if isinstance(cell, float)
                        else str(cell)
                        for cell in cells])

    console.print(table)


def load_runtime_stats(callback):
    '''Load per-node memory statistics from a ``callback.log`` file.

    The file is read as one JSON document per line. Only entries that
    are JSON objects carrying both the runtime and the estimated memory
    keys are kept; blank lines and all other entries are skipped.

    Parameters
    ----------
    callback : str
        Path to the ``callback.log`` file.

    Returns
    -------
    pandas.DataFrame
        One row per retained entry with the columns ``id``,
        ``runtime_memory_gb``, ``estimated_memory_gb`` and
        ``efficiency`` (runtime / estimate * 100). ``efficiency`` is NaN
        where the estimate is zero. The columns are present even when no
        entry is retained, so the result can always be sorted by them.

    Raises
    ------
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    KeyError
        If a retained entry has no ``id``.
    '''
    copied_keys = ['id', runti, estim]

    pruned_logs = []
    with open(callback) as fhandle:
        for line in fhandle:
            if not line.strip():
                # Tolerate blank lines rather than failing in json.loads.
                continue

            log = json.loads(line)
            if not isinstance(log, dict) or runti not in log \
                    or estim not in log:
                continue

            entry = {key: log[key] for key in copied_keys}
            # A zero estimate has no defined efficiency; NaN keeps the
            # row while sorting it after every real value.
            entry['efficiency'] = (entry[runti] / entry[estim] * 100
                                   if entry[estim] else float('nan'))
            pruned_logs.append(entry)

    # Naming the columns keeps the schema stable for an empty result.
    return pd.DataFrame(pruned_logs, columns=copied_keys + ['efficiency'])


def query(usage, f, g, c):
    '''Return the ``c`` rows that rank lowest or highest in one field.

    Parameters
    ----------
    usage : pandas.DataFrame
        Memory statistics, one row per node. It is not modified.
    f : str
        Short field name; must be a key of the module-level ``field``
        mapping, which supplies the column to sort by.
    g : str
        ``'lowest'`` sorts ascending; any other value sorts descending.
    c : int
        Number of leading rows to keep after sorting.

    Returns
    -------
    pandas.DataFrame
        A new frame with the first ``c`` sorted rows and a fresh
        0-based index.

    Raises
    ------
    KeyError
        If ``f`` is not a key of ``field``.
    '''
    # Sort into a new frame so the caller's data keeps its original order.
    ranked = usage.sort_values(by=field[f], ascending=(g == 'lowest'))
    return ranked.reset_index(drop=True).iloc[:c]


if __name__ == '__main__':
    # Command-line entry point: parse the arguments, load the callback
    # log, pick the requested rows and print them as a table.
    cli = ArgumentParser(__file__)
    cli.add_argument(
        "callback",
        help="callback.log file found in the 'log' "
             "directory of the specified derivatives path")

    # Optional filters, declared as (flags, keyword arguments) pairs.
    filter_options = (
        (("--filter_field", "-f"),
         dict(choices=['runtime', 'estimate', 'efficiency'],
              default='efficiency')),
        (("--filter_group", "-g"),
         dict(choices=['lowest', 'highest'], default='lowest')),
        (("--filter_count", "-n"),
         dict(type=int, default=10)),
    )
    for flags, spec in filter_options:
        cli.add_argument(*flags, action="store", **spec)

    args = cli.parse_args()

    stats = load_runtime_stats(args.callback)
    selection = query(stats,
                      args.filter_field,
                      args.filter_group,
                      args.filter_count)
    display(selection)

0 comments on commit e6bdbe4

Please sign in to comment.