Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve child handling #140

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 177 additions & 12 deletions memory_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import inspect
import subprocess
import logging

from collections import defaultdict

# TODO: provide alternative when multiprocessing is not available
try:
Expand Down Expand Up @@ -112,10 +112,10 @@ def _get_child_memory(process, meminfo_attr=None):
# Loop over the child processes and yield their memory
try:
for child in getattr(process, children_attr)(recursive=True):
yield getattr(child, meminfo_attr)()[0] / _TWO_20
yield child.pid, getattr(child, meminfo_attr)()[0] / _TWO_20
except psutil.NoSuchProcess:
# https://github.com/fabianp/memory_profiler/issues/71
yield 0.0
yield (0,0.0) # need to yield a tuple


def _get_memory(pid, backend, timestamps=False, include_children=False, filename=None):
Expand Down Expand Up @@ -143,7 +143,7 @@ def ps_util_tool():
else 'get_memory_info'
mem = getattr(process, meminfo_attr)()[0] / _TWO_20
if include_children:
mem += sum(_get_child_memory(process, meminfo_attr))
mem += sum((mem for (pid,mem) in _get_child_memory(process, meminfo_attr)))
if timestamps:
return mem, time.time()
else:
Expand Down Expand Up @@ -355,14 +355,14 @@ def memory_usage(proc=-1, interval=.1, timeout=None, timestamps=False,

# Write children to the stream file
if multiprocess:
for idx, chldmem in enumerate(_get_child_memory(proc.pid)):
stream.write("CHLD {0} {1:.6f} {2:.4f}\n".format(idx, chldmem, time.time()))
for chldpid, chldmem in _get_child_memory(proc.pid):
stream.write("CHLD {0} {1:.6f} {2:.4f}\n".format(chldpid, chldmem, time.time()))
else:
# Create a nested list with the child memory
if multiprocess:
mem_usage = [mem_usage]
for chldmem in _get_child_memory(proc.pid):
mem_usage.append(chldmem)
for chldpid, chldmem in _get_child_memory(proc.pid):
mem_usage.append((chldpid,chldmem))

# Append the memory usage to the return value
ret.append(mem_usage)
Expand Down Expand Up @@ -399,14 +399,14 @@ def memory_usage(proc=-1, interval=.1, timeout=None, timestamps=False,

# Write children to the stream file
if multiprocess:
for idx, chldmem in enumerate(_get_child_memory(proc.pid)):
stream.write("CHLD {0} {1:.6f} {2:.4f}\n".format(idx, chldmem, time.time()))
for child_pid, chldmem in _get_child_memory(proc):
stream.write("CHLD {0} {1:.6f} {2:.4f}\n".format(child_pid, chldmem, time.time()))
else:
# Create a nested list with the child memory
if multiprocess:
mem_usage = [mem_usage]
for chldmem in _get_child_memory(proc.pid):
mem_usage.append(chldmem)
for chldpid, chldmem in _get_child_memory(proc):
mem_usage.append((chldpid,chldmem))

# Append the memory usage to the return value
ret.append(mem_usage)
Expand Down Expand Up @@ -1207,3 +1207,168 @@ def flush(self):
prof.show_results(stream=out_file)
else:
show_results(prof, precision=options.precision, stream=out_file)


### I/O

def read_mprofile_file(filename):
    """Read an mprofile file and return its content.

    Each line of the file is a record of the form ``FIELD value...`` where
    FIELD is one of ``MEM``, ``FUNC``, ``CHLD`` or ``CMDLINE``. Records with
    any other field name are ignored.

    Parameters
    ----------
    filename: str
        Path of the mprofile file to read.

    Returns
    -------
    content: dict
        Keys:

        - "mem_usage": (list) memory usage values, in MiB
        - "timestamp": (list) time instant for each memory usage value, in
          seconds
        - "func_timestamp": (dict) for each function, a list of
          ``[start, end, mem_start, mem_end]`` entries recorded upon
          entering and exiting.
        - 'filename': (str) the path that was read.
        - 'cmd_line': (str) command-line ran for this profile, kept exactly
          as stored (including the trailing newline), or None if the file
          has no CMDLINE record.
        - 'children': (defaultdict of list) for each child identifier (the
          first CHLD column, kept as a string), the list of
          ``(memory, timestamp)`` tuples recorded for that child.

    Raises
    ------
    ValueError
        If the file contains an empty line ('Sampling time was too short')
        or a line that has no space-separated value.
    """
    func_ts = {}
    mem_usage = []
    timestamp = []
    children = defaultdict(list)
    cmd_line = None

    # The context manager guarantees the file is closed even when a
    # malformed line makes us raise in the middle of the loop.
    with open(filename, "r") as f:
        for l in f:
            if l == '\n':
                raise ValueError('Sampling time was too short')
            field, value = l.split(' ', 1)

            if field == "MEM":
                # mem, timestamp
                values = value.split(' ')
                mem_usage.append(float(values[0]))
                timestamp.append(float(values[1]))

            elif field == "FUNC":
                values = value.split(' ')
                f_name, mem_start, start, mem_end, end = values[:5]
                func_ts.setdefault(f_name, []).append(
                    [float(start), float(end),
                     float(mem_start), float(mem_end)])

            elif field == "CHLD":
                # child id, mem, timestamp
                values = value.split(' ')
                children[values[0]].append(
                    (float(values[1]), float(values[2])))

            elif field == "CMDLINE":
                cmd_line = value

    return {"mem_usage": mem_usage, "timestamp": timestamp,
            "func_timestamp": func_ts, 'filename': filename,
            'cmd_line': cmd_line, 'children': children}


def read_mprofile_file_multiprocess(filename):
    """Read an mprofile file and return a mem_usage list

    Parameters
    ----------
    filename: str
        Path of an mprofile file containing ``MEM`` and ``CHLD`` records.

    Returns
    -------
    content: list
        One entry per ``MEM`` record, in file order. Each entry is a list
        whose first item is the ``(memory, timestamp)`` tuple of the MEM
        record, followed by one ``(pid, memory)`` tuple for every ``CHLD``
        record found before the next MEM record.

    This is analogous to the list obtained when the `memory_usage` is used

    Raises
    ------
    ValueError
        If the file contains an empty line ('Sampling time was too short')
        or a line that has no space-separated value.
    """
    mem_usage = []
    sample = None  # no MEM record seen yet

    # The context manager closes the file even if a bad line raises.
    with open(filename, 'r') as f:
        for l in f:
            if l == '\n':
                raise ValueError('Sampling time was too short')
            field, value = l.split(' ', 1)
            values = value.split(' ')

            if field == "MEM":
                # A new MEM record closes the sample that was being built.
                if sample is not None:
                    mem_usage.append(sample)
                sample = [(float(values[0]), float(values[1]))]
            elif field == "CHLD" and sample is not None:
                # CHLD records seen before the first MEM record have no
                # parent sample to attach to and are dropped.
                sample.append((int(values[0]), float(values[1])))

    # Flush the sample still being built when the file ends; without this
    # the last measurement of every profile would be silently lost.
    if sample is not None:
        mem_usage.append(sample)

    return mem_usage


def convert_mem_usage_to_df(filename, is_pickle=False):
    """Convert a `mem_usage` list to a `pandas.DataFrame`

    Parameters
    ----------
    filename: path to the memory profile data; can be either a file
        created by mprof or a pickle of the result of `memory_usage`

    is_pickle: if True, assume the data is the pickled list
        returned by `memory_usage`


    Returns
    -------
    content: pandas.DataFrame

    Returns a `pandas.DataFrame` with child IDs as columns and the timestamp
    as an index. Column ``0`` holds the memory of the parent process; a
    process that has no measurement at a given timestamp is reported as 0.

    Raises
    ------
    ImportError
        If pandas or numpy is not installed.
    """

    try:
        import pandas as pd
        import numpy as np
    except ImportError:
        raise ImportError('Pandas and numpy are required for conversion to DataFrame')

    if is_pickle:
        try:
            from cPickle import load  # Python 2
        except ImportError:
            from pickle import load  # Python 3
        # NOTE: unpickling can execute arbitrary code; only load files that
        # come from a trusted source.
        # Pickles must be read in binary mode.
        with open(filename, 'rb') as f:
            mem_usage = list(load(f))
    else:
        mem_usage = read_mprofile_file_multiprocess(filename)
        # Keep only samples with at least one child. Built as a list (not
        # `filter`) because the data is traversed several times below.
        mem_usage = [m for m in mem_usage if len(m) > 1]

    times = [m[0][1] for m in mem_usage]

    # Collect every child pid and add '0', the column used for the parent.
    # Bare floats (child memory without a pid) are skipped.
    pids = sorted(set(
        [0] + [child[0] for sample in mem_usage for child in sample[1:]
               if not isinstance(child, float)]))
    pid_lookup = {pid: col for col, pid in enumerate(pids)}

    data = np.zeros((len(times), len(pids)))

    # Rows are addressed by position so that two samples sharing the same
    # timestamp do not overwrite each other.
    for row, sample in enumerate(mem_usage):
        # add the parent memory by hand
        data[row, pid_lookup[0]] = sample[0][0]

        for child in sample[1:]:
            if isinstance(child, float):
                continue
            pid, mem = child
            if pid == 0:
                # (0, 0.0) is the placeholder emitted when a child has
                # vanished; it must not clobber the parent column.
                continue
            data[row, pid_lookup[pid]] = mem

    return pd.DataFrame(data, index=times, columns=pids)

def plot_mem_usage(filename, include_parent=True, plot_total=True, is_pickle=False):
    """Plot the memory usage of a process and its children over time.

    Parameters
    ----------
    filename: path to the memory profile data, passed unchanged to
        `convert_mem_usage_to_df`

    include_parent: if False, the first column of the converted data
        (the parent process) is left out of the plot and of the total

    plot_total: if True, also draw the sum over all plotted processes
        as a dashed line

    is_pickle: passed unchanged to `convert_mem_usage_to_df`

    Returns
    -------
    ax: the matplotlib axes the curves were drawn on
    """
    import matplotlib.pyplot as plt

    data_df = convert_mem_usage_to_df(filename, is_pickle)

    if not include_parent:
        data_df = data_df[data_df.columns[1:]]

    # Draw everything on one explicitly created axes: this avoids leaving
    # an unused empty figure behind and does not depend on which axes
    # happens to be the "current" one when the total is added.
    fig, ax = plt.subplots(figsize=(14, 10))
    data_df.plot(ax=ax, legend=False, grid=True, fontsize=14)

    if plot_total:
        data_df.sum(axis=1).plot(ax=ax, style='--', grid=True)

    ax.set_xlabel('timestamp')
    ax.set_ylabel('memory usage in MB')
    return ax
113 changes: 35 additions & 78 deletions mprof
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,17 @@ def run_action():
parser.add_option("--multiprocess", "-M", dest="multiprocess",
default=False, action="store_true",
help="""Monitors forked processes creating individual plots for each child""")
parser.add_option("--pid", "-p", dest="pid",
default=False, action="store_true",
help="""Specify that the argument is a running pid not an executable or script""")
parser.add_option("--timeout", dest="timeout",
default=None, action="store", type=int,
help="""Timeout in seconds""")

(options, args) = parser.parse_args()

if len(args) == 0:
print("A program to run must be provided. Use -h for help")
if (len(args) == 0):
print("A program to run or a pid must be provided. Use -h for help")
sys.exit(1)

print("{1}: Sampling memory every {0.interval}s".format(
Expand All @@ -218,30 +224,36 @@ def run_action():
mprofile_output = "mprofile_%s.dat" % suffix

# .. TODO: more than one script as argument ? ..
if args[0].endswith('.py') and not options.nopython:
if not args[0].startswith("python"):
args.insert(0, "python")
if options.multiprocess:
# in multiprocessing mode you want to spawn a separate
# python process
options.python = False
if options.python:
print("running as a Python program...")
if not args[0].startswith("python"):
args.insert(0, "python")
cmd_line = get_cmd_line(args)
args[1:1] = ("-m", "memory_profiler", "--timestamp",
"-o", mprofile_output)
p = subprocess.Popen(args)
if not options.pid:
if args[0].endswith('.py') and not options.nopython:
if not args[0].startswith("python"):
args.insert(0, "python")
if options.multiprocess:
# in multiprocessing mode you want to spawn a separate
# python process
options.python = False
if options.python:
print("running as a Python program...")
if not args[0].startswith("python"):
args.insert(0, "python")
cmd_line = get_cmd_line(args)
args[1:1] = ("-m", "memory_profiler", "--timestamp",
"-o", mprofile_output)
p = subprocess.Popen(args)
else:
cmd_line = get_cmd_line(args)
p = subprocess.Popen(args)
else:
cmd_line = get_cmd_line(args)
p = subprocess.Popen(args)
p = int(args[0])

with open(mprofile_output, "a") as f:
f.write("CMDLINE {0}\n".format(cmd_line))
if not options.pid:
f.write("CMDLINE {0}\n".format(cmd_line))

mp.memory_usage(proc=p, interval=options.interval, timestamps=True,
include_children=options.include_children,
multiprocess=options.multiprocess, stream=f)
multiprocess=options.multiprocess, stream=f,
timeout=options.timeout)


def add_brackets(xloc, yloc, xshift=0, color="r", label=None, options=None):
Expand Down Expand Up @@ -291,61 +303,6 @@ def add_brackets(xloc, yloc, xshift=0, color="r", label=None, options=None):
## pl.plot(xloc[1], yloc[1], ">"+color, markersize=7)


def read_mprofile_file(filename):
    """Read an mprofile file and return its content.

    Each line of the file is a record of the form ``FIELD value...`` where
    FIELD is one of ``MEM``, ``FUNC``, ``CHLD`` or ``CMDLINE``. Records with
    any other field name are ignored.

    Returns
    =======
    content: dict
        Keys:

        - "mem_usage": (list) memory usage values, in MiB
        - "timestamp": (list) time instant for each memory usage value, in
          seconds
        - "func_timestamp": (dict) for each function, a list of
          ``[start, end, mem_start, mem_end]`` entries recorded upon
          entering and exiting.
        - 'filename': (str) the path that was read.
        - 'cmd_line': (str) command-line ran for this profile, kept exactly
          as stored (including the trailing newline), or None if the file
          has no CMDLINE record.
        - 'children': (defaultdict of list) for each child identifier (the
          first CHLD column, kept as a string), the list of
          ``(memory, timestamp)`` tuples recorded for that child.

    Raises
    ======
    ValueError
        If the file contains an empty line ('Sampling time was too short')
        or a line that has no space-separated value.
    """
    func_ts = {}
    mem_usage = []
    timestamp = []
    children = defaultdict(list)
    cmd_line = None

    # `with` closes the file even when a malformed line raises mid-loop.
    with open(filename, "r") as f:
        for l in f:
            if l == '\n':
                raise ValueError('Sampling time was too short')
            field, value = l.split(' ', 1)

            if field == "MEM":
                # mem, timestamp
                values = value.split(' ')
                mem_usage.append(float(values[0]))
                timestamp.append(float(values[1]))

            elif field == "FUNC":
                values = value.split(' ')
                f_name, mem_start, start, mem_end, end = values[:5]
                func_ts.setdefault(f_name, []).append(
                    [float(start), float(end),
                     float(mem_start), float(mem_end)])

            elif field == "CHLD":
                # child id, mem, timestamp
                values = value.split(' ')
                children[values[0]].append(
                    (float(values[1]), float(values[2])))

            elif field == "CMDLINE":
                cmd_line = value

    return {"mem_usage": mem_usage, "timestamp": timestamp,
            "func_timestamp": func_ts, 'filename': filename,
            'cmd_line': cmd_line, 'children': children}


def plot_file(filename, index=0, timestamps=True, children=True, options=None):
Expand All @@ -355,7 +312,7 @@ def plot_file(filename, index=0, timestamps=True, children=True, options=None):
print("matplotlib is needed for plotting.")
sys.exit(1)
import numpy as np # pylab requires numpy anyway
mprofile = read_mprofile_file(filename)
mprofile = mp.read_mprofile_file(filename)

if len(mprofile['timestamp']) == 0:
print('** No memory usage values have been found in the profile '
Expand Down Expand Up @@ -413,7 +370,7 @@ def plot_file(filename, index=0, timestamps=True, children=True, options=None):
cmem = np.asarray([item[0] for item in data])

# Plot the line to the figure
pl.plot(cts, cmem, "+-" + mem_line_colors[idx+1 % len(mem_line_colors)],
pl.plot(cts, cmem, "+-" + mem_line_colors[(idx+1) % len(mem_line_colors)],
label="child {}".format(proc))

# Detect the maximal child memory point
Expand Down