Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ashawini27 bspmm histogram #267

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions bin/function_histogram.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/env python3

import sys
import json
import matplotlib.pyplot as plt
import statistics # Import the statistics module for mean and other calculations

def generate_histogram(entry):
    """Sum event durations per PID and per function.

    *entry* is an iterable of Chrome-trace events (dicts with at least
    "pid", "name" and "dur").  Any template arguments after '<' are
    stripped from the event name, so all instantiations of the same
    function are accumulated together.

    Returns a dict mapping pid -> {function_name: total_duration}.
    """
    totals_by_pid = {}

    for event in entry:
        base_name = event["name"].split('<', 1)[0]
        per_function = totals_by_pid.setdefault(event["pid"], {})
        per_function[base_name] = per_function.get(base_name, 0) + event["dur"]

    return totals_by_pid

def generate_cumulative_histogram(entry):
    """Sum event durations per PID keyed by the unqualified function name.

    Like :func:`generate_histogram`, but the event name is additionally
    stripped of any leading namespace qualifiers (everything up to the
    last '::') before the template arguments after '<' are removed, so
    e.g. ``ns::foo<int>`` and ``foo`` aggregate under the same key.

    Returns a dict mapping pid -> {short_function_name: total_duration}.
    """
    totals_by_pid = {}

    for event in entry:
        short_name = event["name"].rsplit('::', 1)[-1].split('<', 1)[0]
        per_function = totals_by_pid.setdefault(event["pid"], {})
        per_function[short_name] = per_function.get(short_name, 0) + event["dur"]

    return totals_by_pid

def main():
    """Read a Chrome-trace JSON file and write function-duration histograms.

    Usage: function_histogram.py function_data.json

    Produces one PDF per PID (``histogram_pid_<pid>.pdf``) plus a single
    ``cumulative_histogram.pdf`` summing durations across all PIDs.
    """
    if len(sys.argv) != 2:
        print("Usage: python_script.py function_data.json")
        return

    json_file_path = sys.argv[1]

    with open(json_file_path, 'r') as json_file:
        json_data = json.load(json_file)

    # pid -> {function_name: summed duration}, template arguments stripped.
    functions_dur_counts = generate_histogram(json_data["traceEvents"])

    # Histogram for individual PIDs
    # NOTE(review): the /1000 conversions below assume event "dur" values are
    # in milliseconds -- confirm against the trace producer.
    for pid, function_dur_count in functions_dur_counts.items():
        total_duration = sum(duration for duration in function_dur_count.values()) / 1000 # Calculate total duration in seconds

        plt.bar(function_dur_count.keys(), [dur / 1000 for dur in function_dur_count.values()]) # Convert ms to sec
        plt.xlabel('Function Name')
        plt.ylabel('Total Duration (s)') # Adding unit for time here
        plt.title(f'Histogram of Total Function Durations for PID {pid}')
        plt.xticks(rotation=45, ha='right')

        # Add total duration label to the top of each bar with rotation
        for function, duration in function_dur_count.items():
            plt.text(function, duration / 1000, f'{duration / 1000:.2f}', rotation=45, ha='center', va='bottom', fontweight='bold') # Convert ms to sec

        # Add total duration to the top of the plot (axes coordinates, so the
        # label sits near the top regardless of the data range).
        plt.text(0.5, 0.95, f'Total Duration: {total_duration:.2f} s', transform=plt.gca().transAxes, ha='center', fontweight='bold')

        plt.tight_layout()

        # Save each histogram as a separate PDF file
        pdf_filename = f'histogram_pid_{pid}.pdf'
        plt.savefig(pdf_filename)
        plt.close()

    # Histogram for cumulative time taken by PIDs for each function.
    # NOTE(review): this recomputes cumulative totals inline instead of using
    # generate_cumulative_histogram (which is currently unused); the two also
    # differ in name handling -- the helper strips '::' qualifiers, this loop
    # does not.
    cumulative_function_dur_count = {}
    for function_dur_count in functions_dur_counts.values():
        for function, duration in function_dur_count.items():
            cumulative_function_dur_count[function] = cumulative_function_dur_count.get(function, 0) + duration

    plt.bar(cumulative_function_dur_count.keys(), [dur / 1000 for dur in cumulative_function_dur_count.values()])
    plt.xlabel('Function Name')
    plt.ylabel('Total Duration (s)')
    plt.title('Histogram of Cumulative Function Durations across PIDs')
    plt.xticks(rotation=45, ha='right')

    # Add total duration label to the top of each bar with rotation
    for function, duration in cumulative_function_dur_count.items():
        plt.text(function, duration / 1000, f'{duration / 1000:.2f}', rotation=45, ha='center', va='bottom', fontweight='bold') # Convert ms to sec

    plt.tight_layout()

    # Save the cumulative histogram as a PDF file
    pdf_filename = 'cumulative_histogram.pdf'
    plt.savefig(pdf_filename)
    plt.close()

if __name__ == "__main__":
    main()
55 changes: 41 additions & 14 deletions bin/pbt_to_ctf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import time
import pandas
import sys
import statistics
except ModuleNotFoundError:
print("Did not find a system module, use pip to install it")

Expand All @@ -23,13 +24,12 @@ def read_pbt(pbt_files_list):
print('The columns of the DataFrame (or data labels) and their datatypes are:')
print(trace.events.dtypes)


print('the types are:\n', trace.event_types)
print('the streams are:\n', trace.streams)

print('There are ' + str(len(trace.events)) + ' events in this trace', end=' ')
for e in range(len(trace.events)):
print('id===', trace.events.id[e], ' node_id=', trace.events.node_id[e],' stream_id=',trace.events.stream_id[e], 'key=' ,trace.events.key[e],' type=',trace.events.type[e],' b=',trace.events.begin[e],' e=',trace.events.end[e])
print('id===', trace.events.id[e], ' node_id=', trace.events.node_id[e], ' stream_id=', trace.events.stream_id[e], 'key=' ,trace.events.key[e], ' type=', trace.events.type[e], ' b=', trace.events.begin[e], ' e=', trace.events.end[e])

import json
import re
Expand All @@ -40,42 +40,69 @@ def bool(str):
return str.lower() in ["true", "yes", "y", "1", "t"]

def pbt_to_ctf(pbt_files_list, ctf_filename, skip_parsec_events, skip_mpi_events):

ctf_data = {"traceEvents": []}
# Dictionary to store aggregated durations
aggregated_durations = {}

# Initialize lists to store duration values for each name
duration_values = []
ptt_filename = pbt2ptt.convert(pbt_files_list, multiprocess=False)
trace = ptt.from_hdf(ptt_filename)

for e in range(len(trace.events)):
# print('id=',trace.events.id[e],' node_id=',trace.events.node_id[e],' stream_id=',trace.events.stream_id[e],'key=',trace.events.key[e],' type=',trace.events.type[e],' b=',trace.events.begin[e],' e=',trace.events.end[e])
# print('id=', trace.events.id[e], ' node_id=', trace.events.node_id[e], ' stream_id=', trace.events.stream_id[e], 'key=', trace.events.key[e], ' type=', trace.events.type[e], ' b=', trace.events.begin[e], ' e=', trace.events.end[e])
# print('\n')

if(skip_parsec_events == True and trace.event_names[trace.events.type[e]].startswith("PARSEC")):
if skip_parsec_events == True and trace.event_names[trace.events.type[e]].startswith("PARSEC"):
continue
if(skip_mpi_events == True and trace.event_names[trace.events.type[e]].startswith("MPI")):
if skip_mpi_events == True and trace.event_names[trace.events.type[e]].startswith("MPI"):
continue

ctf_event = {}
ctf_event["ph"] = "X" # complete event type
ctf_event["ts"] = 0.001 * trace.events.begin[e] # when we started, in ms
ctf_event["dur"] = 0.001 * (trace.events.end[e] - trace.events.begin[e]) # when we started, in ms
ctf_event["ts"] = 0.001 * trace.events.begin[e] # when we started, in ms
ctf_event["dur"] = 0.001 * (trace.events.end[e] - trace.events.begin[e]) # when we started, in ms
ctf_event["name"] = trace.event_names[trace.events.type[e]]

if trace.events.key[e] != None:
if trace.events.key[e] is not None:
ctf_event["args"] = trace.events.key[e].decode('utf-8').rstrip('\x00')
ctf_event["name"] = trace.event_names[trace.events.type[e]]+"<"+ctf_event["args"]+">"
ctf_event["name"] = trace.event_names[trace.events.type[e]] + "<" + ctf_event["args"] + ">"

ctf_event["pid"] = trace.events.node_id[e]
tid = trace.streams.th_id[trace.events.stream_id[e]]
ctf_event["tid"] = 111111 if math.isnan(tid) else int(tid)

ctf_data["traceEvents"].append(ctf_event)

# Get the index of the first occurrence of '<'
index_of_open_bracket = ctf_event["name"].find('<')
# Extract the substring before '<' and assign it to the name variable
if index_of_open_bracket != -1:
name = ctf_event["name"][:index_of_open_bracket]
duration = ctf_event["dur"]
if name in aggregated_durations:
aggregated_durations[name]["duration"] += duration
aggregated_durations[name]["count"] += 1
else:
# If name doesn't exist, create a new entry
aggregated_durations[name] = {"duration": duration, "count": 1}
# Add duration value to the list
duration_values.append(duration)

# Calculate the mean, median, max, min, and standard deviation for each aggregated duration
for name, data in aggregated_durations.items():
mean_duration = data["duration"] / data["count"]
individual_durations = [ctf_event["dur"] for ctf_event in ctf_data["traceEvents"] if ctf_event["name"].startswith(name)]
median_duration = statistics.median(individual_durations) if len(individual_durations) > 1 else 0.0
max_duration = max(individual_durations)
min_duration = min(individual_durations)
std_deviation = statistics.stdev(individual_durations) if len(individual_durations) > 1 else 0.0
print(f"Name: {name}, Mean: {mean_duration:.2f} μs, Median: {median_duration:.2f} μs, Max: {max_duration:.2f} μs, Min: {min_duration:.2f} μs, Std Deviation: {std_deviation:.2f} μs")

with open(ctf_filename, "w") as chrome_trace:
json.dump(ctf_data, chrome_trace)

if __name__ == "__main__":

pbt_file_prefix = sys.argv[1]
ctf_file_name = sys.argv[2]
skip_parsec_events = True
Expand All @@ -88,11 +115,11 @@ def pbt_to_ctf(pbt_files_list, ctf_filename, skip_parsec_events, skip_mpi_events
skip_mpi_events = bool(sys.argv[4])

# iterate over all files within the directory that start with sys.argv[1]
pbt_files_list=[]
pbt_files_list = []
dirname = os.path.dirname(pbt_file_prefix)
for file in os.listdir(dirname):
file_fullname = os.path.join(dirname,file)
if file_fullname.startswith(pbt_file_prefix) and ".prof-" in file_fullname and file_fullname != ctf_file_name:
file_fullname = os.path.join(dirname, file)
if file_fullname.startswith(pbt_file_prefix) and ".prof" in file_fullname and file_fullname != ctf_file_name:
print("found file ", file_fullname)
pbt_files_list.append(file_fullname)

Expand Down
99 changes: 99 additions & 0 deletions bspmmTraces_Histogram_Analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env python3

import sys
import json
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import numpy as np

def extract_data_from_json(json_data):
    """Flatten Chrome-trace events into simple per-event records.

    *json_data* is a parsed Chrome-trace dict with a "traceEvents" list.
    Template arguments after '<' are stripped from each event name.

    Returns a list of dicts with keys "pid", "function_name" and
    "duration" (duration is passed through unchanged from "dur").
    """
    return [
        {
            "pid": event["pid"],
            "function_name": event["name"].split('<', 1)[0],
            "duration": event["dur"],
        }
        for event in json_data["traceEvents"]
    ]

def generate_histograms(entries, target_pid):
    """Write per-function duration histograms for one PID into a PDF.

    *entries* are records from extract_data_from_json; only those whose
    "pid" equals *target_pid* are used.  Durations are divided by 1000
    (assumes input is in ms -- TODO confirm against the trace producer)
    and binned into a 10-bin histogram per function, one page per
    function, saved to ``histograms_for_pid_<target_pid>.pdf``.  Mean and
    standard deviation per function are printed to stdout.
    """
    function_histograms = {}  # function name -> list of durations (s)

    for entry in entries:
        pid = entry["pid"]
        function_name = entry["function_name"]
        duration_ms = entry["duration"] / 1000  # Convert ms to sec

        if pid == target_pid:
            if function_name not in function_histograms:
                function_histograms[function_name] = []

            function_histograms[function_name].append(duration_ms)

    pdf_filename = f'histograms_for_pid_{target_pid}.pdf'
    pdf_pages = PdfPages(pdf_filename)

    for function_name, durations in function_histograms.items():
        plt.figure(figsize=(8, 6))  # Adjust figure size for better visibility
        n, bins, patches = plt.hist(durations, bins=10, range=(0, max(durations)))  # Create a histogram
        plt.xlabel('Duration (s)')
        plt.ylabel('Frequency')
        plt.title(f'Histogram of Durations for Function {function_name} (PID {target_pid})')

        # Annotate each non-empty bar with its sample count.  The previous
        # code indexed durations[i] by bin index, which raised IndexError
        # whenever a function had fewer samples than bins (always 10) and
        # otherwise labelled bars with unrelated raw sample values.
        for count, patch in zip(n, patches):
            if count == 0:
                continue  # skip empty bins to avoid clutter
            x = patch.get_x() + patch.get_width() / 2
            y = patch.get_height()
            plt.annotate(f'{int(count)}', (x, y), xytext=(0, 5), textcoords='offset points', ha='center')

        pdf_pages.savefig()
        plt.close()

        # Compute and print average and standard deviation
        avg_duration = np.mean(durations)
        std_duration = np.std(durations)
        print(f"Function: {function_name} (PID {target_pid})")
        print(f"Average Duration: {avg_duration:.2f} s")
        print(f"Standard Deviation: {std_duration:.2f} s")
        print("")

    pdf_pages.close()

def main():
    """Command-line entry point.

    Usage: bspmmTraces_Histogram_Analysis.py function_data.json pid_value

    Validates the arguments, loads the trace JSON, checks that the
    requested PID is present, and delegates to generate_histograms.
    """
    if len(sys.argv) != 3:
        print("Usage: python_script.py function_data.json pid_value")
        return

    trace_path = sys.argv[1]

    try:
        target_pid = int(sys.argv[2])
    except ValueError:
        print("Error: Invalid pid_value. Please enter a valid integer.")
        return

    with open(trace_path, 'r') as fh:
        trace = json.load(fh)

    records = extract_data_from_json(trace)

    # Bail out early if the requested PID never appears in the trace.
    if target_pid not in {record["pid"] for record in records}:
        print(f"Error: PID {target_pid} not found in the data.")
        return

    generate_histograms(records, target_pid)

if __name__ == "__main__":
    main()

Loading
Loading