From fa1e9ea7942a2e5e1f29725b444f7f1546cbfe4f Mon Sep 17 00:00:00 2001 From: Trevor Keller Date: Tue, 11 Jun 2024 15:36:13 -0400 Subject: [PATCH 1/3] Use argparse to add --output flag to plotting script --- episodes/06-expansion.md | 7 +-- episodes/files/plot_terse_amdahl_results.py | 50 +++++++++++++++------ 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/episodes/06-expansion.md b/episodes/06-expansion.md index c6dba6a..1d1ec6d 100644 --- a/episodes/06-expansion.md +++ b/episodes/06-expansion.md @@ -119,7 +119,7 @@ curl -O https://ocaisa.github.io/hpc-workflows/files/plot_terse_amdahl_results.p The script `plot_terse_amdahl_results.py` needs a command line that looks like: ```bash -python plot_terse_amdahl_results.py <1st input file> <2nd input file> ... +python plot_terse_amdahl_results.py --output <output image file> <1st input file> <2nd input file> ... ``` Let's introduce that into our `generate_run_files` rule: @@ -129,7 +129,7 @@ rule generate_run_files: output: "p_{parallel_proportion}_runs.txt" input: expand("p_{{parallel_proportion}}/runs/amdahl_run_{count}.json", count=NTASK_SIZES) shell: - "python plot_terse_amdahl_results.py {output} {input}" + "python plot_terse_amdahl_results.py --output {output} {input}" ``` ::: challenge @@ -146,7 +146,7 @@ rule generate_run_files: envmodules: "matplotlib" shell: - "python plot_terse_amdahl_results.py {output} {input}" + "python plot_terse_amdahl_results.py --output {output} {input}" ``` :::::: @@ -188,6 +188,7 @@ snakemake --profile cluster_profile/ p_0.8_scalability.jpg ::: ::: challenge + ## Bonus round Create a final rule that can be called directly and generates a scaling plot for diff --git a/episodes/files/plot_terse_amdahl_results.py b/episodes/files/plot_terse_amdahl_results.py index a85425f..08b4cc8 100644 --- a/episodes/files/plot_terse_amdahl_results.py +++ b/episodes/files/plot_terse_amdahl_results.py @@ -1,13 +1,22 @@ -import sys +#!/usr/bin/env python3 +import argparse import json +import matplotlib 
import matplotlib.pyplot as plt import numpy as np -def process_files(file_list, output="plot.jpg"): +matplotlib.use('AGG') + +description = """ +Plot results of an Amdahl scaling study, +assuming the '--terse' output flag was used. +""" + +def process_files(output, file_list): value_tuples=[] for filename in file_list: # Open the JSON file and load data - with open(filename, 'r') as file: + with open(str(filename), 'r') as file: data = json.load(file) value_tuples.append((data['nproc'], data['execution_time'])) @@ -22,9 +31,10 @@ def process_files(file_list, output="plot.jpg"): # Adding the y=1/x line x_line = np.linspace(1, max(x), 100) # Create x values for the line - y_line = (y[0]/x[0]) / x_line # Calculate corresponding (scaled) y values + y_line = (y[0] / x[0]) / x_line # Calculate corresponding (scaled) y values - plt.plot(x_line, y_line, linestyle='--', color='red', label='Perfect scaling') + plt.plot(x_line, y_line, linestyle='--', + color='red', label='Perfect scaling') # Adding title and labels plt.title("Scaling plot") @@ -34,16 +44,28 @@ def process_files(file_list, output="plot.jpg"): # Show the legend plt.legend() - # Save the plot to a JPEG file - plt.savefig(output, format='jpeg') + # Save the plot to the specified file + plt.savefig(output, dpi=400, bbox_inches="tight") if __name__ == "__main__": - # The first command-line argument is the script name itself, so we skip it - output = sys.argv[1] - filenames = sys.argv[2:] + parser = argparse.ArgumentParser( + description=description, + epilog="Brought to you by HPC Carpentry" + ) + + parser.add_argument( + "--output", + default="scaling-study.png", + help="Image file to write (PNG or JPG)", + required=True + ) + + parser.add_argument( + "inputs", + help="Amdahl terse output files (JSON)", + nargs="+" + ) - if filenames: - process_files(filenames, output=output) - else: - print("No files provided.") + args = parser.parse_args() + process_files(args.output, args.inputs) From 
ee04732157d3cac4165f7d03c44d4f7c6dca06ac Mon Sep 17 00:00:00 2001 From: Trevor Keller Date: Wed, 12 Jun 2024 13:46:05 -0400 Subject: [PATCH 2/3] Update episodes/files/plot_terse_amdahl_results.py Incorporate @ocaisa's file error handling. Co-authored-by: ocaisa --- episodes/files/plot_terse_amdahl_results.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/episodes/files/plot_terse_amdahl_results.py b/episodes/files/plot_terse_amdahl_results.py index 08b4cc8..f5d1b7b 100644 --- a/episodes/files/plot_terse_amdahl_results.py +++ b/episodes/files/plot_terse_amdahl_results.py @@ -16,9 +16,19 @@ def process_files(output, file_list): value_tuples=[] for filename in file_list: # Open the JSON file and load data - with open(str(filename), 'r') as file: - data = json.load(file) - value_tuples.append((data['nproc'], data['execution_time'])) + try: + with open(filename, 'r') as file: + data = json.load(file) + value_tuples.append((data['nproc'], data['execution_time'])) + except FileNotFoundError: + print(f"Error: File {filename} not found.") + return + except json.JSONDecodeError: + print(f"Error: File {filename} is not a valid JSON.") + return + except KeyError: + print(f"Error: Missing required data in file {filename}.") + return # Sort the tuples sorted_list = sorted(value_tuples) From f8d1da5af682ca31d188d2e3553245e9ff780580 Mon Sep 17 00:00:00 2001 From: Trevor Keller Date: Wed, 12 Jun 2024 13:53:30 -0400 Subject: [PATCH 3/3] Remove default image filename --- episodes/files/plot_terse_amdahl_results.py | 1 - 1 file changed, 1 deletion(-) mode change 100644 => 100755 episodes/files/plot_terse_amdahl_results.py diff --git a/episodes/files/plot_terse_amdahl_results.py b/episodes/files/plot_terse_amdahl_results.py old mode 100644 new mode 100755 index f5d1b7b..fdb09bb --- a/episodes/files/plot_terse_amdahl_results.py +++ b/episodes/files/plot_terse_amdahl_results.py @@ -65,7 +65,6 @@ def process_files(output, file_list): 
parser.add_argument( "--output", - default="scaling-study.png", help="Image file to write (PNG or JPG)", required=True )