From dd0fc645fb1ff0fde71be3b3652584535a82f7c9 Mon Sep 17 00:00:00 2001
From: Sakib Rahman <rahmans@myumanitoba.ca>
Date: Wed, 18 Dec 2024 11:38:11 -0500
Subject: [PATCH] Plotting script to draw branch sizes

---
 benchmarks/output_branch_size_scan/plot.py | 63 ++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 benchmarks/output_branch_size_scan/plot.py

diff --git a/benchmarks/output_branch_size_scan/plot.py b/benchmarks/output_branch_size_scan/plot.py
new file mode 100644
index 0000000..ddfd048
--- /dev/null
+++ b/benchmarks/output_branch_size_scan/plot.py
@@ -0,0 +1,63 @@
+"""Plot and compare output branch sizes from two size listings.
+
+Reads ``<name>.txt`` for the current campaign (-c) and the default (-d),
+scatter-plots the third column against the first for both, and writes the
+per-branch size difference, sorted by magnitude, to a CSV file.
+"""
+import argparse
+import os
+
+import matplotlib.pyplot as plt
+import pandas as pd
+
+
+def main():
+    """Parse the command line, save the comparison plot and difference table."""
+    parser = argparse.ArgumentParser(prog='Plot output branch sizes', description='Plot output branch sizes')
+    parser.add_argument("-c", dest="current_campaign_file", action="store", required=True, help="Enter the current campaign file")
+    parser.add_argument("-d", dest="default_file", action="store", required=True, help="Enter the default file")
+    args = parser.parse_args()
+    campaign1 = args.current_campaign_file
+    campaign2 = args.default_file
+
+    # Name outputs after the final path component only: joining the raw
+    # arguments yields an invalid path ("a/x_vs_b/y.png") if they have directories.
+    label1 = os.path.basename(campaign1)
+    label2 = os.path.basename(campaign2)
+
+    # Inputs are header-less comma-separated text; column 0 is used as the
+    # branch name and column 2 as its size (bytes, going by the plot title).
+    df1 = pd.read_csv(campaign1 + '.txt', header=None)
+    df2 = pd.read_csv(campaign2 + '.txt', header=None)
+
+    plt.figure(figsize=(10, 6))
+    plt.scatter(df1.iloc[:, 0], df1.iloc[:, 2], label=label1)
+    plt.scatter(df2.iloc[:, 0], df2.iloc[:, 2], label=label2)
+    plt.title("Branch Sizes (Bytes) vs Branch Names")
+    plt.yscale('log')
+    plt.xticks(rotation=90)
+    plt.legend()
+    # Lay out last so the log axis, rotated labels and legend are accounted for.
+    plt.tight_layout()
+    plt.savefig(label1 + '_vs_' + label2 + '.png')
+    plt.close()
+
+    print(df1)
+    print(df2)
+
+    # Inner merge on the branch name: branches present in only one file are
+    # omitted. Fixed suffixes keep the two size columns distinct even when
+    # both arguments are the same name.
+    merged_df = pd.merge(df1.iloc[:, [0, 2]], df2.iloc[:, [0, 2]], on=df1.columns[0], suffixes=('_current', '_default'))
+    merged_df['Difference'] = merged_df.iloc[:, 1] - merged_df.iloc[:, 2]
+    result_df = merged_df[[df1.columns[0], 'Difference']]
+    print(result_df)
+
+    # Largest absolute difference first: print the top 20, save the full table.
+    sorted_df = result_df.reindex(result_df['Difference'].abs().sort_values(ascending=False).index)
+    print(sorted_df.head(20))
+    sorted_df.to_csv(f"{label1}_vs_{label2}_difference.csv", index=False)
+
+
+if __name__ == "__main__":
+    main()