#!/usr/bin/env python3
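"""Generate figures (and optionally processed csv files) from benchmark csv output.

Part of a repository generated from FLAMEGPU/FLAMEGPU2-model-template-cpp. The
input directory must contain the csv files listed in EXPECTED_CSV_FILES; see
cli() for the available command line arguments.
"""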
import sys
import argparse
import pathlib
from dataclasses import dataclass
from typing import Optional

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
# Maximum sane DPI accepted for --dpi
MAX_SANE_DPI = 1000
# Default DPI for saved figures
DEFAULT_DPI = 96
# Padding between the legend and the axes, in font-size units (see Axes.legend borderaxespad)
LEGEND_BORDER_PAD = 0.5
# Size of figures in inches
FIGSIZE_INCHES = (16, 9)
# CSV files that should be present in the input directory.
EXPECTED_CSV_FILES = [
    "performance_scaling.csv"
]
# Input columns for per-step per-sim CSVs:
# GPU,release_mode,seatbelts_on,model,steps,agent_count,env_width,comm_radius,repeat,agent_density,step,s_step
# Input columns for per-sim CSVs:
# GPU,release_mode,seatbelts_on,model,steps,agent_count,env_width,comm_radius,repeat,agent_density,mean_message_count,s_rtc,s_simulation,s_init,s_exit,s_sim_mean
# Input csv columns which identify rows as repetitions of the same configuration, for per-step per-sim csvs
GROUP_BY_COLUMNS_PER_STEP_PER_SIM = ['pop_size', 'grid_width']
# Input csv columns which identify rows as repetitions of the same configuration, for per-sim csvs
GROUP_BY_COLUMNS_PER_SIM = ['pop_size', 'grid_width']
# Aggregate operations to apply across grouped csv rows, for the per-step per-sim csvs
AGGREGATIONS_PER_STEP_PER_SIM = {
    's_step_mean': ['mean']
}
# Aggregate operations to apply across grouped csv rows, for the per-sim csvs
AGGREGATIONS_PER_SIM = {
    's_step_mean': ['mean']
}
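# Note: process_data() flattens these aggregation dicts into column labels of the
# form f"{op}_{col}", so the dicts above both yield "mean_s_step_mean", which is
# the ykey referenced by the plot definitions further below.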
def cli():
parser = argparse.ArgumentParser(description="Python script to generate figures from csv files")
parser.add_argument(
"-v",
"--verbose",
action="store_true",
help="increase verbosity of output"
)
parser.add_argument(
"-f",
"--force",
action="store_true",
help="Force overwriting of files (surpress user confirmation)"
)
parser.add_argument(
"-o",
"--output-dir",
type=str,
help="directory to output figures into."
)
parser.add_argument(
"--dpi",
type=int,
help="DPI for output file",
default=DEFAULT_DPI
)
parser.add_argument(
"-s",
"--show",
action="store_true",
help="Show the plot(s)"
)
parser.add_argument(
"input_dir",
type=str,
help="Input directory, containing the 4 expected input csv files",
default="."
)
args = parser.parse_args()
return args
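# Example invocation (the paths are illustrative):
#   ./plot.py -o figures --dpi 150 path/to/input_dir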
def validate_args(args):
valid = True
# If output_dir is passed, create it, error if can't create it.
if args.output_dir is not None:
p = pathlib.Path(args.output_dir)
try:
p.mkdir(exist_ok=True, parents=True)
except Exception as e:
print(f"Error: Could not create output directory {p}: {e}")
valid = False
# DPI must be positive, and add a max.
if args.dpi is not None:
        if args.dpi < 1:
            print(f"Error: --dpi must be a positive value ({args.dpi} provided).")
            valid = False
        if args.dpi > MAX_SANE_DPI:
            print(f"Error: --dpi must not exceed {MAX_SANE_DPI} ({args.dpi} provided).")
            valid = False
# Ensure that the input directory exists, and that all required inputs are present.
input_dir = pathlib.Path(args.input_dir)
if input_dir.is_dir():
missing_files = []
for csv_filename in EXPECTED_CSV_FILES:
csv_path = input_dir / csv_filename
if not csv_path.is_file():
missing_files.append(csv_filename)
valid = False
if len(missing_files) > 0:
print(f"Error: {input_dir} does not contain required files:")
for missing_file in missing_files:
print(f" {missing_file}")
else:
print(f"Error: Invalid input_dir provided {args.input_dir}")
valid = False
return valid
def load_inputs(input_dir):
dfs = {}
input_dir = pathlib.Path(input_dir)
for csv_name in EXPECTED_CSV_FILES:
csv_path = input_dir / csv_name
# Read in the csv
df = pd.read_csv(csv_path, sep=',', quotechar='"')
# Strip any whitespace from column names
df.columns = df.columns.str.strip()
# @todo - validate that the expected columns are available.
dfs[csv_name] = df
return dfs
def process_data(input_dataframes, verbose):
output_dataframes = {}
for csv_name, input_df in input_dataframes.items():
        if verbose:
            print(f"processing {csv_name}")
            print("input columns:")
            for column in input_df.columns:
                print(f" {column}")
        # If it's a per-step file, use one set of operations, otherwise use a different set.
csv_is_per_step = "perStep" in csv_name
# Columns to group data by - i.e. identify repetitions of a single run
group_by_columns = GROUP_BY_COLUMNS_PER_STEP_PER_SIM if csv_is_per_step else GROUP_BY_COLUMNS_PER_SIM
# fetch the appropriate list of aggregate operations to apply.
aggregations = AGGREGATIONS_PER_STEP_PER_SIM if csv_is_per_step else AGGREGATIONS_PER_SIM
# New names for each aggregated column, by flattening the dict of lists.
new_column_labels = [f"{op}_{col}" for col, ops in aggregations.items() for op in ops]
# Get the aggregated data
        if aggregations:
            grouped_df = input_df.groupby(by=group_by_columns).agg(aggregations)
            # Apply the new column names
            grouped_df.columns = new_column_labels
            # Reset the index so the group-by columns become regular columns again.
            grouped_df = grouped_df.reset_index()
            if verbose:
                print("output columns:")
                for column in grouped_df.columns:
                    print(f" {column}")
else:
grouped_df = input_df
# Store the processed dataframe.
output_dataframes[csv_name] = grouped_df
return output_dataframes
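# Minimal sketch of the aggregation performed above, on a toy frame (values illustrative):
#   toy = pd.DataFrame({
#       "pop_size":    [64, 64, 128, 128],
#       "grid_width":  [8, 8, 16, 16],
#       "s_step_mean": [0.10, 0.12, 0.40, 0.44],
#   })
#   agg = toy.groupby(by=["pop_size", "grid_width"]).agg({"s_step_mean": ["mean"]})
#   agg.columns = ["mean_s_step_mean"]
#   agg = agg.reset_index()
#   # -> one row per (pop_size, grid_width), with s_step_mean averaged over repeats.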
def store_processed_data(input_dataframes, processed_dataframes, output_dir, force, verbose):
success = True
    # If output_dir is not None, save each processed csv to disk.
if output_dir is not None:
output_dir = pathlib.Path(output_dir)
for csv_name, processed_df in processed_dataframes.items():
output_csv_path = output_dir / f"processed_{csv_name}"
if not output_csv_path.exists() or force:
try:
if verbose:
print(f"Writing to {output_csv_path}")
processed_df.to_csv(output_csv_path, sep=",", header=True, index=False, quotechar='"', float_format='%.3f')
except Exception as e:
print(f"Error: could not write to {output_csv_path} with exception {e}")
success = False
else:
print(f"Error: {output_csv_path} already exists. Use `-f/--force` to overwrite.")
success = False
# Print out some key values to stdout.
# @todo - write to a summary txt file in the output dir, and / or stdout.
for csv_name, input_df in input_dataframes.items():
        # Get the max rtc time from the input file, and report the mean as well.
# @todo - might be better to have a threshold cutoff?
if "s_rtc" in input_df:
max_s_rtc = input_df["s_rtc"].max()
mean_s_rtc = input_df["s_rtc"].mean()
print(f"{csv_name}: max_s_rtc {max_s_rtc:.3f}, mean_s_rtc {mean_s_rtc:.3f}")
return success
MANUAL_PRETTY_CSV_KEY_MAP = {
"step": "Step",
"GPU": "GPU",
"release_mode": "Release Mode",
"seatbelts_on": "Seatbelts On",
"model": "Implementation",
"steps": "Steps",
"agent_count": "Agent Count",
"env_width": "Environment Width",
"comm_radius": "Communication Radius",
"repeat": "Repeat",
"agent_density": "Agent Density",
"mean_message_count": "Average Message Count",
"s_rtc": "RTC Time (s)",
"s_simulation": "Simulation Time (s)",
"s_init": "Init Function Time (s)",
"s_exit": "Exit Function Time (s)",
"s_sim_mean": "Average Simulation Time (s)",
"s_step": "Step Time (s)",
"mean_s_rtc": "Average RTC Time (s)",
"mean_s_simulation": "Average Simulation Time (s)",
"mean_s_init": "Average Init Function Time (s)",
"mean_s_exit": "Average Exit Function Time (s)",
"mean_s_sim_mean": "Average Simulation Time (s)",
"mean_s_step_mean": "Average Step Time (s)",
"mean_agent_density": "Agent Density",
"env_volume": "Environment Volume",
"num_species" : "Number of Species",
"pop_size" : "World Grid Size",
"is_concurrent" : "Concurrency Enabled",
"speedup" : "Speedup"
}
def pretty_csv_key(csv_key):
if csv_key is None:
return None
pretty_key = csv_key.replace("_", " ")
if csv_key in MANUAL_PRETTY_CSV_KEY_MAP:
pretty_key = MANUAL_PRETTY_CSV_KEY_MAP[csv_key]
return pretty_key
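# For example:
#   pretty_csv_key("mean_s_step_mean") -> "Average Step Time (s)"  (from the map)
#   pretty_csv_key("unmapped_key")     -> "unmapped key"           (underscore fallback)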
# Dataclass requires py 3.7, but saves a bunch of effort.
@dataclass
class PlotOptions:
"""Class for options for a single plot"""
    xkey: str
    ykey: str
    huekey: Optional[str] = None
    stylekey: Optional[str] = None
    plot_type: str = "lineplot"
    filename: Optional[str] = None
    logx: bool = False
    logy: bool = False
    minx: Optional[int] = None
    maxx: Optional[int] = None
    miny: Optional[int] = None
    maxy: Optional[int] = None
    # "auto", "brief", "full", or False
    sns_legend: str = "full"
    legend_outside: bool = True
    legend_y_offset: float = 0.0
    df_query: Optional[str] = None
    sns_palette: str = "Dark2"
    sns_style: str = "darkgrid"
    # notebook, talk, paper or poster
    sns_context: str = "talk"
def plot(self, df_in, output_prefix, output_dir, dpi, force, show, verbose):
df = df_in
# Set a filename if needed.
if self.filename is None:
self.filename = f"{self.plot_type}--{self.xkey}--{self.ykey}--{self.huekey}--{self.stylekey}"
        # Use seaborn defaults for font size etc.
sns.set_context(self.sns_context, rc={"lines.linewidth": 2.5})
# Set the general style.
sns.set_style(self.sns_style)
# Filter the data using pandas queries if required.
if self.df_query is not None and len(self.df_query):
df = df.query(self.df_query)
# If the df is empty, skip.
if df.shape[0] == 0:
print(f"Skipping plot {self.filename} - 0 rows of data")
return False
# Get the number of palette values required.
huecount = len(df[self.huekey].unique()) if self.huekey is not None else 1
# Set palette.
palette = sns.color_palette(self.sns_palette, huecount)
sns.set_palette(palette)
# create a matplotlib figure and axis, for a single plot.
# Use constrained layout for better legend placement.
fig, ax = plt.subplots(constrained_layout=True)
# Set the size of the figure in inches
fig.set_size_inches(FIGSIZE_INCHES[0], FIGSIZE_INCHES[1])
# Generate labels / titles etc.
xlabel = f"{pretty_csv_key(self.xkey)}"
ylabel = f"{pretty_csv_key(self.ykey)}"
huelabel = f"{pretty_csv_key(self.huekey)}"
stylelabel = f"{pretty_csv_key(self.stylekey)}"
hs_label = f"{huelabel} x {stylelabel}" if huelabel != stylelabel else f"{huelabel}"
        figtitle = f"{ylabel} vs {xlabel} ({hs_label})"
# @todo - validate keys.
        # Decide whether the legend should be placed outside the axes.
external_legend = self.legend_outside
g = None
if self.plot_type == "lineplot":
# plot the data @todo - lineplot vs scatter?
g = sns.lineplot(
data=df,
x=self.xkey,
y=self.ykey,
hue=self.huekey,
style=self.stylekey,
markers=True,
dashes=False,
ax=ax,
# size=6,
legend=self.sns_legend,
palette=palette
)
elif self.plot_type == "scatterplot":
g = sns.scatterplot(
data=df,
x=self.xkey,
y=self.ykey,
hue=self.huekey,
style=self.stylekey,
markers=True,
ax=ax,
# size=6,
legend=self.sns_legend,
palette=palette
)
else:
            raise ValueError(f"Bad plot_type {self.plot_type}")
# Set a title
# @disabled for now.
# if len(figtitle):
# plt.title(figtitle)
# adjust x axis if required.
ax.set(xlabel=xlabel)
if self.logx:
ax.set(xscale="log")
if self.minx is not None:
ax.set_xlim(left=self.minx)
if self.maxx is not None:
ax.set_xlim(right=self.maxx)
# adjust y axis if required.
ax.set(ylabel=ylabel)
if self.logy:
ax.set(yscale="log")
if self.miny is not None:
ax.set_ylim(bottom=self.miny)
if self.maxy is not None:
ax.set_ylim(top=self.maxy)
# Disable scientific notation on axes
ax.ticklabel_format(useOffset=False, style='plain')
# If there is reason to have a legend, do some extra processing.
if ax.get_legend() is not None:
legend = ax.get_legend()
loc = None
bbox_to_anchor = None
if external_legend:
# Set legend placement if not internal.
loc = "upper left"
                # @todo - y offset should be LEGEND_BORDER_PAD transformed from font units to bbox.
bbox_to_anchor = (1, 1 - self.legend_y_offset)
# Get the handles and labels for the legend
handles, labels = ax.get_legend_handles_labels()
# Iterate the labels in the legend, looking for the huekey or stylekey as values
# If either were found, replace with the pretty version
found_count = 0
for i, label in enumerate(labels):
if label == self.huekey:
labels[i] = huelabel
found_count += 1
elif label == self.stylekey:
labels[i] = stylelabel
found_count += 1
# If neither were found, set a legend title.
            if found_count == 0:
                # Add an invisible patch with the appropriate label, as seaborn does when multiple values are provided.
                handles.insert(0, mpatches.Rectangle((0, 0), 1, 1, fill=False, edgecolor='none', visible=False, label=hs_label))
                labels.insert(0, hs_label)
ax.legend(handles=handles, labels=labels, loc=loc, bbox_to_anchor=bbox_to_anchor, borderaxespad=LEGEND_BORDER_PAD)
# if an output directory is provided, save the figure to disk.
if output_dir is not None:
output_dir = pathlib.Path(output_dir)
# Prefix the filename with the experiment prefix.
output_filename = f"{output_prefix}--{self.filename}"
# Get the path for output
output_filepath = output_dir / output_filename
            # If the file does not exist, or force is true, write the output file, otherwise error.
if not output_filepath.exists() or force:
try:
if verbose:
print(f"writing figure to {output_filepath}")
fig.savefig(output_filepath, dpi=dpi, bbox_inches='tight')
except Exception as e:
print(f"Error: could not write to {output_filepath}")
return False
else:
print(f"Error: {output_filepath} already exists. Specify a different `-o/--output-dir` or use `-f/--force`")
return False
        # If the show flag was set, show the plot.
if show: # or output_dir is None:
plt.show()
return True
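# Example use of PlotOptions.plot (a sketch; the output directory name is hypothetical):
#   opts = PlotOptions(xkey="pop_size", ykey="mean_s_step_mean")
#   opts.plot(df, output_prefix="performance", output_dir="figures",
#             dpi=DEFAULT_DPI, force=False, show=False, verbose=True)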
QUALITATIVE_PALETTE = "Dark2"
SEQUENTIAL_PALETTE = "viridis"
# Define the figures to generate for each input CSV.
PLOTS_PER_CSV = {
    # Single-series figure, so the palette choice is largely cosmetic; a sequential palette is used.
    "performance_scaling.csv": [
PlotOptions(
filename="submodel_performance_scaling.png",
plot_type="lineplot",
xkey="pop_size",
ykey="mean_s_step_mean",
sns_palette=SEQUENTIAL_PALETTE,
minx=0,
miny=0
)
]
}
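# Further figures can be generated by appending PlotOptions entries to the list
# above, e.g. a log-log variant (assuming the same processed csv columns):
#   PlotOptions(plot_type="lineplot", xkey="pop_size", ykey="mean_s_step_mean",
#               logx=True, logy=True, filename="submodel_performance_scaling_loglog.png")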
def plot_figures(processed_dataframes, output_dir, dpi, force, show, verbose):
# For each processed dataframe
for csv_name, processed_df in processed_dataframes.items():
csv_is_per_step = "perStep" in csv_name
output_prefix = csv_name.split("_")[0]
if csv_is_per_step:
output_prefix = f"{output_prefix}_perStep"
        # Get the list of figures to generate, based on the csv name.
        if csv_name in PLOTS_PER_CSV:
            plots_to_generate = PLOTS_PER_CSV[csv_name]
            for plot_options in plots_to_generate:
                plot_options.plot(processed_df, output_prefix, output_dir, dpi, force, show, verbose)
def main():
# @todo - print some key info to stdout to complement the data? i.e. RTC time? This can just be fetched from the input csv.
# Process the cli
args = cli()
# Validate cli
valid_args = validate_args(args)
if not valid_args:
return False
# Load all input dataframes.
input_dataframes = load_inputs(args.input_dir)
# Process the dataframes.
processed_dataframes = process_data(input_dataframes, args.verbose)
    # Store the processed dataframes on disk if an output dir is provided, and print summary values to stdout.
store_processed_data(input_dataframes, processed_dataframes, args.output_dir, args.force, args.verbose)
# Plot the figures to disk, or interactively.
    plot_figures(processed_dataframes, args.output_dir, args.dpi, args.force, args.show, args.verbose)
    return True
# Run the main method if this was not imported as a module.
if __name__ == "__main__":
    sys.exit(0 if main() else 1)