Update plot-ig-builder-auto.py

Added handling for gaps in data (and for when additional IGs are added later on), which introduced need for the pandas library dependency to use DataFrame.
HL7 · Sep 30, 2024 · 8804a2e · 8804a2e
1 parent 601e475
commit 8804a2e
Showing 1 changed file with 34 additions and 87 deletions.
diff --git a/.azure/plot-ig-builder-auto/plot-ig-builder-auto.py b/.azure/plot-ig-builder-auto/plot-ig-builder-auto.py
@@ -23,166 +23,113 @@
 import sys
 import os
 import numpy as np
-
+import pandas as pd
 
 # Function to parse and sort version numbers
 def parse_version(version):
- # Split version into major, minor, and patch, and convert them to integers
  try:
- # Original code is now in the 'try' block, indented.
  major, minor, patch = map(int, version.split('.'))
  return major, minor, patch
- except ValueError: # Handling non-integer splits
+ except ValueError:
  return (0, 0, 0) # Default value for non-version strings
 
 def load_json_data(source):
  if source.startswith('http://') or source.startswith('https://'):
- # Fetch the JSON data from a URL
  response = requests.get(source)
- response.raise_for_status() # Raise an exception if the request failed
+ response.raise_for_status()
  data = response.json()
  else:
- # Load the JSON data from a local file
  with open(source, 'r') as file:
  data = json.load(file)
  return data
 
+def calculate_dynamic_width(versions, base_width, additional_width_per_version, max_width):
+ num_versions = len(versions)
+ if num_versions <= 10:
+ return base_width
+ else:
+ additional_width = (num_versions - 10) * additional_width_per_version
+ return min(base_width + additional_width, max_width)
+
 def main(source):
  data = load_json_data(source)
 
- # Prepare data for visualization
- build_times = {} # Structure to hold the build times
-
- # Extracting the keys, which represent version numbers
- version_keys = list(data.keys())
- version_keys = [key for key in version_keys if key[0].isdigit()]
+ build_times = {}
 
- # Sorting the version numbers
+ version_keys = [key for key in data.keys() if key[0].isdigit()]
  sorted_versions = sorted(version_keys, key=parse_version)
-
- # The latest version is the last one in the sorted list
  latest_version = sorted_versions[-1]
 
- # Construct the filename using the version number
  filename = f"{latest_version}.png"
 
- # Process the JSON data
  for version, guides in data.items():
  if version == 'format-version':
- continue # Skip the 'format-version' entry
-
+ continue
  for guide, stats in guides.items():
  if guide in ['sync-date', 'date']:
- continue # Skip non-guide entries
-
+ continue
  guide_name = guide
- time = stats.get('time', 0) / 1000.0 # Convert milliseconds to seconds
+ time = stats.get('time', 0) / 1000.0 # Convert to seconds
 
  if guide_name not in build_times:
  build_times[guide_name] = {}
  build_times[guide_name][version] = time
 
- # Determine the number of unique guides to plot
- num_guides = len(build_times)
-
- # Define the colormaps
- # More on colormaps: https://matplotlib.org/stable/gallery/color/colormap_reference.html
- cmap1 = plt.get_cmap('tab20', 20) # This map has 20 distinct colors
- cmap2 = plt.get_cmap('tab20b', 20) # This map has 20 distinct colors too
- cmap3 = plt.get_cmap('tab20c', 20)
+ # Convert to DataFrame and replace 0 with NaN to show gaps instead of 0s
+ build_times_df = pd.DataFrame(build_times).replace(0, pd.NA)
 
- # Initialize an empty list to store the colors
+ # Define colormaps
+ cmap1 = plt.get_cmap('tab20', 20)
+ cmap2 = plt.get_cmap('tab20b', 20)
  combined_colors = []
 
- # Function to add colors to the list from a given colormap
  def add_colors_from_cmap(cmap, num_colors, color_list):
  for i in range(num_colors):
  color_list.append(cmap(i))
 
- # Add colors from each colormap to the combined list
  add_colors_from_cmap(cmap1, 20, combined_colors)
  add_colors_from_cmap(cmap2, 20, combined_colors)
- #add_colors_from_cmap(cmap3, 20, combined_colors)
 
- # Create the visualization
  color_index = 0
-
- # Assuming 'build_times' is a dictionary where keys are guide names and values are dictionaries
- # of version: build_time pairs.
- # Start by collecting all timings and labels
+ handles = []
  timing_label_pairs = []
 
- for guide, times in build_times.items():
- # Extract the total build time for the current guide
- total_build_time = sum(times.values())
- # Append the total build time and the guide label to the list as a tuple
+ for guide in build_times_df.columns:
+ total_build_time = build_times_df[guide].sum(skipna=True)
  timing_label_pairs.append((total_build_time, guide))
 
- # Sort the list by timings in descending order
  timing_label_pairs.sort(reverse=True, key=lambda x: x[0])
 
- # Now we plot in the sorted order and collect handles for the legend
- handles = []
  for total_build_time, guide in timing_label_pairs:
- times = build_times[guide]
- sorted_items = sorted(times.items())
- versions = [item[0] for item in sorted_items]
- timings = [item[1] for item in sorted_items]
-
- # Use the next color in the color list
- handle, = plt.plot(versions, timings, marker='o', label=guide, color=combined_colors[color_index % len(combined_colors)])
+ guide_times = build_times_df[guide]
+ handle, = plt.plot(guide_times.index, guide_times, marker='o', label=guide, color=combined_colors[color_index % len(combined_colors)])
  handles.append(handle)
  color_index += 1
 
- # Update the legend with the sorted handles
  plt.legend(handles=handles, bbox_to_anchor=(1.05, 1), loc='upper left')
-
- plt.ylabel('Build Time (seconds)') # Update label to reflect new units
+ plt.ylabel('Build Time (seconds)')
  plt.xlabel('Version')
  plt.title('Build Time for each Implementation Guide by Version')
-
- # Set x-axis ticks to correspond to the actual versions present in the data
  plt.xticks(ticks=np.arange(len(sorted_versions)), labels=sorted_versions, rotation=90, fontsize=8)
 
- # Assume 'sorted_versions' is the list of version strings from the JSON data
- base_width = 8 # Base width for up to 10 versions
- additional_width_per_version = 0.2 # Additional width for each version above 10
- max_reasonable_width = 30 # Maximum width to keep the plot reasonable
- fixed_height = 5 # Fixed height in inches
-
- # Calculate the dynamic width based on the number of versions
- dynamic_width = calculate_dynamic_width(sorted_versions, base_width, additional_width_per_version, max_reasonable_width)
-
- # Set the dynamic figure size
- plt.gcf().set_size_inches(dynamic_width, fixed_height)
+ # Calculate dynamic width based on the number of versions
+ dynamic_width = calculate_dynamic_width(sorted_versions, base_width=8, additional_width_per_version=0.2, max_width=30)
+ plt.gcf().set_size_inches(dynamic_width, 5)
  plt.tight_layout()
 
- # Save the figure
+ # Save the plot
  plt.savefig(args.output)
- # plt.show()
-
- plt.close(args.output)
-
-def calculate_dynamic_width(versions, base_width, additional_width_per_version, max_width):
- num_versions = len(versions)
- if num_versions <= 10:
- return base_width
- else:
- additional_width = (num_versions - 10) * additional_width_per_version
- return min(base_width + additional_width, max_width)
+ plt.close()
 
 if __name__ == "__main__":
- # Set up the command-line argument parser
  parser = argparse.ArgumentParser(description='Visualize FHIR IG Publisher build times.')
  parser.add_argument('--source', type=str, help='The path or URL to the JSON data source')
- parser.add_argument('-o', '--output', type=str, help='Output filename with path', default='../data/publisher-build-time-trends/latest-version.png') # You can change the default to any relevant path or filename.
+ parser.add_argument('-o', '--output', type=str, help='Output filename with path', default='../data/publisher-build-time-trends/latest-version.png')
 
- # Parse the arguments
  args = parser.parse_args()
  args.source = args.source if args.source else 'https://raw.githubusercontent.com/HL7/fhir-ig-publisher/master/test-statistics.json'
 
  try:
  main(args.source)
  except Exception as e:
- print(f"Error: {str(e)}", file=sys.stderr)
- args.source = args.source if (args.source is not None) else 'https://raw.githubusercontent.com/HL7/fhir-ig-publisher/master/test-statistics.json'
+ print(f"Error: {str(e)}", file=sys.stderr)