diff --git a/docs/source/conf.py b/docs/source/conf.py
index 20a20b5..687db8e 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,42 +1,24 @@
-#
 # Configuration file for the Sphinx documentation builder.
 #
-# This file does only contain a selection of the most common options. For a
-# full list see the documentation:
-# http://www.sphinx-doc.org/en/master/config
-
-# -- Path setup --------------------------------------------------------------
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('.'))
-
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html

 # -- Project information -----------------------------------------------------
-
-project = 'P3 Analysis Library'
-copyright = ' Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others'
-author = 'Intel Corporation'
-
-# The short X.Y version
-version = ''
-# The full version, including alpha/beta/rc tags
-release = ''
-
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = "P3 Analysis Library"
+copyright = (
+    " Intel Corporation. Intel, the Intel logo, and other Intel marks are "
+    + "trademarks of Intel Corporation or its subsidiaries. Other names and "
+    + "brands may be claimed as the property of others."
+)
+author = "Intel Corporation"
+version = ""
+release = ""

 # -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

-# If your documentation needs a minimal Sphinx version, state it here.
-#
-# needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
 extensions = [
     'sphinx.ext.autodoc',
     'sphinx.ext.napoleon',
@@ -45,167 +27,25 @@
     'sphinx.ext.intersphinx',
 ]

-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# The suffix(es) of source filenames.
-# You can specify multiple suffix as a list of string:
-#
-# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
-
-# The master toctree document.
-master_doc = 'index'
-
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-#
-# This is also used if you do content translation via gettext catalogs.
-# Usually you set "language" from the command line for these cases.
-language = "en"
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This pattern also affects html_static_path and html_extra_path.
+templates_path = ["_templates"]
 exclude_patterns = []

-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = None
-
-
 # -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

-# The theme to use for HTML and HTML Help pages. See the documentation for
-# a list of builtin themes.
-#
-html_theme = 'furo'
-
-# Theme options are theme-specific and customize the look and feel of a theme
-# further. For a list of options available for each theme, see the
-# documentation.
-#
-html_theme_options = {}
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
-
-# Custom sidebar templates, must be a dictionary that maps document names
-# to template names.
-#
-# The default sidebars (for documents that don't match any pattern) are
-# defined by theme itself. Builtin themes are using these templates by
-# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
-# 'searchbox.html']``.
-#
-# html_sidebars = {}
-
+html_theme = "furo"
+html_static_path = ["_static"]
 html_title = "P3 Analysis Library"
-
-# -- Options for HTMLHelp output ---------------------------------------------
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = 'p3-analysis-library-doc'
-
-
-# -- Options for LaTeX output ------------------------------------------------
-
-latex_elements = {
-    # The paper size ('letterpaper' or 'a4paper').
-    #
-    # 'papersize': 'letterpaper',
-
-    # The font size ('10pt', '11pt' or '12pt').
-    #
-    # 'pointsize': '10pt',
-
-    # Additional stuff for the LaTeX preamble.
-    #
-    # 'preamble': '',
-
-    # Latex figure (float) alignment
-    #
-    # 'figure_align': 'htbp',
-}
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title,
-#  author, documentclass [howto, manual, or own class]).
-latex_documents = [
-    (
-        master_doc, 'p3-analysis-library.tex',
-        'P3 Analysis Library Documentation',
-        'Intel Corporation', 'manual',
-    ),
-]
-
-
-# -- Options for manual page output ------------------------------------------
-
-# One entry per manual page. List of tuples
-# (source start file, name, description, authors, manual section).
-man_pages = [
-    (
-        master_doc, 'p3-analysis-library', 'P3 Analysis Library Documentation',
-        [author], 1,
-    ),
-]
-
-
-# -- Options for Texinfo output ----------------------------------------------
-
-# Grouping the document tree into Texinfo files. List of tuples
-# (source start file, target name, title, author,
-#  dir menu entry, description, category)
-texinfo_documents = [
-    (
-        master_doc, 'p3-analysis-library', 'P3 Analysis Library Documentation',
-        author, 'p3-analysis-library',
-        'A library simplifying the collection and interpretation of P3 data.',
-        'Miscellaneous',
-    ),
-]
-
-
-# -- Options for Epub output -------------------------------------------------
-
-# Bibliographic Dublin Core info.
-epub_title = project
-
-# The unique identifier of the text. This can be a ISBN number
-# or the project homepage.
-#
-# epub_identifier = ''
-
-# A unique identification for the text.
-#
-# epub_uid = ''
-
-# A list of files that should not be packed into the epub file.
-epub_exclude_files = ['search.html']
-
-
-# -- Extension configuration -------------------------------------------------
-
-from sphinx_gallery.scrapers import matplotlib_scraper
-
-
-class tight_scraper:
-
-    def __repr__(self):
-        return self.__class__.__name__
-
-    def __call__(self, *args, **kwargs):
-        return matplotlib_scraper(*args, bbox_inches="tight", **kwargs)
-
 sphinx_gallery_conf = {
     'examples_dirs': ['../../examples','../../case-studies'],
     'gallery_dirs': ['examples','case-studies'],
-    'image_scrapers': (tight_scraper(),),
+    'run_stale_examples': True,
+    'filename_pattern': '/',
+    'backreferences_dir': 'gen_modules/backreferences',
 }

 intersphinx_mapping = {
     "matplotlib": ('https://matplotlib.org/stable/', None),
+    "pandas": ('https://pandas.pydata.org/pandas-docs/stable/', None),
 }
diff --git a/examples/metrics/README.rst b/examples/metrics/README.rst
new file mode 100644
index 0000000..7a9ebb8
--- /dev/null
+++ b/examples/metrics/README.rst
@@ -0,0 +1,6 @@
+Metrics
+-------
+
+Generating metrics from raw P3 data is a prerequisite for generating specific
+plots. Understanding how to manipulate and prepare raw P3 data enables the
+exploration of more complex "what if" scenarios and more detailed analyses.
diff --git a/examples/metrics/application_efficiency.py b/examples/metrics/application_efficiency.py
new file mode 100755
index 0000000..deae903
--- /dev/null
+++ b/examples/metrics/application_efficiency.py
@@ -0,0 +1,330 @@
+#!/usr/bin/env python3
+# Copyright (c) 2024 Intel Corporation
+# SPDX-License-Identifier: 0BSD
+"""
+Working with Application Efficiency
+===================================
+
+Understanding relative performance.
+
+One goal of P3 analysis is to understand how well a given application is able
+to adapt to and make effective use of the capabilities of different
+platforms. Comparisons of *raw* performance (e.g., time to solution) across
+platforms can't help us, because raw performance doesn't reflect how fast an
+application **should** run.
+
+To address this, the P3 Analysis Library works with normalized *performance
+efficiency* data. With normalized data, an application's performance can be
+represented as a number in the range :math:`[0, 1]`, where :math:`1` means
+an application is achieving the best possible performance.
+
+There are multiple ways we can normalize the data to measure relative
+efficiency, and for this tutorial we will consider *application efficiency*,
+defined below:
+
+**Application Efficiency**
+    The performance of an *application*, measured relative to the best known
+    performance previously demonstrated for solving the same *problem* on the
+    same *platform*.
+
+Working with application efficiency is simple because it does not rely on
+performance models or theoretical hardware limits. Although it can't tell us
+whether an application is performing as well as theoretically possible, it
+shows how an application compares to the state-of-the-art, which is often good
+enough.
+
+Calculating Application Efficiency
+----------------------------------
+
+Let's begin with a very simple example, with a single application, using the
+data below:
+
+.. list-table::
+    :widths: 20 20 20 20 20
+    :header-rows: 1
+
+    * - problem
+      - application
+      - platform
+      - fom
+      - date
+
+    * - Test
+      - MyApp
+      - A
+      - 25.0
+      - 2023
+    * - Test
+      - MyApp
+      - B
+      - 12.5
+      - 2023
+    * - Test
+      - MyApp
+      - C
+      - 25.0
+      - 2023
+    * - Test
+      - MyApp
+      - D
+      - NaN
+      - 2023
+    * - Test
+      - MyApp
+      - E
+      - 5.0
+      - 2023
+
+.. tip::
+    A NaN or 0.0 performance result is interpreted by the P3 Analysis Library
+    to mean that an application run was in some way invalid. We can use this
+    to explicitly represent cases where applications did not compile on
+    specific platforms, did not run to completion, or ran but produced
+    numerical results that failed some sort of verification.
+
+"""
+
+# %%
+# After loading this data into a :py:class:`pandas.DataFrame` (`df`), we can
+# use the :py:func:`p3.metrics.application_efficiency` function to calculate a
+# table of application efficiencies.
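+
+# %%
+# In this tutorial the dataset is small enough to construct directly in code.
+# The snippet below is a minimal sketch of one way to build an equivalent
+# DataFrame by hand, with column names matching the table above; in a real
+# project the raw results would more likely be loaded from a file, for
+# example with :py:func:`pandas.read_csv`.

+import numpy as np
+import pandas as pd
+
+df = pd.DataFrame(
+    {
+        "problem": ["Test"] * 5,
+        "application": ["MyApp"] * 5,
+        "platform": ["A", "B", "C", "D", "E"],
+        "fom": [25.0, 12.5, 25.0, np.nan, 5.0],
+        "date": [2023] * 5,
+    }
+)
+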
+#sphinx_gallery_start_ignore
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+import p3
+
+myapp_data = {
+    "problem": ["Test"] * 5,
+    "application": ["MyApp"] * 5,
+    "platform": ["A", "B", "C", "D", "E"],
+    "fom": [25.0, 12.5, 25.0, np.nan, 5.0],
+    "date": [2023] * 5,
+}
+
+df = pd.DataFrame(myapp_data)
+#sphinx_gallery_end_ignore
+
+effs = p3.metrics.application_efficiency(df)
+print(effs)
+
+# %%
+# These initial results may be a little surprising, because they're all
+# either 1.0 or 0.0. What happened? Since our dataset contains only one result
+# for MyApp on each platform, each non-zero result is the "best known" result
+# for that platform! The only exception is Platform D, which is assigned an
+# efficiency of 0.0 to reflect that MyApp either did not compile or did not
+# run successfully there.
+#
+# .. tip::
+#     Calculating meaningful application efficiency results requires a minimum
+#     of two results *per platform*.
+#
+# Digging Deeper: Adding More Data
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+#
+# Let's see what happens if we add some more data, from a different application
+# running on the same platforms:
+#
+# .. list-table::
+#     :widths: 20 20 20 20 20
+#     :header-rows: 1
+#
+#     * - problem
+#       - application
+#       - platform
+#       - fom
+#       - date
+#
+#     * - Test
+#       - YourApp
+#       - A
+#       - 25.0
+#       - 2023
+#     * - Test
+#       - YourApp
+#       - B
+#       - 10.0
+#       - 2023
+#     * - Test
+#       - YourApp
+#       - C
+#       - 12.5
+#       - 2023
+#     * - Test
+#       - YourApp
+#       - D
+#       - 6.0
+#       - 2023
+#     * - Test
+#       - YourApp
+#       - E
+#       - 1.0
+#       - 2023
+
+# %%
+# After updating our DataFrame, we can re-run the same function as before to
+# recompute the application efficiencies.
+
+#sphinx_gallery_start_ignore
+yourapp_data = {
+    "problem": ["Test"] * 5,
+    "application": ["YourApp"] * 5,
+    "platform": ["A", "B", "C", "D", "E"],
+    "fom": [25.0, 10.0, 12.5, 6.0, 1.0],
+    "date": [2023] * 5,
+}
+
+df = pd.concat([df, pd.DataFrame(yourapp_data)], ignore_index=True)
+#sphinx_gallery_end_ignore
+
+effs = p3.metrics.application_efficiency(df)
+print(effs)
+
+# %%
+# YourApp is now the fastest (best known) application on every platform,
+# and so it is assigned an application efficiency of 1.0 everywhere. The
+# application efficiency values for MyApp are all between 0.0 and 1.0,
+# reflecting how close it gets to the state-of-the-art performance on each
+# platform.
+#
+# .. important::
+#     Adding new data changed the application efficiencies for MyApp *and*
+#     YourApp. Application efficiency values can become stale over time,
+#     and accurate P3 analysis requires us to track "best known" results
+#     carefully.
+#
+# Plotting Application Efficiency
+# -------------------------------
+#
+# The P3 Analysis Library does not contain any dedicated functionality for
+# plotting application efficiency values. However, it is straightforward to use
+# :py:mod:`matplotlib` and/or the plotting functionality of :py:mod:`pandas` to
+# produce useful visualizations.
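+
+# %%
+# Before plotting, it can be useful to view the efficiencies as a small
+# summary table, with one row per platform and one column per application.
+# The snippet below is a plain :py:mod:`pandas` sketch rather than a feature
+# of the P3 Analysis Library; it relies on the efficiency column being named
+# ``app eff``, as used by the plotting calls that follow.
+
+summary = effs.pivot_table(
+    index="platform", columns="application", values="app eff"
+)
+print(summary)
+
+# %%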
+#
+# For example, plotting a bar chart of application efficiencies for one
+# application can help us to summarize that application's performance more
+# effectively than a table:
+
+filtered = effs[effs["application"] == "MyApp"]
+filtered.plot(
+    kind="bar",
+    x="platform",
+    y="app eff",
+    xlabel="Platform",
+    ylabel="Application Efficiency",
+    legend=False,
+)
+plt.savefig("application_efficiency_bars_2023.png")
+
+# %%
+# We can now clearly see that MyApp can adapt to and make very effective use of
+# Platforms A and B, is within 2x of state-of-the-art performance on Platform
+# C, but performs poorly on Platforms D and E. The key takeaway from this
+# analysis is that a developer wishing to improve the "performance portability"
+# of MyApp should focus on improving support for Platforms D and E.
+#
+# Working with Historical Data
+# ----------------------------
+#
+# The performance of an application can change over time, as developers add new
+# features and optimize for new platforms, or due to changes in the software
+# stack (e.g., new compiler or driver versions).
+#
+# Let's see how this could affect the application efficiency of MyApp by adding
+# some new data points collected at a later point in time:
+#
+# .. list-table::
+#     :widths: 20 20 20 20 20
+#     :header-rows: 1
+#
+#     * - problem
+#       - application
+#       - platform
+#       - fom
+#       - date
+#
+#     * - Test
+#       - MyApp
+#       - A
+#       - 30.0
+#       - 2024
+#     * - Test
+#       - MyApp
+#       - B
+#       - 15.0
+#       - 2024
+#     * - Test
+#       - MyApp
+#       - C
+#       - 25.0
+#       - 2024
+#     * - Test
+#       - MyApp
+#       - D
+#       - 3.0
+#       - 2024
+#     * - Test
+#       - MyApp
+#       - E
+#       - 2.5
+#       - 2024
+
+# %%
+# We can compute application efficiency as before:
+
+#sphinx_gallery_start_ignore
+new_myapp_data = {
+    "problem": ["Test"] * 5,
+    "application": ["MyApp"] * 5,
+    "platform": ["A", "B", "C", "D", "E"],
+    "fom": [30.0, 15.0, 25.0, 3.0, 2.5],
+    "date": [2024] * 5,
+}
+
+df = pd.concat([df, pd.DataFrame(new_myapp_data)], ignore_index=True)
+#sphinx_gallery_end_ignore
+
+effs = p3.metrics.application_efficiency(df)
+print(effs)
+
+# %%
+# These latest results suggest that the developers of MyApp acted upon
+# earlier results and improved support for Platforms D and E. But in doing so,
+# a small performance regression was introduced on Platforms A and B.
+#
+# .. note::
+#     Such trade-offs are very common, especially when developers wish to
+#     maintain a single source code that targets multiple platforms.
+#     Different platforms may respond differently to the same code changes,
+#     owing to architectural differences (e.g., cache size, available
+#     parallelism) or differences in the software stack (e.g., compilers
+#     performing different optimizations).
+#     For some real-life examples, see the papers
+#     `here `__
+#     and
+#     `here `__.
+#
+# Computing the correct application efficiency values for MyApp and YourApp
+# requires that our dataset contains all of our historical performance results.
+# Since what we're really interested in understanding is the *latest*
+# application efficiency, we should take care to filter our data appropriately
+# before producing any plots.
+
+filtered = effs[(effs["application"] == "MyApp") & (effs["date"] == 2024)]
+filtered.plot(
+    kind="bar",
+    x="platform",
+    y="app eff",
+    xlabel="Platform",
+    ylabel="Application Efficiency",
+    legend=False,
+)
+plt.savefig("application_efficiency_bars_2024.png")
+
+# %%
+# Further Analysis
+# ----------------
+#
+# Computing application efficiency is often simply the first step of a
+# more detailed P3 analysis.
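+
+# %%
+# As a taste of what such an analysis might involve, one commonly used
+# summary of the "performance portability" mentioned earlier is the harmonic
+# mean of an application's efficiencies across a set of platforms (often
+# reported as zero if the application fails on any platform in the set). The
+# sketch below computes this directly with :py:mod:`pandas` for MyApp's
+# latest results; it is an illustration only and does not use any dedicated
+# functionality from the P3 Analysis Library.
+
+latest = effs[(effs["application"] == "MyApp") & (effs["date"] == 2024)]
+eff_values = latest["app eff"]
+if (eff_values > 0).all():
+    pp = len(eff_values) / (1.0 / eff_values).sum()
+else:
+    pp = 0.0
+print(f"Cross-platform summary (harmonic mean) for MyApp: {pp:.2f}")
+
+# %%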
+#
+# The examples below show how we can use the visualization capabilities
+# of the P3 Analysis Library to compare the efficiency of different
+# applications running across the same platform set, or to gain insight
+# into how an application's efficiency relates to the code it uses on each
+# platform.
+#
+# .. minigallery::
+#     :add-heading: Examples
+#
+#     ../../examples/cascade/plot_simple_cascade.py
+#     ../../examples/navchart/plot_simple_navchart.py