diff --git a/.eslintrc.js b/.eslintrc.js index a7d2a36..665374b 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -12,9 +12,16 @@ module.exports = { }, plugins: ['@typescript-eslint'], rules: { - '@typescript-eslint/interface-name-prefix': [ + '@typescript-eslint/naming-convention': [ 'error', - { prefixWithI: 'always' } + { + selector: 'interface', + format: ['PascalCase'], + custom: { + regex: '^I[A-Z]', + match: true + } + } ], '@typescript-eslint/no-unused-vars': ['warn', { args: 'none' }], '@typescript-eslint/no-explicit-any': 'off', diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..cb6aee8 --- /dev/null +++ b/.flake8 @@ -0,0 +1,10 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +[flake8] +filename = *.py, +exclude = __init__.py, *.egg, build, docs, .git +ignore = + # line break before binary operator + W503, + # whitespace before : + E203 diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..2027d97 --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,69 @@ +name: build + +on: + push: + branches: + - "branch-*" + tags: + - "v**" + - "!v**a" + workflow_dispatch: + inputs: + branch: + required: true + type: string + date: + required: true + type: string + sha: + required: true + type: string + build_type: + type: string + default: nightly + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} + cancel-in-progress: true + +jobs: + conda-python-build: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + # Package is pure Python and only ever requires one build. 
+ matrix_filter: 'map(select(.ARCH == "amd64" and (.LINUX_VER | test("centos")|not))) | sort_by(.PY_VER | split(".") | map(tonumber)) | [.[-1]]' + upload-conda: + needs: conda-python-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + wheel-build: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel.sh + # Package is pure Python and only ever requires one build. + matrix_filter: 'map(select((.LINUX_VER | test("centos")|not))) | sort_by((.PY_VER | split(".") | map(tonumber))) | [.[-1] + {ARCH: "amd64"}]' + wheel-publish: + needs: wheel-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: jupyterlab-nvdashboard diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml new file mode 100644 index 0000000..74b53e3 --- /dev/null +++ b/.github/workflows/pr.yaml @@ -0,0 +1,58 @@ +name: pr + +on: + push: + branches: + - "pull-request/[0-9]+" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + pr-builder: + needs: + - checks + - conda-python-build + - conda-python-tests + - wheel-build + - wheel-tests + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.02 + checks: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.02 + conda-python-build: + needs: checks + secrets: inherit + uses: 
rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 + with: + build_type: pull-request + # Package is pure Python and only ever requires one build. + matrix_filter: 'map(select(.ARCH == "amd64" and (.LINUX_VER | test("centos")|not))) | sort_by(.PY_VER | split(".") | map(tonumber)) | [.[-1]]' + conda-python-tests: + needs: conda-python-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.02 + with: + build_type: pull-request + # Package is pure Python and only ever requires one build. + matrix_filter: 'map(select(.ARCH == "amd64" and (.LINUX_VER | test("centos")|not))) | sort_by(.PY_VER | split(".") | map(tonumber)) | [.[-1]]' + wheel-build: + needs: checks + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + with: + build_type: pull-request + # Package is pure Python and only ever requires one build. + matrix_filter: 'map(select((.LINUX_VER | test("centos")|not))) | sort_by((.PY_VER | split(".") | map(tonumber))) | [.[-1] + {ARCH: "amd64"}]' + script: "ci/build_wheel.sh" + wheel-tests: + needs: wheel-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + with: + build_type: pull-request + # Package is pure Python and only ever requires one build. 
+ matrix_filter: 'map(select(.ARCH == "amd64" and (.LINUX_VER | test("centos")|not))) | sort_by(.PY_VER | split(".") | map(tonumber)) | [.[-1]]' + script: "ci/test_wheel.sh" \ No newline at end of file diff --git a/.gitignore b/.gitignore index 7b7de7e..ddfb290 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,15 @@ *.bundle.* lib/ node_modules/ +*.log +.eslintcache +.stylelintcache *.egg-info/ .ipynb_checkpoints *.tsbuildinfo jupyterlab_nvdashboard/labextension +# Version file is handled by hatchling +jupyterlab_nvdashboard/_version.py # Created by https://www.gitignore.io/api/python # Edit at https://www.gitignore.io/?templates=python @@ -14,8 +19,6 @@ jupyterlab_nvdashboard/labextension __pycache__/ *.py[cod] *$py.class -dask-worker-space/ -.pytest_cache/ # C extensions *.so @@ -58,6 +61,7 @@ htmlcov/ .coverage.* .cache nosetests.xml +coverage/ coverage.xml *.cover .hypothesis/ @@ -112,3 +116,6 @@ dmypy.json # OSX files .DS_Store + +# Yarn cache +.yarn/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..469e3f1 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,23 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +repos: + - repo: https://github.com/psf/black + rev: 23.11.0 + hooks: + - id: black + files: jupyterlab_nvdashboard/.* + # Explicitly specify the pyproject.toml at the repo root, not per-project. 
+ args: ['--config', 'pyproject.toml'] + - repo: https://github.com/PyCQA/flake8 + rev: 6.1.0 + hooks: + - id: flake8 + args: ['--config=.flake8'] + files: jupyterlab_nvdashboard/.*$ + - repo: https://github.com/rapidsai/dependency-file-generator + rev: v1.7.1 + hooks: + - id: rapids-dependency-file-generator + args: ['--clean'] +default_language_version: + python: python3 diff --git a/.yarnrc.yml b/.yarnrc.yml new file mode 100644 index 0000000..3186f3f --- /dev/null +++ b/.yarnrc.yml @@ -0,0 +1 @@ +nodeLinker: node-modules diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..92462fc --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,207 @@ +# JupyterLab NVdashboard Developer Guide + +This guide provides information on how to contribute to the JupyterLab v4 extension jupyterlab-nvdashboard. The project has two main components: a front-end React widget and a back-end Tornado server. + +## Development install + +> You will need NodeJS to build the extension package. + +The `jlpm` command is JupyterLab's pinned version of +[yarn](https://yarnpkg.com/) that is installed with JupyterLab. You may use +`yarn` or `npm` in lieu of `jlpm` below. + +```bash +# Clone the repo to your local environment +git clone https://github.com/rapidsai/jupyterlab-nvdashboard.git +# Change directory to the jupyterlab_nvdashboard directory +cd jupyterlab-nvdashboard +# Install package in development mode +pip install -e . +# Link your development version of the extension with JupyterLab +jupyter labextension develop . --overwrite +# Rebuild extension Typescript source after making changes +jlpm run build +``` + +You can watch the source directory and run JupyterLab at the same time in different terminals to watch for changes in the extension's source and automatically rebuild the extension. 
+ +```bash +# Watch the source directory in one terminal, automatically rebuilding when needed +jlpm run watch +# Run JupyterLab in another terminal +jupyter lab +``` + +With the watch command running, every saved change will immediately be built locally and available in your running JupyterLab. Refresh JupyterLab to load the change in your browser (you may need to wait several seconds for the extension to be rebuilt). + +By default, the `jlpm run build` command generates the source maps for this extension to make it easier to debug using the browser dev tools. To also generate source maps for the JupyterLab core extensions, you can run the following command: + +```bash +jupyter lab build --minimize=False +``` + +### Uninstall + +```bash +pip uninstall jupyterlab-nvdashboard +``` + +Releases for both packages are handled by [gpuCI](https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/jupyterlab-nvdashboard/). Nightly builds are triggered when a push to a versioned branch occurs (i.e. `branch-0.10`). Stable builds are triggered when a push to the `main` branch occurs. + +## Contributions + +### Front-End + +To contribute to the front-end of the extension, edit the files in the `src/` directory. + +```pre +├── src +│ ├── assets +│ │ ├── constants.ts +│ │ └── icons.ts +│ ├── charts +│ │ ├── GpuMemoryChart.tsx +│ │ ├── GpuResourceChart.tsx +│ │ ├── GpuUtilizationChart.tsx +│ │ ├── index.ts +│ │ ├── MachineResourceChart.tsx +│ │ ├── NvLinkThroughputChart.tsx +│ │ ├── NvLinkTimelineChart.tsx +│ │ └── PciThroughputChart.tsx +│ ├── components +│ │ ├── customLineChart.tsx +│ │ ├── formatUtils.tsx +│ │ └── tooltipUtils.tsx +│ ├── handler.ts +│ ├── index.ts +│ ├── launchWidget.tsx +│ └── svg.d.ts +├── style + └── base.css + +``` + +The main React widget is located in the `src/index.ts` file. + +#### `index.ts` + +This file initializes the JupyterLab extension. It sets up the necessary components, such as the ControlWidget and the associated commands. 
+ +#### `launchWidget.tsx` + +This file contains the ControlWidget class, which serves as the control component for the GPU Dashboard. It includes buttons to open various GPU widgets. + +#### `ControlWidget.tsx` + +This file defines the Control component, which contains buttons to open different GPU widgets. It also includes functions to handle widget creation and restoration. + +#### `src/charts Directory` + +The src/charts directory contains React components responsible for rendering various GPU-related charts. Each chart component focuses on a specific aspect of GPU statistics, and they are integrated into the JupyterLab environment via the main ControlWidget. + +Each chart contains: + +- Data Fetching: Each chart component fetches data from the server using the requestAPI function. + +- Real-time Updates: Some charts use a setInterval to continuously fetch and update data, providing a real-time view of the GPU metrics. + +- Recharts Library: The charts are implemented using the Recharts library, a React charting library that simplifies the process of creating interactive charts. + +#### Add an example new chart: + +```typescript + +import React, { useEffect, useState } from 'react'; +import { requestAPI } from '../handler'; +import { ReactWidget } from '@jupyterlab/ui-components'; +import { LineChart, Line, CartesianGrid, XAxis, YAxis, Tooltip } from 'recharts'; + +const NewChart = (): JSX.Element => { + const [chartData, setChartData] = useState([]); + + useEffect(() => { + const fetchData = async () => setChartData((await requestAPI('new_chart_endpoint')).data); + fetchData(); + }, []); + + return ( +
+ <div><h2>New Chart Title</h2> + <LineChart width={500} height={300} data={chartData}> + <CartesianGrid strokeDasharray="3 3" /> + <XAxis dataKey="time" /> + <YAxis /> + <Tooltip /> + <Line type="monotone" dataKey="value" /> + </LineChart></div>
+ ); +}; + +export class NewChartWidget extends ReactWidget { + render = (): JSX.Element => <NewChart />; +} +``` + +### Backend (Server) Part + +The server part of the extension is presented in this section. + +JupyterLab server is built on top of the Tornado Python package. To extend the server, your extension needs to be defined as a proper Python package with some hook functions: + +The directory `/jupyterlab_nvdashboard` contains server-side code, including handlers declared in `/jupyterlab_nvdashboard/apps/*`. + +#### Adding a New Server-Side Endpoint + +Example: Adding a GPU Resource Endpoint + +1. Modify `jupyterlab_nvdashboard/handlers.py`: + +```python + +from jupyter_server.utils import url_path_join +from . import apps + +URL_PATH = "nvdashboard" + +base_url = web_app.settings["base_url"] + +# Add a new route for GPU resource +route_pattern_gpu_resource = url_path_join( + base_url, URL_PATH, "gpu_resource" +) + +# Update handlers list +handlers += [ + (route_pattern_gpu_resource, apps.gpu.GPUResourceHandler), +] +``` + +2. Create the GPU Resource Handler in `jupyterlab_nvdashboard/apps/gpu.py`: + +```python +from jupyter_server.base.handlers import APIHandler +import json +import tornado.web + +class GPUResourceHandler(APIHandler): + @tornado.web.authenticated + def get(self): + # Implement GPU resource logic here + gpu_info = {"gpu_utilization": 70, "gpu_memory_usage": 512} + self.finish(json.dumps(gpu_info)) + +``` + +3. Testing: + +- Build and test your changes. Follow the JupyterLab documentation for building and testing extensions. + +- Launch JupyterLab and check if the new endpoint is accessible (e.g., http://localhost:8888/nvdashboard/gpu_resource). + +**Conclusion** + +This guide has provided a brief overview of how to contribute to the JupyterLab v4 extension jupyterlab-nvdashboard. 
For more information on developing JupyterLab extensions, please see the following resources: + +- Develop JupyterLab Extensions: https://jupyterlab.readthedocs.io/en/stable/extension/extension_dev.html +- Front-End React Widgets: https://jupyterlab.readthedocs.io/en/stable/extension/virtualdom.html +- Back-End Server Extensions: https://github.com/jupyterlab/extension-examples/tree/main/server-extension diff --git a/MANIFEST.in b/MANIFEST.in index a636781..d3933b5 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,14 +1,14 @@ include LICENSE include README.md include pyproject.toml -include jupyter-config/jupyterlab_nvdashboard.json +include jupyter-config/jupyterlab-nvdashboard.json include package.json include install.json include ts*.json include requirements.txt -graft jupyterlab_nvdashboard/labextension +graft jupyterlab-nvdashboard/labextension # Javascript files graft src diff --git a/README.md b/README.md index 0acf4c2..618f811 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,4 @@ -# JupyterLab NVDashboard - -![GPU Dashboard](demo.gif) - -![Github Actions Status](https://github.com/rapidsai/jupyterlab-nvdashboard/workflows/Build/badge.svg) +# JupyterLab NVdashboard NVDashboard is a JupyterLab extension for displaying GPU usage dashboards. It enables JupyterLab users to visualize system hardware metrics within the same interactive environment they use for development. Supported metrics include: @@ -11,82 +7,96 @@ NVDashboard is a JupyterLab extension for displaying GPU usage dashboards. It en - PCIe throughput - NVLink throughput +## Demo -This extension is composed of a Python package named `jupyterlab_nvdashboard` -for the server extension and a NPM package named `jupyterlab-nvdashboard` -for the frontend extension. 
+![JupyterLab-nvdashboard Demo](./docs/_images/screencast1.gif) +## Table of Contents -## Requirements +- [New Features](#new-features) + - [Brush for Time Series Charts](#brush-for-time-series-charts) + - [Synced Tooltips](#synced-tooltips) + - [Theme Compatibility](#theme-compatibility) +- [Version Compatibility](#version-compatibility) +- [Requirements](#requirements) +- [Installation](#installation) +- [Troubleshoot](#troubleshoot) +- [Contributing](#contributing-developers-guide) +- [Future Improvements](#future-improvements) -* JupyterLab >= 3.0 +## New Features -## Install +JupyterLab-nvdashboard v4 brings a host of new features, improved backend architecture, and enhanced frontend components for an even better user experience. +Explore the exciting updates below. -```bash -pip install jupyterlab_nvdashboard -``` +### Brush for Time Series Charts +Introducing a powerful brushing feature for time series charts. Users can easily inspect past events by selecting a specific time range, providing more granular control over data exploration. -## Troubleshoot +![JupyterLab-nvdashboard Demo1](./docs/_images/screencast2.gif) -If you are seeing the frontend extension, but it is not working, check -that the server extension is enabled: +### Synced Tooltips -```bash -jupyter server extension list -``` +For pages with multiple charts, JupyterLab-nvdashboard now offers synchronized tooltips for timestamps across all charts. This feature enhances the user's ability to analyze data cohesively and understand relationships between different data points. -If the server extension is installed and enabled, but you are not seeing -the frontend extension, check the frontend extension is installed: +![JupyterLab-nvdashboard Demo4](./docs/_images/screenshot3.png) -```bash -jupyter labextension list -``` +### Theme Compatibility + +Seamless integration with JupyterLab themes is now a reality. 
The extension adapts its colors and aesthetics based on whether the user is in a light or dark theme, ensuring a consistent and visually appealing experience. +#### Light Theme -## Contributing +![JupyterLab-nvdashboard Demo3](./docs/_images/screenshot2.png) -### Development install +#### Dark Theme -Note: You will need NodeJS to build the extension package. +![JupyterLab-nvdashboard Demo2](./docs/_images/screenshot1.png) -The `jlpm` command is JupyterLab's pinned version of -[yarn](https://yarnpkg.com/) that is installed with JupyterLab. You may use -`yarn` or `npm` in lieu of `jlpm` below. +## Version Compatibility + +JupyterLab-nvdashboard v4 is designed exclusively for JupyterLab v4 and later versions. To ensure continued support for JupyterLab v3 users, we will maintain the previous version separately (branch-0.9). + +## Requirements + +- JupyterLab >=4 +- pynvml +- psutil + +## Installation + +### Conda ```bash -# Clone the repo to your local environment -# Change directory to the jupyterlab_nvdashboard directory -# Install package in development mode -pip install -e . -# Link your development version of the extension with JupyterLab -jupyter labextension develop . --overwrite -# Rebuild extension Typescript source after making changes -jlpm run build +conda install -c rapidsai jupyterlab-nvdashboard ``` -You can watch the source directory and run JupyterLab at the same time in different terminals to watch for changes in the extension's source and automatically rebuild the extension. +### PyPI ```bash -# Watch the source directory in one terminal, automatically rebuilding when needed -jlpm run watch -# Run JupyterLab in another terminal -jupyter lab +pip install jupyterlab-nvdashboard ``` -With the watch command running, every saved change will immediately be built locally and available in your running JupyterLab. Refresh JupyterLab to load the change in your browser (you may need to wait several seconds for the extension to be rebuilt). 
+## Troubleshoot -By default, the `jlpm run build` command generates the source maps for this extension to make it easier to debug using the browser dev tools. To also generate source maps for the JupyterLab core extensions, you can run the following command: +If you are seeing the frontend extension, but it is not working, check +that the server extension is enabled: ```bash -jupyter lab build --minimize=False +jupyter server extension list ``` -### Uninstall +If the server extension is installed and enabled, but you are not seeing +the frontend extension, check the frontend extension is installed: ```bash -pip uninstall jupyterlab_nvdashboard +jupyter labextension list ``` -Releases for both packages are handled by [gpuCI](https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/jupyterlab-nvdashboard/). Nightly builds are triggered when a push to a versioned branch occurs (i.e. `branch-0.5`). Stable builds are triggered when a push to the `main` branch occurs. +## Contributing Developers Guide + +For more details, check out the [contributing guide](./CONTRIBUTING.md). + +## Future Improvements + +While we've introduced a range of exciting features in this release, we understand that there are always opportunities for improvement. We have noted a request to add cell execution markers to the charts. Due to the complexities associated with asynchronous cells, we have decided to defer this feature to a future update. Rest assured, we will explore this enhancement in subsequent releases. diff --git a/ci/build_python.sh b/ci/build_python.sh new file mode 100755 index 0000000..9d9d8ca --- /dev/null +++ b/ci/build_python.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. 
+ +# Exit script if any command fails +set -euo pipefail + +# Source rapids-env-update to set environment variables +source rapids-env-update + +# Print the Rapids environment for debugging purposes +rapids-print-env + +# Generate version and replace any letter with a hyphen +version=$(rapids-generate-version) +node_version=$(echo "$version" | sed 's/[a-zA-Z]/-\0/' | sed 's/^-//') + +# Update the version field in package.json +rapids-logger "Updating version in package.json to $node_version" +jq -e --arg tag "$node_version" '.version=$tag' package.json > package.json.tmp +mv package.json.tmp package.json + +# Generate jupyterlab_nvdashboard/_version.py since hatch version hook isn't working with conda-build +echo "__version__ = '$version'" > jupyterlab_nvdashboard/_version.py + +# TODO: Remove `--no-test` flag once importing on a CPU +# node works correctly +rapids-logger "Building JupyterLab NVDashboard conda package" +RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild --no-test conda/recipes/jupyterlab-nvdashboard --output + +rapids-logger "Uploading JupyterLab NVDashboard conda package to S3" +rapids-upload-conda-to-s3 python diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh new file mode 100755 index 0000000..5a765f1 --- /dev/null +++ b/ci/build_wheel.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. + +# Exit script if any command fails +set -euo pipefail + +# Set the package name +package_name="jupyterlab-nvdashboard" + +# Configure sccache and set the date string +source rapids-configure-sccache +source rapids-date-string + +rapids-logger "Install Node.js required for building the extension front-end" + +# Install NVM for managing Node.js versions +curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.5/install.sh | bash +export NVM_DIR="$HOME/.nvm" && [ -s "$NVM_DIR/nvm.sh" ] && \. 
"$NVM_DIR/nvm.sh" + +# Install Node.js required for building the extension front-end +nvm install 18 && nvm use 18 + +# Generate version and replace any letter with a hyphen +version=$(rapids-generate-version) +node_version=$(echo "$version" | sed 's/[a-zA-Z]/-\0/' | sed 's/^-//') + +# Log message: Update the version field in package.json +rapids-logger "Updating version in package.json to $node_version" +jq -e --arg tag "$node_version" '.version=$tag' package.json > package.json.tmp +mv package.json.tmp package.json + +# Log message: Begin py build +rapids-logger "Begin py build" + +# Install build tools for Python +python -m pip install build + +# Build the Python package +python -m build -s -w + +rapids-logger "Uploading JupyterLab NVDashboard wheels to S3" +# Upload Python wheels to S3 +RAPIDS_PY_WHEEL_NAME="${package_name}" rapids-upload-wheels-to-s3 dist diff --git a/ci/check_style.sh b/ci/check_style.sh new file mode 100755 index 0000000..4b54ba0 --- /dev/null +++ b/ci/check_style.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Copyright (c) 2020-2022, NVIDIA CORPORATION. + +set -euo pipefail + +rapids-logger "Create checks conda environment" +. /opt/conda/etc/profile.d/conda.sh + +rapids-dependency-file-generator \ + --output conda \ + --file_key checks \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml + +rapids-mamba-retry env create --force -f env.yaml -n checks +conda activate checks + +rapids-logger "Run pre-commit checks - Python backend" +# Run pre-commit checks +pre-commit run --hook-stage manual --all-files --show-diff-on-failure + +rapids-logger "eslint:check - TS frontend" +# Run eslint checks +jlpm install +jlpm run eslint:check \ No newline at end of file diff --git a/ci/checks/style.sh b/ci/checks/style.sh deleted file mode 100755 index 0351c81..0000000 --- a/ci/checks/style.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright (c) 2020, NVIDIA CORPORATION. 
- -# Ignore errors and set path -set +e -PATH=/opt/conda/bin:$PATH - -# Activate common conda env -. /opt/conda/etc/profile.d/conda.sh -conda activate rapids - -# Run flake8 and get results/return code -FLAKE=`flake8 --exclude=src,style,test` -RETVAL=$? - -# Output results if failure otherwise show pass -if [ "$FLAKE" != "" ]; then - echo -e "\n\n>>>> FAILED: flake8 style check; begin output\n\n" - echo -e "$FLAKE" - echo -e "\n\n>>>> FAILED: flake8 style check; end output\n\n" -else - echo -e "\n\n>>>> PASSED: flake8 style check\n\n" -fi - -#TODO Fix flake8 issues then re-enable this check -#exit $RETVAL -exit 0 \ No newline at end of file diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh deleted file mode 100755 index bb50819..0000000 --- a/ci/cpu/build.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/bin/bash -# Copyright (c) 2020-2022, NVIDIA CORPORATION. -set -e - -# Set path and build parallel level -export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH -export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} - -# Set home to the job's workspace -export HOME="$WORKSPACE" - -# Switch to project root; also root of repo checkout -cd "$WORKSPACE" - -# Get latest tag and number of commits since tag -export GIT_DESCRIBE_TAG=`git describe --abbrev=0 --tags` -export GIT_DESCRIBE_NUMBER=`git rev-list ${GIT_DESCRIBE_TAG}..HEAD --count` -export RAPIDS_DATE_STRING=$(date +%y%m%d) - -# Setup 'gpuci_conda_retry' for build retries (results in 2 total attempts) -export GPUCI_CONDA_RETRY_MAX=1 -export GPUCI_CONDA_RETRY_SLEEP=30 - -################################################################################ -# SETUP - Check environment -################################################################################ - -gpuci_logger "Get env" -env - -gpuci_logger "Activate conda env" -. 
/opt/conda/etc/profile.d/conda.sh -conda activate rapids - -gpuci_logger "Check versions" -python --version -$CC --version -$CXX --version -conda info -conda config --show-sources -conda list --show-channel-urls - -# FIX Added to deal with Anancoda SSL verification issues during conda builds -conda config --set ssl_verify False - -# FIXME: Remove -gpuci_mamba_retry install -c conda-forge boa - -################################################################################ -# BUILD - Conda & pip package -################################################################################ - -gpuci_logger "Build conda pkg for jupyterlab-nvdashboard" -gpuci_conda_retry mambabuild conda/recipes/jupyterlab-nvdashboard --python=$PYTHON - -gpuci_logger "Build pip pkg for jupyterlab-nvdashboard" -rm -rf dist/ -python setup.py sdist bdist_wheel - -################################################################################ -# UPLOAD - Packages -################################################################################ - -gpuci_logger "Upload packages" -source ci/cpu/upload.sh diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh deleted file mode 100755 index f78dccc..0000000 --- a/ci/cpu/upload.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -# Copyright (c) 2020, NVIDIA CORPORATION. 
- -set -e - -# Setup 'gpuci_retry' for upload retries (results in 4 total attempts) -export GPUCI_RETRY_MAX=3 -export GPUCI_RETRY_SLEEP=30 - -# Set default label options if they are not defined elsewhere -export LABEL_OPTION=${LABEL_OPTION:-"--label main"} - -# Skip uploads unless BUILD_MODE == "branch" -if [ ${BUILD_MODE} != "branch" ]; then - echo "Skipping upload" - return 0 -fi - -# Skip uploads if there is no upload key -if [ -z "$MY_UPLOAD_KEY" ]; then - echo "No upload key" - return 0 -fi - -if [ -z "$TWINE_PASSWORD" ]; then - echo "TWINE_PASSWORD not set" - return 0 -fi - -################################################################################ -# SETUP - Get conda file output locations -################################################################################ - -gpuci_logger "Get conda file output locations" -export JUPYTERLAB_NVDASHBOARD_FILE=`conda build conda/recipes/jupyterlab-nvdashboard --output` - -################################################################################ -# UPLOAD - Conda packages -################################################################################ - -gpuci_logger "Starting conda uploads" - -gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${JUPYTERLAB_NVDASHBOARD_FILE} --no-progress - - -echo "Upload pypi" -twine upload --skip-existing -u ${TWINE_USERNAME:-rapidsai} dist/* \ No newline at end of file diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh deleted file mode 100755 index d44847d..0000000 --- a/ci/gpu/build.sh +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/bash -# Copyright (c) 2020, NVIDIA CORPORATION. 
-set -e -NUMARGS=$# -ARGS=$* - -# Arg parsing function -function hasArg { - (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ") -} - -# Set path and build parallel level -export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH -export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} -export CUDA_REL=${CUDA_VERSION%.*} - -# Set home to the job's workspace -export HOME=$WORKSPACE - -# Parse git describe -cd $WORKSPACE -export GIT_DESCRIBE_TAG=`git describe --abbrev=0 --tags` -export GIT_DESCRIBE_NUMBER=`git rev-list ${GIT_DESCRIBE_TAG}..HEAD --count` -export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` -export RAPIDS_DATE_STRING=$(date +%y%m%d) - -################################################################################ -# SETUP - Check environment -################################################################################ - -gpuci_logger "Check environment" -env - -gpuci_logger "Check GPU usage" -nvidia-smi - -gpuci_logger "Activate conda env" -. /opt/conda/etc/profile.d/conda.sh -conda activate rapids - -################################################################################ -# TEST -################################################################################ - -if hasArg --skip-tests; then - gpuci_logger "Skipping Tests" -else - gpuci_logger "Check GPU usage" - nvidia-smi - - cd $WORKSPACE - python -m pip install . 
- - gpuci_logger "Python py.test for jupyterlab_nvdashboard" - py.test --cache-clear --junitxml=${WORKSPACE}/junit-nvstrings.xml -v jupyterlab_nvdashboard - - gpuci_logger "Node jlpm test for jupyterlab_nvdashboard" - jlpm install - jlpm run eslint:check - jlpm test - - gpuci_logger "Jupyter extension installation test for jupyterlab_nvdashboard" - jupyter labextension list 2>&1 | grep -ie "jupyterlab-nvdashboard.*OK" -fi diff --git a/ci/test_python.sh b/ci/test_python.sh new file mode 100755 index 0000000..8eedcf9 --- /dev/null +++ b/ci/test_python.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. + +set -euo pipefail + +. /opt/conda/etc/profile.d/conda.sh + +rapids-logger "Generate Python testing dependencies" +rapids-dependency-file-generator \ + --output conda \ + --file_key test_python \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml + +rapids-mamba-retry env create --force -f env.yaml -n test + +# Temporarily allow unbound variables for conda activation. +set +u +conda activate test +set -u + +# rapids-logger "Downloading artifacts from previous jobs" +PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) + +rapids-print-env + +rapids-mamba-retry install \ + --channel "${PYTHON_CHANNEL}" \ + jupyterlab-nvdashboard + +rapids-logger "Check GPU usage" +nvidia-smi + +EXITCODE=0 +trap "EXITCODE=1" ERR +set +e + +rapids-logger "pytest jupyterlab-nvdashboard" +JUPYTER_PLATFORM_DIRS=1 python -m pytest + +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh new file mode 100755 index 0000000..8497bd5 --- /dev/null +++ b/ci/test_wheel.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. 
+ +set -eou pipefail + +# Set the package name +package_name="jupyterlab-nvdashboard" + +rapids-logger "Downloading artifacts from previous jobs" +RAPIDS_PY_WHEEL_NAME="${package_name}" rapids-download-wheels-from-s3 ./dist + +# echo to expand wildcard before adding `[extra]` required for pip +python -m pip install $(echo ./dist/jupyterlab_nvdashboard*.whl)[test] + +rapids-logger "Check GPU usage" +nvidia-smi + +EXITCODE=0 +trap "EXITCODE=1" ERR +set +e + +rapids-logger "pytest jupyterlab-nvdashboard" +JUPYTER_PLATFORM_DIRS=1 python -m pytest + +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/conda/environments/all_arch-any.yaml b/conda/environments/all_arch-any.yaml new file mode 100644 index 0000000..93dd1ab --- /dev/null +++ b/conda/environments/all_arch-any.yaml @@ -0,0 +1,17 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +channels: +- rapidsai +- rapidsai-nightly +- conda-forge +- nvidia +dependencies: +- jupyterlab>=4 +- nodejs=18 +- pre-commit +- psutil +- pynvml +- pytest +- pytest-jupyter[server]>=0.6.0 +- python>=3.8 +name: all_arch-any diff --git a/conda/recipes/jupyterlab-nvdashboard/meta.yaml b/conda/recipes/jupyterlab-nvdashboard/meta.yaml index 1941ae6..0bf73c2 100644 --- a/conda/recipes/jupyterlab-nvdashboard/meta.yaml +++ b/conda/recipes/jupyterlab-nvdashboard/meta.yaml @@ -1,6 +1,4 @@ -{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} -{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} -{% set py_version=environ.get('CONDA_PY', 37) %} +{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %} {% set date_string = environ['RAPIDS_DATE_STRING'] %} package: @@ -15,22 +13,19 @@ build: string: py_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} script: python -m pip install . 
--no-deps --ignore-installed --no-cache-dir -vvv noarch: python - script_env: - - NODE_OPTIONS=--openssl-legacy-provider requirements: host: - - python >=3.7 + - python >=3.8 - setuptools - - pynvml - - psutil - - nodejs !=16.10.0 - - jupyter-packaging >=0.7.0,<0.8 + - nodejs =18 + - hatchling + - hatch-nodejs-version + - hatch-jupyter-builder + - jupyterlab >=4 run: - - python >=3.7 - - jupyterlab >=3.0.0,<4 - - jupyter-server-proxy >=1.3.2 - - bokeh >2.1 + - python >=3.8 + - jupyterlab >=4 - pynvml - psutil @@ -38,9 +33,6 @@ requirements: test: imports: - jupyterlab_nvdashboard - commands: - - test -f ${PREFIX}/share/jupyter/labextensions/jupyterlab-nvdashboard/package.json # [unix] - - if exist %PREFIX%\\share\\jupyter\\labextensions\\jupyterlab-nvdashboard\\package.json (exit 0) else (exit 1) # [win] about: home: https://rapids.ai diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..5badf1c --- /dev/null +++ b/conftest.py @@ -0,0 +1,10 @@ +import pytest + +pytest_plugins = ("pytest_jupyter.jupyter_server",) + + +@pytest.fixture +def jp_server_config(jp_server_config): + return { + "ServerApp": {"jpserver_extensions": {"jupyterlab_nvdashboard": True}} + } diff --git a/demo/GPU Dashboard Demo.ipynb b/demo/GPU Dashboard Demo.ipynb index db42d44..e50f5f2 100644 --- a/demo/GPU Dashboard Demo.ipynb +++ b/demo/GPU Dashboard Demo.ipynb @@ -20,7 +20,7 @@ "metadata": {}, "outputs": [], "source": [ - "gdf = dask_cudf.read_csv('/datasets/nyc_taxi/**/*')" + "gdf = dask_cudf.read_csv('/datasets/nyc_taxi/concatenated_csv_files/*')" ] }, { @@ -40,7 +40,7 @@ { "data": { "text/plain": [ - "336960335" + "777584079" ] }, "execution_count": 4, @@ -62,7 +62,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -76,7 +76,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.10.12" } }, 
"nbformat": 4, diff --git a/dependencies.yaml b/dependencies.yaml new file mode 100644 index 0000000..d7403c8 --- /dev/null +++ b/dependencies.yaml @@ -0,0 +1,82 @@ +# Dependency list for https://github.com/rapidsai/dependency-file-generator +files: + all: + output: conda + matrix: + arch: ['any'] + includes: + - build_wheels + - checks + - py_version + - run + - test_python + test_python: + output: none + includes: + - py_version + - test_python + checks: + output: none + includes: + - checks + - py_version + py_build: + output: pyproject + pyproject_dir: ./ + extras: + table: build-system + includes: + - build_wheels + py_run: + output: pyproject + pyproject_dir: ./ + extras: + table: project + includes: + - run + py_test: + output: pyproject + pyproject_dir: ./ + extras: + table: project.optional-dependencies + key: test + includes: + - test_python +channels: + - rapidsai + - rapidsai-nightly + - conda-forge + - nvidia +dependencies: + build_wheels: + common: + - output_types: pyproject + packages: + - hatchling>=1.5.0 + - jupyterlab>=4.0.0,<5 + - hatch-nodejs-version>=0.3.2 + checks: + common: + - output_types: [conda, requirements] + packages: + - pre-commit + - jupyterlab>=4 + - nodejs=18 + py_version: + common: + - output_types: conda + packages: + - python>=3.8 + run: + common: + - output_types: [conda, requirements, pyproject] + packages: + - jupyterlab>=4 + - pynvml + - psutil + test_python: + common: + - output_types: [conda, requirements, pyproject] + packages: + - pytest + - pytest-jupyter[server]>=0.6.0 diff --git a/docs/_images/screencast1.gif b/docs/_images/screencast1.gif new file mode 100644 index 0000000..c5dc551 Binary files /dev/null and b/docs/_images/screencast1.gif differ diff --git a/docs/_images/screencast2.gif b/docs/_images/screencast2.gif new file mode 100644 index 0000000..2e19e12 Binary files /dev/null and b/docs/_images/screencast2.gif differ diff --git a/docs/_images/screenshot1.png b/docs/_images/screenshot1.png new file mode 
100644 index 0000000..8ed71c6 Binary files /dev/null and b/docs/_images/screenshot1.png differ diff --git a/docs/_images/screenshot2.png b/docs/_images/screenshot2.png new file mode 100644 index 0000000..e147220 Binary files /dev/null and b/docs/_images/screenshot2.png differ diff --git a/docs/_images/screenshot3.png b/docs/_images/screenshot3.png new file mode 100644 index 0000000..a2e5d41 Binary files /dev/null and b/docs/_images/screenshot3.png differ diff --git a/jupyter-config/server-config/jupyterlab_nvdashboard.json b/jupyter-config/server-config/jupyterlab_nvdashboard.json new file mode 100644 index 0000000..a1d27b9 --- /dev/null +++ b/jupyter-config/server-config/jupyterlab_nvdashboard.json @@ -0,0 +1,7 @@ +{ + "ServerApp": { + "jpserver_extensions": { + "jupyterlab_nvdashboard": true + } + } +} diff --git a/jupyterlab_nvdashboard/__init__.py b/jupyterlab_nvdashboard/__init__.py index c0f7d21..f7ed3d9 100644 --- a/jupyterlab_nvdashboard/__init__.py +++ b/jupyterlab_nvdashboard/__init__.py @@ -1,22 +1,34 @@ -""" -Return config on servers to start for bokeh -See https://jupyter-server-proxy.readthedocs.io/en/latest/server-process.html -for more information. -""" -import os -import sys +try: + from ._version import __version__ +except ImportError: + # Fallback when using the package in dev mode without installing + # in editable mode with pip. It is highly recommended to install + # the package from a stable release or in editable mode: https://pip.pypa.io/en/stable/topics/local-project-installs/#editable-installs + import warnings -serverfile = os.path.join(os.path.dirname(__file__), "server.py") + warnings.warn( + "Importing 'jupyterlab_nvdashboard' outside a proper installation." 
+ ) + __version__ = "dev" +from .handlers import setup_handlers -def launch_server(): - return {"command": [sys.executable, serverfile, "{port}"], "timeout": 20, "launcher_entry": {"enabled": False}} +def _jupyter_labextension_paths(): + return [{"src": "labextension", "dest": "jupyterlab-nvdashboard"}] -def _jupyter_labextension_paths(): - return [ - { - "src": "labextension", - "dest": "jupyterlab-nvdashboard", - } - ] +def _jupyter_server_extension_points(): + return [{"module": "jupyterlab_nvdashboard"}] + + +def _load_jupyter_server_extension(server_app): + """Registers the API handler to receive HTTP requests from the frontend extension. + + Parameters + ---------- + server_app: jupyterlab.labapp.LabApp + JupyterLab application instance + """ + setup_handlers(server_app.web_app) + name = "jupyterlab_nvdashboard" + server_app.log.info(f"Registered {name} server extension") diff --git a/jupyterlab_nvdashboard/_version.py b/jupyterlab_nvdashboard/_version.py deleted file mode 100644 index 8e56d52..0000000 --- a/jupyterlab_nvdashboard/_version.py +++ /dev/null @@ -1,20 +0,0 @@ -__all__ = ["__version__"] - - -def _fetchVersion(): - import json - import os - - HERE = os.path.abspath(os.path.dirname(__file__)) - - for d, _, _ in os.walk(HERE): - try: - with open(os.path.join(d, "package.json")) as f: - return json.load(f)["version"] - except FileNotFoundError: - pass - - raise FileNotFoundError("Could not find package.json under dir {}".format(HERE)) - - -__version__ = _fetchVersion() diff --git a/jupyterlab_nvdashboard/apps/cpu.py b/jupyterlab_nvdashboard/apps/cpu.py index d7f7f45..0bbf494 100644 --- a/jupyterlab_nvdashboard/apps/cpu.py +++ b/jupyterlab_nvdashboard/apps/cpu.py @@ -1,149 +1,22 @@ -from bokeh.plotting import figure, ColumnDataSource -from bokeh.models import DataRange1d, NumeralTickFormatter -from bokeh.layouts import column -from bokeh.models.mappers import LinearColorMapper -from bokeh.palettes import all_palettes - +import json import psutil 
import time +import tornado +from jupyter_server.base.handlers import APIHandler -def cpu(doc): - fig = figure( - title="CPU Utilization [%]", sizing_mode="stretch_both", y_range=[0, 100] - ) - - cpu = psutil.cpu_percent(percpu=True) - left = list(range(len(cpu))) - right = [l + 0.8 for l in left] - - source = ColumnDataSource({"left": left, "right": right, "cpu": cpu}) - mapper = LinearColorMapper(palette=all_palettes["RdYlBu"][4], low=0, high=100) - - fig.quad( - source=source, - left="left", - right="right", - bottom=0, - top="cpu", - color={"field": "cpu", "transform": mapper}, - ) - - doc.title = "CPU Usage" - doc.add_root(fig) - - def cb(): - source.data.update({"cpu": psutil.cpu_percent(percpu=True)}) - - doc.add_periodic_callback(cb, 200) - - -def resource_timeline(doc): - - # Shared X Range for all plots - x_range = DataRange1d(follow="end", follow_interval=20000, range_padding=0) - tools = "reset,xpan,xwheel_zoom" - - source = ColumnDataSource( - { - "time": [], - "memory": [], - "cpu": [], - "disk-read": [], - "disk-write": [], - "net-read": [], - "net-sent": [], - } - ) - - memory_fig = figure( - title="Memory", - sizing_mode="stretch_both", - x_axis_type="datetime", - y_range=[0, psutil.virtual_memory().total], - x_range=x_range, - tools=tools, - ) - memory_fig.line(source=source, x="time", y="memory") - memory_fig.yaxis.formatter = NumeralTickFormatter(format="0.0b") - - cpu_fig = figure( - title="CPU", - sizing_mode="stretch_both", - x_axis_type="datetime", - y_range=[0, 100], - x_range=x_range, - tools=tools, - ) - cpu_fig.line(source=source, x="time", y="cpu") - - disk_fig = figure( - title="Disk I/O Bandwidth", - sizing_mode="stretch_both", - x_axis_type="datetime", - x_range=x_range, - tools=tools, - ) - disk_fig.line(source=source, x="time", y="disk-read", color="blue", legend_label="Read") - disk_fig.line(source=source, x="time", y="disk-write", color="red", legend_label="Write") - disk_fig.yaxis.formatter = NumeralTickFormatter(format="0.0b") 
- disk_fig.legend.location = "top_left" - - net_fig = figure( - title="Network I/O Bandwidth", - sizing_mode="stretch_both", - x_axis_type="datetime", - x_range=x_range, - tools=tools, - ) - net_fig.line(source=source, x="time", y="net-read", color="blue", legend_label="Recv") - net_fig.line(source=source, x="time", y="net-sent", color="red", legend_label="Send") - net_fig.yaxis.formatter = NumeralTickFormatter(format="0.0b") - net_fig.legend.location = "top_left" - - doc.title = "Resource Timeline" - doc.add_root( - column(cpu_fig, memory_fig, disk_fig, net_fig, sizing_mode="stretch_both") - ) - - last_disk_read = psutil.disk_io_counters().read_bytes - last_disk_write = psutil.disk_io_counters().write_bytes - last_net_recv = psutil.net_io_counters().bytes_recv - last_net_sent = psutil.net_io_counters().bytes_sent - last_time = time.time() - - def cb(): - nonlocal last_disk_read, last_disk_write, last_net_recv, last_net_sent, last_time - +class CPUResourceHandler(APIHandler): + @tornado.web.authenticated + def get(self): now = time.time() - cpu = psutil.cpu_percent() - mem = psutil.virtual_memory().used - - disk = psutil.disk_io_counters() - disk_read = disk.read_bytes - disk_write = disk.write_bytes - - net = psutil.net_io_counters() - net_read = net.bytes_recv - net_sent = net.bytes_sent - - source.stream( - { - "time": [now * 1000], # bokeh measures in ms - "cpu": [cpu], - "memory": [mem], - "disk-read": [(disk_read - last_disk_read) / (now - last_time)], - "disk-write": [(disk_write - last_disk_write) / (now - last_time)], - "net-read": [(net_read - last_net_recv) / (now - last_time)], - "net-sent": [(net_sent - last_net_sent) / (now - last_time)], - }, - 1000, - ) - - last_disk_read = disk_read - last_disk_write = disk_write - last_net_recv = net_read - last_net_sent = net_sent - last_time = now - - doc.add_periodic_callback(cb, 200) + stats = { + "time": now * 1000, + "cpu_utilization": psutil.cpu_percent(), + "memory_usage": psutil.virtual_memory().used, + 
"disk_read": psutil.disk_io_counters().read_bytes, + "disk_write": psutil.disk_io_counters().write_bytes, + "network_read": psutil.net_io_counters().bytes_recv, + "network_write": psutil.net_io_counters().bytes_sent, + } + self.set_header("Content-Type", "application/json") + self.write(json.dumps(stats)) diff --git a/jupyterlab_nvdashboard/apps/gpu.py b/jupyterlab_nvdashboard/apps/gpu.py index 548d957..4cbaa96 100644 --- a/jupyterlab_nvdashboard/apps/gpu.py +++ b/jupyterlab_nvdashboard/apps/gpu.py @@ -1,23 +1,12 @@ -from bokeh.plotting import figure, ColumnDataSource -from bokeh.models import DataRange1d, NumeralTickFormatter, BasicTicker -from bokeh.layouts import column -from bokeh.models.mappers import LinearColorMapper -from bokeh.palettes import all_palettes - -import math -import time - +import json import pynvml - -from jupyterlab_nvdashboard.utils import format_bytes - -KB = 1e3 -MB = KB * KB -GB = MB * KB +import time +import tornado +from jupyter_server.base.handlers import APIHandler try: pynvml.nvmlInit() -except pynvml.nvml.NVMLError_LibraryNotFound as error: +except pynvml.nvml.NVMLError_LibraryNotFound: ngpus = 0 gpu_handles = [] else: @@ -25,509 +14,88 @@ gpu_handles = [pynvml.nvmlDeviceGetHandleByIndex(i) for i in range(ngpus)] try: nvlink_ver = pynvml.nvmlDeviceGetNvLinkVersion(gpu_handles[0], 0) + links = [ + getattr(pynvml, f"NVML_FI_DEV_NVLINK_SPEED_MBPS_L{i}", "") + for i in range(pynvml.NVML_NVLINK_MAX_LINKS) + if hasattr(pynvml, f"NVML_FI_DEV_NVLINK_SPEED_MBPS_L{i}") + ] + + bandwidth = [ + pynvml.nvmlDeviceGetFieldValues(handle, links) + for handle in gpu_handles + ] + + # Maximum bandwidth is bidirectional, divide by 2 for separate RX & TX + max_bw = ( + max( + sum(i.value.ullVal for i in bw) * 1024**2 for bw in bandwidth + ) + / 2 + ) except (IndexError, pynvml.nvml.NVMLError_NotSupported): nvlink_ver = None + max_bw = [] try: pci_gen = pynvml.nvmlDeviceGetMaxPcieLinkGeneration(gpu_handles[0]) except (IndexError, 
pynvml.nvml.NVMLError_NotSupported): pci_gen = None -def gpu(doc): - fig = figure(title="GPU Utilization", sizing_mode="stretch_both", x_range=[0, 100]) - def get_utilization(): - return [ +class GPUUtilizationHandler(APIHandler): + @tornado.web.authenticated + def get(self): + gpu_utilization = [ pynvml.nvmlDeviceGetUtilizationRates(gpu_handles[i]).gpu for i in range(ngpus) ] + self.finish(json.dumps({"gpu_utilization": gpu_utilization})) - gpu = get_utilization() - y = list(range(len(gpu))) - source = ColumnDataSource({"right": y, "gpu": gpu}) - mapper = LinearColorMapper(palette=all_palettes["RdYlBu"][4], low=0, high=100) - - fig.hbar( - source=source, - y="right", - right="gpu", - height=0.8, - color={"field": "gpu", "transform": mapper}, - ) - - fig.toolbar_location = None - - doc.title = "GPU Utilization [%]" - doc.add_root(fig) - - def cb(): - source.data.update({"gpu": get_utilization()}) - - doc.add_periodic_callback(cb, 200) - - -def gpu_mem(doc): - def get_mem(): - return [pynvml.nvmlDeviceGetMemoryInfo(handle).used for handle in gpu_handles] - - def get_total(): - return pynvml.nvmlDeviceGetMemoryInfo(gpu_handles[0]).total - - fig = figure( - title="GPU Memory", sizing_mode="stretch_both", x_range=[0, get_total()] - ) - - gpu = get_mem() - - y = list(range(len(gpu))) - source = ColumnDataSource({"right": y, "gpu": gpu}) - mapper = LinearColorMapper( - palette=all_palettes["RdYlBu"][8], low=0, high=get_total() - ) - - fig.hbar( - source=source, - y="right", - right="gpu", - height=0.8, - color={"field": "gpu", "transform": mapper}, - ) - fig.xaxis[0].formatter = NumeralTickFormatter(format="0.0 b") - fig.xaxis.major_label_orientation = -math.pi / 12 - - fig.toolbar_location = None - - doc.title = "GPU Memory" - doc.add_root(fig) - - def cb(): - mem = get_mem() - source.data.update({"gpu": mem}) - fig.title.text = "GPU Memory: {}".format(format_bytes(sum(mem))) - - doc.add_periodic_callback(cb, 200) - - -def pci(doc): - - # Use device-0 to get "upper 
bound" - pci_width = pynvml.nvmlDeviceGetMaxPcieLinkWidth(gpu_handles[0]) - pci_bw = { - # Keys = PCIe-Generation, Values = Max PCIe Lane BW (per direction) - # [Note: Using specs at https://en.wikipedia.org/wiki/PCI_Express] - 1: (250.0 * MB), - 2: (500.0 * MB), - 3: (985.0 * MB), - 4: (1969.0 * MB), - 5: (3938.0 * MB), - 6: (7877.0 * MB), - } - # Max PCIe Throughput = BW-per-lane * Width - max_rxtx_tp = pci_width * pci_bw[pci_gen] - - pci_tx = [ - pynvml.nvmlDeviceGetPcieThroughput( - gpu_handles[i], pynvml.NVML_PCIE_UTIL_TX_BYTES - ) - * KB - for i in range(ngpus) - ] - pci_rx = [ - pynvml.nvmlDeviceGetPcieThroughput( - gpu_handles[i], pynvml.NVML_PCIE_UTIL_RX_BYTES - ) - * KB - for i in range(ngpus) - ] - - left = list(range(ngpus)) - right = [l + 0.8 for l in left] - source = ColumnDataSource( - {"left": left, "right": right, "pci-tx": pci_tx, "pci-rx": pci_rx} - ) - mapper = LinearColorMapper( - palette=all_palettes["RdYlBu"][4], low=0, high=max_rxtx_tp - ) - - tx_fig = figure( - title="TX PCIe [B/s]", sizing_mode="stretch_both", y_range=[0, max_rxtx_tp] - ) - tx_fig.quad( - source=source, - left="left", - right="right", - bottom=0, - top="pci-tx", - color={"field": "pci-tx", "transform": mapper}, - ) - tx_fig.yaxis.formatter = NumeralTickFormatter(format="0.0 b") - tx_fig.toolbar_location = None - - rx_fig = figure( - title="RX PCIe [B/s]", sizing_mode="stretch_both", y_range=[0, max_rxtx_tp] - ) - rx_fig.quad( - source=source, - left="left", - right="right", - bottom=0, - top="pci-rx", - color={"field": "pci-rx", "transform": mapper}, - ) - rx_fig.yaxis.formatter = NumeralTickFormatter(format="0.0 b") - rx_fig.toolbar_location = None - - doc.title = "PCIe Throughput" - doc.add_root(column(tx_fig, rx_fig, sizing_mode="stretch_both")) - - def cb(): - src_dict = {} - src_dict["pci-tx"] = [ - pynvml.nvmlDeviceGetPcieThroughput( - gpu_handles[i], pynvml.NVML_PCIE_UTIL_TX_BYTES - ) - * KB - for i in range(ngpus) - ] - src_dict["pci-rx"] = [ - 
pynvml.nvmlDeviceGetPcieThroughput( - gpu_handles[i], pynvml.NVML_PCIE_UTIL_RX_BYTES - ) - * KB - for i in range(ngpus) +class GPUUsageHandler(APIHandler): + @tornado.web.authenticated + def get(self): + memory_usage = [ + pynvml.nvmlDeviceGetMemoryInfo(handle).used + for handle in gpu_handles ] - source.data.update(src_dict) - - doc.add_periodic_callback(cb, 200) - - -def _get_nvlink_throughput(): - throughput = [ - pynvml.nvmlDeviceGetFieldValues( - handle, - [ - (pynvml.NVML_FI_DEV_NVLINK_THROUGHPUT_DATA_RX, scope_id) - for scope_id in range(pynvml.NVML_NVLINK_MAX_LINKS) - ] + - [ - (pynvml.NVML_FI_DEV_NVLINK_THROUGHPUT_DATA_TX, scope_id) - for scope_id in range(pynvml.NVML_NVLINK_MAX_LINKS) - ] - ) - for handle in gpu_handles - ] - - # Output is given in KiB, thus multiply by 1024 for result in bytes - # First `pynvml.NVML_NVLINK_MAX_LINKS` contain RX throughput, last - # `pynvml.NVML_NVLINK_MAX_LINKS` contain TX - return { - "rx": [ - sum(t[i].value.ullVal * 1024 - for i in range(pynvml.NVML_NVLINK_MAX_LINKS)) - for t in throughput - ], - "tx": [ - sum(t[pynvml.NVML_NVLINK_MAX_LINKS + i].value.ullVal * 1024 - for i in range(pynvml.NVML_NVLINK_MAX_LINKS)) - for t in throughput - ], - } - - -def _get_max_bandwidth(): - links = [ - getattr(pynvml, f"NVML_FI_DEV_NVLINK_SPEED_MBPS_L{i}") - for i in range(pynvml.NVML_NVLINK_MAX_LINKS) - ] - - bandwidth = [ - pynvml.nvmlDeviceGetFieldValues(handle, links) - for handle in gpu_handles - ] - - # Maximum bandwidth is bidirectional, divide by two for separate RX and TX - return max( - sum(i.value.ullVal for i in bw) * 1024**2 - for bw in bandwidth - ) / 2 - - -def nvlink(doc): - max_bw = _get_max_bandwidth() - - tx_fig = figure( - title="TX NVLink [B/s]", sizing_mode="stretch_both", y_range=[0, max_bw] - ) - tx_fig.yaxis.formatter = NumeralTickFormatter(format="0.0 b") - nvlink_state = _get_nvlink_throughput() - nvlink_state["tx-ref"] = nvlink_state["tx"].copy() - left = list(range(ngpus)) - right = [l + 0.8 for l in 
left] - source = ColumnDataSource( - { - "left": left, - "right": right, - "count-tx": [0.0 for i in range(ngpus)], - "count-rx": [0.0 for i in range(ngpus)], - } - ) - mapper = LinearColorMapper(palette=all_palettes["RdYlBu"][4], low=0, high=max_bw) - - tx_fig.quad( - source=source, - left="left", - right="right", - bottom=0, - top="count-tx", - color={"field": "count-tx", "transform": mapper}, - ) - tx_fig.toolbar_location = None - - rx_fig = figure( - title="RX NVLink [B/s]", sizing_mode="stretch_both", y_range=[0, max_bw] - ) - rx_fig.yaxis.formatter = NumeralTickFormatter(format="0.0 b") - nvlink_state["rx-ref"] = nvlink_state["rx"].copy() - - rx_fig.quad( - source=source, - left="left", - right="right", - bottom=0, - top="count-rx", - color={"field": "count-rx", "transform": mapper}, - ) - rx_fig.toolbar_location = None - doc.title = "NVLink Utilization Counters" - doc.add_root(column(tx_fig, rx_fig, sizing_mode="stretch_both")) - - def cb(): - nvlink_state["tx-ref"] = nvlink_state["tx"].copy() - nvlink_state["rx-ref"] = nvlink_state["rx"].copy() - src_dict = {} - nvlink_state.update(_get_nvlink_throughput()) - src_dict["count-tx"] = [ - max(a - b, 0.0) * 5.0 - for (a, b) in zip(nvlink_state["tx"], nvlink_state["tx-ref"]) - ] - src_dict["count-rx"] = [ - max(a - b, 0.0) * 5.0 - for (a, b) in zip(nvlink_state["rx"], nvlink_state["rx-ref"]) - ] - - source.data.update(src_dict) - - doc.add_periodic_callback(cb, 200) - - -def nvlink_timeline(doc): - - # X Range - x_range = DataRange1d(follow="end", follow_interval=20000, range_padding=0) - tools = "reset,xpan,xwheel_zoom" - - item_dict = {"time": []} - for i in range(ngpus): - item_dict["nvlink-tx-" + str(i)] = [] - item_dict["nvlink-rx-" + str(i)] = [] - - source = ColumnDataSource(item_dict) - - def _get_color(ind): - color_list = [ - "blue", - "red", - "green", - "black", - "brown", - "cyan", - "orange", - "pink", - "purple", - "gold", + total_memory = [ + pynvml.nvmlDeviceGetMemoryInfo(handle).total + for 
handle in gpu_handles ] - return color_list[ind % len(color_list)] - tx_fig = figure( - title="TX NVLink (per Device) [B/s]", - sizing_mode="stretch_both", - x_axis_type="datetime", - x_range=x_range, - tools=tools, - ) - rx_fig = figure( - title="RX NVLink (per Device) [B/s]", - sizing_mode="stretch_both", - x_axis_type="datetime", - x_range=x_range, - tools=tools, - ) - for i in range(ngpus): - tx_fig.line( - source=source, x="time", y="nvlink-tx-" + str(i), color=_get_color(i) - ) - rx_fig.line( - source=source, x="time", y="nvlink-rx-" + str(i), color=_get_color(i) + self.finish( + json.dumps( + {"memory_usage": memory_usage, "total_memory": total_memory} + ) ) - tx_fig.yaxis.formatter = NumeralTickFormatter(format="0.0 b") - rx_fig.yaxis.formatter = NumeralTickFormatter(format="0.0 b") - - doc.title = "NVLink Throughput Timeline" - doc.add_root(column(tx_fig, rx_fig, sizing_mode="stretch_both")) - counter = 1 - nvlink_state = _get_nvlink_throughput() - nvlink_state["tx-ref"] = nvlink_state["tx"].copy() - nvlink_state["rx-ref"] = nvlink_state["rx"].copy() - last_time = time.time() - - def cb(): - nonlocal last_time - nonlocal nvlink_state +class GPUResourceHandler(APIHandler): + @tornado.web.authenticated + def get(self): now = time.time() - src_dict = {"time": [now * 1000]} - - nvlink_state["tx-ref"] = nvlink_state["tx"].copy() - nvlink_state["rx-ref"] = nvlink_state["rx"].copy() - nvlink_state.update(_get_nvlink_throughput()) - tx_diff = [ - max(a - b, 0.0) * 5.0 - for (a, b) in zip(nvlink_state["tx"], nvlink_state["tx-ref"]) - ] - - rx_diff = [ - max(a - b, 0.0) * 5.0 - for (a, b) in zip(nvlink_state["rx"], nvlink_state["rx-ref"]) - ] - - for i in range(ngpus): - src_dict["nvlink-tx-" + str(i)] = [tx_diff[i]] - src_dict["nvlink-rx-" + str(i)] = [rx_diff[i]] - source.stream(src_dict, 1000) - last_time = now - - doc.add_periodic_callback(cb, 200) - - -def gpu_resource_timeline(doc): - - memory_list = [ - pynvml.nvmlDeviceGetMemoryInfo(handle).total / (1024 * 
1024) - for handle in gpu_handles - ] - gpu_mem_max = max(memory_list) * (1024 * 1024) - gpu_mem_sum = sum(memory_list) - - # Shared X Range for all plots - x_range = DataRange1d(follow="end", follow_interval=20000, range_padding=0) - tools = "reset,xpan,xwheel_zoom" - - item_dict = { - "time": [], - "gpu-total": [], - "memory-total": [], - "rx-total": [], - "tx-total": [], - } - for i in range(ngpus): - item_dict["gpu-" + str(i)] = [] - item_dict["memory-" + str(i)] = [] - - source = ColumnDataSource(item_dict) - - def _get_color(ind): - color_list = [ - "blue", - "red", - "green", - "black", - "brown", - "cyan", - "orange", - "pink", - "purple", - "gold", + stats = { + "time": now * 1000, + "gpu_utilization_total": 0, + "gpu_memory_total": 0, + "rx_total": 0, + "tx_total": 0, + "gpu_memory_individual": [], + "gpu_utilization_individual": [], + } + memory_list = [ + pynvml.nvmlDeviceGetMemoryInfo(handle).total / (1024 * 1024) + for handle in gpu_handles ] - return color_list[ind % len(color_list)] - - memory_fig = figure( - title="Memory Utilization (per Device) [B]", - sizing_mode="stretch_both", - x_axis_type="datetime", - y_range=[0, gpu_mem_max], - x_range=x_range, - tools=tools, - ) - for i in range(ngpus): - memory_fig.line( - source=source, x="time", y="memory-" + str(i), color=_get_color(i) - ) - memory_fig.yaxis.formatter = NumeralTickFormatter(format="0.0 b") + gpu_mem_sum = sum(memory_list) - gpu_fig = figure( - title="GPU Utilization (per Device) [%]", - sizing_mode="stretch_both", - x_axis_type="datetime", - y_range=[0, 100], - x_range=x_range, - tools=tools, - ) - for i in range(ngpus): - gpu_fig.line(source=source, x="time", y="gpu-" + str(i), color=_get_color(i)) - - tot_fig = figure( - title="Total Utilization [%]", - sizing_mode="stretch_both", - x_axis_type="datetime", - y_range=[0, 100], - x_range=x_range, - tools=tools, - ) - tot_fig.line( - source=source, x="time", y="gpu-total", color="blue", legend_label="Total-GPU" - ) - tot_fig.line( - 
source=source, x="time", y="memory-total", color="red", legend_label="Total-Memory" - ) - tot_fig.legend.location = "top_left" - - figures = [gpu_fig, memory_fig, tot_fig] - if pci_gen is not None: - pci_fig = figure( - title="Total PCI Throughput [B/s]", - sizing_mode="stretch_both", - x_axis_type="datetime", - x_range=x_range, - tools=tools, - ) - pci_fig.line(source=source, x="time", y="tx-total", color="blue", legend_label="TX") - pci_fig.line(source=source, x="time", y="rx-total", color="red", legend_label="RX") - pci_fig.yaxis.formatter = NumeralTickFormatter(format="0.0 b") - pci_fig.legend.location = "top_left" - figures.append(pci_fig) - - doc.title = "Resource Timeline" - doc.add_root( - column(*figures, sizing_mode="stretch_both") - ) - - last_time = time.time() - - def cb(): - nonlocal last_time - now = time.time() - src_dict = {"time": [now * 1000]} - gpu_tot = 0 - mem_tot = 0 - tx_tot = 0 - rx_tot = 0 for i in range(ngpus): gpu = pynvml.nvmlDeviceGetUtilizationRates(gpu_handles[i]).gpu mem = pynvml.nvmlDeviceGetMemoryInfo(gpu_handles[i]).used - gpu_tot += gpu - mem_tot += mem / (1024 * 1024) + stats["gpu_utilization_total"] += gpu + stats["gpu_memory_total"] += mem / (1024 * 1024) + if pci_gen is not None: tx = ( pynvml.nvmlDeviceGetPcieThroughput( @@ -541,17 +109,127 @@ def cb(): ) * 1024 ) - rx_tot += rx - tx_tot += tx - src_dict["gpu-" + str(i)] = [gpu] - src_dict["memory-" + str(i)] = [mem] - src_dict["gpu-total"] = [gpu_tot / ngpus] - src_dict["memory-total"] = [(mem_tot / gpu_mem_sum) * 100] - src_dict["tx-total"] = [tx_tot] - src_dict["rx-total"] = [rx_tot] + stats["rx_total"] += rx + stats["tx_total"] += tx + stats["gpu_utilization_individual"].append(gpu) + stats["gpu_memory_individual"].append(mem) + + stats["gpu_utilization_total"] /= ngpus + stats["gpu_memory_total"] = round( + (stats["gpu_memory_total"] / gpu_mem_sum) * 100, 2 + ) + self.set_header("Content-Type", "application/json") + self.write(json.dumps(stats)) + + +class 
NVLinkThroughputHandler(APIHandler): + prev_throughput = None + + @tornado.web.authenticated + def get(self): + throughput = [ + pynvml.nvmlDeviceGetFieldValues( + handle, + [ + (pynvml.NVML_FI_DEV_NVLINK_THROUGHPUT_DATA_RX, scope_id) + for scope_id in range(pynvml.NVML_NVLINK_MAX_LINKS) + ] + + [ + (pynvml.NVML_FI_DEV_NVLINK_THROUGHPUT_DATA_TX, scope_id) + for scope_id in range(pynvml.NVML_NVLINK_MAX_LINKS) + ], + ) + for handle in gpu_handles + ] + + # Check if previous throughput is available + if self.prev_throughput is not None: + # Calculate the change since the last request + throughput_change = [ + [ + throughput[i][j].value.ullVal + - self.prev_throughput[i][j].value.ullVal + for j in range(len(throughput[i])) + ] + for i in range(len(throughput)) + ] + else: + # If no previous throughput is available, set change to zero + throughput_change = [ + [0] * len(throughput[i]) for i in range(len(throughput)) + ] + + # Store the current throughput for the next request + self.prev_throughput = throughput + + self.set_header("Content-Type", "application/json") + # Send the change in throughput as part of the response + self.write( + json.dumps( + { + "nvlink_rx": [ + sum( + throughput_change[i][ + : pynvml.NVML_NVLINK_MAX_LINKS + ] + ) + * 1024 + for i in range(len(throughput_change)) + ], + "nvlink_tx": [ + sum( + throughput_change[i][ + pynvml.NVML_NVLINK_MAX_LINKS : + ] + ) + * 1024 + for i in range(len(throughput_change)) + ], + "max_rxtx_bw": max_bw, + } + ) + ) + + +class PCIStatsHandler(APIHandler): + @tornado.web.authenticated + def get(self): + # Use device-0 to get "upper bound" + pci_width = pynvml.nvmlDeviceGetMaxPcieLinkWidth(gpu_handles[0]) + pci_bw = { + # Keys = PCIe-Generation, Values = Max PCIe Lane BW (per direction) + # [Note: Using specs at https://en.wikipedia.org/wiki/PCI_Express] + 1: (250.0 * 1024 * 1024), + 2: (500.0 * 1024 * 1024), + 3: (985.0 * 1024 * 1024), + 4: (1969.0 * 1024 * 1024), + 5: (3938.0 * 1024 * 1024), + 6: (7877.0 * 1024 * 
1024), + } + # Max PCIe Throughput = BW-per-lane * Width + max_rxtx_tp = pci_width * pci_bw[pci_gen] + + pci_tx = [ + pynvml.nvmlDeviceGetPcieThroughput( + gpu_handles[i], pynvml.NVML_PCIE_UTIL_TX_BYTES + ) + * 1024 + for i in range(ngpus) + ] - source.stream(src_dict, 1000) + pci_rx = [ + pynvml.nvmlDeviceGetPcieThroughput( + gpu_handles[i], pynvml.NVML_PCIE_UTIL_RX_BYTES + ) + * 1024 + for i in range(ngpus) + ] - last_time = now + stats = { + "pci_tx": pci_tx, + "pci_rx": pci_rx, + "max_rxtx_tp": max_rxtx_tp, + } - doc.add_periodic_callback(cb, 200) + self.set_header("Content-Type", "application/json") + self.write(json.dumps(stats)) diff --git a/jupyterlab_nvdashboard/handlers.py b/jupyterlab_nvdashboard/handlers.py new file mode 100644 index 0000000..b920b50 --- /dev/null +++ b/jupyterlab_nvdashboard/handlers.py @@ -0,0 +1,47 @@ +from jupyter_server.utils import url_path_join +from . import apps + +URL_PATH = "nvdashboard" + + +def setup_handlers(web_app): + host_pattern = ".*$" + base_url = web_app.settings["base_url"] + handlers = [] + if apps.gpu.ngpus > 0: + # Prepend the base_url so that it works in a JupyterHub setting + route_pattern_gpu_util = url_path_join( + base_url, URL_PATH, "gpu_utilization" + ) + route_pattern_gpu_usage = url_path_join( + base_url, URL_PATH, "gpu_usage" + ) + route_pattern_gpu_resource = url_path_join( + base_url, URL_PATH, "gpu_resource" + ) + route_pattern_pci_stats = url_path_join( + base_url, URL_PATH, "pci_stats" + ) + route_pattern_nvlink_throughput = url_path_join( + base_url, URL_PATH, "nvlink_throughput" + ) + handlers += [ + (route_pattern_gpu_util, apps.gpu.GPUUtilizationHandler), + (route_pattern_gpu_usage, apps.gpu.GPUUsageHandler), + (route_pattern_gpu_resource, apps.gpu.GPUResourceHandler), + (route_pattern_pci_stats, apps.gpu.PCIStatsHandler), + ( + route_pattern_nvlink_throughput, + apps.gpu.NVLinkThroughputHandler, + ), + ] + + route_pattern_cpu_resource = url_path_join( + base_url, URL_PATH, "cpu_resource" + ) 
+ + handlers += [ + (route_pattern_cpu_resource, apps.cpu.CPUResourceHandler), + ] + + web_app.add_handlers(host_pattern, handlers) diff --git a/jupyterlab_nvdashboard/server.py b/jupyterlab_nvdashboard/server.py deleted file mode 100644 index d8242b0..0000000 --- a/jupyterlab_nvdashboard/server.py +++ /dev/null @@ -1,53 +0,0 @@ -import sys -import os -from bokeh.server.server import Server -from tornado.ioloop import IOLoop -from tornado import web - -from jupyterlab_nvdashboard import apps - - -DEFAULT_PORT = 8000 - - -routes = { - # "/CPU-Utilization": apps.cpu.cpu, - "/Machine-Resources": apps.cpu.resource_timeline, -} - -if apps.gpu.ngpus > 0: - routes["/GPU-Utilization"] = apps.gpu.gpu - routes["/GPU-Memory"] = apps.gpu.gpu_mem - routes["/GPU-Resources"] = apps.gpu.gpu_resource_timeline - if apps.gpu.pci_gen is not None: - routes["/PCIe-Throughput"] = apps.gpu.pci - if apps.gpu.nvlink_ver is not None: - routes["/NVLink-Throughput"] = apps.gpu.nvlink - routes["/NVLink-Timeline"] = apps.gpu.nvlink_timeline - -class RouteIndex(web.RequestHandler): - """ A JSON index of all routes present on the Bokeh Server """ - - def get(self): - self.write({route: route.strip("/").replace("-", " ") for route in routes}) - - -def go(): - if len(sys.argv) > 1: - port = int(sys.argv[1]) - else: - port = DEFAULT_PORT - - os.environ['BOKEH_RESOURCES'] = 'cdn' - server = Server(routes, port=port, allow_websocket_origin=["*"]) - server.start() - - server._tornado.add_handlers( - r".*", [(server.prefix + "/" + "index.json", RouteIndex, {})] - ) - - IOLoop.current().start() - - -if __name__ == "__main__": - go() diff --git a/jupyterlab_nvdashboard/tests/test_handlers.py b/jupyterlab_nvdashboard/tests/test_handlers.py new file mode 100644 index 0000000..26e1d6d --- /dev/null +++ b/jupyterlab_nvdashboard/tests/test_handlers.py @@ -0,0 +1,62 @@ +import json + +URL_PATH = "nvdashboard" + + +async def test_gpu_utilization_handler(jp_fetch): + response = await jp_fetch(URL_PATH, 
"gpu_utilization") + assert response.code == 200 + data = json.loads(response.body.decode()) + assert "gpu_utilization" in data + + +async def test_gpu_usage_handler(jp_fetch): + response = await jp_fetch(URL_PATH, "gpu_usage") + assert response.code == 200 + data = json.loads(response.body.decode()) + assert "memory_usage" in data + assert "total_memory" in data + + +async def test_gpu_resource_handler(jp_fetch): + response = await jp_fetch(URL_PATH, "gpu_resource") + assert response.code == 200 + data = json.loads(response.body.decode()) + assert "time" in data + assert "gpu_utilization_total" in data + assert "gpu_memory_total" in data + assert "rx_total" in data + assert "tx_total" in data + assert "gpu_memory_individual" in data + assert "gpu_utilization_individual" in data + + +async def test_pci_stats_handler(jp_fetch): + response = await jp_fetch(URL_PATH, "pci_stats") + assert response.code == 200 + data = json.loads(response.body.decode()) + assert "pci_tx" in data + assert "pci_rx" in data + assert "max_rxtx_tp" in data + + +async def test_nvlink_throughput_handler(jp_fetch): + response = await jp_fetch(URL_PATH, "nvlink_throughput") + assert response.code == 200 + data = json.loads(response.body.decode()) + assert "nvlink_rx" in data + assert "nvlink_tx" in data + assert "max_rxtx_bw" in data + + +async def test_cpu_handlers(jp_fetch): + response = await jp_fetch(URL_PATH, "cpu_resource") + assert response.code == 200 + data = json.loads(response.body.decode()) + assert "time" in data + assert "cpu_utilization" in data + assert "memory_usage" in data + assert "disk_read" in data + assert "disk_write" in data + assert "network_read" in data + assert "network_write" in data diff --git a/jupyterlab_nvdashboard/tests/test_utils.py b/jupyterlab_nvdashboard/tests/test_utils.py deleted file mode 100644 index c365664..0000000 --- a/jupyterlab_nvdashboard/tests/test_utils.py +++ /dev/null @@ -1,7 +0,0 @@ -import pytest - - -def test_format_bytes(): - from 
jupyterlab_nvdashboard.utils import format_bytes - - assert format_bytes(1e13) == "10.00 TB" diff --git a/jupyterlab_nvdashboard/utils.py b/jupyterlab_nvdashboard/utils.py deleted file mode 100644 index 1511b98..0000000 --- a/jupyterlab_nvdashboard/utils.py +++ /dev/null @@ -1,17 +0,0 @@ -def format_bytes(n): - """Format bytes as text - - Copied from dask to avoid dependency. - - """ - if n > 1e15: - return "%0.2f PB" % (n / 1e15) - if n > 1e12: - return "%0.2f TB" % (n / 1e12) - if n > 1e9: - return "%0.2f GB" % (n / 1e9) - if n > 1e6: - return "%0.2f MB" % (n / 1e6) - if n > 1e3: - return "%0.2f kB" % (n / 1000) - return "%d B" % n diff --git a/package.json b/package.json index 89f204f..6c37e09 100644 --- a/package.json +++ b/package.json @@ -1,77 +1,207 @@ { - "name": "jupyterlab-nvdashboard", - "version": "0.10.0", - "description": "A JupyterLab extension for displaying GPU usage dashboards", - "keywords": [ - "jupyter", - "jupyterlab", - "jupyterlab-extension" - ], - "homepage": "https://github.com/rapidsai/jupyterlab-nvdashboard", - "bugs": { - "url": "https://github.com/rapidsai/jupyterlab-nvdashboard/issues" - }, - "license": "", - "author": "NV Dashbaord contributors", - "files": [ - "lib/**/*.{d.ts,eot,gif,html,jpg,js,js.map,json,png,svg,woff2,ttf}", - "style/**/*.{css,eot,gif,html,jpg,json,png,svg,woff2,ttf}", - "style/index.js" - ], - "main": "lib/index.js", - "types": "lib/index.d.ts", - "repository": { - "type": "git", - "url": "https://github.com/rapidsai/jupyterlab-nvdashboard.git" - }, - "scripts": { - "build": "jlpm run build:lib && jlpm run build:labextension:dev", - "build:labextension": "jupyter labextension build .", - "build:labextension:dev": "jupyter labextension build --development True .", - "build:lib": "tsc", - "build:prod": "jlpm run build:lib && jlpm run build:labextension", - "clean": "jlpm run clean:lib", - "clean:all": "jlpm run clean:lib && jlpm run clean:labextension", - "clean:labextension": "rimraf 
jupyterlab_nvdashboard/labextension", - "clean:lib": "rimraf lib tsconfig.tsbuildinfo", - "eslint": "eslint . --ext .ts,.tsx --fix", - "eslint:check": "eslint . --ext .ts,.tsx", - "install:extension": "jupyter labextension develop --overwrite .", - "prepare": "jlpm run clean && jlpm run build:prod", - "test": "mocha", - "watch": "run-p watch:src watch:labextension", - "watch:labextension": "jupyter labextension watch .", - "watch:src": "tsc -w" - }, - "dependencies": { - "@jupyterlab/application": "^3.0.0", - "@jupyterlab/apputils": "^3.0.0", - "@jupyterlab/coreutils": "^5.0.0", - "react": "^17.0.1", - "react-dom": "^17.0.1" - }, - "resolutions": { - "@types/react": "^16.4.2" - }, - "devDependencies": { - "@jupyterlab/builder": "^3.0.0-rc.13", - "@types/react": "^16.4.2", - "@types/react-dom": "~16.9.0", - "@typescript-eslint/eslint-plugin": "^2.27.0", - "@typescript-eslint/parser": "^2.27.0", - "eslint": "^7.5.0", - "eslint-config-prettier": "^6.10.1", - "eslint-plugin-prettier": "^3.1.2", - "mkdirp": "^1.0.3", - "mocha": "^6.2.0", - "npm-run-all": "^4.1.5", - "prettier": "^1.19.0", - "rimraf": "^3.0.2", - "typescript": "~4.1.3" - }, - "jupyterlab": { - "extension": true, - "outputDir": "jupyterlab_nvdashboard/labextension" - }, - "styleModule": "style/index.js" + "name": "jupyterlab-nvdashboard", + "version": "0.10.0", + "description": "A JupyterLab extension for displaying GPU usage dashboards", + "keywords": [ + "jupyter", + "jupyterlab", + "jupyterlab-extension" + ], + "homepage": "https://github.com/rapidsai/jupyterlab-nvdashboard", + "bugs": { + "url": "https://github.com/rapidsai/jupyterlab-nvdashboard/issues" + }, + "license": "", + "author": "NV Dashboard contributors", + "files": [ + "lib/**/*.{d.ts,eot,gif,html,jpg,js,js.map,json,png,svg,woff2,ttf}", + "style/**/*.{css,js,eot,gif,html,jpg,json,png,svg,woff2,ttf}", + "schema/*.json" + ], + "main": "lib/index.js", + "types": "lib/index.d.ts", + "style": "style/index.css", + "repository": { + "type": 
"git", + "url": "https://github.com/rapidsai/jupyterlab-nvdashboard.git" + }, + "scripts": { + "build": "jlpm build:lib && jlpm build:labextension:dev", + "build:prod": "jlpm clean && jlpm build:lib:prod && jlpm build:labextension", + "build:labextension": "jupyter labextension build .", + "build:labextension:dev": "jupyter labextension build --development True .", + "build:lib": "tsc --sourceMap", + "build:lib:prod": "tsc", + "clean": "jlpm clean:lib", + "clean:lib": "rimraf lib tsconfig.tsbuildinfo", + "clean:lintcache": "rimraf .eslintcache .stylelintcache", + "clean:labextension": "rimraf jupyterlab_nvdashboard/labextension jupyterlab_nvdashboard/_version.py", + "clean:all": "jlpm clean:lib && jlpm clean:labextension && jlpm clean:lintcache", + "eslint": "jlpm eslint:check --fix", + "eslint:check": "eslint . --cache --ext .ts,.tsx", + "install:extension": "jlpm build", + "lint": "jlpm stylelint && jlpm prettier && jlpm eslint", + "lint:check": "jlpm stylelint:check && jlpm prettier:check && jlpm eslint:check", + "prettier": "jlpm prettier:base --write --list-different", + "prettier:base": "prettier \"**/*{.ts,.tsx,.js,.jsx,.css,.json,.md}\"", + "prettier:check": "jlpm prettier:base --check", + "stylelint": "jlpm stylelint:check --fix", + "stylelint:check": "stylelint --cache \"style/**/*.css\"", + "watch": "run-p watch:src watch:labextension", + "watch:src": "tsc -w --sourceMap", + "watch:labextension": "jupyter labextension watch ." 
+ }, + "dependencies": { + "@jupyterlab/application": "^4.0.0", + "@jupyterlab/coreutils": "^6.0.0", + "@jupyterlab/launcher": "^4.0.5", + "@jupyterlab/services": "^7.0.0", + "d3-format": "^3.1.0", + "d3-scale": "^4.0.2", + "react": "^18.2.0", + "react-dom": "^18.2.0", + "react-virtualized-auto-sizer": "^1.0.20", + "recharts": "^2.8.0" + }, + "devDependencies": { + "@jupyterlab/builder": "^4.0.0", + "@types/d3-format": "^3.0.1", + "@types/d3-scale": "^4.0.4", + "@types/json-schema": "^7.0.11", + "@types/react": "^18.0.26", + "@types/react-addons-linked-state-mixin": "^0.14.22", + "@typescript-eslint/eslint-plugin": "^6.1.0", + "@typescript-eslint/parser": "^6.1.0", + "css-loader": "^6.7.1", + "eslint": "^8.36.0", + "eslint-config-prettier": "^8.8.0", + "eslint-plugin-prettier": "^5.0.0", + "mkdirp": "^1.0.3", + "npm-run-all": "^4.1.5", + "prettier": "^3.0.0", + "rimraf": "^5.0.1", + "source-map-loader": "^1.0.2", + "style-loader": "^3.3.1", + "stylelint": "^15.10.1", + "stylelint-config-recommended": "^13.0.0", + "stylelint-config-standard": "^34.0.0", + "stylelint-csstree-validator": "^3.0.0", + "stylelint-prettier": "^4.0.0", + "typescript": "~5.0.2", + "yjs": "^13.5.0" + }, + "sideEffects": [ + "style/*.css", + "style/index.js" + ], + "styleModule": "style/index.js", + "publishConfig": { + "access": "public" + }, + "jupyterlab": { + "discovery": { + "server": { + "managers": [ + "pip" + ], + "base": { + "name": "jupyterlab_nvdashboard" + } + } + }, + "extension": true, + "outputDir": "jupyterlab_nvdashboard/labextension", + "schemaDir": "schema" + }, + "eslintIgnore": [ + "node_modules", + "dist", + "coverage", + "**/*.d.ts" + ], + "eslintConfig": { + "extends": [ + "eslint:recommended", + "plugin:@typescript-eslint/eslint-recommended", + "plugin:@typescript-eslint/recommended", + "plugin:prettier/recommended" + ], + "parser": "@typescript-eslint/parser", + "parserOptions": { + "project": "tsconfig.json", + "sourceType": "module" + }, + "plugins": [ + 
"@typescript-eslint" + ], + "rules": { + "@typescript-eslint/naming-convention": [ + "error", + { + "selector": "interface", + "format": [ + "PascalCase" + ], + "custom": { + "regex": "^I[A-Z]", + "match": true + } + } + ], + "@typescript-eslint/no-unused-vars": [ + "warn", + { + "args": "none" + } + ], + "@typescript-eslint/no-explicit-any": "off", + "@typescript-eslint/no-namespace": "off", + "@typescript-eslint/no-use-before-define": "off", + "@typescript-eslint/quotes": [ + "error", + "single", + { + "avoidEscape": true, + "allowTemplateLiterals": false + } + ], + "curly": [ + "error", + "all" + ], + "eqeqeq": "error", + "prefer-arrow-callback": "error" + } + }, + "prettier": { + "singleQuote": true, + "trailingComma": "none", + "arrowParens": "avoid", + "endOfLine": "auto", + "overrides": [ + { + "files": "package.json", + "options": { + "tabWidth": 4 + } + } + ] + }, + "stylelint": { + "extends": [ + "stylelint-config-recommended", + "stylelint-config-standard", + "stylelint-prettier/recommended" + ], + "plugins": [ + "stylelint-csstree-validator" + ], + "rules": { + "csstree/validator": true, + "property-no-vendor-prefix": null, + "selector-class-pattern": "^([a-z][A-z\\d]*)(-[A-z\\d]+)*$", + "selector-no-vendor-prefix": null, + "value-no-vendor-prefix": null + } + } } diff --git a/pyproject.toml b/pyproject.toml index 5f53684..015f35b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,114 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + [build-system] -requires = ["jupyter_packaging~=0.7.9", "jupyterlab>=3.0.0rc13,==3.*", "setuptools>=40.8.0", "wheel"] -build-backend = "setuptools.build_meta" +requires = [ + "hatch-nodejs-version>=0.3.2", + "hatchling>=1.5.0", + "jupyterlab>=4.0.0,<5", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. 
+build-backend = "hatchling.build" + +[project] +name = "jupyterlab_nvdashboard" +readme = "README.md" +license = { file = "LICENSE" } +requires-python = ">=3.8" +classifiers = [ + "Framework :: Jupyter", + "Framework :: Jupyter :: JupyterLab", + "Framework :: Jupyter :: JupyterLab :: 4", + "Framework :: Jupyter :: JupyterLab :: Extensions", + "Framework :: Jupyter :: JupyterLab :: Extensions :: Prebuilt", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] +dependencies = [ + "jupyterlab>=4", + "psutil", + "pynvml", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. +dynamic = ["version", "description", "authors", "urls", "keywords"] + +[project.optional-dependencies] +test = [ + "pytest", + "pytest-jupyter[server]>=0.6.0", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. 
+ +[tool.hatch.version] +source = "nodejs" + +[tool.hatch.metadata.hooks.nodejs] +fields = ["description", "authors", "urls"] + +[tool.hatch.build.targets.sdist] +artifacts = ["jupyterlab_nvdashboard/labextension"] +exclude = [".github", "binder"] + +[tool.hatch.build.targets.wheel.shared-data] +"jupyterlab_nvdashboard/labextension" = "share/jupyter/labextensions/jupyterlab-nvdashboard" +"install.json" = "share/jupyter/labextensions/jupyterlab-nvdashboard/install.json" +"jupyter-config/server-config" = "etc/jupyter/jupyter_server_config.d" + +[tool.hatch.build.hooks.version] +path = "jupyterlab_nvdashboard/_version.py" + +[tool.hatch.build.hooks.jupyter-builder] +dependencies = ["hatch-jupyter-builder>=0.5"] +build-function = "hatch_jupyter_builder.npm_builder" +ensured-targets = [ + "jupyterlab_nvdashboard/labextension/static/style.js", + "jupyterlab_nvdashboard/labextension/package.json", +] +skip-if-exists = ["jupyterlab_nvdashboard/labextension/static/style.js"] + +[tool.hatch.build.hooks.jupyter-builder.build-kwargs] +build_cmd = "build:prod" +npm = ["jlpm"] + +[tool.hatch.build.hooks.jupyter-builder.editable-build-kwargs] +build_cmd = "install:extension" +npm = ["jlpm"] +source_dir = "src" +build_dir = "jupyterlab_nvdashboard/labextension" + +[tool.jupyter-releaser.options] +version_cmd = "hatch version" + +[tool.jupyter-releaser.hooks] +before-build-npm = [ + "python -m pip install 'jupyterlab>=4.0.0,<5'", + "jlpm", + "jlpm build:prod" +] +before-build-python = ["jlpm clean:all"] + +[tool.check-wheel-contents] +ignore = ["W002"] + +[tool.black] +line-length = 79 +target-version = ["py39"] +include = '\.py?$' +force-exclude = ''' +/( + thirdparty | + \.eggs | + \.git | + \.hg | + \.mypy_cache | + \.tox | + \.venv | + _build | + buck-out | + build | + dist + package.json +)/ +''' diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 87bcea2..0000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -jupyter-server-proxy -bokeh>2.1 
-pynvml>=11.0.0 -psutil -jupyterlab>=3.0.0rc13,==3.* diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index ef9e2d1..0000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[metadata] -license_files = LICENSE.txt diff --git a/setup.py b/setup.py index 6c05b85..aefdf20 100644 --- a/setup.py +++ b/setup.py @@ -1,105 +1 @@ -""" -jupyterlab_nvdashboard setup -""" -import json -import os - -from jupyter_packaging import ( - create_cmdclass, install_npm, ensure_targets, - combine_commands, skip_if_exists -) -import setuptools - -HERE = os.path.abspath(os.path.dirname(__file__)) - -# The name of the project -name="jupyterlab_nvdashboard" - -# Get our version -if 'GIT_DESCRIBE_TAG' in os.environ: - describe_tag = os.environ['GIT_DESCRIBE_TAG'] - version = describe_tag.lstrip('v') + os.environ.get('VERSION_SUFFIX', '') - if describe_tag[-1] == 'a': - version += os.environ['GIT_DESCRIBE_NUMBER'] -else: - # get version from package.json (to avoid duplicating) - with open(os.path.join(HERE, 'package.json'), encoding='utf-8') as f: - version = json.load(f)['version'] - -lab_path = os.path.join(HERE, name, "labextension") - -# Representative files that should exist after a successful build -jstargets = [ - os.path.join(lab_path, "package.json"), -] - -package_data_spec = { - name: [ - "*" - ] -} - -labext_name = "jupyterlab-nvdashboard" - -data_files_spec = [ - ("share/jupyter/labextensions/%s" % labext_name, lab_path, "**"), - ("share/jupyter/labextensions/%s" % labext_name, HERE, "install.json"), -] - -cmdclass = create_cmdclass("jsdeps", - package_data_spec=package_data_spec, - data_files_spec=data_files_spec -) - -js_command = combine_commands( - install_npm(HERE, build_cmd="build:prod", npm=["jlpm"]), - ensure_targets(jstargets), -) - -is_repo = os.path.exists(os.path.join(HERE, ".git")) -if is_repo: - cmdclass["jsdeps"] = js_command -else: - cmdclass["jsdeps"] = skip_if_exists(jstargets, js_command) - -with open("README.md", "r") as fh: - long_description = 
fh.read() - -setup_args = dict( - name=name, - version=version, - url="https://github.com/rapidsai/jupyterlab-nvdashboard", - author="NVDashboard Contributors", - description="A JupyterLab extension for displaying GPU usage dashboards", - long_description= long_description, - long_description_content_type="text/markdown", - cmdclass= cmdclass, - packages=setuptools.find_packages(), - install_requires=list(open("requirements.txt").read().strip().split("\n")), - entry_points={ - 'jupyter_serverproxy_servers': [ - 'nvdashboard = jupyterlab_nvdashboard:launch_server', - ], - 'console_scripts': [ - 'nvdashboard = jupyterlab_nvdashboard.server:go', - ], - }, - zip_safe=False, - include_package_data=True, - python_requires=">=3.7", - license="BSD-3-Clause", - platforms="Linux, Mac OS X, Windows", - keywords=["Jupyter", "JupyterLab", "JupyterLab3"], - classifiers=[ - "License :: OSI Approved :: BSD License", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Framework :: Jupyter", - ], -) - - -if __name__ == "__main__": - setuptools.setup(**setup_args) +__import__("setuptools").setup() diff --git a/src/assets/constants.ts b/src/assets/constants.ts new file mode 100644 index 0000000..fdffb0b --- /dev/null +++ b/src/assets/constants.ts @@ -0,0 +1,2 @@ +export const BAR_COLOR_LINEAR_RANGE: string[] = ['#ff7900', '#b30000']; +export const GPU_COLOR_CATEGORICAL_RANGE: string[] = ['#fecc5c', '#bd0026']; diff --git a/src/assets/icons.ts b/src/assets/icons.ts new file mode 100644 index 0000000..fdce766 --- /dev/null +++ b/src/assets/icons.ts @@ -0,0 +1,37 @@ +import { LabIcon } from '@jupyterlab/ui-components'; +import gpuIconStr from '../../style/icons/expansion-card.svg'; +import hBarIconStr from '../../style/icons/horizontal-bar-chart.svg'; +import vBarIconStr from '../../style/icons/vertical-bar-chart.svg'; +import lineIconStr from 
'../../style/icons/line-chart.svg'; +import pauseIconStr from '../../style/icons/pause.svg'; +import playIconStr from '../../style/icons/play.svg'; + +export const gpuIcon = new LabIcon({ + name: 'launcher:gpu-icon', + svgstr: gpuIconStr +}); + +export const hBarIcon = new LabIcon({ + name: 'launcher:hbar-icon', + svgstr: hBarIconStr +}); + +export const vBarIcon = new LabIcon({ + name: 'launcher:vbar-icon', + svgstr: vBarIconStr +}); + +export const lineIcon = new LabIcon({ + name: 'launcher:time-series-icon', + svgstr: lineIconStr +}); + +export const pauseIcon = new LabIcon({ + name: 'launcher:pause-icon', + svgstr: pauseIconStr +}); + +export const playIcon = new LabIcon({ + name: 'launcher:play-icon', + svgstr: playIconStr +}); diff --git a/src/charts/GpuMemoryChart.tsx b/src/charts/GpuMemoryChart.tsx new file mode 100644 index 0000000..bca1eea --- /dev/null +++ b/src/charts/GpuMemoryChart.tsx @@ -0,0 +1,111 @@ +import React, { useEffect, useState } from 'react'; +import { requestAPI } from '../handler'; +import { ReactWidget } from '@jupyterlab/ui-components'; +import { + BarChart, + Bar, + Cell, + YAxis, + XAxis, + Tooltip, + CartesianGrid +} from 'recharts'; +import { scaleLinear } from 'd3-scale'; +import { renderCustomTooltip } from '../components/tooltipUtils'; +import { BAR_COLOR_LINEAR_RANGE } from '../assets/constants'; +import { format } from 'd3-format'; +import AutoSizer from 'react-virtualized-auto-sizer'; + +const GpuMemoryChart = (): JSX.Element => { + const [gpuMemory, setGpuMemory] = useState([]); + const [gpuTotalMemory, setGpuTotalMemory] = useState([]); + + useEffect(() => { + async function fetchGPUMemory() { + const response = await requestAPI('gpu_usage'); + setGpuMemory(response.memory_usage); + // set gpuTotalMemory to max of total memory array returned from API + setGpuTotalMemory(response.total_memory); + } + + fetchGPUMemory(); + }, []); + + useEffect(() => { + async function fetchGPUMemory() { + const response = await 
requestAPI('gpu_usage'); + setGpuMemory(response.memory_usage); + setGpuTotalMemory(response.total_memory); + } + const intervalId = setInterval(() => { + fetchGPUMemory(); + }, 1000); + + return () => clearInterval(intervalId); + }, []); + + const data = gpuMemory.map((memory, index) => ({ + name: `GPU ${index}`, + memory: memory, + totalMemory: gpuTotalMemory[index] + })); + + // Create a formatter for displaying bytes + + const colorScale = scaleLinear().range(BAR_COLOR_LINEAR_RANGE); + + const usageSum = data.reduce((sum, data) => sum + data.memory, 0); + const formatBytes = (value: number): string => { + return `${format('.2s')(value)}B`; + }; + + return ( +
+ + {' '} + GPU Memory: {formatBytes(usageSum)} + + + {({ height, width }: { height: number; width: number }) => ( + + + + + + renderCustomTooltip(data, { valueFormatter: formatBytes }) + } + /> + + {data.map((entry, index) => ( + + ))} + + + )} + +
+ ); +}; + +export class GpuMemoryChartWidget extends ReactWidget { + render(): JSX.Element { + return ; + } +} diff --git a/src/charts/GpuResourceChart.tsx b/src/charts/GpuResourceChart.tsx new file mode 100644 index 0000000..25517f0 --- /dev/null +++ b/src/charts/GpuResourceChart.tsx @@ -0,0 +1,222 @@ +import React, { useState, useEffect } from 'react'; +import { ReactWidget, Button } from '@jupyterlab/ui-components'; +import { Line, XAxis, YAxis, Brush, LineChart } from 'recharts'; +import AutoSizer from 'react-virtualized-auto-sizer'; +import { requestAPI } from '../handler'; +import { CustomLineChart } from '../components/customLineChart'; +import { formatDate, formatBytes } from '../components/formatUtils'; +import { scaleLinear } from 'd3-scale'; +import { GPU_COLOR_CATEGORICAL_RANGE } from '../assets/constants'; +import { pauseIcon, playIcon } from '../assets/icons'; + +interface IChartProps { + time: number; + gpu_utilization_total: number; + gpu_memory_total: number; + rx_total: number; + tx_total: number; + gpu_utilization_individual: number[]; + gpu_memory_individual: number[]; +} + +const GpuResourceChart = () => { + const [gpuData, setGpuData] = useState([]); + const [tempData, setTempData] = useState([]); + const [isPaused, setIsPaused] = useState(false); + const ngpus = gpuData[0]?.gpu_utilization_individual.length || 0; + + useEffect(() => { + async function fetchGpuUsage() { + const response = await requestAPI('gpu_resource'); + if (!isPaused) { + setGpuData(prevData => { + if (tempData.length > 1) { + prevData = [...prevData, ...tempData]; + } + const newData = [...prevData, response]; + return newData; + }); + setTempData([]); + } else { + setTempData([...tempData, response]); + } + } + + const interval = setInterval(fetchGpuUsage, 1000); + + return () => clearInterval(interval); + }, [isPaused, tempData]); + + const handlePauseClick = () => { + setIsPaused(!isPaused); + }; + + const colorScale = scaleLinear() + .domain([0, ngpus]) + 
.range(GPU_COLOR_CATEGORICAL_RANGE); + + return ( +
+ + {({ height, width }: { height: number; width: number }) => ( +
+ `${value}%`} + width={width} + height={height / 5} + syncId="gpu-resource-sync" + > + {gpuData[0] && + Object.keys(gpuData[0].gpu_utilization_individual).map( + (gpu: any, index: number) => ( + + ) + )} + + + {gpuData[0] && + Object.keys(gpuData[0].gpu_memory_individual).map( + (gpu: any, index: number) => ( + + ) + )} + + `${value}%`} + yDomain={[0, 100]} + width={width} + height={height / 5} + syncId="gpu-resource-sync" + > + + + + + + + +
+ + + + + + + +
+
+ )} +
+
+ ); +}; + +export class GpuResourceChartWidget extends ReactWidget { + render() { + return ; + } +} diff --git a/src/charts/GpuUtilizationChart.tsx b/src/charts/GpuUtilizationChart.tsx new file mode 100644 index 0000000..f9cd5f0 --- /dev/null +++ b/src/charts/GpuUtilizationChart.tsx @@ -0,0 +1,105 @@ +import React, { useEffect, useState } from 'react'; +import { requestAPI } from '../handler'; +import { ReactWidget } from '@jupyterlab/ui-components'; +import { + BarChart, + Bar, + Cell, + YAxis, + XAxis, + Tooltip, + CartesianGrid +} from 'recharts'; +import { scaleLinear } from 'd3-scale'; +import { renderCustomTooltip } from '../components/tooltipUtils'; +import { BAR_COLOR_LINEAR_RANGE } from '../assets/constants'; +import AutoSizer from 'react-virtualized-auto-sizer'; + +const GpuUtilizationChart = (): JSX.Element => { + const [gpuUtilization, setGpuUtilization] = useState([]); + + useEffect(() => { + async function fetchGPUUtilization() { + const response = await requestAPI('gpu_utilization'); + setGpuUtilization(response.gpu_utilization); + } + + fetchGPUUtilization(); + }, []); + + useEffect(() => { + async function fetchGPUUtilization() { + const response = await requestAPI('gpu_utilization'); + setGpuUtilization(response.gpu_utilization); + } + const intervalId = setInterval(() => { + fetchGPUUtilization(); + }, 1000); + + return () => clearInterval(intervalId); + }, []); + + const data = gpuUtilization.map((utilization, index) => ({ + name: `GPU ${index}`, + utilization: utilization + })); + + const colorScale = scaleLinear() + .domain([0, 100]) + .range(BAR_COLOR_LINEAR_RANGE); + + return ( +
+ GPU Utilization + + {({ height, width }: { height: number; width: number }) => ( + + + + `${value}%`} + tick={{ fill: 'var(--nv-custom-tick-color)' }} + className="nv-axis-custom" + /> + + + renderCustomTooltip(data, { + valueFormatter: value => `${value}%` + }) + } + /> + + + {data.map((entry, index) => ( + + ))} + + + )} + +
+ ); +}; + +export class GpuUtilizationChartWidget extends ReactWidget { + render(): JSX.Element { + return ; + } +} diff --git a/src/charts/MachineResourceChart.tsx b/src/charts/MachineResourceChart.tsx new file mode 100644 index 0000000..ec7b46d --- /dev/null +++ b/src/charts/MachineResourceChart.tsx @@ -0,0 +1,207 @@ +import React, { useState, useEffect } from 'react'; +import { ReactWidget, Button } from '@jupyterlab/ui-components'; +import { Line, XAxis, YAxis, Brush, LineChart } from 'recharts'; +import AutoSizer from 'react-virtualized-auto-sizer'; +import { requestAPI } from '../handler'; +import { CustomLineChart } from '../components/customLineChart'; +import { formatDate, formatBytes } from '../components/formatUtils'; +import { scaleLinear } from 'd3-scale'; +import { GPU_COLOR_CATEGORICAL_RANGE } from '../assets/constants'; +import { pauseIcon, playIcon } from '../assets/icons'; + +interface IChartProps { + time: number; + cpu_utilization: number; + memory_usage: number; + disk_read: number; + disk_write: number; + network_read: number; + network_write: number; + disk_read_current: number; + disk_write_current: number; + network_read_current: number; + network_write_current: number; +} + +const MachineResourceChart = () => { + const [cpuData, setCpuData] = useState([]); + const [tempData, setTempData] = useState([]); + const [isPaused, setIsPaused] = useState(false); + + useEffect(() => { + async function fetchCpuUsage() { + let response = await requestAPI('cpu_resource'); + + if (cpuData.length > 0) { + response = { + ...response, + disk_read_current: + response.disk_read - cpuData[cpuData.length - 1].disk_read, + disk_write_current: + response.disk_write - cpuData[cpuData.length - 1].disk_write, + network_read_current: + response.network_read - cpuData[cpuData.length - 1].network_read, + network_write_current: + response.network_write - cpuData[cpuData.length - 1].network_write + }; + } + if (!isPaused) { + setCpuData(prevData => { + if 
(tempData.length > 1) { + prevData = [...prevData, ...tempData]; + } + const newData = [...prevData, response]; + return newData; + }); + setTempData([]); + } else { + setTempData([...tempData, response]); + } + } + + const interval = setInterval(fetchCpuUsage, 1000); + + return () => clearInterval(interval); + }, [isPaused, tempData]); + + const handlePauseClick = () => { + setIsPaused(!isPaused); + }; + + const colorScale = scaleLinear().range(GPU_COLOR_CATEGORICAL_RANGE); + + return ( +
+ + {({ height, width }: { height: number; width: number }) => ( +
+ `${value}%`} + width={width} + height={height / 5} + syncId="cpu-resource-sync" + > + + + + + + + + + + + + + +
+ + + + + + +
+
+ )} +
+
+ ); +}; + +export class MachineResourceChartWidget extends ReactWidget { + render() { + return ; + } +} diff --git a/src/charts/NvLinkThroughputChart.tsx b/src/charts/NvLinkThroughputChart.tsx new file mode 100644 index 0000000..d918df9 --- /dev/null +++ b/src/charts/NvLinkThroughputChart.tsx @@ -0,0 +1,144 @@ +import React, { useEffect, useState } from 'react'; +import { requestAPI } from '../handler'; +import { ReactWidget } from '@jupyterlab/ui-components'; +import { BarChart, Bar, Cell, YAxis, XAxis, Tooltip } from 'recharts'; +import { scaleLinear } from 'd3-scale'; +import { renderCustomTooltip } from '../components/tooltipUtils'; +import { format } from 'd3-format'; +import { BAR_COLOR_LINEAR_RANGE } from '../assets/constants'; +import AutoSizer from 'react-virtualized-auto-sizer'; + +interface INvLinkChartProps { + nvlink_tx: number[]; + nvlink_rx: number[]; + max_rxtx_bw: number; +} + +const NvLinkThroughputChart = (): JSX.Element => { + const [nvlinkStats, setNvLinkStats] = useState(); + + useEffect(() => { + async function fetchGPUMemory() { + const response = await requestAPI('nvlink_throughput'); + console.log(response); + setNvLinkStats(response); + } + + fetchGPUMemory(); + }, []); + + useEffect(() => { + async function fetchGPUMemory() { + const response = await requestAPI('nvlink_throughput'); + setNvLinkStats(response); + } + const intervalId = setInterval(() => { + fetchGPUMemory(); + }, 1000); + + return () => clearInterval(intervalId); + }, []); + + const gpuCount = nvlinkStats?.nvlink_rx.length; + const data = Array.from(Array(gpuCount).keys()).map(index => ({ + name: `GPU ${index}`, + rx: nvlinkStats?.nvlink_rx[index] || 0, + tx: nvlinkStats?.nvlink_tx[index] || 0, + maxTP: nvlinkStats?.max_rxtx_bw || 0 + })); + + const colorScale = scaleLinear() + .domain([0, 1]) + .range(BAR_COLOR_LINEAR_RANGE); + + const formatBytes = (bytes: number): string => { + return `${format('.2s')(bytes)}B`; + }; + + return ( +
+ + {({ height, width }: { height: number; width: number }) => ( +
+ + TX NvLink [B/s] + + + + + + + renderCustomTooltip(data, { valueFormatter: formatBytes }) + } + /> + + {data.map((entry, index) => ( + + ))} + + + + RX NvLink [B/s] + + + + + + + renderCustomTooltip(data, { valueFormatter: formatBytes }) + } + /> + + {data.map((entry, index) => ( + + ))} + + +
+ )} +
+
+ ); +}; + +export class NvLinkThroughputChartWidget extends ReactWidget { + render(): JSX.Element { + return ; + } +} diff --git a/src/charts/NvLinkTimelineChart.tsx b/src/charts/NvLinkTimelineChart.tsx new file mode 100644 index 0000000..d32a805 --- /dev/null +++ b/src/charts/NvLinkTimelineChart.tsx @@ -0,0 +1,157 @@ +import React, { useEffect, useState } from 'react'; +import { requestAPI } from '../handler'; +import { ReactWidget, Button } from '@jupyterlab/ui-components'; +import AutoSizer from 'react-virtualized-auto-sizer'; +import { CustomLineChart } from '../components/customLineChart'; +import { Line, XAxis, YAxis, Brush, LineChart } from 'recharts'; +import { formatDate, formatBytes } from '../components/formatUtils'; +import { pauseIcon, playIcon } from '../assets/icons'; +import { scaleLinear } from 'd3-scale'; +import { GPU_COLOR_CATEGORICAL_RANGE } from '../assets/constants'; + +interface INvLinkChartProps { + time: number; + nvlink_tx: number[]; + nvlink_rx: number[]; + max_rxtx_bw: number; +} + +const NvLinkTimelineChart = (): JSX.Element => { + const [nvlinkStats, setNvLinkStats] = useState([]); + const [tempData, setTempData] = useState([]); + const [isPaused, setIsPaused] = useState(false); + const ngpus = nvlinkStats[0]?.nvlink_tx.length || 0; + + useEffect(() => { + async function fetchNvLinkStats() { + const response = await requestAPI('nvlink_throughput'); + response.time = Date.now(); + if (!isPaused) { + setNvLinkStats(prevData => { + if (tempData.length > 1) { + prevData = [...prevData, ...tempData]; + } + const newData = [...prevData, response]; + return newData; + }); + setTempData([]); + } else { + setTempData([...tempData, response]); + } + } + + const interval = setInterval(fetchNvLinkStats, 1000); + + return () => clearInterval(interval); + }, [isPaused, tempData]); + + const handlePauseClick = () => { + setIsPaused(!isPaused); + }; + + const colorScale = scaleLinear() + .domain([0, ngpus]) + .range(GPU_COLOR_CATEGORICAL_RANGE); + + 
return ( +
+ + {({ height, width }: { height: number; width: number }) => ( +
+ + {nvlinkStats[0] && + Object.keys(nvlinkStats[0].nvlink_tx).map( + (gpu: any, index: number) => ( + + ) + )} + + + {nvlinkStats[0] && + Object.keys(nvlinkStats[0].nvlink_rx).map( + (gpu: any, index: number) => ( + + ) + )} + +
+ + + + + + +
+
+ )} +
+
+ ); +}; + +export class NvLinkTimelineChartWidget extends ReactWidget { + render(): JSX.Element { + return ; + } +} diff --git a/src/charts/PciThroughputChart.tsx b/src/charts/PciThroughputChart.tsx new file mode 100644 index 0000000..b105fa8 --- /dev/null +++ b/src/charts/PciThroughputChart.tsx @@ -0,0 +1,139 @@ +import React, { useEffect, useState } from 'react'; +import { requestAPI } from '../handler'; +import { ReactWidget } from '@jupyterlab/ui-components'; +import { BarChart, Bar, Cell, YAxis, XAxis, Tooltip } from 'recharts'; +import { scaleLinear } from 'd3-scale'; +import { renderCustomTooltip } from '../components/tooltipUtils'; +import AutoSizer from 'react-virtualized-auto-sizer'; +import { formatBytes } from '../components/formatUtils'; +import { BAR_COLOR_LINEAR_RANGE } from '../assets/constants'; +interface IPciChartProps { + pci_tx: number[]; + pci_rx: number[]; + max_rxtx_tp: number; +} + +const PciThroughputChart = (): JSX.Element => { + const [pciStats, setPciStats] = useState(); + + useEffect(() => { + async function fetchGPUMemory() { + const response = await requestAPI('pci_stats'); + console.log(response); + setPciStats(response); + } + + fetchGPUMemory(); + }, []); + + useEffect(() => { + async function fetchGPUMemory() { + const response = await requestAPI('pci_stats'); + setPciStats(response); + } + const intervalId = setInterval(() => { + fetchGPUMemory(); + }, 1000); + + return () => clearInterval(intervalId); + }, []); + + const gpuCount = pciStats?.pci_tx.length; + const data = Array.from(Array(gpuCount).keys()).map(index => ({ + name: `GPU ${index}`, + rx: pciStats?.pci_rx[index] || 0, + tx: pciStats?.pci_tx[index] || 0, + maxTP: pciStats?.max_rxtx_tp || 0 + })); + + const colorScale = scaleLinear() + .domain([0, 1]) + .range(BAR_COLOR_LINEAR_RANGE); + + return ( +
+ + {({ height, width }: { height: number; width: number }) => ( +
+ + TX PCIe [B/s] + + + + + + + renderCustomTooltip(data, { valueFormatter: formatBytes }) + } + /> + + {data.map((entry, index) => ( + + ))} + + + + RX PCIe [B/s] + + + + + + + renderCustomTooltip(data, { valueFormatter: formatBytes }) + } + /> + + {data.map((entry, index) => ( + + ))} + + +
+ )} +
+
+ ); +}; + +export class PciThroughputChartWidget extends ReactWidget { + render(): JSX.Element { + return ; + } +} diff --git a/src/charts/index.ts b/src/charts/index.ts new file mode 100644 index 0000000..dba28b7 --- /dev/null +++ b/src/charts/index.ts @@ -0,0 +1,16 @@ +import { GpuMemoryChartWidget } from './GpuMemoryChart'; +import { GpuUtilizationChartWidget } from './GpuUtilizationChart'; +import { GpuResourceChartWidget } from './GpuResourceChart'; +import { MachineResourceChartWidget } from './MachineResourceChart'; +import { PciThroughputChartWidget } from './PciThroughputChart'; +import { NvLinkThroughputChartWidget } from './NvLinkThroughputChart'; +import { NvLinkTimelineChartWidget } from './NvLinkTimelineChart'; +export { + GpuMemoryChartWidget, + GpuUtilizationChartWidget, + GpuResourceChartWidget, + MachineResourceChartWidget, + PciThroughputChartWidget, + NvLinkThroughputChartWidget, + NvLinkTimelineChartWidget +}; diff --git a/src/components/customLineChart.tsx b/src/components/customLineChart.tsx new file mode 100644 index 0000000..bc113cc --- /dev/null +++ b/src/components/customLineChart.tsx @@ -0,0 +1,66 @@ +import React from 'react'; +import { + XAxis, + YAxis, + Tooltip, + Legend, + Brush, + LineChart, + CartesianGrid +} from 'recharts'; +import { renderCustomTooltip } from '../components/tooltipUtils'; + +export const CustomLineChart = ({ + data, + title = '', + yDomain, + xFormatter, + yFormatter, + width, + height, + syncId, + children +}: { + data: any[]; + title?: string; + yDomain?: [number, number]; + xFormatter?: (value: number | string | undefined) => string; + yFormatter?: (value: number | undefined) => string; + width: number; + height: number; + syncId: string; + children?: React.ReactNode; +}) => ( + <> + {title} + + + + + + renderCustomTooltip(data, { + labelFormatter: xFormatter, + valueFormatter: yFormatter + }) + } + /> + + {children} + + +
+ +); diff --git a/src/components/formatUtils.tsx b/src/components/formatUtils.tsx new file mode 100644 index 0000000..9038c4f --- /dev/null +++ b/src/components/formatUtils.tsx @@ -0,0 +1,9 @@ +import { format } from 'd3-format'; + +export const formatBytes = (value: number | undefined): string => { + return value !== undefined ? `${format('.2s')(value)}B` : ''; +}; + +export const formatDate = (value: number | string | undefined): string => { + return value ? new Date(value).toLocaleTimeString() : ''; +}; diff --git a/src/components/tooltipUtils.tsx b/src/components/tooltipUtils.tsx new file mode 100644 index 0000000..9315cc5 --- /dev/null +++ b/src/components/tooltipUtils.tsx @@ -0,0 +1,55 @@ +import React from 'react'; + +interface ITooltipProps { + active?: boolean; + payload?: any[]; + label?: string; + color?: string | undefined; +} + +interface ITooltipOptions { + labelFormatter?: (value: string | undefined) => string; + valueFormatter?: (value: number) => string; +} + +export function renderCustomTooltip( + data: ITooltipProps, + options: ITooltipOptions +): JSX.Element | null { + if (data.active && data.payload && data.payload.length) { + const { payload, label } = data; + const formatterLabel = options.labelFormatter + ? options.labelFormatter(label) + : label; + + const formattedYValues = payload.map( + (entry: any) => + `${entry.name}: ${ + options.valueFormatter + ? options.valueFormatter(entry.value) + : entry.value + }` + ); + + const color = payload.map((entry: any) => entry.color); + const columnCount = Math.ceil(formattedYValues.length / 5); + return ( +
+
{formatterLabel}
+
+ {formattedYValues.map((value: string, index: number) => ( +
+ {value} +
+ ))} +
+
+ ); + } + + return null; +} diff --git a/src/dashboard.tsx b/src/dashboard.tsx deleted file mode 100644 index a918df3..0000000 --- a/src/dashboard.tsx +++ /dev/null @@ -1,231 +0,0 @@ -import { IFrame, MainAreaWidget } from '@jupyterlab/apputils'; - -import { URLExt } from '@jupyterlab/coreutils'; - -import { ServerConnection } from '@jupyterlab/services'; - -import { JSONExt, JSONObject } from '@lumino/coreutils'; - -import { Message } from '@lumino/messaging'; - -import { Widget, PanelLayout } from '@lumino/widgets'; - -import * as React from 'react'; -import * as ReactDOM from 'react-dom'; - -/** - * A class for hosting a Bokeh dashboard in an iframe. - */ -export class BokehDashboard extends MainAreaWidget