microsoft · abuccts · Nov 27, 2024 · Nov 23, 2024 · Nov 23, 2024 · Nov 23, 2024
@@ -7,6 +7,7 @@ trigger:
 
 pool:
   name: SuperBench CI
+  demands: ansible-agent
   vmImage: ubuntu-latest
 
 container:

@@ -7,12 +7,12 @@ trigger:
 
 strategy:
   matrix:
-    python-3.6:
-      imageTag: '3.6'
     python-3.7:
       imageTag: '3.7'
     python-3.8:
       imageTag: '3.8'
+    python-3.10:
+      imageTag: '3.10'
     # TODO
     #python-latest:
     #  imageTag: '3'

@@ -7,22 +7,26 @@ trigger:
 
 pool:
   name: SuperBench CI
+  demands: cuda-agent
   vmImage: ubuntu-latest
 
 container:
-  image: nvcr.io/nvidia/pytorch:20.12-py3
-  options: '-v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker -v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/:/usr/lib/sudo/'
+  image: nvcr.io/nvidia/pytorch:24.03-py3
+  options: '--name cuda-ci -v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker:ro'
 
 steps:
   - script: |
       echo "##vso[task.prependpath]$HOME/.local/bin"
     displayName: Export path
   - script: |
+      docker exec -t -u root -e DEBIAN_FRONTEND=noninteractive cuda-ci bash -c \
+        "apt-get update -y -q && \
+        yes '' | apt-get install -y -q sudo && \
+        apt-get install -y -q \
+        ffmpeg libavcodec-dev libavformat-dev libavutil-dev libboost-program-options-dev libswresample-dev"
       python3 -m pip install --upgrade pip setuptools==65.7
       python3 -m pip install .[test,nvworker]
       make postinstall
-      sudo DEBIAN_FRONTEND=noninteractive apt-get update
-      sudo DEBIAN_FRONTEND=noninteractive apt-get install -y ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswresample-dev
     displayName: Install dependencies
   - script: |
       python3 setup.py lint

@@ -14,16 +14,18 @@ coverage:
         target: 80%
         threshold: 1%
         flags:
-          - cpu-python3.6-unit-test
           - cpu-python3.7-unit-test
+          - cpu-python3.8-unit-test
+          - cpu-python3.10-unit-test
           - cuda-unit-test
           - directx-unit-test
     patch:
       default:
         target: 80%
         threshold: 1%
         flags:
-          - cpu-python3.6-unit-test
           - cpu-python3.7-unit-test
+          - cpu-python3.8-unit-test
+          - cpu-python3.10-unit-test
           - cuda-unit-test
           - directx-unit-test
@@ -26,7 +26,7 @@ Here're the system requirements for control node.
 ### Requirements
 
 * A recent version of Linux. Ubuntu 18.04 or later is strongly recommended.
-* [Python](https://www.python.org/) version 3.6 or later (which can be checked by running `python3 --version`).
+* [Python](https://www.python.org/) version 3.7 or later (which can be checked by running `python3 --version`).
 * [Pip](https://pip.pypa.io/en/stable/installing/) version 18.0 or later (which can be checked by running `python3 -m pip --version`).
 
 :::note

@@ -131,17 +131,17 @@ def run(self):
         'Operating System :: POSIX',
         'Programming Language :: Python :: 3',
         'Programming Language :: Python :: 3 :: Only',
-        'Programming Language :: Python :: 3.6',
         'Programming Language :: Python :: 3.7',
         'Programming Language :: Python :: 3.8',
         'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
         'Topic :: System :: Benchmark',
         'Topic :: System :: Clustering',
         'Topic :: System :: Hardware',
     ],
     keywords='benchmark, AI systems',
     packages=find_packages(exclude=['tests']),
-    python_requires='>=3.6, <4',
+    python_requires='>=3.7, <4',
     use_scm_version={
         'local_scheme': 'node-and-date',
         'version_scheme': lambda _: superbench.__version__,

@@ -48,6 +48,8 @@ def __init__(self, name, parameters=''):
             allow_abbrev=False,
             formatter_class=SortedMetavarTypeHelpFormatter,
         )
+        # Python 3.10 renamed argparse's default 'optional arguments' group to 'options'; keep the old title.
+        self._parser._optionals.title = 'optional arguments'
         self._args = None
         self._curr_run_index = 0
         self._result = None

@@ -4,7 +4,7 @@
 """Tests for SummaryOp module."""
 
 import unittest
-from numpy import NaN, float64
+from numpy import nan, float64
 
 import pandas as pd
 
@@ -55,7 +55,7 @@ def test_rule_op(self):
         # Test - std
         result = SummaryOp.std(raw_data_df)
         print(result)
-        expectedResult = pd.Series([3.0, 3.0, 2.1213203435596424, NaN], index=['a', 'b', 'c', 'd'], dtype=float64)
+        expectedResult = pd.Series([3.0, 3.0, 2.1213203435596424, nan], index=['a', 'b', 'c', 'd'], dtype=float64)
         pd.testing.assert_series_equal(result, expectedResult)
         # Test - count
         result = SummaryOp.count(raw_data_df)

@@ -250,16 +250,35 @@ def test_pytorch_empty_cache():
     # Register mnist benchmark.
     BenchmarkRegistry.register_benchmark('pytorch-mnist', PytorchMNIST)
 
+    # Get initial memory reserved
+    init_res_memory = torch.cuda.memory_reserved()
+
     # Test cache empty by manually calling torch.cuda.empty_cache().
     parameters = '--batch_size 32 --num_warmup 8 --num_steps 64 --model_action train'
     benchmark = PytorchMNIST('pytorch-mnist', parameters=parameters)
+
     assert (benchmark)
     assert (benchmark._preprocess())
     assert (benchmark._benchmark())
     del benchmark
-    assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] > 0)
+
+    # Get current reserved memory after benchmark
+    post_bm_res_memory = torch.cuda.memory_reserved()
+
+    # Assert that reserved memory has not decreased after running the benchmark
+    assert (post_bm_res_memory >= init_res_memory)
+
+    # Manually empty the cache and read the reserved memory again.
+    # Calling empty_cache() releases all unused cached memory held by PyTorch so that it can be used by
+    # other GPU applications. However, GPU memory occupied by tensors is not freed, so the call does
+    # not increase the amount of GPU memory available to PyTorch.
+    # https://pytorch.org/docs/stable/notes/cuda.html#cuda-memory-management
     torch.cuda.empty_cache()
-    assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] == 0)
+    post_empty_cache_res_memory = torch.cuda.memory_reserved()
+
+    # Assert that reserved memory did not grow after manually emptying the cache. It is not guaranteed to
+    # drop back to init_res_memory because some tensors may not have been released.
+    assert (post_empty_cache_res_memory <= post_bm_res_memory)
 
     # Test automatic cache empty.
     context = BenchmarkRegistry.create_benchmark_context(
@@ -268,4 +287,4 @@ def test_pytorch_empty_cache():
 
     benchmark = BenchmarkRegistry.launch_benchmark(context)
     assert (benchmark)
-    assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] == 0)
+    assert (torch.cuda.memory_reserved() == post_empty_cache_res_memory)