From e201ce628e781769b54121ac83a10cfee379f6b8 Mon Sep 17 00:00:00 2001
From: Yifan Xiong <yifan.xiong@microsoft.com>
Date: Fri, 22 Nov 2024 18:37:58 -0800
Subject: [PATCH 01/10] Upgrade dependency versions in pipeline

Upgrade dependency versions in Azure pipeline:
* Remove Python 3.6 and add Python 3.10 for cpu-unit-test
* Upgrade CUDA from 11.1 to 12.4 for cuda-unit-test
* Update labels accordingly
---
 .azure-pipelines/ansible-integration-test.yml | 1 +
 .azure-pipelines/cpu-unit-test.yml            | 4 ++--
 .azure-pipelines/cuda-unit-test.yml           | 3 ++-
 .codecov.yml                                  | 6 ++++--
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/.azure-pipelines/ansible-integration-test.yml b/.azure-pipelines/ansible-integration-test.yml
index f5b34dd60..9c8550966 100644
--- a/.azure-pipelines/ansible-integration-test.yml
+++ b/.azure-pipelines/ansible-integration-test.yml
@@ -7,6 +7,7 @@ trigger:
 
 pool:
   name: SuperBench CI
+  demands: ansible-agent
   vmImage: ubuntu-latest
 
 container:
diff --git a/.azure-pipelines/cpu-unit-test.yml b/.azure-pipelines/cpu-unit-test.yml
index 7fc698f4f..1de67824f 100644
--- a/.azure-pipelines/cpu-unit-test.yml
+++ b/.azure-pipelines/cpu-unit-test.yml
@@ -7,12 +7,12 @@ trigger:
 
 strategy:
   matrix:
-    python-3.6:
-      imageTag: '3.6'
     python-3.7:
       imageTag: '3.7'
     python-3.8:
       imageTag: '3.8'
+    python-3.10:
+      imageTag: '3.10'
     # TODO
     #python-latest:
     #  imageTag: '3'
diff --git a/.azure-pipelines/cuda-unit-test.yml b/.azure-pipelines/cuda-unit-test.yml
index e0a69fc0d..2dbdfb9a5 100644
--- a/.azure-pipelines/cuda-unit-test.yml
+++ b/.azure-pipelines/cuda-unit-test.yml
@@ -7,10 +7,11 @@ trigger:
 
 pool:
   name: SuperBench CI
+  demands: cuda-agent
   vmImage: ubuntu-latest
 
 container:
-  image: nvcr.io/nvidia/pytorch:20.12-py3
+  image: nvcr.io/nvidia/pytorch:24.03-py3
   options: '-v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker -v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/:/usr/lib/sudo/'
 
 steps:
diff --git a/.codecov.yml b/.codecov.yml
index 81d50f8bc..8f9f5de87 100644
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -14,8 +14,9 @@ coverage:
         target: 80%
         threshold: 1%
         flags:
-          - cpu-python3.6-unit-test
           - cpu-python3.7-unit-test
+          - cpu-python3.8-unit-test
+          - cpu-python3.10-unit-test
           - cuda-unit-test
           - directx-unit-test
     patch:
@@ -23,7 +24,8 @@ coverage:
         target: 80%
         threshold: 1%
         flags:
-          - cpu-python3.6-unit-test
           - cpu-python3.7-unit-test
+          - cpu-python3.8-unit-test
+          - cpu-python3.10-unit-test
           - cuda-unit-test
           - directx-unit-test

From 1d2c652d30467bf1fd8e36adf7735c4ba5e881f5 Mon Sep 17 00:00:00 2001
From: Yifan Xiong <yifan.xiong@microsoft.com>
Date: Fri, 22 Nov 2024 19:08:06 -0800
Subject: [PATCH 02/10] Fix sudo issue inside container

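Stop bind-mounting the host's sudo binary and libraries into the CUDA test
container. Name the container cuda-ci, mount the docker binary read-only, and
install the apt packages as root through `docker exec` instead of `sudo`.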
---
 .azure-pipelines/cuda-unit-test.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.azure-pipelines/cuda-unit-test.yml b/.azure-pipelines/cuda-unit-test.yml
index 2dbdfb9a5..1885b7c85 100644
--- a/.azure-pipelines/cuda-unit-test.yml
+++ b/.azure-pipelines/cuda-unit-test.yml
@@ -12,18 +12,18 @@ pool:
 
 container:
   image: nvcr.io/nvidia/pytorch:24.03-py3
-  options: '-v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker -v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/:/usr/lib/sudo/'
+  options: '--name cuda-ci -v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker:ro'
 
 steps:
   - script: |
       echo "##vso[task.prependpath]$HOME/.local/bin"
     displayName: Export path
   - script: |
+      docker exec -t -u root -e DEBIAN_FRONTEND=noninteractive cuda-ci bash -c \
+        "apt-get update -y && apt-get install -y ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswresample-dev"
       python3 -m pip install --upgrade pip setuptools==65.7
       python3 -m pip install .[test,nvworker]
       make postinstall
-      sudo DEBIAN_FRONTEND=noninteractive apt-get update
-      sudo DEBIAN_FRONTEND=noninteractive apt-get install -y ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswresample-dev
     displayName: Install dependencies
   - script: |
       python3 setup.py lint

From e0da4c43c3ea55dfa1056ea7197654f520532fab Mon Sep 17 00:00:00 2001
From: Yifan Xiong <yifan.xiong@microsoft.com>
Date: Fri, 22 Nov 2024 19:13:09 -0800
Subject: [PATCH 03/10] Update

Run apt-get update and apt-get install with -q (quiet) in the CUDA unit test
pipeline.
---
 .azure-pipelines/cuda-unit-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.azure-pipelines/cuda-unit-test.yml b/.azure-pipelines/cuda-unit-test.yml
index 1885b7c85..d53593850 100644
--- a/.azure-pipelines/cuda-unit-test.yml
+++ b/.azure-pipelines/cuda-unit-test.yml
@@ -20,7 +20,7 @@ steps:
     displayName: Export path
   - script: |
       docker exec -t -u root -e DEBIAN_FRONTEND=noninteractive cuda-ci bash -c \
-        "apt-get update -y && apt-get install -y ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswresample-dev"
+        "apt-get update -y -q && apt-get install -y -q ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswresample-dev"
       python3 -m pip install --upgrade pip setuptools==65.7
       python3 -m pip install .[test,nvworker]
       make postinstall

From 2a240b63babe29772364d79861ce3f0639e55fd1 Mon Sep 17 00:00:00 2001
From: Yifan Xiong <yifan.xiong@microsoft.com>
Date: Fri, 22 Nov 2024 19:40:48 -0800
Subject: [PATCH 04/10] Fix build

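Install libboost-program-options-dev and sudo in the CUDA test container, and
pipe `yes` into apt-get install.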
---
 .azure-pipelines/cuda-unit-test.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.azure-pipelines/cuda-unit-test.yml b/.azure-pipelines/cuda-unit-test.yml
index d53593850..649a21f44 100644
--- a/.azure-pipelines/cuda-unit-test.yml
+++ b/.azure-pipelines/cuda-unit-test.yml
@@ -20,7 +20,8 @@ steps:
     displayName: Export path
   - script: |
       docker exec -t -u root -e DEBIAN_FRONTEND=noninteractive cuda-ci bash -c \
-        "apt-get update -y -q && apt-get install -y -q ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswresample-dev"
+        "apt-get update -y -q && yes | apt-get install -y -q \
+        ffmpeg libavcodec-dev libavformat-dev libavutil-dev libboost-program-options-dev libswresample-dev sudo"
       python3 -m pip install --upgrade pip setuptools==65.7
       python3 -m pip install .[test,nvworker]
       make postinstall

From 682b3ce591007f10433ce799b6a8b8d225aeaeab Mon Sep 17 00:00:00 2001
From: Yifan Xiong <yifan.xiong@microsoft.com>
Date: Fri, 22 Nov 2024 21:38:43 -0800
Subject: [PATCH 05/10] Fix build

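Install sudo in its own apt-get step fed by `yes ''`, separate from the other
packages. Import `nan` instead of `NaN` from numpy in the SummaryOp test, since
the `NaN` alias was removed in NumPy 2.0.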
---
 .azure-pipelines/cuda-unit-test.yml | 6 ++++--
 tests/analyzer/test_summaryop.py    | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/.azure-pipelines/cuda-unit-test.yml b/.azure-pipelines/cuda-unit-test.yml
index 649a21f44..36f03d242 100644
--- a/.azure-pipelines/cuda-unit-test.yml
+++ b/.azure-pipelines/cuda-unit-test.yml
@@ -20,8 +20,10 @@ steps:
     displayName: Export path
   - script: |
       docker exec -t -u root -e DEBIAN_FRONTEND=noninteractive cuda-ci bash -c \
-        "apt-get update -y -q && yes | apt-get install -y -q \
-        ffmpeg libavcodec-dev libavformat-dev libavutil-dev libboost-program-options-dev libswresample-dev sudo"
+        "apt-get update -y -q && \
+        yes '' | apt-get install -y -q sudo && \
+        apt-get install -y -q \
+        ffmpeg libavcodec-dev libavformat-dev libavutil-dev libboost-program-options-dev libswresample-dev"
       python3 -m pip install --upgrade pip setuptools==65.7
       python3 -m pip install .[test,nvworker]
       make postinstall
diff --git a/tests/analyzer/test_summaryop.py b/tests/analyzer/test_summaryop.py
index 3b1054444..889ebc1e8 100644
--- a/tests/analyzer/test_summaryop.py
+++ b/tests/analyzer/test_summaryop.py
@@ -4,7 +4,7 @@
 """Tests for SummaryOp module."""
 
 import unittest
-from numpy import NaN, float64
+from numpy import nan, float64
 
 import pandas as pd
 
@@ -55,7 +55,7 @@ def test_rule_op(self):
         # Test - std
         result = SummaryOp.std(raw_data_df)
         print(result)
-        expectedResult = pd.Series([3.0, 3.0, 2.1213203435596424, NaN], index=['a', 'b', 'c', 'd'], dtype=float64)
+        expectedResult = pd.Series([3.0, 3.0, 2.1213203435596424, nan], index=['a', 'b', 'c', 'd'], dtype=float64)
         pd.testing.assert_series_equal(result, expectedResult)
         # Test - count
         result = SummaryOp.count(raw_data_df)

From 957983b65e38dbefbf113a4b4a94ebff9ce41834 Mon Sep 17 00:00:00 2001
From: Yifan Xiong <yifan.xiong@microsoft.com>
Date: Mon, 25 Nov 2024 23:07:38 -0800
Subject: [PATCH 06/10] Fix unit test

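Require transformers>=4.28.0, raise the ONNX export opset version from 10 to
14, and set the argparse optionals group title explicitly for Python 3.10.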
---
 setup.py                                                      | 2 +-
 superbench/benchmarks/base.py                                 | 2 ++
 .../benchmarks/micro_benchmarks/_export_torch_to_onnx.py      | 4 ++--
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index 738095889..584aed22e 100644
--- a/setup.py
+++ b/setup.py
@@ -211,7 +211,7 @@ def run(self):
             'torch': [
                 'torch>=1.7.0a0',
                 'torchvision>=0.8.0a0',
-                'transformers>=4.3.3, <4.23.0',
+                'transformers>=4.28.0',
             ],
             'ort': [
                 'onnx>=1.10.2',
diff --git a/superbench/benchmarks/base.py b/superbench/benchmarks/base.py
index 86c6b6d15..323f366d8 100644
--- a/superbench/benchmarks/base.py
+++ b/superbench/benchmarks/base.py
@@ -48,6 +48,8 @@ def __init__(self, name, parameters=''):
             allow_abbrev=False,
             formatter_class=SortedMetavarTypeHelpFormatter,
         )
+        # Fix optionals title in Python 3.10
+        self._parser._optionals.title = 'optional arguments:'
         self._args = None
         self._curr_run_index = 0
         self._result = None
diff --git a/superbench/benchmarks/micro_benchmarks/_export_torch_to_onnx.py b/superbench/benchmarks/micro_benchmarks/_export_torch_to_onnx.py
index 1e37b793d..abb75676d 100644
--- a/superbench/benchmarks/micro_benchmarks/_export_torch_to_onnx.py
+++ b/superbench/benchmarks/micro_benchmarks/_export_torch_to_onnx.py
@@ -138,7 +138,7 @@ def export_torchvision_model(self, model_name, batch_size=1):
             model,
             dummy_input,
             file_name,
-            opset_version=10,
+            opset_version=14,
             operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK,
             input_names=['input'],
             output_names=['output'],
@@ -179,7 +179,7 @@ def export_benchmark_model(self, model_name, batch_size=1, seq_length=512):
             model,
             dummy_input,
             file_name,
-            opset_version=10,
+            opset_version=14,
             do_constant_folding=True,
             input_names=['input'],
             output_names=['output'],

From 0eb654f5e7cb0640958ff2cc438dd26de5e10c67 Mon Sep 17 00:00:00 2001
From: Yifan Xiong <yifan.xiong@microsoft.com>
Date: Mon, 25 Nov 2024 23:26:28 -0800
Subject: [PATCH 07/10] Update

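Drop Python 3.6 from the documented requirements, the trove classifiers and
python_requires, and add the Python 3.10 classifier.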

Co-authored-by: Dilip Patlolla <dilipreddi@gmail.com>
---
 docs/getting-started/installation.mdx | 2 +-
 setup.py                              | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/getting-started/installation.mdx b/docs/getting-started/installation.mdx
index 30fdee829..0a582e92f 100644
--- a/docs/getting-started/installation.mdx
+++ b/docs/getting-started/installation.mdx
@@ -26,7 +26,7 @@ Here're the system requirements for control node.
 ### Requirements
 
 * Latest version of Linux, you're highly encouraged to use Ubuntu 18.04 or later.
-* [Python](https://www.python.org/) version 3.6 or later (which can be checked by running `python3 --version`).
+* [Python](https://www.python.org/) version 3.7 or later (which can be checked by running `python3 --version`).
 * [Pip](https://pip.pypa.io/en/stable/installing/) version 18.0 or later (which can be checked by running `python3 -m pip --version`).
 
 :::note
diff --git a/setup.py b/setup.py
index 584aed22e..2474dcbc1 100644
--- a/setup.py
+++ b/setup.py
@@ -131,17 +131,17 @@ def run(self):
         'Operating System :: POSIX',
         'Programming Language :: Python :: 3',
         'Programming Language :: Python :: 3 :: Only',
-        'Programming Language :: Python :: 3.6',
         'Programming Language :: Python :: 3.7',
         'Programming Language :: Python :: 3.8',
         'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
         'Topic :: System :: Benchmark',
         'Topic :: System :: Clustering',
         'Topic :: System :: Hardware',
     ],
     keywords='benchmark, AI systems',
     packages=find_packages(exclude=['tests']),
-    python_requires='>=3.6, <4',
+    python_requires='>=3.7, <4',
     use_scm_version={
         'local_scheme': 'node-and-date',
         'version_scheme': lambda _: superbench.__version__,

From 999cc3342c7141c211c551bdf743970ea38ed720 Mon Sep 17 00:00:00 2001
From: Yifan Xiong <yifan.xiong@microsoft.com>
Date: Mon, 25 Nov 2024 23:29:31 -0800
Subject: [PATCH 08/10] Fix unit test

Remove the trailing colon from the argparse optionals group title.
---
 superbench/benchmarks/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/superbench/benchmarks/base.py b/superbench/benchmarks/base.py
index 323f366d8..014103744 100644
--- a/superbench/benchmarks/base.py
+++ b/superbench/benchmarks/base.py
@@ -49,7 +49,7 @@ def __init__(self, name, parameters=''):
             formatter_class=SortedMetavarTypeHelpFormatter,
         )
         # Fix optionals title in Python 3.10
-        self._parser._optionals.title = 'optional arguments:'
+        self._parser._optionals.title = 'optional arguments'
         self._args = None
         self._curr_run_index = 0
         self._result = None

From 9788f534483f0a83493c6a371d4ac039d96c627c Mon Sep 17 00:00:00 2001
From: pdr <dilipreddi@gmail.com>
Date: Tue, 26 Nov 2024 20:37:22 -0800
Subject: [PATCH 09/10] fix cache tests issues with cuda 12.4 and pytorch 2
 (#672)

Fix cache test issues with CUDA 12.4 and PyTorch 2.

https://pytorch.org/docs/stable/notes/cuda.html#cuda-memory-management:
```
PyTorch uses a caching memory allocator to speed up memory allocations. This allows fast memory deallocation without
device synchronizations. However, the unused memory managed by the allocator will still show as if used in nvidia-smi.
You can use memory_allocated() and max_memory_allocated() to monitor memory occupied by tensors, and use
memory_reserved() and max_memory_reserved() to monitor the total amount of memory managed by the caching allocator.

Calling empty_cache() releases all unused cached memory from PyTorch so that those can be used by other GPU
applications. However, the occupied GPU memory by tensors will not be freed so it can not increase the amount of GPU
memory available for PyTorch.
```
---
 .../model_benchmarks/test_pytorch_base.py     | 25 ++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/tests/benchmarks/model_benchmarks/test_pytorch_base.py b/tests/benchmarks/model_benchmarks/test_pytorch_base.py
index d92cd187b..96e1718a0 100644
--- a/tests/benchmarks/model_benchmarks/test_pytorch_base.py
+++ b/tests/benchmarks/model_benchmarks/test_pytorch_base.py
@@ -250,16 +250,35 @@ def test_pytorch_empty_cache():
     # Register mnist benchmark.
     BenchmarkRegistry.register_benchmark('pytorch-mnist', PytorchMNIST)
 
+    # Get initial memory reserved
+    init_res_memory = torch.cuda.memory_reserved()
+
     # Test cache empty by manually calling torch.cuda.empty_cache().
     parameters = '--batch_size 32 --num_warmup 8 --num_steps 64 --model_action train'
     benchmark = PytorchMNIST('pytorch-mnist', parameters=parameters)
+
     assert (benchmark)
     assert (benchmark._preprocess())
     assert (benchmark._benchmark())
     del benchmark
-    assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] > 0)
+
+    # Get current reserved memory after benchmark
+    post_bm_res_memory = torch.cuda.memory_reserved()
+
+    # Assert that reserved memory has not decreased after benchmark
+    assert (post_bm_res_memory >= init_res_memory)
+
+    # Manually empty cache and get reserved memory
+    # Calling empty_cache() releases all unused cached memory from PyTorch so that those can be used by
+    # other GPU applications. However, the occupied GPU memory by tensors will not be freed so it can not
+    # increase the amount of GPU memory available for PyTorch.
+    # https://pytorch.org/docs/stable/notes/cuda.html#cuda-memory-management
     torch.cuda.empty_cache()
-    assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] == 0)
+    post_empty_cache_res_memory = torch.cuda.memory_reserved()
+
+    # Assert that reserved memory does not grow after manually emptying the cache. It is not guaranteed to drop
+    # back to init_res_memory because some tensors may not have been released.
+    assert (post_empty_cache_res_memory <= post_bm_res_memory)
 
     # Test automatic cache empty.
     context = BenchmarkRegistry.create_benchmark_context(
@@ -268,4 +287,4 @@ def test_pytorch_empty_cache():
 
     benchmark = BenchmarkRegistry.launch_benchmark(context)
     assert (benchmark)
-    assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] == 0)
+    assert (torch.cuda.memory_reserved() == post_empty_cache_res_memory)

From 7d1f04e89eb98b811eeefb0637055ae209e967e8 Mon Sep 17 00:00:00 2001
From: Yifan Xiong <yifan.xiong@microsoft.com>
Date: Tue, 26 Nov 2024 20:39:41 -0800
Subject: [PATCH 10/10] Revert changes for transformer

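Revert the transformers version requirement and the ONNX export opset version
changed earlier in this series: back to transformers>=4.3.3, <4.23.0 and
opset_version=10.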
---
 setup.py                                                      | 2 +-
 .../benchmarks/micro_benchmarks/_export_torch_to_onnx.py      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index 2474dcbc1..cf9779a08 100644
--- a/setup.py
+++ b/setup.py
@@ -211,7 +211,7 @@ def run(self):
             'torch': [
                 'torch>=1.7.0a0',
                 'torchvision>=0.8.0a0',
-                'transformers>=4.28.0',
+                'transformers>=4.3.3, <4.23.0',
             ],
             'ort': [
                 'onnx>=1.10.2',
diff --git a/superbench/benchmarks/micro_benchmarks/_export_torch_to_onnx.py b/superbench/benchmarks/micro_benchmarks/_export_torch_to_onnx.py
index abb75676d..1e37b793d 100644
--- a/superbench/benchmarks/micro_benchmarks/_export_torch_to_onnx.py
+++ b/superbench/benchmarks/micro_benchmarks/_export_torch_to_onnx.py
@@ -138,7 +138,7 @@ def export_torchvision_model(self, model_name, batch_size=1):
             model,
             dummy_input,
             file_name,
-            opset_version=14,
+            opset_version=10,
             operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK,
             input_names=['input'],
             output_names=['output'],
@@ -179,7 +179,7 @@ def export_benchmark_model(self, model_name, batch_size=1, seq_length=512):
             model,
             dummy_input,
             file_name,
-            opset_version=14,
+            opset_version=10,
             do_constant_folding=True,
             input_names=['input'],
             output_names=['output'],