From de427486599ae1ad93eeabe280adb96f2c5587ec Mon Sep 17 00:00:00 2001
From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com>
Date: Sat, 21 Sep 2024 15:13:58 +0530
Subject: [PATCH 01/67] added checksums

---
 script/get-ml-model-dlrm-terabyte/_cm.json | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/script/get-ml-model-dlrm-terabyte/_cm.json b/script/get-ml-model-dlrm-terabyte/_cm.json
index 553808932..622488825 100644
--- a/script/get-ml-model-dlrm-terabyte/_cm.json
+++ b/script/get-ml-model-dlrm-terabyte/_cm.json
@@ -68,7 +68,8 @@
         "CM_PACKAGE_URL": "https://dlrm.s3-us-west-1.amazonaws.com/models/tb00_40M.onnx.tar",
         "CM_UNTAR": "yes",
         "CM_ML_MODEL_FILE": "tb00_40M.onnx",
-        "CM_ML_MODEL_DLRM_MAX_INDEX_RANGE": "40000000"
+        "CM_ML_MODEL_DLRM_MAX_INDEX_RANGE": "40000000",
+        "CM_DOWNLOAD_CHECKSUM": "763b964eaffe5f86e92cdcb60c5dc0de"
       }
     },
     "pytorch": {
@@ -110,7 +111,8 @@
         "CM_PACKAGE_URL": "https://cloud.mlcommons.org/index.php/s/XzfSeLgW8FYfR3S/download",
         "CM_DAE_EXTRACT_DOWNLOADED": "yes",
         "CM_DOWNLOAD_FILENAME": "download",
-        "CM_EXTRACT_UNZIP": "yes"
+        "CM_EXTRACT_UNZIP": "yes",
+        "CM_DOWNLOAD_CHECKSUM": "07e76718b52601303bb5c54fc0a3500c"
       }
     },
     "wget": {
@@ -138,7 +140,8 @@
       "env": {
         "CM_ML_MODEL_ACCURACY": "0.8107",
         "CM_PACKAGE_URL": "https://dlrm.s3-us-west-1.amazonaws.com/models/tb0875_10M.pt",
-        "CM_ML_MODEL_DLRM_MAX_INDEX_RANGE": "10000000"
+        "CM_ML_MODEL_DLRM_MAX_INDEX_RANGE": "10000000",
+        "CM_DOWNLOAD_CHECKSUM": "b7cacffcf75f767faa9cb2af397723aa"
       }
     },
     "onnx,fp32,debug": {
@@ -147,7 +150,8 @@
         "CM_PACKAGE_URL": "https://dlrm.s3-us-west-1.amazonaws.com/models/tb0875_10M.onnx.tar",
         "CM_ML_MODEL_DLRM_MAX_INDEX_RANGE": "10000000",
         "CM_UNTAR": "yes",
-        "CM_ML_MODEL_FILE": "tb0875_10M.onnx"
+        "CM_ML_MODEL_FILE": "tb0875_10M.onnx",
+        "CM_DOWNLOAD_CHECKSUM": "d11255cd9926cda9181a347861e4d263"
       }
     },
     "weight_sharded": {

From 7249b7350e0857c2013abcaf703022753045d50f Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhukicks@gmail.com>
Date: Sat, 21 Sep 2024 18:23:10 +0530
Subject: [PATCH 02/67] corrected pre download clean

---
 script/download-file/customize.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/script/download-file/customize.py b/script/download-file/customize.py
index 0a281bce0..7f6b56eff 100644
--- a/script/download-file/customize.py
+++ b/script/download-file/customize.py
@@ -13,7 +13,7 @@ def preprocess(i):
     quiet = (env.get('CM_QUIET', False) == 'yes')
 
     tool = env.get('CM_DOWNLOAD_TOOL', '')
-    pre_clean = env.get('CM_PRE_DOWNLOAD_CLEAN', False)
+    pre_clean = env.get('CM_PRE_DOWNLOAD_CLEAN', True)
 
     #    xsep = '^&^&' if windows else '&&'
     xsep = '&&'
@@ -208,7 +208,8 @@ def preprocess(i):
         for x in ['CM_DOWNLOAD_CMD', 'CM_DOWNLOAD_CHECKSUM_CMD']:
             env[x+'_USED']='YES' if env.get(x,'')!='' else 'NO'
     else:
-        env['CM_PRE_DOWNLOAD_CLEAN_CMD'] = "rm -f {}".format(env['CM_DOWNLOAD_FILENAME'])
+        if pre_clean:
+            env['CM_PRE_DOWNLOAD_CLEAN_CMD'] = "rm -f {}".format(env['CM_DOWNLOAD_FILENAME'])
 
     return {'return':0}
 

From b2b7dfc25beebdf62a433f4fd001d59d3b2d4425 Mon Sep 17 00:00:00 2001
From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com>
Date: Sun, 22 Sep 2024 00:17:52 +0530
Subject: [PATCH 03/67] Removed CM_PRE_DOWNLOAD_CLEAN check before running pre download clean

---
 script/download-file/run.sh | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/script/download-file/run.sh b/script/download-file/run.sh
index b738a7cd1..552b27fad 100644
--- a/script/download-file/run.sh
+++ b/script/download-file/run.sh
@@ -35,10 +35,8 @@ fi
 
 if [[ ${require_download} == "1" ]]; then
   echo ""
-  if [ -e "${CM_PRE_DOWNLOAD_CLEAN}" ]; then
-    echo ${CM_PRE_DOWNLOAD_CLEAN_CMD}
-    ${CM_PRE_DOWNLOAD_CLEAN_CMD}
-  fi
+  echo ${CM_PRE_DOWNLOAD_CLEAN_CMD}
+  ${CM_PRE_DOWNLOAD_CLEAN_CMD}
 
   echo ""
   echo "${CM_DOWNLOAD_CMD}"

From 67dc9489155767a8ca8a969f0536c08398187700 Mon Sep 17 00:00:00 2001
From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com>
Date: Mon, 23 Sep 2024 11:22:15 +0530
Subject: [PATCH 04/67] Proper exit for unhandled md5sum errors

---
 script/download-file/customize.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/script/download-file/customize.py b/script/download-file/customize.py
index 7f6b56eff..1a639aafa 100644
--- a/script/download-file/customize.py
+++ b/script/download-file/customize.py
@@ -108,6 +108,8 @@ def preprocess(i):
                 elif "no such file" in checksum_result.stderr.lower():
                     #print(f"No file {env['CM_DOWNLOAD_FILENAME']}. Downloading through cmutil.")
                     cmutil_require_download = 1
+                elif checksum_result.returncode == 1:
+                    return {"return":1, "error":f"Error while checking checksum: {checksum_result.stderr}"}
                 else:
                     print(f"File {env['CM_DOWNLOAD_FILENAME']} already present, original checksum and computed checksum matches! Skipping Download..")
             else:

From 729a65ede8bcaf3eca6970caafc3b3f09a614205 Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhukicks@gmail.com>
Date: Mon, 23 Sep 2024 18:15:48 +0530
Subject: [PATCH 05/67] sdxl scc commit - WIP

---
 script/run-mlperf-inference-app/_cm.yaml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml
index 25bbde364..984752102 100644
--- a/script/run-mlperf-inference-app/_cm.yaml
+++ b/script/run-mlperf-inference-app/_cm.yaml
@@ -242,6 +242,16 @@ variations:
       CM_RUN_SUBMISSION_CHECKER: 'no'
     group: submission-generation
 
+  scc24-base:
+    adr:
+      coco2014-preprocessed:
+        tags: _size.50
+
+  scc24-main:
+    adr:
+      coco2014-preprocessed:
+        tags: _size.500
+
   r2.1:
     env:
       CM_MLPERF_INFERENCE_VERSION: '2.1'

From 4986d1fc2f1ae0044a225260851fc9b5ce621688 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Mon, 23 Sep 2024 07:41:22 -0700
Subject: [PATCH 06/67] Restrict the self-hosted runs to the runner repo

---
 .github/workflows/test-mlperf-inference-gptj.yml | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/test-mlperf-inference-gptj.yml b/.github/workflows/test-mlperf-inference-gptj.yml
index 040d00f9b..6728c8851 100644
--- a/.github/workflows/test-mlperf-inference-gptj.yml
+++ b/.github/workflows/test-mlperf-inference-gptj.yml
@@ -4,15 +4,12 @@
 name: MLPerf inference GPT-J
 
 on:
-  push:
-    branches: [ "main", "dev", "mlperf-inference" ]
-    paths:
-      - '.github/workflows/test-mlperf-inference-gptj.yml'
-      - '**'
-      - '!**.md'
+  schedule:
+    - cron: "1 1 * * */3"
 
 jobs:
   build:
+    if: github.repository_owner == 'gateoverflow'
     runs-on: [ self-hosted, linux, x64 ]
     strategy:
       fail-fast: false

From 30d90d20e6443f74afe172222d2ef3c5b82ec5b8 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Mon, 23 Sep 2024 11:45:04 -0700
Subject: [PATCH 07/67] Fix rocm pytorch install

---
 script/get-generic-python-lib/_cm.json | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/script/get-generic-python-lib/_cm.json b/script/get-generic-python-lib/_cm.json
index 487f79ccb..6143caca2 100644
--- a/script/get-generic-python-lib/_cm.json
+++ b/script/get-generic-python-lib/_cm.json
@@ -887,6 +887,7 @@
       "env": {
         "CM_GENERIC_PYTHON_PACKAGE_NAME": "torch",
         "CM_GENERIC_PYTHON_PIP_INDEX_URL": "https://download.pytorch.org/whl/nightly/rocm6.2",
+        "CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL": "",
         "CM_GENERIC_PYTHON_PIP_UNINSTALL_DEPS": "torch"
       },
       "new_env_keys": [
@@ -1009,8 +1010,9 @@
     "torchvision,rocm": {
       "env": {
         "CM_GENERIC_PYTHON_PACKAGE_NAME": "torchvision",
-        "CM_GENERIC_PYTHON_PIP_INDEX_URL": "https://download.pytorch.org/whl/rocm5.6",
-        "CM_GENERIC_PYTHON_PIP_UNINSTALL_DEPS": "torchvision"
+        "CM_GENERIC_PYTHON_PIP_INDEX_URL": "https://download.pytorch.org/whl/nightly/rocm6.2",
+        "CM_GENERIC_PYTHON_PIP_UNINSTALL_DEPS": "torchvision",
+        "CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL": ""
       },
       "new_env_keys": [
         "CM_TORCHVISION_VERSION*"

From 6373f6270c2cfeae5ef1f098b861c631f3dd491a Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Mon, 23 Sep 2024 13:06:40 -0700
Subject: [PATCH 08/67] Fixes for SCC24

---
 script/app-mlperf-inference-nvidia/_cm.yaml    |  4 ++++
 .../app-mlperf-inference-nvidia/customize.py   |  7 ++++---
 .../_cm.yaml                                   |  2 ++
 .../customize.py                               | 17 ++++++++++++-----
 script/run-mlperf-inference-app/_cm.yaml       | 18 ++++++++++++++++++
 5 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/_cm.yaml b/script/app-mlperf-inference-nvidia/_cm.yaml
index d9750841d..9b586f410 100644
--- a/script/app-mlperf-inference-nvidia/_cm.yaml
+++ b/script/app-mlperf-inference-nvidia/_cm.yaml
@@ -878,6 +878,8 @@ variations:
         tags: build,nvidia,inference,server
 
       - tags: reproduce,mlperf,inference,nvidia,harness,_preprocess_data
+        names:
+          - nvidia-preprocess-data
         inherit_variation_tags: true
         force_cache: true
         skip_inherit_variation_groups:
@@ -988,6 +990,8 @@ variations:
 
       - tags: reproduce,mlperf,inference,nvidia,harness,_preprocess_data
         inherit_variation_tags: true
+        names:
+          - nvidia-preprocess-data
         skip_inherit_variation_groups:
           - run-mode
           - loadgen-scenario
diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index 043c070c8..917102701 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -73,10 +73,11 @@ def preprocess(i):
     elif "stable-diffusion" in env["CM_MODEL"]:
         target_data_path = os.path.join(env['MLPERF_SCRATCH_PATH'], 'data', 'coco', 'SDXL')
         if not os.path.exists(target_data_path):
-            cmds.append("make download_data BENCHMARKS='stable-diffusion-xl'")
+            os.makedirs(target_data_path)
+            #cmds.append("make download_data BENCHMARKS='stable-diffusion-xl'")
             env['CM_REQUIRE_COCO2014_DOWNLOAD'] = 'yes'
-            cmds.append(f"cp -r \${CM_DATASET_PATH_ROOT}/captions/captions.tsv {target_data_path}/captions_5k_final.tsv" )
-            cmds.append(f"cp -r \${CM_DATASET_PATH_ROOT}/latents/latents.pt {target_data_path}/latents.pt" )
+            cmds.append(f"cp -r \$CM_DATASET_PATH_ROOT/captions/captions.tsv {target_data_path}/captions_5k_final.tsv" )
+            cmds.append(f"cp -r \$CM_DATASET_PATH_ROOT/latents/latents.pt {target_data_path}/latents.pt" )
         fp16_model_path = os.path.join(env['MLPERF_SCRATCH_PATH'], 'models', 'SDXL', 'official_pytorch', 'fp16', 'stable_diffusion_fp16')
 
         if not os.path.exists(os.path.dirname(fp16_model_path)):
diff --git a/script/clean-nvidia-mlperf-inference-scratch-space/_cm.yaml b/script/clean-nvidia-mlperf-inference-scratch-space/_cm.yaml
index 8d74e1e72..079fe309d 100644
--- a/script/clean-nvidia-mlperf-inference-scratch-space/_cm.yaml
+++ b/script/clean-nvidia-mlperf-inference-scratch-space/_cm.yaml
@@ -10,6 +10,8 @@ tags:
 - mlperf
 - inference
 uid: bb41f6e3608e4e8a
+input_mapping:
+  extra_cache_rm_tags: CM_CLEAN_EXTRA_CACHE_RM_TAGS
 deps:
   # Get Nvidia scratch space where data and models get downloaded
   - tags: get,mlperf,inference,nvidia,scratch,space
diff --git a/script/clean-nvidia-mlperf-inference-scratch-space/customize.py b/script/clean-nvidia-mlperf-inference-scratch-space/customize.py
index 8980de245..5a0a95e76 100644
--- a/script/clean-nvidia-mlperf-inference-scratch-space/customize.py
+++ b/script/clean-nvidia-mlperf-inference-scratch-space/customize.py
@@ -16,22 +16,29 @@ def preprocess(i):
 
     clean_cmd = ''
     cache_rm_tags = ''
+    extra_cache_rm_tags = env.get('CM_CLEAN_EXTRA_CACHE_RM_TAGS', '')
 
     if env.get('CM_MODEL', '') == 'sdxl':
         if env.get('CM_CLEAN_ARTIFACT_NAME', '') == 'downloaded_data':
             clean_cmd = f"""rm -rf {os.path.join(env['CM_NVIDIA_MLPERF_SCRATCH_PATH'], "data", "coco", "SDXL")} """
-            cache_rm_tags  = "nvidia-harness,_preprocessed_data,_sdxl"
+            cache_rm_tags  = "nvidia-harness,_preprocess_data,_sdxl"
         if env.get('CM_CLEAN_ARTIFACT_NAME', '') == 'preprocessed_data':
             clean_cmd = f"""rm -rf {os.path.join(env['CM_NVIDIA_MLPERF_SCRATCH_PATH'], "preprocessed_data", "coco2014-tokenized-sdxl")} """
-            cache_rm_tags  = "nvidia-harness,_preprocessed_data,_sdxl"
+            cache_rm_tags  = "nvidia-harness,_preprocess_data,_sdxl"
 
-    if clean_cmd != '':
-        env['CM_RUN_CMD'] = clean_cmd
+    cache_rm_tags = cache_rm_tags + extra_cache_rm_tags
 
     if cache_rm_tags:
-        r = cm.access({'action': 'rm', 'automation': 'cache', 'tags': cache_rm_tags})
+        r = cm.access({'action': 'rm', 'automation': 'cache', 'tags': cache_rm_tags, 'f': True})
+        print(r)
         if r['return'] != 0 and r['return'] != 16: ## ignore missing ones
             return r
+        if r['return'] == 0: # cache entry found
+            if clean_cmd != '':
+                env['CM_RUN_CMD'] = clean_cmd
+    else:
+        if clean_cmd != '':
+            env['CM_RUN_CMD'] = clean_cmd
 
     return {'return':0}
 
diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml
index 984752102..0d5ce3ea8 100644
--- a/script/run-mlperf-inference-app/_cm.yaml
+++ b/script/run-mlperf-inference-app/_cm.yaml
@@ -243,14 +243,32 @@ variations:
     group: submission-generation
 
   scc24-base:
+    base:
+    - short
     adr:
       coco2014-preprocessed:
         tags: _size.50
+      coco2014-dataset:
+        tags: _size.50
+      nvidia-preprocess-data:
+        extra_cache_tags: "scc24-base"
+    deps:
+      - tags: clean,nvidia,scratch,_sdxl,_downloaded-data
+        extra_cache_rm_tags: scc24-main
 
   scc24-main:
+    base:
+    - short
     adr:
       coco2014-preprocessed:
         tags: _size.500
+      coco2014-dataset:
+        tags: _size.500
+      nvidia-preprocess-data:
+        extra_cache_tags: "scc24-main"
+    deps:
+      - tags: clean,nvidia,scratch,_sdxl,_downloaded-data
+        extra_cache_rm_tags: scc24-base
 
   r2.1:
     env:

From 8ceb31351d9fbc994fa93dc706f1eee9fab889ce Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Mon, 23 Sep 2024 13:32:30 -0700
Subject: [PATCH 09/67] Update torchvision for rocm

---
 script/app-mlperf-inference-mlcommons-python/_cm.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml
index 5ecb69b5d..7063e8ec0 100644
--- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml
+++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml
@@ -216,6 +216,7 @@ deps:
   - tags: get,generic-python-lib,_torchvision
     names:
     - ml-engine-torchvision
+    - torchvision
     skip_if_env:
       CM_MODEL:
         - dlrm-v2-99
@@ -231,6 +232,7 @@ deps:
   - tags: get,generic-python-lib,_torchvision_cuda
     names:
     - ml-engine-torchvision
+    - torchvision
     enable_if_env:
       CM_MLPERF_BACKEND:
       - pytorch
@@ -695,6 +697,8 @@ variations:
     add_deps_recursive:
       pytorch:
         tags: _rocm
+      torchvision:
+        tags: _rocm
 
   rocm,sdxl:
     add_deps:

From 0d133c9551d9b2cb5b0d10aec9114dcf92c02dd9 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Mon, 23 Sep 2024 13:37:24 -0700
Subject: [PATCH 10/67] Update sut config name for SCC24

---
 script/run-mlperf-inference-app/_cm.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml
index 0d5ce3ea8..91c1e2a67 100644
--- a/script/run-mlperf-inference-app/_cm.yaml
+++ b/script/run-mlperf-inference-app/_cm.yaml
@@ -245,6 +245,8 @@ variations:
   scc24-base:
     base:
     - short
+    env:
+      CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX4: scc24-base
     adr:
       coco2014-preprocessed:
         tags: _size.50
@@ -266,6 +268,8 @@ variations:
         tags: _size.500
       nvidia-preprocess-data:
         extra_cache_tags: "scc24-main"
+    env:
+      CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX4: scc24-main
     deps:
       - tags: clean,nvidia,scratch,_sdxl,_downloaded-data
         extra_cache_rm_tags: scc24-base

From 14a6a668c2c64f5648b83f6b5c8e140c1700c513 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Mon, 23 Sep 2024 14:08:29 -0700
Subject: [PATCH 11/67] Fix starting weights for nvidia mlperf inference sdxl

---
 script/app-mlperf-inference-nvidia/_cm.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/app-mlperf-inference-nvidia/_cm.yaml b/script/app-mlperf-inference-nvidia/_cm.yaml
index 9b586f410..15d6e4519 100644
--- a/script/app-mlperf-inference-nvidia/_cm.yaml
+++ b/script/app-mlperf-inference-nvidia/_cm.yaml
@@ -423,7 +423,7 @@ variations:
     group: model
     env:
       CM_MODEL: stable-diffusion-xl
-      CM_NOT_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://github.com/mlcommons/cm4mlops/blob/main/script/get-ml-model-stable-diffusion/_cm.json#L174"
+      CM_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://github.com/mlcommons/cm4mlops/blob/main/script/get-ml-model-stable-diffusion/_cm.json#L174"
       CM_ML_MODEL_WEIGHT_TRANSFORMATIONS: "quantization, affine fusion"
       CM_ML_MODEL_INPUTS_DATA_TYPE: int32
       CM_ML_MODEL_WEIGHTS_DATA_TYPE: int8

From a4706214911dba500b3bf9fe88f2e731dde15f1d Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Mon, 23 Sep 2024 14:42:34 -0700
Subject: [PATCH 12/67] Fix torchaudio installation for rocm

---
 script/get-generic-python-lib/_cm.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/script/get-generic-python-lib/_cm.json b/script/get-generic-python-lib/_cm.json
index 6143caca2..fa78d0d96 100644
--- a/script/get-generic-python-lib/_cm.json
+++ b/script/get-generic-python-lib/_cm.json
@@ -971,7 +971,8 @@
       "env": {
         "CM_GENERIC_PYTHON_PACKAGE_NAME": "torchaudio",
         "CM_GENERIC_PYTHON_PIP_INDEX_URL": "https://download.pytorch.org/whl/nightly/rocm6.2",
-        "CM_GENERIC_PYTHON_PIP_UNINSTALL_DEPS": "torchaudio"
+        "CM_GENERIC_PYTHON_PIP_UNINSTALL_DEPS": "torchaudio",
+        "CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL": ""
       },
       "new_env_keys": [
         "CM_TORCHAUDIO_VERSION*"

From d12083efb8ddbf596449f944ea097ff44f10ea7e Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhukicks@gmail.com>
Date: Tue, 24 Sep 2024 11:44:16 +0530
Subject: [PATCH 13/67] preclean fixed

---
 script/download-file/customize.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/script/download-file/customize.py b/script/download-file/customize.py
index b8fc43ec7..97969418c 100644
--- a/script/download-file/customize.py
+++ b/script/download-file/customize.py
@@ -13,7 +13,7 @@ def preprocess(i):
     quiet = (env.get('CM_QUIET', False) == 'yes')
 
     tool = env.get('CM_DOWNLOAD_TOOL', '')
-    pre_clean = env.get('CM_PRE_DOWNLOAD_CLEAN', True)
+    pre_clean = env.get('CM_PRE_DOWNLOAD_CLEAN', False)
 
     #    xsep = '^&^&' if windows else '&&'
     xsep = '&&'
@@ -214,8 +214,7 @@ def preprocess(i):
         for x in ['CM_DOWNLOAD_CMD', 'CM_DOWNLOAD_CHECKSUM_CMD']:
             env[x+'_USED']='YES' if env.get(x,'')!='' else 'NO'
     else:
-        if pre_clean:
-            env['CM_PRE_DOWNLOAD_CLEAN_CMD'] = "rm -f {}".format(env['CM_DOWNLOAD_FILENAME'])
+        env['CM_PRE_DOWNLOAD_CLEAN_CMD'] = "rm -f {}".format(env['CM_DOWNLOAD_FILENAME'])
 
     return {'return':0}
 

From 74030b292e31ccdf4c4aca7f0da8634606ef0fb9 Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhukicks@gmail.com>
Date: Tue, 24 Sep 2024 14:30:38 +0530
Subject: [PATCH 14/67] deleted checksum for url -> cloud.*

---
 script/get-ml-model-dlrm-terabyte/_cm.json | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/script/get-ml-model-dlrm-terabyte/_cm.json b/script/get-ml-model-dlrm-terabyte/_cm.json
index 622488825..e9cb28c56 100644
--- a/script/get-ml-model-dlrm-terabyte/_cm.json
+++ b/script/get-ml-model-dlrm-terabyte/_cm.json
@@ -111,8 +111,7 @@
         "CM_PACKAGE_URL": "https://cloud.mlcommons.org/index.php/s/XzfSeLgW8FYfR3S/download",
         "CM_DAE_EXTRACT_DOWNLOADED": "yes",
         "CM_DOWNLOAD_FILENAME": "download",
-        "CM_EXTRACT_UNZIP": "yes",
-        "CM_DOWNLOAD_CHECKSUM": "07e76718b52601303bb5c54fc0a3500c"
+        "CM_EXTRACT_UNZIP": "yes"
       }
     },
     "wget": {

From 3566ac15168f12e68ac2b5fa82ebbdee4f6456b7 Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhukicks@gmail.com>
Date: Tue, 24 Sep 2024 14:49:32 +0530
Subject: [PATCH 15/67] proper handling of pre_clean

---
 script/download-file/customize.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/script/download-file/customize.py b/script/download-file/customize.py
index 97969418c..4454a659f 100644
--- a/script/download-file/customize.py
+++ b/script/download-file/customize.py
@@ -214,7 +214,12 @@ def preprocess(i):
         for x in ['CM_DOWNLOAD_CMD', 'CM_DOWNLOAD_CHECKSUM_CMD']:
             env[x+'_USED']='YES' if env.get(x,'')!='' else 'NO'
     else:
-        env['CM_PRE_DOWNLOAD_CLEAN_CMD'] = "rm -f {}".format(env['CM_DOWNLOAD_FILENAME'])
+        # pre_clean variable is used in order to clean the partial download files if checksums are not provided
+        if env.get('CM_DOWNLOAD_CHECKSUM_FILE', '') == '' and env.get('CM_DOWNLOAD_CHECKSUM', '') == '':
+            if pre_clean:
+                env['CM_PRE_DOWNLOAD_CLEAN_CMD'] = "rm -f {}".format(env['CM_DOWNLOAD_FILENAME'])
+        else:
+            env['CM_PRE_DOWNLOAD_CLEAN_CMD'] = "rm -f {}".format(env['CM_DOWNLOAD_FILENAME'])
 
     return {'return':0}
 

From 27861a5d657f4253571b9977c65a89625decab16 Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhukicks@gmail.com>
Date: Tue, 24 Sep 2024 16:19:06 +0530
Subject: [PATCH 16/67] reverted pre clean change

---
 script/download-file/customize.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/script/download-file/customize.py b/script/download-file/customize.py
index 4454a659f..97969418c 100644
--- a/script/download-file/customize.py
+++ b/script/download-file/customize.py
@@ -214,12 +214,7 @@ def preprocess(i):
         for x in ['CM_DOWNLOAD_CMD', 'CM_DOWNLOAD_CHECKSUM_CMD']:
             env[x+'_USED']='YES' if env.get(x,'')!='' else 'NO'
     else:
-        # pre_clean variable is used in order to clean the partial download files if checksums are not provided
-        if env.get('CM_DOWNLOAD_CHECKSUM_FILE', '') == '' and env.get('CM_DOWNLOAD_CHECKSUM', '') == '':
-            if pre_clean:
-                env['CM_PRE_DOWNLOAD_CLEAN_CMD'] = "rm -f {}".format(env['CM_DOWNLOAD_FILENAME'])
-        else:
-            env['CM_PRE_DOWNLOAD_CLEAN_CMD'] = "rm -f {}".format(env['CM_DOWNLOAD_FILENAME'])
+        env['CM_PRE_DOWNLOAD_CLEAN_CMD'] = "rm -f {}".format(env['CM_DOWNLOAD_FILENAME'])
 
     return {'return':0}
 

From 54ae9118295011f24a4c1780403e50b30c0828d4 Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhukicks@gmail.com>
Date: Tue, 24 Sep 2024 16:37:14 +0530
Subject: [PATCH 17/67] changes for custom sample id generation - SDXL

---
 script/app-mlperf-inference-mlcommons-python/customize.py | 2 ++
 script/get-dataset-coco2014/_cm.yaml                      | 1 +
 script/get-dataset-coco2014/customize.py                  | 3 +++
 script/get-dataset-coco2014/run.sh                        | 3 +++
 4 files changed, 9 insertions(+)

diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py
index d6b1222b0..9d0a64955 100644
--- a/script/app-mlperf-inference-mlcommons-python/customize.py
+++ b/script/app-mlperf-inference-mlcommons-python/customize.py
@@ -296,6 +296,8 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio
                  scenario_extra_options + mode_extra_options + \
                 " --output " + env['CM_MLPERF_OUTPUT_DIR'] + \
                 " --model-path " + env['CM_ML_MODEL_PATH']
+        if env.get('CM_COCO2014_SAMPLE_ID_PATH','') != '':
+            cmd += " --ids-path " + env['CM_COCO2014_SAMPLE_ID_PATH']
 
     elif "llama2-70b" in env['CM_MODEL']:
         env['RUN_DIR'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "llama2-70b")
diff --git a/script/get-dataset-coco2014/_cm.yaml b/script/get-dataset-coco2014/_cm.yaml
index 690e0926a..aba9b2ea0 100644
--- a/script/get-dataset-coco2014/_cm.yaml
+++ b/script/get-dataset-coco2014/_cm.yaml
@@ -45,6 +45,7 @@ new_env_keys:
 - CM_DATASET_ANNOTATIONS_DIR_PATH
 - CM_DATASET_ANNOTATIONS_FILE_PATH
 - CM_CALIBRATION_DATASET_PATH
+- CM_COCO2014_SAMPLE_ID_PATH
 
 posthook_deps:
 - enable_if_env:
diff --git a/script/get-dataset-coco2014/customize.py b/script/get-dataset-coco2014/customize.py
index b6984bca1..a38f336bc 100644
--- a/script/get-dataset-coco2014/customize.py
+++ b/script/get-dataset-coco2014/customize.py
@@ -16,6 +16,9 @@ def preprocess(i):
 
 def postprocess(i):
     env = i['env']
+    if env.get('CM_GENERATE_SAMPLE_ID', '') == "yes":  
+        env['CM_COCO2014_SAMPLE_ID_PATH'] = os.path.join(os.getcwd(), 'install', 'sample_ids.txt')
+        print(env['CM_COCO2014_SAMPLE_ID_PATH'])
     if env.get('CM_DATASET_CALIBRATION','') == "no":
         env['CM_DATASET_PATH_ROOT'] = os.path.join(os.getcwd(), 'install')
         #env['CM_DATASET_PATH'] = os.path.join(os.getcwd(), 'install', 'validation', 'data')
diff --git a/script/get-dataset-coco2014/run.sh b/script/get-dataset-coco2014/run.sh
index f37ba603b..0324ea162 100644
--- a/script/get-dataset-coco2014/run.sh
+++ b/script/get-dataset-coco2014/run.sh
@@ -33,6 +33,9 @@ else
   eval $cmd
   test $? -eq 0 || exit 1
 fi
+if [[ ${CM_GENERATE_COCO2014_SAMPLE_ID} == "yes" ]]; then
+  cmd="python3 sample_ids.py --tsv-path ${INSTALL_DIR}"
+fi
 cd ${INSTALL_DIR}
 
 test $? -eq 0 || exit 1

From dc9bf9948bfb888d66167da48a0162a356de1f03 Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhukicks@gmail.com>
Date: Tue, 24 Sep 2024 16:37:46 +0530
Subject: [PATCH 18/67] changes for custom sample id generation - SDXL

---
 script/run-mlperf-inference-app/_cm.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml
index 984752102..2d6adfd8b 100644
--- a/script/run-mlperf-inference-app/_cm.yaml
+++ b/script/run-mlperf-inference-app/_cm.yaml
@@ -246,11 +246,15 @@ variations:
     adr:
       coco2014-preprocessed:
         tags: _size.50
+        env:
+          CM_GENERATE_SAMPLE_ID: 'yes'
 
   scc24-main:
     adr:
       coco2014-preprocessed:
         tags: _size.500
+        env:
+          CM_GENERATE_SAMPLE_ID: 'yes'
 
   r2.1:
     env:

From 0c98cbd2e0e6c5e59e988904089aac6d62e5482d Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhukicks@gmail.com>
Date: Tue, 24 Sep 2024 16:56:08 +0530
Subject: [PATCH 19/67] clean download on checksum mismatch, restored pre download clean check

---
 script/download-file/run.sh | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/script/download-file/run.sh b/script/download-file/run.sh
index 552b27fad..b9bf01933 100644
--- a/script/download-file/run.sh
+++ b/script/download-file/run.sh
@@ -26,6 +26,9 @@ elif [ -e "${CM_DOWNLOAD_DOWNLOADED_PATH}" ]; then
        # checksum not supposed to fail for locally given file
        if [[ "${CM_DOWNLOAD_LOCAL_FILE_PATH}" != "" ]]; then
           exit 1
+       else
+          echo "Checksum mismatch. Deleting through command: ${CM_PRE_DOWNLOAD_CLEAN_CMD}"
+          ${CM_PRE_DOWNLOAD_CLEAN_CMD}
        fi
     else
        require_download="0"
@@ -35,8 +38,10 @@ fi
 
 if [[ ${require_download} == "1" ]]; then
   echo ""
-  echo ${CM_PRE_DOWNLOAD_CLEAN_CMD}
-  ${CM_PRE_DOWNLOAD_CLEAN_CMD}
+  if [ -e "${CM_PRE_DOWNLOAD_CLEAN}" ]; then
+    echo ${CM_PRE_DOWNLOAD_CLEAN_CMD}
+    ${CM_PRE_DOWNLOAD_CLEAN_CMD}
+  fi
 
   echo ""
   echo "${CM_DOWNLOAD_CMD}"

From d8a33bca6414df42876565a2c7bf0f66c799fd9c Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhusooraj011@gmail.com>
Date: Tue, 24 Sep 2024 11:54:07 +0000
Subject: [PATCH 20/67] fixed sample id generation in get-dataset-coco2014 run.sh

---
 script/get-dataset-coco2014/run.sh | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/script/get-dataset-coco2014/run.sh b/script/get-dataset-coco2014/run.sh
index 0324ea162..9668e3abd 100644
--- a/script/get-dataset-coco2014/run.sh
+++ b/script/get-dataset-coco2014/run.sh
@@ -33,8 +33,11 @@ else
   eval $cmd
   test $? -eq 0 || exit 1
 fi
-if [[ ${CM_GENERATE_COCO2014_SAMPLE_ID} == "yes" ]]; then
-  cmd="python3 sample_ids.py --tsv-path ${INSTALL_DIR}"
+if [[ ${CM_GENERATE_SAMPLE_ID} == "yes" ]]; then
+  cmd="python3 sample_ids.py --tsv-path ${INSTALL_DIR}/captions/captions.tsv --output-path ${INSTALL_DIR}/sample_ids.txt"
+  echo $cmd
+  eval $cmd
+  test $? -eq 0 || exit 1
 fi
 cd ${INSTALL_DIR}
 

From b40ea46bbe1b24c250eb36af58a2e03fd5018a28 Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhukicks@gmail.com>
Date: Tue, 24 Sep 2024 17:40:13 +0530
Subject: [PATCH 21/67] fix pre download clean

---
 script/download-file/run.sh | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/script/download-file/run.sh b/script/download-file/run.sh
index b9bf01933..0fe414beb 100644
--- a/script/download-file/run.sh
+++ b/script/download-file/run.sh
@@ -27,8 +27,7 @@ elif [ -e "${CM_DOWNLOAD_DOWNLOADED_PATH}" ]; then
        if [[ "${CM_DOWNLOAD_LOCAL_FILE_PATH}" != "" ]]; then
           exit 1
        else
-          echo "Checksum mismatch. Deleting through command: ${CM_PRE_DOWNLOAD_CLEAN_CMD}"
-          ${CM_PRE_DOWNLOAD_CLEAN_CMD}
+          CM_PRE_DOWNLOAD_CLEAN=true
        fi
     else
        require_download="0"
@@ -38,7 +37,7 @@ fi
 
 if [[ ${require_download} == "1" ]]; then
   echo ""
-  if [ -e "${CM_PRE_DOWNLOAD_CLEAN}" ]; then
+  if [ "${CM_PRE_DOWNLOAD_CLEAN}" != "" ]; then
     echo ${CM_PRE_DOWNLOAD_CLEAN_CMD}
     ${CM_PRE_DOWNLOAD_CLEAN_CMD}
   fi

From 7c8984f22e019b40c55ba351dd267fac15db1a6e Mon Sep 17 00:00:00 2001
From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com>
Date: Tue, 24 Sep 2024 17:44:37 +0530
Subject: [PATCH 22/67] added gh action workflow for sdxl reference and nvidia

---
 .../workflows/test-mlperf-inference-sdxl.yaml | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 .github/workflows/test-mlperf-inference-sdxl.yaml

diff --git a/.github/workflows/test-mlperf-inference-sdxl.yaml b/.github/workflows/test-mlperf-inference-sdxl.yaml
new file mode 100644
index 000000000..b4415a612
--- /dev/null
+++ b/.github/workflows/test-mlperf-inference-sdxl.yaml
@@ -0,0 +1,47 @@
+name: MLPerf inference SDXL
+
+on:
+  schedule:
+    - cron: "1 1 * * */3"
+
+jobs:
+  build_reference:
+    if: github.repository_owner == 'gateoverflow'
+    runs-on: [ self-hosted, linux, x64 ]
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: [ "3.12" ]
+        backend: [ "pytorch" ]
+        precision: [ "float16" ]
+    steps:
+    - name: Install dependencies
+      run: |
+        source gh_action/bin/deactivate || python3 -m venv gh_action
+        source gh_action/bin/activate
+        export CM_REPOS=$HOME/GH_CM
+        cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
+    - name: Test MLPerf Inference SDXL
+      run: |
+        cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes  --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
+
+    build_nvidia:
+      if: github.repository_owner == 'gateoverflow'
+      runs-on: [ self-hosted, linux, x64 ]
+      strategy:
+        fail-fast: false
+        matrix:
+          python-version: [ "3.12" ]
+          backend: [ "tensorrt" ]
+          precision: [ "float16" ]
+          implementation: [ "nvidia" ]
+      steps:
+      - name: Install dependencies
+        run: |
+          source gh_action/bin/deactivate || python3 -m venv gh_action
+          source gh_action/bin/activate
+          export CM_REPOS=$HOME/GH_CM
+          cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
+      - name: Test MLPerf Inference SDXL
+        run: |
+          cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes  --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean

From 735b581a345d5a04f8cd7195f6fbff3a37c538d8 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Tue, 24 Sep 2024 07:30:09 -0700
Subject: [PATCH 23/67] Fixes for coco2014 sample ids

---
 script/get-dataset-coco2014/_cm.yaml     |  3 +++
 script/get-dataset-coco2014/run.sh       |  8 ++++----
 script/run-mlperf-inference-app/_cm.yaml | 16 ++++------------
 3 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/script/get-dataset-coco2014/_cm.yaml b/script/get-dataset-coco2014/_cm.yaml
index aba9b2ea0..39c603642 100644
--- a/script/get-dataset-coco2014/_cm.yaml
+++ b/script/get-dataset-coco2014/_cm.yaml
@@ -84,6 +84,9 @@ variations:
     env:
       CM_DATASET_SIZE: '#'
     group: size
+  with-sample-ids:
+    env:
+      CM_GENERATE_SAMPLE_ID: 'yes'
   validation:
     default: true
     env:
diff --git a/script/get-dataset-coco2014/run.sh b/script/get-dataset-coco2014/run.sh
index 9668e3abd..61b9ffe52 100644
--- a/script/get-dataset-coco2014/run.sh
+++ b/script/get-dataset-coco2014/run.sh
@@ -26,19 +26,19 @@ if [[ ${CM_DATASET_CALIBRATION} == "no" ]]; then
   cmd="./download-coco-2014.sh -d ${INSTALL_DIR}  ${max_images}"
   echo $cmd
   eval $cmd
-  test $? -eq 0 || exit 1
+  test $? -eq 0 || exit $?
 else
   cmd="./download-coco-2014-calibration.sh -d ${INSTALL_DIR}"
   echo $cmd
   eval $cmd
-  test $? -eq 0 || exit 1
+  test $? -eq 0 || exit $?
 fi
 if [[ ${CM_GENERATE_SAMPLE_ID} == "yes" ]]; then
   cmd="python3 sample_ids.py --tsv-path ${INSTALL_DIR}/captions/captions.tsv --output-path ${INSTALL_DIR}/sample_ids.txt"
   echo $cmd
   eval $cmd
-  test $? -eq 0 || exit 1
+  test $? -eq 0 || exit $?
 fi
 cd ${INSTALL_DIR}
 
-test $? -eq 0 || exit 1
+test $? -eq 0 || exit $?
diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml
index e3dcb5279..efb637150 100644
--- a/script/run-mlperf-inference-app/_cm.yaml
+++ b/script/run-mlperf-inference-app/_cm.yaml
@@ -249,13 +249,9 @@ variations:
       CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX4: scc24-base
     adr:
       coco2014-preprocessed:
-        tags: _size.50
-        env:
-          CM_GENERATE_SAMPLE_ID: 'yes'
+        tags: _size.50,_with-sample-ids
       coco2014-dataset:
-        tags: _size.50
-        env:
-          CM_GENERATE_SAMPLE_ID: 'yes'
+        tags: _size.50,_with-sample-ids
       nvidia-preprocess-data:
         extra_cache_tags: "scc24-base"
     deps:
@@ -267,13 +263,9 @@ variations:
     - short
     adr:
       coco2014-preprocessed:
-        tags: _size.500
-        env:
-          CM_GENERATE_SAMPLE_ID: 'yes'
+        tags: _size.500,_with-sample-ids
       coco2014-dataset:
-        tags: _size.500
-        env:
-          CM_GENERATE_SAMPLE_ID: 'yes'
+        tags: _size.500,_with-sample-ids
       nvidia-preprocess-data:
         extra_cache_tags: "scc24-main"
     env:

From 0784740fbb6824daaaba8d3a1ea04e2341448473 Mon Sep 17 00:00:00 2001
From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com>
Date: Tue, 24 Sep 2024 20:11:47 +0530
Subject: [PATCH 24/67] removed beam size

---
 .github/workflows/test-mlperf-inference-sdxl.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test-mlperf-inference-sdxl.yaml b/.github/workflows/test-mlperf-inference-sdxl.yaml
index b4415a612..166781173 100644
--- a/.github/workflows/test-mlperf-inference-sdxl.yaml
+++ b/.github/workflows/test-mlperf-inference-sdxl.yaml
@@ -23,7 +23,7 @@ jobs:
         cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
     - name: Test MLPerf Inference SDXL
       run: |
-        cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes  --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
+        cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes  --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
 
     build_nvidia:
       if: github.repository_owner == 'gateoverflow'
@@ -44,4 +44,4 @@ jobs:
           cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
       - name: Test MLPerf Inference SDXL
         run: |
-          cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes  --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
+          cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes  --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean

From 29bed25f581fa339900d3324a9da8d2e91538d07 Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhukicks@gmail.com>
Date: Tue, 24 Sep 2024 20:36:18 +0530
Subject: [PATCH 25/67] handled false condition in download-file

---
 script/download-file/run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/download-file/run.sh b/script/download-file/run.sh
index 0fe414beb..e0b9037c1 100644
--- a/script/download-file/run.sh
+++ b/script/download-file/run.sh
@@ -37,7 +37,7 @@ fi
 
 if [[ ${require_download} == "1" ]]; then
   echo ""
-  if [ "${CM_PRE_DOWNLOAD_CLEAN}" != "" ]; then
+  if [ "${CM_PRE_DOWNLOAD_CLEAN}" != "" ] && [ "${CM_PRE_DOWNLOAD_CLEAN,,}" != "false" ]; then
     echo ${CM_PRE_DOWNLOAD_CLEAN_CMD}
     ${CM_PRE_DOWNLOAD_CLEAN_CMD}
   fi

From b89de1df4fc34866ab050b90f4e819cb773c3d30 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Tue, 24 Sep 2024 21:04:25 +0530
Subject: [PATCH 26/67] Cleanup of download-file run.sh

---
 script/download-file/run.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/script/download-file/run.sh b/script/download-file/run.sh
index e0b9037c1..d9848c39e 100644
--- a/script/download-file/run.sh
+++ b/script/download-file/run.sh
@@ -37,9 +37,9 @@ fi
 
 if [[ ${require_download} == "1" ]]; then
   echo ""
-  if [ "${CM_PRE_DOWNLOAD_CLEAN}" != "" ] && [ "${CM_PRE_DOWNLOAD_CLEAN,,}" != "false" ]; then
-    echo ${CM_PRE_DOWNLOAD_CLEAN_CMD}
-    ${CM_PRE_DOWNLOAD_CLEAN_CMD}
+  if [ -n "${CM_PRE_DOWNLOAD_CLEAN}" ] && [ "${CM_PRE_DOWNLOAD_CLEAN,,}" != "false" ]; then
+    echo "${CM_PRE_DOWNLOAD_CLEAN_CMD}"
+    eval "${CM_PRE_DOWNLOAD_CLEAN_CMD}"
   fi
 
   echo ""

From f1ca1eefa3fd7e7ecc61cb93351513931e167f2c Mon Sep 17 00:00:00 2001
From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com>
Date: Tue, 24 Sep 2024 21:39:23 +0530
Subject: [PATCH 27/67] Create github action for scc24 sdxl

---
 .github/workflows/test-scc24-sdxl.yaml | 57 ++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 .github/workflows/test-scc24-sdxl.yaml

diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml
new file mode 100644
index 000000000..36e76b86d
--- /dev/null
+++ b/.github/workflows/test-scc24-sdxl.yaml
@@ -0,0 +1,57 @@
+name: MLPerf inference SDXL
+
+on:
+  schedule:
+    - cron: "1 1 * * */3"
+
+jobs:
+  build_reference:
+    if: github.repository_owner == 'gateoverflow'
+    runs-on: [ self-hosted, linux, x64 ]
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: [ "3.12" ]
+        backend: [ "pytorch" ]
+        precision: [ "float16" ]
+        device: [ "cuda", "rocm" ]
+    steps:
+    - name: Install dependencies
+      run: |
+        source gh_action/bin/deactivate || python3 -m venv gh_action
+        source gh_action/bin/activate
+        export CM_REPOS=$HOME/GH_CM
+        cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
+    - name: Test MLPerf Inference reference SDXL SCC 
+      env:
+        GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
+      run: |
+        cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --quiet --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --precision=float16 --clean |
+        cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons |
+        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet
+        
+  build_nvidia:
+      if: github.repository_owner == 'gateoverflow'
+      runs-on: [ self-hosted, linux, x64 ]
+      strategy:
+        fail-fast: false
+        matrix:
+          python-version: [ "3.12" ]
+          backend: [ "tensorrt" ]
+          precision: [ "float16" ]
+          implementation: [ "nvidia" ]
+      steps:
+      - name: Install dependencies
+        run: |
+          source gh_action/bin/deactivate || python3 -m venv gh_action
+          source gh_action/bin/activate
+          export CM_REPOS=$HOME/GH_CM
+          cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
+      - name: Test MLPerf Inference NVIDIA SDXL SCC
+        env:
+          GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
+        run: |
+          cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --precision=float16 --clean |
+          cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons |
+          cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet
+        

From 088a8d44c73cd439d870747174356ea3dc044432 Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhukicks@gmail.com>
Date: Wed, 25 Sep 2024 00:31:18 +0530
Subject: [PATCH 28/67] added checksum and clean code

---
 script/get-dataset-mixtral/_cm.json | 22 +++++++++++++++++++---
 script/get-dataset-mixtral/run.bat  |  7 -------
 script/get-dataset-mixtral/run.sh   |  8 --------
 3 files changed, 19 insertions(+), 18 deletions(-)
 delete mode 100644 script/get-dataset-mixtral/run.bat
 delete mode 100644 script/get-dataset-mixtral/run.sh

diff --git a/script/get-dataset-mixtral/_cm.json b/script/get-dataset-mixtral/_cm.json
index e0ddd31ec..656665a28 100644
--- a/script/get-dataset-mixtral/_cm.json
+++ b/script/get-dataset-mixtral/_cm.json
@@ -34,15 +34,31 @@
     "openorca-mbxp-gsm8k-combined"
   ],
   "uid": "89e7c91444804775",
+  "prehook_deps": [
+    {
+      "tags": "download-and-extract",
+      "env": {
+        "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_DATASET_PREPROCESSED_PATH"
+      },
+      "update_tags_from_env_with_prefix": {
+        "_url.": [ "CM_PACKAGE_URL" ]
+      },
+      "force_cache": true,
+      "extra_cache_tags": "mixtral,get-mixtral-dataset"
+    }
+  ],
   "variations": {
     "mlcommons-storage":{
       "default":true,
       "env":{
-        "CM_RCLONE_WINDOWS_URL": "https://inference.mlcommons-storage.org/mixtral_8x7b%%2F2024.06.06_mixtral_15k_v4.pkl",
-        "CM_RCLONE_LINUX_URL": "https://inference.mlcommons-storage.org/mixtral_8x7b%2F2024.06.06_mixtral_15k_v4.pkl",
-        "CM_DATASET_FILE_NAME": "2024.06.06_mixtral_15k_v4.pkl"
+        "CM_PACKAGE_URL": "https://inference.mlcommons-storage.org/mixtral_8x7b%2F2024.06.06_mixtral_15k_v4.pkl",
+        "CM_DOWNLOAD_FILENAME": "2024.06.06_mixtral_15k_v4.pkl",
+        "CM_DOWNLOAD_CHECKSUM": "78823c13e0e73e518872105c4b09628b"
       },
       "group": "download-source"
     }
+  },
+  "print_env_at_the_end" : {
+    "CM_DATASET_PREPROCESSED_PATH": "Path to the ML model"
   }
 }
diff --git a/script/get-dataset-mixtral/run.bat b/script/get-dataset-mixtral/run.bat
deleted file mode 100644
index bf1e128dd..000000000
--- a/script/get-dataset-mixtral/run.bat
+++ /dev/null
@@ -1,7 +0,0 @@
-echo.
-
-rclone copyurl %CM_RCLONE_WINDOWS_URL% . -a -P
-IF %ERRORLEVEL% NEQ 0 EXIT 1
-
-echo CM_DATASET_PREPROCESSED_PATH=%CD%\%CM_DATASET_FILE_NAME% > tmp-run-env.out
-echo %CD%\%CM_DATASET_FILE_NAME%
diff --git a/script/get-dataset-mixtral/run.sh b/script/get-dataset-mixtral/run.sh
deleted file mode 100644
index ed3b3142f..000000000
--- a/script/get-dataset-mixtral/run.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-
-echo ""
-
-rclone copyurl ${CM_RCLONE_LINUX_URL} ./ -a -P
-test $? -eq 0 || exit 1
-
-echo "CM_DATASET_PREPROCESSED_PATH=$PWD/${CM_DATASET_FILE_NAME}" > tmp-run-env.out

From 6fa8c6a79c32646003100312955c7dbef0765c5c Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhukicks@gmail.com>
Date: Wed, 25 Sep 2024 00:43:19 +0530
Subject: [PATCH 29/67] clean code

---
 script/get-dataset-mixtral/_cm.json | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/script/get-dataset-mixtral/_cm.json b/script/get-dataset-mixtral/_cm.json
index 656665a28..ca354eea4 100644
--- a/script/get-dataset-mixtral/_cm.json
+++ b/script/get-dataset-mixtral/_cm.json
@@ -7,27 +7,6 @@
   "new_env_keys": [
     "CM_DATASET_*"
   ],
-  "deps":[
-    {
-      "tags": "detect,detect-os"
-    },
-    {
-      "skip_if_env": {
-        "CM_HOST_OS_TYPE": [
-          "windows"
-        ]
-      },
-      "tags": "get,rclone"
-    },
-    {
-      "enable_if_env": {
-        "CM_HOST_OS_TYPE": [
-          "windows"
-        ]
-      },
-      "tags": "get,rclone"
-    }
-  ],
   "tags": [
     "get",
     "dataset-mixtral",

From 39a36849a4167e70ca20abf959dd2bd3a38b0e86 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 10:26:47 +0100
Subject: [PATCH 30/67] Update test-mlperf-inference-sdxl.yaml | Changed
 conflicting schedule time

---
 .github/workflows/test-mlperf-inference-sdxl.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test-mlperf-inference-sdxl.yaml b/.github/workflows/test-mlperf-inference-sdxl.yaml
index 166781173..a5e348336 100644
--- a/.github/workflows/test-mlperf-inference-sdxl.yaml
+++ b/.github/workflows/test-mlperf-inference-sdxl.yaml
@@ -2,7 +2,7 @@ name: MLPerf inference SDXL
 
 on:
   schedule:
-    - cron: "1 1 * * */3"
+    - cron: "1 2 * * */3"
 
 jobs:
   build_reference:
@@ -25,7 +25,7 @@ jobs:
       run: |
         cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes  --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
 
-    build_nvidia:
+  build_nvidia:
       if: github.repository_owner == 'gateoverflow'
       runs-on: [ self-hosted, linux, x64 ]
       strategy:

From 85303853abccd3d486bf313a4c8604421237d94d Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 15:00:54 +0530
Subject: [PATCH 31/67] Cleanups

---
 .github/workflows/test-scc24-sdxl.yaml | 5 ++---
 script/get-dataset-mixtral/_cm.json    | 3 ---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml
index 36e76b86d..3ced30e04 100644
--- a/.github/workflows/test-scc24-sdxl.yaml
+++ b/.github/workflows/test-scc24-sdxl.yaml
@@ -2,7 +2,7 @@ name: MLPerf inference SDXL
 
 on:
   schedule:
-    - cron: "1 1 * * */3"
+    - cron: "1 3 * * */3"
 
 jobs:
   build_reference:
@@ -14,7 +14,7 @@ jobs:
         python-version: [ "3.12" ]
         backend: [ "pytorch" ]
         precision: [ "float16" ]
-        device: [ "cuda", "rocm" ]
+        device: [ "cuda" ]
     steps:
     - name: Install dependencies
       run: |
@@ -54,4 +54,3 @@ jobs:
           cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --precision=float16 --clean |
           cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons |
           cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet
-        
diff --git a/script/get-dataset-mixtral/_cm.json b/script/get-dataset-mixtral/_cm.json
index ca354eea4..4dfbc82e0 100644
--- a/script/get-dataset-mixtral/_cm.json
+++ b/script/get-dataset-mixtral/_cm.json
@@ -36,8 +36,5 @@
       },
       "group": "download-source"
     }
-  },
-  "print_env_at_the_end" : {
-    "CM_DATASET_PREPROCESSED_PATH": "Path to the ML model"
   }
 }

From d1957bf9f5680623b4b48246079da967ea72509a Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 15:03:53 +0530
Subject: [PATCH 32/67] Fix precision for gptj test

---
 .github/workflows/test-mlperf-inference-gptj.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-mlperf-inference-gptj.yml b/.github/workflows/test-mlperf-inference-gptj.yml
index 6728c8851..5a7ecc7e8 100644
--- a/.github/workflows/test-mlperf-inference-gptj.yml
+++ b/.github/workflows/test-mlperf-inference-gptj.yml
@@ -16,7 +16,7 @@ jobs:
       matrix:
         python-version: [ "3.12" ]
         backend: [ "pytorch" ]
-        precision: [ "bfloat16" ]
+        precision: [ "float16" ]
 
     steps:
     - name: Install dependencies

From 21c81709d396462293a134c051c3f0b8e6841384 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 15:11:45 +0530
Subject: [PATCH 33/67] Fix precision for gptj fp16

---
 script/app-mlperf-inference-mlcommons-python/_cm.yaml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml
index 7063e8ec0..24bc31044 100644
--- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml
+++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml
@@ -354,6 +354,8 @@ deps:
   - tags: get,ml-model,large-language-model,gptj
     names:
     - ml-model
+    - ml-model-float16
+    - ml-model
     - gptj-model
     - gpt-j-model
     enable_if_env:
@@ -1251,9 +1253,9 @@ variations:
   bfloat16:
     group: precision
     add_deps_recursive:
-      ml-model-bfloat16:
+      ml-model-float16:
         tags:
-          _fp32
+          _fp16
     env:
       CM_MLPERF_QUANTIZATION: off
       CM_MLPERF_MODEL_PRECISION: bfloat16

From 70c1f9f0432a0fb2903917a39751b82065dd09ad Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 15:16:10 +0530
Subject: [PATCH 34/67] Fix precision for gptj fp16

---
 script/app-mlperf-inference-mlcommons-python/_cm.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml
index 24bc31044..12b8c50df 100644
--- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml
+++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml
@@ -355,7 +355,6 @@ deps:
     names:
     - ml-model
     - ml-model-float16
-    - ml-model
     - gptj-model
     - gpt-j-model
     enable_if_env:

From 8c7a2c69d6f5bb081f2a199de4143e249a15db74 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 15:22:50 +0530
Subject: [PATCH 35/67] Fix precision for gptj fp16

---
 script/app-mlperf-inference-mlcommons-python/_cm.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml
index 12b8c50df..452a3f1dd 100644
--- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml
+++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml
@@ -354,7 +354,6 @@ deps:
   - tags: get,ml-model,large-language-model,gptj
     names:
     - ml-model
-    - ml-model-float16
     - gptj-model
     - gpt-j-model
     enable_if_env:

From bc1036737c2b26846331fd45662c9389e9466c1d Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 15:51:33 +0530
Subject: [PATCH 36/67] Added support for cuda 12.6.1

---
 script/install-cuda-prebuilt/_cm.json | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/script/install-cuda-prebuilt/_cm.json b/script/install-cuda-prebuilt/_cm.json
index 9628bd0fc..b77165c0c 100644
--- a/script/install-cuda-prebuilt/_cm.json
+++ b/script/install-cuda-prebuilt/_cm.json
@@ -131,6 +131,11 @@
       "env": {
         "CM_CUDA_LINUX_FILENAME": "cuda_12.6.0_560.28.03_linux.run"
       }
+    },
+    "12.6.1": {
+      "env": {
+        "CM_CUDA_LINUX_FILENAME": "cuda_12.6.1_560.35.03_linux.run"
+      }
     }
   }
 }

From a877d2129d0233ca01eaab729bca2b3cd5eb1924 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 16:05:37 +0530
Subject: [PATCH 37/67] Support install prefix for cuda install

---
 script/install-cuda-prebuilt/customize.py | 4 ++++
 script/install-cuda-prebuilt/run.sh       | 4 +---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/script/install-cuda-prebuilt/customize.py b/script/install-cuda-prebuilt/customize.py
index fb395bb0b..22c07d5df 100644
--- a/script/install-cuda-prebuilt/customize.py
+++ b/script/install-cuda-prebuilt/customize.py
@@ -18,6 +18,10 @@ def preprocess(i):
         supported_versions = list(meta['versions'].keys())
         return {'return': 1, 'error': "Only CUDA versions {} are supported now".format(', '.join(supported_versions))}
 
+    install_prefix = env.get('CM_CUDA_INSTALL_PREFIX', os.getcwd())
+
+    env['CM_CUDA_INSTALL_PREFIX'] = install_prefix
+
     recursion_spaces = i['recursion_spaces']
     nvcc_bin = "nvcc"
 
diff --git a/script/install-cuda-prebuilt/run.sh b/script/install-cuda-prebuilt/run.sh
index 88ad70407..de8d76469 100644
--- a/script/install-cuda-prebuilt/run.sh
+++ b/script/install-cuda-prebuilt/run.sh
@@ -1,8 +1,6 @@
 #!/bin/bash
 
-CUR=${PWD}
-
-INSTALL_DIR=${CUR}/install
+INSTALL_DIR=${CM_CUDA_INSTALL_PREFIX}/install
 
 cmd="${CM_SUDO} bash ${CM_CUDA_RUN_FILE_PATH} --toolkitpath=${INSTALL_DIR} --defaultroot=${INSTALL_DIR} --toolkit ${CUDA_ADDITIONAL_INSTALL_OPTIONS} --silent --override"
 echo "${cmd}"

From 1171a54b85f6ed3cbf8bd413652df47d7c922dc7 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 11:38:51 +0100
Subject: [PATCH 38/67] Create code-review.yml

---
 .github/workflows/code-review.yml | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 .github/workflows/code-review.yml

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
new file mode 100644
index 000000000..5cde46057
--- /dev/null
+++ b/.github/workflows/code-review.yml
@@ -0,0 +1,21 @@
+on:
+  pull_request:
+    types: [opened, synchronize]
+
+jobs:
+  code_review_job:
+    runs-on: ubuntu-latest
+    name: ChatGPT Code Review
+    steps:
+      - name: GenAI Code Review
+        uses: cirolini/genai-code-review@v2
+        with:
+          openai_api_key: ${{ secrets.openai_api_key }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_pr_id: ${{ github.event.number }}
+          openai_model: "gpt-3.5-turbo" # optional
+          openai_temperature: 0.5 # optional
+          openai_max_tokens: 2048 # optional
+          mode: files # files or patch
+          language: en # optional, default is 'en'
+          custom_prompt: "" # optional

From 253b0526e26164bfd389ac8bb287f0be08fd0555 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 11:43:55 +0100
Subject: [PATCH 39/67] Update code-review.yml

---
 .github/workflows/code-review.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index 5cde46057..b4ef17b39 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -13,7 +13,7 @@ jobs:
           openai_api_key: ${{ secrets.openai_api_key }}
           github_token: ${{ secrets.GITHUB_TOKEN }}
           github_pr_id: ${{ github.event.number }}
-          openai_model: "gpt-3.5-turbo" # optional
+          openai_model: "GPT-4o" # optional
           openai_temperature: 0.5 # optional
           openai_max_tokens: 2048 # optional
           mode: files # files or patch

From 545ddebd7070e1b2cdd94771c6a7b52613062ad1 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 11:48:24 +0100
Subject: [PATCH 40/67] Update code-review.yml

---
 .github/workflows/code-review.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index b4ef17b39..640e33871 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -5,6 +5,7 @@ on:
 jobs:
   code_review_job:
     runs-on: ubuntu-latest
+    if: github.repository_owner == 'gateoverflow'
     name: ChatGPT Code Review
     steps:
       - name: GenAI Code Review

From b0a02ae2eb647af3ea93acf4f64b64183ec26055 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 16:20:29 +0530
Subject: [PATCH 41/67] Support --install_prefix for cuda installation

---
 script/install-cuda-prebuilt/_cm.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/script/install-cuda-prebuilt/_cm.json b/script/install-cuda-prebuilt/_cm.json
index b77165c0c..72441ddd0 100644
--- a/script/install-cuda-prebuilt/_cm.json
+++ b/script/install-cuda-prebuilt/_cm.json
@@ -18,6 +18,7 @@
   },
   "input_mapping": {
     "local_run_file_path": "CUDA_RUN_FILE_LOCAL_PATH",
+    "install_prefix": "CM_CUDA_INSTALL_PREFIX",
     "skip_sudo": "CUDA_SKIP_SUDO"
   },
   "new_env_keys": [

From d57bd30076788e836202361b3be9e0fa8b52f3ba Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 16:29:02 +0530
Subject: [PATCH 42/67] Support --install_prefix for cuda installation

---
 script/install-cuda-prebuilt/customize.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/script/install-cuda-prebuilt/customize.py b/script/install-cuda-prebuilt/customize.py
index 22c07d5df..f2f0b8f8b 100644
--- a/script/install-cuda-prebuilt/customize.py
+++ b/script/install-cuda-prebuilt/customize.py
@@ -32,8 +32,8 @@ def preprocess(i):
         extra_options += " --driver"
     env['CUDA_ADDITIONAL_INSTALL_OPTIONS'] = extra_options
 
-    env['CM_CUDA_INSTALLED_PATH'] = os.path.join(os.getcwd(), 'install')
-    env['CM_NVCC_BIN_WITH_PATH'] = os.path.join(os.getcwd(), 'install', 'bin', nvcc_bin)
+    env['CM_CUDA_INSTALLED_PATH'] = os.path.join(install_prefix, 'install')
+    env['CM_NVCC_BIN_WITH_PATH'] = os.path.join(install_prefix, 'install', 'bin', nvcc_bin)
     env['CM_GET_DEPENDENT_CACHED_PATH'] =  env['CM_NVCC_BIN_WITH_PATH']
 
     # Set CUDA_RUN_FILE_LOCAL_PATH to empty if not set for backwards compatibility in download file

From c1f2139d4ef12c283168f104a8a7898ef594eb49 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 16:50:05 +0530
Subject: [PATCH 43/67] Support --extra_install_args for cuda installation

---
 script/install-cuda-prebuilt/_cm.json     | 3 ++-
 script/install-cuda-prebuilt/customize.py | 7 +++++++
 script/install-cuda-prebuilt/run.sh       | 2 +-
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/script/install-cuda-prebuilt/_cm.json b/script/install-cuda-prebuilt/_cm.json
index 72441ddd0..bf7965812 100644
--- a/script/install-cuda-prebuilt/_cm.json
+++ b/script/install-cuda-prebuilt/_cm.json
@@ -19,7 +19,8 @@
   "input_mapping": {
     "local_run_file_path": "CUDA_RUN_FILE_LOCAL_PATH",
     "install_prefix": "CM_CUDA_INSTALL_PREFIX",
-    "skip_sudo": "CUDA_SKIP_SUDO"
+    "skip_sudo": "CUDA_SKIP_SUDO",
+    "override-driver-check": "CM_CUDA_DRIVER_INSTALL_OVERRIDE"
   },
   "new_env_keys": [
     "CM_CUDA_*",
diff --git a/script/install-cuda-prebuilt/customize.py b/script/install-cuda-prebuilt/customize.py
index f2f0b8f8b..ac20aca71 100644
--- a/script/install-cuda-prebuilt/customize.py
+++ b/script/install-cuda-prebuilt/customize.py
@@ -22,6 +22,11 @@ def preprocess(i):
 
     env['CM_CUDA_INSTALL_PREFIX'] = install_prefix
 
+    extra_install_args = ''
+
+    if str(env.get('CM_CUDA_DRIVER_INSTALL_OVERRIDE', '')) != '':
+        extra_install_args += ' --override-driver-check'
+
     recursion_spaces = i['recursion_spaces']
     nvcc_bin = "nvcc"
 
@@ -36,6 +41,8 @@ def preprocess(i):
     env['CM_NVCC_BIN_WITH_PATH'] = os.path.join(install_prefix, 'install', 'bin', nvcc_bin)
     env['CM_GET_DEPENDENT_CACHED_PATH'] =  env['CM_NVCC_BIN_WITH_PATH']
 
+    env['CM_CUDA_EXTRA_INSTALL_ARGS'] = extra_install_args
+
     # Set CUDA_RUN_FILE_LOCAL_PATH to empty if not set for backwards compatibility in download file
     env['CUDA_RUN_FILE_LOCAL_PATH'] = env.get('CUDA_RUN_FILE_LOCAL_PATH','')
 
diff --git a/script/install-cuda-prebuilt/run.sh b/script/install-cuda-prebuilt/run.sh
index de8d76469..c13e96b3b 100644
--- a/script/install-cuda-prebuilt/run.sh
+++ b/script/install-cuda-prebuilt/run.sh
@@ -2,7 +2,7 @@
 
 INSTALL_DIR=${CM_CUDA_INSTALL_PREFIX}/install
 
-cmd="${CM_SUDO} bash ${CM_CUDA_RUN_FILE_PATH} --toolkitpath=${INSTALL_DIR} --defaultroot=${INSTALL_DIR} --toolkit ${CUDA_ADDITIONAL_INSTALL_OPTIONS} --silent --override"
+cmd="${CM_SUDO} bash ${CM_CUDA_RUN_FILE_PATH} --toolkitpath=${INSTALL_DIR} --defaultroot=${INSTALL_DIR} --toolkit ${CUDA_ADDITIONAL_INSTALL_OPTIONS} --silent --override ${CM_CUDA_EXTRA_INSTALL_ARGS}"
 echo "${cmd}"
 eval "${cmd}"
 test $? -eq 0 || exit $?

From 5ccf5f729d9f4684abf285396cbaba37e67b885a Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 12:28:00 +0100
Subject: [PATCH 44/67] Improve download-file run.sh

---
 script/download-file/run.sh | 75 ++++++++++++++++++-------------------
 1 file changed, 36 insertions(+), 39 deletions(-)

diff --git a/script/download-file/run.sh b/script/download-file/run.sh
index d9848c39e..c02e44f00 100644
--- a/script/download-file/run.sh
+++ b/script/download-file/run.sh
@@ -1,61 +1,58 @@
 #!/bin/bash
 
-
+# Execute config command if it exists
 if [[ -n ${CM_DOWNLOAD_CONFIG_CMD} ]]; then
-  echo ""
-  echo "${CM_DOWNLOAD_CONFIG_CMD}"
-  eval "${CM_DOWNLOAD_CONFIG_CMD}"
-  test $? -eq 0 || exit $?
+  echo -e "\nExecuting: ${CM_DOWNLOAD_CONFIG_CMD}"
+  eval "${CM_DOWNLOAD_CONFIG_CMD}" || exit $?
 fi
 
+# Assume download is required by default
 require_download=1
 
-if [[ "${CM_DOWNLOAD_LOCAL_FILE_PATH}" != "" ]]; then
+# No download needed if a local file path is specified or the tool is 'cmutil'
+if [[ -n "${CM_DOWNLOAD_LOCAL_FILE_PATH}" || ${CM_DOWNLOAD_TOOL} == "cmutil" ]]; then
   require_download=0
 fi
 
-if [[ ${CM_DOWNLOAD_TOOL} == "cmutil" ]]; then
-  require_download=0
-
-elif [ -e "${CM_DOWNLOAD_DOWNLOADED_PATH}" ]; then
-  if [[ "${CM_DOWNLOAD_CHECKSUM_CMD}" != "" ]]; then
-    echo ""
-    echo "${CM_DOWNLOAD_CHECKSUM_CMD}"
-    eval "${CM_DOWNLOAD_CHECKSUM_CMD}"
-    if [ $? -ne 0 ]; then
-       # checksum not supposed to fail for locally given file
-       if [[ "${CM_DOWNLOAD_LOCAL_FILE_PATH}" != "" ]]; then
-          exit 1
-       else
-          CM_PRE_DOWNLOAD_CLEAN=true
-       fi
+# If the file exists, check the checksum if necessary
+if [[ -e "${CM_DOWNLOAD_DOWNLOADED_PATH}" && -n "${CM_DOWNLOAD_CHECKSUM_CMD}" ]]; then
+  echo -e "\nChecking checksum: ${CM_DOWNLOAD_CHECKSUM_CMD}"
+  eval "${CM_DOWNLOAD_CHECKSUM_CMD}"
+  
+  if [[ $? -ne 0 ]]; then
+    # If the checksum fails, handle errors based on whether the file is local
+    if [[ -n "${CM_DOWNLOAD_LOCAL_FILE_PATH}" ]]; then
+      echo "Checksum failed for local file. Exiting."
+      exit 1
     else
-       require_download="0"
+      echo "Checksum failed. Marking for re-download."
+      CM_PRE_DOWNLOAD_CLEAN=true
     fi
+  else
+    # If checksum succeeds, no download is required
+    require_download=0
   fi
 fi
 
-if [[ ${require_download} == "1" ]]; then
+# Perform download if required
+if [[ ${require_download} == 1 ]]; then
   echo ""
-  if [ -n "${CM_PRE_DOWNLOAD_CLEAN}" ] && [ "${CM_PRE_DOWNLOAD_CLEAN,,}" != "false" ]; then
-    echo "${CM_PRE_DOWNLOAD_CLEAN_CMD}"
-    eval "${CM_PRE_DOWNLOAD_CLEAN_CMD}"
-  fi
 
-  echo ""
-  echo "${CM_DOWNLOAD_CMD}"
-  eval "${CM_DOWNLOAD_CMD}"
-  test $? -eq 0 || exit $?
+  # If a pre-download clean command is specified and needed, execute it
+  if [[ -n "${CM_PRE_DOWNLOAD_CLEAN}" && "${CM_PRE_DOWNLOAD_CLEAN,,}" != "false" ]]; then
+    echo "Executing pre-download clean: ${CM_PRE_DOWNLOAD_CLEAN_CMD}"
+    eval "${CM_PRE_DOWNLOAD_CLEAN_CMD}" || exit $?
+  fi
 
+  # Execute the download command
+  echo "Downloading: ${CM_DOWNLOAD_CMD}"
+  eval "${CM_DOWNLOAD_CMD}" || exit $?
 fi
 
-if [[ ${CM_DOWNLOAD_TOOL} == "cmutil" || ${require_download} == "1"  ]]; then
-  if [[ "${CM_DOWNLOAD_CHECKSUM_CMD}" != "" ]]; then
-      echo ""
-      echo "${CM_DOWNLOAD_CHECKSUM_CMD}"
-      eval "${CM_DOWNLOAD_CHECKSUM_CMD}"
-      test $? -eq 0 || exit $?
+# Verify checksum again if necessary
+if [[ ${CM_DOWNLOAD_TOOL} == "cmutil" || ${require_download} == 1 ]]; then
+  if [[ -n "${CM_DOWNLOAD_CHECKSUM_CMD}" ]]; then
+    echo -e "\nVerifying checksum after download: ${CM_DOWNLOAD_CHECKSUM_CMD}"
+    eval "${CM_DOWNLOAD_CHECKSUM_CMD}" || exit $?
   fi
 fi
-
-test $? -eq 0 || exit $?

From a0775dd09eddc5dba4c9faa8c0549f14b25e946c Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 12:47:57 +0100
Subject: [PATCH 45/67] Update code-review.yml

---
 .github/workflows/code-review.yml | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index 640e33871..c784788b1 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -1,22 +1,28 @@
+name: OpenAI Code Review
+
 on:
   pull_request:
     types: [opened, synchronize]
 
 jobs:
-  code_review_job:
+  code_review:
     runs-on: ubuntu-latest
     if: github.repository_owner == 'gateoverflow'
-    name: ChatGPT Code Review
     steps:
-      - name: GenAI Code Review
+      # Checkout the code
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      # Run code review via OpenAI
+      - name: Run OpenAI Code Review
         uses: cirolini/genai-code-review@v2
         with:
-          openai_api_key: ${{ secrets.openai_api_key }}
           github_token: ${{ secrets.GITHUB_TOKEN }}
-          github_pr_id: ${{ github.event.number }}
-          openai_model: "GPT-4o" # optional
-          openai_temperature: 0.5 # optional
-          openai_max_tokens: 2048 # optional
-          mode: files # files or patch
-          language: en # optional, default is 'en'
+          openai_api_key: ${{ secrets.openai_api_key }}
+          github_pr_id: ${{ github.event.pull_request.number }}
+          openai_model: "gpt-4o"
+          openai_temperature: 0.5
+          openai_max_tokens: 2048
+          mode: "files"  # Options: files, diff
+          language: "en"
           custom_prompt: "" # optional

From 8484a75b3c0ebbe1f3cd411901a5c8f4451442aa Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 12:59:05 +0100
Subject: [PATCH 46/67] Update code-review.yml

---
 .github/workflows/code-review.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index c784788b1..c89e6b783 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -15,7 +15,7 @@ jobs:
 
       # Run code review via OpenAI
       - name: Run OpenAI Code Review
-        uses: cirolini/genai-code-review@v2
+        uses: dlidstrom/genai-code-review@v2
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           openai_api_key: ${{ secrets.openai_api_key }}

From a6bad02a5e294934f61a8a0e8cf30937f108170b Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 13:04:29 +0100
Subject: [PATCH 47/67] Update code-review.yml

---
 .github/workflows/code-review.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index c89e6b783..672283c77 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -13,9 +13,9 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@v3
 
-      # Run code review via OpenAI
+      # Run code review via OpenAI 
       - name: Run OpenAI Code Review
-        uses: dlidstrom/genai-code-review@v2
+        uses: dlidstrom/genai-code-review@3
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           openai_api_key: ${{ secrets.openai_api_key }}

From 1f9c4bba966b8dafa37d3dc9169c0769f53051c0 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 13:11:01 +0100
Subject: [PATCH 48/67] Update code-review.yml

---
 .github/workflows/code-review.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index 672283c77..00f7b08e1 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -4,14 +4,14 @@ on:
   pull_request:
     types: [opened, synchronize]
 
+permissions:
+  issues: write
+
 jobs:
   code_review:
     runs-on: ubuntu-latest
     if: github.repository_owner == 'gateoverflow'
     steps:
-      # Checkout the code
-      - name: Checkout repository
-        uses: actions/checkout@v3
 
       # Run code review via OpenAI 
       - name: Run OpenAI Code Review

From 35c9a87d84bfff8414d2e216a6d1a2bc020ff2cd Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 13:21:26 +0100
Subject: [PATCH 49/67] Update code-review.yml

---
 .github/workflows/code-review.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index 00f7b08e1..84d19c5e6 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -15,7 +15,7 @@ jobs:
 
       # Run code review via OpenAI 
       - name: Run OpenAI Code Review
-        uses: dlidstrom/genai-code-review@3
+        uses: dlidstrom/genai-code-review@3.0.2
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           openai_api_key: ${{ secrets.openai_api_key }}

From 1c0f623fa63b30599a59e46fcdde9eab34efefa7 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 13:28:20 +0100
Subject: [PATCH 50/67] Update code-review.yml

---
 .github/workflows/code-review.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index 84d19c5e6..8682b7212 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -15,7 +15,7 @@ jobs:
 
       # Run code review via OpenAI 
       - name: Run OpenAI Code Review
-        uses: dlidstrom/genai-code-review@3.0.2
+        uses: dlidstrom/genai-code-review@v3
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           openai_api_key: ${{ secrets.openai_api_key }}

From 4f441260b5571cb32244a9b909c098b9cb91bab8 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 13:36:58 +0100
Subject: [PATCH 51/67] Update code-review.yml

---
 .github/workflows/code-review.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index 8682b7212..98692a151 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -15,7 +15,7 @@ jobs:
 
       # Run code review via OpenAI 
       - name: Run OpenAI Code Review
-        uses: dlidstrom/genai-code-review@v3
+        uses: dlidstrom/genai-code-review@v3.0.2
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           openai_api_key: ${{ secrets.openai_api_key }}

From 053682d1742a61901124dc111f7e93775fc55520 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 13:47:53 +0100
Subject: [PATCH 52/67] Update code-review.yml

---
 .github/workflows/code-review.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index 98692a151..974732041 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -18,7 +18,7 @@ jobs:
         uses: dlidstrom/genai-code-review@v3.0.2
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
-          openai_api_key: ${{ secrets.openai_api_key }}
+          openai_api_key: ${{ secrets.OPENAI_API_KEY }}
           github_pr_id: ${{ github.event.pull_request.number }}
           openai_model: "gpt-4o"
           openai_temperature: 0.5

From 43f35a046c4f1d0452f3b5bb48fd22abaae608b0 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 13:58:22 +0100
Subject: [PATCH 53/67] Update code-review.yml

---
 .github/workflows/code-review.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index 974732041..d24fdc97e 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -18,7 +18,7 @@ jobs:
         uses: dlidstrom/genai-code-review@v3.0.2
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
-          openai_api_key: ${{ secrets.OPENAI_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           github_pr_id: ${{ github.event.pull_request.number }}
           openai_model: "gpt-4o"
           openai_temperature: 0.5

From 1debe15746c06e174eb9df13a886e9e62820d775 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 14:08:34 +0100
Subject: [PATCH 54/67] Update code-review.yml

---
 .github/workflows/code-review.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index d24fdc97e..c741db9e2 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -15,10 +15,10 @@ jobs:
 
       # Run code review via OpenAI 
       - name: Run OpenAI Code Review
-        uses: dlidstrom/genai-code-review@v3.0.2
+        uses: GATEOverflow/genai-code-review@v1
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          openai_api_key: ${{ secrets.OPENAI_API_KEY }}
           github_pr_id: ${{ github.event.pull_request.number }}
           openai_model: "gpt-4o"
           openai_temperature: 0.5

From 42db1f7d0d3f478de562391ae24dcfdc334fa34b Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 14:21:36 +0100
Subject: [PATCH 55/67] Update code-review.yml

---
 .github/workflows/code-review.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index c741db9e2..5dc58ed45 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -12,7 +12,6 @@ jobs:
     runs-on: ubuntu-latest
     if: github.repository_owner == 'gateoverflow'
     steps:
-
       # Run code review via OpenAI 
       - name: Run OpenAI Code Review
         uses: GATEOverflow/genai-code-review@v1

From e5cc9ce06c7d4517f43fea2fd96429f03ddf8aa1 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 14:28:00 +0100
Subject: [PATCH 56/67] Update code-review.yml

---
 .github/workflows/code-review.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index c741db9e2..ec9fb5df4 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -1,7 +1,7 @@
 name: OpenAI Code Review
 
 on:
-  pull_request:
+  pull_request_target:
     types: [opened, synchronize]
 
 permissions:

From 42ed9d375e9ac7e3a1146037252eddd8f7be16e3 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 15:14:30 +0100
Subject: [PATCH 57/67] Update code-review.yml

---
 .github/workflows/code-review.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index ec9fb5df4..6315c3927 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -6,6 +6,7 @@ on:
 
 permissions:
   issues: write
+  pull-requests: write
 
 jobs:
   code_review:

From 76c9de9ae745cc232ca5ba449b5e424a85dd5b0a Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 18:17:40 +0100
Subject: [PATCH 58/67] Update code-review.yml

---
 .github/workflows/code-review.yml | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index 6315c3927..120511ae4 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -11,19 +11,21 @@ permissions:
 jobs:
   code_review:
     runs-on: ubuntu-latest
-    if: github.repository_owner == 'gateoverflow'
+    if: github.repository_owner == 'gateoverflow' && github.event.pull_request.changed_files > 0
     steps:
 
       # Run code review via OpenAI 
-      - name: Run OpenAI Code Review
-        uses: GATEOverflow/genai-code-review@v1
-        with:
-          github_token: ${{ secrets.GITHUB_TOKEN }}
-          openai_api_key: ${{ secrets.OPENAI_API_KEY }}
-          github_pr_id: ${{ github.event.pull_request.number }}
-          openai_model: "gpt-4o"
-          openai_temperature: 0.5
-          openai_max_tokens: 2048
-          mode: "files"  # Options: files, diff
-          language: "en"
-          custom_prompt: "" # optional
+      # Step to run the OpenAI Code Review using the GATEOverflow action
+    - name: Run OpenAI Code Review
+      uses: GATEOverflow/genai-code-review@v1
+      with:
+        github_token: ${{ secrets.GITHUB_TOKEN }}  # GitHub token for authentication
+        openai_api_key: ${{ secrets.OPENAI_API_KEY }}  # OpenAI API key for accessing the GPT model
+        github_pr_id: ${{ github.event.pull_request.number }}  # ID of the pull request to review
+        openai_model: "gpt-4o"  # Model to use for the code review
+        openai_temperature: 0.5  # Temperature setting for the model's output
+        openai_max_tokens: 2048  # Maximum number of tokens for the model's response
+        mode: "files"  # Mode of review, can be "files" or "diff"
+        language: "en"  # Language for the review output
+        custom_prompt: "" # Optional custom prompt for the model
+      continue-on-error: true  # Allow the workflow to continue even if this step fails

From 219ff8c7ecf07dcf199074ba3da6b0a4275821f5 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 18:23:24 +0100
Subject: [PATCH 59/67] Update code-review.yml

---
 .github/workflows/code-review.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index 9ee6924fb..258b305f3 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -3,6 +3,10 @@ name: OpenAI Code Review
 on:
   pull_request_target:
     types: [opened, synchronize]
+    paths:
+      - 'automation/**'
+      - 'script/**'
+      - '!**.md'
 
 permissions:
   issues: write

From 0e1489a9b734c0f87f5067df7337cd6d545ae02f Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Thu, 26 Sep 2024 01:13:02 +0530
Subject: [PATCH 60/67] Improve get-cuda-devices to handle multiple GPUs, fixes
 #288

---
 script/get-cuda-devices/_cm.yaml     | 14 +++++++++
 script/get-cuda-devices/customize.py | 39 ++++++++++++++++++-----
 script/get-cuda-devices/detect.py    | 47 ++++++++++++++++++++++++++++
 script/get-cuda-devices/detect.sh    |  4 +++
 4 files changed, 96 insertions(+), 8 deletions(-)
 create mode 100644 script/get-cuda-devices/detect.py
 create mode 100644 script/get-cuda-devices/detect.sh

diff --git a/script/get-cuda-devices/_cm.yaml b/script/get-cuda-devices/_cm.yaml
index b91e791af..0c3fcfb23 100644
--- a/script/get-cuda-devices/_cm.yaml
+++ b/script/get-cuda-devices/_cm.yaml
@@ -36,6 +36,20 @@ new_env_keys:
 
 new_state_keys:
 - cm_cuda_device_prop
+- cm_cuda_devices_prop
 
 print_files_if_script_error:
 - tmp-run.out
+
+variations:
+  with-pycuda:
+    env:
+      CM_CUDA_DEVICES_DETECT_USING_PYCUDA: 'yes'
+    deps:
+      - tags: get,python3
+        names:
+          - python
+          - python3
+      - tags: get,generic-python-lib,_package.pycuda
+        names:
+          - pycuda
diff --git a/script/get-cuda-devices/customize.py b/script/get-cuda-devices/customize.py
index 54fa9094f..4aaf21548 100644
--- a/script/get-cuda-devices/customize.py
+++ b/script/get-cuda-devices/customize.py
@@ -2,6 +2,15 @@
 import os
 import subprocess
 
+def preprocess(i):
+
+    env = i['env']
+
+    if str(env.get('CM_CUDA_DEVICES_DETECT_USING_PYCUDA', '')).lower() in [ "1", "yes", "true"]:
+        i['run_script_input']['script_name'] = 'detect'
+
+    return {'return':0} 
+
 def postprocess(i):
 
     env = i['env']
@@ -18,22 +27,36 @@ def postprocess(i):
 
     # properties
     p = {}
+    gpu = {}
+
+    gpu_id = -1
 
     for line in lst:
-        print (line)
+        #print (line)
 
         j = line.find(':')
+
         if j>=0:
-           key = line[:j].strip()
-           val = line[j+1:].strip()
+            key = line[:j].strip()
+            val = line[j+1:].strip()
+
+            if key == "GPU Device ID":
+                gpu_id+=1
+                gpu[gpu_id] = {}
 
-           p[key] = val
+            if gpu_id < 0:
+                continue
 
-           key_env = 'CM_CUDA_DEVICE_PROP_'+key.upper().replace(' ','_')
-           env[key_env] = val
+            gpu[gpu_id][key] = val 
+            p[key] = val
+
+            key_env = 'CM_CUDA_DEVICE_PROP_'+key.upper().replace(' ','_')
+            env[key_env] = val
     
+    state['cm_cuda_num_devices'] = gpu_id + 1  # gpu_id is the zero-based index of the last device parsed (-1 if none)
+    env['CM_CUDA_NUM_DEVICES'] = gpu_id + 1
 
     state['cm_cuda_device_prop'] = p
+    state['cm_cuda_devices_prop'] = gpu
     
-    return {'return':0}
-    
+    return {'return':0} 
diff --git a/script/get-cuda-devices/detect.py b/script/get-cuda-devices/detect.py
new file mode 100644
index 000000000..817e46a6f
--- /dev/null
+++ b/script/get-cuda-devices/detect.py
@@ -0,0 +1,47 @@
+import pycuda.driver as cuda
+import pycuda.autoinit
+
+def get_gpu_info():
+    num_gpus = cuda.Device.count()
+    all_gpu_info = []
+
+    for i in range(num_gpus):
+        device = cuda.Device(i)
+        cuda_runtime_version = cuda.get_version()
+        cuda_runtime_version_str = f"{cuda_runtime_version[0]}.{cuda_runtime_version[1]}"
+
+        gpu_info = {
+            "GPU Device ID": device.pci_bus_id(),
+            "GPU Name": device.name(),
+            "GPU compute capability": f"{device.compute_capability()[0]}.{device.compute_capability()[1]}",
+            "CUDA driver version": f"{cuda.get_driver_version() // 1000}.{(cuda.get_driver_version() % 1000) // 10}",
+            "CUDA runtime version": cuda_runtime_version_str,
+            "Global memory": device.total_memory(),
+            "Max clock rate": f"{device.get_attribute(cuda.device_attribute.CLOCK_RATE) / 1000} MHz",  # CLOCK_RATE is reported in kHz
+            "Total amount of shared memory per block": device.get_attribute(cuda.device_attribute.MAX_SHARED_MEMORY_PER_BLOCK),
+            "Total number of registers available per block": device.get_attribute(cuda.device_attribute.MAX_REGISTERS_PER_BLOCK),
+            "Warp size": device.get_attribute(cuda.device_attribute.WARP_SIZE),
+            "Maximum number of threads per multiprocessor": device.get_attribute(cuda.device_attribute.MAX_THREADS_PER_MULTIPROCESSOR),
+            "Maximum number of threads per block": device.get_attribute(cuda.device_attribute.MAX_THREADS_PER_BLOCK),
+            "Max dimension size of a thread block X": device.get_attribute(cuda.device_attribute.MAX_BLOCK_DIM_X),
+            "Max dimension size of a thread block Y": device.get_attribute(cuda.device_attribute.MAX_BLOCK_DIM_Y),
+            "Max dimension size of a thread block Z": device.get_attribute(cuda.device_attribute.MAX_BLOCK_DIM_Z),
+            "Max dimension size of a grid size X": device.get_attribute(cuda.device_attribute.MAX_GRID_DIM_X),
+            "Max dimension size of a grid size Y": device.get_attribute(cuda.device_attribute.MAX_GRID_DIM_Y),
+            "Max dimension size of a grid size Z": device.get_attribute(cuda.device_attribute.MAX_GRID_DIM_Z),
+        }
+
+        all_gpu_info.append(gpu_info)
+
+    return all_gpu_info
+
+
+# Print the GPU information for all available GPUs
+if __name__ == "__main__":
+    gpu_info_list = get_gpu_info()
+    with open ("tmp-run.out", "w") as f:
+        for idx, gpu_info in enumerate(gpu_info_list):
+            print(f"GPU {idx}:")
+            for key, value in gpu_info.items():
+                f.write(f"{key}: {value}\n")
+
diff --git a/script/get-cuda-devices/detect.sh b/script/get-cuda-devices/detect.sh
new file mode 100644
index 000000000..8f6b93596
--- /dev/null
+++ b/script/get-cuda-devices/detect.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+${CM_PYTHON_BIN_WITH_PATH} ${CM_TMP_CURRENT_SCRIPT_PATH}/detect.py
+test $? -eq 0 || exit $?

From f4a1ad293c554b0b139f1d28caca66a7913f8d32 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Thu, 26 Sep 2024 01:56:18 +0530
Subject: [PATCH 61/67] Use updated get-cuda-devices in mlperf-inference

---
 script/app-mlperf-inference/_cm.yaml                 | 2 +-
 script/get-ml-model-gptj/_cm.json                    | 2 +-
 script/get-ml-model-llama2/_cm.json                  | 2 +-
 script/get-mlperf-inference-sut-description/_cm.json | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml
index e55c1cdf0..82e67641d 100644
--- a/script/app-mlperf-inference/_cm.yaml
+++ b/script/app-mlperf-inference/_cm.yaml
@@ -1162,7 +1162,7 @@ variations:
       mlperf-inference-implementation:
         tags: _cuda
     deps:
-      - tags: get,cuda-devices
+      - tags: get,cuda-devices,_with-pycuda
         skip_if_env:
           CM_CUDA_DEVICE_PROP_GLOBAL_MEMORY:
             - "yes"
diff --git a/script/get-ml-model-gptj/_cm.json b/script/get-ml-model-gptj/_cm.json
index 2b9c67e62..396e0d595 100644
--- a/script/get-ml-model-gptj/_cm.json
+++ b/script/get-ml-model-gptj/_cm.json
@@ -231,7 +231,7 @@
           "tags": "get,nvidia,scratch,space"
         },
         {
-          "tags": "get,cuda-devices"
+          "tags": "get,cuda-devices,_with-pycuda"
         },
         {
           "tags": "get,ml-model,gpt-j,_fp32,_pytorch",
diff --git a/script/get-ml-model-llama2/_cm.json b/script/get-ml-model-llama2/_cm.json
index 0734395d5..d64c6e004 100644
--- a/script/get-ml-model-llama2/_cm.json
+++ b/script/get-ml-model-llama2/_cm.json
@@ -223,7 +223,7 @@
           "tags": "get,nvidia,scratch,space"
         },
         {
-          "tags": "get,cuda-devices"
+          "tags": "get,cuda-devices,_with-pycuda"
         },
         {
           "tags": "get,ml-model,llama2-70b,_fp32,_pytorch",
diff --git a/script/get-mlperf-inference-sut-description/_cm.json b/script/get-mlperf-inference-sut-description/_cm.json
index a160722c2..f9c1b0345 100644
--- a/script/get-mlperf-inference-sut-description/_cm.json
+++ b/script/get-mlperf-inference-sut-description/_cm.json
@@ -25,7 +25,7 @@
       "tags": "get,compiler"
     },
     {
-      "tags": "get,cuda-devices",
+      "tags": "get,cuda-devices,_with-pycuda",
       "enable_if_env": {
         "CM_MLPERF_DEVICE": [
           "gpu",

From eb8910b8d497eace02e7bc0ef05f25c9dcf147f5 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Thu, 26 Sep 2024 02:20:29 +0530
Subject: [PATCH 62/67] Improved meta for app-mlperf-inference

---
 .../app-mlperf-inference-mlcommons-python/_cm.yaml  | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml
index 452a3f1dd..df7a5a1d7 100644
--- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml
+++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml
@@ -185,8 +185,9 @@ deps:
   ## Pytorch (CPU)
   - tags: get,generic-python-lib,_torch
     names:
-    - ml-engine-pytorch
-    - pytorch
+      - torch
+      - ml-engine-pytorch
+      - pytorch
     skip_if_env:
       CM_MODEL:
         - dlrm-v2-99
@@ -838,6 +839,9 @@ variations:
       MLPERF_TVM_TORCH_QUANTIZED_ENGINE: qnnpack
     deps:
     - tags: get,generic-python-lib,_torch
+      names:
+        - torch
+        - pytorch
     - tags: get,tvm
       names:
       - tvm
@@ -865,7 +869,6 @@ variations:
 
   gptj_:
     deps:
-    - tags: get,generic-python-lib,_torch
     - tags: get,generic-python-lib,_package.datasets
     - tags: get,generic-python-lib,_package.attrs
     - tags: get,generic-python-lib,_package.accelerate
@@ -1099,6 +1102,10 @@ variations:
       - dlrm-src
     # to force the version
     - tags: get,generic-python-lib,_torch
+      names:
+        - torch
+        - pytorch
+        - ml-engine-pytorch
       version: "1.13.1"
     - tags: get,generic-python-lib,_mlperf_logging
     - tags: get,generic-python-lib,_opencv-python

From 8d7d254ff6be6bb2b7a6f5ec609edea4cdc02f57 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Thu, 26 Sep 2024 02:48:37 +0530
Subject: [PATCH 63/67] Stop the remaining mlperf runs for docker detached mode

---
 script/run-mlperf-inference-app/customize.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/script/run-mlperf-inference-app/customize.py b/script/run-mlperf-inference-app/customize.py
index 0722c016e..e2361f2de 100644
--- a/script/run-mlperf-inference-app/customize.py
+++ b/script/run-mlperf-inference-app/customize.py
@@ -133,6 +133,7 @@ def preprocess(i):
     ad = inp.get('ad', {})
     adr = inp.get('adr', {})
     docker_it = inp.get('docker_it', '')
+    docker_dt = inp.get('docker_dt', '')
     adr_from_meta = i['run_script_input'].get('add_deps_recursive')
 
     for key in adr_from_meta:
@@ -237,7 +238,7 @@ def preprocess(i):
                 env['CM_MLPERF_INFERENCE_RESULTS_DIR_'] = os.path.join(env['OUTPUT_BASE_DIR'], f"{env['CM_MLPERF_RUN_STYLE']}_results")
 
             if action == "docker":
-                if str(docker_it).lower() not in ["no", "false", "0"]:
+                if str(docker_dt).lower() not in ["yes", "true", "1"]:
                     print(f"\nStop Running loadgen scenario: {scenario} and mode: {mode}")
                     return {'return': 0} # We run commands interactively inside the docker container
                 else:

From 042079b83034d5232464795a48aec3ec497dbbd1 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Thu, 26 Sep 2024 02:56:54 +0530
Subject: [PATCH 64/67] Fix number of accelerators (GPUs) for mlperf-inference

---
 script/get-cuda-devices/_cm.yaml                         | 2 ++
 script/get-mlperf-inference-sut-description/customize.py | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/script/get-cuda-devices/_cm.yaml b/script/get-cuda-devices/_cm.yaml
index 0c3fcfb23..64d49d95b 100644
--- a/script/get-cuda-devices/_cm.yaml
+++ b/script/get-cuda-devices/_cm.yaml
@@ -33,10 +33,12 @@ docker:
 
 new_env_keys:
 - CM_CUDA_DEVICE_*
+- CM_CUDA_NUM_DEVICES
 
 new_state_keys:
 - cm_cuda_device_prop
 - cm_cuda_devices_prop
+- cm_cuda_num_devices
 
 print_files_if_script_error:
 - tmp-run.out
diff --git a/script/get-mlperf-inference-sut-description/customize.py b/script/get-mlperf-inference-sut-description/customize.py
index 71636941f..cc36483c6 100644
--- a/script/get-mlperf-inference-sut-description/customize.py
+++ b/script/get-mlperf-inference-sut-description/customize.py
@@ -100,7 +100,8 @@ def preprocess(i):
             state['CM_SUT_META']['accelerator_frequency'] = state['cm_cuda_device_prop']['Max clock rate']
             state['CM_SUT_META']['accelerator_memory_capacity'] = str(int(state['cm_cuda_device_prop']['Global memory'])/(1024*1024.0*1024)) + " GB"
             state['CM_SUT_META']['accelerator_model_name'] = state['cm_cuda_device_prop']['GPU Name']
-            state['CM_SUT_META']['accelerators_per_node'] = "1"
+            num_accelerators = env.get('CM_CUDA_NUM_DEVICES', "1")
+            state['CM_SUT_META']['accelerators_per_node'] = num_accelerators
 
         if state['CM_SUT_META'].get('host_processor_core_count', '') == '':
             physical_cores_per_node = env.get('CM_HOST_CPU_PHYSICAL_CORES_PER_SOCKET')

From cd24064d232d6e224012d543bce15bcaee982eb6 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 23:07:20 +0100
Subject: [PATCH 65/67] Update test-mlperf-inference-sdxl.yaml

---
 .github/workflows/test-mlperf-inference-sdxl.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-mlperf-inference-sdxl.yaml b/.github/workflows/test-mlperf-inference-sdxl.yaml
index a5e348336..c7d693495 100644
--- a/.github/workflows/test-mlperf-inference-sdxl.yaml
+++ b/.github/workflows/test-mlperf-inference-sdxl.yaml
@@ -2,7 +2,7 @@ name: MLPerf inference SDXL
 
 on:
   schedule:
-    - cron: "1 2 * * */3"
+    - cron: "1 2 * * *"
 
 jobs:
   build_reference:

From 3dbea4a03583d31eb1a07a026b6b316028258643 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Wed, 25 Sep 2024 23:08:24 +0100
Subject: [PATCH 66/67] Update test-scc24-sdxl.yaml

---
 .github/workflows/test-scc24-sdxl.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml
index 3ced30e04..e9a2fa410 100644
--- a/.github/workflows/test-scc24-sdxl.yaml
+++ b/.github/workflows/test-scc24-sdxl.yaml
@@ -2,7 +2,7 @@ name: MLPerf inference SDXL
 
 on:
   schedule:
-    - cron: "1 3 * * */3"
+    - cron: "43 1 * * *"
 
 jobs:
   build_reference:

From 12c779cd641dcdbc66a6e28388fcd58a122f1422 Mon Sep 17 00:00:00 2001
From: Arjun Suresh <arjunsuresh1987@gmail.com>
Date: Thu, 26 Sep 2024 16:08:19 +0530
Subject: [PATCH 67/67] Don't use venv for nvidia mlperf inference docker

---
 script/app-mlperf-inference/_cm.yaml | 1 +
 script/build-dockerfile/customize.py | 7 +++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml
index 82e67641d..c51b5b515 100644
--- a/script/app-mlperf-inference/_cm.yaml
+++ b/script/app-mlperf-inference/_cm.yaml
@@ -385,6 +385,7 @@ variations:
       CM_IMAGENET_ACCURACY_DTYPE: int32
       CM_CNNDM_ACCURACY_DTYPE: int32
       CM_LIBRISPEECH_ACCURACY_DTYPE: int8
+      CM_DOCKER_USE_VIRTUAL_PYTHON: no
     prehook_deps:
       - names:
          - nvidia-original-mlperf-inference
diff --git a/script/build-dockerfile/customize.py b/script/build-dockerfile/customize.py
index b5bae64fc..41300cdde 100644
--- a/script/build-dockerfile/customize.py
+++ b/script/build-dockerfile/customize.py
@@ -180,8 +180,11 @@ def preprocess(i):
 
     f.write(EOL+'# Install python packages' + EOL)
     python = get_value(env, config, 'PYTHON', 'CM_DOCKERFILE_PYTHON')
-    f.write('RUN {} -m venv /home/cmuser/venv/cm'.format(python) + " " + EOL)
-    f.write('ENV PATH="/home/cmuser/venv/cm/bin:$PATH"' + EOL)
+
+    docker_use_virtual_python = env.get('CM_DOCKER_USE_VIRTUAL_PYTHON', "yes")
+    if str(docker_use_virtual_python).lower() not in [ "no", "0", "false"]:
+        f.write('RUN {} -m venv /home/cmuser/venv/cm'.format(python) + " " + EOL)
+        f.write('ENV PATH="/home/cmuser/venv/cm/bin:$PATH"' + EOL)
     #f.write('RUN . /opt/venv/cm/bin/activate' + EOL)
     f.write('RUN {} -m pip install '.format(python) + " ".join(get_value(env, config, 'python-packages')) + ' ' + pip_extra_flags + ' ' + EOL)