Merge branch 'dev' into num_workers_fix
priyakasimbeg committed Mar 29, 2024
2 parents 135c56a + 9365996 commit e29a2d4
Showing 29 changed files with 248 additions and 128 deletions.
18 changes: 18 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,23 @@
# Change Log

## algoperf-benchmark-0.1.4 (2024-03-26)

Upgrade CUDA version to CUDA 12.1:
- Upgrade CUDA version in Dockerfiles that will be used for scoring.
- Update Jax and PyTorch package version tags to use local CUDA installation.

Add flag for completely disabling checkpointing.
- Note that we will run with checkpointing off at scoring time.

Update Deepspeech and Conformer variant target setting configurations.
- Note that variant targets are not final.

Fixed bug in scoring code to take the best trial in a study for the external-tuning ruleset.

Added instructions for submission.

Changed default number of workers for PyTorch data loaders to 0. Running with more than 0 workers may lead to incorrect eval results; see https://github.com/mlcommons/algorithmic-efficiency/issues/732.

## algoperf-benchmark-0.1.2 (2024-03-04)
Workload variant additions and fixes:
- Add Deepspeech workload variant
37 changes: 36 additions & 1 deletion GETTING_STARTED.md
@@ -388,7 +388,42 @@ python score_submissions.py --submission_directory <directory_with_submissions>

We provide the scores and performance profiles for the [paper baseline algorithms](/reference_algorithms/paper_baselines/) in the "Baseline Results" section in [Benchmarking Neural Network Training Algorithms](https://arxiv.org/abs/2306.07179).

## Package Submission for Self-Reporting
## Package your Submission code

If you have registered for the AlgoPerf competition, you will receive
an email on 3/27/2024 with a link to a UI to upload a compressed submission folder.

To package your submission modules, please make sure your submission folder is structured as follows:

```bash
submission_folder/
├── external_tuning
│ ├── algorithm_name
│ │ ├── helper_module.py
│ │ ├── requirements.txt
│ │ ├── submission.py
│ │ └── tuning_search_space.json
│ └── other_algorithm_name
│ ├── requirements.txt
│ ├── submission.py
│ └── tuning_search_space.json
└── self_tuning
└── algorithm_name
├── requirements.txt
└── submission.py
```

Specifically, we require that:
1. There exist subdirectories in the submission folder named after the ruleset: `external_tuning` or `self_tuning`.
2. The ruleset subdirectories contain directories named according to some identifier of the algorithm.
3. Each algorithm subdirectory contains a `submission.py` module. Additional helper modules are allowed if you prefer to organize your code into multiple files. If additional Python packages have to be installed for the algorithm, also include a `requirements.txt` with package names and versions in the algorithm subdirectory.
4. For `external_tuning` algorithms, the algorithm subdirectory should contain a `tuning_search_space.json`.

To check that your submission folder meets the above requirements, you can run the `submissions/repo_checker.py` script.

## Package Logs for Self-Reporting Submissions
To prepare your submission for self-reporting, run:

```bash
# (command elided in this collapsed diff view)
```
5 changes: 3 additions & 2 deletions README.md
@@ -28,8 +28,9 @@

> [!IMPORTANT]
> Upcoming Deadline:
> Submission deadline: **April 04th, 2024** (*moved by a week*) \
> For other key dates please see [Call for Submissions](/CALL_FOR_SUBMISSIONS.md).
> Submission deadline: **April 04th, 2024** (*moved by a week*). \
> For submission instructions, please see the [Packaging your Submission Code](/GETTING_STARTED.md#package-your-submission-code) section in the Getting Started document. \
> For other key dates please see [Call for Submissions](CALL_FOR_SUBMISSIONS.md).
## Table of Contents <!-- omit from toc -->

6 changes: 3 additions & 3 deletions algorithmic_efficiency/random_utils.py
@@ -26,11 +26,11 @@

def _signed_to_unsigned(seed: SeedType) -> SeedType:
if isinstance(seed, int):
return seed + 2**32 if seed < 0 else seed
return seed % 2**32
if isinstance(seed, list):
return [s + 2**32 if s < 0 else s for s in seed]
return [s % 2**32 for s in seed]
if isinstance(seed, np.ndarray):
return np.array([s + 2**32 if s < 0 else s for s in seed.tolist()])
return np.array([s % 2**32 for s in seed.tolist()])


def _fold_in(seed: SeedType, data: Any) -> List[Union[SeedType, Any]]:
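The change above swaps a conditional offset for a modulo. A minimal standalone sketch (the function name and scalar-only signature are mine; the repo's `_signed_to_unsigned` also handles lists and NumPy arrays) of why `% 2**32` is the more robust mapping — Python's `%` with a positive modulus always yields a non-negative result, so any signed seed lands in `[0, 2**32)`, whereas the old `seed + 2**32 if seed < 0` fix only covered seeds in `[-2**32, 0)`:

```python
def signed_to_unsigned(seed: int) -> int:
    # Map any signed integer into the unsigned 32-bit range [0, 2**32).
    return seed % 2**32


print(signed_to_unsigned(-1))      # 4294967295
print(signed_to_unsigned(123))     # 123
# The old `+ 2**32` fix would have returned -2**33 + 2**32, still negative:
print(signed_to_unsigned(-2**33))  # 0
```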
@@ -173,7 +173,7 @@ def use_layer_norm(self) -> bool:

@property
def validation_target_value(self) -> float:
return 0.123744
return 0.123757

@property
def test_target_value(self) -> float:
@@ -191,23 +191,23 @@ def use_resnet(self) -> bool:

@property
def validation_target_value(self) -> float:
return 0.124027
return 0.12415

@property
def test_target_value(self) -> float:
return 0.126468
return 0.12648


class Criteo1TbDlrmSmallEmbedInitWorkload(Criteo1TbDlrmSmallWorkload):

@property
def validation_target_value(self) -> float:
return 0.124286
return 0.129657

@property
def test_target_value(self) -> float:
# Todo
return 0.126725
return 0.131967

@property
def embedding_init_multiplier(self) -> float:
@@ -254,7 +254,7 @@ def use_layer_norm(self) -> bool:

@property
def validation_target_value(self) -> float:
return 0.123744
return 0.123757

@property
def test_target_value(self) -> float:
@@ -272,23 +272,23 @@ def use_resnet(self) -> bool:

@property
def validation_target_value(self) -> float:
return 0.124027
return 0.12415

@property
def test_target_value(self) -> float:
return 0.126468
return 0.12648


class Criteo1TbDlrmSmallEmbedInitWorkload(Criteo1TbDlrmSmallWorkload):

@property
def validation_target_value(self) -> float:
return 0.124286
return 0.129657

@property
def test_target_value(self) -> float:
# Todo
return 0.126725
return 0.131967

@property
def embedding_init_multiplier(self) -> float:
@@ -272,11 +272,11 @@ def use_silu(self) -> bool:

@property
def validation_target_value(self) -> float:
return 1 - 0.22009
return 0.75445

@property
def test_target_value(self) -> float:
return 1 - 0.3426
return 0.6323


class ImagenetResNetGELUWorkload(ImagenetResNetWorkload):
@@ -287,11 +287,11 @@ def use_gelu(self) -> bool:

@property
def validation_target_value(self) -> float:
return 1 - 0.22077
return 0.76765

@property
def test_target_value(self) -> float:
return 1 - 0.3402
return 0.6519


class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload):
@@ -302,8 +302,8 @@ def bn_init_scale(self) -> float:

@property
def validation_target_value(self) -> float:
return 1 - 0.23474
return 0.76526

@property
def test_target_value(self) -> float:
return 1 - 0.3577
return 0.6423
@@ -326,11 +326,11 @@ def use_silu(self) -> bool:

@property
def validation_target_value(self) -> float:
return 1 - 0.22009
return 0.75445

@property
def test_target_value(self) -> float:
return 1 - 0.342
return 0.6323


class ImagenetResNetGELUWorkload(ImagenetResNetWorkload):
@@ -341,11 +341,11 @@ def use_gelu(self) -> bool:

@property
def validation_target_value(self) -> float:
return 1 - 0.22077
return 0.76765

@property
def test_target_value(self) -> float:
return 1 - 0.3402
return 0.6519


class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload):
@@ -356,8 +356,8 @@ def bn_init_scale(self) -> float:

@property
def validation_target_value(self) -> float:
return 1 - 0.23474
return 0.76526

@property
def test_target_value(self) -> float:
return 1 - 0.3577
return 0.6423
@@ -99,11 +99,11 @@ def use_glu(self) -> bool:

@property
def validation_target_value(self) -> float:
return 1 - 0.2233
return 0.75738

@property
def test_target_value(self) -> float:
return 1 - 0.3455
return 0.6359


class ImagenetVitPostLNWorkload(ImagenetVitWorkload):
@@ -114,11 +114,11 @@ def use_post_layer_norm(self) -> bool:

@property
def validation_target_value(self) -> float:
return 1 - 0.24688
return 0.75312

@property
def test_target_value(self) -> float:
return 1 - 0.3714
return 0.6286


class ImagenetVitMapWorkload(ImagenetVitWorkload):
@@ -129,8 +129,8 @@ def use_map(self) -> bool:

@property
def validation_target_value(self) -> float:
return 1 - 0.22886
return 0.77113

@property
def test_target_value(self) -> float:
return 1 - 0.3477
return 0.6523
@@ -90,11 +90,11 @@ def use_glu(self) -> bool:

@property
def validation_target_value(self) -> float:
return 1 - 0.2233
return 0.75738

@property
def test_target_value(self) -> float:
return 1 - 0.3455
return 0.6359


class ImagenetVitPostLNWorkload(ImagenetVitWorkload):
@@ -105,11 +105,11 @@ def use_post_layer_norm(self) -> bool:

@property
def validation_target_value(self) -> float:
return 1 - 0.24688
return 0.75312

@property
def test_target_value(self) -> float:
return 1 - 0.3714
return 0.6286


class ImagenetVitMapWorkload(ImagenetVitWorkload):
@@ -120,8 +120,8 @@ def use_map(self) -> bool:

@property
def validation_target_value(self) -> float:
return 1 - 0.22886
return 0.77113

@property
def test_target_value(self) -> float:
return 1 - 0.3477
return 0.6523
@@ -161,7 +161,7 @@ def _build_input_queue(
batch_size=global_batch_size,
shuffle=train,
sampler=None,
num_workers=4 if train else self.eval_num_workers,
num_workers=4,
prefetch_factor=10,
pin_memory=False,
drop_last=train,
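As context for the `num_workers` line above: in PyTorch, `num_workers=0` loads batches in the main process with no worker subprocesses, which is the setting the changelog recommends at eval time (see issue #732). A minimal sketch with a toy dataset, not the repo's actual input pipeline:

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

# Toy dataset standing in for the real input pipeline.
dataset = TensorDataset(torch.arange(8, dtype=torch.float32))
# num_workers=0: batches are loaded in the main process.
loader = DataLoader(dataset, batch_size=4, shuffle=False, num_workers=0)
for (batch,) in loader:
    print(batch.tolist())  # [0.0, 1.0, 2.0, 3.0] then [4.0, 5.0, 6.0, 7.0]
```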
@@ -388,11 +388,11 @@ def attention_temperature(self) -> float:

@property
def validation_target_value(self) -> float:
return 0.082665
return 0.109977

@property
def test_target_value(self) -> float:
return 0.50168
return 0.068065


class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload):
@@ -403,11 +403,11 @@ def use_post_layer_norm(self) -> bool:

@property
def validation_target_value(self) -> float:
return 0.085371
return 0.09731

@property
def test_target_value(self) -> float:
return 0.053096
return 0.05996


class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload):
@@ -418,8 +418,8 @@ def use_gelu(self) -> bool:

@property
def validation_target_value(self) -> float:
return 0.077958
return 0.094114

@property
def test_target_value(self) -> float:
return 0.047643
return 0.056629