
Commit 231fc5b

Merge pull request #716 from mlcommons/variants_target_setting_clean
Add configs for variants target setting.
priyakasimbeg authored Mar 22, 2024
2 parents d0ed25a + dcc14d7 commit 231fc5b
Showing 39 changed files with 1,234 additions and 32 deletions.
File (path not shown): ImageNet ResNet workload variants

@@ -272,11 +272,11 @@ def use_silu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.22009
+    return 1 - 0.22009
 
   @property
   def test_target_value(self) -> float:
-    return 0.3426
+    return 1 - 0.3426
 
 
 class ImagenetResNetGELUWorkload(ImagenetResNetWorkload):
@@ -287,11 +287,11 @@ def use_gelu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.22077
+    return 1 - 0.22077
 
   @property
   def test_target_value(self) -> float:
-    return 0.3402
+    return 1 - 0.3402
 
 
 class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload):
@@ -302,8 +302,8 @@ def bn_init_scale(self) -> float:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.23474
+    return 1 - 0.23474
 
   @property
   def test_target_value(self) -> float:
-    return 0.3577
+    return 1 - 0.3577
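
The pattern above repeats in the remaining ImageNet workload files: each variant's target property now returns 1 minus the value it returned before. The diff itself does not say why; a plausible reading is that these targets are expressed as accuracy rather than error rate. The snippet below only spells out the arithmetic for the SiLU variant and is illustrative, not part of the commit.

# Illustrative arithmetic only; whether the metric is accuracy or error rate
# is an assumption, not something stated in this diff.
old_validation_target = 0.22009          # value returned before this commit
new_validation_target = 1 - 0.22009      # value returned after this commit
print(new_validation_target)             # 0.77991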
File (path not shown): ImageNet ResNet workload variants, second copy of the same classes

@@ -326,11 +326,11 @@ def use_silu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.22009
+    return 1 - 0.22009
 
   @property
   def test_target_value(self) -> float:
-    return 0.342
+    return 1 - 0.342
 
 
 class ImagenetResNetGELUWorkload(ImagenetResNetWorkload):
@@ -341,11 +341,11 @@ def use_gelu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.22077
+    return 1 - 0.22077
 
   @property
   def test_target_value(self) -> float:
-    return 0.3402
+    return 1 - 0.3402
 
 
 class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload):
@@ -356,8 +356,8 @@ def bn_init_scale(self) -> float:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.23474
+    return 1 - 0.23474
 
   @property
   def test_target_value(self) -> float:
-    return 0.3577
+    return 1 - 0.3577
File (path not shown): ImageNet ViT workload variants

@@ -99,11 +99,11 @@ def use_glu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.2233
+    return 1 - 0.2233
 
   @property
   def test_target_value(self) -> float:
-    return 0.3455
+    return 1 - 0.3455
 
 
 class ImagenetVitPostLNWorkload(ImagenetVitWorkload):
@@ -114,11 +114,11 @@ def use_post_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.24688
+    return 1 - 0.24688
 
   @property
   def test_target_value(self) -> float:
-    return 0.3714
+    return 1 - 0.3714
 
 
 class ImagenetVitMapWorkload(ImagenetVitWorkload):
@@ -129,8 +129,8 @@ def use_map(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.22886
+    return 1 - 0.22886
 
   @property
   def test_target_value(self) -> float:
-    return 0.3477
+    return 1 - 0.3477
File (path not shown): ImageNet ViT workload variants, second copy of the same classes

@@ -90,11 +90,11 @@ def use_glu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.2233
+    return 1 - 0.2233
 
   @property
   def test_target_value(self) -> float:
-    return 0.3455
+    return 1 - 0.3455
 
 
 class ImagenetVitPostLNWorkload(ImagenetVitWorkload):
@@ -105,11 +105,11 @@ def use_post_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.24688
+    return 1 - 0.24688
 
   @property
   def test_target_value(self) -> float:
-    return 0.3714
+    return 1 - 0.3714
 
 
 class ImagenetVitMapWorkload(ImagenetVitWorkload):
@@ -120,8 +120,8 @@ def use_map(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.22886
+    return 1 - 0.22886
 
   @property
   def test_target_value(self) -> float:
-    return 0.3477
+    return 1 - 0.3477
File (path not shown): LibriSpeech DeepSpeech workload variants

@@ -107,13 +107,29 @@ class LibriSpeechDeepSpeechTanhWorkload(LibriSpeechDeepSpeechWorkload):
   def use_tanh(self) -> bool:
     return True
 
+  @property
+  def validation_target_value(self) -> float:
+    return 0.133449
+
+  @property
+  def test_target_value(self) -> float:
+    return 0.079810
+
 
 class LibriSpeechDeepSpeechNoResNetWorkload(LibriSpeechDeepSpeechWorkload):
 
   @property
   def enable_residual_connections(self) -> bool:
     return False
 
+  @property
+  def validation_target_value(self) -> float:
+    return 0.105042
+
+  @property
+  def test_target_value(self) -> float:
+    return 0.060388
+
 
 class LibriSpeechDeepSpeechNormAndSpecAugWorkload(LibriSpeechDeepSpeechWorkload
 ):
@@ -137,3 +153,11 @@ def freq_mask_count(self) -> int:
   @property
   def time_mask_count(self) -> int:
     return 15
+
+  @property
+  def validation_target_value(self) -> float:
+    return 0.131553
+
+  @property
+  def test_target_value(self) -> float:
+    return 0.082442
File: algorithmic_efficiency/workloads/workloads.py (1 addition, 1 deletion)

@@ -78,7 +78,7 @@
     },
     'imagenet_vit_map': {
         'workload_path': 'imagenet_vit/imagenet',
-        'workload_class_name': 'ImagenetVitMapLNWorkload',
+        'workload_class_name': 'ImagenetVitMapWorkload',
     },
     'librispeech_conformer': {
         'workload_path': 'librispeech_conformer/librispeech',
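
The workloads.py entry above maps a registry key ('imagenet_vit_map') to a module path and a class name; the commit only corrects the class name to the one that actually exists (ImagenetVitMapWorkload). The repository's own lookup helper is not part of this diff; the following is a minimal, hypothetical sketch of how a registry of this shape could be resolved with importlib, and the module-name construction is an assumption rather than the project's real convention.

import importlib

# Registry shape taken from the diff above; the loader below is only a sketch.
WORKLOADS = {
    'imagenet_vit_map': {
        'workload_path': 'imagenet_vit/imagenet',
        'workload_class_name': 'ImagenetVitMapWorkload',
    },
}


def load_workload_class(name: str, framework: str = 'jax'):
  # Hypothetical: turn 'imagenet_vit/imagenet' plus a framework suffix into a
  # module path like 'algorithmic_efficiency.workloads.imagenet_vit.imagenet_jax.workloads'.
  entry = WORKLOADS[name]
  package, module_stem = entry['workload_path'].split('/')
  module_name = ('algorithmic_efficiency.workloads.'
                 f'{package}.{module_stem}_{framework}.workloads')
  module = importlib.import_module(module_name)
  return getattr(module, entry['workload_class_name'])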
New file (path not shown): single-point tuning config (JSON)

@@ -0,0 +1,28 @@
{
  "learning_rate": {
    "feasible_points": [
      0.002517072211464665
    ]
  },
  "beta1": {
    "feasible_points": [
      0.9908351643533544
    ]
  },
  "beta2": {
    "feasible_points": [
      0.9859568907533993
    ]
  },
  "warmup_steps": {
    "feasible_points": [
      799
    ]
  },
  "weight_decay": {
    "feasible_points": [
      0.12274552870237089
    ]
  }
}

New file (path not shown): single-point tuning config (JSON)

@@ -0,0 +1,28 @@
{
  "learning_rate": {
    "feasible_points": [
      0.05493199486120455
    ]
  },
  "beta1": {
    "feasible_points": [
      0.954922991734919
    ]
  },
  "beta2": {
    "feasible_points": [
      0.9986188074995163
    ]
  },
  "warmup_steps": {
    "feasible_points": [
      799
    ]
  },
  "weight_decay": {
    "feasible_points": [
      0.00011065469792077193
    ]
  }
}

New file (path not shown): single-point tuning config (JSON)

@@ -0,0 +1,28 @@
{
  "learning_rate": {
    "feasible_points": [
      0.001493629901423942
    ]
  },
  "beta1": {
    "feasible_points": [
      0.9592129978682067
    ]
  },
  "beta2": {
    "feasible_points": [
      0.9824918272399145
    ]
  },
  "warmup_steps": {
    "feasible_points": [
      399
    ]
  },
  "weight_decay": {
    "feasible_points": [
      0.00038587516415285595
    ]
  }
}

New file (path not shown): single-point tuning config (JSON)

@@ -0,0 +1,27 @@
{
  "learning_rate": {
    "feasible_points": [
      0.008334676559764446
    ]
  },
  "beta1": {
    "feasible_points": [
      0.8294338711079317
    ]
  },
  "beta2": {
    "feasible_points": [
      0.8551723332825868
    ]
  },
  "warmup_steps": {
    "feasible_points": [
      2714
    ]
  },
  "weight_decay": {
    "feasible_points": [
      0.01371235755699044
    ]
  }
}
New file (path not shown): single-point tuning config (JSON)

@@ -0,0 +1,27 @@
{
  "learning_rate": {
    "feasible_points": [
      0.006173154695175443
    ]
  },
  "beta1": {
    "feasible_points": [
      0.8496694604806512
    ]
  },
  "beta2": {
    "feasible_points": [
      0.4639437428687345
    ]
  },
  "warmup_steps": {
    "feasible_points": [
      1357
    ]
  },
  "weight_decay": {
    "feasible_points": [
      0.1679001017957879
    ]
  }
}
New file (path not shown): single-point tuning config (JSON)

@@ -0,0 +1,27 @@
{
  "learning_rate": {
    "feasible_points": [
      0.04037951750205473
    ]
  },
  "beta1": {
    "feasible_points": [
      0.9932215932637941
    ]
  },
  "beta2": {
    "feasible_points": [
      0.9425306939334134
    ]
  },
  "warmup_steps": {
    "feasible_points": [
      542
    ]
  },
  "weight_decay": {
    "feasible_points": [
      0.14877061239151607
    ]
  }
}
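
Each of the new JSON files above pins every hyperparameter to a single feasible point, which reads as a fixed one-point tuning search space for the corresponding workload variant. The repository's actual config loader is not shown in this diff; below is a minimal sketch, under that assumption, of collapsing such a file into a flat name-to-value dictionary.

import json
from typing import Any, Dict


def load_single_point_config(path: str) -> Dict[str, Any]:
  # Assumes each hyperparameter lists exactly one feasible point, as in the
  # JSON files added by this commit.
  with open(path) as f:
    search_space = json.load(f)
  hparams = {}
  for name, spec in search_space.items():
    points = spec['feasible_points']
    if len(points) != 1:
      raise ValueError(f'{name} has {len(points)} feasible points, expected 1')
    hparams[name] = points[0]
  return hparams


# Example usage with a hypothetical file name:
#   hparams = load_single_point_config('variant_tuning_config.json')
#   -> {'learning_rate': 0.0403..., 'beta1': 0.9932..., ...}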
