From ad8f72b3f65031c6a4f5d423c6490f4cf9f12dee Mon Sep 17 00:00:00 2001
From: Juhan Bae <pomonam15@gmail.com>
Date: Tue, 19 Mar 2024 13:35:05 -0400
Subject: [PATCH 01/17] Initial Commit

---
 .../librispeech_pytorch/models.py                      | 10 +++++-----
 submission_runner.py                                   |  1 +
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py
index fe3a1e179..1476fd361 100644
--- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py
+++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py
@@ -93,7 +93,7 @@ def __init__(self,
         out_features=self.encoder_dim,
         bias=True)
     self.pos_encode = AddPositionalEmbedding(embedding_dim=self.encoder_dim)
-    self.dropout = nn.Dropout(p=self.input_dropout_rate)
+    self.dropout = nn.Dropout(p=self.input_dropout_rate, inplace=True)
 
   def forward(self, inputs, input_paddings):
     output_paddings = input_paddings
@@ -195,7 +195,7 @@ def __init__(self, config: ConformerConfig):
         in_features=config.encoder_dim,
         out_features=config.encoder_dim * config.feed_forward_expansion_factor,
         bias=True)
-    self.dropout1 = nn.Dropout(p=config.feed_forward_dropout_rate)
+    self.dropout1 = nn.Dropout(p=config.feed_forward_dropout_rate, inplace=True)
     self.linear2 = nn.Linear(
         in_features=config.encoder_dim * config.feed_forward_expansion_factor,
         out_features=config.encoder_dim,
@@ -206,7 +206,7 @@ def __init__(self, config: ConformerConfig):
     else:
       feed_forward_residual_dropout_rate = (
           config.feed_forward_residual_dropout_rate)
-    self.dropout2 = nn.Dropout(p=feed_forward_residual_dropout_rate)
+    self.dropout2 = nn.Dropout(p=feed_forward_residual_dropout_rate, inplace=True)
 
   def forward(self, inputs, padding_mask):
     inputs = self.ln(inputs)
@@ -316,7 +316,7 @@ def __init__(self, config: ConformerConfig):
       attention_residual_dropout_rate = 0.1
     else:
       attention_residual_dropout_rate = config.attention_residual_dropout_rate
-    self.dropout = nn.Dropout(p=attention_residual_dropout_rate)
+    self.dropout = nn.Dropout(p=attention_residual_dropout_rate, inplace=True)
 
   def forward(self, outputs, paddings):
     outputs = self.ln(outputs)
@@ -407,7 +407,7 @@ def __init__(self, config):
       conv_residual_dropout_rate = 0.0
     else:
       conv_residual_dropout_rate = config.conv_residual_dropout_rate
-    self.dropout = nn.Dropout(p=conv_residual_dropout_rate)
+    self.dropout = nn.Dropout(p=conv_residual_dropout_rate, inplace=True)
 
   def forward(self, inputs, input_paddings):
     inputs = self.ln(inputs)
diff --git a/submission_runner.py b/submission_runner.py
index ff290079b..7c8d7fb53 100644
--- a/submission_runner.py
+++ b/submission_runner.py
@@ -203,6 +203,7 @@ def train_once(
     log_dir: Optional[str] = None,
     save_checkpoints: Optional[bool] = True
 ) -> Tuple[spec.Timing, Dict[str, Any]]:
+  _reset_cuda_mem()
   data_rng, opt_init_rng, model_init_rng, rng = prng.split(rng, 4)
 
   # Workload setup.

From de238bcbd7831b29886517e868ecacd3540babf9 Mon Sep 17 00:00:00 2001
From: Juhan Bae <pomonam15@gmail.com>
Date: Tue, 19 Mar 2024 13:41:05 -0400
Subject: [PATCH 02/17] Lint fix

---
 .../librispeech_conformer/librispeech_pytorch/models.py        | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py
index 1476fd361..b3f1eeaad 100644
--- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py
+++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py
@@ -206,7 +206,8 @@ def __init__(self, config: ConformerConfig):
     else:
       feed_forward_residual_dropout_rate = (
           config.feed_forward_residual_dropout_rate)
-    self.dropout2 = nn.Dropout(p=feed_forward_residual_dropout_rate, inplace=True)
+    self.dropout2 = nn.Dropout(p=feed_forward_residual_dropout_rate,
+                               inplace=True)
 
   def forward(self, inputs, padding_mask):
     inputs = self.ln(inputs)

From f208dd2dc13d98619633b3144e7300fd74060461 Mon Sep 17 00:00:00 2001
From: Juhan Bae <pomonam15@gmail.com>
Date: Tue, 19 Mar 2024 13:56:49 -0400
Subject: [PATCH 03/17] Lint fix

---
 .../librispeech_conformer/librispeech_pytorch/models.py     | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py
index b3f1eeaad..90a12b779 100644
--- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py
+++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py
@@ -206,9 +206,9 @@ def __init__(self, config: ConformerConfig):
     else:
       feed_forward_residual_dropout_rate = (
           config.feed_forward_residual_dropout_rate)
-    self.dropout2 = nn.Dropout(p=feed_forward_residual_dropout_rate,
-                               inplace=True)
-
+    self.dropout2 = nn.Dropout(
+        p=feed_forward_residual_dropout_rate, inplace=True)
+  
   def forward(self, inputs, padding_mask):
     inputs = self.ln(inputs)
     inputs = self.linear1(inputs)

From 135c56adcf1d6d9fbec3a84e2942352ec13fb222 Mon Sep 17 00:00:00 2001
From: Priya Kasimbeg <kasimbeg@google.com>
Date: Tue, 26 Mar 2024 17:28:50 +0000
Subject: [PATCH 04/17] add warning to flag

---
 submission_runner.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/submission_runner.py b/submission_runner.py
index 2945e3fd0..e9a3f7dba 100644
--- a/submission_runner.py
+++ b/submission_runner.py
@@ -156,7 +156,9 @@
                      'If true, set pytorch max_split_size_mb to 256')
 flags.DEFINE_integer('pytorch_eval_num_workers',
                      0,
-                     'Number of workers for PyTorch evaluation data loaders.')
+                     'Number of workers for PyTorch evaluation data loaders. '
+                     'WARNING: there is a known bug that results in wrong '
+                     'evals when the number of workers is not equal to 0.')
 FLAGS = flags.FLAGS
 USE_PYTORCH_DDP, RANK, DEVICE, N_GPUS = pytorch_setup()
 

From 4fabc1474237d110510b76d259b323ba662f4b2f Mon Sep 17 00:00:00 2001
From: Priya Kasimbeg <kasimbeg@google.com>
Date: Thu, 28 Mar 2024 01:20:39 +0000
Subject: [PATCH 05/17] add variant targets

---
 .../criteo1tb/criteo1tb_jax/workload.py       | 10 ++--
 .../criteo1tb/criteo1tb_pytorch/workload.py   | 10 ++--
 .../imagenet_resnet/imagenet_jax/workload.py  | 12 ++---
 .../imagenet_pytorch/workload.py              | 12 ++---
 .../imagenet_vit/imagenet_jax/workload.py     | 12 ++---
 .../imagenet_vit/imagenet_pytorch/workload.py | 12 ++---
 .../workloads/wmt/wmt_jax/workload.py         | 12 ++---
 .../workloads/wmt/wmt_pytorch/workload.py     | 12 ++---
 scoring/score_submissions.py                  | 52 ++++++++++++++-----
 scoring/scoring_utils.py                      | 17 ++++--
 10 files changed, 97 insertions(+), 64 deletions(-)

diff --git a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py
index 84a0a7416..f6945c021 100644
--- a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py
+++ b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py
@@ -173,7 +173,7 @@ def use_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.123744
+    return 0.1237562372
 
   @property
   def test_target_value(self) -> float:
@@ -191,23 +191,23 @@ def use_resnet(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.124027
+    return 0.1241490923
 
   @property
   def test_target_value(self) -> float:
-    return 0.126468
+    return 0.1264799502
 
 
 class Criteo1TbDlrmSmallEmbedInitWorkload(Criteo1TbDlrmSmallWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 0.124286
+    return 0.129656005
 
   @property
   def test_target_value(self) -> float:
     # Todo
-    return 0.126725
+    return 0.1319666458
 
   @property
   def embedding_init_multiplier(self) -> float:
diff --git a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py
index c63ac3f7b..434ca7f50 100644
--- a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py
@@ -254,7 +254,7 @@ def use_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.123744
+    return 0.1237562372
 
   @property
   def test_target_value(self) -> float:
@@ -272,23 +272,23 @@ def use_resnet(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.124027
+    return 0.1241490923
 
   @property
   def test_target_value(self) -> float:
-    return 0.126468
+    return 0.1264799502
 
 
 class Criteo1TbDlrmSmallEmbedInitWorkload(Criteo1TbDlrmSmallWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 0.124286
+    return 0.129656005
 
   @property
   def test_target_value(self) -> float:
     # Todo
-    return 0.126725
+    return 0.1319666458
 
   @property
   def embedding_init_multiplier(self) -> float:
diff --git a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py
index e4810e142..a3506b4fd 100644
--- a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py
+++ b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py
@@ -272,11 +272,11 @@ def use_silu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.22009
+    return 0.7544599771
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3426
+    return 0.6323000193
 
 
 class ImagenetResNetGELUWorkload(ImagenetResNetWorkload):
@@ -287,11 +287,11 @@ def use_gelu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.22077
+    return 0.7676599622
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3402
+    return 0.651900053
 
 
 class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload):
@@ -302,8 +302,8 @@ def bn_init_scale(self) -> float:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.23474
+    return 0.76526
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3577
+    return 0.6423
diff --git a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py
index 5c7c6c7d2..089caf5cb 100644
--- a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py
@@ -326,11 +326,11 @@ def use_silu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.22009
+    return 0.7544599771
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.342
+    return 0.6323000193
 
 
 class ImagenetResNetGELUWorkload(ImagenetResNetWorkload):
@@ -341,11 +341,11 @@ def use_gelu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.22077
+    return 0.7676599622
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3402
+    return 0.651900053
 
 
 class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload):
@@ -356,8 +356,8 @@ def bn_init_scale(self) -> float:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.23474
+    return 0.76526
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3577
+    return 0.6423
diff --git a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py
index a54ee9b5e..0cd60251e 100644
--- a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py
+++ b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py
@@ -99,11 +99,11 @@ def use_glu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.2233
+    return 0.7573800087
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3455
+    return 0.6359000206
 
 
 class ImagenetVitPostLNWorkload(ImagenetVitWorkload):
@@ -114,11 +114,11 @@ def use_post_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.24688
+    return 0.75312
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3714
+    return 0.6286
 
 
 class ImagenetVitMapWorkload(ImagenetVitWorkload):
@@ -129,8 +129,8 @@ def use_map(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.22886
+    return 0.77114
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3477
+    return 0.6523
diff --git a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py
index 51c79b2d0..50233b0b0 100644
--- a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py
@@ -90,11 +90,11 @@ def use_glu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.2233
+    return 0.7573800087
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3455
+    return 0.6359000206
 
 
 class ImagenetVitPostLNWorkload(ImagenetVitWorkload):
@@ -105,11 +105,11 @@ def use_post_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.24688
+    return 0.75312
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3714
+    return 0.6286
 
 
 class ImagenetVitMapWorkload(ImagenetVitWorkload):
@@ -120,8 +120,8 @@ def use_map(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.22886
+    return 0.77114
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3477
+    return 0.6523
diff --git a/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py b/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py
index b10d4056d..c7da35b11 100644
--- a/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py
+++ b/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py
@@ -299,7 +299,7 @@ class WmtWorkloadPostLN(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 30.2003
+    return 30.07797237
 
   @property
   def test_target_value(self) -> float:
@@ -315,15 +315,15 @@ class WmtWorkloadAttentionTemp(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 30.0756
+    return 29.86119393
 
   @property
   def test_target_value(self) -> float:
-    return 29.8094
+    return 29.41438511
 
   @property
   def attention_temp(self) -> float:
-    return 4.0
+    return 1.6
 
 
 class WmtWorkloadGLUTanH(WmtWorkload):
@@ -331,11 +331,11 @@ class WmtWorkloadGLUTanH(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 30.0002
+    return 29.65174349
 
   @property
   def test_target_value(self) -> float:
-    return 29.8139
+    return 29.05153769
 
   @property
   def activation(self) -> str:
diff --git a/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py b/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py
index 9f6d817f4..dd7893be3 100644
--- a/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py
@@ -355,7 +355,7 @@ class WmtWorkloadPostLN(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 30.2003
+    return 30.07797237
 
   @property
   def test_target_value(self) -> float:
@@ -371,15 +371,15 @@ class WmtWorkloadAttentionTemp(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 30.0756
+    return 29.86119393
 
   @property
   def test_target_value(self) -> float:
-    return 29.8094
+    return 29.41438511
 
   @property
   def attention_temp(self) -> float:
-    return 4.0
+    return 1.6
 
 
 class WmtWorkloadGLUTanH(WmtWorkload):
@@ -387,11 +387,11 @@ class WmtWorkloadGLUTanH(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 30.0002
+    return 29.65174349
 
   @property
   def test_target_value(self) -> float:
-    return 29.8139
+    return 29.05153769
 
   @property
   def activation(self) -> str:
diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py
index 48777c69e..891f04e78 100644
--- a/scoring/score_submissions.py
+++ b/scoring/score_submissions.py
@@ -48,8 +48,9 @@
 FLAGS = flags.FLAGS
 
 
-def get_summary_df(workload, workload_df):
-  validation_metric, validation_target = scoring_utils.get_workload_validation_target(workload)
+def get_summary_df(workload, workload_df, include_test_split=False):
+  validation_metric, validation_target = scoring_utils.get_workload_metrics_and_targets(workload, split='validation')
+
   is_minimized = performance_profile.check_if_minimized(validation_metric)
   target_op = operator.le if is_minimized else operator.ge
   best_op = min if is_minimized else max
@@ -58,32 +59,52 @@ def get_summary_df(workload, workload_df):
   summary_df = pd.DataFrame()
   summary_df['workload'] = workload_df['workload']
   summary_df['trial'] = workload_df['trial'].apply(lambda x: x[0])
-  summary_df['target metric name'] = validation_metric
-  summary_df['target metric value'] = validation_target
+  summary_df['val target metric name'] = validation_metric
+  summary_df['val target metric value'] = validation_target
 
-  summary_df['target reached'] = workload_df[validation_metric].apply(
+  summary_df['val target reached'] = workload_df[validation_metric].apply(
       lambda x: target_op(x, validation_target)).apply(np.any)
-  summary_df['best metric value'] = workload_df[validation_metric].apply(
+  summary_df['best metric value on val'] = workload_df[validation_metric].apply(
       lambda x: best_op(x))
-  workload_df['index best eval'] = workload_df[validation_metric].apply(
+  workload_df['index best eval on val'] = workload_df[validation_metric].apply(
       lambda x: idx_op(x))
-  summary_df['time to best eval (s)'] = workload_df.apply(
-      lambda x: x['accumulated_submission_time'][x['index best eval']], axis=1)
-  summary_df['time to target (s)'] = summary_df.apply(
-      lambda x: x['time to best eval (s)'] if x['target reached'] else np.inf,
+  summary_df['time to best eval on val (s)'] = workload_df.apply(
+      lambda x: x['accumulated_submission_time'][x['index best eval on val']], axis=1)
+  summary_df['time to target on val (s)'] = summary_df.apply(
+      lambda x: x['time to best eval on val (s)'] if x['val target reached'] else np.inf,
       axis=1)
 
+  # test metrics
+  if include_test_split:
+    test_metric, test_target = scoring_utils.get_workload_metrics_and_targets(workload, split='test')
+
+    summary_df['test target metric name'] = test_metric
+    summary_df['test target metric value'] = test_target
+
+    summary_df['test target reached'] = workload_df[test_metric].apply(
+        lambda x: target_op(x, test_target)).apply(np.any)
+    summary_df['best metric value on test'] = workload_df[test_metric].apply(
+        lambda x: best_op(x))
+    workload_df['index best eval on test'] = workload_df[test_metric].apply(
+        lambda x: idx_op(x))
+    summary_df['time to best eval on test (s)'] = workload_df.apply(
+        lambda x: x['accumulated_submission_time'][x['index best eval on test']], axis=1)
+    summary_df['time to target on test (s)'] = summary_df.apply(
+        lambda x: x['time to best eval on test (s)'] if x['test target reached'] else np.inf,
+        axis=1)
+
   return summary_df
 
 
-def print_submission_summary(df):
+def print_submission_summary(df, include_test_split=True):
   dfs = []
   for workload, group in df.groupby('workload'):
-    summary_df = get_summary_df(workload, group)
+    summary_df = get_summary_df(workload, group, include_test_split=include_test_split)
     dfs.append(summary_df)
 
   df = pd.concat(dfs)
   logging.info('\n' + tabulate(df, headers='keys', tablefmt='psql'))
+  return df
 
 
 def main(_):
@@ -93,7 +114,10 @@ def main(_):
     experiment_path = os.path.join(FLAGS.submission_directory, submission)
     df = scoring_utils.get_experiment_df(experiment_path)
     results[submission] = df
-    print_submission_summary(df)
+    summary_df = print_submission_summary(df)
+    with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'),
+              'w') as fout:
+      summary_df.to_csv(fout)
 
   if not FLAGS.strict:
     logging.warning(
diff --git a/scoring/scoring_utils.py b/scoring/scoring_utils.py
index 722b197a4..4a62db362 100644
--- a/scoring/scoring_utils.py
+++ b/scoring/scoring_utils.py
@@ -174,6 +174,11 @@ def get_experiment_df(experiment_dir):
     study_dirs = os.listdir(experiment_dir)
     for study_dir in study_dirs:
       workload_dirs = os.listdir(os.path.join(experiment_dir, study_dir))
+      # Keep only sub-directories; other entries are not workload dirs.
+      workload_dirs = [
+          w for w in workload_dirs
+          if os.path.isdir(os.path.join(experiment_dir, study_dir, w))
+      ]
       for workload in workload_dirs:
         data = {
             'workload': workload,
@@ -208,7 +213,7 @@ def get_experiment_df(experiment_dir):
 
 
 ## Get workload properties
-def get_workload_validation_target(workload):
+def get_workload_metrics_and_targets(workload, split='validation'):
   """Returns workload target metric name and value."""
   workload_name = re.match(WORKLOAD_NAME_PATTERN, workload).group(1)
   framework = re.match(WORKLOAD_NAME_PATTERN, workload).group(2)
@@ -225,6 +230,10 @@ def get_workload_validation_target(workload):
       workload_class_name=workload_metadata['workload_class_name'],
       workload_init_kwargs=workload_init_kwargs)
   metric_name = workload_obj.target_metric_name
-  validation_metric = f'validation/{metric_name}'
-  validation_target = workload_obj.validation_target_value
-  return validation_metric, validation_target
+  if split=='validation':
+    metric = f'validation/{metric_name}'
+    target = workload_obj.validation_target_value
+  elif split=='test':
+    metric = f'test/{metric_name}'
+    target = workload_obj.test_target_value
+  return metric,target

From e17d6041c1b620f366657f9ad24e8c06f9c43949 Mon Sep 17 00:00:00 2001
From: Priya Kasimbeg <kasimbeg@google.com>
Date: Thu, 28 Mar 2024 01:36:57 +0000
Subject: [PATCH 06/17] variants

---
 .../librispeech_jax/workload.py               | 12 +++++-----
 .../librispeech_pytorch/workload.py           | 12 +++++-----
 .../librispeech_jax/workload.py               | 12 +++++-----
 .../librispeech_pytorch/workload.py           | 24 +++++++++++++++++++
 4 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py
index a991b07ab..1b46b1841 100644
--- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py
+++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py
@@ -388,11 +388,11 @@ def attention_temperature(self) -> float:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.082665
+    return 0.107585
 
   @property
   def test_target_value(self) -> float:
-    return 0.50168
+    return 0.066145
 
 
 class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload):
@@ -403,11 +403,11 @@ def use_post_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.085371
+    return 0.094943
 
   @property
   def test_target_value(self) -> float:
-    return 0.053096
+    return 0.057181
 
 
 class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload):
@@ -418,8 +418,8 @@ def use_gelu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.077958
+    return 0.084047
 
   @property
   def test_target_value(self) -> float:
-    return 0.047643
+    return 0.050733
diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py
index 9e09e387f..5b144de33 100644
--- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py
@@ -354,11 +354,11 @@ def attention_temperature(self) -> float:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.082665
+    return 0.107585
 
   @property
   def test_target_value(self) -> float:
-    return 0.050168
+    return 0.066145
 
 
 class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload):
@@ -369,11 +369,11 @@ def use_post_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.085371
+    return 0.094943
 
   @property
   def test_target_value(self) -> float:
-    return 0.053096
+    return 0.057181
 
 
 class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload):
@@ -384,8 +384,8 @@ def use_gelu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.077958
+    return 0.084047
 
   @property
   def test_target_value(self) -> float:
-    return 0.047643
+    return 0.050733
diff --git a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py
index 4489c0402..79f1f502a 100644
--- a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py
+++ b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py
@@ -109,11 +109,11 @@ def use_tanh(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.133449
+    return 0.140084
 
   @property
   def test_target_value(self) -> float:
-    return 0.079810
+    return 0.089249
 
 
 class LibriSpeechDeepSpeechNoResNetWorkload(LibriSpeechDeepSpeechWorkload):
@@ -124,11 +124,11 @@ def enable_residual_connections(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.105042
+    return 0.122745
 
   @property
   def test_target_value(self) -> float:
-    return 0.060388
+    return 0.073837
 
 
 class LibriSpeechDeepSpeechNormAndSpecAugWorkload(LibriSpeechDeepSpeechWorkload
@@ -156,8 +156,8 @@ def time_mask_count(self) -> int:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.131553
+    return 0.137877
 
   @property
   def test_target_value(self) -> float:
-    return 0.082442
+    return 0.088675
diff --git a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py
index 23d533aa1..55a5773aa 100644
--- a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py
@@ -114,6 +114,14 @@ class LibriSpeechDeepSpeechTanhWorkload(LibriSpeechDeepSpeechWorkload):
   def use_tanh(self) -> bool:
     return True
 
+  @property
+  def validation_target_value(self) -> float:
+    return 0.140084
+
+  @property
+  def test_target_value(self) -> float:
+    return 0.089249
+
 
 class LibriSpeechDeepSpeechNoResNetWorkload(LibriSpeechDeepSpeechWorkload):
 
@@ -121,6 +129,14 @@ class LibriSpeechDeepSpeechNoResNetWorkload(LibriSpeechDeepSpeechWorkload):
   def enable_residual_connections(self) -> bool:
     return False
 
+  @property
+  def validation_target_value(self) -> float:
+    return 0.122745
+
+  @property
+  def test_target_value(self) -> float:
+    return 0.073837
+
 
 class LibriSpeechDeepSpeechNormAndSpecAugWorkload(LibriSpeechDeepSpeechWorkload
                                                  ):
@@ -144,3 +160,11 @@ def freq_mask_count(self) -> int:
   @property
   def time_mask_count(self) -> int:
     return 15
+
+  @property
+  def validation_target_value(self) -> float:
+    return 0.137877
+
+  @property
+  def test_target_value(self) -> float:
+    return 0.088675

From 4eacf68516910ad2e49a22b429408076ff4234e1 Mon Sep 17 00:00:00 2001
From: Priya Kasimbeg <kasimbeg@google.com>
Date: Thu, 28 Mar 2024 01:50:16 +0000
Subject: [PATCH 07/17] formatting

---
 scoring/score_submissions.py | 16 +++++++++++-----
 scoring/scoring_utils.py     |  6 +++---
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py
index 891f04e78..0b768855e 100644
--- a/scoring/score_submissions.py
+++ b/scoring/score_submissions.py
@@ -69,9 +69,11 @@ def get_summary_df(workload, workload_df, include_test_split=False):
   workload_df['index best eval on val'] = workload_df[validation_metric].apply(
       lambda x: idx_op(x))
   summary_df['time to best eval on val (s)'] = workload_df.apply(
-      lambda x: x['accumulated_submission_time'][x['index best eval on val']], axis=1)
+      lambda x: x['accumulated_submission_time'][x['index best eval on val']],
+      axis=1)
   summary_df['time to target on val (s)'] = summary_df.apply(
-      lambda x: x['time to best eval on val (s)'] if x['val target reached'] else np.inf,
+      lambda x: x['time to best eval on val (s)']
+      if x['val target reached'] else np.inf,
       axis=1)
 
   # test metrics
@@ -88,9 +90,12 @@ def get_summary_df(workload, workload_df, include_test_split=False):
     workload_df['index best eval on test'] = workload_df[test_metric].apply(
         lambda x: idx_op(x))
     summary_df['time to best eval on test (s)'] = workload_df.apply(
-        lambda x: x['accumulated_submission_time'][x['index best eval on test']], axis=1)
+        lambda x: x['accumulated_submission_time'][x['index best eval on test']
+                                                  ],
+        axis=1)
     summary_df['time to target on test (s)'] = summary_df.apply(
-        lambda x: x['time to best eval on test (s)'] if x['test target reached'] else np.inf,
+        lambda x: x['time to best eval on test (s)']
+        if x['test target reached'] else np.inf,
         axis=1)
 
   return summary_df
@@ -99,7 +104,8 @@ def get_summary_df(workload, workload_df, include_test_split=False):
 def print_submission_summary(df, include_test_split=True):
   dfs = []
   for workload, group in df.groupby('workload'):
-    summary_df = get_summary_df(workload, group, include_test_split=include_test_split)
+    summary_df = get_summary_df(
+        workload, group, include_test_split=include_test_split)
     dfs.append(summary_df)
 
   df = pd.concat(dfs)
diff --git a/scoring/scoring_utils.py b/scoring/scoring_utils.py
index 4a62db362..0dd997ab9 100644
--- a/scoring/scoring_utils.py
+++ b/scoring/scoring_utils.py
@@ -230,10 +230,10 @@ def get_workload_metrics_and_targets(workload, split='validation'):
       workload_class_name=workload_metadata['workload_class_name'],
       workload_init_kwargs=workload_init_kwargs)
   metric_name = workload_obj.target_metric_name
-  if split=='validation':
+  if split == 'validation':
     metric = f'validation/{metric_name}'
     target = workload_obj.validation_target_value
-  elif split=='test':
+  elif split == 'test':
     metric = f'test/{metric_name}'
     target = workload_obj.test_target_value
-  return metric,target
+  return metric, target

From 5c4485e8fca3ebf7202d663a8ded179c6fa83f50 Mon Sep 17 00:00:00 2001
From: Alice <8447104+tfaod@users.noreply.github.com>
Date: Thu, 28 Mar 2024 15:46:23 -0400
Subject: [PATCH 08/17] [fix] random_utils.py to `_signed_to_unsigned`

When running the submission_runner on the self-tuning track, we run into this error calling `_signed_to_unsigned` from random_utils.py.

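I've added a fix: `_signed_to_unsigned` now reduces every seed modulo 2**32, so seeds at or above 2**32 (not only negative ones) are mapped into the valid range.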

```ValueError: Seed must be between 0 and 2**32 - 1
    rng = prng.PRNGKey(rng_seed)
  File "/private/home/axyang/optimization/algorithmic-efficiency-entry/algorithmic_efficiency/random_utils.py", line 79, in PRNGKey
    return _PRNGKey(seed)
```
---
 algorithmic_efficiency/random_utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/algorithmic_efficiency/random_utils.py b/algorithmic_efficiency/random_utils.py
index 68e9a9cfe..cf1ea6c32 100644
--- a/algorithmic_efficiency/random_utils.py
+++ b/algorithmic_efficiency/random_utils.py
@@ -26,11 +26,11 @@
 
 def _signed_to_unsigned(seed: SeedType) -> SeedType:
   if isinstance(seed, int):
-    return seed + 2**32 if seed < 0 else seed
+    return seed % 2**32
   if isinstance(seed, list):
-    return [s + 2**32 if s < 0 else s for s in seed]
+    return [s % 2**32 for s in seed]
   if isinstance(seed, np.ndarray):
-    return np.array([s + 2**32 if s < 0 else s for s in seed.tolist()])
+    return np.array([s % 2**32 for s in seed.tolist()])
 
 
 def _fold_in(seed: SeedType, data: Any) -> List[Union[SeedType, Any]]:

From d8c7edf35f72afa343e364d077e24e4d44f90e58 Mon Sep 17 00:00:00 2001
From: Priya Kasimbeg <kasimbeg@google.com>
Date: Thu, 28 Mar 2024 21:40:25 +0000
Subject: [PATCH 09/17] change conformer variant targets

---
 .../librispeech_jax/workload.py                     | 12 ++++++------
 .../librispeech_pytorch/workload.py                 | 13 +++++++------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py
index 1b46b1841..b579ebef9 100644
--- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py
+++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py
@@ -388,11 +388,11 @@ def attention_temperature(self) -> float:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.107585
+    return 0.109976153
 
   @property
   def test_target_value(self) -> float:
-    return 0.066145
+    return 0.06806410335
 
 
 class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload):
@@ -403,11 +403,11 @@ def use_post_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.094943
+    return 0.09730924819
 
   @property
   def test_target_value(self) -> float:
-    return 0.057181
+    return 0.05995978307
 
 
 class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload):
@@ -418,8 +418,8 @@ def use_gelu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.084047
+    return 0.09411355803
 
   @property
   def test_target_value(self) -> float:
-    return 0.050733
+    return 0.05662868401
diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py
index 5b144de33..16a365f93 100644
--- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py
@@ -354,11 +354,11 @@ def attention_temperature(self) -> float:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.107585
+    return 0.109976153
 
   @property
   def test_target_value(self) -> float:
-    return 0.066145
+    return 0.06806410335
 
 
 class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload):
@@ -369,11 +369,11 @@ def use_post_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.094943
+    return 0.09730924819
 
   @property
   def test_target_value(self) -> float:
-    return 0.057181
+    return 0.05995978307
 
 
 class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload):
@@ -384,8 +384,9 @@ def use_gelu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.084047
+    return 0.09411355803
 
   @property
   def test_target_value(self) -> float:
-    return 0.050733
+    return 0.05662868401
+

From fc623faa0f1ee24b8a55a01941d6e8be7413cdcf Mon Sep 17 00:00:00 2001
From: Priya Kasimbeg <kasimbeg@google.com>
Date: Thu, 28 Mar 2024 21:42:11 +0000
Subject: [PATCH 10/17] formatting

---
 .../librispeech_conformer/librispeech_pytorch/workload.py        | 1 -
 1 file changed, 1 deletion(-)

diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py
index 16a365f93..34b615b73 100644
--- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py
@@ -389,4 +389,3 @@ def validation_target_value(self) -> float:
   @property
   def test_target_value(self) -> float:
     return 0.05662868401
-

From ff8ba5d68cd1d8a408f2d03e145a207267db7769 Mon Sep 17 00:00:00 2001
From: Priya Kasimbeg <kasimbeg@google.com>
Date: Thu, 28 Mar 2024 22:53:39 +0000
Subject: [PATCH 11/17] undo wrong change

---
 algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py     | 2 +-
 algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py b/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py
index c7da35b11..8408b284f 100644
--- a/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py
+++ b/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py
@@ -323,7 +323,7 @@ def test_target_value(self) -> float:
 
   @property
   def attention_temp(self) -> float:
-    return 1.6
+    return 4.0
 
 
 class WmtWorkloadGLUTanH(WmtWorkload):
diff --git a/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py b/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py
index dd7893be3..c39b0a9df 100644
--- a/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py
@@ -379,7 +379,7 @@ def test_target_value(self) -> float:
 
   @property
   def attention_temp(self) -> float:
-    return 1.6
+    return 4.0
 
 
 class WmtWorkloadGLUTanH(WmtWorkload):

From ce7f7e367e0e6b9fc12b33df0d827e7903284f12 Mon Sep 17 00:00:00 2001
From: Priya Kasimbeg <kasimbeg@google.com>
Date: Thu, 28 Mar 2024 23:07:45 +0000
Subject: [PATCH 12/17] fix

---
 scoring/performance_profile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py
index 8009dbc88..8ee271804 100644
--- a/scoring/performance_profile.py
+++ b/scoring/performance_profile.py
@@ -157,7 +157,7 @@ def get_workloads_time_to_target(submission,
 
   # For each workload get submission time get the submission times to target.
   for workload, group in submission.groupby('workload'):
-    validation_metric, validation_target = scoring_utils.get_workload_validation_target(workload)
+    validation_metric, validation_target = scoring_utils.get_workload_metrics_and_targets(workload)
 
     # Check number of studies
     time_vals_per_study = []

From 66e53c993f81434016cb74a1108b35e58e2341c2 Mon Sep 17 00:00:00 2001
From: Priya Kasimbeg <kasimbeg@google.com>
Date: Fri, 29 Mar 2024 00:38:47 +0000
Subject: [PATCH 13/17] fix config

---
 utils/target_setting_workload_config.json | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/utils/target_setting_workload_config.json b/utils/target_setting_workload_config.json
index 56988c78a..a8c050422 100644
--- a/utils/target_setting_workload_config.json
+++ b/utils/target_setting_workload_config.json
@@ -123,25 +123,25 @@
         "max_steps": 48000,
         "dataset": "librispeech",
         "submission_path": "reference_algorithms/target_setting_algorithms/jax_nadamw.py",
-        "tuning_search_space": "reference_algorithms/target_setting_algorithms/imagenet_resnet/tuning_search_space.json"
+        "tuning_search_space": "reference_algorithms/target_setting_algorithms/librispeech_deepspeech/tuning_search_space.json"
     },
     "librispeech_deepspeech_no_resnet": {
         "max_steps": 48000,
         "dataset": "librispeech",
         "submission_path": "reference_algorithms/target_setting_algorithms/jax_nadamw.py",
-        "tuning_search_space": "reference_algorithms/target_setting_algorithms/imagenet_resnet/tuning_search_space.json"
+        "tuning_search_space": "reference_algorithms/target_setting_algorithms/librispeech_deepspeech_no_resnet/tuning_search_space.json"
     },
     "librispeech_deepspeech_norm_and_spec_aug": {
         "max_steps": 48000,
         "dataset": "librispeech",
         "submission_path": "reference_algorithms/target_setting_algorithms/jax_nadamw.py",
-        "tuning_search_space": "reference_algorithms/target_setting_algorithms/imagenet_resnet/tuning_search_space.json"
+        "tuning_search_space": "reference_algorithms/target_setting_algorithms/librispeech_deepspeech_norm_and_spec_aug/tuning_search_space.json"
     },
     "librispeech_deepspeech_tanh": {
         "max_steps": 48000,
         "dataset": "librispeech",
         "submission_path": "reference_algorithms/target_setting_algorithms/jax_nadamw.py",
-        "tuning_search_space": "reference_algorithms/target_setting_algorithms/imagenet_resnet/tuning_search_space.json"
+        "tuning_search_space": "reference_algorithms/target_setting_algorithms/librispeech_deepspeech_tanh/tuning_search_space.json"
     },
     "criteo1tb": {
         "max_steps": 10666,

From 5beb680ae5521f3f1663b9709a526635e5d24d09 Mon Sep 17 00:00:00 2001
From: Priya Kasimbeg <kasimbeg@google.com>
Date: Fri, 29 Mar 2024 18:51:41 +0000
Subject: [PATCH 14/17] target rounding

---
 .../workloads/criteo1tb/criteo1tb_jax/workload.py    | 10 +++++-----
 .../criteo1tb/criteo1tb_pytorch/workload.py          | 10 +++++-----
 .../imagenet_resnet/imagenet_jax/workload.py         |  8 ++++----
 .../imagenet_resnet/imagenet_pytorch/workload.py     |  8 ++++----
 .../workloads/imagenet_vit/imagenet_jax/workload.py  |  6 +++---
 .../imagenet_vit/imagenet_pytorch/workload.py        |  6 +++---
 .../librispeech_jax/workload.py                      | 12 ++++++------
 .../librispeech_pytorch/workload.py                  | 12 ++++++------
 .../librispeech_jax/workload.py                      | 12 ++++++------
 .../librispeech_pytorch/workload.py                  | 12 ++++++------
 .../workloads/wmt/wmt_jax/workload.py                | 10 +++++-----
 .../workloads/wmt/wmt_pytorch/workload.py            | 10 +++++-----
 12 files changed, 58 insertions(+), 58 deletions(-)

diff --git a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py
index f6945c021..3743dc1ff 100644
--- a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py
+++ b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py
@@ -173,7 +173,7 @@ def use_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.1237562372
+    return 0.123757
 
   @property
   def test_target_value(self) -> float:
@@ -191,23 +191,23 @@ def use_resnet(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.1241490923
+    return 0.12415
 
   @property
   def test_target_value(self) -> float:
-    return 0.1264799502
+    return 0.12648
 
 
 class Criteo1TbDlrmSmallEmbedInitWorkload(Criteo1TbDlrmSmallWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 0.129656005
+    return 0.129657
 
   @property
   def test_target_value(self) -> float:
     # Todo
-    return 0.1319666458
+    return 0.131967
 
   @property
   def embedding_init_multiplier(self) -> float:
diff --git a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py
index 434ca7f50..446267440 100644
--- a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py
@@ -254,7 +254,7 @@ def use_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.1237562372
+    return 0.123757
 
   @property
   def test_target_value(self) -> float:
@@ -272,23 +272,23 @@ def use_resnet(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.1241490923
+    return 0.12415
 
   @property
   def test_target_value(self) -> float:
-    return 0.1264799502
+    return 0.12648
 
 
 class Criteo1TbDlrmSmallEmbedInitWorkload(Criteo1TbDlrmSmallWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 0.129656005
+    return 0.129657
 
   @property
   def test_target_value(self) -> float:
     # Todo
-    return 0.1319666458
+    return 0.131967
 
   @property
   def embedding_init_multiplier(self) -> float:
diff --git a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py
index a3506b4fd..d8de214f5 100644
--- a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py
+++ b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py
@@ -272,11 +272,11 @@ def use_silu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.7544599771
+    return 0.75445
 
   @property
   def test_target_value(self) -> float:
-    return 0.6323000193
+    return 0.6323
 
 
 class ImagenetResNetGELUWorkload(ImagenetResNetWorkload):
@@ -287,11 +287,11 @@ def use_gelu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.7676599622
+    return 0.76765
 
   @property
   def test_target_value(self) -> float:
-    return 0.651900053
+    return 0.6519
 
 
 class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload):
diff --git a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py
index 089caf5cb..3549911fa 100644
--- a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py
@@ -326,11 +326,11 @@ def use_silu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.7544599771
+    return 0.75445
 
   @property
   def test_target_value(self) -> float:
-    return 0.6323000193
+    return 0.6323
 
 
 class ImagenetResNetGELUWorkload(ImagenetResNetWorkload):
@@ -341,11 +341,11 @@ def use_gelu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.7676599622
+    return 0.76765
 
   @property
   def test_target_value(self) -> float:
-    return 0.651900053
+    return 0.6519
 
 
 class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload):
diff --git a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py
index 0cd60251e..2ad71ffd0 100644
--- a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py
+++ b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py
@@ -99,11 +99,11 @@ def use_glu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.7573800087
+    return 0.75738
 
   @property
   def test_target_value(self) -> float:
-    return 0.6359000206
+    return 0.6359
 
 
 class ImagenetVitPostLNWorkload(ImagenetVitWorkload):
@@ -129,7 +129,7 @@ def use_map(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.77114
+    return 0.77113
 
   @property
   def test_target_value(self) -> float:
diff --git a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py
index 50233b0b0..703d40b07 100644
--- a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py
@@ -90,11 +90,11 @@ def use_glu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.7573800087
+    return 0.75738
 
   @property
   def test_target_value(self) -> float:
-    return 0.6359000206
+    return 0.6359
 
 
 class ImagenetVitPostLNWorkload(ImagenetVitWorkload):
@@ -120,7 +120,7 @@ def use_map(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.77114
+    return 0.77113
 
   @property
   def test_target_value(self) -> float:
diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py
index b579ebef9..f4d1ab0f3 100644
--- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py
+++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py
@@ -388,11 +388,11 @@ def attention_temperature(self) -> float:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.109976153
+    return 0.109977
 
   @property
   def test_target_value(self) -> float:
-    return 0.06806410335
+    return 0.068065
 
 
 class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload):
@@ -403,11 +403,11 @@ def use_post_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.09730924819
+    return 0.09731
 
   @property
   def test_target_value(self) -> float:
-    return 0.05995978307
+    return 0.05996
 
 
 class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload):
@@ -418,8 +418,8 @@ def use_gelu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.09411355803
+    return 0.094114
 
   @property
   def test_target_value(self) -> float:
-    return 0.05662868401
+    return 0.056629
diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py
index 34b615b73..155b30920 100644
--- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py
@@ -354,11 +354,11 @@ def attention_temperature(self) -> float:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.109976153
+    return 0.109977
 
   @property
   def test_target_value(self) -> float:
-    return 0.06806410335
+    return 0.068065
 
 
 class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload):
@@ -369,11 +369,11 @@ def use_post_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.09730924819
+    return 0.09731
 
   @property
   def test_target_value(self) -> float:
-    return 0.05995978307
+    return 0.05996
 
 
 class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload):
@@ -384,8 +384,8 @@ def use_gelu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.09411355803
+    return 0.094114
 
   @property
   def test_target_value(self) -> float:
-    return 0.05662868401
+    return 0.056629
diff --git a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py
index 79f1f502a..8473fac0f 100644
--- a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py
+++ b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py
@@ -109,11 +109,11 @@ def use_tanh(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.140084
+    return 0.150883
 
   @property
   def test_target_value(self) -> float:
-    return 0.089249
+    return 0.098613
 
 
 class LibriSpeechDeepSpeechNoResNetWorkload(LibriSpeechDeepSpeechWorkload):
@@ -124,11 +124,11 @@ def enable_residual_connections(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.122745
+    return 0.131564
 
   @property
   def test_target_value(self) -> float:
-    return 0.073837
+    return 0.079297
 
 
 class LibriSpeechDeepSpeechNormAndSpecAugWorkload(LibriSpeechDeepSpeechWorkload
@@ -156,8 +156,8 @@ def time_mask_count(self) -> int:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.137877
+    return 0.14342
 
   @property
   def test_target_value(self) -> float:
-    return 0.088675
+    return 0.090976
diff --git a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py
index 55a5773aa..626bac278 100644
--- a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py
@@ -116,11 +116,11 @@ def use_tanh(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.140084
+    return 0.150883
 
   @property
   def test_target_value(self) -> float:
-    return 0.089249
+    return 0.098613
 
 
 class LibriSpeechDeepSpeechNoResNetWorkload(LibriSpeechDeepSpeechWorkload):
@@ -131,11 +131,11 @@ def enable_residual_connections(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.122745
+    return 0.131564
 
   @property
   def test_target_value(self) -> float:
-    return 0.073837
+    return 0.079297
 
 
 class LibriSpeechDeepSpeechNormAndSpecAugWorkload(LibriSpeechDeepSpeechWorkload
@@ -163,8 +163,8 @@ def time_mask_count(self) -> int:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.137877
+    return 0.14342
 
   @property
   def test_target_value(self) -> float:
-    return 0.088675
+    return 0.090976
diff --git a/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py b/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py
index 8408b284f..c69965692 100644
--- a/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py
+++ b/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py
@@ -299,7 +299,7 @@ class WmtWorkloadPostLN(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 30.07797237
+    return 30.0779
 
   @property
   def test_target_value(self) -> float:
@@ -315,11 +315,11 @@ class WmtWorkloadAttentionTemp(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 29.86119393
+    return 29.8611
 
   @property
   def test_target_value(self) -> float:
-    return 29.41438511
+    return 29.4143
 
   @property
   def attention_temp(self) -> float:
@@ -331,11 +331,11 @@ class WmtWorkloadGLUTanH(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 29.65174349
+    return 29.6517
 
   @property
   def test_target_value(self) -> float:
-    return 29.05153769
+    return 29.0515
 
   @property
   def activation(self) -> str:
diff --git a/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py b/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py
index c39b0a9df..5ef09d278 100644
--- a/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py
@@ -355,7 +355,7 @@ class WmtWorkloadPostLN(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 30.07797237
+    return 30.0779
 
   @property
   def test_target_value(self) -> float:
@@ -371,11 +371,11 @@ class WmtWorkloadAttentionTemp(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 29.86119393
+    return 29.8611
 
   @property
   def test_target_value(self) -> float:
-    return 29.41438511
+    return 29.4143
 
   @property
   def attention_temp(self) -> float:
@@ -387,11 +387,11 @@ class WmtWorkloadGLUTanH(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 29.65174349
+    return 29.6517
 
   @property
   def test_target_value(self) -> float:
-    return 29.05153769
+    return 29.0515
 
   @property
   def activation(self) -> str:

From c9598c0901e18745e38738f29107198cf7368203 Mon Sep 17 00:00:00 2001
From: Priya Kasimbeg <kasimbeg@google.com>
Date: Fri, 29 Mar 2024 21:34:20 +0000
Subject: [PATCH 15/17] formatting

---
 .../librispeech_conformer/librispeech_pytorch/models.py         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py
index 90a12b779..502cb093e 100644
--- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py
+++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py
@@ -208,7 +208,7 @@ def __init__(self, config: ConformerConfig):
           config.feed_forward_residual_dropout_rate)
     self.dropout2 = nn.Dropout(
         p=feed_forward_residual_dropout_rate, inplace=True)
-  
+
   def forward(self, inputs, padding_mask):
     inputs = self.ln(inputs)
     inputs = self.linear1(inputs)

From 28adc86c67fefa45ab0fffa1e0bd85137c7701b5 Mon Sep 17 00:00:00 2001
From: Priya Kasimbeg <kasimbeg@google.com>
Date: Fri, 29 Mar 2024 23:02:22 +0000
Subject: [PATCH 16/17] update warning

---
 submission_runner.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/submission_runner.py b/submission_runner.py
index 40eb8cd58..87b2703e1 100644
--- a/submission_runner.py
+++ b/submission_runner.py
@@ -154,11 +154,12 @@
 flags.DEFINE_boolean('set_pytorch_max_split_size',
                      False,
                      'If true, set pytorch max_split_size_mb to 256')
-flags.DEFINE_integer('pytorch_eval_num_workers',
-                     0,
-                     'Number of workers for PyTorch evaluation data loaders.'
-                     'WARNING: there is an known bug that results in wrong'
-                     'evals when the number of workers is not equal to 0.')
+flags.DEFINE_integer(
+    'pytorch_eval_num_workers',
+    0,
+    'Number of workers for ImageNet PyTorch evaluation data loaders. '
+    'WARNING: Setting pytorch_eval_num_workers != 0, will result '
+    'in incorrect evals currently, see issues/732.')
 FLAGS = flags.FLAGS
 USE_PYTORCH_DDP, RANK, DEVICE, N_GPUS = pytorch_setup()
 

From d492d69dfc1eee3f707231901191c98ec4031a7a Mon Sep 17 00:00:00 2001
From: Priya Kasimbeg <kasimbeg@google.com>
Date: Fri, 29 Mar 2024 23:04:10 +0000
Subject: [PATCH 17/17] add warning about num_workers

---
 submission_runner.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/submission_runner.py b/submission_runner.py
index 87b2703e1..a6f8c05a3 100644
--- a/submission_runner.py
+++ b/submission_runner.py
@@ -637,6 +637,12 @@ def main(_):
   if FLAGS.framework == 'pytorch':
     pytorch_init(USE_PYTORCH_DDP, RANK, profiler)
 
+  # TODO: remove once issue resolved.
+  if FLAGS.pytorch_eval_num_workers != 0:
+    logging.warning(
+        'WARNING: Setting pytorch_eval_num_workers != 0, will result '
+        'in incorrect evals currently, see issues/732.')
+
   workload_metadata = WORKLOADS[FLAGS.workload]
 
   # Prevent OOM on librispeech conformer.