diff --git a/algorithmic_efficiency/random_utils.py b/algorithmic_efficiency/random_utils.py
index 68e9a9cfe..cf1ea6c32 100644
--- a/algorithmic_efficiency/random_utils.py
+++ b/algorithmic_efficiency/random_utils.py
@@ -26,11 +26,11 @@
 
 def _signed_to_unsigned(seed: SeedType) -> SeedType:
   if isinstance(seed, int):
-    return seed + 2**32 if seed < 0 else seed
+    return seed % 2**32
   if isinstance(seed, list):
-    return [s + 2**32 if s < 0 else s for s in seed]
+    return [s % 2**32 for s in seed]
   if isinstance(seed, np.ndarray):
-    return np.array([s + 2**32 if s < 0 else s for s in seed.tolist()])
+    return np.array([s % 2**32 for s in seed.tolist()])
 
 
 def _fold_in(seed: SeedType, data: Any) -> List[Union[SeedType, Any]]:
diff --git a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py
index 84a0a7416..3743dc1ff 100644
--- a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py
+++ b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py
@@ -173,7 +173,7 @@ def use_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.123744
+    return 0.123757
 
   @property
   def test_target_value(self) -> float:
@@ -191,23 +191,23 @@ def use_resnet(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.124027
+    return 0.12415
 
   @property
   def test_target_value(self) -> float:
-    return 0.126468
+    return 0.12648
 
 
 class Criteo1TbDlrmSmallEmbedInitWorkload(Criteo1TbDlrmSmallWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 0.124286
+    return 0.129657
 
   @property
   def test_target_value(self) -> float:
     # Todo
-    return 0.126725
+    return 0.131967
 
   @property
   def embedding_init_multiplier(self) -> float:
diff --git a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py
index c63ac3f7b..446267440 100644
--- a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py
@@ -254,7 +254,7 @@ def use_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.123744
+    return 0.123757
 
   @property
   def test_target_value(self) -> float:
@@ -272,23 +272,23 @@ def use_resnet(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.124027
+    return 0.12415
 
   @property
   def test_target_value(self) -> float:
-    return 0.126468
+    return 0.12648
 
 
 class Criteo1TbDlrmSmallEmbedInitWorkload(Criteo1TbDlrmSmallWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 0.124286
+    return 0.129657
 
   @property
   def test_target_value(self) -> float:
     # Todo
-    return 0.126725
+    return 0.131967
 
   @property
   def embedding_init_multiplier(self) -> float:
diff --git a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py
index e4810e142..d8de214f5 100644
--- a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py
+++ b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py
@@ -272,11 +272,11 @@ def use_silu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.22009
+    return 0.75445
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3426
+    return 0.6323
 
 
 class ImagenetResNetGELUWorkload(ImagenetResNetWorkload):
@@ -287,11 +287,11 @@ def use_gelu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.22077
+    return 0.76765
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3402
+    return 0.6519
 
 
 class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload):
@@ -302,8 +302,8 @@ def bn_init_scale(self) -> float:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.23474
+    return 0.76526
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3577
+    return 0.6423
diff --git a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py
index 5c7c6c7d2..3549911fa 100644
--- a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py
@@ -326,11 +326,11 @@ def use_silu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.22009
+    return 0.75445
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.342
+    return 0.6323
 
 
 class ImagenetResNetGELUWorkload(ImagenetResNetWorkload):
@@ -341,11 +341,11 @@ def use_gelu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.22077
+    return 0.76765
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3402
+    return 0.6519
 
 
 class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload):
@@ -356,8 +356,8 @@ def bn_init_scale(self) -> float:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.23474
+    return 0.76526
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3577
+    return 0.6423
diff --git a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py
index a54ee9b5e..2ad71ffd0 100644
--- a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py
+++ b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py
@@ -99,11 +99,11 @@ def use_glu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.2233
+    return 0.75738
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3455
+    return 0.6359
 
 
 class ImagenetVitPostLNWorkload(ImagenetVitWorkload):
@@ -114,11 +114,11 @@ def use_post_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.24688
+    return 0.75312
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3714
+    return 0.6286
 
 
 class ImagenetVitMapWorkload(ImagenetVitWorkload):
@@ -129,8 +129,8 @@ def use_map(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.22886
+    return 0.77113
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3477
+    return 0.6523
diff --git a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py
index 51c79b2d0..703d40b07 100644
--- a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py
@@ -90,11 +90,11 @@ def use_glu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.2233
+    return 0.75738
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3455
+    return 0.6359
 
 
 class ImagenetVitPostLNWorkload(ImagenetVitWorkload):
@@ -105,11 +105,11 @@ def use_post_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.24688
+    return 0.75312
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3714
+    return 0.6286
 
 
 class ImagenetVitMapWorkload(ImagenetVitWorkload):
@@ -120,8 +120,8 @@ def use_map(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 1 - 0.22886
+    return 0.77113
 
   @property
   def test_target_value(self) -> float:
-    return 1 - 0.3477
+    return 0.6523
diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py
index a991b07ab..f4d1ab0f3 100644
--- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py
+++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py
@@ -388,11 +388,11 @@ def attention_temperature(self) -> float:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.082665
+    return 0.109977
 
   @property
   def test_target_value(self) -> float:
-    return 0.50168
+    return 0.068065
 
 
 class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload):
@@ -403,11 +403,11 @@ def use_post_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.085371
+    return 0.09731
 
   @property
   def test_target_value(self) -> float:
-    return 0.053096
+    return 0.05996
 
 
 class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload):
@@ -418,8 +418,8 @@ def use_gelu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.077958
+    return 0.094114
 
   @property
   def test_target_value(self) -> float:
-    return 0.047643
+    return 0.056629
diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py
index fe3a1e179..502cb093e 100644
--- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py
+++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py
@@ -93,7 +93,7 @@ def __init__(self,
         out_features=self.encoder_dim,
         bias=True)
     self.pos_encode = AddPositionalEmbedding(embedding_dim=self.encoder_dim)
-    self.dropout = nn.Dropout(p=self.input_dropout_rate)
+    self.dropout = nn.Dropout(p=self.input_dropout_rate, inplace=True)
 
   def forward(self, inputs, input_paddings):
     output_paddings = input_paddings
@@ -195,7 +195,7 @@ def __init__(self, config: ConformerConfig):
         in_features=config.encoder_dim,
         out_features=config.encoder_dim * config.feed_forward_expansion_factor,
         bias=True)
-    self.dropout1 = nn.Dropout(p=config.feed_forward_dropout_rate)
+    self.dropout1 = nn.Dropout(p=config.feed_forward_dropout_rate, inplace=True)
     self.linear2 = nn.Linear(
         in_features=config.encoder_dim * config.feed_forward_expansion_factor,
         out_features=config.encoder_dim,
@@ -206,7 +206,8 @@ def __init__(self, config: ConformerConfig):
     else:
       feed_forward_residual_dropout_rate = (
           config.feed_forward_residual_dropout_rate)
-    self.dropout2 = nn.Dropout(p=feed_forward_residual_dropout_rate)
+    self.dropout2 = nn.Dropout(
+        p=feed_forward_residual_dropout_rate, inplace=True)
 
   def forward(self, inputs, padding_mask):
     inputs = self.ln(inputs)
@@ -316,7 +317,7 @@ def __init__(self, config: ConformerConfig):
       attention_residual_dropout_rate = 0.1
     else:
       attention_residual_dropout_rate = config.attention_residual_dropout_rate
-    self.dropout = nn.Dropout(p=attention_residual_dropout_rate)
+    self.dropout = nn.Dropout(p=attention_residual_dropout_rate, inplace=True)
 
   def forward(self, outputs, paddings):
     outputs = self.ln(outputs)
@@ -407,7 +408,7 @@ def __init__(self, config):
       conv_residual_dropout_rate = 0.0
     else:
       conv_residual_dropout_rate = config.conv_residual_dropout_rate
-    self.dropout = nn.Dropout(p=conv_residual_dropout_rate)
+    self.dropout = nn.Dropout(p=conv_residual_dropout_rate, inplace=True)
 
   def forward(self, inputs, input_paddings):
     inputs = self.ln(inputs)
diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py
index 9e09e387f..155b30920 100644
--- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py
@@ -354,11 +354,11 @@ def attention_temperature(self) -> float:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.082665
+    return 0.109977
 
   @property
   def test_target_value(self) -> float:
-    return 0.050168
+    return 0.068065
 
 
 class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload):
@@ -369,11 +369,11 @@ def use_post_layer_norm(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.085371
+    return 0.09731
 
   @property
   def test_target_value(self) -> float:
-    return 0.053096
+    return 0.05996
 
 
 class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload):
@@ -384,8 +384,8 @@ def use_gelu(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.077958
+    return 0.094114
 
   @property
   def test_target_value(self) -> float:
-    return 0.047643
+    return 0.056629
diff --git a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py
index 4489c0402..8473fac0f 100644
--- a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py
+++ b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py
@@ -109,11 +109,11 @@ def use_tanh(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.133449
+    return 0.150883
 
   @property
   def test_target_value(self) -> float:
-    return 0.079810
+    return 0.098613
 
 
 class LibriSpeechDeepSpeechNoResNetWorkload(LibriSpeechDeepSpeechWorkload):
@@ -124,11 +124,11 @@ def enable_residual_connections(self) -> bool:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.105042
+    return 0.131564
 
   @property
   def test_target_value(self) -> float:
-    return 0.060388
+    return 0.079297
 
 
 class LibriSpeechDeepSpeechNormAndSpecAugWorkload(LibriSpeechDeepSpeechWorkload
@@ -156,8 +156,8 @@ def time_mask_count(self) -> int:
 
   @property
   def validation_target_value(self) -> float:
-    return 0.131553
+    return 0.14342
 
   @property
   def test_target_value(self) -> float:
-    return 0.082442
+    return 0.090976
diff --git a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py
index 23d533aa1..626bac278 100644
--- a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py
@@ -114,6 +114,14 @@ class LibriSpeechDeepSpeechTanhWorkload(LibriSpeechDeepSpeechWorkload):
   def use_tanh(self) -> bool:
     return True
 
+  @property
+  def validation_target_value(self) -> float:
+    return 0.150883
+
+  @property
+  def test_target_value(self) -> float:
+    return 0.098613
+
 
 class LibriSpeechDeepSpeechNoResNetWorkload(LibriSpeechDeepSpeechWorkload):
 
@@ -121,6 +129,14 @@ class LibriSpeechDeepSpeechNoResNetWorkload(LibriSpeechDeepSpeechWorkload):
   def enable_residual_connections(self) -> bool:
     return False
 
+  @property
+  def validation_target_value(self) -> float:
+    return 0.131564
+
+  @property
+  def test_target_value(self) -> float:
+    return 0.079297
+
 
 class LibriSpeechDeepSpeechNormAndSpecAugWorkload(LibriSpeechDeepSpeechWorkload
                                                  ):
@@ -144,3 +160,11 @@ def freq_mask_count(self) -> int:
   @property
   def time_mask_count(self) -> int:
     return 15
+
+  @property
+  def validation_target_value(self) -> float:
+    return 0.14342
+
+  @property
+  def test_target_value(self) -> float:
+    return 0.090976
diff --git a/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py b/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py
index b10d4056d..c69965692 100644
--- a/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py
+++ b/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py
@@ -299,7 +299,7 @@ class WmtWorkloadPostLN(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 30.2003
+    return 30.0779
 
   @property
   def test_target_value(self) -> float:
@@ -315,11 +315,11 @@ class WmtWorkloadAttentionTemp(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 30.0756
+    return 29.8611
 
   @property
   def test_target_value(self) -> float:
-    return 29.8094
+    return 29.4143
 
   @property
   def attention_temp(self) -> float:
@@ -331,11 +331,11 @@ class WmtWorkloadGLUTanH(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 30.0002
+    return 29.6517
 
   @property
   def test_target_value(self) -> float:
-    return 29.8139
+    return 29.0515
 
   @property
   def activation(self) -> str:
diff --git a/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py b/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py
index 9f6d817f4..5ef09d278 100644
--- a/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py
+++ b/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py
@@ -355,7 +355,7 @@ class WmtWorkloadPostLN(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 30.2003
+    return 30.0779
 
   @property
   def test_target_value(self) -> float:
@@ -371,11 +371,11 @@ class WmtWorkloadAttentionTemp(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 30.0756
+    return 29.8611
 
   @property
   def test_target_value(self) -> float:
-    return 29.8094
+    return 29.4143
 
   @property
   def attention_temp(self) -> float:
@@ -387,11 +387,11 @@ class WmtWorkloadGLUTanH(WmtWorkload):
 
   @property
   def validation_target_value(self) -> float:
-    return 30.0002
+    return 29.6517
 
   @property
   def test_target_value(self) -> float:
-    return 29.8139
+    return 29.0515
 
   @property
   def activation(self) -> str:
diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py
index 8009dbc88..8ee271804 100644
--- a/scoring/performance_profile.py
+++ b/scoring/performance_profile.py
@@ -157,7 +157,7 @@ def get_workloads_time_to_target(submission,
 
   # For each workload get submission time get the submission times to target.
   for workload, group in submission.groupby('workload'):
-    validation_metric, validation_target = scoring_utils.get_workload_validation_target(workload)
+    validation_metric, validation_target = scoring_utils.get_workload_metrics_and_targets(workload)
 
     # Check number of studies
     time_vals_per_study = []
diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py
index 48777c69e..0b768855e 100644
--- a/scoring/score_submissions.py
+++ b/scoring/score_submissions.py
@@ -48,8 +48,9 @@
 FLAGS = flags.FLAGS
 
 
-def get_summary_df(workload, workload_df):
-  validation_metric, validation_target = scoring_utils.get_workload_validation_target(workload)
+def get_summary_df(workload, workload_df, include_test_split=False):
+  validation_metric, validation_target = scoring_utils.get_workload_metrics_and_targets(workload, split='validation')
+
   is_minimized = performance_profile.check_if_minimized(validation_metric)
   target_op = operator.le if is_minimized else operator.ge
   best_op = min if is_minimized else max
@@ -58,32 +59,58 @@ def get_summary_df(workload, workload_df):
   summary_df = pd.DataFrame()
   summary_df['workload'] = workload_df['workload']
   summary_df['trial'] = workload_df['trial'].apply(lambda x: x[0])
-  summary_df['target metric name'] = validation_metric
-  summary_df['target metric value'] = validation_target
+  summary_df['val target metric name'] = validation_metric
+  summary_df['val target metric value'] = validation_target
 
-  summary_df['target reached'] = workload_df[validation_metric].apply(
+  summary_df['val target reached'] = workload_df[validation_metric].apply(
       lambda x: target_op(x, validation_target)).apply(np.any)
-  summary_df['best metric value'] = workload_df[validation_metric].apply(
+  summary_df['best metric value on val'] = workload_df[validation_metric].apply(
       lambda x: best_op(x))
-  workload_df['index best eval'] = workload_df[validation_metric].apply(
+  workload_df['index best eval on val'] = workload_df[validation_metric].apply(
       lambda x: idx_op(x))
-  summary_df['time to best eval (s)'] = workload_df.apply(
-      lambda x: x['accumulated_submission_time'][x['index best eval']], axis=1)
-  summary_df['time to target (s)'] = summary_df.apply(
-      lambda x: x['time to best eval (s)'] if x['target reached'] else np.inf,
+  summary_df['time to best eval on val (s)'] = workload_df.apply(
+      lambda x: x['accumulated_submission_time'][x['index best eval on val']],
+      axis=1)
+  summary_df['time to target on val (s)'] = summary_df.apply(
+      lambda x: x['time to best eval on val (s)']
+      if x['val target reached'] else np.inf,
       axis=1)
 
+  # test metrics
+  if include_test_split:
+    test_metric, test_target = scoring_utils.get_workload_metrics_and_targets(workload, split='test')
+
+    summary_df['test target metric name'] = test_metric
+    summary_df['test target metric value'] = test_target
+
+    summary_df['test target reached'] = workload_df[test_metric].apply(
+        lambda x: target_op(x, test_target)).apply(np.any)
+    summary_df['best metric value on test'] = workload_df[test_metric].apply(
+        lambda x: best_op(x))
+    workload_df['index best eval on test'] = workload_df[test_metric].apply(
+        lambda x: idx_op(x))
+    summary_df['time to best eval on test (s)'] = workload_df.apply(
+        lambda x: x['accumulated_submission_time'][x['index best eval on test']
+                                                  ],
+        axis=1)
+    summary_df['time to target on test (s)'] = summary_df.apply(
+        lambda x: x['time to best eval on test (s)']
+        if x['test target reached'] else np.inf,
+        axis=1)
+
   return summary_df
 
 
-def print_submission_summary(df):
+def print_submission_summary(df, include_test_split=True):
   dfs = []
   for workload, group in df.groupby('workload'):
-    summary_df = get_summary_df(workload, group)
+    summary_df = get_summary_df(
+        workload, group, include_test_split=include_test_split)
     dfs.append(summary_df)
 
   df = pd.concat(dfs)
   logging.info('\n' + tabulate(df, headers='keys', tablefmt='psql'))
+  return df
 
 
 def main(_):
@@ -93,7 +120,10 @@ def main(_):
     experiment_path = os.path.join(FLAGS.submission_directory, submission)
     df = scoring_utils.get_experiment_df(experiment_path)
     results[submission] = df
-    print_submission_summary(df)
+    summary_df = print_submission_summary(df)
+    with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'),
+              'w') as fout:
+      summary_df.to_csv(fout)
 
   if not FLAGS.strict:
     logging.warning(
diff --git a/scoring/scoring_utils.py b/scoring/scoring_utils.py
index 722b197a4..0dd997ab9 100644
--- a/scoring/scoring_utils.py
+++ b/scoring/scoring_utils.py
@@ -174,6 +174,10 @@ def get_experiment_df(experiment_dir):
     study_dirs = os.listdir(experiment_dir)
     for study_dir in study_dirs:
       workload_dirs = os.listdir(os.path.join(experiment_dir, study_dir))
+      workload_dirs = [
+          w for w in workload_dirs
+          if os.path.isdir(os.path.join(experiment_dir, study_dir, w))
+      ]
       for workload in workload_dirs:
         data = {
             'workload': workload,
@@ -208,7 +212,7 @@ def get_experiment_df(experiment_dir):
 
 
 ## Get workload properties
-def get_workload_validation_target(workload):
+def get_workload_metrics_and_targets(workload, split='validation'):
   """Returns workload target metric name and value."""
   workload_name = re.match(WORKLOAD_NAME_PATTERN, workload).group(1)
   framework = re.match(WORKLOAD_NAME_PATTERN, workload).group(2)
@@ -225,6 +229,13 @@ def get_workload_validation_target(workload):
       workload_class_name=workload_metadata['workload_class_name'],
       workload_init_kwargs=workload_init_kwargs)
   metric_name = workload_obj.target_metric_name
-  validation_metric = f'validation/{metric_name}'
-  validation_target = workload_obj.validation_target_value
-  return validation_metric, validation_target
+  if split == 'validation':
+    metric = f'validation/{metric_name}'
+    target = workload_obj.validation_target_value
+  elif split == 'test':
+    metric = f'test/{metric_name}'
+    target = workload_obj.test_target_value
+  else:
+    raise ValueError(
+        f"split must be 'validation' or 'test', got {split!r}.")
+  return metric, target
diff --git a/submission_runner.py b/submission_runner.py
index 2945e3fd0..a6f8c05a3 100644
--- a/submission_runner.py
+++ b/submission_runner.py
@@ -154,9 +154,12 @@
 flags.DEFINE_boolean('set_pytorch_max_split_size',
                      False,
                      'If true, set pytorch max_split_size_mb to 256')
-flags.DEFINE_integer('pytorch_eval_num_workers',
-                     0,
-                     'Number of workers for PyTorch evaluation data loaders.')
+flags.DEFINE_integer(
+    'pytorch_eval_num_workers',
+    0,
+    'Number of workers for ImageNet PyTorch evaluation data loaders. '
+    'WARNING: Setting pytorch_eval_num_workers != 0, will result '
+    'in incorrect evals currently, see issues/732.')
 FLAGS = flags.FLAGS
 USE_PYTORCH_DDP, RANK, DEVICE, N_GPUS = pytorch_setup()
 
@@ -205,6 +208,7 @@ def train_once(
     log_dir: Optional[str] = None,
     save_checkpoints: Optional[bool] = True
 ) -> Tuple[spec.Timing, Dict[str, Any]]:
+  _reset_cuda_mem()
   data_rng, opt_init_rng, model_init_rng, rng = prng.split(rng, 4)
 
   # Workload setup.
@@ -633,6 +637,12 @@ def main(_):
   if FLAGS.framework == 'pytorch':
     pytorch_init(USE_PYTORCH_DDP, RANK, profiler)
 
+  # TODO: remove once issue resolved.
+  if FLAGS.pytorch_eval_num_workers != 0:
+    logging.warning(
+        'WARNING: Setting pytorch_eval_num_workers != 0, will result '
+        'in incorrect evals currently, see issues/732.')
+
   workload_metadata = WORKLOADS[FLAGS.workload]
 
   # Prevent OOM on librispeech conformer.
diff --git a/utils/target_setting_workload_config.json b/utils/target_setting_workload_config.json
index 56988c78a..a8c050422 100644
--- a/utils/target_setting_workload_config.json
+++ b/utils/target_setting_workload_config.json
@@ -123,25 +123,25 @@
         "max_steps": 48000,
         "dataset": "librispeech",
         "submission_path": "reference_algorithms/target_setting_algorithms/jax_nadamw.py",
-        "tuning_search_space": "reference_algorithms/target_setting_algorithms/imagenet_resnet/tuning_search_space.json"
+        "tuning_search_space": "reference_algorithms/target_setting_algorithms/librispeech_deepspeech/tuning_search_space.json"
     },
     "librispeech_deepspeech_no_resnet": {
         "max_steps": 48000,
         "dataset": "librispeech",
         "submission_path": "reference_algorithms/target_setting_algorithms/jax_nadamw.py",
-        "tuning_search_space": "reference_algorithms/target_setting_algorithms/imagenet_resnet/tuning_search_space.json"
+        "tuning_search_space": "reference_algorithms/target_setting_algorithms/librispeech_deepspeech_no_resnet/tuning_search_space.json"
     },
     "librispeech_deepspeech_norm_and_spec_aug": {
         "max_steps": 48000,
         "dataset": "librispeech",
         "submission_path": "reference_algorithms/target_setting_algorithms/jax_nadamw.py",
-        "tuning_search_space": "reference_algorithms/target_setting_algorithms/imagenet_resnet/tuning_search_space.json"
+        "tuning_search_space": "reference_algorithms/target_setting_algorithms/librispeech_deepspeech_norm_and_spec_aug/tuning_search_space.json"
     },
     "librispeech_deepspeech_tanh": {
         "max_steps": 48000,
         "dataset": "librispeech",
         "submission_path": "reference_algorithms/target_setting_algorithms/jax_nadamw.py",
-        "tuning_search_space": "reference_algorithms/target_setting_algorithms/imagenet_resnet/tuning_search_space.json"
+        "tuning_search_space": "reference_algorithms/target_setting_algorithms/librispeech_deepspeech_tanh/tuning_search_space.json"
     },
     "criteo1tb": {
         "max_steps": 10666,