diff --git a/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py b/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py index c69965692..046d5e469 100644 --- a/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py +++ b/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py @@ -315,7 +315,7 @@ class WmtWorkloadAttentionTemp(WmtWorkload): @property def validation_target_value(self) -> float: - return 29.8611 + return 29.3379 @property def test_target_value(self) -> float: @@ -331,7 +331,7 @@ class WmtWorkloadGLUTanH(WmtWorkload): @property def validation_target_value(self) -> float: - return 29.6517 + return 29.5779 @property def test_target_value(self) -> float: diff --git a/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py b/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py index 5ef09d278..0ba49c2f6 100644 --- a/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py +++ b/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py @@ -371,7 +371,7 @@ class WmtWorkloadAttentionTemp(WmtWorkload): @property def validation_target_value(self) -> float: - return 29.8611 + return 29.3379 @property def test_target_value(self) -> float: @@ -387,7 +387,7 @@ class WmtWorkloadGLUTanH(WmtWorkload): @property def validation_target_value(self) -> float: - return 29.6517 + return 29.5779 @property def test_target_value(self) -> float: diff --git a/reference_algorithms/target_setting_algorithms/wmt_attention_temp/tuning_search_space.json b/reference_algorithms/target_setting_algorithms/wmt_attention_temp/tuning_search_space.json index 1327bcb38..266cdedbb 100644 --- a/reference_algorithms/target_setting_algorithms/wmt_attention_temp/tuning_search_space.json +++ b/reference_algorithms/target_setting_algorithms/wmt_attention_temp/tuning_search_space.json @@ -1,17 +1,17 @@ { "learning_rate": { "feasible_points": [ - 0.0003477912008450351 + 0.000590120167916659 ] }, "beta1": { "feasible_points": [ - 0.9936632117510711 + 0.737199286155609 ] }, "beta2": { "feasible_points": [ - 0.9967873550453692 + 0.05919391544031072 ] }, "warmup_steps": { @@ -21,7 +21,7 @@ }, "weight_decay": { "feasible_points": [ - 0.04120183162940475 + 0.14128519778326312 ] }, "label_smoothing": { diff --git a/reference_algorithms/target_setting_algorithms/wmt_glu_tanh/tuning_search_space.json b/reference_algorithms/target_setting_algorithms/wmt_glu_tanh/tuning_search_space.json index 4708a26dd..d288d9a49 100644 --- a/reference_algorithms/target_setting_algorithms/wmt_glu_tanh/tuning_search_space.json +++ b/reference_algorithms/target_setting_algorithms/wmt_glu_tanh/tuning_search_space.json @@ -1,32 +1,32 @@ { "learning_rate": { "feasible_points": [ - 0.0002111193022461917 + 0.000872041489644454 ] }, "beta1": { "feasible_points": [ - 0.8748186204170956 + 0.45562164405092065 ] }, "beta2": { "feasible_points": [ - 0.8576876516215266 + 0.9982167124443476 ] }, "warmup_steps": { "feasible_points": [ - 9999 + 4999 ] }, "weight_decay": { "feasible_points": [ - 0.18033280763289028 + 0.01536114562763022 ] }, "label_smoothing": { "feasible_points": [ - 0.0 + 0.1 ] } }