From ad8f72b3f65031c6a4f5d423c6490f4cf9f12dee Mon Sep 17 00:00:00 2001 From: Juhan Bae Date: Tue, 19 Mar 2024 13:35:05 -0400 Subject: [PATCH 01/17] Initial Commit --- .../librispeech_pytorch/models.py | 10 +++++----- submission_runner.py | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py index fe3a1e179..1476fd361 100644 --- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py +++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py @@ -93,7 +93,7 @@ def __init__(self, out_features=self.encoder_dim, bias=True) self.pos_encode = AddPositionalEmbedding(embedding_dim=self.encoder_dim) - self.dropout = nn.Dropout(p=self.input_dropout_rate) + self.dropout = nn.Dropout(p=self.input_dropout_rate, inplace=True) def forward(self, inputs, input_paddings): output_paddings = input_paddings @@ -195,7 +195,7 @@ def __init__(self, config: ConformerConfig): in_features=config.encoder_dim, out_features=config.encoder_dim * config.feed_forward_expansion_factor, bias=True) - self.dropout1 = nn.Dropout(p=config.feed_forward_dropout_rate) + self.dropout1 = nn.Dropout(p=config.feed_forward_dropout_rate, inplace=True) self.linear2 = nn.Linear( in_features=config.encoder_dim * config.feed_forward_expansion_factor, out_features=config.encoder_dim, @@ -206,7 +206,7 @@ def __init__(self, config: ConformerConfig): else: feed_forward_residual_dropout_rate = ( config.feed_forward_residual_dropout_rate) - self.dropout2 = nn.Dropout(p=feed_forward_residual_dropout_rate) + self.dropout2 = nn.Dropout(p=feed_forward_residual_dropout_rate, inplace=True) def forward(self, inputs, padding_mask): inputs = self.ln(inputs) @@ -316,7 +316,7 @@ def __init__(self, config: ConformerConfig): attention_residual_dropout_rate = 0.1 else: 
attention_residual_dropout_rate = config.attention_residual_dropout_rate - self.dropout = nn.Dropout(p=attention_residual_dropout_rate) + self.dropout = nn.Dropout(p=attention_residual_dropout_rate, inplace=True) def forward(self, outputs, paddings): outputs = self.ln(outputs) @@ -407,7 +407,7 @@ def __init__(self, config): conv_residual_dropout_rate = 0.0 else: conv_residual_dropout_rate = config.conv_residual_dropout_rate - self.dropout = nn.Dropout(p=conv_residual_dropout_rate) + self.dropout = nn.Dropout(p=conv_residual_dropout_rate, inplace=True) def forward(self, inputs, input_paddings): inputs = self.ln(inputs) diff --git a/submission_runner.py b/submission_runner.py index ff290079b..7c8d7fb53 100644 --- a/submission_runner.py +++ b/submission_runner.py @@ -203,6 +203,7 @@ def train_once( log_dir: Optional[str] = None, save_checkpoints: Optional[bool] = True ) -> Tuple[spec.Timing, Dict[str, Any]]: + _reset_cuda_mem() data_rng, opt_init_rng, model_init_rng, rng = prng.split(rng, 4) # Workload setup. 
From de238bcbd7831b29886517e868ecacd3540babf9 Mon Sep 17 00:00:00 2001 From: Juhan Bae Date: Tue, 19 Mar 2024 13:41:05 -0400 Subject: [PATCH 02/17] Lint fix --- .../librispeech_conformer/librispeech_pytorch/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py index 1476fd361..b3f1eeaad 100644 --- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py +++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py @@ -206,7 +206,8 @@ def __init__(self, config: ConformerConfig): else: feed_forward_residual_dropout_rate = ( config.feed_forward_residual_dropout_rate) - self.dropout2 = nn.Dropout(p=feed_forward_residual_dropout_rate, inplace=True) + self.dropout2 = nn.Dropout(p=feed_forward_residual_dropout_rate, + inplace=True) def forward(self, inputs, padding_mask): inputs = self.ln(inputs) From f208dd2dc13d98619633b3144e7300fd74060461 Mon Sep 17 00:00:00 2001 From: Juhan Bae Date: Tue, 19 Mar 2024 13:56:49 -0400 Subject: [PATCH 03/17] Lint fix --- .../librispeech_conformer/librispeech_pytorch/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py index b3f1eeaad..90a12b779 100644 --- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py +++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py @@ -206,9 +206,9 @@ def __init__(self, config: ConformerConfig): else: feed_forward_residual_dropout_rate = ( config.feed_forward_residual_dropout_rate) - self.dropout2 = nn.Dropout(p=feed_forward_residual_dropout_rate, - inplace=True) - + self.dropout2 = 
nn.Dropout( + p=feed_forward_residual_dropout_rate, inplace=True) + def forward(self, inputs, padding_mask): inputs = self.ln(inputs) inputs = self.linear1(inputs) From 135c56adcf1d6d9fbec3a84e2942352ec13fb222 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Tue, 26 Mar 2024 17:28:50 +0000 Subject: [PATCH 04/17] add warning to flag --- submission_runner.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/submission_runner.py b/submission_runner.py index 2945e3fd0..e9a3f7dba 100644 --- a/submission_runner.py +++ b/submission_runner.py @@ -156,7 +156,9 @@ 'If true, set pytorch max_split_size_mb to 256') flags.DEFINE_integer('pytorch_eval_num_workers', 0, - 'Number of workers for PyTorch evaluation data loaders.') + 'Number of workers for PyTorch evaluation data loaders.' + 'WARNING: there is an known bug that results in wrong' + 'evals when the number of workers is not equal to 0.') FLAGS = flags.FLAGS USE_PYTORCH_DDP, RANK, DEVICE, N_GPUS = pytorch_setup() From 4fabc1474237d110510b76d259b323ba662f4b2f Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Thu, 28 Mar 2024 01:20:39 +0000 Subject: [PATCH 05/17] add variant targets --- .../criteo1tb/criteo1tb_jax/workload.py | 10 ++-- .../criteo1tb/criteo1tb_pytorch/workload.py | 10 ++-- .../imagenet_resnet/imagenet_jax/workload.py | 12 ++--- .../imagenet_pytorch/workload.py | 12 ++--- .../imagenet_vit/imagenet_jax/workload.py | 12 ++--- .../imagenet_vit/imagenet_pytorch/workload.py | 12 ++--- .../workloads/wmt/wmt_jax/workload.py | 12 ++--- .../workloads/wmt/wmt_pytorch/workload.py | 12 ++--- scoring/score_submissions.py | 52 ++++++++++++++----- scoring/scoring_utils.py | 17 ++++-- 10 files changed, 97 insertions(+), 64 deletions(-) diff --git a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py index 84a0a7416..f6945c021 100644 --- a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py +++ 
b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py @@ -173,7 +173,7 @@ def use_layer_norm(self) -> bool: @property def validation_target_value(self) -> float: - return 0.123744 + return 0.1237562372 @property def test_target_value(self) -> float: @@ -191,23 +191,23 @@ def use_resnet(self) -> bool: @property def validation_target_value(self) -> float: - return 0.124027 + return 0.1241490923 @property def test_target_value(self) -> float: - return 0.126468 + return 0.1264799502 class Criteo1TbDlrmSmallEmbedInitWorkload(Criteo1TbDlrmSmallWorkload): @property def validation_target_value(self) -> float: - return 0.124286 + return 0.129656005 @property def test_target_value(self) -> float: # Todo - return 0.126725 + return 0.1319666458 @property def embedding_init_multiplier(self) -> float: diff --git a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py index c63ac3f7b..434ca7f50 100644 --- a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py +++ b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py @@ -254,7 +254,7 @@ def use_layer_norm(self) -> bool: @property def validation_target_value(self) -> float: - return 0.123744 + return 0.1237562372 @property def test_target_value(self) -> float: @@ -272,23 +272,23 @@ def use_resnet(self) -> bool: @property def validation_target_value(self) -> float: - return 0.124027 + return 0.1241490923 @property def test_target_value(self) -> float: - return 0.126468 + return 0.1264799502 class Criteo1TbDlrmSmallEmbedInitWorkload(Criteo1TbDlrmSmallWorkload): @property def validation_target_value(self) -> float: - return 0.124286 + return 0.129656005 @property def test_target_value(self) -> float: # Todo - return 0.126725 + return 0.1319666458 @property def embedding_init_multiplier(self) -> float: diff --git 
a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py index e4810e142..a3506b4fd 100644 --- a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py +++ b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py @@ -272,11 +272,11 @@ def use_silu(self) -> bool: @property def validation_target_value(self) -> float: - return 1 - 0.22009 + return 0.7544599771 @property def test_target_value(self) -> float: - return 1 - 0.3426 + return 0.6323000193 class ImagenetResNetGELUWorkload(ImagenetResNetWorkload): @@ -287,11 +287,11 @@ def use_gelu(self) -> bool: @property def validation_target_value(self) -> float: - return 1 - 0.22077 + return 0.7676599622 @property def test_target_value(self) -> float: - return 1 - 0.3402 + return 0.651900053 class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload): @@ -302,8 +302,8 @@ def bn_init_scale(self) -> float: @property def validation_target_value(self) -> float: - return 1 - 0.23474 + return 0.76526 @property def test_target_value(self) -> float: - return 1 - 0.3577 + return 0.6423 diff --git a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py index 5c7c6c7d2..089caf5cb 100644 --- a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py +++ b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py @@ -326,11 +326,11 @@ def use_silu(self) -> bool: @property def validation_target_value(self) -> float: - return 1 - 0.22009 + return 0.7544599771 @property def test_target_value(self) -> float: - return 1 - 0.342 + return 0.6323000193 class ImagenetResNetGELUWorkload(ImagenetResNetWorkload): @@ -341,11 +341,11 @@ def use_gelu(self) -> bool: @property def validation_target_value(self) -> float: - return 1 - 0.22077 + return 0.7676599622 @property 
def test_target_value(self) -> float: - return 1 - 0.3402 + return 0.651900053 class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload): @@ -356,8 +356,8 @@ def bn_init_scale(self) -> float: @property def validation_target_value(self) -> float: - return 1 - 0.23474 + return 0.76526 @property def test_target_value(self) -> float: - return 1 - 0.3577 + return 0.6423 diff --git a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py index a54ee9b5e..0cd60251e 100644 --- a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py +++ b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py @@ -99,11 +99,11 @@ def use_glu(self) -> bool: @property def validation_target_value(self) -> float: - return 1 - 0.2233 + return 0.7573800087 @property def test_target_value(self) -> float: - return 1 - 0.3455 + return 0.6359000206 class ImagenetVitPostLNWorkload(ImagenetVitWorkload): @@ -114,11 +114,11 @@ def use_post_layer_norm(self) -> bool: @property def validation_target_value(self) -> float: - return 1 - 0.24688 + return 0.75312 @property def test_target_value(self) -> float: - return 1 - 0.3714 + return 0.6286 class ImagenetVitMapWorkload(ImagenetVitWorkload): @@ -129,8 +129,8 @@ def use_map(self) -> bool: @property def validation_target_value(self) -> float: - return 1 - 0.22886 + return 0.77114 @property def test_target_value(self) -> float: - return 1 - 0.3477 + return 0.6523 diff --git a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py index 51c79b2d0..50233b0b0 100644 --- a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py +++ b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py @@ -90,11 +90,11 @@ def use_glu(self) -> bool: @property def validation_target_value(self) -> float: - return 1 - 0.2233 
+ return 0.7573800087 @property def test_target_value(self) -> float: - return 1 - 0.3455 + return 0.6359000206 class ImagenetVitPostLNWorkload(ImagenetVitWorkload): @@ -105,11 +105,11 @@ def use_post_layer_norm(self) -> bool: @property def validation_target_value(self) -> float: - return 1 - 0.24688 + return 0.75312 @property def test_target_value(self) -> float: - return 1 - 0.3714 + return 0.6286 class ImagenetVitMapWorkload(ImagenetVitWorkload): @@ -120,8 +120,8 @@ def use_map(self) -> bool: @property def validation_target_value(self) -> float: - return 1 - 0.22886 + return 0.77114 @property def test_target_value(self) -> float: - return 1 - 0.3477 + return 0.6523 diff --git a/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py b/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py index b10d4056d..c7da35b11 100644 --- a/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py +++ b/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py @@ -299,7 +299,7 @@ class WmtWorkloadPostLN(WmtWorkload): @property def validation_target_value(self) -> float: - return 30.2003 + return 30.07797237 @property def test_target_value(self) -> float: @@ -315,15 +315,15 @@ class WmtWorkloadAttentionTemp(WmtWorkload): @property def validation_target_value(self) -> float: - return 30.0756 + return 29.86119393 @property def test_target_value(self) -> float: - return 29.8094 + return 29.41438511 @property def attention_temp(self) -> float: - return 4.0 + return 1.6 class WmtWorkloadGLUTanH(WmtWorkload): @@ -331,11 +331,11 @@ class WmtWorkloadGLUTanH(WmtWorkload): @property def validation_target_value(self) -> float: - return 30.0002 + return 29.65174349 @property def test_target_value(self) -> float: - return 29.8139 + return 29.05153769 @property def activation(self) -> str: diff --git a/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py b/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py index 9f6d817f4..dd7893be3 100644 --- 
a/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py +++ b/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py @@ -355,7 +355,7 @@ class WmtWorkloadPostLN(WmtWorkload): @property def validation_target_value(self) -> float: - return 30.2003 + return 30.07797237 @property def test_target_value(self) -> float: @@ -371,15 +371,15 @@ class WmtWorkloadAttentionTemp(WmtWorkload): @property def validation_target_value(self) -> float: - return 30.0756 + return 29.86119393 @property def test_target_value(self) -> float: - return 29.8094 + return 29.41438511 @property def attention_temp(self) -> float: - return 4.0 + return 1.6 class WmtWorkloadGLUTanH(WmtWorkload): @@ -387,11 +387,11 @@ class WmtWorkloadGLUTanH(WmtWorkload): @property def validation_target_value(self) -> float: - return 30.0002 + return 29.65174349 @property def test_target_value(self) -> float: - return 29.8139 + return 29.05153769 @property def activation(self) -> str: diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py index 48777c69e..891f04e78 100644 --- a/scoring/score_submissions.py +++ b/scoring/score_submissions.py @@ -48,8 +48,9 @@ FLAGS = flags.FLAGS -def get_summary_df(workload, workload_df): - validation_metric, validation_target = scoring_utils.get_workload_validation_target(workload) +def get_summary_df(workload, workload_df, include_test_split=False): + validation_metric, validation_target = scoring_utils.get_workload_metrics_and_targets(workload, split='validation') + is_minimized = performance_profile.check_if_minimized(validation_metric) target_op = operator.le if is_minimized else operator.ge best_op = min if is_minimized else max @@ -58,32 +59,52 @@ def get_summary_df(workload, workload_df): summary_df = pd.DataFrame() summary_df['workload'] = workload_df['workload'] summary_df['trial'] = workload_df['trial'].apply(lambda x: x[0]) - summary_df['target metric name'] = validation_metric - summary_df['target metric value'] = validation_target + 
summary_df['val target metric name'] = validation_metric + summary_df['val target metric value'] = validation_target - summary_df['target reached'] = workload_df[validation_metric].apply( + summary_df['val target reached'] = workload_df[validation_metric].apply( lambda x: target_op(x, validation_target)).apply(np.any) - summary_df['best metric value'] = workload_df[validation_metric].apply( + summary_df['best metric value on val'] = workload_df[validation_metric].apply( lambda x: best_op(x)) - workload_df['index best eval'] = workload_df[validation_metric].apply( + workload_df['index best eval on val'] = workload_df[validation_metric].apply( lambda x: idx_op(x)) - summary_df['time to best eval (s)'] = workload_df.apply( - lambda x: x['accumulated_submission_time'][x['index best eval']], axis=1) - summary_df['time to target (s)'] = summary_df.apply( - lambda x: x['time to best eval (s)'] if x['target reached'] else np.inf, + summary_df['time to best eval on val (s)'] = workload_df.apply( + lambda x: x['accumulated_submission_time'][x['index best eval on val']], axis=1) + summary_df['time to target on val (s)'] = summary_df.apply( + lambda x: x['time to best eval on val (s)'] if x['val target reached'] else np.inf, axis=1) + # test metrics + if include_test_split: + test_metric, test_target = scoring_utils.get_workload_metrics_and_targets(workload, split='test') + + summary_df['test target metric name'] = test_metric + summary_df['test target metric value'] = test_target + + summary_df['test target reached'] = workload_df[test_metric].apply( + lambda x: target_op(x, test_target)).apply(np.any) + summary_df['best metric value on test'] = workload_df[test_metric].apply( + lambda x: best_op(x)) + workload_df['index best eval on test'] = workload_df[test_metric].apply( + lambda x: idx_op(x)) + summary_df['time to best eval on test (s)'] = workload_df.apply( + lambda x: x['accumulated_submission_time'][x['index best eval on test']], axis=1) + summary_df['time to target on 
test (s)'] = summary_df.apply( + lambda x: x['time to best eval on test (s)'] if x['test target reached'] else np.inf, + axis=1) + return summary_df -def print_submission_summary(df): +def print_submission_summary(df, include_test_split=True): dfs = [] for workload, group in df.groupby('workload'): - summary_df = get_summary_df(workload, group) + summary_df = get_summary_df(workload, group, include_test_split=include_test_split) dfs.append(summary_df) df = pd.concat(dfs) logging.info('\n' + tabulate(df, headers='keys', tablefmt='psql')) + return df def main(_): @@ -93,7 +114,10 @@ def main(_): experiment_path = os.path.join(FLAGS.submission_directory, submission) df = scoring_utils.get_experiment_df(experiment_path) results[submission] = df - print_submission_summary(df) + summary_df = print_submission_summary(df) + with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'), + 'w') as fout: + summary_df.to_csv(fout) if not FLAGS.strict: logging.warning( diff --git a/scoring/scoring_utils.py b/scoring/scoring_utils.py index 722b197a4..4a62db362 100644 --- a/scoring/scoring_utils.py +++ b/scoring/scoring_utils.py @@ -174,6 +174,11 @@ def get_experiment_df(experiment_dir): study_dirs = os.listdir(experiment_dir) for study_dir in study_dirs: workload_dirs = os.listdir(os.path.join(experiment_dir, study_dir)) + workload_dirs = [ + w for w in workload_dirs + if os.path.isdir(os.path.join(experiment_dir, study_dir, w)) + ] + print(workload_dirs) for workload in workload_dirs: data = { 'workload': workload, @@ -208,7 +213,7 @@ def get_experiment_df(experiment_dir): ## Get workload properties -def get_workload_validation_target(workload): +def get_workload_metrics_and_targets(workload, split='validation'): """Returns workload target metric name and value.""" workload_name = re.match(WORKLOAD_NAME_PATTERN, workload).group(1) framework = re.match(WORKLOAD_NAME_PATTERN, workload).group(2) @@ -225,6 +230,10 @@ def get_workload_validation_target(workload): 
workload_class_name=workload_metadata['workload_class_name'], workload_init_kwargs=workload_init_kwargs) metric_name = workload_obj.target_metric_name - validation_metric = f'validation/{metric_name}' - validation_target = workload_obj.validation_target_value - return validation_metric, validation_target + if split=='validation': + metric = f'validation/{metric_name}' + target = workload_obj.validation_target_value + elif split=='test': + metric = f'test/{metric_name}' + target = workload_obj.test_target_value + return metric,target From e17d6041c1b620f366657f9ad24e8c06f9c43949 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Thu, 28 Mar 2024 01:36:57 +0000 Subject: [PATCH 06/17] variants --- .../librispeech_jax/workload.py | 12 +++++----- .../librispeech_pytorch/workload.py | 12 +++++----- .../librispeech_jax/workload.py | 12 +++++----- .../librispeech_pytorch/workload.py | 24 +++++++++++++++++++ 4 files changed, 42 insertions(+), 18 deletions(-) diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py index a991b07ab..1b46b1841 100644 --- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py +++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py @@ -388,11 +388,11 @@ def attention_temperature(self) -> float: @property def validation_target_value(self) -> float: - return 0.082665 + return 0.107585 @property def test_target_value(self) -> float: - return 0.50168 + return 0.066145 class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload): @@ -403,11 +403,11 @@ def use_post_layer_norm(self) -> bool: @property def validation_target_value(self) -> float: - return 0.085371 + return 0.094943 @property def test_target_value(self) -> float: - return 0.053096 + return 0.057181 class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload): @@ -418,8 +418,8 @@ def 
use_gelu(self) -> bool: @property def validation_target_value(self) -> float: - return 0.077958 + return 0.084047 @property def test_target_value(self) -> float: - return 0.047643 + return 0.050733 diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py index 9e09e387f..5b144de33 100644 --- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py +++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py @@ -354,11 +354,11 @@ def attention_temperature(self) -> float: @property def validation_target_value(self) -> float: - return 0.082665 + return 0.107585 @property def test_target_value(self) -> float: - return 0.050168 + return 0.066145 class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload): @@ -369,11 +369,11 @@ def use_post_layer_norm(self) -> bool: @property def validation_target_value(self) -> float: - return 0.085371 + return 0.094943 @property def test_target_value(self) -> float: - return 0.053096 + return 0.057181 class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload): @@ -384,8 +384,8 @@ def use_gelu(self) -> bool: @property def validation_target_value(self) -> float: - return 0.077958 + return 0.084047 @property def test_target_value(self) -> float: - return 0.047643 + return 0.050733 diff --git a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py index 4489c0402..79f1f502a 100644 --- a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py +++ b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py @@ -109,11 +109,11 @@ def use_tanh(self) -> bool: @property def validation_target_value(self) -> float: - return 0.133449 + return 0.140084 @property def 
test_target_value(self) -> float: - return 0.079810 + return 0.089249 class LibriSpeechDeepSpeechNoResNetWorkload(LibriSpeechDeepSpeechWorkload): @@ -124,11 +124,11 @@ def enable_residual_connections(self) -> bool: @property def validation_target_value(self) -> float: - return 0.105042 + return 0.122745 @property def test_target_value(self) -> float: - return 0.060388 + return 0.073837 class LibriSpeechDeepSpeechNormAndSpecAugWorkload(LibriSpeechDeepSpeechWorkload @@ -156,8 +156,8 @@ def time_mask_count(self) -> int: @property def validation_target_value(self) -> float: - return 0.131553 + return 0.137877 @property def test_target_value(self) -> float: - return 0.082442 + return 0.088675 diff --git a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py index 23d533aa1..55a5773aa 100644 --- a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py +++ b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py @@ -114,6 +114,14 @@ class LibriSpeechDeepSpeechTanhWorkload(LibriSpeechDeepSpeechWorkload): def use_tanh(self) -> bool: return True + @property + def validation_target_value(self) -> float: + return 0.140084 + + @property + def test_target_value(self) -> float: + return 0.089249 + class LibriSpeechDeepSpeechNoResNetWorkload(LibriSpeechDeepSpeechWorkload): @@ -121,6 +129,14 @@ class LibriSpeechDeepSpeechNoResNetWorkload(LibriSpeechDeepSpeechWorkload): def enable_residual_connections(self) -> bool: return False + @property + def validation_target_value(self) -> float: + return 0.122745 + + @property + def test_target_value(self) -> float: + return 0.073837 + class LibriSpeechDeepSpeechNormAndSpecAugWorkload(LibriSpeechDeepSpeechWorkload ): @@ -144,3 +160,11 @@ def freq_mask_count(self) -> int: @property def time_mask_count(self) -> int: return 15 + + @property + def 
validation_target_value(self) -> float: + return 0.137877 + + @property + def test_target_value(self) -> float: + return 0.088675 From 4eacf68516910ad2e49a22b429408076ff4234e1 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Thu, 28 Mar 2024 01:50:16 +0000 Subject: [PATCH 07/17] formatting --- scoring/score_submissions.py | 16 +++++++++++----- scoring/scoring_utils.py | 6 +++--- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py index 891f04e78..0b768855e 100644 --- a/scoring/score_submissions.py +++ b/scoring/score_submissions.py @@ -69,9 +69,11 @@ def get_summary_df(workload, workload_df, include_test_split=False): workload_df['index best eval on val'] = workload_df[validation_metric].apply( lambda x: idx_op(x)) summary_df['time to best eval on val (s)'] = workload_df.apply( - lambda x: x['accumulated_submission_time'][x['index best eval on val']], axis=1) + lambda x: x['accumulated_submission_time'][x['index best eval on val']], + axis=1) summary_df['time to target on val (s)'] = summary_df.apply( - lambda x: x['time to best eval on val (s)'] if x['val target reached'] else np.inf, + lambda x: x['time to best eval on val (s)'] + if x['val target reached'] else np.inf, axis=1) # test metrics @@ -88,9 +90,12 @@ def get_summary_df(workload, workload_df, include_test_split=False): workload_df['index best eval on test'] = workload_df[test_metric].apply( lambda x: idx_op(x)) summary_df['time to best eval on test (s)'] = workload_df.apply( - lambda x: x['accumulated_submission_time'][x['index best eval on test']], axis=1) + lambda x: x['accumulated_submission_time'][x['index best eval on test'] + ], + axis=1) summary_df['time to target on test (s)'] = summary_df.apply( - lambda x: x['time to best eval on test (s)'] if x['test target reached'] else np.inf, + lambda x: x['time to best eval on test (s)'] + if x['test target reached'] else np.inf, axis=1) return summary_df @@ -99,7 +104,8 @@ def 
get_summary_df(workload, workload_df, include_test_split=False): def print_submission_summary(df, include_test_split=True): dfs = [] for workload, group in df.groupby('workload'): - summary_df = get_summary_df(workload, group, include_test_split=include_test_split) + summary_df = get_summary_df( + workload, group, include_test_split=include_test_split) dfs.append(summary_df) df = pd.concat(dfs) diff --git a/scoring/scoring_utils.py b/scoring/scoring_utils.py index 4a62db362..0dd997ab9 100644 --- a/scoring/scoring_utils.py +++ b/scoring/scoring_utils.py @@ -230,10 +230,10 @@ def get_workload_metrics_and_targets(workload, split='validation'): workload_class_name=workload_metadata['workload_class_name'], workload_init_kwargs=workload_init_kwargs) metric_name = workload_obj.target_metric_name - if split=='validation': + if split == 'validation': metric = f'validation/{metric_name}' target = workload_obj.validation_target_value - elif split=='test': + elif split == 'test': metric = f'test/{metric_name}' target = workload_obj.test_target_value - return metric,target + return metric, target From 5c4485e8fca3ebf7202d663a8ded179c6fa83f50 Mon Sep 17 00:00:00 2001 From: Alice <8447104+tfaod@users.noreply.github.com> Date: Thu, 28 Mar 2024 15:46:23 -0400 Subject: [PATCH 08/17] [fix] random_utils.py to `_signed_to_unsigned` When running the submission_runner on the self-tuning track, we run into this error calling `_signed_to_unsigned` from random_utils.py. 
I've added a fix. ```ValueError: Seed must be between 0 and 2**32 - 1 rng = prng.PRNGKey(rng_seed) File "/private/home/axyang/optimization/algorithmic-efficiency-entry/algorithmic_efficiency/random_utils.py", line 79, in PRNGKey return _PRNGKey(seed) ``` --- algorithmic_efficiency/random_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/algorithmic_efficiency/random_utils.py b/algorithmic_efficiency/random_utils.py index 68e9a9cfe..cf1ea6c32 100644 --- a/algorithmic_efficiency/random_utils.py +++ b/algorithmic_efficiency/random_utils.py @@ -26,11 +26,11 @@ def _signed_to_unsigned(seed: SeedType) -> SeedType: if isinstance(seed, int): - return seed + 2**32 if seed < 0 else seed + return seed % 2**32 if isinstance(seed, list): - return [s + 2**32 if s < 0 else s for s in seed] + return [s % 2**32 for s in seed] if isinstance(seed, np.ndarray): - return np.array([s + 2**32 if s < 0 else s for s in seed.tolist()]) + return np.array([s % 2**32 for s in seed.tolist()]) def _fold_in(seed: SeedType, data: Any) -> List[Union[SeedType, Any]]: From d8c7edf35f72afa343e364d077e24e4d44f90e58 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Thu, 28 Mar 2024 21:40:25 +0000 Subject: [PATCH 09/17] change conformer variant targets --- .../librispeech_jax/workload.py | 12 ++++++------ .../librispeech_pytorch/workload.py | 13 +++++++------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py index 1b46b1841..b579ebef9 100644 --- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py +++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py @@ -388,11 +388,11 @@ def attention_temperature(self) -> float: @property def validation_target_value(self) -> float: - return 0.107585 + return 0.109976153 @property def
test_target_value(self) -> float: - return 0.066145 + return 0.06806410335 class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload): @@ -403,11 +403,11 @@ def use_post_layer_norm(self) -> bool: @property def validation_target_value(self) -> float: - return 0.094943 + return 0.09730924819 @property def test_target_value(self) -> float: - return 0.057181 + return 0.05995978307 class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload): @@ -418,8 +418,8 @@ def use_gelu(self) -> bool: @property def validation_target_value(self) -> float: - return 0.084047 + return 0.09411355803 @property def test_target_value(self) -> float: - return 0.050733 + return 0.05662868401 diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py index 5b144de33..16a365f93 100644 --- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py +++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py @@ -354,11 +354,11 @@ def attention_temperature(self) -> float: @property def validation_target_value(self) -> float: - return 0.107585 + return 0.109976153 @property def test_target_value(self) -> float: - return 0.066145 + return 0.06806410335 class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload): @@ -369,11 +369,11 @@ def use_post_layer_norm(self) -> bool: @property def validation_target_value(self) -> float: - return 0.094943 + return 0.09730924819 @property def test_target_value(self) -> float: - return 0.057181 + return 0.05995978307 class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload): @@ -384,8 +384,9 @@ def use_gelu(self) -> bool: @property def validation_target_value(self) -> float: - return 0.084047 + return 0.09411355803 @property def test_target_value(self) -> float: - return 0.050733 + return 0.05662868401 + From 
fc623faa0f1ee24b8a55a01941d6e8be7413cdcf Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Thu, 28 Mar 2024 21:42:11 +0000 Subject: [PATCH 10/17] formatting --- .../librispeech_conformer/librispeech_pytorch/workload.py | 1 - 1 file changed, 1 deletion(-) diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py index 16a365f93..34b615b73 100644 --- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py +++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py @@ -389,4 +389,3 @@ def validation_target_value(self) -> float: @property def test_target_value(self) -> float: return 0.05662868401 - From ff8ba5d68cd1d8a408f2d03e145a207267db7769 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Thu, 28 Mar 2024 22:53:39 +0000 Subject: [PATCH 11/17] undo wrong change --- algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py | 2 +- algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py b/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py index c7da35b11..8408b284f 100644 --- a/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py +++ b/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py @@ -323,7 +323,7 @@ def test_target_value(self) -> float: @property def attention_temp(self) -> float: - return 1.6 + return 4.0 class WmtWorkloadGLUTanH(WmtWorkload): diff --git a/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py b/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py index dd7893be3..c39b0a9df 100644 --- a/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py +++ b/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py @@ -379,7 +379,7 @@ def test_target_value(self) -> float: @property def 
attention_temp(self) -> float: - return 1.6 + return 4.0 class WmtWorkloadGLUTanH(WmtWorkload): From ce7f7e367e0e6b9fc12b33df0d827e7903284f12 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Thu, 28 Mar 2024 23:07:45 +0000 Subject: [PATCH 12/17] fix --- scoring/performance_profile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py index 8009dbc88..8ee271804 100644 --- a/scoring/performance_profile.py +++ b/scoring/performance_profile.py @@ -157,7 +157,7 @@ def get_workloads_time_to_target(submission, # For each workload get submission time get the submission times to target. for workload, group in submission.groupby('workload'): - validation_metric, validation_target = scoring_utils.get_workload_validation_target(workload) + validation_metric, validation_target = scoring_utils.get_workload_metrics_and_targets(workload) # Check number of studies time_vals_per_study = [] From 66e53c993f81434016cb74a1108b35e58e2341c2 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Fri, 29 Mar 2024 00:38:47 +0000 Subject: [PATCH 13/17] fix config --- utils/target_setting_workload_config.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/target_setting_workload_config.json b/utils/target_setting_workload_config.json index 56988c78a..a8c050422 100644 --- a/utils/target_setting_workload_config.json +++ b/utils/target_setting_workload_config.json @@ -123,25 +123,25 @@ "max_steps": 48000, "dataset": "librispeech", "submission_path": "reference_algorithms/target_setting_algorithms/jax_nadamw.py", - "tuning_search_space": "reference_algorithms/target_setting_algorithms/imagenet_resnet/tuning_search_space.json" + "tuning_search_space": "reference_algorithms/target_setting_algorithms/librispeech_deepspeech/tuning_search_space.json" }, "librispeech_deepspeech_no_resnet": { "max_steps": 48000, "dataset": "librispeech", "submission_path": 
"reference_algorithms/target_setting_algorithms/jax_nadamw.py", - "tuning_search_space": "reference_algorithms/target_setting_algorithms/imagenet_resnet/tuning_search_space.json" + "tuning_search_space": "reference_algorithms/target_setting_algorithms/librispeech_deepspeech_no_resnet/tuning_search_space.json" }, "librispeech_deepspeech_norm_and_spec_aug": { "max_steps": 48000, "dataset": "librispeech", "submission_path": "reference_algorithms/target_setting_algorithms/jax_nadamw.py", - "tuning_search_space": "reference_algorithms/target_setting_algorithms/imagenet_resnet/tuning_search_space.json" + "tuning_search_space": "reference_algorithms/target_setting_algorithms/librispeech_deepspeech_norm_and_spec_aug/tuning_search_space.json" }, "librispeech_deepspeech_tanh": { "max_steps": 48000, "dataset": "librispeech", "submission_path": "reference_algorithms/target_setting_algorithms/jax_nadamw.py", - "tuning_search_space": "reference_algorithms/target_setting_algorithms/imagenet_resnet/tuning_search_space.json" + "tuning_search_space": "reference_algorithms/target_setting_algorithms/librispeech_deepspeech_tanh/tuning_search_space.json" }, "criteo1tb": { "max_steps": 10666, From 5beb680ae5521f3f1663b9709a526635e5d24d09 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Fri, 29 Mar 2024 18:51:41 +0000 Subject: [PATCH 14/17] target rounding --- .../workloads/criteo1tb/criteo1tb_jax/workload.py | 10 +++++----- .../criteo1tb/criteo1tb_pytorch/workload.py | 10 +++++----- .../imagenet_resnet/imagenet_jax/workload.py | 8 ++++---- .../imagenet_resnet/imagenet_pytorch/workload.py | 8 ++++---- .../workloads/imagenet_vit/imagenet_jax/workload.py | 6 +++--- .../imagenet_vit/imagenet_pytorch/workload.py | 6 +++--- .../librispeech_jax/workload.py | 12 ++++++------ .../librispeech_pytorch/workload.py | 12 ++++++------ .../librispeech_jax/workload.py | 12 ++++++------ .../librispeech_pytorch/workload.py | 12 ++++++------ .../workloads/wmt/wmt_jax/workload.py | 10 +++++----- 
.../workloads/wmt/wmt_pytorch/workload.py | 10 +++++----- 12 files changed, 58 insertions(+), 58 deletions(-) diff --git a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py index f6945c021..3743dc1ff 100644 --- a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py +++ b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py @@ -173,7 +173,7 @@ def use_layer_norm(self) -> bool: @property def validation_target_value(self) -> float: - return 0.1237562372 + return 0.123757 @property def test_target_value(self) -> float: @@ -191,23 +191,23 @@ def use_resnet(self) -> bool: @property def validation_target_value(self) -> float: - return 0.1241490923 + return 0.12415 @property def test_target_value(self) -> float: - return 0.1264799502 + return 0.12648 class Criteo1TbDlrmSmallEmbedInitWorkload(Criteo1TbDlrmSmallWorkload): @property def validation_target_value(self) -> float: - return 0.129656005 + return 0.129657 @property def test_target_value(self) -> float: # Todo - return 0.1319666458 + return 0.131967 @property def embedding_init_multiplier(self) -> float: diff --git a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py index 434ca7f50..446267440 100644 --- a/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py +++ b/algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py @@ -254,7 +254,7 @@ def use_layer_norm(self) -> bool: @property def validation_target_value(self) -> float: - return 0.1237562372 + return 0.123757 @property def test_target_value(self) -> float: @@ -272,23 +272,23 @@ def use_resnet(self) -> bool: @property def validation_target_value(self) -> float: - return 0.1241490923 + return 0.12415 @property def test_target_value(self) -> float: - return 0.1264799502 + return 0.12648 class 
Criteo1TbDlrmSmallEmbedInitWorkload(Criteo1TbDlrmSmallWorkload): @property def validation_target_value(self) -> float: - return 0.129656005 + return 0.129657 @property def test_target_value(self) -> float: # Todo - return 0.1319666458 + return 0.131967 @property def embedding_init_multiplier(self) -> float: diff --git a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py index a3506b4fd..d8de214f5 100644 --- a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py +++ b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py @@ -272,11 +272,11 @@ def use_silu(self) -> bool: @property def validation_target_value(self) -> float: - return 0.7544599771 + return 0.75445 @property def test_target_value(self) -> float: - return 0.6323000193 + return 0.6323 class ImagenetResNetGELUWorkload(ImagenetResNetWorkload): @@ -287,11 +287,11 @@ def use_gelu(self) -> bool: @property def validation_target_value(self) -> float: - return 0.7676599622 + return 0.76765 @property def test_target_value(self) -> float: - return 0.651900053 + return 0.6519 class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload): diff --git a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py index 089caf5cb..3549911fa 100644 --- a/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py +++ b/algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py @@ -326,11 +326,11 @@ def use_silu(self) -> bool: @property def validation_target_value(self) -> float: - return 0.7544599771 + return 0.75445 @property def test_target_value(self) -> float: - return 0.6323000193 + return 0.6323 class ImagenetResNetGELUWorkload(ImagenetResNetWorkload): @@ -341,11 +341,11 @@ def use_gelu(self) -> bool: @property def 
validation_target_value(self) -> float: - return 0.7676599622 + return 0.76765 @property def test_target_value(self) -> float: - return 0.651900053 + return 0.6519 class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload): diff --git a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py index 0cd60251e..2ad71ffd0 100644 --- a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py +++ b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py @@ -99,11 +99,11 @@ def use_glu(self) -> bool: @property def validation_target_value(self) -> float: - return 0.7573800087 + return 0.75738 @property def test_target_value(self) -> float: - return 0.6359000206 + return 0.6359 class ImagenetVitPostLNWorkload(ImagenetVitWorkload): @@ -129,7 +129,7 @@ def use_map(self) -> bool: @property def validation_target_value(self) -> float: - return 0.77114 + return 0.77113 @property def test_target_value(self) -> float: diff --git a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py index 50233b0b0..703d40b07 100644 --- a/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py +++ b/algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py @@ -90,11 +90,11 @@ def use_glu(self) -> bool: @property def validation_target_value(self) -> float: - return 0.7573800087 + return 0.75738 @property def test_target_value(self) -> float: - return 0.6359000206 + return 0.6359 class ImagenetVitPostLNWorkload(ImagenetVitWorkload): @@ -120,7 +120,7 @@ def use_map(self) -> bool: @property def validation_target_value(self) -> float: - return 0.77114 + return 0.77113 @property def test_target_value(self) -> float: diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py 
b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py index b579ebef9..f4d1ab0f3 100644 --- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py +++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py @@ -388,11 +388,11 @@ def attention_temperature(self) -> float: @property def validation_target_value(self) -> float: - return 0.109976153 + return 0.109977 @property def test_target_value(self) -> float: - return 0.06806410335 + return 0.068065 class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload): @@ -403,11 +403,11 @@ def use_post_layer_norm(self) -> bool: @property def validation_target_value(self) -> float: - return 0.09730924819 + return 0.09731 @property def test_target_value(self) -> float: - return 0.05995978307 + return 0.05996 class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload): @@ -418,8 +418,8 @@ def use_gelu(self) -> bool: @property def validation_target_value(self) -> float: - return 0.09411355803 + return 0.094114 @property def test_target_value(self) -> float: - return 0.05662868401 + return 0.056629 diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py index 34b615b73..155b30920 100644 --- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py +++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py @@ -354,11 +354,11 @@ def attention_temperature(self) -> float: @property def validation_target_value(self) -> float: - return 0.109976153 + return 0.109977 @property def test_target_value(self) -> float: - return 0.06806410335 + return 0.068065 class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload): @@ -369,11 +369,11 @@ def use_post_layer_norm(self) -> bool: @property def validation_target_value(self) 
-> float: - return 0.09730924819 + return 0.09731 @property def test_target_value(self) -> float: - return 0.05995978307 + return 0.05996 class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload): @@ -384,8 +384,8 @@ def use_gelu(self) -> bool: @property def validation_target_value(self) -> float: - return 0.09411355803 + return 0.094114 @property def test_target_value(self) -> float: - return 0.05662868401 + return 0.056629 diff --git a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py index 79f1f502a..8473fac0f 100644 --- a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py +++ b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py @@ -109,11 +109,11 @@ def use_tanh(self) -> bool: @property def validation_target_value(self) -> float: - return 0.140084 + return 0.150883 @property def test_target_value(self) -> float: - return 0.089249 + return 0.098613 class LibriSpeechDeepSpeechNoResNetWorkload(LibriSpeechDeepSpeechWorkload): @@ -124,11 +124,11 @@ def enable_residual_connections(self) -> bool: @property def validation_target_value(self) -> float: - return 0.122745 + return 0.131564 @property def test_target_value(self) -> float: - return 0.073837 + return 0.079297 class LibriSpeechDeepSpeechNormAndSpecAugWorkload(LibriSpeechDeepSpeechWorkload @@ -156,8 +156,8 @@ def time_mask_count(self) -> int: @property def validation_target_value(self) -> float: - return 0.137877 + return 0.14342 @property def test_target_value(self) -> float: - return 0.088675 + return 0.090976 diff --git a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py index 55a5773aa..626bac278 100644 --- 
a/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py +++ b/algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py @@ -116,11 +116,11 @@ def use_tanh(self) -> bool: @property def validation_target_value(self) -> float: - return 0.140084 + return 0.150883 @property def test_target_value(self) -> float: - return 0.089249 + return 0.098613 class LibriSpeechDeepSpeechNoResNetWorkload(LibriSpeechDeepSpeechWorkload): @@ -131,11 +131,11 @@ def enable_residual_connections(self) -> bool: @property def validation_target_value(self) -> float: - return 0.122745 + return 0.131564 @property def test_target_value(self) -> float: - return 0.073837 + return 0.079297 class LibriSpeechDeepSpeechNormAndSpecAugWorkload(LibriSpeechDeepSpeechWorkload @@ -163,8 +163,8 @@ def time_mask_count(self) -> int: @property def validation_target_value(self) -> float: - return 0.137877 + return 0.14342 @property def test_target_value(self) -> float: - return 0.088675 + return 0.090976 diff --git a/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py b/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py index 8408b284f..c69965692 100644 --- a/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py +++ b/algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py @@ -299,7 +299,7 @@ class WmtWorkloadPostLN(WmtWorkload): @property def validation_target_value(self) -> float: - return 30.07797237 + return 30.0779 @property def test_target_value(self) -> float: @@ -315,11 +315,11 @@ class WmtWorkloadAttentionTemp(WmtWorkload): @property def validation_target_value(self) -> float: - return 29.86119393 + return 29.8611 @property def test_target_value(self) -> float: - return 29.41438511 + return 29.4143 @property def attention_temp(self) -> float: @@ -331,11 +331,11 @@ class WmtWorkloadGLUTanH(WmtWorkload): @property def validation_target_value(self) -> float: - return 29.65174349 + return 29.6517 @property def 
test_target_value(self) -> float: - return 29.05153769 + return 29.0515 @property def activation(self) -> str: diff --git a/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py b/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py index c39b0a9df..5ef09d278 100644 --- a/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py +++ b/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py @@ -355,7 +355,7 @@ class WmtWorkloadPostLN(WmtWorkload): @property def validation_target_value(self) -> float: - return 30.07797237 + return 30.0779 @property def test_target_value(self) -> float: @@ -371,11 +371,11 @@ class WmtWorkloadAttentionTemp(WmtWorkload): @property def validation_target_value(self) -> float: - return 29.86119393 + return 29.8611 @property def test_target_value(self) -> float: - return 29.41438511 + return 29.4143 @property def attention_temp(self) -> float: @@ -387,11 +387,11 @@ class WmtWorkloadGLUTanH(WmtWorkload): @property def validation_target_value(self) -> float: - return 29.65174349 + return 29.6517 @property def test_target_value(self) -> float: - return 29.05153769 + return 29.0515 @property def activation(self) -> str: From c9598c0901e18745e38738f29107198cf7368203 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Fri, 29 Mar 2024 21:34:20 +0000 Subject: [PATCH 15/17] formatting --- .../librispeech_conformer/librispeech_pytorch/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py index 90a12b779..502cb093e 100644 --- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py +++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py @@ -208,7 +208,7 @@ def __init__(self, config: ConformerConfig): config.feed_forward_residual_dropout_rate) self.dropout2 = nn.Dropout(
p=feed_forward_residual_dropout_rate, inplace=True) - + def forward(self, inputs, padding_mask): inputs = self.ln(inputs) inputs = self.linear1(inputs) From 28adc86c67fefa45ab0fffa1e0bd85137c7701b5 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Fri, 29 Mar 2024 23:02:22 +0000 Subject: [PATCH 16/17] update warning --- submission_runner.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/submission_runner.py b/submission_runner.py index 40eb8cd58..87b2703e1 100644 --- a/submission_runner.py +++ b/submission_runner.py @@ -154,11 +154,12 @@ flags.DEFINE_boolean('set_pytorch_max_split_size', False, 'If true, set pytorch max_split_size_mb to 256') -flags.DEFINE_integer('pytorch_eval_num_workers', - 0, - 'Number of workers for PyTorch evaluation data loaders.' - 'WARNING: there is an known bug that results in wrong' - 'evals when the number of workers is not equal to 0.') +flags.DEFINE_integer( + 'pytorch_eval_num_workers', + 0, + 'Number of workers for ImageNet PyTorch evaluation data loaders. ' + 'WARNING: Setting pytorch_eval_num_workers != 0, will result ' + 'in incorrect evals currently, see issues/732.') FLAGS = flags.FLAGS USE_PYTORCH_DDP, RANK, DEVICE, N_GPUS = pytorch_setup() From d492d69dfc1eee3f707231901191c98ec4031a7a Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Fri, 29 Mar 2024 23:04:10 +0000 Subject: [PATCH 17/17] add warning about num_workers --- submission_runner.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/submission_runner.py b/submission_runner.py index 87b2703e1..a6f8c05a3 100644 --- a/submission_runner.py +++ b/submission_runner.py @@ -637,6 +637,12 @@ def main(_): if FLAGS.framework == 'pytorch': pytorch_init(USE_PYTORCH_DDP, RANK, profiler) + # TODO: remove once issue resolved.
+ if FLAGS.pytorch_eval_num_workers != 0: + logging.warning( + 'WARNING: Setting pytorch_eval_num_workers != 0, will result ' + 'in incorrect evals currently, see issues/732.') + workload_metadata = WORKLOADS[FLAGS.workload] # Prevent OOM on librispeech conformer.