Remove the `ub_tp_comm_overlap: True` entry from the Nemotron 340B training config.
NOTE(review): line breaks and YAML indentation were reconstructed from a collapsed
single-line patch; verify the context lines against the target file before applying.

diff --git a/launcher_scripts/conf/training/nemotron/nemotron_340b.yaml b/launcher_scripts/conf/training/nemotron/nemotron_340b.yaml
index e475a2425..c24f14a23 100644
--- a/launcher_scripts/conf/training/nemotron/nemotron_340b.yaml
+++ b/launcher_scripts/conf/training/nemotron/nemotron_340b.yaml
@@ -164,7 +164,6 @@ model:
   fp8_amax_history_len: 1024 # Number of steps for which amax history is recorded per tensor
   fp8_amax_compute_algo: max # 'most_recent' or 'max'. Algorithm for computing amax from history
   fp8_wgrad: True
-  ub_tp_comm_overlap: True
 
   optim:
     name: mcore_distributed_optim