Merge pull request #376 from NVIDIA/revert-372-dpykhtar/update_t5
Revert "update t5/mt5 configs"
ericharper authored Jul 11, 2024
2 parents (82c297c + f714d70), commit 4fc35bc
Showing 15 changed files with 0 additions and 60 deletions.
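
All 15 files receive the same edit: the revert drops the mcore_t5 and transformer_engine keys, together with the comment above them, from the model section of each T5/mT5 config. A minimal sketch of the affected block, with indentation and surrounding keys assumed from the diff context rather than copied from the full files:

    # before this revert (state introduced by the reverted "update t5/mt5 configs" change, #372 per the branch name)
    model:
      # use T5 model from megatron.core
      mcore_t5: True
      transformer_engine: False
      # (rest of the model section unchanged)

    # after this revert
    model:
      # (rest of the model section unchanged)

With the keys removed, each config presumably falls back to whatever default T5 implementation the installed NeMo version selects.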
4 changes: 0 additions & 4 deletions launcher_scripts/conf/fine_tuning/mt5/custom_task.yaml
@@ -45,10 +45,6 @@ exp_manager:
     save_best_model: True
 
 model: # For different fine_tuning tasks, tuning the hyper parameters accordingly
-  # use MT5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   restore_from_path: ${fine_tuning.run.convert_dir}/results/megatron_mt5.nemo # Path to a trained mt5 .nemo file
   pretrained_checkpoint:
     checkpoint_dir: null # Path to a folder that contains a .ckpt file
4 changes: 0 additions & 4 deletions launcher_scripts/conf/fine_tuning/mt5/xquad.yaml
@@ -45,10 +45,6 @@ exp_manager:
     save_best_model: True
 
 model: # For different fine_tuning tasks, tuning the hyper parameters accordingly
-  # use MT5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   restore_from_path: ${fine_tuning.run.convert_dir}/results/megatron_mt5.nemo # Path to a trained mt5 .nemo file
   pretrained_checkpoint:
     checkpoint_dir: null # Path to a folder that contains a .ckpt file
4 changes: 0 additions & 4 deletions launcher_scripts/conf/fine_tuning/t5/custom_task.yaml
@@ -45,10 +45,6 @@ exp_manager:
     save_best_model: True
 
 model: # For different fine_tuning tasks, tuning the hyper parameters accordingly; below is only for MNLI
-  # use T5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   restore_from_path: ${fine_tuning.run.convert_dir}/results/megatron_t5.nemo # Path to a trained T5 .nemo file
   pretrained_checkpoint:
     checkpoint_dir: null # Path to a folder that contains a .ckpt file
4 changes: 0 additions & 4 deletions launcher_scripts/conf/fine_tuning/t5/squad.yaml
@@ -45,10 +45,6 @@ exp_manager:
     save_best_model: True
 
 model: # For different fine_tuning tasks, tuning the hyper parameters accordingly; below is only for MNLI
-  # use T5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   restore_from_path: ${fine_tuning.run.convert_dir}/results/megatron_t5.nemo # Path to a trained T5 .nemo file
   pretrained_checkpoint:
     checkpoint_dir: null # Path to a folder that contains a .ckpt file
4 changes: 0 additions & 4 deletions launcher_scripts/conf/peft/t5/squad.yaml
@@ -54,10 +54,6 @@ exp_manager:
     strict: False # Should be False to avoid a runtime error where EarlyStopping says monitor is unavailable, which sometimes happens with resumed training.
 
 model:
-  # use T5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   seed: 1234
   tensor_model_parallel_size: 1 # intra-layer model parallelism
   pipeline_model_parallel_size: 1 # inter-layer model parallelism
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/mt5/11b.yaml
@@ -53,10 +53,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use MT5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 24
   global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/mt5/170m.yaml
@@ -53,10 +53,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use MT5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 64
   global_batch_size: 2048 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/mt5/23b.yaml
@@ -53,10 +53,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use MT5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 8
   global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/mt5/390m.yaml
@@ -53,10 +53,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use MT5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 32
   global_batch_size: 2048 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/mt5/3b.yaml
@@ -53,10 +53,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use MT5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 24
   global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/t5/11b.yaml
@@ -51,10 +51,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use T5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 24
   global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/t5/220m.yaml
@@ -51,10 +51,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use T5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 64
   global_batch_size: 2048 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/t5/23b.yaml
@@ -51,10 +51,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use T5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 8
   global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/t5/3b.yaml
@@ -51,10 +51,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use T5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 24
   global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/t5/41b.yaml
@@ -51,10 +51,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use T5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 6
   global_batch_size: 1920 # will use more micro batches to reach global batch size
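
After this revert, none of the shipped T5/mT5 configs opt into the megatron.core implementation. Anyone who still wants that path could re-add the two keys to a local copy of one of these files; a sketch against launcher_scripts/conf/training/t5/220m.yaml, where the key names and batch-size lines come from the diff above but their exact placement, and whether the installed NeMo version still honors them, are assumptions:

    model:
      # re-enable the megatron.core T5 implementation (keys as they appeared before this revert)
      mcore_t5: True
      transformer_engine: False
      # model parallelism
      micro_batch_size: 64
      global_batch_size: 2048 # will use more micro batches to reach global batch size

A Hydra-style command-line override at launch time (for example +training.model.mcore_t5=True) should have the same effect, though the exact override path depends on the launcher stage and is likewise an assumption.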
