Merge pull request #376 from NVIDIA/revert-372-dpykhtar/update_t5
Revert "update t5/mt5 configs"
ericharper authored Jul 11, 2024
2 parents (82c297c + f714d70), commit 4fc35bc
Showing 15 changed files with 0 additions and 60 deletions.
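
All 15 files receive the same edit: the revert drops the mcore_t5 and transformer_engine keys, together with the comment above them, from the model section of each T5/mT5 config. A minimal sketch of the affected block, with indentation and surrounding keys assumed from the diff context rather than copied from the full files:

    # before this revert (state introduced by the reverted "update t5/mt5 configs" change, #372 per the branch name)
    model:
      # use T5 model from megatron.core
      mcore_t5: True
      transformer_engine: False
      # (rest of the model section unchanged)

    # after this revert
    model:
      # (rest of the model section unchanged)

With the keys removed, each config presumably falls back to whatever default T5 implementation the installed NeMo version selects.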
4 changes: 0 additions & 4 deletions launcher_scripts/conf/fine_tuning/mt5/custom_task.yaml
@@ -45,10 +45,6 @@ exp_manager:
     save_best_model: True
 
 model: # For different fine_tuning tasks, tuning the hyper parameters accordingly
-  # use MT5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   restore_from_path: ${fine_tuning.run.convert_dir}/results/megatron_mt5.nemo # Path to a trained mt5 .nemo file
   pretrained_checkpoint:
     checkpoint_dir: null # Path to a folder that contains a .ckpt file
4 changes: 0 additions & 4 deletions launcher_scripts/conf/fine_tuning/mt5/xquad.yaml
@@ -45,10 +45,6 @@ exp_manager:
     save_best_model: True
 
 model: # For different fine_tuning tasks, tuning the hyper parameters accordingly
-  # use MT5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   restore_from_path: ${fine_tuning.run.convert_dir}/results/megatron_mt5.nemo # Path to a trained mt5 .nemo file
   pretrained_checkpoint:
     checkpoint_dir: null # Path to a folder that contains a .ckpt file
4 changes: 0 additions & 4 deletions launcher_scripts/conf/fine_tuning/t5/custom_task.yaml
@@ -45,10 +45,6 @@ exp_manager:
     save_best_model: True
 
 model: # For different fine_tuning tasks, tuning the hyper parameters accordingly; below is only for MNLI
-  # use T5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   restore_from_path: ${fine_tuning.run.convert_dir}/results/megatron_t5.nemo # Path to a trained T5 .nemo file
   pretrained_checkpoint:
     checkpoint_dir: null # Path to a folder that contains a .ckpt file
4 changes: 0 additions & 4 deletions launcher_scripts/conf/fine_tuning/t5/squad.yaml
@@ -45,10 +45,6 @@ exp_manager:
     save_best_model: True
 
 model: # For different fine_tuning tasks, tuning the hyper parameters accordingly; below is only for MNLI
-  # use T5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   restore_from_path: ${fine_tuning.run.convert_dir}/results/megatron_t5.nemo # Path to a trained T5 .nemo file
   pretrained_checkpoint:
     checkpoint_dir: null # Path to a folder that contains a .ckpt file
4 changes: 0 additions & 4 deletions launcher_scripts/conf/peft/t5/squad.yaml
@@ -54,10 +54,6 @@ exp_manager:
     strict: False # Should be False to avoid a runtime error where EarlyStopping says monitor is unavailable, which sometimes happens with resumed training.
 
 model:
-  # use T5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   seed: 1234
   tensor_model_parallel_size: 1 # intra-layer model parallelism
   pipeline_model_parallel_size: 1 # inter-layer model parallelism
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/mt5/11b.yaml
@@ -53,10 +53,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use MT5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 24
   global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/mt5/170m.yaml
@@ -53,10 +53,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use MT5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 64
   global_batch_size: 2048 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/mt5/23b.yaml
@@ -53,10 +53,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use MT5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 8
   global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/mt5/390m.yaml
@@ -53,10 +53,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use MT5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 32
   global_batch_size: 2048 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/mt5/3b.yaml
@@ -53,10 +53,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use MT5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 24
   global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/t5/11b.yaml
@@ -51,10 +51,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use T5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 24
   global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/t5/220m.yaml
@@ -51,10 +51,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use T5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 64
   global_batch_size: 2048 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/t5/23b.yaml
@@ -51,10 +51,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use T5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 8
   global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/t5/3b.yaml
@@ -51,10 +51,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use T5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 24
   global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 0 additions & 4 deletions launcher_scripts/conf/training/t5/41b.yaml
@@ -51,10 +51,6 @@ exp_manager:
     buffer_size: 5
 
 model:
-  # use T5 model from megatron.core
-  mcore_t5: True
-  transformer_engine: False
-
   # model parallelism
   micro_batch_size: 6
   global_batch_size: 1920 # will use more micro batches to reach global batch size
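
After this revert, none of the shipped T5/mT5 configs opt into the megatron.core implementation. Anyone who still wants that path could re-add the two keys to a local copy of one of these files; a sketch against launcher_scripts/conf/training/t5/220m.yaml, where the key names and batch-size lines come from the diff above but their exact placement, and whether the installed NeMo version still honors them, are assumptions:

    model:
      # re-enable the megatron.core T5 implementation (keys as they appeared before this revert)
      mcore_t5: True
      transformer_engine: False
      # model parallelism
      micro_batch_size: 64
      global_batch_size: 2048 # will use more micro batches to reach global batch size

A Hydra-style command-line override at launch time (for example +training.model.mcore_t5=True) should have the same effect, though the exact override path depends on the launcher stage and is likewise an assumption.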
