Commit
Merge branch 'main' into malay/token_drop
erhoo82 authored Jul 9, 2024
2 parents 1c3d475 + 70278f9 commit b885571
Showing 15 changed files with 60 additions and 0 deletions.
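
All 15 files receive the same four-line addition under their model: section, switching the T5 and MT5 configurations to the megatron.core implementation with Transformer Engine disabled. A minimal sketch of the shared snippet in isolation follows; the comment about falling back to the legacy NeMo path when mcore_t5 is False is an assumption, not something stated in this diff:

    model:
      # use T5 model from megatron.core; setting this to False would presumably
      # fall back to the legacy NeMo T5 implementation (assumption)
      mcore_t5: True
      transformer_engine: False

If the launcher's usual Hydra-style dotted overrides apply, these keys could presumably also be flipped at launch time (for example, training.model.mcore_t5=False), although this commit only changes the YAML defaults.
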
4 changes: 4 additions & 0 deletions launcher_scripts/conf/fine_tuning/mt5/custom_task.yaml
@@ -45,6 +45,10 @@ exp_manager:
save_best_model: True

model: # For different fine-tuning tasks, tune the hyperparameters accordingly
# use MT5 model from megatron.core
mcore_t5: True
transformer_engine: False

restore_from_path: ${fine_tuning.run.convert_dir}/results/megatron_mt5.nemo # Path to a trained mt5 .nemo file
pretrained_checkpoint:
checkpoint_dir: null # Path to a folder that contains a .ckpt file
4 changes: 4 additions & 0 deletions launcher_scripts/conf/fine_tuning/mt5/xquad.yaml
@@ -45,6 +45,10 @@ exp_manager:
save_best_model: True

model: # For different fine-tuning tasks, tune the hyperparameters accordingly
# use MT5 model from megatron.core
mcore_t5: True
transformer_engine: False

restore_from_path: ${fine_tuning.run.convert_dir}/results/megatron_mt5.nemo # Path to a trained mt5 .nemo file
pretrained_checkpoint:
checkpoint_dir: null # Path to a folder that contains a .ckpt file
4 changes: 4 additions & 0 deletions launcher_scripts/conf/fine_tuning/t5/custom_task.yaml
@@ -45,6 +45,10 @@ exp_manager:
save_best_model: True

model: # For different fine-tuning tasks, tune the hyperparameters accordingly; the values below are only for MNLI
# use T5 model from megatron.core
mcore_t5: True
transformer_engine: False

restore_from_path: ${fine_tuning.run.convert_dir}/results/megatron_t5.nemo # Path to a trained T5 .nemo file
pretrained_checkpoint:
checkpoint_dir: null # Path to a folder that contains a .ckpt file
4 changes: 4 additions & 0 deletions launcher_scripts/conf/fine_tuning/t5/squad.yaml
@@ -45,6 +45,10 @@ exp_manager:
save_best_model: True

model: # For different fine-tuning tasks, tune the hyperparameters accordingly; the values below are only for MNLI
# use T5 model from megatron.core
mcore_t5: True
transformer_engine: False

restore_from_path: ${fine_tuning.run.convert_dir}/results/megatron_t5.nemo # Path to a trained T5 .nemo file
pretrained_checkpoint:
checkpoint_dir: null # Path to a folder that contains a .ckpt file
4 changes: 4 additions & 0 deletions launcher_scripts/conf/peft/t5/squad.yaml
@@ -54,6 +54,10 @@ exp_manager:
strict: False # Should be False to avoid a runtime error where EarlyStopping says monitor is unavailable, which sometimes happens with resumed training.

model:
# use T5 model from megatron.core
mcore_t5: True
transformer_engine: False

seed: 1234
tensor_model_parallel_size: 1 # intra-layer model parallelism
pipeline_model_parallel_size: 1 # inter-layer model parallelism
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/mt5/11b.yaml
@@ -53,6 +53,10 @@ exp_manager:
buffer_size: 5

model:
# use MT5 model from megatron.core
mcore_t5: True
transformer_engine: False

# model parallelism
micro_batch_size: 24
global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/mt5/170m.yaml
@@ -53,6 +53,10 @@ exp_manager:
buffer_size: 5

model:
# use MT5 model from megatron.core
mcore_t5: True
transformer_engine: False

# model parallelism
micro_batch_size: 64
global_batch_size: 2048 # will use more micro batches to reach global batch size
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/mt5/23b.yaml
@@ -53,6 +53,10 @@ exp_manager:
buffer_size: 5

model:
# use MT5 model from megatron.core
mcore_t5: True
transformer_engine: False

# model parallelism
micro_batch_size: 8
global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/mt5/390m.yaml
@@ -53,6 +53,10 @@ exp_manager:
buffer_size: 5

model:
# use MT5 model from megatron.core
mcore_t5: True
transformer_engine: False

# model parallelism
micro_batch_size: 32
global_batch_size: 2048 # will use more micro batches to reach global batch size
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/mt5/3b.yaml
@@ -53,6 +53,10 @@ exp_manager:
buffer_size: 5

model:
# use MT5 model from megatron.core
mcore_t5: True
transformer_engine: False

# model parallelism
micro_batch_size: 24
global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/t5/11b.yaml
@@ -51,6 +51,10 @@ exp_manager:
buffer_size: 5

model:
# use T5 model from megatron.core
mcore_t5: True
transformer_engine: False

# model parallelism
micro_batch_size: 24
global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/t5/220m.yaml
@@ -51,6 +51,10 @@ exp_manager:
buffer_size: 5

model:
# use T5 model from megatron.core
mcore_t5: True
transformer_engine: False

# model parallelism
micro_batch_size: 64
global_batch_size: 2048 # will use more micro batches to reach global batch size
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/t5/23b.yaml
@@ -51,6 +51,10 @@ exp_manager:
buffer_size: 5

model:
# use T5 model from megatron.core
mcore_t5: True
transformer_engine: False

# model parallelism
micro_batch_size: 8
global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/t5/3b.yaml
@@ -51,6 +51,10 @@ exp_manager:
buffer_size: 5

model:
# use T5 model from megatron.core
mcore_t5: True
transformer_engine: False

# model parallelism
micro_batch_size: 24
global_batch_size: 1920 # will use more micro batches to reach global batch size
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/t5/41b.yaml
@@ -51,6 +51,10 @@ exp_manager:
buffer_size: 5

model:
# use T5 model from megatron.core
mcore_t5: True
transformer_engine: False

# model parallelism
micro_batch_size: 6
global_batch_size: 1920 # will use more micro batches to reach global batch size
