
Commit

fix conflict
JRD971000 committed May 15, 2024
2 parents 7926abd + 14a1e41 commit 5b0d8b1
Showing 75 changed files with 7,877 additions and 118 deletions.
6 changes: 2 additions & 4 deletions .github/workflows/launcher.yml
@@ -17,10 +17,8 @@ jobs:
 
     - name: install dependencies
       run: |
-        python -m pip install pytest
-        pip install -r requirements.txt
-        pip install requests-mock
+        python -m pip install pytest requests-mock -r requirements.txt
     - name: run unit tests
-      run: pytest
+      run: PYTHONPATH=$PWD pytest
       working-directory: ${{env.working-directory}}
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -36,12 +36,12 @@ repos:
hooks:
- id: isort
name: Format imports
args: ['--profile', 'black', '--line-length', '88']
exclude: docs/

- repo: https://github.com/psf/black
rev: 19.10b0
hooks:
- id: black
name: Format code
args: [--skip-string-normalization, --line-length=119]
additional_dependencies: ['click==8.0.2']
4 changes: 2 additions & 2 deletions README.md
@@ -60,8 +60,8 @@ See the [Feature Matrix](https://docs.nvidia.com/nemo-framework/user-guide/lates
 The NeMo Framework Launcher should be installed on a head node or a local machine in a virtual python environment.
 
 ```bash
-git clone https://github.com/NVIDIA/NeMo-Megatron-Launcher.git
-cd NeMo-Megatron-Launcher
+git clone https://github.com/NVIDIA/NeMo-Framework-Launcher.git
+cd NeMo-Framework-Launcher
 pip install -r requirements.txt
 ```

6 changes: 3 additions & 3 deletions auto_configurator/autoconfig/scripts/compare_throughput.py
@@ -78,12 +78,12 @@ def main(cfg):
     gbs = model_cfg.get("global_batch_size")
     enc_seq_len = (
         model_cfg.get("encoder_seq_length")
-        if model_name in ("gpt3", "bert", "llama", "baichuan2", "chatglm")
+        if model_name in ("gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2")
         else model_cfg.get("seq_length")
     )
     dec_seq_len = data_cfg.get("seq_length_dec")
 
-    if model_name in ("gpt3", "bert", "llama", "baichuan2", "chatglm"):
+    if model_name in ("gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"):
         hs = model_cfg.get("hidden_size")
         ffn_hs = None
         layers = model_cfg.get("num_layers")
@@ -249,7 +249,7 @@ def calculate_tflops(
     Bert Formula:
     Model FLOPs = 72BLsh^2 * ( 1 + (s/6h) + (v/12hL))
     """
-    if model_name in ["gpt3", "llama", "baichuan2", "chatglm"]:
+    if model_name in ["gpt3", "llama", "baichuan2", "chatglm", "qwen2"]:
         # Model FLOPS calculation
         model_flops = (
             (
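The Bert formula quoted in the docstring above maps directly to code. The sketch below (not part of the commit) evaluates it with hypothetical numbers; reading the symbols as B = global batch size, L = number of layers, s = sequence length, h = hidden size, v = vocabulary size is an assumption based on the config keys used elsewhere in this script.

```python
# Hedged sketch of the Bert model-FLOPs formula from the docstring above:
#   Model FLOPs = 72 * B * L * s * h^2 * (1 + s/(6h) + v/(12hL))
# The symbol reading and the example values are assumptions, not values from the repo.
def bert_model_flops(gbs: int, layers: int, seq_len: int, hidden: int, vocab: int) -> float:
    return (
        72
        * gbs
        * layers
        * seq_len
        * hidden**2
        * (1 + seq_len / (6 * hidden) + vocab / (12 * hidden * layers))
    )


if __name__ == "__main__":
    # Hypothetical BERT-like setup: gbs=256, 24 layers, seq 512, hidden 1024, vocab 30522.
    print(f"{bert_model_flops(256, 24, 512, 1024, 30522):.3e} model FLOPs per step")
```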
11 changes: 10 additions & 1 deletion auto_configurator/autoconfig/search_config.py
@@ -20,7 +20,16 @@
 from autoconfig.inference_sweep import search_inference_config
 from autoconfig.training_config import search_training_config
 
-SUPPORTED_MODELS = ["gpt3", "t5", "mt5", "bert", "llama", "baichuan2", "chatglm"]
+SUPPORTED_MODELS = [
+    "gpt3",
+    "t5",
+    "mt5",
+    "bert",
+    "llama",
+    "baichuan2",
+    "chatglm",
+    "qwen2",
+]
 
 
 def search_config(
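The widened SUPPORTED_MODELS list is what the auto-configurator checks candidate model names against. As a rough illustration only — the real search_config signature and validation are not shown in this diff — such a check might look like the sketch below; the run-name convention ("qwen2_7b") and the helper name are hypothetical.

```python
# Hypothetical illustration of validating a model name against SUPPORTED_MODELS.
# Neither this helper nor the "family_size" run-name convention is taken from the commit.
SUPPORTED_MODELS = [
    "gpt3", "t5", "mt5", "bert", "llama", "baichuan2", "chatglm", "qwen2",
]


def model_family(run_name: str) -> str:
    """Return the model family from a run name such as 'qwen2_7b'."""
    family = run_name.split("_")[0]
    if family not in SUPPORTED_MODELS:
        raise NotImplementedError(
            f"Model {family} is not supported; expected one of {SUPPORTED_MODELS}."
        )
    return family


print(model_family("qwen2_7b"))  # -> qwen2
```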
39 changes: 30 additions & 9 deletions auto_configurator/autoconfig/training_config.py
@@ -81,17 +81,19 @@ def generate_grid_search_configs(
 
     # 2 * num_layers is needed because of encoder/decoder architecture.
     multiplier = (
-        1 if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm"] else 2
+        1
+        if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"]
+        else 2
     )
 
     seq_length = base_cfg["model"]["data"]["seq_length"]
     num_layers = (
         base_cfg["model"]["num_layers"]
-        if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm"]
+        if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"]
         else base_cfg["model"]["encoder"]["num_layers"]
     )
 
-    if model_name in ["gpt3", "bert", "llama"]:
+    if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"]:
         act_method = base_cfg["model"].get("activations_checkpoint_method", "None")
     else:
         act_method = base_cfg["model"]["encoder"].get(
@@ -126,7 +128,14 @@ def generate_grid_search_configs(
             base_cfg["trainer"]["num_nodes"] * base_cfg["trainer"]["devices"]
         )
         gbs = base_cfg["model"]["global_batch_size"]
-        if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm"]:
+        if model_name in [
+            "gpt3",
+            "bert",
+            "llama",
+            "baichuan2",
+            "chatglm",
+            "qwen2",
+        ]:
             att_heads = base_cfg["model"]["num_attention_heads"]
             num_layers = base_cfg["model"]["num_layers"]
         else:
@@ -222,7 +231,8 @@ def _set_activations_checkpoint_params(
     max_layers_per_pipe = num_layers
     interval_layers_per_pipe = act_multiple
     if (
-        model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm"] and pp > 2
+        model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"]
+        and pp > 2
     ):  # Interleaved pipeline scheduling.
         virtual_pipelines = (
             num_layers // pp
@@ -246,7 +256,14 @@ def _set_activations_checkpoint_params(
             0, multiplier * num_layers // pp // virtual_pipelines + 1, act_multiple
         )
 
-    if pp > 1 and model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm"]:
+    if pp > 1 and model_name in [
+        "gpt3",
+        "bert",
+        "llama",
+        "baichuan2",
+        "chatglm",
+        "qwen2",
+    ]:
         # Num micro batches with partial act ckpt
         num_micro_batches_partial_act_ckpt = list(
             range(min_micro_b, max_micro_b + 1, interval_micro_b)
@@ -824,14 +841,18 @@ def _calculate_tp_pp_mbs_grid(
     gpu_memory_gb = train_cfg.get("gpu_memory_gb")
 
     multiplier = (
-        1 if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm"] else 2
+        1
+        if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"]
+        else 2
     )
-    init_pp = [] if model_name in ["gpt3", "llama", "baichuan2", "chatglm"] else [1]
+    init_pp = (
+        [] if model_name in ["gpt3", "llama", "baichuan2", "chatglm", "qwen2"] else [1]
+    )
     valid_pp = init_pp + [
         multiplier * x for x in range(1, num_layers + 1) if num_layers % x == 0
     ]  # Only divisors of num_layers are possible.
 
-    if model_name in ["gpt3", "llama", "baichuan2", "chatglm"]:
+    if model_name in ["gpt3", "llama", "baichuan2", "chatglm", "qwen2"]:
         if gpu_memory_gb == 80:
             (
                 tp,
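The valid_pp expression in the last hunk only admits pipeline-parallel sizes that divide num_layers, scaled by the encoder/decoder multiplier. A standalone sketch with hypothetical inputs (a 32-layer decoder-only model) shows what the qwen2 branch now produces:

```python
# Standalone rerun of the valid_pp computation from the hunk above, with hypothetical inputs.
model_name = "qwen2"
num_layers = 32  # hypothetical decoder-only depth; not a value from the commit

multiplier = (
    1
    if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"]
    else 2
)
init_pp = (
    [] if model_name in ["gpt3", "llama", "baichuan2", "chatglm", "qwen2"] else [1]
)
valid_pp = init_pp + [
    multiplier * x for x in range(1, num_layers + 1) if num_layers % x == 0
]  # Only divisors of num_layers are possible.

print(valid_pp)  # [1, 2, 4, 8, 16, 32]
```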
11 changes: 7 additions & 4 deletions auto_configurator/autoconfig/utils.py
@@ -45,7 +45,7 @@ def _calculate_model_size(
     :rtype: float
     :raises NotImplementedError: if the model name is not valid.
     """
-    if model_name in ["gpt3", "llama", "baichuan2", "chatglm"]:
+    if model_name in ["gpt3", "llama", "baichuan2", "chatglm", "qwen2"]:
         model_size = (
             12
             * num_layers
@@ -113,7 +113,7 @@ def calculate_model_size_params(
     :raises NotImplementedError: if the model name is not supported.
     """
     ffn, kv = None, None  # Only needed for some models.
-    if model_name in ["gpt3", "llama", "baichuan2", "chatglm"]:
+    if model_name in ["gpt3", "llama", "baichuan2", "chatglm", "qwen2"]:
         if model_size_in_b < 0.25:
             hs, att_h, lr = 768, 12, 6e-4
         elif model_size_in_b < 0.5:
@@ -395,7 +395,7 @@ def modify_cfg(
     """
     new_cfg = copy.deepcopy(base_cfg)
    if act is not None:
-        if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm"]:
+        if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"]:
            new_cfg["model"]["activations_checkpoint_num_layers"] = act
        else:
            new_cfg["model"]["encoder"]["activations_checkpoint_num_layers"] = act // 2
@@ -407,6 +407,7 @@ def modify_cfg(
         "llama",
         "baichuan2",
         "chatglm",
+        "qwen2",
     ]:
         new_cfg["model"][
             "num_micro_batches_with_partial_activation_checkpoints"
@@ -418,6 +419,7 @@ def modify_cfg(
         "llama",
         "baichuan2",
         "chatglm",
+        "qwen2",
     ]:
         new_cfg["model"]["activations_checkpoint_layers_per_pipeline"] = act_per_pipe
 
@@ -427,14 +429,15 @@ def modify_cfg(
         "llama",
         "baichuan2",
         "chatglm",
+        "qwen2",
     ]:
         new_cfg["model"]["virtual_pipeline_model_parallel_size"] = virtual_pipelines
 
     new_cfg["model"]["tensor_model_parallel_size"] = tp
     new_cfg["model"]["pipeline_model_parallel_size"] = pp
     new_cfg["model"]["micro_batch_size"] = mbs
 
-    if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm"]:
+    if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"]:
         att_heads = new_cfg["model"]["num_attention_heads"]
         num_layers = new_cfg["model"]["num_layers"]
     else:
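The _calculate_model_size branch shown above is cut off after the `12 * num_layers` factor. For orientation only, a widely used approximation for decoder-only parameter counts has the shape sketched below; everything after `12 * num_layers` is an assumption for illustration, not the exact expression in utils.py.

```python
# Hedged sketch: an approximate GPT-style parameter count in billions. Only the leading
# 12 * num_layers factor appears in the diff; the remaining terms are assumed here.
def approx_decoder_size_in_b(num_layers: int, hidden: int, vocab: int, seq_len: int) -> float:
    params = (
        12
        * num_layers
        * hidden**2
        * (1 + 13 / (12 * hidden) + (vocab + seq_len) / (12 * num_layers * hidden))
    )
    return params / 1e9


# Hypothetical example: 32 layers, hidden 4096, 32k vocab, 4k sequence length.
print(f"~{approx_decoder_size_in_b(32, 4096, 32000, 4096):.2f}B parameters")
```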
