
Commit

fix conflict
JRD971000 committed May 15, 2024
2 parents 7926abd + 14a1e41 commit 5b0d8b1
Showing 75 changed files with 7,877 additions and 118 deletions.
6 changes: 2 additions & 4 deletions .github/workflows/launcher.yml
@@ -17,10 +17,8 @@ jobs:
 
     - name: install dependencies
       run: |
-        python -m pip install pytest
-        pip install -r requirements.txt
-        pip install requests-mock
+        python -m pip install pytest requests-mock -r requirements.txt
     - name: run unit tests
-      run: pytest
+      run: PYTHONPATH=$PWD pytest
       working-directory: ${{env.working-directory}}
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -36,12 +36,12 @@ repos:
hooks:
- id: isort
name: Format imports
args: ['--profile', 'black', '--line-length', '88']
exclude: docs/

- repo: https://github.com/psf/black
rev: 19.10b0
hooks:
- id: black
name: Format code
args: [--skip-string-normalization, --line-length=119]
additional_dependencies: ['click==8.0.2']
4 changes: 2 additions & 2 deletions README.md
@@ -60,8 +60,8 @@ See the [Feature Matrix](https://docs.nvidia.com/nemo-framework/user-guide/lates
 The NeMo Framework Launcher should be installed on a head node or a local machine in a virtual python environment.
 
 ```bash
-git clone https://github.com/NVIDIA/NeMo-Megatron-Launcher.git
-cd NeMo-Megatron-Launcher
+git clone https://github.com/NVIDIA/NeMo-Framework-Launcher.git
+cd NeMo-Framework-Launcher
 pip install -r requirements.txt
 ```

6 changes: 3 additions & 3 deletions auto_configurator/autoconfig/scripts/compare_throughput.py
@@ -78,12 +78,12 @@ def main(cfg):
     gbs = model_cfg.get("global_batch_size")
     enc_seq_len = (
         model_cfg.get("encoder_seq_length")
-        if model_name in ("gpt3", "bert", "llama", "baichuan2", "chatglm")
+        if model_name in ("gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2")
         else model_cfg.get("seq_length")
     )
     dec_seq_len = data_cfg.get("seq_length_dec")
 
-    if model_name in ("gpt3", "bert", "llama", "baichuan2", "chatglm"):
+    if model_name in ("gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"):
         hs = model_cfg.get("hidden_size")
         ffn_hs = None
         layers = model_cfg.get("num_layers")
@@ -249,7 +249,7 @@ def calculate_tflops(
     Bert Formula:
     Model FLOPs = 72BLsh^2 * ( 1 + (s/6h) + (v/12hL))
     """
-    if model_name in ["gpt3", "llama", "baichuan2", "chatglm"]:
+    if model_name in ["gpt3", "llama", "baichuan2", "chatglm", "qwen2"]:
         # Model FLOPS calculation
         model_flops = (
             (
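The Bert formula quoted in the docstring above maps directly to code. The sketch below (not part of the commit) evaluates it with hypothetical numbers; reading the symbols as B = global batch size, L = number of layers, s = sequence length, h = hidden size, v = vocabulary size is an assumption based on the config keys used elsewhere in this script.

```python
# Hedged sketch of the Bert model-FLOPs formula from the docstring above:
#   Model FLOPs = 72 * B * L * s * h^2 * (1 + s/(6h) + v/(12hL))
# The symbol reading and the example values are assumptions, not values from the repo.
def bert_model_flops(gbs: int, layers: int, seq_len: int, hidden: int, vocab: int) -> float:
    return (
        72
        * gbs
        * layers
        * seq_len
        * hidden**2
        * (1 + seq_len / (6 * hidden) + vocab / (12 * hidden * layers))
    )


if __name__ == "__main__":
    # Hypothetical BERT-like setup: gbs=256, 24 layers, seq 512, hidden 1024, vocab 30522.
    print(f"{bert_model_flops(256, 24, 512, 1024, 30522):.3e} model FLOPs per step")
```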
11 changes: 10 additions & 1 deletion auto_configurator/autoconfig/search_config.py
@@ -20,7 +20,16 @@
 from autoconfig.inference_sweep import search_inference_config
 from autoconfig.training_config import search_training_config
 
-SUPPORTED_MODELS = ["gpt3", "t5", "mt5", "bert", "llama", "baichuan2", "chatglm"]
+SUPPORTED_MODELS = [
+    "gpt3",
+    "t5",
+    "mt5",
+    "bert",
+    "llama",
+    "baichuan2",
+    "chatglm",
+    "qwen2",
+]
 
 
 def search_config(
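The widened SUPPORTED_MODELS list is what the auto-configurator checks candidate model names against. As a rough illustration only — the real search_config signature and validation are not shown in this diff — such a check might look like the sketch below; the run-name convention ("qwen2_7b") and the helper name are hypothetical.

```python
# Hypothetical illustration of validating a model name against SUPPORTED_MODELS.
# Neither this helper nor the "family_size" run-name convention is taken from the commit.
SUPPORTED_MODELS = [
    "gpt3", "t5", "mt5", "bert", "llama", "baichuan2", "chatglm", "qwen2",
]


def model_family(run_name: str) -> str:
    """Return the model family from a run name such as 'qwen2_7b'."""
    family = run_name.split("_")[0]
    if family not in SUPPORTED_MODELS:
        raise NotImplementedError(
            f"Model {family} is not supported; expected one of {SUPPORTED_MODELS}."
        )
    return family


print(model_family("qwen2_7b"))  # -> qwen2
```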
39 changes: 30 additions & 9 deletions auto_configurator/autoconfig/training_config.py
@@ -81,17 +81,19 @@ def generate_grid_search_configs(
 
     # 2 * num_layers is needed because of encoder/decoder architecture.
     multiplier = (
-        1 if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm"] else 2
+        1
+        if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"]
+        else 2
     )
 
     seq_length = base_cfg["model"]["data"]["seq_length"]
     num_layers = (
         base_cfg["model"]["num_layers"]
-        if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm"]
+        if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"]
         else base_cfg["model"]["encoder"]["num_layers"]
     )
 
-    if model_name in ["gpt3", "bert", "llama"]:
+    if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"]:
         act_method = base_cfg["model"].get("activations_checkpoint_method", "None")
     else:
         act_method = base_cfg["model"]["encoder"].get(
@@ -126,7 +128,14 @@ def generate_grid_search_configs(
             base_cfg["trainer"]["num_nodes"] * base_cfg["trainer"]["devices"]
         )
         gbs = base_cfg["model"]["global_batch_size"]
-        if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm"]:
+        if model_name in [
+            "gpt3",
+            "bert",
+            "llama",
+            "baichuan2",
+            "chatglm",
+            "qwen2",
+        ]:
             att_heads = base_cfg["model"]["num_attention_heads"]
             num_layers = base_cfg["model"]["num_layers"]
         else:
@@ -222,7 +231,8 @@ def _set_activations_checkpoint_params(
     max_layers_per_pipe = num_layers
     interval_layers_per_pipe = act_multiple
     if (
-        model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm"] and pp > 2
+        model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"]
+        and pp > 2
     ):  # Interleaved pipeline scheduling.
         virtual_pipelines = (
             num_layers // pp
@@ -246,7 +256,14 @@ def _set_activations_checkpoint_params(
             0, multiplier * num_layers // pp // virtual_pipelines + 1, act_multiple
         )
 
-    if pp > 1 and model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm"]:
+    if pp > 1 and model_name in [
+        "gpt3",
+        "bert",
+        "llama",
+        "baichuan2",
+        "chatglm",
+        "qwen2",
+    ]:
         # Num micro batches with partial act ckpt
         num_micro_batches_partial_act_ckpt = list(
             range(min_micro_b, max_micro_b + 1, interval_micro_b)
@@ -824,14 +841,18 @@ def _calculate_tp_pp_mbs_grid(
     gpu_memory_gb = train_cfg.get("gpu_memory_gb")
 
     multiplier = (
-        1 if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm"] else 2
+        1
+        if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"]
+        else 2
     )
-    init_pp = [] if model_name in ["gpt3", "llama", "baichuan2", "chatglm"] else [1]
+    init_pp = (
+        [] if model_name in ["gpt3", "llama", "baichuan2", "chatglm", "qwen2"] else [1]
+    )
     valid_pp = init_pp + [
         multiplier * x for x in range(1, num_layers + 1) if num_layers % x == 0
     ]  # Only divisors of num_layers are possible.
 
-    if model_name in ["gpt3", "llama", "baichuan2", "chatglm"]:
+    if model_name in ["gpt3", "llama", "baichuan2", "chatglm", "qwen2"]:
         if gpu_memory_gb == 80:
             (
                 tp,
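The valid_pp expression in the last hunk only admits pipeline-parallel sizes that divide num_layers, scaled by the encoder/decoder multiplier. A standalone sketch with hypothetical inputs (a 32-layer decoder-only model) shows what the qwen2 branch now produces:

```python
# Standalone rerun of the valid_pp computation from the hunk above, with hypothetical inputs.
model_name = "qwen2"
num_layers = 32  # hypothetical decoder-only depth; not a value from the commit

multiplier = (
    1
    if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"]
    else 2
)
init_pp = (
    [] if model_name in ["gpt3", "llama", "baichuan2", "chatglm", "qwen2"] else [1]
)
valid_pp = init_pp + [
    multiplier * x for x in range(1, num_layers + 1) if num_layers % x == 0
]  # Only divisors of num_layers are possible.

print(valid_pp)  # [1, 2, 4, 8, 16, 32]
```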
11 changes: 7 additions & 4 deletions auto_configurator/autoconfig/utils.py
@@ -45,7 +45,7 @@ def _calculate_model_size(
     :rtype: float
     :raises NotImplementedError: if the model name is not valid.
     """
-    if model_name in ["gpt3", "llama", "baichuan2", "chatglm"]:
+    if model_name in ["gpt3", "llama", "baichuan2", "chatglm", "qwen2"]:
         model_size = (
             12
             * num_layers
@@ -113,7 +113,7 @@ def calculate_model_size_params(
     :raises NotImplementedError: if the model name is not supported.
     """
     ffn, kv = None, None  # Only needed for some models.
-    if model_name in ["gpt3", "llama", "baichuan2", "chatglm"]:
+    if model_name in ["gpt3", "llama", "baichuan2", "chatglm", "qwen2"]:
         if model_size_in_b < 0.25:
             hs, att_h, lr = 768, 12, 6e-4
         elif model_size_in_b < 0.5:
@@ -395,7 +395,7 @@ def modify_cfg(
     """
     new_cfg = copy.deepcopy(base_cfg)
    if act is not None:
-        if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm"]:
+        if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"]:
            new_cfg["model"]["activations_checkpoint_num_layers"] = act
        else:
            new_cfg["model"]["encoder"]["activations_checkpoint_num_layers"] = act // 2
@@ -407,6 +407,7 @@ def modify_cfg(
         "llama",
         "baichuan2",
         "chatglm",
+        "qwen2",
     ]:
         new_cfg["model"][
             "num_micro_batches_with_partial_activation_checkpoints"
@@ -418,6 +419,7 @@ def modify_cfg(
         "llama",
         "baichuan2",
         "chatglm",
+        "qwen2",
     ]:
         new_cfg["model"]["activations_checkpoint_layers_per_pipeline"] = act_per_pipe
 
@@ -427,14 +429,15 @@ def modify_cfg(
         "llama",
         "baichuan2",
         "chatglm",
+        "qwen2",
     ]:
         new_cfg["model"]["virtual_pipeline_model_parallel_size"] = virtual_pipelines
 
     new_cfg["model"]["tensor_model_parallel_size"] = tp
     new_cfg["model"]["pipeline_model_parallel_size"] = pp
     new_cfg["model"]["micro_batch_size"] = mbs
 
-    if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm"]:
+    if model_name in ["gpt3", "bert", "llama", "baichuan2", "chatglm", "qwen2"]:
         att_heads = new_cfg["model"]["num_attention_heads"]
         num_layers = new_cfg["model"]["num_layers"]
     else:
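The _calculate_model_size branch shown above is cut off after the `12 * num_layers` factor. For orientation only, a widely used approximation for decoder-only parameter counts has the shape sketched below; everything after `12 * num_layers` is an assumption for illustration, not the exact expression in utils.py.

```python
# Hedged sketch: an approximate GPT-style parameter count in billions. Only the leading
# 12 * num_layers factor appears in the diff; the remaining terms are assumed here.
def approx_decoder_size_in_b(num_layers: int, hidden: int, vocab: int, seq_len: int) -> float:
    params = (
        12
        * num_layers
        * hidden**2
        * (1 + 13 / (12 * hidden) + (vocab + seq_len) / (12 * num_layers * hidden))
    )
    return params / 1e9


# Hypothetical example: 32 layers, hidden 4096, 32k vocab, 4k sequence length.
print(f"~{approx_decoder_size_in_b(32, 4096, 32000, 4096):.2f}B parameters")
```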
