
Commit

Add search path to gpt configs that points to nemo. Add missing + sign to override

Signed-off-by: Jan Baczek <[email protected]>
jbaczek committed Aug 11, 2023
1 parent 14a9279 commit 6dcc162
Showing 11 changed files with 41 additions and 1 deletion.
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/gpt3/126m.yaml
@@ -1,3 +1,7 @@
+hydra:
+  searchpath:
+    - file:///opt/NeMo/examples/nlp/language_modeling/conf
+
 run:
   name: gpt3_126m
   results_dir: ${base_results_dir}/${.name}
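The same four-line `hydra.searchpath` block is prepended to every gpt3 config in this commit, so Hydra can also resolve config groups from NeMo's installed examples directory inside the container. As a minimal sketch (not part of the launcher; `prepend_searchpath` is a hypothetical helper, and it simply prepends rather than handling leading comment blocks), this is how one might apply the change across all ten files with only the standard library:

```python
from pathlib import Path

# The block added at the top of each gpt3 training config in this commit.
SEARCHPATH_BLOCK = (
    "hydra:\n"
    "  searchpath:\n"
    "    - file:///opt/NeMo/examples/nlp/language_modeling/conf\n"
    "\n"
)

def prepend_searchpath(config_path: Path) -> bool:
    """Prepend the hydra searchpath block to one launcher config.

    Skips files that already declare a searchpath; returns True if the
    file was modified.
    """
    text = config_path.read_text()
    if "searchpath:" in text:
        return False
    config_path.write_text(SEARCHPATH_BLOCK + text)
    return True

def patch_all(conf_dir: Path) -> int:
    """Apply the block to every YAML config under conf_dir; return count."""
    return sum(prepend_searchpath(p) for p in sorted(conf_dir.glob("*.yaml")))
```

Calling `patch_all(Path("launcher_scripts/conf/training/gpt3"))` would then touch exactly the configs listed in this commit and leave already-patched files alone.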
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/gpt3/175b.yaml
@@ -1,3 +1,7 @@
+hydra:
+  searchpath:
+    - file:///opt/NeMo/examples/nlp/language_modeling/conf
+
 run:
   name: gpt3_175b
   results_dir: ${base_results_dir}/${.name}
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/gpt3/175b_performance.yaml
@@ -1,6 +1,10 @@
 # The configurations below provide the best 175B training performance with the NeMo SW stack.
 # We have confirmed the model convergence only with a limited number of tokens and the full model
 # convergence (e.g., 300B tokens) is not guaranteed.
+hydra:
+  searchpath:
+    - file:///opt/NeMo/examples/nlp/language_modeling/conf
+
 run:
   name: gpt3_175b
   results_dir: ${base_results_dir}/${.name}
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/gpt3/1b_improved.yaml
@@ -1,3 +1,7 @@
+hydra:
+  searchpath:
+    - file:///opt/NeMo/examples/nlp/language_modeling/conf
+
 run:
   name: gpt_1b_improved
   results_dir: ${base_results_dir}/${.name}
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/gpt3/20b.yaml
@@ -1,3 +1,7 @@
+hydra:
+  searchpath:
+    - file:///opt/NeMo/examples/nlp/language_modeling/conf
+
 run:
   name: gpt3_20b
   results_dir: ${base_results_dir}/${.name}
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/gpt3/400m_improved.yaml
@@ -1,3 +1,7 @@
+hydra:
+  searchpath:
+    - file:///opt/NeMo/examples/nlp/language_modeling/conf
+
 run:
   name: gpt_400m_improved
   results_dir: ${base_results_dir}/${.name}
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/gpt3/40b.yaml
@@ -1,3 +1,7 @@
+hydra:
+  searchpath:
+    - file:///opt/NeMo/examples/nlp/language_modeling/conf
+
 run:
   name: gpt3_40b
   results_dir: ${base_results_dir}/${.name}
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/gpt3/40b_improved.yaml
@@ -1,3 +1,7 @@
+hydra:
+  searchpath:
+    - file:///opt/NeMo/examples/nlp/language_modeling/conf
+
 run:
   name: gpt_40b_improved
   results_dir: ${base_results_dir}/${.name}
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/gpt3/5b.yaml
@@ -1,3 +1,7 @@
+hydra:
+  searchpath:
+    - file:///opt/NeMo/examples/nlp/language_modeling/conf
+
 run:
   name: gpt3_5b
   results_dir: ${base_results_dir}/${.name}
4 changes: 4 additions & 0 deletions launcher_scripts/conf/training/gpt3/7b_improved.yaml
@@ -1,3 +1,7 @@
+hydra:
+  searchpath:
+    - file:///opt/NeMo/examples/nlp/language_modeling/conf
+
 run:
   name: gpt_7b_improved
   results_dir: ${base_results_dir}/${.name}
2 changes: 1 addition & 1 deletion launcher_scripts/nemo_launcher/core/stages.py
Original file line number Diff line number Diff line change
@@ -601,7 +601,7 @@ def _make_hydra_override(self) -> List:
             hydra_override += [f"model.data.data_prefix=\$({auto_blend_command})"]
         if self.stage_cfg.model.get("ub_tp_comm_overlap", False):
             ub_cfg_name = self._get_ub_cfg_override()
-            hydra_override += [f"'[email protected]_tp_comm_overlap_cfg={ub_cfg_name}'"]
+            hydra_override += [f"'+[email protected]_tp_comm_overlap_cfg={ub_cfg_name}'"]
         if self.stage_cfg.model.get("gc_interval", 0) > 1:
             gc_interval = min(self.stage_cfg.model.get("gc_interval"), self.cfg.training.trainer.get("val_check_interval"))
             hydra_override += [f"model.gc_interval={gc_interval}"]
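This one-character fix is the "missing + sign" from the commit message: in Hydra's override grammar, `group@package=option` replaces an existing config node, while `+group@package=option` adds a node that is absent from the base config, which is what the tensor-parallel communication-overlap config requires here. A hedged sketch of the override assembly (the real config group name before `@` is obscured by the page's email redaction, so `tp_overlap` below is a placeholder, and `make_ub_override` is a hypothetical standalone version of the logic in `_make_hydra_override`):

```python
from typing import List

def make_ub_override(model_cfg: dict, ub_cfg_name: str) -> List[str]:
    """Build the Hydra override for the userbuffers TP-comm-overlap config.

    'tp_overlap' is a placeholder group name; the actual name is redacted
    in the commit page.
    """
    overrides: List[str] = []
    if model_cfg.get("ub_tp_comm_overlap", False):
        # The leading '+' tells Hydra to add this package override as a new
        # config node rather than override one that already exists; without
        # it, Hydra errors out because the base config has no such node.
        overrides.append(f"'+tp_overlap@model.ub_tp_comm_overlap_cfg={ub_cfg_name}'")
    return overrides
```

The surrounding single quotes in the f-string survive into the generated command line so the shell passes the `@`-containing override to Hydra as one token.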

0 comments on commit 6dcc162
