turn off ddp_overlap for non-O2 usage and add synthetic data argument (
Victor49152 authored Aug 1, 2024
1 parent 6a76374 commit 8325688
Showing 6 changed files with 28 additions and 32 deletions.
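
Across all six YAML configs the substance of the change is the same pair of keys in the model section: the PyTorch DDP overlap flag now defaults to off (per the commit title it is only intended for O2 usage), and the data block gains a synthetic-data switch. A minimal sketch of how the touched keys sit in these files, with surrounding keys omitted:

  model:
    gradient_as_bucket_view: True
    ddp_overlap: False        # overlap DDP gradient communication with the backward pass; re-enable only for O2 runs
    data:
      num_workers: 16
      synthetic_data: False   # assumption: when True, the loader feeds generated placeholder batches instead of a real dataset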
@@ -165,7 +165,7 @@ model:
   resume_from_checkpoint: null # manually set the checkpoint file to load from
   apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this
   gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
-  ddp_overlap: True # True for using PyTorch DDP overlap.
+  ddp_overlap: False # True for using PyTorch DDP overlap.

   optim:
     name: fused_adam
@@ -189,6 +189,7 @@ model:

   data:
     num_workers: 16
+    synthetic_data: False
     train:
       dataset_path:
         - ${data_dir}/your_dataset/wdinfo.pkl
@@ -148,26 +148,21 @@ model:
       target: torch.nn.Identity

   cond_stage_config:
-    _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenMegatronCLIPEmbedder
-    restore_from_path: /path/to/nemo_clip.nemo
-    device: cuda
-    freeze: True
-    layer: "penultimate"
-    # For compatibility of history version that uses open clip model
-    # _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenOpenCLIPEmbedder
-    # arch: ViT-H-14
-    # version: laion2b_s32b_b79k
-    # device: cuda
-    # max_length: 77
-    # freeze: True
-    # layer: "penultimate"
+    # For compatibility of history version that uses open clip model
+    _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenOpenCLIPEmbedder
+    arch: ViT-H-14
+    version: laion2b_s32b_b79k
+    device: cuda
+    max_length: 77
+    freeze: True
+    layer: "penultimate"

   # miscellaneous
   seed: 666
   resume_from_checkpoint: null # manually set the checkpoint file to load from
   apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this
   gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
-  ddp_overlap: True # True for using PyTorch DDP overlap.
+  ddp_overlap: False # True for using PyTorch DDP overlap.

   optim:
     name: fused_adam
@@ -191,6 +186,7 @@ model:

   data:
     num_workers: 16
+    synthetic_data: False
     train:
       dataset_path:
         - ${data_dir}/your_dataset/wdinfo.pkl
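
The file above also switches the conditioning encoder from the NeMo CLIP wrapper to the open_clip one. If the old encoder is still needed, the keys it used are visible in the removed lines; a sketch of restoring it, where restore_from_path is the placeholder from the old config and must point at a real CLIP .nemo checkpoint:

  cond_stage_config:
    _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenMegatronCLIPEmbedder
    restore_from_path: /path/to/nemo_clip.nemo  # placeholder; replace with your CLIP .nemo checkpoint
    device: cuda
    freeze: True
    layer: "penultimate"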
@@ -164,7 +164,7 @@ model:
   resume_from_checkpoint: null # manually set the checkpoint file to load from
   apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this
   gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
-  ddp_overlap: True # True for using PyTorch DDP overlap.
+  ddp_overlap: False # True for using PyTorch DDP overlap.

   optim:
     name: fused_adam
@@ -188,6 +188,7 @@ model:

   data:
     num_workers: 16
+    synthetic_data: False
     train:
       dataset_path:
         - ${data_dir}/your_dataset/wdinfo.pkl
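
Conversely, for a run that does use the O2 mixed-precision path, the overlap can be turned back on. A sketch, assuming these configs expose NeMo's usual megatron_amp_O2 switch (that key is not shown in the excerpts above, so verify it in the full file):

  model:
    megatron_amp_O2: True  # assumption: the O2 path referenced by the commit title
    ddp_overlap: True      # intended to be re-enabled only together with O2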
@@ -164,7 +164,7 @@ model:
   resume_from_checkpoint: null # manually set the checkpoint file to load from
   apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this
   gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
-  ddp_overlap: True # True for using PyTorch DDP overlap.
+  ddp_overlap: False # True for using PyTorch DDP overlap.

   optim:
     name: fused_adam
@@ -188,6 +188,7 @@ model:

   data:
     num_workers: 16
+    synthetic_data: False
     train:
       dataset_path:
         - ${data_dir}/your_dataset/wdinfo.pkl
@@ -164,7 +164,7 @@ model:
   resume_from_checkpoint: null # manually set the checkpoint file to load from
   apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this
   gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
-  ddp_overlap: True # True for using PyTorch DDP overlap.
+  ddp_overlap: False # True for using PyTorch DDP overlap.

   optim:
     name: fused_adam
@@ -188,6 +188,7 @@ model:

   data:
     num_workers: 16
+    synthetic_data: False
     train:
       dataset_path:
         - ${data_dir}/your_dataset/wdinfo.pkl
@@ -148,26 +148,21 @@ model:
       target: torch.nn.Identity

   cond_stage_config:
-    _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenMegatronCLIPEmbedder
-    restore_from_path: /path/to/nemo_clip.nemo
-    device: cuda
-    freeze: True
-    layer: "penultimate"
-    # For compatibility of history version that uses open clip model
-    # _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenOpenCLIPEmbedder
-    # arch: ViT-H-14
-    # version: laion2b_s32b_b79k
-    # device: cuda
-    # max_length: 77
-    # freeze: True
-    # layer: "penultimate"
+    # For compatibility of history version that uses open clip model
+    _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenOpenCLIPEmbedder
+    arch: ViT-H-14
+    version: laion2b_s32b_b79k
+    device: cuda
+    max_length: 77
+    freeze: True
+    layer: "penultimate"

   # miscellaneous
   seed: 666
   resume_from_checkpoint: null # manually set the checkpoint file to load from
   apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this
   gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
-  ddp_overlap: True # True for using PyTorch DDP overlap.
+  ddp_overlap: False # True for using PyTorch DDP overlap.

   optim:
     name: fused_adam
@@ -191,6 +186,7 @@ model:

   data:
     num_workers: 16
+    synthetic_data: False
     train:
       dataset_path:
         - ${data_dir}/your_dataset/wdinfo.pkl
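
The new data flag can be flipped on when the goal is a throughput or smoke test without mounting a real webdataset; a sketch, assuming the flag simply bypasses the configured dataset:

  data:
    num_workers: 16
    synthetic_data: True  # assumption: generated batches are used, so the wdinfo.pkl entry below is ignored
    train:
      dataset_path:
        - ${data_dir}/your_dataset/wdinfo.pkl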
