automatically use SyncBatchNorm if doing distributed training
lucidrains committed Nov 1, 2023
1 parent dce5709 commit 243151b
Showing 2 changed files with 12 additions and 3 deletions.
enformer_pytorch/modeling_enformer.py (11 additions, 2 deletions)
@@ -4,6 +4,7 @@
 import torch
 from torch import nn, einsum
 import torch.nn.functional as F
+import torch.distributed as dist
 from torch.utils.checkpoint import checkpoint_sequential

 from einops import rearrange, reduce
@@ -53,6 +54,12 @@ def _round(x):
 def log(t, eps = 1e-20):
     return torch.log(t.clamp(min = eps))

+# maybe sync batchnorm, for distributed training
+
+def MaybeSyncBatchnorm(is_distributed = None):
+    is_distributed = default(is_distributed, dist.is_initialized() and dist.get_world_size() > 1)
+    return nn.SyncBatchNorm if is_distributed else nn.BatchNorm1d
+
 # losses and metrics

 def poisson_loss(pred, target):
@@ -204,9 +211,11 @@ def forward(self, x):

         return x[:, -trim:trim]

-def ConvBlock(dim, dim_out = None, kernel_size = 1):
+def ConvBlock(dim, dim_out = None, kernel_size = 1, is_distributed = None):
+    batchnorm_klass = MaybeSyncBatchnorm(is_distributed = is_distributed)
+
     return nn.Sequential(
-        nn.BatchNorm1d(dim),
+        batchnorm_klass(dim),
         GELU(),
         nn.Conv1d(dim, default(dim_out, dim), kernel_size, padding = kernel_size // 2)
     )
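
As a quick illustration (not part of the commit), here is a minimal usage sketch of the new is_distributed flag, assuming this version of the package is installed and the script runs as a single, non-distributed process; the tensor sizes (768 channels, sequence length 128) are arbitrary example values.

import torch
from torch import nn

from enformer_pytorch.modeling_enformer import ConvBlock, MaybeSyncBatchnorm

# single process, no process group initialized -> plain BatchNorm1d is chosen
assert MaybeSyncBatchnorm() is nn.BatchNorm1d

# forcing the flag returns the synchronized variant without touching torch.distributed
assert MaybeSyncBatchnorm(is_distributed = True) is nn.SyncBatchNorm

# ConvBlock threads the same flag through to its first layer
block = ConvBlock(dim = 768, kernel_size = 5, is_distributed = True)
print(type(block[0]))         # nn.SyncBatchNorm

# to keep this runnable without a process group, do the forward pass
# with the default (non-distributed) block
block = ConvBlock(dim = 768, kernel_size = 5)
x = torch.randn(2, 768, 128)  # (batch, channels, sequence length)
print(block(x).shape)         # torch.Size([2, 768, 128])
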
setup.py (1 addition, 1 deletion)
@@ -4,7 +4,7 @@
   name = 'enformer-pytorch',
   packages = find_packages(exclude=[]),
   include_package_data = True,
-  version = '0.8.5',
+  version = '0.8.6',
   license='MIT',
   description = 'Enformer - Pytorch',
   author = 'Phil Wang',
