Commit
[hotfix] fixed memory usage of shardformer module replacement (hpcait…
kurisusnowdeng authored Nov 28, 2023
1 parent 7b789f4 commit 126cf18
Showing 2 changed files with 6 additions and 6 deletions.
10 changes: 5 additions & 5 deletions colossalai/shardformer/layer/_operation.py
@@ -473,16 +473,17 @@ def forward(ctx, input_, dim, process_group):
@staticmethod
def backward(ctx, grad_output):
return _split(grad_output, ctx.dim, ctx.process_group), None, None


class HookParameter(torch.autograd.Function):
"""In order to be hooked into Gemini's '__torch_function__', adding a view operation to weight and bias. Used in FusedLayerNorm"""

@staticmethod
def forward(ctx, input, weight, bias):
ctx.save_for_backward(weight, bias)
output = input
return output

@staticmethod
def backward(ctx, grad_output):
weight, bias = ctx.saved_tensors
@@ -491,13 +492,12 @@ def backward(ctx, grad_output):
if bias is not None:
bias = bias.view(bias.shape)
return grad_output, None, None


def hook_paramter_in_backward(input, weight=None, bias=None):
return HookParameter.apply(input, weight, bias)



def _reduce(input_, process_group):
# skip if only one rank involved
if dist.get_world_size(process_group) == 1:
@@ -522,7 +522,7 @@ def _split(input_, dim=-1, process_group=None):

tensor_list = torch.split(input_, dim_size // world_size, dim=dim)
rank = dist.get_rank(process_group)
- output = tensor_list[rank].contiguous()
+ output = tensor_list[rank].clone().contiguous()

return output

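For context (not part of the commit), here is a minimal sketch of why the added `.clone()` matters, assuming a hypothetical `full` tensor and illustrative `world_size`/`rank` values (PyTorch >= 2.0 assumed for `untyped_storage()`). Each chunk returned by `torch.split` is a view sharing storage with the full tensor; when the chunk is already contiguous (e.g. splitting along dim 0), `.contiguous()` is a no-op, so keeping only the local shard still pins the whole tensor's memory. `.clone()` copies just the shard into its own storage, letting the full tensor be freed.

```python
import torch

world_size, rank = 4, 1
full = torch.randn(1024, 1024)  # hypothetical full activation, ~4 MB of float32

# Pre-fix behaviour: the chunk is a view of `full`; along dim 0 it is already
# contiguous, so .contiguous() returns it unchanged and the shard still
# references the whole 1024x1024 storage.
view_shard = torch.split(full, full.shape[0] // world_size, dim=0)[rank].contiguous()
print(view_shard.untyped_storage().nbytes())   # 4194304 bytes (whole tensor)

# Post-fix behaviour: .clone() copies only the 256x1024 shard, so `full`
# can be garbage-collected once it goes out of scope.
owned_shard = torch.split(full, full.shape[0] // world_size, dim=0)[rank].clone().contiguous()
print(owned_shard.untyped_storage().nbytes())  # 1048576 bytes (shard only)
```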
2 changes: 1 addition & 1 deletion colossalai/tensor/d_tensor/comm_spec.py
@@ -112,7 +112,7 @@ def _split(tensor: torch.Tensor, comm_spec: CommSpec):
dim = comm_spec.shard_dim
length = tensor.shape[comm_spec.shard_dim] // dist.get_world_size(process_group)
start = length * dist.get_rank(process_group)
- output = torch.narrow(tensor, dim, start, length).contiguous()
+ output = torch.narrow(tensor, dim, start, length).clone().contiguous()
return output


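The same reasoning applies here: `torch.narrow` also returns a view, and when the shard dimension is already laid out contiguously, `.contiguous()` alone does not copy it. Below is a hedged sketch with hypothetical helper names (`shard_view`, `shard_clone`) standing in for the pre-fix and post-fix behaviour of this `_split` (PyTorch >= 2.0 assumed for `untyped_storage()`):

```python
import torch

def shard_view(world_size: int = 4, rank: int = 0) -> torch.Tensor:
    # Pre-fix: torch.narrow returns a view; along dim 0 it is already
    # contiguous, so .contiguous() is a no-op and the returned shard keeps
    # the entire 1024x1024 tensor's storage alive after this function returns.
    tensor = torch.randn(1024, 1024)
    length = tensor.shape[0] // world_size
    return torch.narrow(tensor, 0, rank * length, length).contiguous()

def shard_clone(world_size: int = 4, rank: int = 0) -> torch.Tensor:
    # Post-fix: .clone() copies just the shard, so the full tensor can be
    # freed as soon as this function returns.
    tensor = torch.randn(1024, 1024)
    length = tensor.shape[0] // world_size
    return torch.narrow(tensor, 0, rank * length, length).clone().contiguous()

print(shard_view().untyped_storage().nbytes())   # 4194304 bytes: whole tensor still referenced
print(shard_clone().untyped_storage().nbytes())  # 1048576 bytes: only the 256x1024 shard
```

Because `.clone()` always allocates fresh storage, the fix trades one extra copy of the local shard for releasing the full unsharded tensor, which is where the memory saving in this hotfix comes from.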
