diff --git a/colossalai/shardformer/layer/_operation.py b/colossalai/shardformer/layer/_operation.py
index 0d8c3d453ce1..8fd92a2ed6b8 100644
--- a/colossalai/shardformer/layer/_operation.py
+++ b/colossalai/shardformer/layer/_operation.py
@@ -473,16 +473,17 @@ def forward(ctx, input_, dim, process_group):
     @staticmethod
     def backward(ctx, grad_output):
         return _split(grad_output, ctx.dim, ctx.process_group), None, None
-    
+
 
 class HookParameter(torch.autograd.Function):
     """In order to be hooked into Gemini's '__torch_function__', adding a view operation to weight and bias. Used in FusedLayerNorm"""
+
     @staticmethod
     def forward(ctx, input, weight, bias):
         ctx.save_for_backward(weight, bias)
         output = input
         return output
-    
+
     @staticmethod
     def backward(ctx, grad_output):
         weight, bias = ctx.saved_tensors
@@ -491,13 +492,12 @@ def backward(ctx, grad_output):
         if bias is not None:
             bias = bias.view(bias.shape)
         return grad_output, None, None
-    
+
 
 def hook_paramter_in_backward(input, weight=None, bias=None):
     return HookParameter.apply(input, weight, bias)
 
 
-
 def _reduce(input_, process_group):
     # skip if only one rank involved
     if dist.get_world_size(process_group) == 1:
@@ -522,7 +522,7 @@ def _split(input_, dim=-1, process_group=None):
     tensor_list = torch.split(input_, dim_size // world_size, dim=dim)
 
     rank = dist.get_rank(process_group)
-    output = tensor_list[rank].contiguous()
+    output = tensor_list[rank].clone().contiguous()
 
     return output
 
diff --git a/colossalai/tensor/d_tensor/comm_spec.py b/colossalai/tensor/d_tensor/comm_spec.py
index 8f5b52aab8f8..fc017c663a81 100644
--- a/colossalai/tensor/d_tensor/comm_spec.py
+++ b/colossalai/tensor/d_tensor/comm_spec.py
@@ -112,7 +112,7 @@ def _split(tensor: torch.Tensor, comm_spec: CommSpec):
     dim = comm_spec.shard_dim
     length = tensor.shape[comm_spec.shard_dim] // dist.get_world_size(process_group)
     start = length * dist.get_rank(process_group)
-    output = torch.narrow(tensor, dim, start, length).contiguous()
+    output = torch.narrow(tensor, dim, start, length).clone().contiguous()
 
     return output
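
Note on the .clone() change (reviewer illustration, not part of the diff): torch.split and torch.narrow return views that share storage with the source tensor, and .contiguous() is a no-op when the slice is already contiguous (e.g. sharding along dim 0). Adding .clone() presumably ensures each rank's shard owns its own memory, so the shard neither keeps the full unsharded tensor alive nor picks up later in-place updates to it. A minimal, self-contained sketch of that aliasing behaviour; the tensor names are made up for the illustration:

    import torch

    full = torch.arange(8, dtype=torch.float32)                      # stand-in for the unsharded tensor
    view_shard = torch.narrow(full, 0, 4, 4).contiguous()            # already contiguous -> still a view of `full`
    owned_shard = torch.narrow(full, 0, 4, 4).clone().contiguous()   # clone() gives the shard its own storage

    full.add_(100)                                                   # in-place update of the source tensor
    print(view_shard)                   # tensor([104., 105., 106., 107.])  -- aliases `full`, sees the update
    print(owned_shard)                  # tensor([4., 5., 6., 7.])          -- independent copy, unaffected
    print(view_shard.data_ptr() - full.data_ptr())   # 16: the view points into `full`'s storage (offset of 4 floats)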