
Commit 1e8ba21

Merge branch 'master' into mrwyattii/pydantic-2-support
mrwyattii authored Mar 2, 2024
2 parents 43e6367 + 4578c24
Showing 2 changed files with 21 additions and 13 deletions.
13 changes: 12 additions & 1 deletion .github/workflows/nv-mii.yml
@@ -2,6 +2,12 @@ name: nv-mii
 
 on:
   workflow_dispatch:
+    inputs:
+      mii_branch:
+        description: 'DeepSpeed-MII Branch'
+        required: false
+        default: 'main'
+        type: string
   pull_request:
     paths:
       - '.github/workflows/nv-mii.yml'
@@ -55,7 +61,12 @@ jobs:
       - name: MII unit tests
         run: |
-          git clone https://github.com/microsoft/DeepSpeed-MII.git
+          BRANCH="main"
+          if [[ ! -z "${{ github.event.inputs.mii_branch }}" ]]; then
+            BRANCH="${{ github.event.inputs.mii_branch }}"
+          fi
+          echo "Cloning DeepSpeed-MII branch: $BRANCH"
+          git clone -b $BRANCH --depth=1 https://github.com/microsoft/DeepSpeed-MII.git
           cd DeepSpeed-MII
           pip install .[dev]
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
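For context on the new workflow_dispatch input above: such a run can be triggered against a specific DeepSpeed-MII branch through GitHub's workflow-dispatch REST endpoint. A minimal sketch, not part of this commit; the token in GITHUB_TOKEN and the branch name "my-mii-fix" are assumptions for illustration:

    # Sketch: trigger the nv-mii workflow with the new mii_branch input via the
    # GitHub REST API (POST .../actions/workflows/{workflow_file}/dispatches).
    # GITHUB_TOKEN and "my-mii-fix" are hypothetical, for illustration only.
    import os
    import requests

    resp = requests.post(
        "https://api.github.com/repos/microsoft/DeepSpeed/actions/workflows/nv-mii.yml/dispatches",
        headers={
            "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
            "Accept": "application/vnd.github+json",
        },
        # "ref" is the DeepSpeed branch to run the workflow on; "inputs" feeds
        # the workflow_dispatch inputs defined in the diff above.
        json={"ref": "master", "inputs": {"mii_branch": "my-mii-fix"}},
    )
    resp.raise_for_status()  # GitHub returns 204 No Content on success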
21 changes: 9 additions & 12 deletions deepspeed/runtime/zero/partition_parameters.py
@@ -1635,19 +1635,16 @@ def _partition_param_sec(self, param, buffer=None, has_been_updated=False):
             secondary_end = secondary_start + secondary_partition_size
 
             one_dim_param = param.contiguous().view(-1)
-            start = partition_size * self.rank
-            end = start + partition_size
-            if start < param.ds_numel and end <= param.ds_numel:
-                if secondary_start < param.ds_numel and secondary_end <= param.ds_numel:
-                    sec_src_tensor = one_dim_param.narrow(0, secondary_start, secondary_partition_size)
-                    param.ds_secondary_tensor.copy_(sec_src_tensor)
+            # ds_numel is unpadded, so the last chunk of the secondary tensor might not be secondary_partition_size
+            sec_numel = param.ds_numel - secondary_start if secondary_end > param.ds_numel else secondary_partition_size
 
-            else:
-                if start < param.ds_numel:
-                    elements_to_copy = param.ds_numel - start
-                    elements_to_copy_sec = elements_to_copy * param.ds_secondary_tensor_num_of_groups
-                    param.ds_secondary_tensor.narrow(0, 0, elements_to_copy_sec).copy_(
-                        one_dim_param.narrow(0, secondary_start, elements_to_copy_sec))
+            # copy from full tensor to secondary tensor
+            param.ds_secondary_tensor.narrow(0, 0,
+                                             sec_numel).copy_(one_dim_param.narrow(0, secondary_start, sec_numel))
+
+            # TODO: This is a temporary fix to avoid the issue that 2nd tensor all-gather happens before 2nd tensor partition is done
+            get_accelerator().current_stream().synchronize()
 
             print_rank_0(f"{param.ds_id} partitioned type {param.dtype} dev {param.device} shape {param.shape}",
                          force=False)
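The deleted code handled the (padded) last partition in a separate branch; the new code folds it into a single expression for sec_numel. A toy sketch of that arithmetic, using standalone numbers rather than DeepSpeed internals:

    # Toy illustration of the sec_numel expression above: ds_numel is the
    # unpadded element count, so only the last secondary chunk can fall short
    # of the padded secondary_partition_size.
    ds_numel = 10                 # unpadded parameter size
    secondary_partition_size = 4  # padded chunk size per secondary rank

    for rank in range(3):
        secondary_start = rank * secondary_partition_size
        secondary_end = secondary_start + secondary_partition_size
        sec_numel = ds_numel - secondary_start if secondary_end > ds_numel else secondary_partition_size
        print(f"rank {rank}: copy {sec_numel} elements from offset {secondary_start}")
    # rank 0: copy 4 elements from offset 0
    # rank 1: copy 4 elements from offset 4
    # rank 2: copy 2 elements from offset 8   (10 - 8, not the padded 4)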
