
Cleanup required_torch_version code and references. #5370

Merged: 12 commits, Apr 10, 2024
28 changes: 4 additions & 24 deletions deepspeed/comm/torch.py
@@ -9,6 +9,7 @@
from .backend import *
from .comm import *
from ..runtime import compiler
from deepspeed.utils.torch import required_torch_version
import os

DS_COMM_ALL_GATHER_OFF = False
@@ -18,40 +19,19 @@
DS_COMM_REDUCE_OFF = False


def is_torch_ver_eq_2_0():
TORCH_MAJOR, TORCH_MINOR = map(int, torch.__version__.split('.')[:2])
if TORCH_MAJOR == 2 and TORCH_MINOR == 0:
return True
return False


def is_torch_ver_ge_2_1():
TORCH_MAJOR, TORCH_MINOR = map(int, torch.__version__.split('.')[:2])
if TORCH_MAJOR >= 2 and TORCH_MINOR >= 1:
return True
return False


def torch_ver_ge_1_13():
TORCH_MAJOR, TORCH_MINOR = map(int, torch.__version__.split('.')[:2])
if TORCH_MAJOR >= 1 and TORCH_MINOR >= 13:
return True
return False


def has_coalescing_manager():
has_c10d = hasattr(torch.distributed, 'distributed_c10d')
return has_c10d and hasattr(torch.distributed.distributed_c10d, '_coalescing_manager')


def has_all_reduce_coalesced():
return hasattr(torch.distributed, "all_reduce_coalesced") and torch_ver_ge_1_13()
return hasattr(torch.distributed, "all_reduce_coalesced") and required_torch_version(min_version=1.13)


def get_coalescing_manager(group, device, reqs, async_op):
if is_torch_ver_eq_2_0():
if required_torch_version(min_version=2.0, max_version=2.0):
return torch.distributed.distributed_c10d._coalescing_manager(group, device=device, reqs=reqs)
elif is_torch_ver_ge_2_1():
elif required_torch_version(min_version=2.1):
return torch.distributed.distributed_c10d._coalescing_manager(group, device=device, async_ops=async_op)
else:
return torch.distributed.distributed_c10d._coalescing_manager(group, reqs)
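For reference, a minimal sketch of how the deleted version checks above map onto the shared helper used by this PR (the mapping is taken from the replacements in this file; note that required_torch_version compares fully parsed versions, so boundary behavior may differ slightly from the old major/minor comparisons):

from deepspeed.utils.torch import required_torch_version

# Rough equivalents of the removed helpers, as used in this file:
required_torch_version(min_version=2.0, max_version=2.0)  # replaces is_torch_ver_eq_2_0()
required_torch_version(min_version=2.1)                    # replaces is_torch_ver_ge_2_1()
required_torch_version(min_version=1.13)                   # replaces torch_ver_ge_1_13()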
2 changes: 1 addition & 1 deletion deepspeed/elasticity/utils.py
@@ -3,7 +3,7 @@

# DeepSpeed Team

from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version


def is_torch_elastic_compatible():
2 changes: 1 addition & 1 deletion deepspeed/runtime/comm/nccl.py
@@ -9,7 +9,7 @@
import numpy as np

from deepspeed.runtime.compression.cupy import CupyBackend
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version
from deepspeed.accelerator import get_accelerator


3 changes: 2 additions & 1 deletion deepspeed/runtime/fp16/fused_optimizer.py
@@ -11,9 +11,10 @@
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors

from deepspeed.runtime.base_optimizer import DeepSpeedOptimizer
from deepspeed.runtime.utils import get_global_norm, get_grad_norm, CheckOverflow, get_weight_norm, required_torch_version, get_norm_with_moe_layers
from deepspeed.runtime.utils import get_global_norm, get_grad_norm, CheckOverflow, get_weight_norm, get_norm_with_moe_layers
from deepspeed.runtime.fp16.loss_scaler import INITIAL_LOSS_SCALE, SCALE_WINDOW, MIN_LOSS_SCALE
from deepspeed.utils import logger, log_dist
from deepspeed.utils.torch import required_torch_version
from deepspeed.checkpoint.constants import OPTIMIZER_STATE_DICT, CLIP_GRAD
from deepspeed.accelerator import get_accelerator
from deepspeed.moe.utils import is_moe_param_group
2 changes: 1 addition & 1 deletion deepspeed/runtime/fp16/onebit/adam.py
@@ -7,7 +7,7 @@
import torch
import numpy as np
from deepspeed.accelerator import get_accelerator
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version
from deepspeed import comm as dist


2 changes: 1 addition & 1 deletion deepspeed/runtime/fp16/onebit/lamb.py
@@ -7,7 +7,7 @@
import torch
import numpy as np
from deepspeed import comm as dist
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
from deepspeed.accelerator import get_accelerator

2 changes: 1 addition & 1 deletion deepspeed/runtime/fp16/onebit/zoadam.py
@@ -7,7 +7,7 @@
import torch
import numpy as np
from deepspeed.accelerator import get_accelerator
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version
from deepspeed import comm as dist


3 changes: 2 additions & 1 deletion deepspeed/runtime/fp16/unfused_optimizer.py
@@ -12,9 +12,10 @@
from torch._utils import _flatten_dense_tensors

from deepspeed.runtime.base_optimizer import DeepSpeedOptimizer
from deepspeed.runtime.utils import get_global_norm, CheckOverflow, get_weight_norm, required_torch_version
from deepspeed.runtime.utils import get_global_norm, CheckOverflow, get_weight_norm
from deepspeed.runtime.fp16.loss_scaler import INITIAL_LOSS_SCALE, SCALE_WINDOW, MIN_LOSS_SCALE
from deepspeed.utils import logger
from deepspeed.utils.torch import required_torch_version
from deepspeed.checkpoint.constants import OPTIMIZER_STATE_DICT
from deepspeed.accelerator import get_accelerator
from deepspeed import comm as dist
8 changes: 2 additions & 6 deletions deepspeed/runtime/pipe/p2p.py
@@ -9,9 +9,7 @@
import torch
from deepspeed import comm as dist

# To query whether we have send/recv support
from packaging.version import Version
from deepspeed.git_version_info import torch_info
from deepspeed.utils.torch import required_torch_version
from deepspeed.accelerator import get_accelerator

_groups = None
@@ -21,9 +19,7 @@


def can_send_recv() -> bool:
torch_version = Version(torch_info['version'])
sendrecv_min = Version('1.8')
return torch_version >= sendrecv_min
return required_torch_version(min_version=1.8)


#initializes adjacent process groups
15 changes: 0 additions & 15 deletions deepspeed/runtime/utils.py
@@ -14,7 +14,6 @@
import psutil
import gc
from math import sqrt
from packaging import version as pkg_version

import torch
from deepspeed import comm as dist
@@ -1036,20 +1035,6 @@ def get_inactive_params(param_list):
param.ds_status == ZeroParamStatus.NOT_AVAILABLE)]


def required_torch_version(min_version=None, max_version=None):
assert min_version or max_version, "Must provide a min_version or max_version argument"

torch_version = pkg_version.parse(torch.__version__)

if min_version and pkg_version.parse(str(min_version)) > torch_version:
return False

if max_version and pkg_version.parse(str(max_version)) < torch_version:
return False

return True


def get_norm_with_moe_layers(non_expert_norm, mpu, expert_tensors, norm_type=2):
""" Compute the global norm with MoE experts

22 changes: 22 additions & 0 deletions deepspeed/utils/torch.py
@@ -0,0 +1,22 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

from packaging import version as pkg_version

import torch


def required_torch_version(min_version=None, max_version=None):
assert min_version or max_version, "Must provide a min_version or max_version argument"

torch_version = pkg_version.parse(torch.__version__)

if min_version and pkg_version.parse(str(min_version)) > torch_version:
return False

if max_version and pkg_version.parse(str(max_version)) < torch_version:
return False

return True
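A minimal usage sketch of the relocated helper, following the call patterns that appear elsewhere in this diff (the pytest marker mirrors the Megatron and compression tests; the runtime branch mirrors deepspeed/comm/torch.py):

import pytest

from deepspeed.utils.torch import required_torch_version

# Skip an entire test module when the installed torch falls outside the supported range.
pytestmark = pytest.mark.skipif(not required_torch_version(min_version=1.5, max_version=1.13),
                                reason='Megatron-LM package requires Pytorch version >=1.5 and <=1.13')

# Or branch at runtime on the installed torch version.
if required_torch_version(min_version=2.1):
    pass  # torch >= 2.1 code path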
2 changes: 1 addition & 1 deletion tests/unit/alexnet_model.py
@@ -11,7 +11,7 @@
import deepspeed
import deepspeed.comm as dist
import deepspeed.runtime.utils as ds_utils
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version
from deepspeed.accelerator import get_accelerator
from deepspeed.runtime.pipe.module import PipelineModule, LayerSpec

2 changes: 1 addition & 1 deletion tests/unit/checkpoint/test_mics_optimizer.py
@@ -8,7 +8,7 @@

import deepspeed

from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version
from unit.common import DistributedTest
from unit.simple_model import *
from unit.checkpoint.common import *
2 changes: 1 addition & 1 deletion tests/unit/checkpoint/test_moe_checkpoint.py
@@ -4,7 +4,7 @@
# DeepSpeed Team

from deepspeed.moe.utils import split_params_into_different_moe_groups_for_optimizer
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version

from unit.common import DistributedTest
from unit.simple_model import *
2 changes: 1 addition & 1 deletion tests/unit/checkpoint/test_universal_checkpoint.py
@@ -7,7 +7,7 @@
from types import SimpleNamespace
from torch.utils._pytree import tree_map

from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version
from deepspeed.checkpoint import UNIVERSAL_CHECKPOINT_INFO
from deepspeed.checkpoint.ds_to_universal import main as convert_to_universal

2 changes: 1 addition & 1 deletion tests/unit/checkpoint/test_zero_optimizer.py
@@ -8,7 +8,7 @@
from deepspeed.ops.op_builder import CPUAdamBuilder
from deepspeed.checkpoint.utils import clone_tensors_for_torch_save, get_model_ckpt_name_for_rank
from deepspeed.accelerator import get_accelerator
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version

from unit.common import DistributedTest, DistributedFixture
from unit.simple_model import *
2 changes: 1 addition & 1 deletion tests/unit/compression/test_compression.py
@@ -14,7 +14,7 @@
from deepspeed.compression.basic_layer import LinearLayer_Compress, ColumnParallelLinear_Compress, RowParallelLinear_Compress
from deepspeed.compression.helper import convert_conv1d_to_linear
from deepspeed.accelerator import get_accelerator
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version
from unit.common import DistributedTest

pytestmark = pytest.mark.skipif(not required_torch_version(min_version=1.5),
(file path not shown)
@@ -11,7 +11,7 @@
from deepspeed.inference.quantization.quantization import _init_group_wise_weight_quantization
from deepspeed.inference.quantization.utils import Quantizer, DeQuantizer
from deepspeed.inference.quantization.layers import QuantizedLinear
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version
from transformers.models.opt.modeling_opt import OPTDecoderLayer
from transformers import AutoConfig, OPTConfig, AutoModel
import pytest
(file path not shown)
@@ -13,7 +13,7 @@
from deepspeed.accelerator import get_accelerator
from unit.common import DistributedTest, DistributedFixture
from unit.megatron_model import get_gpt2_model, get_megatron_version
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version

pytestmark = pytest.mark.skipif(not required_torch_version(min_version=1.5, max_version=1.13),
reason='Megatron-LM package requires Pytorch version >=1.5 and <=1.13')
(file path not shown)
@@ -15,7 +15,7 @@
from unit.megatron_model import MockGPT2ModelPipe as GPT2ModelPipe
from deepspeed.utils import RepeatingLoader
from deepspeed.accelerator import get_accelerator
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version

pytestmark = pytest.mark.skipif(not required_torch_version(min_version=1.5, max_version=1.13),
reason='Megatron-LM package requires Pytorch version >=1.5 and <=1.13')
2 changes: 1 addition & 1 deletion tests/unit/moe/test_moe.py
@@ -13,7 +13,7 @@
from deepspeed import get_accelerator
from deepspeed.moe.sharded_moe import top1gating
from deepspeed.moe.utils import split_params_into_different_moe_groups_for_optimizer, is_moe_param
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version


@pytest.mark.parametrize("zero_stage", [0, 1, 2])
2 changes: 1 addition & 1 deletion tests/unit/moe/test_moe_tp.py
@@ -7,7 +7,7 @@
import deepspeed
import pytest
from unit.common import DistributedTest
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version
from deepspeed.moe.layer import MoE


(file path not shown)
@@ -9,7 +9,7 @@
from deepspeed.profiling.flops_profiler import get_model_profile
from unit.simple_model import SimpleModel, random_dataloader
from unit.common import DistributedTest
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version
from deepspeed.accelerator import get_accelerator

if torch.half not in get_accelerator().supported_dtypes():
2 changes: 1 addition & 1 deletion tests/unit/runtime/compile/test_compile_wrapper.py
@@ -8,7 +8,7 @@

import deepspeed
from deepspeed.accelerator import get_accelerator
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version

from unit.common import DistributedTest

2 changes: 1 addition & 1 deletion tests/unit/runtime/compile/test_compile_zero.py
@@ -7,7 +7,7 @@
import torch

from deepspeed.runtime.zero.offload_config import OffloadDeviceEnum
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version
from deepspeed.accelerator import get_accelerator

from unit.runtime.compile.util import compare_loss
2 changes: 1 addition & 1 deletion tests/unit/runtime/compile/test_load_config.py
@@ -9,7 +9,7 @@
from unit.simple_model import SimpleModel
import deepspeed
from deepspeed.accelerator import get_accelerator
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version

from unit.common import DistributedTest

2 changes: 1 addition & 1 deletion tests/unit/runtime/half_precision/onebit/test_onebit.py
@@ -17,7 +17,7 @@
from unit.common import DistributedTest
from unit.simple_model import SimpleModel, random_dataloader
from unit.alexnet_model import AlexNetPipe, train_cifar
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version
from deepspeed.accelerator import get_accelerator

PipeTopo = PipeDataParallelTopology
2 changes: 1 addition & 1 deletion tests/unit/runtime/half_precision/test_fp16.py
@@ -10,7 +10,7 @@
from deepspeed.ops.adam import FusedAdam
from unit.common import DistributedTest
from unit.simple_model import SimpleModel, SimpleOptimizer, random_dataloader, SimpleMoEModel, sequence_dataloader
from deepspeed.runtime.utils import required_torch_version
from deepspeed.utils.torch import required_torch_version
from deepspeed.accelerator import get_accelerator
from deepspeed.ops.op_builder import CPUAdamBuilder
from deepspeed.moe.utils import split_params_into_different_moe_groups_for_optimizer
3 changes: 2 additions & 1 deletion tests/unit/runtime/test_ds_initialize.py
@@ -17,7 +17,8 @@
from deepspeed.ops.adam import FusedAdam
from deepspeed.runtime.lr_schedules import WARMUP_LR, WarmupLR
from deepspeed.runtime.config import ADAM_OPTIMIZER
from deepspeed.runtime.utils import see_memory_usage, required_torch_version
from deepspeed.runtime.utils import see_memory_usage
from deepspeed.utils.torch import required_torch_version
from deepspeed.accelerator import get_accelerator


15 changes: 0 additions & 15 deletions tests/unit/util.py
@@ -7,7 +7,6 @@
import torch
from deepspeed.accelerator import get_accelerator, is_current_accelerator_supported
from deepspeed.git_version_info import torch_info
from packaging import version as pkg_version


def skip_on_arch(min_arch=7):
@@ -62,20 +61,6 @@ def bf16_required_version_check(accelerator_check=True):
return False


def required_torch_version(min_version=None, max_version=None):
assert min_version or max_version, "Must provide a min_version or max_version argument"

torch_version = pkg_version.parse(torch.__version__)

if min_version and pkg_version.parse(str(min_version)) > torch_version:
return False

if max_version and pkg_version.parse(str(max_version)) < torch_version:
return False

return True


def required_amp_check():
from importlib.util import find_spec
if find_spec('apex') is None: