From 896a58de15498c9a51af2b803e292352b9dae7a5 Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Fri, 16 Jun 2023 16:38:23 +0200
Subject: [PATCH] Byebye pytorch 1.9 (#24080)

byebye

---------

Co-authored-by: ydshieh
---
 .../workflows/build-past-ci-docker-images.yml |  2 +-
 .../workflows/self-nightly-past-ci-caller.yml | 13 +-----------
 docker/transformers-past-gpu/Dockerfile       |  2 +-
 .../bridgetower/modeling_bridgetower.py       |  8 +------
 src/transformers/models/vilt/modeling_vilt.py |  7 -------
 src/transformers/pytorch_utils.py             |  8 +------
 src/transformers/trainer.py                   | 21 +++++++------------
 src/transformers/utils/import_utils.py        | 17 ++++-----------
 tests/models/bloom/test_modeling_bloom.py     |  7 -------
 .../bridgetower/test_modeling_bridgetower.py  |  6 ------
 tests/models/levit/test_modeling_levit.py     |  6 ------
 tests/models/tvlt/test_modeling_tvlt.py       |  4 ----
 tests/models/vilt/test_modeling_vilt.py       |  6 ------
 13 files changed, 16 insertions(+), 91 deletions(-)

diff --git a/.github/workflows/build-past-ci-docker-images.yml b/.github/workflows/build-past-ci-docker-images.yml
index 18d88f2d52fa75..aa47dfd08c2d07 100644
--- a/.github/workflows/build-past-ci-docker-images.yml
+++ b/.github/workflows/build-past-ci-docker-images.yml
@@ -15,7 +15,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        version: ["1.13", "1.12", "1.11", "1.10", "1.9"]
+        version: ["1.13", "1.12", "1.11", "1.10"]
     runs-on: ubuntu-latest
     steps:
       -
diff --git a/.github/workflows/self-nightly-past-ci-caller.yml b/.github/workflows/self-nightly-past-ci-caller.yml
index 25e711105fed2f..dfc258e5be856a 100644
--- a/.github/workflows/self-nightly-past-ci-caller.yml
+++ b/.github/workflows/self-nightly-past-ci-caller.yml
@@ -67,21 +67,10 @@ jobs:
       sha: ${{ github.sha }}
     secrets: inherit
 
-  run_past_ci_pytorch_1-9:
-    name: PyTorch 1.9
-    if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
-    needs: [run_past_ci_pytorch_1-10]
-    uses: ./.github/workflows/self-past.yml
-    with:
-      framework: pytorch
-      version: "1.9"
-      sha: ${{ github.sha }}
-    secrets: inherit
-
   run_past_ci_tensorflow_2-11:
     name: TensorFlow 2.11
     if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
-    needs: [run_past_ci_pytorch_1-9]
+    needs: [run_past_ci_pytorch_1-10]
     uses: ./.github/workflows/self-past.yml
     with:
       framework: tensorflow
diff --git a/docker/transformers-past-gpu/Dockerfile b/docker/transformers-past-gpu/Dockerfile
index 7b7f53c85899a4..0cdc9ff0712437 100644
--- a/docker/transformers-past-gpu/Dockerfile
+++ b/docker/transformers-past-gpu/Dockerfile
@@ -24,7 +24,7 @@ ARG FRAMEWORK
 ARG VERSION
 
 # Control `setuptools` version to avoid some issues
-RUN [ "$VERSION" != "1.9" -a "$VERSION" != "1.10" ] && python3 -m pip install -U setuptools || python3 -m pip install -U "setuptools<=59.5"
+RUN [ "$VERSION" != "1.10" ] && python3 -m pip install -U setuptools || python3 -m pip install -U "setuptools<=59.5"
 
 # Remove all frameworks
 RUN python3 -m pip uninstall -y torch torchvision torchaudio tensorflow jax flax
diff --git a/src/transformers/models/bridgetower/modeling_bridgetower.py b/src/transformers/models/bridgetower/modeling_bridgetower.py
index 2eee5c9fe56d6d..4290241fbc097d 100644
--- a/src/transformers/models/bridgetower/modeling_bridgetower.py
+++ b/src/transformers/models/bridgetower/modeling_bridgetower.py
@@ -33,19 +33,13 @@
     SequenceClassifierOutput,
 )
 from ...modeling_utils import PreTrainedModel, apply_chunking_to_forward
-from ...pytorch_utils import find_pruneable_heads_and_indices, is_torch_greater_or_equal_than_1_10, prune_linear_layer
+from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
 from ...utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
 from .configuration_bridgetower import BridgeTowerConfig, BridgeTowerTextConfig, BridgeTowerVisionConfig
 
 
 logger = logging.get_logger(__name__)
 
-if not is_torch_greater_or_equal_than_1_10:
-    logger.warning(
-        f"You are using torch=={torch.__version__}, but torch>=1.10.0 is required to use "
-        "BridgeTowerModel. Please upgrade torch."
-    )
-
 _CONFIG_FOR_DOC = "BridgeTowerConfig"
 _CHECKPOINT_FOR_DOC = "BridgeTower/bridgetower-base"
 _TOKENIZER_FOR_DOC = "RobertaTokenizer"
diff --git a/src/transformers/models/vilt/modeling_vilt.py b/src/transformers/models/vilt/modeling_vilt.py
index 19a1454c6e0b06..6ee1e396a625e3 100755
--- a/src/transformers/models/vilt/modeling_vilt.py
+++ b/src/transformers/models/vilt/modeling_vilt.py
@@ -36,7 +36,6 @@
 from ...modeling_utils import PreTrainedModel
 from ...pytorch_utils import (
     find_pruneable_heads_and_indices,
-    is_torch_greater_or_equal_than_1_10,
     meshgrid,
     prune_linear_layer,
 )
@@ -46,12 +45,6 @@
 
 
 logger = logging.get_logger(__name__)
 
-if not is_torch_greater_or_equal_than_1_10:
-    logger.warning(
-        f"You are using torch=={torch.__version__}, but torch>=1.10.0 is required to use "
-        "ViltModel. Please upgrade torch."
-    )
-
 _CONFIG_FOR_DOC = "ViltConfig"
 _CHECKPOINT_FOR_DOC = "dandelin/vilt-b32-mlm"
diff --git a/src/transformers/pytorch_utils.py b/src/transformers/pytorch_utils.py
index 3beaf31efa6db1..4723c43035e67c 100644
--- a/src/transformers/pytorch_utils.py
+++ b/src/transformers/pytorch_utils.py
@@ -31,7 +31,6 @@
 is_torch_greater_or_equal_than_2_0 = parsed_torch_version_base >= version.parse("2.0")
 is_torch_greater_or_equal_than_1_12 = parsed_torch_version_base >= version.parse("1.12")
 is_torch_greater_or_equal_than_1_11 = parsed_torch_version_base >= version.parse("1.11")
-is_torch_greater_or_equal_than_1_10 = parsed_torch_version_base >= version.parse("1.10")
 is_torch_less_than_1_11 = parsed_torch_version_base < version.parse("1.11")
 
 
@@ -275,12 +274,7 @@ def meshgrid(
 
     Reference: https://pytorch.org/docs/1.13/generated/torch.meshgrid.html
     """
-    if is_torch_greater_or_equal_than_1_10:
-        return torch.meshgrid(*tensors, indexing=indexing)
-    else:
-        if indexing != "ij":
-            raise ValueError('torch.meshgrid only supports `indexing="ij"` for torch<1.10.')
-        return torch.meshgrid(*tensors)
+    return torch.meshgrid(*tensors, indexing=indexing)
 
 
 def id_tensor_storage(tensor: torch.Tensor) -> Tuple[torch.device, int, int]:
diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index e9fec6a5466b62..f47b57d6d9a16e 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -70,7 +70,7 @@
 from .modeling_utils import PreTrainedModel, load_sharded_checkpoint, unwrap_model
 from .models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES, MODEL_MAPPING_NAMES
 from .optimization import Adafactor, get_scheduler
-from .pytorch_utils import ALL_LAYERNORM_LAYERS, is_torch_greater_or_equal_than_1_10
+from .pytorch_utils import ALL_LAYERNORM_LAYERS
 from .tokenization_utils_base import PreTrainedTokenizerBase
 from .trainer_callback import (
     CallbackHandler,
@@ -155,8 +155,6 @@
     from .utils.generic import ContextManagers
 
 
-_is_native_cpu_amp_available = is_torch_greater_or_equal_than_1_10
-
 DEFAULT_CALLBACKS = [DefaultFlowCallback]
 DEFAULT_PROGRESS_CALLBACK = ProgressCallback
 
@@ -621,10 +619,8 @@ def __init__(
                 if args.device == torch.device("cpu"):
                     if args.fp16:
                         raise ValueError("Tried to use `fp16` but it is not supported on cpu")
-                    elif _is_native_cpu_amp_available:
-                        args.half_precision_backend = "cpu_amp"
                     else:
-                        raise ValueError("Tried to use cpu amp but native cpu amp is not available")
+                        args.half_precision_backend = "cpu_amp"
                 else:
                     args.half_precision_backend = "cuda_amp"
 
@@ -2595,14 +2591,11 @@ def autocast_smart_context_manager(self, cache_enabled: Optional[bool] = True):
         arguments, depending on the situation.
         """
         if self.use_cuda_amp or self.use_cpu_amp:
-            if is_torch_greater_or_equal_than_1_10:
-                ctx_manager = (
-                    torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
-                    if self.use_cpu_amp
-                    else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
-                )
-            else:
-                ctx_manager = torch.cuda.amp.autocast()
+            ctx_manager = (
+                torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
+                if self.use_cpu_amp
+                else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
+            )
         else:
             ctx_manager = contextlib.nullcontext() if sys.version_info >= (3, 7) else contextlib.suppress()
 
diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py
index 03b564876898dd..bbdae6a7b4b844 100644
--- a/src/transformers/utils/import_utils.py
+++ b/src/transformers/utils/import_utils.py
@@ -258,16 +258,12 @@ def is_torch_bf16_gpu_available():
     # since currently no utility function is available we build our own.
     # some bits come from https://github.com/pytorch/pytorch/blob/2289a12f21c54da93bf5d696e3f9aea83dd9c10d/torch/testing/_internal/common_cuda.py#L51
     # with additional check for torch version
-    # to succeed:
-    # 1. torch >= 1.10 (1.9 should be enough for AMP API has changed in 1.10, so using 1.10 as minimal)
-    # 2. the hardware needs to support bf16 (GPU arch >= Ampere, or CPU)
-    # 3. if using gpu, CUDA >= 11
-    # 4. torch.autocast exists
+    # to succeed: (torch is required to be >= 1.10 anyway)
+    # 1. the hardware needs to support bf16 (GPU arch >= Ampere, or CPU)
+    # 2. if using gpu, CUDA >= 11
+    # 3. torch.autocast exists
     # XXX: one problem here is that it may give invalid results on mixed gpus setup, so it's
     # really only correct for the 0th gpu (or currently set default device if different from 0)
-    if version.parse(version.parse(torch.__version__).base_version) < version.parse("1.10"):
-        return False
-
     if torch.cuda.is_available() and torch.version.cuda is not None:
         if torch.cuda.get_device_properties(torch.cuda.current_device()).major < 8:
             return False
@@ -287,9 +283,6 @@ def is_torch_bf16_cpu_available():
 
     import torch
 
-    if version.parse(version.parse(torch.__version__).base_version) < version.parse("1.10"):
-        return False
-
     try:
         # multiple levels of AttributeError depending on the pytorch version so do them all in one check
         _ = torch.cpu.amp.autocast
@@ -526,8 +519,6 @@ def is_optimum_neuron_available():
 
 
 def is_safetensors_available():
-    if is_torch_available() and version.parse(_torch_version) < version.parse("1.10"):
-        return False
     return _safetensors_available
 
 
diff --git a/tests/models/bloom/test_modeling_bloom.py b/tests/models/bloom/test_modeling_bloom.py
index 678c46bd0ca471..f7ef199febdb94 100644
--- a/tests/models/bloom/test_modeling_bloom.py
+++ b/tests/models/bloom/test_modeling_bloom.py
@@ -38,9 +38,6 @@
         BloomModel,
         BloomTokenizerFast,
     )
-    from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_10
-else:
-    is_torch_greater_or_equal_than_1_10 = False
 
 
 @require_torch
@@ -518,10 +515,6 @@ def setUp(self):
         super().setUp()
         self.path_bigscience_model = "bigscience/bigscience-small-testing"
 
-    @unittest.skipIf(
-        not is_torch_greater_or_equal_than_1_10,
-        "Test failed with torch < 1.10 (`LayerNormKernelImpl` not implemented for `BFloat16`)",
-    )
     @require_torch
     def test_embeddings(self):
         # The config in this checkpoint has `bfloat16` as `torch_dtype` -> model in `bfloat16`
diff --git a/tests/models/bridgetower/test_modeling_bridgetower.py b/tests/models/bridgetower/test_modeling_bridgetower.py
index 9c40f376a7b573..0eb293fdc33cfb 100644
--- a/tests/models/bridgetower/test_modeling_bridgetower.py
+++ b/tests/models/bridgetower/test_modeling_bridgetower.py
@@ -50,9 +50,6 @@
         BridgeTowerModel,
     )
     from transformers.models.bridgetower.modeling_bridgetower import BRIDGETOWER_PRETRAINED_MODEL_ARCHIVE_LIST
-    from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_10
-else:
-    is_torch_greater_or_equal_than_1_10 = False
 
 if is_vision_available():
     from PIL import Image
@@ -298,7 +295,6 @@ def prepare_config_and_inputs_for_common(self):
 
 
 @require_torch
-@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "BridgeTower is only available in torch v1.10+")
 class BridgeTowerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     all_model_classes = (
         (
@@ -516,7 +512,6 @@ def prepare_img():
 
 @require_torch
 @require_vision
-@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "BridgeTower is only available in torch v1.10+")
 class BridgeTowerModelIntegrationTest(unittest.TestCase):
     @cached_property
     def default_processor(self):
@@ -601,7 +596,6 @@ def test_constrastive_learning(self):
 
 @slow
 @require_torch
-@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "BridgeTower is only available in torch v1.10+")
 class BridgeTowerModelTrainingTest(unittest.TestCase):
     all_training_supported_model_classes = (
         (BridgeTowerForImageAndTextRetrieval, BridgeTowerForMaskedLM, BridgeTowerForContrastiveLearning)
diff --git a/tests/models/levit/test_modeling_levit.py b/tests/models/levit/test_modeling_levit.py
index 18764a0090f535..d4e803ed4d2f04 100644
--- a/tests/models/levit/test_modeling_levit.py
+++ b/tests/models/levit/test_modeling_levit.py
@@ -20,8 +20,6 @@
 import warnings
 from math import ceil, floor
 
-from packaging import version
-
 from transformers import LevitConfig
 from transformers.file_utils import cached_property, is_torch_available, is_vision_available
 from transformers.models.auto import get_values
@@ -346,10 +344,6 @@ def test_training_gradient_checkpointing(self):
             loss.backward()
 
     def test_problem_types(self):
-        parsed_torch_version_base = version.parse(version.parse(torch.__version__).base_version)
-        if parsed_torch_version_base.base_version.startswith("1.9"):
-            self.skipTest(reason="This test fails with PyTorch 1.9.x: some CUDA issue")
-
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
 
         problem_types = [
diff --git a/tests/models/tvlt/test_modeling_tvlt.py b/tests/models/tvlt/test_modeling_tvlt.py
index 4b307d489e1a4a..e1864e48dd0481 100644
--- a/tests/models/tvlt/test_modeling_tvlt.py
+++ b/tests/models/tvlt/test_modeling_tvlt.py
@@ -42,9 +42,6 @@
     from transformers import TvltForAudioVisualClassification, TvltForPreTraining, TvltModel
     from transformers.models.tvlt.modeling_tvlt import TVLT_PRETRAINED_MODEL_ARCHIVE_LIST
-    from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_10
-else:
-    is_torch_greater_or_equal_than_1_10 = False
 
 
 if is_datasets_available():
@@ -322,7 +319,6 @@ def prepare_audio_values(self):
 
 
 @require_torch
-@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "TVLT is only available in torch v1.10+")
 class TvltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     all_model_classes = (
         (TvltModel, TvltForPreTraining, TvltForAudioVisualClassification) if is_torch_available() else ()
diff --git a/tests/models/vilt/test_modeling_vilt.py b/tests/models/vilt/test_modeling_vilt.py
index 958527dbf483b2..772091d5b976d5 100644
--- a/tests/models/vilt/test_modeling_vilt.py
+++ b/tests/models/vilt/test_modeling_vilt.py
@@ -42,9 +42,6 @@
         ViltModel,
     )
     from transformers.models.vilt.modeling_vilt import VILT_PRETRAINED_MODEL_ARCHIVE_LIST
-    from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_10
-else:
-    is_torch_greater_or_equal_than_1_10 = False
 
 if is_vision_available():
     import PIL
@@ -218,7 +215,6 @@ def prepare_pixel_values(self):
 
 
 @require_torch
-@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "Vilt is only available in torch v1.10+")
 class ViltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     all_model_classes = (
         (
@@ -520,7 +516,6 @@ def test_model_from_pretrained(self):
 
 
 @require_torch
-@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "Vilt is only available in torch v1.10+")
 class ViltForImagesAndTextClassificationModelTest(ViltModelTest, unittest.TestCase):
     all_model_classes = (ViltForImagesAndTextClassification,) if is_torch_available() else ()
 
@@ -545,7 +540,6 @@ def prepare_img():
 
 @require_torch
 @require_vision
-@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "Vilt is only available in torch v1.10+")
 class ViltModelIntegrationTest(unittest.TestCase):
     @cached_property
     def default_processor(self):