diff --git a/.circleci/docker/Dockerfile b/.circleci/docker/Dockerfile
index 7212c32e9c..fb5f495608 100644
--- a/.circleci/docker/Dockerfile
+++ b/.circleci/docker/Dockerfile
@@ -1,6 +1,6 @@
-ARG PYTORCH="1.8.1"
-ARG CUDA="10.2"
-ARG CUDNN="7"
+ARG PYTORCH="2.5.0"
+ARG CUDA="12.4"
+ARG CUDNN="9"
 
 FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
 
diff --git a/.circleci/test.yml b/.circleci/test.yml
index f0718f11e3..9c7935e1de 100644
--- a/.circleci/test.yml
+++ b/.circleci/test.yml
@@ -108,7 +108,7 @@ jobs:
             pip install wheel
       - run:
          name: Install PyTorch
-          command: pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch_stable.html
+          command: pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch -f https://download.pytorch.org/whl/torchvision
       - run:
           name: Build MMEngine from source
           command: pip install -e . -v
@@ -137,7 +137,7 @@ jobs:
         type: string
       cuda:
         type: enum
-        enum: ["10.1", "10.2", "11.1", "11.7", "11.8"]
+        enum: ["10.1", "10.2", "11.1", "11.7", "11.8", "12.1", "12.4"]
       cudnn:
         type: integer
         default: 7
@@ -267,9 +267,9 @@ workflows:
             - lint
       - build_cpu:
           name: maximum_version_cpu
-          torch: 2.1.0
-          torchvision: 0.16.0
-          python: 3.9.0
+          torch: 2.5.0
+          torchvision: 0.20.0
+          python: 3.11.9
           requires:
             - minimum_version_cpu
       - hold_integration_test:
@@ -296,9 +296,9 @@ workflows:
             - hold
       - build_cuda:
           name: maximum_version_gpu
-          torch: 2.1.0
-          cuda: "11.8"
-          cudnn: 8
+          torch: 2.5.0
+          cuda: "12.4"
+          cudnn: 9
           requires:
             - hold
   merge_stage_test:
diff --git a/.gitignore b/.gitignore
index 5a48f34473..8a400bd415 100644
--- a/.gitignore
+++ b/.gitignore
@@ -109,6 +109,9 @@ venv.bak/
 .vscode
 .idea
 .DS_Store
+.aim
+.dvc
+.dvcignore
 
 # custom
 *.pkl
diff --git a/docker/dev/Dockerfile b/docker/dev/Dockerfile
index f63c284365..ba993e6e5c 100644
--- a/docker/dev/Dockerfile
+++ b/docker/dev/Dockerfile
@@ -1,6 +1,6 @@
-ARG PYTORCH="1.8.1"
-ARG CUDA="10.2"
-ARG CUDNN="7"
+ARG PYTORCH="2.5.0"
+ARG CUDA="12.4"
+ARG CUDNN="9"
 
 FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
 
diff --git a/docker/release/Dockerfile b/docker/release/Dockerfile
index de099f4c2e..5b9520e02f 100644
--- a/docker/release/Dockerfile
+++ b/docker/release/Dockerfile
@@ -1,6 +1,6 @@
-ARG PYTORCH="1.8.1"
-ARG CUDA="10.2"
-ARG CUDNN="7"
+ARG PYTORCH="2.5.0"
+ARG CUDA="12.4"
+ARG CUDNN="9"
 
 FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
 
diff --git a/mmengine/optim/optimizer/builder.py b/mmengine/optim/optimizer/builder.py
index 8557f4d34c..c5f361a5bd 100644
--- a/mmengine/optim/optimizer/builder.py
+++ b/mmengine/optim/optimizer/builder.py
@@ -160,15 +160,9 @@ def register_bitsandbytes_optimizers() -> List[str]:
 
 
 def register_transformers_optimizers():
-    transformer_optimizers = []
-    try:
-        from transformers import Adafactor
-    except ImportError:
-        pass
-    else:
-        OPTIMIZERS.register_module(name='Adafactor', module=Adafactor)
-        transformer_optimizers.append('Adafactor')
-    return transformer_optimizers
+    return [
+        'Adafactor',
+    ]
 
 
 TRANSFORMERS_OPTIMIZERS = register_transformers_optimizers()
diff --git a/mmengine/testing/_internal/distributed.py b/mmengine/testing/_internal/distributed.py
index b795cc9456..5f3dffa810 100644
--- a/mmengine/testing/_internal/distributed.py
+++ b/mmengine/testing/_internal/distributed.py
@@ -94,8 +94,15 @@ def wrapper(self):
     # or run the underlying test function.
     def __init__(self, method_name: str = 'runTest') -> None:
         super().__init__(method_name)
-        fn = getattr(self, method_name)
-        setattr(self, method_name, self.join_or_run(fn))
+        try:
+            fn = getattr(self, method_name)
+            setattr(self, method_name, self.join_or_run(fn))
+        except AttributeError as e:
+            if method_name != 'runTest':
+                # we allow instantiation with no explicit method name
+                # but not an *incorrect* or missing method name
+                raise ValueError(f'no such test method in {self.__class__}:'
+                                 f' {method_name}') from e
 
     def setUp(self) -> None:
         super().setUp()
@@ -345,12 +352,13 @@ def _check_return_codes(self, elapsed_time) -> None:
             if first_process.exitcode == skip.exit_code:
                 raise unittest.SkipTest(skip.message)
 
-        # Skip the unittest since the raised error maybe not caused by
-        # the tested function. For example, in CI environment, the tested
-        # method could be terminated by system signal for the limited
-        # resources.
-        self.skipTest(f'Skip test {self._testMethodName} due to '
-                      'the program abort')
+        if first_process.exitcode != 0:
+            # Skip the unittest since the raised error maybe not caused by
+            # the tested function. For example, in CI environment, the tested
+            # method could be terminated by system signal for the limited
+            # resources.
+            self.skipTest(f'Skip test {self._testMethodName} due to '
+                          'the program abort')
 
     @property
     def is_master(self) -> bool:
diff --git a/requirements/tests.txt b/requirements/tests.txt
index 94ee153fa3..fe175e6f19 100644
--- a/requirements/tests.txt
+++ b/requirements/tests.txt
@@ -1,4 +1,4 @@
-aim<=3.17.5;sys_platform!='win32'
+aim;sys_platform!='win32'
 bitsandbytes
 clearml
 coverage