diff --git a/pl-hydra/configs/test.yaml b/pl-hydra/configs/test.yaml deleted file mode 100644 index 1e10d5c..0000000 --- a/pl-hydra/configs/test.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# @package _global_ - -# specify here default evaluation configuration -defaults: - - _self_ - - datamodule: mnist.yaml # choose the datamodule for evaluation - - model: mnist.yaml - - callbacks: null - - logger: null - - trainer: default.yaml - - log_dir: evaluation.yaml - - - experiment: null - - # enable color logging - - override hydra/hydra_logging: colorlog - - override hydra/job_logging: colorlog - -original_work_dir: ${hydra:runtime.cwd} - -data_dir: ${original_work_dir}/data/ - -print_config: True - -ignore_warnings: True - -seed: null - -name: "default" - -# passing checkpoint path is necessary -ckpt_path: ??? diff --git a/pl-hydra/configs/train.yaml b/pl-hydra/configs/train.yaml index d9f03d7..114bcf9 100644 --- a/pl-hydra/configs/train.yaml +++ b/pl-hydra/configs/train.yaml @@ -44,7 +44,7 @@ defaults: # default name for the experiment, determines logging folder path # (you can overwrite this name in experiment configs) -name: "resnet" +name: "test" # path to original working directory # hydra hijacks working directory by changing it to the new log directory diff --git a/pl-hydra/configs/trainer/ddp.yaml b/pl-hydra/configs/trainer/ddp.yaml index 8a11249..bf10a88 100644 --- a/pl-hydra/configs/trainer/ddp.yaml +++ b/pl-hydra/configs/trainer/ddp.yaml @@ -1,6 +1,6 @@ defaults: - default.yaml -gpus: 4 +gpus: 2 strategy: ddp sync_batchnorm: True diff --git a/pl-hydra/src/testing_pipeline.py b/pl-hydra/src/testing_pipeline.py deleted file mode 100644 index abd030a..0000000 --- a/pl-hydra/src/testing_pipeline.py +++ /dev/null @@ -1,57 +0,0 @@ -import os -from typing import List - -import hydra -from omegaconf import DictConfig -from pytorch_lightning import LightningDataModule, LightningModule, Trainer, seed_everything -from pytorch_lightning.loggers import LightningLoggerBase - 
-from src import utils - -log = utils.get_logger(__name__) - - -def test(config: DictConfig) -> None: - """Contains minimal example of the testing pipeline. Evaluates given checkpoint on a testset. - - Args: - config (DictConfig): Configuration composed by Hydra. - - Returns: - None - """ - - # Set seed for random number generators in pytorch, numpy and python.random - if config.get("seed"): - seed_everything(config.seed, workers=True) - - # Convert relative ckpt path to absolute path if necessary - if not os.path.isabs(config.ckpt_path): - config.ckpt_path = os.path.join(hydra.utils.get_original_cwd(), config.ckpt_path) - - # Init lightning datamodule - log.info(f"Instantiating datamodule <{config.datamodule._target_}>") - datamodule: LightningDataModule = hydra.utils.instantiate(config.datamodule) - - # Init lightning model - log.info(f"Instantiating model <{config.model._target_}>") - model: LightningModule = hydra.utils.instantiate(config.model) - - # Init lightning loggers - logger: List[LightningLoggerBase] = [] - if "logger" in config: - for _, lg_conf in config.logger.items(): - if "_target_" in lg_conf: - log.info(f"Instantiating logger <{lg_conf._target_}>") - logger.append(hydra.utils.instantiate(lg_conf)) - - # Init lightning trainer - log.info(f"Instantiating trainer <{config.trainer._target_}>") - trainer: Trainer = hydra.utils.instantiate(config.trainer, logger=logger) - - # Log hyperparameters - if trainer.logger: - trainer.logger.log_hyperparams({"ckpt_path": config.ckpt_path}) - - log.info("Starting testing!") - trainer.test(model=model, datamodule=datamodule, ckpt_path=config.ckpt_path) diff --git a/pl-hydra/test.py b/pl-hydra/test.py deleted file mode 100644 index ee02d04..0000000 --- a/pl-hydra/test.py +++ /dev/null @@ -1,26 +0,0 @@ -import dotenv -import hydra -from omegaconf import DictConfig - -# load environment variables from `.env` file if it exists -# recursively searches for `.env` in all folders starting from work dir 
-dotenv.load_dotenv(override=True) - - -@hydra.main(config_path="configs/", config_name="test.yaml") -def main(config: DictConfig): - - # Imports can be nested inside @hydra.main to optimize tab completion - # https://github.com/facebookresearch/hydra/issues/934 - from src import utils - from src.testing_pipeline import test - - # Applies optional utilities - utils.extras(config) - - # Evaluate model - return test(config) - - -if __name__ == "__main__": - main() diff --git a/pl-hydra/tests/__init__.py b/pl-hydra/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pl-hydra/tests/helpers/__init__.py b/pl-hydra/tests/helpers/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pl-hydra/tests/helpers/module_available.py b/pl-hydra/tests/helpers/module_available.py deleted file mode 100644 index d3137f3..0000000 --- a/pl-hydra/tests/helpers/module_available.py +++ /dev/null @@ -1,28 +0,0 @@ -import platform -from importlib.util import find_spec - -""" -Adapted from: - https://github.com/PyTorchLightning/pytorch-lightning/blob/master/pytorch_lightning/utilities/imports.py -""" - - -def _module_available(module_path: str) -> bool: - """Check if a path is available in your environment. 
- - >>> _module_available('os') - True - >>> _module_available('bla.bla') - False - """ - try: - return find_spec(module_path) is not None - except ModuleNotFoundError: - # Python 3.7+ - return False - - -_IS_WINDOWS = platform.system() == "Windows" -_DEEPSPEED_AVAILABLE = not _IS_WINDOWS and _module_available("deepspeed") -_FAIRSCALE_AVAILABLE = not _IS_WINDOWS and _module_available("fairscale.nn") -_RPC_AVAILABLE = not _IS_WINDOWS and _module_available("torch.distributed.rpc") diff --git a/pl-hydra/tests/helpers/run_command.py b/pl-hydra/tests/helpers/run_command.py deleted file mode 100644 index 1670988..0000000 --- a/pl-hydra/tests/helpers/run_command.py +++ /dev/null @@ -1,15 +0,0 @@ -from typing import List - -import pytest -import sh - - -def run_command(command: List[str]): - """Default method for executing shell commands with pytest.""" - msg = None - try: - sh.python(command) - except sh.ErrorReturnCode as e: - msg = e.stderr.decode() - if msg: - pytest.fail(msg=msg) diff --git a/pl-hydra/tests/helpers/runif.py b/pl-hydra/tests/helpers/runif.py deleted file mode 100644 index 36d73e1..0000000 --- a/pl-hydra/tests/helpers/runif.py +++ /dev/null @@ -1,104 +0,0 @@ -import sys -from typing import Optional - -import pytest -import torch -from packaging.version import Version -from pkg_resources import get_distribution - -""" -Adapted from: - https://github.com/PyTorchLightning/pytorch-lightning/blob/master/tests/helpers/runif.py -""" - -from tests.helpers.module_available import ( - _DEEPSPEED_AVAILABLE, - _FAIRSCALE_AVAILABLE, - _IS_WINDOWS, - _RPC_AVAILABLE, -) - - -class RunIf: - """RunIf wrapper for conditional skipping of tests. - - Fully compatible with `@pytest.mark`. 
- - Example: - - @RunIf(min_torch="1.8") - @pytest.mark.parametrize("arg1", [1.0, 2.0]) - def test_wrapper(arg1): - assert arg1 > 0 - """ - - def __new__( - self, - min_gpus: int = 0, - min_torch: Optional[str] = None, - max_torch: Optional[str] = None, - min_python: Optional[str] = None, - skip_windows: bool = False, - rpc: bool = False, - fairscale: bool = False, - deepspeed: bool = False, - **kwargs, - ): - """ - Args: - min_gpus: min number of gpus required to run test - min_torch: minimum pytorch version to run test - max_torch: maximum pytorch version to run test - min_python: minimum python version required to run test - skip_windows: skip test for Windows platform - rpc: requires Remote Procedure Call (RPC) - fairscale: if `fairscale` module is required to run the test - deepspeed: if `deepspeed` module is required to run the test - kwargs: native pytest.mark.skipif keyword arguments - """ - conditions = [] - reasons = [] - - if min_gpus: - conditions.append(torch.cuda.device_count() < min_gpus) - reasons.append(f"GPUs>={min_gpus}") - - if min_torch: - torch_version = get_distribution("torch").version - conditions.append(Version(torch_version) < Version(min_torch)) - reasons.append(f"torch>={min_torch}") - - if max_torch: - torch_version = get_distribution("torch").version - conditions.append(Version(torch_version) >= Version(max_torch)) - reasons.append(f"torch<{max_torch}") - - if min_python: - py_version = ( - f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" - ) - conditions.append(Version(py_version) < Version(min_python)) - reasons.append(f"python>={min_python}") - - if skip_windows: - conditions.append(_IS_WINDOWS) - reasons.append("does not run on Windows") - - if rpc: - conditions.append(not _RPC_AVAILABLE) - reasons.append("RPC") - - if fairscale: - conditions.append(not _FAIRSCALE_AVAILABLE) - reasons.append("Fairscale") - - if deepspeed: - conditions.append(not _DEEPSPEED_AVAILABLE) - reasons.append("Deepspeed") - - 
reasons = [rs for cond, rs in zip(conditions, reasons) if cond] - return pytest.mark.skipif( - condition=any(conditions), - reason=f"Requires: [{' + '.join(reasons)}]", - **kwargs, - ) diff --git a/pl-hydra/tests/shell/__init__.py b/pl-hydra/tests/shell/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pl-hydra/tests/shell/test_basic_commands.py b/pl-hydra/tests/shell/test_basic_commands.py deleted file mode 100644 index f708ede..0000000 --- a/pl-hydra/tests/shell/test_basic_commands.py +++ /dev/null @@ -1,58 +0,0 @@ -import pytest - -from tests.helpers.run_command import run_command -from tests.helpers.runif import RunIf - -""" -A couple of sanity checks to make sure the model doesn't crash with different running options. -""" - - -def test_fast_dev_run(): - """Test running for 1 train, val and test batch.""" - command = ["train.py", "++trainer.fast_dev_run=true"] - run_command(command) - - -@pytest.mark.slow -def test_cpu(): - """Test running 1 epoch on CPU.""" - command = ["train.py", "++trainer.max_epochs=1", "++trainer.gpus=0"] - run_command(command) - - -# use RunIf to skip execution of some tests, e.g. 
when no gpus are available -@RunIf(min_gpus=1) -@pytest.mark.slow -def test_gpu(): - """Test running 1 epoch on GPU.""" - command = [ - "train.py", - "++trainer.max_epochs=1", - "++trainer.gpus=1", - ] - run_command(command) - - -@RunIf(min_gpus=1) -@pytest.mark.slow -def test_mixed_precision(): - """Test running 1 epoch with pytorch native automatic mixed precision (AMP).""" - command = [ - "train.py", - "++trainer.max_epochs=1", - "++trainer.gpus=1", - "++trainer.precision=16", - ] - run_command(command) - - -@pytest.mark.slow -def test_double_validation_loop(): - """Test running 1 epoch with validation loop twice per epoch.""" - command = [ - "train.py", - "++trainer.max_epochs=1", - "++trainer.val_check_interval=0.5", - ] - run_command(command) diff --git a/pl-hydra/tests/shell/test_debug_configs.py b/pl-hydra/tests/shell/test_debug_configs.py deleted file mode 100644 index a73dda8..0000000 --- a/pl-hydra/tests/shell/test_debug_configs.py +++ /dev/null @@ -1,35 +0,0 @@ -import pytest - -from tests.helpers.run_command import run_command - - -@pytest.mark.slow -def test_debug_default(): - command = ["train.py", "debug=default"] - run_command(command) - - -def test_debug_limit_batches(): - command = ["train.py", "debug=limit_batches"] - run_command(command) - - -def test_debug_overfit(): - command = ["train.py", "debug=overfit"] - run_command(command) - - -@pytest.mark.slow -def test_debug_profiler(): - command = ["train.py", "debug=profiler"] - run_command(command) - - -def test_debug_step(): - command = ["train.py", "debug=step"] - run_command(command) - - -def test_debug_test_only(): - command = ["train.py", "debug=test_only"] - run_command(command) diff --git a/pl-hydra/tests/shell/test_sweeps.py b/pl-hydra/tests/shell/test_sweeps.py deleted file mode 100644 index 10a298d..0000000 --- a/pl-hydra/tests/shell/test_sweeps.py +++ /dev/null @@ -1,44 +0,0 @@ -import pytest - -from tests.helpers.run_command import run_command - -""" -A couple of tests executing hydra 
sweeps. - -Use the following command to skip slow tests: - pytest -k "not slow" -""" - - -@pytest.mark.slow -def test_experiments(): - """Test running all available experiment configs for 1 epoch.""" - command = ["train.py", "-m", "experiment=glob(*)", "++trainer.max_epochs=1"] - run_command(command) - - -@pytest.mark.slow -def test_default_sweep(): - """Test default Hydra sweeper.""" - command = [ - "train.py", - "-m", - "datamodule.batch_size=64,128", - "model.lr=0.01,0.02", - "trainer=default", - "++trainer.fast_dev_run=true", - ] - run_command(command) - - -@pytest.mark.slow -def test_optuna_sweep(): - """Test Optuna sweeper.""" - command = [ - "train.py", - "-m", - "hparams_search=mnist_optuna", - "trainer=default", - "++trainer.fast_dev_run=true", - ] - run_command(command) diff --git a/pl-hydra/tests/unit/__init__.py b/pl-hydra/tests/unit/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pl-hydra/tests/unit/test_mnist_datamodule.py b/pl-hydra/tests/unit/test_mnist_datamodule.py deleted file mode 100644 index 91e6182..0000000 --- a/pl-hydra/tests/unit/test_mnist_datamodule.py +++ /dev/null @@ -1,36 +0,0 @@ -import os - -import pytest -import torch - -from src.datamodules.mnist_datamodule import MNISTDataModule - - -@pytest.mark.parametrize("batch_size", [32, 128]) -def test_mnist_datamodule(batch_size): - datamodule = MNISTDataModule(batch_size=batch_size) - datamodule.prepare_data() - - assert not datamodule.data_train and not datamodule.data_val and not datamodule.data_test - - assert os.path.exists(os.path.join("data", "MNIST")) - assert os.path.exists(os.path.join("data", "MNIST", "raw")) - - datamodule.setup() - - assert datamodule.data_train and datamodule.data_val and datamodule.data_test - assert ( - len(datamodule.data_train) + len(datamodule.data_val) + len(datamodule.data_test) == 70_000 - ) - - assert datamodule.train_dataloader() - assert datamodule.val_dataloader() - assert datamodule.test_dataloader() - - batch = 
next(iter(datamodule.train_dataloader())) - x, y = batch - - assert len(x) == batch_size - assert len(y) == batch_size - assert x.dtype == torch.float32 - assert y.dtype == torch.int64 diff --git a/pl-hydra/train.py b/pl-hydra/train.py index 05b78f3..22b258e 100644 --- a/pl-hydra/train.py +++ b/pl-hydra/train.py @@ -6,8 +6,8 @@ import hydra from omegaconf import DictConfig import os -# Set the visible GPUs (curent machine has 16 GPUS [0-15]) -os.environ["CUDA_VISIBLE_DEVICES"]="11" +# # Set the visible GPUs (curent machine has 16 GPUS [0-15]) +# os.environ["CUDA_VISIBLE_DEVICES"]="11" # load environment variables from `.env` file if it exists # recursively searches for `.env` in all folders starting from work dir diff --git a/train_multi.py b/train_multi.py index 47809bc..1be1d30 100644 --- a/train_multi.py +++ b/train_multi.py @@ -27,9 +27,9 @@ # load helper functions from utils.helper import set_random_seed, print_info, evaluate -# Set the visible GPUs, in case of multi-GPU device, otherwise comment it -# you can use `nvidia-smi` in terminal to see the available GPUS -os.environ["CUDA_VISIBLE_DEVICES"]="13,14" +# # Set the visible GPUs, in case of multi-GPU device, otherwise comment it +# # you can use `nvidia-smi` in terminal to see the available GPUS +# os.environ["CUDA_VISIBLE_DEVICES"]="13,14" ###################################################################### @@ -42,6 +42,7 @@ # Path to the folder where the datasets are/should be downloaded (e.g. 
CIFAR10) DATASET_PATH = "../data" # Path to the folder where the models will be saved CHECKPOINT_PATH = "../saved_models/multi/" +os.makedirs(CHECKPOINT_PATH,exist_ok=True) diff --git a/train_pl.py b/train_pl.py index 862bdc7..f8c3e3d 100644 --- a/train_pl.py +++ b/train_pl.py @@ -40,9 +40,9 @@ from utils.helper import set_random_seed, print_info, evaluate from utils.plotter import plot_cm, plot_preds -# Set the visible GPUs, in case of multi-GPU device, otherwise comment it -# you can use `nvidia-smi` in terminal to see the available GPUS -os.environ["CUDA_VISIBLE_DEVICES"]="13,14,15,16" +# # Set the visible GPUs, in case of multi-GPU device, otherwise comment it +# # you can use `nvidia-smi` in terminal to see the available GPUS +# os.environ["CUDA_VISIBLE_DEVICES"]="13,14,15,16" ###################################################################### # Set the Global values @@ -55,6 +55,7 @@ # Path to the folder where the datasets are/should be downloaded (e.g. CIFAR10) DATASET_PATH = "../data" LOG_PATH = "./logs" # Path to the folder where the models will be saved CHECKPOINT_PATH = "../saved_models/pl/" +os.makedirs(CHECKPOINT_PATH,exist_ok=True) diff --git a/train_simple.py b/train_simple.py index 87cd2df..ac54e78 100644 --- a/train_simple.py +++ b/train_simple.py @@ -25,9 +25,9 @@ # load helper functions from utils.helper import set_random_seed, print_info, evaluate -# Set the visible GPUs, in case of multi-GPU device, otherwise comment it -# you can use `nvidia-smi` in terminal to see the available GPUS -os.environ["CUDA_VISIBLE_DEVICES"]="12" +# # Set the visible GPUs, in case of multi-GPU device, otherwise comment it +# # you can use `nvidia-smi` in terminal to see the available GPUS +# os.environ["CUDA_VISIBLE_DEVICES"]="12" ###################################################################### @@ -40,7 +40,7 @@ # Path to the folder where the datasets are/should be downloaded (e.g. 
CIFAR10) DATASET_PATH = "../data" - +os.makedirs(DATASET_PATH,exist_ok=True) # Path to the folder where the models will be saved CHECKPOINT_PATH = "../saved_models/simple/" os.makedirs(CHECKPOINT_PATH,exist_ok=True)