diff --git a/.flake8 b/.flake8
deleted file mode 100644
index b4f0315..0000000
--- a/.flake8
+++ /dev/null
@@ -1,10 +0,0 @@
-[flake8]
-show-source = True
-max-line-length = 100
-extend-exclude =
-    venv
-    .venv
-    build
-extend-ignore =
-    E203  # No whitespace before ':' in [x : y]
-    E731  # No lambdas — too strict
diff --git a/MANIFEST.in b/MANIFEST.in
index 24a7794..8039ffc 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -5,6 +5,5 @@
 # Include individual files
 include LICENSE.txt
-include requirements.txt
 
 prune tests
 prune examples
diff --git a/README.md b/README.md
index d4941ee..2380aff 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,10 @@ pip install -e DEHB  # -e stands for editable, lets you modify the code and reru
 To run PyTorch example: (*note additional requirements*)
 ```bash
 python examples/03_pytorch_mnist_hpo.py \
-    --min_budget 1 --max_budget 3 --verbose --runtime 60
+    --min_budget 1 \
+    --max_budget 3 \
+    --runtime 60 \
+    --verbose
 ```
 
 ### Running DEHB in a parallel setting
@@ -58,8 +61,13 @@ to it by that DEHB run.
 
 To run the PyTorch MNIST example on a single node using 2 workers:
 ```bash
-python examples/03_pytorch_mnist_hpo.py --min_budget 1 --max_budget 3 \
-  --verbose --runtime 60 --n_workers 2 --single_node_with_gpus
+python examples/03_pytorch_mnist_hpo.py \
+    --min_budget 1 \
+    --max_budget 3 \
+    --runtime 60 \
+    --n_workers 2 \
+    --single_node_with_gpus \
+    --verbose
 ```
 
 #### Multi-node runs
@@ -80,10 +88,20 @@ manner on clusters managed by SLURM. (*not expected to work off-the-shelf*)
 
 To run the PyTorch MNIST example on a multi-node setup using 4 workers:
 ```bash
-bash utils/run_dask_setup.sh -f dask_dump/scheduler.json -e env_name -n 4
+# The scheduler file passed via -f is how the workers will be discovered by DEHB
+bash utils/run_dask_setup.sh \
+    -f dask_dump/scheduler.json \
+    -e env_name \
+    -n 4
+
+# Make sure to sleep to allow the workers to set up properly
 sleep 5
-python examples/03_pytorch_mnist_hpo.py --min_budget 1 --max_budget 3 \
-  --verbose --runtime 60 --scheduler_file dask_dump/scheduler.json
+python examples/03_pytorch_mnist_hpo.py \
+    --min_budget 1 \
+    --max_budget 3 \
+    --runtime 60 \
+    --scheduler_file dask_dump/scheduler.json \
+    --verbose
 ```
 
 ### DEHB Hyperparameters
@@ -127,8 +144,8 @@ represents the *mutation* strategy while `bin` represents the *binomial crossove
 }
 
 @online{Awad-arXiv-2023,
-title = {MO-DEHB: Evolutionary-based Hyperband for Multi-Objective Optimization},
-author = {Noor Awad and Ayushi Sharma and Frank Hutter},
-year = {2023},
-keywords = {}
+  title = {MO-DEHB: Evolutionary-based Hyperband for Multi-Objective Optimization},
+  author = {Noor Awad and Ayushi Sharma and Frank Hutter},
+  year = {2023},
+  keywords = {}
 }
diff --git a/__init__.py b/__init__.py
deleted file mode 100644
index e69de29..0000000
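Since the README hunk above references DEHB's default `rand1` mutation and binomial (`bin`) crossover, here is a minimal, self-contained sketch of where those DE settings would plug in. The keyword names `strategy`, `mutation_factor`, and `crossover_prob` are assumptions inferred from the README's description, not verified against the released API:

```python
# Sketch only: `strategy`, `mutation_factor` and `crossover_prob` are assumed
# keyword names for the DE settings the README describes.
import numpy as np
from ConfigSpace import ConfigurationSpace
from dehb import DEHB

cs = ConfigurationSpace({"x0": (3.0, 10.0)})

def objective_function(x, budget, **kwargs):
    # Toy objective; replace with a real evaluation at the given budget.
    return {"fitness": x["x0"] + np.random.uniform(), "cost": budget}

optimizer = DEHB(
    f=objective_function,
    cs=cs,
    dimensions=len(cs.get_hyperparameters()),
    min_budget=1,
    max_budget=9,
    eta=3,
    n_workers=1,
    output_path="./logs",
    strategy="rand1_bin",  # assumed: `rand1` mutation + `bin` binomial crossover
    mutation_factor=0.5,   # assumed name for the DE scaling factor
    crossover_prob=0.5,    # assumed name for the crossover probability
)
traj, runtime, history = optimizer.run(brackets=1)
```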
diff --git a/docs/index.md b/docs/index.md
index bea8d2e..297e931 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -13,12 +13,17 @@ To start using the `dehb` package, you can install it via pip. You can either in
 
 ```bash
 # Install from pypi
 pip install dehb
-
-# Install as editable from github
-git clone https://github.com/automl/DEHB.git
-pip install -e DEHB  # -e stands for editable, lets you modify the code and rerun things
 ```
+
+!!! note "From Source"
+
+    To install directly from source:
+
+    ```bash
+    git clone https://github.com/automl/DEHB.git
+    pip install -e DEHB  # -e stands for editable, lets you modify the code and rerun things
+    ```
 
 ## Getting Started
 In the following sections we provide some basic exemplary setups for running DEHB with a single worker or in a multi-worker setup.
@@ -26,47 +31,54 @@ In the following sections we provide some basic examplatory setup for running DE
 ### Basic single worker setup
 A basic setup for optimizing can be done as follows. Please note that this example is only meant to show a simple setup of `dehb`. More in-depth examples can be found in the [examples folder](../examples/).
 
 First we need to set up a `ConfigurationSpace`, from which Configurations will be sampled:
-```python
-import ConfigSpace
+```python exec="true" source="material-block" result="python" title="Configuration Space" session="someid"
+from ConfigSpace import ConfigurationSpace, Configuration
 
-cs = ConfigSpace.ConfigurationSpace()
-cs.add_hyperparameter(ConfigSpace.UniformFloatHyperparameter("x0", lower=3, upper=10, log=False))
+cs = ConfigurationSpace({"x0": (3.0, 10.0), "x1": ["red", "green"]})
+print(cs)
 ```
 
 Next, we need an `objective_function`, which we are aiming to optimize:
-```python
+```python exec="true" source="material-block" result="python" title="Objective Function" session="someid"
 import numpy as np
-def objective_function(x, budget, **kwargs):
-    """Toy objective function.
-
-    Args:
-        x (ConfigSpace.Configuration): Configuration to evaluate
-        budget (float): Budget to evaluate x on
-
-    Returns:
-        dict: Result dictionary
-    """
-    # This obviously does not make sense in a real world example. Replace this with your actual objective value (y) and cost.
-    y = np.random.uniform()
-    cost = 5
-    result = {
-        "fitness": y,
-        "cost": cost
-    }
-    return result
+
+def objective_function(x: Configuration, budget: float, **kwargs):
+    # Replace this with your actual objective value (y) and cost.
+    cost = (10 if x["x1"] == "red" else 100) + budget
+    y = x["x0"] + np.random.uniform()
+    return {"fitness": y, "cost": cost}
+
+sample_config = cs.sample_configuration()
+print(sample_config)
+
+result = objective_function(sample_config, budget=10)
+print(result)
 ```
 
 Finally, we can set up our optimizer and run DEHB:
-```python
+```python exec="true" source="material-block" result="python" title="Running DEHB" session="someid"
 from dehb import DEHB
 
 dim = len(cs.get_hyperparameters())
-optimizer = DEHB(f=objective_function, cs=cs, dimensions=dim, min_budget=3, output_path="./logs",
-                 max_budget=27, eta=3, n_workers=1)
-
-# Run optimization for 10 brackets. Output files will be save to ./logs
-traj, runtime, history = opt.run(brackets=10, verbose=True)
+optimizer = DEHB(
+    f=objective_function,
+    cs=cs,
+    dimensions=dim,
+    min_budget=3,
+    max_budget=27,
+    eta=3,
+    n_workers=1,
+    output_path="./logs",
+)
+
+# Run optimization for 1 bracket. Output files will be saved to ./logs
+traj, runtime, history = optimizer.run(brackets=1, verbose=True)
+config, fitness, runtime, budget, _ = history[0]
+print("config", config)
+print("fitness", fitness)
+print("runtime", runtime)
+print("budget", budget)
 ```
 
 ### Running DEHB in a parallel setting
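For completeness, a minimal sketch of driving the parallel setup from Python instead of the CLI. It assumes `objective_function` and `cs` from the single-worker example above, and that `DEHB` accepts a Dask `client` the way `examples/03_pytorch_mnist_hpo.py` wires in `--scheduler_file`; the `client` keyword is an assumption here, not confirmed API:

```python
# Sketch: attach a DEHB run to an already-running Dask cluster.
# Assumes `objective_function` and `cs` are defined as in the single-worker
# example above, and that DEHB accepts a `client` keyword (mirroring what
# examples/03_pytorch_mnist_hpo.py does with --scheduler_file).
from dask.distributed import Client
from dehb import DEHB

# The scheduler file is written by `dask-scheduler --scheduler-file ...`
client = Client(scheduler_file="dask_dump/scheduler.json")

optimizer = DEHB(
    f=objective_function,
    cs=cs,
    dimensions=len(cs.get_hyperparameters()),
    min_budget=1,
    max_budget=3,
    eta=3,
    client=client,  # reuse the external cluster instead of spawning workers
)
traj, runtime, history = optimizer.run(brackets=1, verbose=True)
```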
@@ -99,8 +111,13 @@ to it by that DEHB run.
 
 To run the PyTorch MNIST example on a single node using 2 workers:
 ```bash
-python examples/03_pytorch_mnist_hpo.py --min_budget 1 --max_budget 3 \
-  --verbose --runtime 60 --n_workers 2 --single_node_with_gpus
+python examples/03_pytorch_mnist_hpo.py \
+    --min_budget 1 \
+    --max_budget 3 \
+    --runtime 60 \
+    --n_workers 2 \
+    --single_node_with_gpus \
+    --verbose
 ```
 
 #### Multi-node runs
@@ -121,10 +138,21 @@ manner on clusters managed by SLURM. (*not expected to work off-the-shelf*)
 
 To run the PyTorch MNIST example on a multi-node setup using 4 workers:
 ```bash
-bash utils/run_dask_setup.sh -f dask_dump/scheduler.json -e env_name -n 4
+# The scheduler file passed via -f is how the workers will be discovered by DEHB
+bash utils/run_dask_setup.sh \
+    -n 4 \
+    -f dask_dump/scheduler.json \
+    -e env_name
+
+# Make sure to sleep to allow the workers to set up properly
 sleep 5
-python examples/03_pytorch_mnist_hpo.py --min_budget 1 --max_budget 3 \
-  --verbose --runtime 60 --scheduler_file dask_dump/scheduler.json
+
+python examples/03_pytorch_mnist_hpo.py \
+    --min_budget 1 \
+    --max_budget 3 \
+    --runtime 60 \
+    --scheduler_file dask_dump/scheduler.json \
+    --verbose
 ```
 
 ## To cite the paper or code
@@ -142,9 +169,9 @@ If you use DEHB in one of your research projects, please cite our paper(s):
 }
 
 @online{Awad-arXiv-2023,
-title = {MO-DEHB: Evolutionary-based Hyperband for Multi-Objective Optimization},
-author = {Noor Awad and Ayushi Sharma and Frank Hutter},
-year = {2023},
-keywords = {}
+  title = {MO-DEHB: Evolutionary-based Hyperband for Multi-Objective Optimization},
+  author = {Noor Awad and Ayushi Sharma and Frank Hutter},
+  year = {2023},
+  keywords = {}
 }
 ```
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
index f2afacb..e9ed3a5 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -45,8 +45,38 @@ theme:
       icon: material/eye
       name: Switch to dark mode
 
+markdown_extensions:
+  - admonition
+  - tables
+  - attr_list
+  - md_in_html
+  - toc:
+      permalink: "#"
+  - pymdownx.highlight:
+      anchor_linenums: true
+  - pymdownx.magiclink:
+      hide_protocol: true
+      repo_url_shortener: true
+      repo_url_shorthand: true
+      user: automl
+      repo: DEHB
+  - pymdownx.inlinehilite
+  - pymdownx.snippets
+  - pymdownx.details
+  - pymdownx.tabbed:
+      alternate_style: true
+  - pymdownx.superfences:
+      custom_fences:
+        - name: mermaid
+          class: mermaid
+          format: !!python/name:pymdownx.superfences.fence_code_format
+  - pymdownx.emoji:
+      emoji_index: !!python/name:materialx.emoji.twemoji
+      emoji_generator: !!python/name:materialx.emoji.to_svg
+
 plugins:
   - search
+  - markdown-exec
   - mkdocstrings:
       default_handler: python
       enable_inventory: true
diff --git a/pyproject.toml b/pyproject.toml
index e598cb7..8aeaa3e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,33 +1,83 @@
 # For TOML reference
 # https://learnxinyminutes.com/docs/toml/
 
+[project]
+urls = { Documentation = "https://automl.github.io/DEHB/", Github = "https://github.com/automl/DEHB" }
+
+name = "DEHB"
+version = "0.0.7"
+dependencies = [
+    "numpy>=1.18.2",
+    "loguru>=0.5.3",
+    "dask>=2.27.0",
+    "distributed>=2.27.0",
+    "ConfigSpace>=0.4.16",
+]
+classifiers = [
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Natural Language :: English",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Education",
+    "Intended Audience :: Science/Research",
+    "Topic :: Scientific/Engineering",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
Intelligence", +] +license = { file = "LICENSE.txt" } +readme = "README.md" +description = "Evolutionary Hyperband for Scalable, Robust and Efficient Hyperparameter Optimization" +authors = [ + { name = "Neeratyoy Mallik", email = "mallik@cs.uni-freiburg.de" }, + { name = "Noor Awad" }, + { name = "Frank Hutter" }, + { name = "Janis Fix", email = "fixj@cs.uni-freiburg.de" }, +] +requires-python = ">=3.8" +[project.optional-dependencies] +dev = [ + # Test + "pytest>=4.6", + "pytest-cov", + "pytest-xdist", + "pytest-timeout", + # Docs + "mkdocs", + "mkdocs-material", + "mkdocstrings[python]", + "markdown-exec[ansi]", + # Others + "ruff", + "black", + "pre-commit", +] [tool.pytest.ini_options] -testpaths = ["tests"] # path to the test directory +testpaths = ["tests"] # path to the test directory minversion = "3.8" addopts = "--cov=src --cov-report=lcov" # Should be package name -pythonpath = [ - "." -] +pythonpath = ["."] [tool.coverage.run] branch = true context = "dehb" # Should be package name omit = [ - "dehb/__init__.py", # Has variables only needed for setup.py + "dehb/__init__.py", # Has variables only needed for setup.py ] [tool.coverage.report] show_missing = true skip_covered = true exclude_lines = [ - "pragma: no cover", - '\.\.\.', - "raise NotImplementedError", - "if TYPE_CHECKING", + "pragma: no cover", + '\.\.\.', + "raise NotImplementedError", + "if TYPE_CHECKING", ] # These are lines to exclude from coverage [tool.black] target-version = ['py38'] +line-length = 100 # https://github.com/charliermarsh/ruff [tool.ruff] @@ -183,4 +233,4 @@ warn_return_any = true module = ["tests.*"] disallow_untyped_defs = false # Sometimes we just want to ignore verbose types disallow_untyped_decorators = false # Test decorators are not properly typed -disallow_incomplete_defs = false # Sometimes we just want to ignore verbose types \ No newline at end of file +disallow_incomplete_defs = false # Sometimes we just want to ignore verbose types diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index b7d3788..0000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -numpy>=1.18.2 -loguru>=0.5.3 -dask>=2.27.0 -distributed>=2.27.0 -ConfigSpace>=0.4.16 \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index 8dd7673..0000000 --- a/setup.py +++ /dev/null @@ -1,90 +0,0 @@ -import os - -import setuptools - -import datetime - - -name = "DEHB" -package_name = "dehb" -author = "Neeratyoy, Noor, Janis, Frank" -author_email = "mallik@cs.uni-freiburg.de" -description = "Evolutionary Hyperband for Scalable, Robust and Efficient Hyperparameter Optimization" -url = "https://github.com/automl/DEHB" -project_urls = { - "Documentation": "https://automl.github.io/DEHB/", - "Source Code": "https://github.com/automl/DEHB", -} -copyright = f"Copyright {datetime.date.today().strftime('%Y')}, Neeratyoy, Noor, Frank" -version = "0.0.7" - -HERE = os.path.dirname(os.path.realpath(__file__)) - - -def read_file(filepath: str) -> str: - """ - Read in a files contents - - Parameters - ---------- - filepath : str - The name of the file. - - Returns - ------- - str - The contents of the file. 
-    """
-
-    with open(filepath, "r", encoding="utf-8") as fh:
-        return fh.read()
-
-
-extras_require = {
-    "dev": [
-        # Test
-        "pytest>=4.6",
-        "pytest-cov",
-        "pytest-xdist",
-        "pytest-timeout",
-        # Docs
-        "mkdocs-material",
-        "mkdocstrings",
-        # Others
-        "ruff",
-        "black",
-        "pre-commit",
-    ]
-}
-
-
-setuptools.setup(
-    name=package_name,
-    author=author,
-    author_email=author_email,
-    description=description,
-    long_description=read_file(os.path.join(HERE, "README.md")),
-    long_description_content_type="text/markdown",
-    license="Apache-2.0",
-    url=url,
-    project_urls=project_urls,
-    version=version,
-    packages=setuptools.find_packages("src", exclude=["tests"]),
-    package_dir={"": "src"},
-    python_requires=">=3.8",
-    install_requires=read_file(os.path.join(HERE, "requirements.txt")).split("\n"),
-    extras_require=extras_require,
-    test_suite="pytest",
-    platforms=["Linux"],
-    classifiers=[
-        "Programming Language :: Python :: 3.8",
-        "Programming Language :: Python :: 3.9",
-        "Programming Language :: Python :: 3.10",
-        "Programming Language :: Python :: 3.11",
-        "Natural Language :: English",
-        "Intended Audience :: Developers",
-        "Intended Audience :: Education",
-        "Intended Audience :: Science/Research",
-        "Topic :: Scientific/Engineering",
-        "Topic :: Scientific/Engineering :: Artificial Intelligence",
-    ],
-)
diff --git a/src/dehb/README.md b/src/dehb/README.md
deleted file mode 100644
index e69de29..0000000
diff --git a/utils/README.md b/utils/README.md
index b791438..65f4058 100644
--- a/utils/README.md
+++ b/utils/README.md
@@ -20,7 +20,8 @@ sbatch temp/workers.sh
 
 Alternatively, to enable GPU usage by the workers,
 ```bash
-python utils/generate_slurm_jobs.py --worker_p [cpu_node] --scheduler_p [cpu_node] --nworkers 10 \
+python utils/generate_slurm_jobs.py \
+    --worker_p [cpu_node] --scheduler_p [cpu_node] --nworkers 10 \
     --scheduler_path ./scheduler --scheduler_file scheduler_gpu.json --output_path temp \
     --setup_file ./setup.sh --gpu  # generates 2 shell scripts
 ```
@@ -34,8 +35,13 @@ One or more DEHB processes can share this pool of 10 workers.
 
 For example, running a DEHB optimization by specifying `scheduler_file` makes that DEHB process connect to the running Dask cluster.
 ```bash
-python examples/03_pytorch_mnist_hpo.py --min_budget 1 --max_budget 9 --verbose \
-    --scheduler_file scheduler/scheduler_gpu.json --runtime 200 --seed 123
+python examples/03_pytorch_mnist_hpo.py \
+    --min_budget 1 \
+    --max_budget 9 \
+    --runtime 200 \
+    --seed 123 \
+    --scheduler_file scheduler/scheduler_gpu.json \
+    --verbose
 ```
 
 The decoupled Dask cluster remains alive even after the DEHB optimization is over. It can be reused by other DEHB runs or processes.
diff --git a/utils/dask_scheduler.sh b/utils/dask_scheduler.sh
index 297f3be..f556e1d 100644
--- a/utils/dask_scheduler.sh
+++ b/utils/dask_scheduler.sh
@@ -1,5 +1,4 @@
-#! /bin/bash
-
+#!/bin/bash
 #SBATCH -p cluster-name
 #SBATCH --gres=gpu:0
 #SBATCH --mem 0
@@ -12,13 +11,17 @@
 do
     case "${flag}" in
         f) filename=${OPTARG};; # specified as -f
         e) envname=${OPTARG};; # specified as -e
+        *) echo "usage: $0 [-f] [-e]"
+           echo "  -f: filename of scheduler file"
+           echo "  -e: name of conda environment"
+           exit 1 ;;
     esac
 done
 
 # setting up environment
-source $HOME/anaconda3/bin/activate $envname
+source "$HOME/anaconda3/bin/activate" "$envname"
 
 # Creating a Dask scheduler
-PYTHONPATH=$PWD dask-scheduler --scheduler-file $filename
+PYTHONPATH=$PWD dask-scheduler --scheduler-file "$filename"
 # for more options: https://docs.dask.org/en/latest/setup/cli.html#dask-scheduler
diff --git a/utils/dask_workers.sh b/utils/dask_workers.sh
index c19a412..c01e4d0 100644
--- a/utils/dask_workers.sh
+++ b/utils/dask_workers.sh
@@ -1,5 +1,4 @@
-#! /bin/bash
-
+#!/bin/bash
 #SBATCH -p cluster-name
 #SBATCH --gres=gpu:1
 #SBATCH --mem 0
@@ -12,13 +11,22 @@
         f) filename=${OPTARG};; # specified as -f
         e) envname=${OPTARG};; # specified as -e
         w) workername=${OPTARG};; # specified as -w
+        *) echo "usage: $0 [-f] [-e] [-w]"
+           echo "  -f: filename of scheduler file"
+           echo "  -e: name of conda environment"
+           echo "  -w: name of worker"
+           exit 1 ;;
     esac
 done
 
 # setting up environment
-source $HOME/anaconda3/bin/activate $envname
+source "$HOME/anaconda3/bin/activate" "$envname"
 
 # creating a Dask worker
-PYTHONPATH=$PWD dask-worker --scheduler-file $filename --name $workername --resources "GPU=1" --no-nanny
+PYTHONPATH=$PWD dask-worker \
+    --scheduler-file "$filename" \
+    --name "$workername" \
+    --resources "GPU=1" \
+    --no-nanny
 # for more options: https://docs.dask.org/en/latest/setup/cli.html#dask-worker
diff --git a/utils/generate_slurm_jobs.py b/utils/generate_slurm_jobs.py
index aeaa36b..a286966 100644
--- a/utils/generate_slurm_jobs.py
+++ b/utils/generate_slurm_jobs.py
@@ -27,11 +27,11 @@ def scheduler_command(scheduler_file):
 
 
 def worker_command(scheduler_file, worker_name, gpu=False, gpu_per_worker=1):
-    cmd = "dask-worker --scheduler-file {} --name \"{}_\"$SLURM_ARRAY_TASK_ID --no-nanny"
+    cmd = 'dask-worker --scheduler-file {} --name "{}_"$SLURM_ARRAY_TASK_ID --no-nanny'
     extra_args = " --reconnect --nprocs 1 --nthreads 1"
     cmd = cmd.format(scheduler_file, worker_name)
     if gpu:
-        cmd += " --resources \"GPU={}\"".format(gpu_per_worker)
+        cmd += ' --resources "GPU={}"'.format(gpu_per_worker)
     cmd += extra_args
     cmd += "\n"
     return cmd
@@ -72,25 +72,31 @@ def input_arguments():
         "--scheduler_file",
         default="scheduler.json",
         type=str,
-        help="The file name storing the Dask cluster connections"
+        help="The file name storing the Dask cluster connections",
     )
     parser.add_argument(
         "--scheduler_path",
         default="./scheduler",
         type=str,
-        help="The path to keep the scheduler.json like files for Dask"
+        help="The path to keep the scheduler.json like files for Dask",
     )
     parser.add_argument(
         "--setup_file",
         default=None,
         type=str,
-        help="The path to file that will be sourced to load environment and set path variables"
+        help="The path to file that will be sourced to load environment and set path variables",
     )
     parser.add_argument(
-        "--output_path", default="./", type=str, help="The path to dump the generated script"
+        "--output_path",
+        default="./",
+        type=str,
+        help="The path to dump the generated script",
     )
     parser.add_argument(
-        "--slurm_dump_path", default="./slurm-logs", type=str, help="Path to dump the slurm logs"
+        "--slurm_dump_path",
+        default="./slurm-logs",
+        type=str,
+        help="Path to dump the slurm logs",
     )
     parser.add_argument(
         "--nworkers", default=10, type=int, help="Number of workers to run"
@@ -98,24 +104,31 @@ def input_arguments():
     )
     parser.add_argument(
         "--worker_name", default="w", type=str, help="Dask worker name prefix"
     )
+    parser.add_argument("-c", default=2, type=int, help="CPUs per task requested")
     parser.add_argument(
-        "-c", default=2, type=int, help="CPUs per task requested"
-    )
-    parser.add_argument(
-        "--gpu", default=False, action="store_true", help="If set, the workers request GPUs"
+        "--gpu",
+        default=False,
+        action="store_true",
+        help="If set, the workers request GPUs",
     )
     parser.add_argument(
         "--gpu_per_worker", default=1, type=int, help="Number of GPUs per worker"
     )
     parser.add_argument(
-        "--scheduler_p", default=None, required=True, type=str, help="The node to submit schedulers"
-    )
-    parser.add_argument(
-        "--worker_p", default=None, required=True, type=str, help="The node to submit workers"
+        "--scheduler_p",
+        default=None,
+        required=True,
+        type=str,
+        help="The node to submit schedulers",
     )
     parser.add_argument(
-        "-t", default="1:00:00", type=str, help="TIMELIMIT"
+        "--worker_p",
+        default=None,
+        required=True,
+        type=str,
+        help="The node to submit workers",
     )
+    parser.add_argument("-t", default="1:00:00", type=str, help="TIMELIMIT")
     parser.add_argument(
         "-J", default="dehb", type=str, help="Prefix to scheduler and worker job names"
     )
@@ -151,7 +164,7 @@ def input_arguments():
         scheduler_file=scheduler,
         worker_name=args.worker_name,
         gpu=args.gpu,
-        gpu_per_worker=args.gpu_per_worker
+        gpu_per_worker=args.gpu_per_worker,
     )
     with open(worker_file, "w") as f:
         f.writelines(cmd)
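For illustration, a self-contained sketch of the shell command that `worker_command` above assembles for a GPU worker. The values are examples only, and the string is rebuilt inline rather than imported, since running `utils/generate_slurm_jobs.py` triggers its argument parsing:

```python
# Mirrors worker_command(scheduler_file, worker_name, gpu=True, gpu_per_worker=1)
# from utils/generate_slurm_jobs.py above; example values only.
scheduler_file = "scheduler.json"
worker_name = "w"
gpu_per_worker = 1

cmd = f'dask-worker --scheduler-file {scheduler_file} --name "{worker_name}_"$SLURM_ARRAY_TASK_ID --no-nanny'
cmd += f' --resources "GPU={gpu_per_worker}"'  # appended only when --gpu is set
cmd += " --reconnect --nprocs 1 --nthreads 1"
print(cmd)
```

Each generated worker script embeds a line like this in an `sbatch` array job, so `$SLURM_ARRAY_TASK_ID` gives every worker a unique name at runtime.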