From 98c7b39d7d217e9e63d501284781c8bae77150a4 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Thu, 30 May 2024 11:24:21 +0200 Subject: [PATCH] example update --- examples/Readme.md | 9 +- examples/configs/epsilon_heuristic.yaml | 4 +- examples/configs/gradient_lr.yaml | 4 +- examples/configs/smac.yaml | 2 +- examples/hypersweeper_tuning.ipynb | 4994 +++++++++++++++++++++++ examples/run_heuristic_schedule.py | 9 + examples/run_reactive_schedule.py | 13 +- examples/schedules.ipynb | 214 + 8 files changed, 5239 insertions(+), 10 deletions(-) create mode 100644 examples/hypersweeper_tuning.ipynb create mode 100644 examples/schedules.ipynb diff --git a/examples/Readme.md b/examples/Readme.md index c5cfabbf7..de7dc428d 100644 --- a/examples/Readme.md +++ b/examples/Readme.md @@ -6,10 +6,17 @@ We provide three different categories of examples: 3. Running a reactive schedule based on the gradient history We use 'hydra' as a command line interface for these experiments, you'll find the corresponding configurations (including some variations on the algorithms and environments) in the 'configs' directory. +The "hypersweeper_tuning" and "schedules" notebooks can help you run these examples and inspect their results. ## 1. Black-Box HPO -We use the 'hypersweeper' package to demonstrate how ARLBench can be used for black-box HPO. Since it's hydra-based, we simply set up a script which takes a configuration, runs it and returns the evaluation reward at the end. You can try a single run like this: +We use the 'hypersweeper' package to demonstrate how ARLBench can be used for black-box HPO. Since it's hydra-based, we simply set up a script which takes a configuration, runs it and returns the evaluation reward at the end. 
First, use pip to install the hypersweeper: + +```bash +pip install hypersweeper +``` + +You can try a single run of arlbench first: ```bash python run_arlbench.py diff --git a/examples/configs/epsilon_heuristic.yaml b/examples/configs/epsilon_heuristic.yaml index 21424f7ea..29ba27856 100644 --- a/examples/configs/epsilon_heuristic.yaml +++ b/examples/configs/epsilon_heuristic.yaml @@ -4,9 +4,9 @@ defaults: hydra: run: - dir: results/${algorithm}_${environment.name}/${autorl.seed} + dir: results/heuristic_schedule_${algorithm}_${environment.name}/${autorl.seed} sweep: - dir: results/${algorithm}_${environment.name}/${autorl.seed} + dir: results/heuristic_schedule_${algorithm}_${environment.name}/${autorl.seed} job: chdir: true diff --git a/examples/configs/gradient_lr.yaml b/examples/configs/gradient_lr.yaml index 7d2ff1bca..df17abc46 100644 --- a/examples/configs/gradient_lr.yaml +++ b/examples/configs/gradient_lr.yaml @@ -4,9 +4,9 @@ defaults: hydra: run: - dir: results/${algorithm}_${environment.name}/${autorl.seed} + dir: results/reactive_schedule_${algorithm}_${environment.name}/${autorl.seed} sweep: - dir: results/${algorithm}_${environment.name}/${autorl.seed} + dir: results/reactive_schedule_${algorithm}_${environment.name}/${autorl.seed} job: chdir: true diff --git a/examples/configs/smac.yaml b/examples/configs/smac.yaml index 639ea4e8f..dd671da52 100644 --- a/examples/configs/smac.yaml +++ b/examples/configs/smac.yaml @@ -31,7 +31,7 @@ hydra: n_workers: 1 output_directory: ${hydra.sweep.dir} seeds: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - maximize: true + maximize: false run: dir: results/smac/${algorithm}_${autorl.env_name}/${smac_seed}/${seed} sweep: diff --git a/examples/hypersweeper_tuning.ipynb b/examples/hypersweeper_tuning.ipynb new file mode 100644 index 000000000..9d3ac9a81 --- /dev/null +++ b/examples/hypersweeper_tuning.ipynb @@ -0,0 +1,4994 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Black-Box 
Hyperparameter Optimization With ARLBench\n", + "\n", + "The most common way of optimizing hyperparameters is probably black-box optimization, i.e. using a reward or cost as the sole decision factor for a configuration's quality.\n", + "There are many great tools that do this integrated into hydra, so let's use one to tune our trusty CartPole DQN: the Hypersweeper." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's use the provided runscript to do a random search:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Couldn't import CARP-S, the CARP-S Hypersweeper will not be available.\n", + "Couldn't import DEHB, the DEHB Hypersweeper will not be available.\n", + "Couldn't import HEBO, the HEBO Hypersweeper will not be available.\n", + "Couldn't import Nevergrad, the Nevergrad Hypersweeper will not be available.\n", + "Config \u001b[1m{\u001b[0m\u001b[32m'hydra'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'run'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'dir'\u001b[0m: \n", + "\u001b[32m'results/sobol/$\u001b[0m\u001b[32m{\u001b[0m\u001b[32malgorithm\u001b[0m\u001b[32m}\u001b[0m\u001b[32m_$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mautorl.env_name\u001b[0m\u001b[32m}\u001b[0m\u001b[32m/$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mautorl.seed\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'sweep'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'dir'\u001b[0m: \u001b[32m'results/sobol/$\u001b[0m\u001b[32m{\u001b[0m\u001b[32malgorithm\u001b[0m\u001b[32m}\u001b[0m\u001b[32m_$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mautorl.env_name\u001b[0m\u001b[32m}\u001b[0m\u001b[32m/$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mautorl.seed\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \n", + "\u001b[32m'subdir'\u001b[0m: 
\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.job.num\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'launcher'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'_target_'\u001b[0m: \n", + "\u001b[32m'hydra._internal.core_plugins.basic_launcher.BasicLauncher'\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'sweeper'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'_target_'\u001b[0m: \u001b[32m'hydra_plugins.hypersweeper.hypersweeper.Hypersweeper'\u001b[0m, \n", + "\u001b[32m'opt_constructor'\u001b[0m: \u001b[32m'hydra_plugins.hyper_rs.hyper_rs.make_rs'\u001b[0m, \u001b[32m'search_space'\u001b[0m: \n", + "\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32msearch_space\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'resume'\u001b[0m: \u001b[3;91mFalse\u001b[0m, \u001b[32m'budget'\u001b[0m: \u001b[3;35mNone\u001b[0m, \u001b[32m'n_trials'\u001b[0m: \u001b[1;36m16\u001b[0m, \n", + "\u001b[32m'budget_variable'\u001b[0m: \u001b[3;35mNone\u001b[0m, \u001b[32m'loading_variable'\u001b[0m: \u001b[3;35mNone\u001b[0m, \u001b[32m'saving_variable'\u001b[0m: \u001b[3;35mNone\u001b[0m, \n", + "\u001b[32m'sweeper_kwargs'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'max_parallelization'\u001b[0m: \u001b[1;36m1\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'help'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'app_name'\u001b[0m: \n", + "\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.job.name\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'header'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.help.app_name\u001b[0m\u001b[32m}\u001b[0m\u001b[32m is powered by Hydra.\\n'\u001b[0m, \n", + "\u001b[32m'footer'\u001b[0m: \u001b[32m'Powered by Hydra \u001b[0m\u001b[32m(\u001b[0m\u001b[32mhttps://hydra.cc\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\nUse --hydra-help to view Hydra \u001b[0m\n", + "\u001b[32mspecific help\\n'\u001b[0m, \u001b[32m'template'\u001b[0m: 
\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.help.header\u001b[0m\u001b[32m}\u001b[0m\u001b[32m\\\u001b[0m\u001b[32mn\u001b[0m\u001b[32m== Configuration groups \u001b[0m\n", + "\u001b[32m==\\nCompose your configuration from those groups \u001b[0m\n", + "\u001b[32m(\u001b[0m\u001b[32mgroup\u001b[0m\u001b[32m=\u001b[0m\u001b[32moption\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\n\\n$APP_CONFIG_GROUPS\\n\\\u001b[0m\u001b[32mn\u001b[0m\u001b[32m== Config ==\\nOverride anything in the \u001b[0m\n", + "\u001b[32mconfig \u001b[0m\u001b[32m(\u001b[0m\u001b[32mfoo.\u001b[0m\u001b[32mbar\u001b[0m\u001b[32m=\u001b[0m\u001b[32mvalue\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\n\\n$CONFIG\\n\\n$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.help.footer\u001b[0m\u001b[32m}\u001b[0m\u001b[32m\\n'\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hydra_help'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'template'\u001b[0m: \u001b[32m\"Hydra \u001b[0m\u001b[32m(\u001b[0m\u001b[32m$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.runtime.version\u001b[0m\u001b[32m}\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\nSee https://hydra.cc for more \u001b[0m\n", + "\u001b[32minfo.\\n\\\u001b[0m\u001b[32mn\u001b[0m\u001b[32m== Flags ==\\n$FLAGS_HELP\\n\\\u001b[0m\u001b[32mn\u001b[0m\u001b[32m== Configuration groups ==\\nCompose your \u001b[0m\n", + "\u001b[32mconfiguration from those groups \u001b[0m\u001b[32m(\u001b[0m\u001b[32mFor example, append hydra/\u001b[0m\u001b[32mjob_logging\u001b[0m\u001b[32m=\u001b[0m\u001b[32mdisabled\u001b[0m\u001b[32m \u001b[0m\n", + "\u001b[32mto command line\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\n\\n$HYDRA_CONFIG_GROUPS\\n\\nUse '--cfg hydra' to Show the Hydra \u001b[0m\n", + "\u001b[32mconfig.\\n\"\u001b[0m, \u001b[32m'hydra_help'\u001b[0m: \u001b[32m'???'\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hydra_logging'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'version'\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m'formatters'\u001b[0m: \n", + 
"\u001b[1m{\u001b[0m\u001b[32m'simple'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'format'\u001b[0m: \u001b[32m'\u001b[0m\u001b[32m[\u001b[0m\u001b[32m%\u001b[0m\u001b[32m(\u001b[0m\u001b[32masctime\u001b[0m\u001b[32m)\u001b[0m\u001b[32ms\u001b[0m\u001b[32m]\u001b[0m\u001b[32m[\u001b[0m\u001b[32mHYDRA\u001b[0m\u001b[32m]\u001b[0m\u001b[32m %\u001b[0m\u001b[32m(\u001b[0m\u001b[32mmessage\u001b[0m\u001b[32m)\u001b[0m\u001b[32ms'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'handlers'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'console'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'class'\u001b[0m: \u001b[32m'logging.StreamHandler'\u001b[0m, \u001b[32m'formatter'\u001b[0m: \u001b[32m'simple'\u001b[0m, \u001b[32m'stream'\u001b[0m: \n", + "\u001b[32m'ext://sys.stdout'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'root'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'level'\u001b[0m: \u001b[32m'INFO'\u001b[0m, \u001b[32m'handlers'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'console'\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m, \n", + "\u001b[32m'loggers'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'logging_example'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'level'\u001b[0m: \u001b[32m'DEBUG'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'disable_existing_loggers'\u001b[0m: \n", + "\u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'job_logging'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'version'\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m'formatters'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'simple'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'format'\u001b[0m: \n", + 
"\u001b[32m'\u001b[0m\u001b[32m[\u001b[0m\u001b[32m%\u001b[0m\u001b[32m(\u001b[0m\u001b[32masctime\u001b[0m\u001b[32m)\u001b[0m\u001b[32ms\u001b[0m\u001b[32m]\u001b[0m\u001b[32m[\u001b[0m\u001b[32m%\u001b[0m\u001b[32m(\u001b[0m\u001b[32mname\u001b[0m\u001b[32m)\u001b[0m\u001b[32ms\u001b[0m\u001b[32m]\u001b[0m\u001b[32m[\u001b[0m\u001b[32m%\u001b[0m\u001b[32m(\u001b[0m\u001b[32mlevelname\u001b[0m\u001b[32m)\u001b[0m\u001b[32ms\u001b[0m\u001b[32m]\u001b[0m\u001b[32m - %\u001b[0m\u001b[32m(\u001b[0m\u001b[32mmessage\u001b[0m\u001b[32m)\u001b[0m\u001b[32ms'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'handlers'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'console'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'class'\u001b[0m: \u001b[32m'logging.StreamHandler'\u001b[0m, \u001b[32m'formatter'\u001b[0m: \u001b[32m'simple'\u001b[0m, \u001b[32m'stream'\u001b[0m: \n", + "\u001b[32m'ext://sys.stdout'\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'file'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'class'\u001b[0m: \u001b[32m'logging.FileHandler'\u001b[0m, \u001b[32m'formatter'\u001b[0m: \n", + "\u001b[32m'simple'\u001b[0m, \u001b[32m'filename'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.runtime.output_dir\u001b[0m\u001b[32m}\u001b[0m\u001b[32m/$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.job.name\u001b[0m\u001b[32m}\u001b[0m\u001b[32m.log'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \n", + "\u001b[32m'root'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'level'\u001b[0m: \u001b[32m'INFO'\u001b[0m, \u001b[32m'handlers'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'console'\u001b[0m, \u001b[32m'file'\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m, \n", + "\u001b[32m'disable_existing_loggers'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'env'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'mode'\u001b[0m: \u001b[1m<\u001b[0m\u001b[1;95mRunMode.MULTIRUN:\u001b[0m\u001b[39m \u001b[0m\u001b[1;36m2\u001b[0m\u001b[1m>\u001b[0m, \n", + 
"\u001b[32m'searchpath'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'callbacks'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'output_subdir'\u001b[0m: \u001b[32m'.hydra'\u001b[0m, \u001b[32m'overrides'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'hydra'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'hydra.\u001b[0m\u001b[32mmode\u001b[0m\u001b[32m=\u001b[0m\u001b[32mMULTIRUN\u001b[0m\u001b[32m'\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'task'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'job'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'run_arlbench'\u001b[0m, \n", + "\u001b[32m'chdir'\u001b[0m: \u001b[3;92mTrue\u001b[0m, \u001b[32m'override_dirname'\u001b[0m: \u001b[32m''\u001b[0m, \u001b[32m'id'\u001b[0m: \u001b[32m'???'\u001b[0m, \u001b[32m'num'\u001b[0m: \u001b[32m'???'\u001b[0m, \u001b[32m'config_name'\u001b[0m:\n", + "\u001b[32m'random_search'\u001b[0m, \u001b[32m'env_set'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'env_copy'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'config'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'override_dirname'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'kv_sep'\u001b[0m: \u001b[32m'='\u001b[0m, \u001b[32m'item_sep'\u001b[0m: \u001b[32m','\u001b[0m, \u001b[32m'exclude_keys'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'runtime'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'version'\u001b[0m: \n", + "\u001b[32m'1.3.2'\u001b[0m, \u001b[32m'version_base'\u001b[0m: \u001b[32m'1.3'\u001b[0m, \u001b[32m'cwd'\u001b[0m: \n", + "\u001b[32m'/Users/theeimer/Documents/git/arlbench/examples'\u001b[0m, \u001b[32m'config_sources'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'path'\u001b[0m: \n", + "\u001b[32m'hydra.conf'\u001b[0m, \u001b[32m'schema'\u001b[0m: \u001b[32m'pkg'\u001b[0m, \u001b[32m'provider'\u001b[0m: 
\u001b[32m'hydra'\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'path'\u001b[0m: \n", + "\u001b[32m'/Users/theeimer/Documents/git/arlbench/examples/configs'\u001b[0m, \u001b[32m'schema'\u001b[0m: \u001b[32m'file'\u001b[0m, \n", + "\u001b[32m'provider'\u001b[0m: \u001b[32m'main'\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'path'\u001b[0m: \u001b[32m'hydra_plugins.hydra_colorlog.conf'\u001b[0m, \u001b[32m'schema'\u001b[0m: \n", + "\u001b[32m'pkg'\u001b[0m, \u001b[32m'provider'\u001b[0m: \u001b[32m'hydra-colorlog'\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'path'\u001b[0m: \u001b[32m''\u001b[0m, \u001b[32m'schema'\u001b[0m: \u001b[32m'structured'\u001b[0m, \n", + "\u001b[32m'provider'\u001b[0m: \u001b[32m'schema'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'output_dir'\u001b[0m: \u001b[32m'???'\u001b[0m, \u001b[32m'choices'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'search_space'\u001b[0m: \u001b[32m'dqn'\u001b[0m, \n", + "\u001b[32m'environment'\u001b[0m: \u001b[32m'cc_cartpole'\u001b[0m, \u001b[32m'algorithm'\u001b[0m: \u001b[32m'dqn'\u001b[0m, \u001b[32m'hydra/env'\u001b[0m: \u001b[32m'default'\u001b[0m, \n", + "\u001b[32m'hydra/callbacks'\u001b[0m: \u001b[3;35mNone\u001b[0m, \u001b[32m'hydra/job_logging'\u001b[0m: \u001b[32m'default'\u001b[0m, \u001b[32m'hydra/hydra_logging'\u001b[0m: \n", + "\u001b[32m'default'\u001b[0m, \u001b[32m'hydra/hydra_help'\u001b[0m: \u001b[32m'default'\u001b[0m, \u001b[32m'hydra/help'\u001b[0m: \u001b[32m'default'\u001b[0m, \n", + "\u001b[32m'hydra/sweeper'\u001b[0m: \u001b[32m'HyperRS'\u001b[0m, \u001b[32m'hydra/launcher'\u001b[0m: \u001b[32m'basic'\u001b[0m, \u001b[32m'hydra/output'\u001b[0m: \n", + "\u001b[32m'default'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'verbose'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'load_checkpoint'\u001b[0m: \u001b[32m''\u001b[0m, \u001b[32m'jax_enable_x64'\u001b[0m: \u001b[3;91mFalse\u001b[0m, \n", + 
"\u001b[32m'search_space.seed'\u001b[0m: \u001b[1;36m0\u001b[0m, \u001b[32m'autorl'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'seed'\u001b[0m: \u001b[1;36m42\u001b[0m, \u001b[32m'env_framework'\u001b[0m: \n", + "\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32menvironment.framework\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'env_name'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32menvironment.name\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'env_kwargs'\u001b[0m: \n", + "\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32menvironment.kwargs\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'eval_env_kwargs'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32menvironment.eval_kwargs\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \n", + "\u001b[32m'n_envs'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32menvironment.n_envs\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'algorithm'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32malgorithm\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'cnn_policy'\u001b[0m: \n", + "\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32menvironment.cnn_policy\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'deterministic_eval'\u001b[0m: \n", + "\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32menvironment.deterministic_eval\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'nas_config'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mnas_config\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \n", + "\u001b[32m'n_total_timesteps'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32menvironment.n_total_timesteps\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'checkpoint'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \n", + "\u001b[32m'checkpoint_name'\u001b[0m: \u001b[32m'default_checkpoint'\u001b[0m, \u001b[32m'checkpoint_dir'\u001b[0m: \u001b[32m'/tmp'\u001b[0m, 
\n", + "\u001b[32m'state_features'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'objectives'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'reward_mean'\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'optimize_objectives'\u001b[0m: \n", + "\u001b[32m'upper'\u001b[0m, \u001b[32m'n_steps'\u001b[0m: \u001b[1;36m10\u001b[0m, \u001b[32m'n_eval_steps'\u001b[0m: \u001b[1;36m10\u001b[0m, \u001b[32m'n_eval_episodes'\u001b[0m: \u001b[1;36m128\u001b[0m\u001b[1m}\u001b[0m, \n", + "\u001b[32m'algorithm'\u001b[0m: \u001b[32m'dqn'\u001b[0m, \u001b[32m'hp_config'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'buffer_prio_sampling'\u001b[0m: \u001b[3;91mFalse\u001b[0m, \u001b[32m'buffer_alpha'\u001b[0m:\n", + "\u001b[1;36m0.9\u001b[0m, \u001b[32m'buffer_beta'\u001b[0m: \u001b[1;36m0.9\u001b[0m, \u001b[32m'buffer_epsilon'\u001b[0m: \u001b[1;36m0.001\u001b[0m, \u001b[32m'buffer_batch_size'\u001b[0m: \u001b[1;36m16\u001b[0m, \n", + "\u001b[32m'buffer_size'\u001b[0m: \u001b[1;36m1000000\u001b[0m, \u001b[32m'initial_epsilon'\u001b[0m: \u001b[1;36m1.0\u001b[0m, \u001b[32m'target_epsilon'\u001b[0m: \u001b[1;36m0.05\u001b[0m, \u001b[32m'gamma'\u001b[0m:\n", + "\u001b[1;36m0.99\u001b[0m, \u001b[32m'gradient_steps'\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m'learning_rate'\u001b[0m: \u001b[1;36m0.0003\u001b[0m, \u001b[32m'learning_starts'\u001b[0m: \u001b[1;36m128\u001b[0m, \n", + "\u001b[32m'normalize_observations'\u001b[0m: \u001b[3;91mFalse\u001b[0m, \u001b[32m'train_freq'\u001b[0m: \u001b[1;36m4\u001b[0m, \u001b[32m'use_target_network'\u001b[0m: \u001b[3;92mTrue\u001b[0m, \n", + "\u001b[32m'target_update_interval'\u001b[0m: \u001b[1;36m1000\u001b[0m, \u001b[32m'tau'\u001b[0m: \u001b[1;36m1.0\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'nas_config'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'activation'\u001b[0m: \n", + "\u001b[32m'tanh'\u001b[0m, \u001b[32m'hidden_size'\u001b[0m: \u001b[1;36m64\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'environment'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: 
\u001b[32m'CartPole-v1'\u001b[0m, \u001b[32m'framework'\u001b[0m: \n", + "\u001b[32m'gymnax'\u001b[0m, \u001b[32m'n_total_timesteps'\u001b[0m: \u001b[1;36m100000.0\u001b[0m, \u001b[32m'kwargs'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'eval_kwargs'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \n", + "\u001b[32m'cnn_policy'\u001b[0m: \u001b[3;91mFalse\u001b[0m, \u001b[32m'deterministic_eval'\u001b[0m: \u001b[3;92mTrue\u001b[0m, \u001b[32m'n_envs'\u001b[0m: \u001b[1;36m8\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'search_space'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'seed'\u001b[0m: \u001b[1;36m0\u001b[0m, \u001b[32m'hyperparameters'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'hp_config.buffer_alpha'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \n", + "\u001b[32m'uniform_float'\u001b[0m, \u001b[32m'upper'\u001b[0m: \u001b[1;36m1.0\u001b[0m, \u001b[32m'lower'\u001b[0m: \u001b[1;36m0.01\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m0.9\u001b[0m, \u001b[32m'log'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \n", + "\u001b[32m'hp_config.buffer_batch_size'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'categorical'\u001b[0m, \u001b[32m'choices'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1;36m4\u001b[0m, \u001b[1;36m8\u001b[0m, \u001b[1;36m16\u001b[0m, \n", + "\u001b[1;36m32\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m16\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.buffer_beta'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'uniform_float'\u001b[0m, \u001b[32m'upper'\u001b[0m:\n", + "\u001b[1;36m1.0\u001b[0m, \u001b[32m'lower'\u001b[0m: \u001b[1;36m0.01\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m0.9\u001b[0m, \u001b[32m'log'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.buffer_epsilon'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'uniform_float'\u001b[0m, \u001b[32m'upper'\u001b[0m: 
\u001b[1;36m0.001\u001b[0m, \u001b[32m'lower'\u001b[0m: \u001b[1;36m1e-07\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m1e-06\u001b[0m, \n", + "\u001b[32m'log'\u001b[0m: \u001b[3;92mTrue\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.buffer_prio_sampling'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'categorical'\u001b[0m, \n", + "\u001b[32m'choices'\u001b[0m: \u001b[1m[\u001b[0m\u001b[3;92mTrue\u001b[0m, \u001b[3;91mFalse\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.buffer_size'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \n", + "\u001b[32m'uniform_int'\u001b[0m, \u001b[32m'upper'\u001b[0m: \u001b[1;36m10000000\u001b[0m, \u001b[32m'lower'\u001b[0m: \u001b[1;36m1024\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m1000000\u001b[0m, \u001b[32m'log'\u001b[0m: \n", + "\u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.initial_epsilon'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'uniform_float'\u001b[0m, \u001b[32m'upper'\u001b[0m: \u001b[1;36m1.0\u001b[0m, \n", + "\u001b[32m'lower'\u001b[0m: \u001b[1;36m0.5\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m1.0\u001b[0m, \u001b[32m'log'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.target_epsilon'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'uniform_float'\u001b[0m, \u001b[32m'upper'\u001b[0m: \u001b[1;36m0.2\u001b[0m, \u001b[32m'lower'\u001b[0m: \u001b[1;36m0.001\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m0.05\u001b[0m, \u001b[32m'log'\u001b[0m: \n", + "\u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.learning_rate'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'uniform_float'\u001b[0m, \u001b[32m'upper'\u001b[0m: \u001b[1;36m0.1\u001b[0m, \n", + "\u001b[32m'lower'\u001b[0m: \u001b[1;36m1e-06\u001b[0m, \u001b[32m'default'\u001b[0m: 
\u001b[1;36m0.0003\u001b[0m, \u001b[32m'log'\u001b[0m: \u001b[3;92mTrue\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.learning_starts'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'uniform_int'\u001b[0m, \u001b[32m'upper'\u001b[0m: \u001b[1;36m1024\u001b[0m, \u001b[32m'lower'\u001b[0m: \u001b[1;36m0\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m128\u001b[0m, \u001b[32m'log'\u001b[0m: \n", + "\u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.use_target_network'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'categorical'\u001b[0m, \u001b[32m'choices'\u001b[0m: \n", + "\u001b[1m[\u001b[0m\u001b[3;92mTrue\u001b[0m, \u001b[3;91mFalse\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[3;92mTrue\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.target_update_interval'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \n", + "\u001b[32m'uniform_int'\u001b[0m, \u001b[32m'upper'\u001b[0m: \u001b[1;36m1000\u001b[0m, \u001b[32m'lower'\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m1000\u001b[0m, \u001b[32m'log'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \n", + "\u001b[32m'hp_config.tau'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'uniform_float'\u001b[0m, \u001b[32m'upper'\u001b[0m: \u001b[1;36m1.0\u001b[0m, \u001b[32m'lower'\u001b[0m: \u001b[1;36m0.01\u001b[0m, \n", + "\u001b[32m'default'\u001b[0m: \u001b[1;36m1.0\u001b[0m, \u001b[32m'log'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'conditions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'child'\u001b[0m: \n", + "\u001b[32m'hp_config.target_update_interval'\u001b[0m, \u001b[32m'parent'\u001b[0m: \u001b[32m'hp_config.use_target_network'\u001b[0m, \n", + "\u001b[32m'value'\u001b[0m: \u001b[3;92mTrue\u001b[0m, \u001b[32m'type'\u001b[0m: \u001b[32m'EQ'\u001b[0m\u001b[1m}\u001b[0m, 
\u001b[1m{\u001b[0m\u001b[32m'child'\u001b[0m: \u001b[32m'hp_config.tau'\u001b[0m, \u001b[32m'parent'\u001b[0m: \n", + "\u001b[32m'hp_config.use_target_network'\u001b[0m, \u001b[32m'value'\u001b[0m: \u001b[3;92mTrue\u001b[0m, \u001b[32m'type'\u001b[0m: \u001b[32m'EQ'\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'child'\u001b[0m: \n", + "\u001b[32m'hp_config.buffer_epsilon'\u001b[0m, \u001b[32m'parent'\u001b[0m: \u001b[32m'hp_config.buffer_prio_sampling'\u001b[0m, \u001b[32m'value'\u001b[0m:\n", + "\u001b[3;92mTrue\u001b[0m, \u001b[32m'type'\u001b[0m: \u001b[32m'EQ'\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'child'\u001b[0m: \u001b[32m'hp_config.buffer_alpha'\u001b[0m, \u001b[32m'parent'\u001b[0m: \n", + "\u001b[32m'hp_config.buffer_prio_sampling'\u001b[0m, \u001b[32m'value'\u001b[0m: \u001b[3;92mTrue\u001b[0m, \u001b[32m'type'\u001b[0m: \u001b[32m'EQ'\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'child'\u001b[0m: \n", + "\u001b[32m'hp_config.buffer_beta'\u001b[0m, \u001b[32m'parent'\u001b[0m: \u001b[32m'hp_config.buffer_prio_sampling'\u001b[0m, \u001b[32m'value'\u001b[0m: \n", + "\u001b[3;92mTrue\u001b[0m, \u001b[32m'type'\u001b[0m: \u001b[32m'EQ'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\n", + "Hydra context\n", + "\u001b[1;35mHydraContext\u001b[0m\u001b[1m(\u001b[0m\n", + " \u001b[33mconfig_loader\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;95mhydra._internal.config_loader_impl.ConfigLoaderImpl\u001b[0m\u001b[39m object at\u001b[0m\n", + "\u001b[1;36m0x319e96830\u001b[0m\u001b[39m>,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcallbacks\u001b[0m\u001b[39m=\u001b[0m\n", + "\u001b[1m)\u001b[0m\n", + "[2024-05-29 17:03:44,978][HYDRA] Sweep doesn't override default config.\n", + "[2024-05-29 17:03:44,988][HYDRA] Starting Sweep\n", + "[2024-05-29 17:03:44,992][HYDRA] Launching 16 jobs locally\n", + "[2024-05-29 17:03:44,992][HYDRA] \t#0 : hp_config.buffer_batch_size=16 
hp_config.buffer_prio_sampling=False hp_config.buffer_size=6028041 hp_config.initial_epsilon=0.7724415914984484 hp_config.learning_rate=0.0001313028028065861 hp_config.learning_starts=662 hp_config.target_epsilon=0.08807985504127581 hp_config.use_target_network=False\n", + "[2024-05-29 17:03:45,088][jax._src.xla_bridge][INFO] - Unable to initialize backend 'cuda': \n", + "[2024-05-29 17:03:45,088][jax._src.xla_bridge][INFO] - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'\n", + "[2024-05-29 17:03:45,089][jax._src.xla_bridge][INFO] - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: dlopen(libtpu.so, 0x0001): tried: 'libtpu.so' (no such file), '/System/Volumes/Preboot/Cryptexes/OSlibtpu.so' (no such file), '/Users/theeimer/anaconda3/envs/arlbench/bin/../lib/libtpu.so' (no such file), '/usr/lib/libtpu.so' (no such file, not in dyld cache), 'libtpu.so' (no such file), '/usr/local/lib/libtpu.so' (no such file), '/usr/lib/libtpu.so' (no such file, not in dyld cache)\n", + "[2024-05-29 17:03:45,090][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:03:45,090][root][INFO] - seed: 42\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "deterministic_eval: ${environment.deterministic_eval}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: ${environment.n_total_timesteps}\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:03:45,090][root][INFO] - Training started.\n", + "[2024-05-29 
17:03:49,273][root][INFO] - Training finished.\n", + "[2024-05-29 17:03:49,280][HYDRA] \t#1 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=True hp_config.buffer_size=872227 hp_config.initial_epsilon=0.5101091987201629 hp_config.learning_rate=0.014557916623227784 hp_config.learning_starts=797 hp_config.target_epsilon=0.17413241750111702 hp_config.use_target_network=False hp_config.buffer_alpha=0.8011669785745563 hp_config.buffer_beta=0.46686456863040254 hp_config.buffer_epsilon=0.00013246974647468452\n", + "[2024-05-29 17:03:49,364][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:03:49,365][root][INFO] - seed: 42\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "deterministic_eval: ${environment.deterministic_eval}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: ${environment.n_total_timesteps}\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:03:49,366][root][INFO] - Training started.\n", + "[2024-05-29 17:03:52,785][root][INFO] - Training finished.\n", + "[2024-05-29 17:03:52,787][HYDRA] \t#2 : hp_config.buffer_batch_size=4 hp_config.buffer_prio_sampling=False hp_config.buffer_size=5218973 hp_config.initial_epsilon=0.7073309699952618 hp_config.learning_rate=2.1027036109989307e-05 hp_config.learning_starts=793 hp_config.target_epsilon=0.09177391611109317 hp_config.use_target_network=False\n", + "[2024-05-29 17:03:52,868][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:03:52,869][root][INFO] - seed: 42\n", + "env_framework: 
${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "deterministic_eval: ${environment.deterministic_eval}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: ${environment.n_total_timesteps}\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:03:52,869][root][INFO] - Training started.\n", + "[2024-05-29 17:03:55,990][root][INFO] - Training finished.\n", + "[2024-05-29 17:03:55,995][HYDRA] \t#3 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=4370896 hp_config.initial_epsilon=0.8488155979636325 hp_config.learning_rate=2.0004484210638855e-06 hp_config.learning_starts=683 hp_config.target_epsilon=0.13445693605401374 hp_config.use_target_network=True hp_config.buffer_alpha=0.1376370346783048 hp_config.buffer_beta=0.322274067414942 hp_config.buffer_epsilon=2.849988343697159e-06 hp_config.target_update_interval=571 hp_config.tau=0.44421549832769713\n", + "[2024-05-29 17:03:56,078][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:03:56,079][root][INFO] - seed: 42\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "deterministic_eval: ${environment.deterministic_eval}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: ${environment.n_total_timesteps}\n", + "checkpoint: []\n", + "checkpoint_name: 
default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:03:56,079][root][INFO] - Training started.\n", + "[2024-05-29 17:03:59,482][root][INFO] - Training finished.\n", + "[2024-05-29 17:03:59,486][HYDRA] \t#4 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=True hp_config.buffer_size=2089577 hp_config.initial_epsilon=0.5806547589424982 hp_config.learning_rate=0.0018430691395340175 hp_config.learning_starts=259 hp_config.target_epsilon=0.09379584379840496 hp_config.use_target_network=True hp_config.buffer_alpha=0.16737988780906454 hp_config.buffer_beta=0.11927138975266208 hp_config.buffer_epsilon=4.2200573971950144e-05 hp_config.target_update_interval=139 hp_config.tau=0.20461653806325297\n", + "[2024-05-29 17:03:59,570][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:03:59,571][root][INFO] - seed: 42\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "deterministic_eval: ${environment.deterministic_eval}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: ${environment.n_total_timesteps}\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:03:59,571][root][INFO] - Training started.\n", + "[2024-05-29 17:04:02,968][root][INFO] - Training finished.\n", + "[2024-05-29 17:04:02,971][HYDRA] \t#5 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False 
hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False\n", + "[2024-05-29 17:04:03,052][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:04:03,053][root][INFO] - seed: 42\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "deterministic_eval: ${environment.deterministic_eval}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: ${environment.n_total_timesteps}\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:04:03,053][root][INFO] - Training started.\n", + "[2024-05-29 17:04:06,164][root][INFO] - Training finished.\n", + "[2024-05-29 17:04:06,167][HYDRA] \t#6 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104\n", + "[2024-05-29 17:04:06,249][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:04:06,250][root][INFO] - seed: 42\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: 
${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "deterministic_eval: ${environment.deterministic_eval}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: ${environment.n_total_timesteps}\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:04:06,250][root][INFO] - Training started.\n", + "[2024-05-29 17:04:09,594][root][INFO] - Training finished.\n", + "[2024-05-29 17:04:09,598][HYDRA] \t#7 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06\n", + "[2024-05-29 17:04:09,680][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:04:09,682][root][INFO] - seed: 42\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "deterministic_eval: ${environment.deterministic_eval}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: ${environment.n_total_timesteps}\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 
10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:04:09,682][root][INFO] - Training started.\n", + "[2024-05-29 17:04:12,849][root][INFO] - Training finished.\n", + "[2024-05-29 17:04:12,856][HYDRA] \t#8 : hp_config.buffer_batch_size=4 hp_config.buffer_prio_sampling=False hp_config.buffer_size=5920837 hp_config.initial_epsilon=0.7861259528954367 hp_config.learning_rate=1.3043921100581493e-05 hp_config.learning_starts=976 hp_config.target_epsilon=0.08997795034490785 hp_config.use_target_network=False\n", + "[2024-05-29 17:04:12,936][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:04:12,937][root][INFO] - seed: 42\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "deterministic_eval: ${environment.deterministic_eval}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: ${environment.n_total_timesteps}\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:04:12,937][root][INFO] - Training started.\n", + "[2024-05-29 17:04:16,111][root][INFO] - Training finished.\n", + "[2024-05-29 17:04:16,117][HYDRA] \t#9 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=6925631 hp_config.initial_epsilon=0.8626271399098202 hp_config.learning_rate=0.0003210863990696919 hp_config.learning_starts=979 hp_config.target_epsilon=0.12915404964669785 hp_config.use_target_network=True hp_config.target_update_interval=661 hp_config.tau=0.29717683113833965\n", + "[2024-05-29 17:04:16,199][root][INFO] - Your AutoRL config 
is:\n", + "[2024-05-29 17:04:16,200][root][INFO] - seed: 42\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "deterministic_eval: ${environment.deterministic_eval}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: ${environment.n_total_timesteps}\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:04:16,200][root][INFO] - Training started.\n", + "[2024-05-29 17:04:19,631][root][INFO] - Training finished.\n", + "[2024-05-29 17:04:19,637][HYDRA] \t#10 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=1355626 hp_config.initial_epsilon=0.6491411629780154 hp_config.learning_rate=0.0007076598458663513 hp_config.learning_starts=605 hp_config.target_epsilon=0.11529072452106619 hp_config.use_target_network=False hp_config.buffer_alpha=0.655582237301672 hp_config.buffer_beta=0.43710425107963424 hp_config.buffer_epsilon=0.00038564382646811196\n", + "[2024-05-29 17:04:19,718][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:04:19,720][root][INFO] - seed: 42\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "deterministic_eval: ${environment.deterministic_eval}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: ${environment.n_total_timesteps}\n", + "checkpoint: []\n", + "checkpoint_name: 
default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:04:19,720][root][INFO] - Training started.\n", + "[2024-05-29 17:04:22,854][root][INFO] - Training finished.\n", + "[2024-05-29 17:04:22,857][HYDRA] \t#11 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=False hp_config.buffer_size=7039189 hp_config.initial_epsilon=0.5501134436561506 hp_config.learning_rate=0.03957428423834935 hp_config.learning_starts=732 hp_config.target_epsilon=0.19977055430700544 hp_config.use_target_network=True hp_config.target_update_interval=124 hp_config.tau=0.849528147029012\n", + "[2024-05-29 17:04:22,936][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:04:22,937][root][INFO] - seed: 42\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "deterministic_eval: ${environment.deterministic_eval}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: ${environment.n_total_timesteps}\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:04:22,937][root][INFO] - Training started.\n", + "[2024-05-29 17:04:26,236][root][INFO] - Training finished.\n", + "[2024-05-29 17:04:26,243][HYDRA] \t#12 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=False hp_config.buffer_size=4072440 hp_config.initial_epsilon=0.534583497727569 hp_config.learning_rate=0.0030700388104416038 
hp_config.learning_starts=464 hp_config.target_epsilon=0.14468906429459924 hp_config.use_target_network=False\n", + "[2024-05-29 17:04:26,324][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:04:26,325][root][INFO] - seed: 42\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "deterministic_eval: ${environment.deterministic_eval}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: ${environment.n_total_timesteps}\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:04:26,325][root][INFO] - Training started.\n", + "[2024-05-29 17:04:29,498][root][INFO] - Training finished.\n", + "[2024-05-29 17:04:29,503][HYDRA] \t#13 : hp_config.buffer_batch_size=4 hp_config.buffer_prio_sampling=False hp_config.buffer_size=544348 hp_config.initial_epsilon=0.5999982624482001 hp_config.learning_rate=1.237681707595936e-06 hp_config.learning_starts=813 hp_config.target_epsilon=0.04556101292401565 hp_config.use_target_network=True hp_config.target_update_interval=165 hp_config.tau=0.6252636174847659\n", + "[2024-05-29 17:04:29,585][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:04:29,586][root][INFO] - seed: 42\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "deterministic_eval: ${environment.deterministic_eval}\n", + "nas_config: 
${nas_config}\n", + "n_total_timesteps: ${environment.n_total_timesteps}\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:04:29,586][root][INFO] - Training started.\n", + "[2024-05-29 17:04:32,983][root][INFO] - Training finished.\n", + "[2024-05-29 17:04:32,985][HYDRA] \t#14 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9342208 hp_config.initial_epsilon=0.806982977982948 hp_config.learning_rate=0.0004766109491313375 hp_config.learning_starts=604 hp_config.target_epsilon=0.14629428387383717 hp_config.use_target_network=True hp_config.buffer_alpha=0.40423885159393097 hp_config.buffer_beta=0.21774531148537093 hp_config.buffer_epsilon=5.55612520113227e-07 hp_config.target_update_interval=945 hp_config.tau=0.7421552870987946\n", + "[2024-05-29 17:04:33,068][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:04:33,069][root][INFO] - seed: 42\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "deterministic_eval: ${environment.deterministic_eval}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: ${environment.n_total_timesteps}\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:04:33,069][root][INFO] - Training started.\n", + "[2024-05-29 17:04:36,643][root][INFO] - Training finished.\n", + 
"[2024-05-29 17:04:36,650][HYDRA] \t#15 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=2544328 hp_config.initial_epsilon=0.5290145801619378 hp_config.learning_rate=0.00014862200906842985 hp_config.learning_starts=319 hp_config.target_epsilon=0.13957235427427644 hp_config.use_target_network=True hp_config.buffer_alpha=0.18780764078403847 hp_config.buffer_beta=0.034431941107417915 hp_config.buffer_epsilon=1.8577981490658524e-07 hp_config.target_update_interval=680 hp_config.tau=0.45915987611048487\n", + "[2024-05-29 17:04:36,732][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:04:36,733][root][INFO] - seed: 42\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "deterministic_eval: ${environment.deterministic_eval}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: ${environment.n_total_timesteps}\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:04:36,733][root][INFO] - Training started.\n", + "[2024-05-29 17:04:39,933][root][INFO] - Training finished.\n", + "[2024-05-29 17:04:39,939][HYDRA] Finished Iteration 1!\n", + "[2024-05-29 17:04:39,949][HYDRA] Current incumbent has a performance of 21.770000457763672.\n", + "[2024-05-29 17:04:39,949][HYDRA] Finished Sweep! 
Total duration was 54.96s, incumbent had a performance of 21.770000457763672\n", + "[2024-05-29 17:04:39,949][HYDRA] The incumbent configuration is Configuration(values={\n", + " 'hp_config.buffer_batch_size': 16,\n", + " 'hp_config.buffer_prio_sampling': False,\n", + " 'hp_config.buffer_size': 6028041,\n", + " 'hp_config.initial_epsilon': 0.7724415914984484,\n", + " 'hp_config.learning_rate': 0.0001313028028065861,\n", + " 'hp_config.learning_starts': 662,\n", + " 'hp_config.target_epsilon': 0.08807985504127581,\n", + " 'hp_config.use_target_network': False,\n", + "})\n", + "\n" + ] + } + ], + "source": [ + "!python run_arlbench.py --config-name=random_search --multirun" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's look how well the random search worked, let's plot the performance of each configuration:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results_file = 'results/sobol/dqn_CartPole-v1/42/runhistory.csv'\n", + "runhistory = pd.read_csv(results_file)\n", + "runhistory.plot(x='run_id', y='performance', kind='line', title='Configuration Performance over Time')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And what worked best?" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best performance: 370.4\n", + "\n", + "run_id 9\n", + "budget NaN\n", + "performance 370.398438\n", + "hp_config.buffer_batch_size 16\n", + "hp_config.buffer_prio_sampling False\n", + "hp_config.buffer_size 6925631\n", + "hp_config.initial_epsilon 0.862627\n", + "hp_config.learning_rate 0.000321\n", + "hp_config.learning_starts 979\n", + "hp_config.target_epsilon 0.129154\n", + "hp_config.use_target_network True\n", + "hp_config.buffer_alpha NaN\n", + "hp_config.buffer_beta NaN\n", + "hp_config.buffer_epsilon NaN\n", + "hp_config.target_update_interval 661.0\n", + "hp_config.tau 0.297177\n", + "Name: 9, dtype: object\n" + ] + } + ], + "source": [ + "print(\"Best performance: \", np.round(max(runhistory['performance']), decimals=2))\n", + "print(\"\")\n", + "print(runhistory.loc[runhistory['performance'].idxmax()])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's try a different optimizer, how about SMAC:" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Couldn't import CARP-S, the CARP-S Hypersweeper will not be available.\n", + "Couldn't import DEHB, the DEHB Hypersweeper will not be available.\n", + "Couldn't import HEBO, the HEBO Hypersweeper will not be available.\n", + "Couldn't import Nevergrad, the Nevergrad Hypersweeper will not be available.\n", + "Config 
\u001b[1m{\u001b[0m\u001b[32m'hydra'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'run'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'dir'\u001b[0m: \n", + "\u001b[32m'results/smac/$\u001b[0m\u001b[32m{\u001b[0m\u001b[32malgorithm\u001b[0m\u001b[32m}\u001b[0m\u001b[32m_$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mautorl.env_name\u001b[0m\u001b[32m}\u001b[0m\u001b[32m/$\u001b[0m\u001b[32m{\u001b[0m\u001b[32msmac_seed\u001b[0m\u001b[32m}\u001b[0m\u001b[32m/$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mseed\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'sweep'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'dir'\u001b[0m: \u001b[32m'results/smac/$\u001b[0m\u001b[32m{\u001b[0m\u001b[32malgorithm\u001b[0m\u001b[32m}\u001b[0m\u001b[32m_$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mautorl.env_name\u001b[0m\u001b[32m}\u001b[0m\u001b[32m/$\u001b[0m\u001b[32m{\u001b[0m\u001b[32msmac_seed\u001b[0m\u001b[32m}\u001b[0m\u001b[32m/$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mseed\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \n", + "\u001b[32m'subdir'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.job.num\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'launcher'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'_target_'\u001b[0m: \n", + "\u001b[32m'hydra._internal.core_plugins.basic_launcher.BasicLauncher'\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'sweeper'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'_target_'\u001b[0m: \u001b[32m'hydra_plugins.hypersweeper.hypersweeper.Hypersweeper'\u001b[0m, \n", + "\u001b[32m'opt_constructor'\u001b[0m: \u001b[32m'hydra_plugins.hyper_smac.hyper_smac.make_smac'\u001b[0m, \n", + "\u001b[32m'search_space'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32msearch_space\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'resume'\u001b[0m: \u001b[3;91mFalse\u001b[0m, \u001b[32m'budget'\u001b[0m: \u001b[3;35mNone\u001b[0m, \u001b[32m'n_trials'\u001b[0m: \n", + "\u001b[1;36m16\u001b[0m, 
\u001b[32m'budget_variable'\u001b[0m: \u001b[32m'autorl.n_total_timesteps'\u001b[0m, \u001b[32m'loading_variable'\u001b[0m: \u001b[3;35mNone\u001b[0m, \n", + "\u001b[32m'saving_variable'\u001b[0m: \u001b[3;35mNone\u001b[0m, \u001b[32m'sweeper_kwargs'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'min_budget'\u001b[0m: \u001b[1;36m10000\u001b[0m, \u001b[32m'max_budget'\u001b[0m: \n", + "\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32menvironment.n_total_timesteps\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'optimizer_kwargs'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'smac_facade'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'_target_'\u001b[0m: \u001b[32m'smac.facade.multi_fidelity_facade.MultiFidelityFacade'\u001b[0m, \n", + "\u001b[32m'_partial_'\u001b[0m: \u001b[3;92mTrue\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'intensifier'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'_target_'\u001b[0m: \n", + "\u001b[32m'smac.facade.multi_fidelity_facade.MultiFidelityFacade.get_intensifier'\u001b[0m, \n", + "\u001b[32m'_partial_'\u001b[0m: \u001b[3;92mTrue\u001b[0m, \u001b[32m'eta'\u001b[0m: \u001b[1;36m2\u001b[0m, \u001b[32m'n_seeds'\u001b[0m: \u001b[1;36m1\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'scenario'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'seed'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32msmac_seed\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m,\n", + "\u001b[32m'n_trials'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.sweeper.n_trials\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'min_budget'\u001b[0m: \n", + "\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.sweeper.sweeper_kwargs.min_budget\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'max_budget'\u001b[0m: \n", + "\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32menvironment.n_total_timesteps\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'deterministic'\u001b[0m: \u001b[3;91mFalse\u001b[0m, \u001b[32m'n_workers'\u001b[0m: 
\u001b[1;36m1\u001b[0m, \n", + "\u001b[32m'output_directory'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.sweep.dir\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'seeds'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1;36m0\u001b[0m, \u001b[1;36m1\u001b[0m, \u001b[1;36m2\u001b[0m, \u001b[1;36m3\u001b[0m, \u001b[1;36m4\u001b[0m, \u001b[1;36m5\u001b[0m, \u001b[1;36m6\u001b[0m, \u001b[1;36m7\u001b[0m, \u001b[1;36m8\u001b[0m,\n", + "\u001b[1;36m9\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'maximize'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'help'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'app_name'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.job.name\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'header'\u001b[0m: \n", + "\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.help.app_name\u001b[0m\u001b[32m}\u001b[0m\u001b[32m is powered by Hydra.\\n'\u001b[0m, \u001b[32m'footer'\u001b[0m: \u001b[32m'Powered by Hydra \u001b[0m\n", + "\u001b[32m(\u001b[0m\u001b[32mhttps://hydra.cc\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\nUse --hydra-help to view Hydra specific help\\n'\u001b[0m, \u001b[32m'template'\u001b[0m:\n", + "\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.help.header\u001b[0m\u001b[32m}\u001b[0m\u001b[32m\\\u001b[0m\u001b[32mn\u001b[0m\u001b[32m== Configuration groups ==\\nCompose your configuration \u001b[0m\n", + "\u001b[32mfrom those groups \u001b[0m\u001b[32m(\u001b[0m\u001b[32mgroup\u001b[0m\u001b[32m=\u001b[0m\u001b[32moption\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\n\\n$APP_CONFIG_GROUPS\\n\\\u001b[0m\u001b[32mn\u001b[0m\u001b[32m== Config ==\\nOverride\u001b[0m\n", + "\u001b[32manything in the config 
\u001b[0m\u001b[32m(\u001b[0m\u001b[32mfoo.\u001b[0m\u001b[32mbar\u001b[0m\u001b[32m=\u001b[0m\u001b[32mvalue\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\n\\n$CONFIG\\n\\n$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.help.footer\u001b[0m\u001b[32m}\u001b[0m\u001b[32m\\n'\u001b[0m\u001b[1m}\u001b[0m, \n", + "\u001b[32m'hydra_help'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'template'\u001b[0m: \u001b[32m\"Hydra \u001b[0m\u001b[32m(\u001b[0m\u001b[32m$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.runtime.version\u001b[0m\u001b[32m}\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\nSee \u001b[0m\n", + "\u001b[32mhttps://hydra.cc for more info.\\n\\\u001b[0m\u001b[32mn\u001b[0m\u001b[32m== Flags ==\\n$FLAGS_HELP\\n\\\u001b[0m\u001b[32mn\u001b[0m\u001b[32m== Configuration \u001b[0m\n", + "\u001b[32mgroups ==\\nCompose your configuration from those groups \u001b[0m\u001b[32m(\u001b[0m\u001b[32mFor example, append \u001b[0m\n", + "\u001b[32mhydra/\u001b[0m\u001b[32mjob_logging\u001b[0m\u001b[32m=\u001b[0m\u001b[32mdisabled\u001b[0m\u001b[32m to command line\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\n\\n$HYDRA_CONFIG_GROUPS\\n\\nUse \u001b[0m\n", + "\u001b[32m'--cfg hydra' to Show the Hydra config.\\n\"\u001b[0m, \u001b[32m'hydra_help'\u001b[0m: \u001b[32m'???'\u001b[0m\u001b[1m}\u001b[0m, \n", + "\u001b[32m'hydra_logging'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'version'\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m'formatters'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'simple'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'format'\u001b[0m: \n", + "\u001b[32m'\u001b[0m\u001b[32m[\u001b[0m\u001b[32m%\u001b[0m\u001b[32m(\u001b[0m\u001b[32masctime\u001b[0m\u001b[32m)\u001b[0m\u001b[32ms\u001b[0m\u001b[32m]\u001b[0m\u001b[32m[\u001b[0m\u001b[32mHYDRA\u001b[0m\u001b[32m]\u001b[0m\u001b[32m %\u001b[0m\u001b[32m(\u001b[0m\u001b[32mmessage\u001b[0m\u001b[32m)\u001b[0m\u001b[32ms'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'handlers'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'console'\u001b[0m: 
\u001b[1m{\u001b[0m\u001b[32m'class'\u001b[0m: \n", + "\u001b[32m'logging.StreamHandler'\u001b[0m, \u001b[32m'formatter'\u001b[0m: \u001b[32m'simple'\u001b[0m, \u001b[32m'stream'\u001b[0m: \u001b[32m'ext://sys.stdout'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \n", + "\u001b[32m'root'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'level'\u001b[0m: \u001b[32m'INFO'\u001b[0m, \u001b[32m'handlers'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'console'\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'loggers'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'logging_example'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'level'\u001b[0m: \u001b[32m'DEBUG'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'disable_existing_loggers'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \n", + "\u001b[32m'job_logging'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'version'\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m'formatters'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'simple'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'format'\u001b[0m: \n", + "\u001b[32m'\u001b[0m\u001b[32m[\u001b[0m\u001b[32m%\u001b[0m\u001b[32m(\u001b[0m\u001b[32masctime\u001b[0m\u001b[32m)\u001b[0m\u001b[32ms\u001b[0m\u001b[32m]\u001b[0m\u001b[32m[\u001b[0m\u001b[32m%\u001b[0m\u001b[32m(\u001b[0m\u001b[32mname\u001b[0m\u001b[32m)\u001b[0m\u001b[32ms\u001b[0m\u001b[32m]\u001b[0m\u001b[32m[\u001b[0m\u001b[32m%\u001b[0m\u001b[32m(\u001b[0m\u001b[32mlevelname\u001b[0m\u001b[32m)\u001b[0m\u001b[32ms\u001b[0m\u001b[32m]\u001b[0m\u001b[32m - %\u001b[0m\u001b[32m(\u001b[0m\u001b[32mmessage\u001b[0m\u001b[32m)\u001b[0m\u001b[32ms'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'handlers'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'console'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'class'\u001b[0m: \u001b[32m'logging.StreamHandler'\u001b[0m, \u001b[32m'formatter'\u001b[0m: \u001b[32m'simple'\u001b[0m, \u001b[32m'stream'\u001b[0m: \n", + "\u001b[32m'ext://sys.stdout'\u001b[0m\u001b[1m}\u001b[0m, 
\u001b[32m'file'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'class'\u001b[0m: \u001b[32m'logging.FileHandler'\u001b[0m, \u001b[32m'formatter'\u001b[0m: \n", + "\u001b[32m'simple'\u001b[0m, \u001b[32m'filename'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.runtime.output_dir\u001b[0m\u001b[32m}\u001b[0m\u001b[32m/$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mhydra.job.name\u001b[0m\u001b[32m}\u001b[0m\u001b[32m.log'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \n", + "\u001b[32m'root'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'level'\u001b[0m: \u001b[32m'INFO'\u001b[0m, \u001b[32m'handlers'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'console'\u001b[0m, \u001b[32m'file'\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m, \n", + "\u001b[32m'disable_existing_loggers'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'env'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'mode'\u001b[0m: \u001b[1m<\u001b[0m\u001b[1;95mRunMode.MULTIRUN:\u001b[0m\u001b[39m \u001b[0m\u001b[1;36m2\u001b[0m\u001b[1m>\u001b[0m, \n", + "\u001b[32m'searchpath'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'callbacks'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'output_subdir'\u001b[0m: \u001b[32m'.hydra'\u001b[0m, \u001b[32m'overrides'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'hydra'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'hydra.\u001b[0m\u001b[32mmode\u001b[0m\u001b[32m=\u001b[0m\u001b[32mMULTIRUN\u001b[0m\u001b[32m'\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'task'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'job'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'run_arlbench'\u001b[0m, \n", + "\u001b[32m'chdir'\u001b[0m: \u001b[3;92mTrue\u001b[0m, \u001b[32m'override_dirname'\u001b[0m: \u001b[32m''\u001b[0m, \u001b[32m'id'\u001b[0m: \u001b[32m'???'\u001b[0m, \u001b[32m'num'\u001b[0m: \u001b[32m'???'\u001b[0m, \u001b[32m'config_name'\u001b[0m:\n", + "\u001b[32m'smac'\u001b[0m, 
\u001b[32m'env_set'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'env_copy'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'config'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'override_dirname'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'kv_sep'\u001b[0m:\n", + "\u001b[32m'='\u001b[0m, \u001b[32m'item_sep'\u001b[0m: \u001b[32m','\u001b[0m, \u001b[32m'exclude_keys'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'runtime'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'version'\u001b[0m: \u001b[32m'1.3.2'\u001b[0m, \n", + "\u001b[32m'version_base'\u001b[0m: \u001b[32m'1.3'\u001b[0m, \u001b[32m'cwd'\u001b[0m: \u001b[32m'/Users/theeimer/Documents/git/arlbench/examples'\u001b[0m,\n", + "\u001b[32m'config_sources'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'path'\u001b[0m: \u001b[32m'hydra.conf'\u001b[0m, \u001b[32m'schema'\u001b[0m: \u001b[32m'pkg'\u001b[0m, \u001b[32m'provider'\u001b[0m: \u001b[32m'hydra'\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[1m{\u001b[0m\u001b[32m'path'\u001b[0m: \u001b[32m'/Users/theeimer/Documents/git/arlbench/examples/configs'\u001b[0m, \u001b[32m'schema'\u001b[0m: \n", + "\u001b[32m'file'\u001b[0m, \u001b[32m'provider'\u001b[0m: \u001b[32m'main'\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'path'\u001b[0m: \u001b[32m'hydra_plugins.hydra_colorlog.conf'\u001b[0m, \n", + "\u001b[32m'schema'\u001b[0m: \u001b[32m'pkg'\u001b[0m, \u001b[32m'provider'\u001b[0m: \u001b[32m'hydra-colorlog'\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'path'\u001b[0m: \u001b[32m''\u001b[0m, \u001b[32m'schema'\u001b[0m: \n", + "\u001b[32m'structured'\u001b[0m, \u001b[32m'provider'\u001b[0m: \u001b[32m'schema'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'output_dir'\u001b[0m: \u001b[32m'???'\u001b[0m, \u001b[32m'choices'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'search_space'\u001b[0m: \u001b[32m'dqn'\u001b[0m, 
\u001b[32m'environment'\u001b[0m: \u001b[32m'cc_cartpole'\u001b[0m, \u001b[32m'algorithm'\u001b[0m: \u001b[32m'dqn'\u001b[0m, \n", + "\u001b[32m'hydra/env'\u001b[0m: \u001b[32m'default'\u001b[0m, \u001b[32m'hydra/callbacks'\u001b[0m: \u001b[3;35mNone\u001b[0m, \u001b[32m'hydra/job_logging'\u001b[0m: \u001b[32m'default'\u001b[0m,\n", + "\u001b[32m'hydra/hydra_logging'\u001b[0m: \u001b[32m'default'\u001b[0m, \u001b[32m'hydra/hydra_help'\u001b[0m: \u001b[32m'default'\u001b[0m, \u001b[32m'hydra/help'\u001b[0m: \n", + "\u001b[32m'default'\u001b[0m, \u001b[32m'hydra/sweeper'\u001b[0m: \u001b[32m'HyperSMAC'\u001b[0m, \u001b[32m'hydra/launcher'\u001b[0m: \u001b[32m'basic'\u001b[0m, \n", + "\u001b[32m'hydra/output'\u001b[0m: \u001b[32m'default'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'verbose'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'load_checkpoint'\u001b[0m: \u001b[32m''\u001b[0m, \n", + "\u001b[32m'smac_seed'\u001b[0m: \u001b[1;36m0\u001b[0m, \u001b[32m'jax_enable_x64'\u001b[0m: \u001b[3;91mFalse\u001b[0m, \u001b[32m'seed'\u001b[0m: \u001b[1;36m42\u001b[0m, \u001b[32m'autorl'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'seed'\u001b[0m: \n", + "\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mseed\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'env_framework'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32menvironment.framework\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'env_name'\u001b[0m: \n", + "\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32menvironment.name\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'env_kwargs'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32menvironment.kwargs\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'eval_env_kwargs'\u001b[0m:\n", + "\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32menvironment.eval_kwargs\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'n_envs'\u001b[0m: 
\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32menvironment.n_envs\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'algorithm'\u001b[0m: \n", + "\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32malgorithm\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'cnn_policy'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32menvironment.cnn_policy\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'nas_config'\u001b[0m: \n", + "\u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32mnas_config\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \u001b[32m'n_total_timesteps'\u001b[0m: \u001b[32m'$\u001b[0m\u001b[32m{\u001b[0m\u001b[32menvironment.n_total_timesteps\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m, \n", + "\u001b[32m'checkpoint'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'checkpoint_name'\u001b[0m: \u001b[32m'default_checkpoint'\u001b[0m, \u001b[32m'checkpoint_dir'\u001b[0m: \n", + "\u001b[32m'/tmp'\u001b[0m, \u001b[32m'state_features'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'objectives'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'reward_mean'\u001b[0m\u001b[1m]\u001b[0m, \n", + "\u001b[32m'optimize_objectives'\u001b[0m: \u001b[32m'upper'\u001b[0m, \u001b[32m'n_steps'\u001b[0m: \u001b[1;36m10\u001b[0m, \u001b[32m'n_eval_steps'\u001b[0m: \u001b[1;36m10\u001b[0m, \n", + "\u001b[32m'n_eval_episodes'\u001b[0m: \u001b[1;36m128\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'algorithm'\u001b[0m: \u001b[32m'dqn'\u001b[0m, \u001b[32m'hp_config'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'buffer_prio_sampling'\u001b[0m: \u001b[3;91mFalse\u001b[0m, \u001b[32m'buffer_alpha'\u001b[0m: \u001b[1;36m0.9\u001b[0m, \u001b[32m'buffer_beta'\u001b[0m: \u001b[1;36m0.9\u001b[0m, \n", + "\u001b[32m'buffer_epsilon'\u001b[0m: \u001b[1;36m0.001\u001b[0m, \u001b[32m'buffer_batch_size'\u001b[0m: \u001b[1;36m16\u001b[0m, \u001b[32m'buffer_size'\u001b[0m: \u001b[1;36m1000000\u001b[0m, \n", + 
"\u001b[32m'initial_epsilon'\u001b[0m: \u001b[1;36m1.0\u001b[0m, \u001b[32m'target_epsilon'\u001b[0m: \u001b[1;36m0.05\u001b[0m, \u001b[32m'gamma'\u001b[0m: \u001b[1;36m0.99\u001b[0m, \u001b[32m'gradient_steps'\u001b[0m:\n", + "\u001b[1;36m1\u001b[0m, \u001b[32m'learning_rate'\u001b[0m: \u001b[1;36m0.0003\u001b[0m, \u001b[32m'learning_starts'\u001b[0m: \u001b[1;36m128\u001b[0m, \u001b[32m'normalize_observations'\u001b[0m: \n", + "\u001b[3;91mFalse\u001b[0m, \u001b[32m'train_freq'\u001b[0m: \u001b[1;36m4\u001b[0m, \u001b[32m'use_target_network'\u001b[0m: \u001b[3;92mTrue\u001b[0m, \u001b[32m'target_update_interval'\u001b[0m: \n", + "\u001b[1;36m1000\u001b[0m, \u001b[32m'tau'\u001b[0m: \u001b[1;36m1.0\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'nas_config'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'activation'\u001b[0m: \u001b[32m'tanh'\u001b[0m, \u001b[32m'hidden_size'\u001b[0m: \u001b[1;36m64\u001b[0m\u001b[1m}\u001b[0m, \n", + "\u001b[32m'environment'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'CartPole-v1'\u001b[0m, \u001b[32m'framework'\u001b[0m: \u001b[32m'gymnax'\u001b[0m, \n", + "\u001b[32m'n_total_timesteps'\u001b[0m: \u001b[1;36m100000.0\u001b[0m, \u001b[32m'kwargs'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'eval_kwargs'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'cnn_policy'\u001b[0m: \n", + "\u001b[3;91mFalse\u001b[0m, \u001b[32m'deterministic_eval'\u001b[0m: \u001b[3;92mTrue\u001b[0m, \u001b[32m'n_envs'\u001b[0m: \u001b[1;36m8\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'search_space'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'seed'\u001b[0m: \u001b[1;36m0\u001b[0m, \n", + "\u001b[32m'hyperparameters'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'hp_config.buffer_alpha'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'uniform_float'\u001b[0m, \u001b[32m'upper'\u001b[0m:\n", + "\u001b[1;36m1.0\u001b[0m, \u001b[32m'lower'\u001b[0m: \u001b[1;36m0.01\u001b[0m, \u001b[32m'default'\u001b[0m: 
\u001b[1;36m0.9\u001b[0m, \u001b[32m'log'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \n", + "\u001b[32m'hp_config.buffer_batch_size'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'categorical'\u001b[0m, \u001b[32m'choices'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1;36m4\u001b[0m, \u001b[1;36m8\u001b[0m, \u001b[1;36m16\u001b[0m, \n", + "\u001b[1;36m32\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m16\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.buffer_beta'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'uniform_float'\u001b[0m, \u001b[32m'upper'\u001b[0m:\n", + "\u001b[1;36m1.0\u001b[0m, \u001b[32m'lower'\u001b[0m: \u001b[1;36m0.01\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m0.9\u001b[0m, \u001b[32m'log'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.buffer_epsilon'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'uniform_float'\u001b[0m, \u001b[32m'upper'\u001b[0m: \u001b[1;36m0.001\u001b[0m, \u001b[32m'lower'\u001b[0m: \u001b[1;36m1e-07\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m1e-06\u001b[0m, \n", + "\u001b[32m'log'\u001b[0m: \u001b[3;92mTrue\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.buffer_prio_sampling'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'categorical'\u001b[0m, \n", + "\u001b[32m'choices'\u001b[0m: \u001b[1m[\u001b[0m\u001b[3;92mTrue\u001b[0m, \u001b[3;91mFalse\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.buffer_size'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \n", + "\u001b[32m'uniform_int'\u001b[0m, \u001b[32m'upper'\u001b[0m: \u001b[1;36m10000000\u001b[0m, \u001b[32m'lower'\u001b[0m: \u001b[1;36m1024\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m1000000\u001b[0m, \u001b[32m'log'\u001b[0m: \n", + "\u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, 
\u001b[32m'hp_config.initial_epsilon'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'uniform_float'\u001b[0m, \u001b[32m'upper'\u001b[0m: \u001b[1;36m1.0\u001b[0m, \n", + "\u001b[32m'lower'\u001b[0m: \u001b[1;36m0.5\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m1.0\u001b[0m, \u001b[32m'log'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.target_epsilon'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'uniform_float'\u001b[0m, \u001b[32m'upper'\u001b[0m: \u001b[1;36m0.2\u001b[0m, \u001b[32m'lower'\u001b[0m: \u001b[1;36m0.001\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m0.05\u001b[0m, \u001b[32m'log'\u001b[0m: \n", + "\u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.learning_rate'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'uniform_float'\u001b[0m, \u001b[32m'upper'\u001b[0m: \u001b[1;36m0.1\u001b[0m, \n", + "\u001b[32m'lower'\u001b[0m: \u001b[1;36m1e-06\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m0.0003\u001b[0m, \u001b[32m'log'\u001b[0m: \u001b[3;92mTrue\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.learning_starts'\u001b[0m: \n", + "\u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'uniform_int'\u001b[0m, \u001b[32m'upper'\u001b[0m: \u001b[1;36m1024\u001b[0m, \u001b[32m'lower'\u001b[0m: \u001b[1;36m0\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m128\u001b[0m, \u001b[32m'log'\u001b[0m: \n", + "\u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.use_target_network'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'categorical'\u001b[0m, \u001b[32m'choices'\u001b[0m: \n", + "\u001b[1m[\u001b[0m\u001b[3;92mTrue\u001b[0m, \u001b[3;91mFalse\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[3;92mTrue\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'hp_config.target_update_interval'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \n", + "\u001b[32m'uniform_int'\u001b[0m, 
\u001b[32m'upper'\u001b[0m: \u001b[1;36m1000\u001b[0m, \u001b[32m'lower'\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m'default'\u001b[0m: \u001b[1;36m1000\u001b[0m, \u001b[32m'log'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m, \n", + "\u001b[32m'hp_config.tau'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'uniform_float'\u001b[0m, \u001b[32m'upper'\u001b[0m: \u001b[1;36m1.0\u001b[0m, \u001b[32m'lower'\u001b[0m: \u001b[1;36m0.01\u001b[0m, \n", + "\u001b[32m'default'\u001b[0m: \u001b[1;36m1.0\u001b[0m, \u001b[32m'log'\u001b[0m: \u001b[3;91mFalse\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'conditions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'child'\u001b[0m: \n", + "\u001b[32m'hp_config.target_update_interval'\u001b[0m, \u001b[32m'parent'\u001b[0m: \u001b[32m'hp_config.use_target_network'\u001b[0m, \n", + "\u001b[32m'value'\u001b[0m: \u001b[3;92mTrue\u001b[0m, \u001b[32m'type'\u001b[0m: \u001b[32m'EQ'\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'child'\u001b[0m: \u001b[32m'hp_config.tau'\u001b[0m, \u001b[32m'parent'\u001b[0m: \n", + "\u001b[32m'hp_config.use_target_network'\u001b[0m, \u001b[32m'value'\u001b[0m: \u001b[3;92mTrue\u001b[0m, \u001b[32m'type'\u001b[0m: \u001b[32m'EQ'\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'child'\u001b[0m: \n", + "\u001b[32m'hp_config.buffer_epsilon'\u001b[0m, \u001b[32m'parent'\u001b[0m: \u001b[32m'hp_config.buffer_prio_sampling'\u001b[0m, \u001b[32m'value'\u001b[0m:\n", + "\u001b[3;92mTrue\u001b[0m, \u001b[32m'type'\u001b[0m: \u001b[32m'EQ'\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'child'\u001b[0m: \u001b[32m'hp_config.buffer_alpha'\u001b[0m, \u001b[32m'parent'\u001b[0m: \n", + "\u001b[32m'hp_config.buffer_prio_sampling'\u001b[0m, \u001b[32m'value'\u001b[0m: \u001b[3;92mTrue\u001b[0m, \u001b[32m'type'\u001b[0m: \u001b[32m'EQ'\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'child'\u001b[0m: \n", + 
"\u001b[32m'hp_config.buffer_beta'\u001b[0m, \u001b[32m'parent'\u001b[0m: \u001b[32m'hp_config.buffer_prio_sampling'\u001b[0m, \u001b[32m'value'\u001b[0m: \n", + "\u001b[3;92mTrue\u001b[0m, \u001b[32m'type'\u001b[0m: \u001b[32m'EQ'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\n", + "Hydra context\n", + "\u001b[1;35mHydraContext\u001b[0m\u001b[1m(\u001b[0m\n", + " \u001b[33mconfig_loader\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;95mhydra._internal.config_loader_impl.ConfigLoaderImpl\u001b[0m\u001b[39m object at\u001b[0m\n", + "\u001b[1;36m0x317196800\u001b[0m\u001b[39m>,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcallbacks\u001b[0m\u001b[39m=\u001b[0m\n", + "\u001b[1m)\u001b[0m\n", + "[2024-05-29 17:28:19,872][HYDRA] Sweep doesn't override default config.\n", + "[INFO][abstract_initial_design.py:95] Reducing the number of initial configurations from 130 to 4 (max_ratio == 0.25).\n", + "[INFO][abstract_initial_design.py:147] Using 4 initial design configurations and 0 additional configurations.\n", + "[INFO][hypersweeper_sweeper.py:402] Starting Sweep\n", + "[INFO][successive_halving.py:164] Successive Halving uses budget type BUDGETS with eta 2, min budget 10000, and max budget 100000.0.\n", + "[INFO][successive_halving.py:323] Number of configs in stage:\n", + "[INFO][successive_halving.py:325] --- Bracket 0: [8, 4, 2, 1]\n", + "[INFO][successive_halving.py:325] --- Bracket 1: [6, 3, 1]\n", + "[INFO][successive_halving.py:325] --- Bracket 2: [4, 2]\n", + "[INFO][successive_halving.py:325] --- Bracket 3: [4]\n", + "[INFO][successive_halving.py:327] Budgets in stage:\n", + "[INFO][successive_halving.py:329] --- Bracket 0: [12500.0, 25000.0, 50000.0, 100000.0]\n", + "[INFO][successive_halving.py:329] --- Bracket 1: [25000.0, 50000.0, 100000.0]\n", + "[INFO][successive_halving.py:329] --- Bracket 2: [50000.0, 100000.0]\n", + "[INFO][successive_halving.py:329] --- Bracket 3: [100000.0]\n", + "[2024-05-29 17:28:19,884][HYDRA] 
Launching 10 jobs locally\n", + "[2024-05-29 17:28:19,884][HYDRA] \t#0 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=12500.0 seed=0\n", + "[2024-05-29 17:28:19,984][jax._src.xla_bridge][INFO] - Unable to initialize backend 'cuda': \n", + "[2024-05-29 17:28:19,985][jax._src.xla_bridge][INFO] - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'\n", + "[2024-05-29 17:28:19,985][jax._src.xla_bridge][INFO] - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: dlopen(libtpu.so, 0x0001): tried: 'libtpu.so' (no such file), '/System/Volumes/Preboot/Cryptexes/OSlibtpu.so' (no such file), '/Users/theeimer/anaconda3/envs/arlbench/bin/../lib/libtpu.so' (no such file), '/usr/lib/libtpu.so' (no such file, not in dyld cache), 'libtpu.so' (no such file), '/usr/local/lib/libtpu.so' (no such file), '/usr/lib/libtpu.so' (no such file, not in dyld cache)\n", + "[2024-05-29 17:28:19,985][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:28:19,986][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + 
"optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:28:19,986][root][INFO] - Training started.\n", + "[2024-05-29 17:28:23,854][root][INFO] - Training finished.\n", + "[2024-05-29 17:28:23,862][HYDRA] \t#1 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=12500.0 seed=1\n", + "[2024-05-29 17:28:23,948][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:28:23,949][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:28:23,949][root][INFO] - Training started.\n", + "[2024-05-29 17:28:26,876][root][INFO] - Training finished.\n", + "[2024-05-29 17:28:26,882][HYDRA] \t#2 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False 
hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=12500.0 seed=2\n", + "[2024-05-29 17:28:26,967][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:28:26,967][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:28:26,968][root][INFO] - Training started.\n", + "[2024-05-29 17:28:30,420][root][INFO] - Training finished.\n", + "[2024-05-29 17:28:30,427][HYDRA] \t#3 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=12500.0 seed=3\n", + "[2024-05-29 17:28:30,513][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:28:30,513][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: 
${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:28:30,513][root][INFO] - Training started.\n", + "[2024-05-29 17:28:33,521][root][INFO] - Training finished.\n", + "[2024-05-29 17:28:33,528][HYDRA] \t#4 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=12500.0 seed=4\n", + "[2024-05-29 17:28:33,720][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:28:33,720][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:28:33,721][root][INFO] - Training started.\n", + "[2024-05-29 17:28:36,621][root][INFO] - Training finished.\n", + "[2024-05-29 17:28:36,628][HYDRA] \t#5 : hp_config.buffer_batch_size=16 
hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=12500.0 seed=5\n", + "[2024-05-29 17:28:36,714][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:28:36,715][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:28:36,715][root][INFO] - Training started.\n", + "[2024-05-29 17:28:39,570][root][INFO] - Training finished.\n", + "[2024-05-29 17:28:39,576][HYDRA] \t#6 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=12500.0 seed=6\n", + "[2024-05-29 17:28:39,658][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:28:39,659][root][INFO] - seed: ${seed}\n", + 
"env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:28:39,659][root][INFO] - Training started.\n", + "[2024-05-29 17:28:42,715][root][INFO] - Training finished.\n", + "[2024-05-29 17:28:42,721][HYDRA] \t#7 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=12500.0 seed=7\n", + "[2024-05-29 17:28:42,806][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:28:42,806][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + 
"n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:28:42,806][root][INFO] - Training started.\n", + "[2024-05-29 17:28:45,653][root][INFO] - Training finished.\n", + "[2024-05-29 17:28:45,660][HYDRA] \t#8 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=12500.0 seed=8\n", + "[2024-05-29 17:28:45,747][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:28:45,748][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:28:45,748][root][INFO] - Training started.\n", + "[2024-05-29 17:28:48,571][root][INFO] - Training finished.\n", + "[2024-05-29 17:28:48,576][HYDRA] \t#9 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 
hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=12500.0 seed=9\n", + "[2024-05-29 17:28:48,666][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:28:48,666][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:28:48,666][root][INFO] - Training started.\n", + "[2024-05-29 17:28:51,599][root][INFO] - Training finished.\n", + "[2024-05-29 17:28:51,609][HYDRA] Added config 3ae65a as new incumbent because there are no incumbents yet.\n", + "[2024-05-29 17:28:51,610][HYDRA] Finished Iteration 1!\n", + "[2024-05-29 17:28:51,610][HYDRA] Current incumbent has a performance of 35.34000015258789.\n", + "[2024-05-29 17:28:51,612][HYDRA] Launching 10 jobs locally\n", + "[2024-05-29 17:28:51,612][HYDRA] \t#10 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3835046 hp_config.initial_epsilon=0.9627983191463305 hp_config.learning_rate=0.014557916623227784 hp_config.learning_starts=819 hp_config.target_epsilon=0.12834428324417724 hp_config.use_target_network=True hp_config.target_update_interval=671 hp_config.tau=0.3700736632331964 autorl.n_total_timesteps=12500.0 seed=0\n", + "[2024-05-29 17:28:51,700][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:28:51,700][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + 
"env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:28:51,700][root][INFO] - Training started.\n", + "[2024-05-29 17:28:54,804][root][INFO] - Training finished.\n", + "[2024-05-29 17:28:54,808][HYDRA] \t#11 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3835046 hp_config.initial_epsilon=0.9627983191463305 hp_config.learning_rate=0.014557916623227784 hp_config.learning_starts=819 hp_config.target_epsilon=0.12834428324417724 hp_config.use_target_network=True hp_config.target_update_interval=671 hp_config.tau=0.3700736632331964 autorl.n_total_timesteps=12500.0 seed=1\n", + "[2024-05-29 17:28:54,889][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:28:54,890][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:28:54,890][root][INFO] - Training started.\n", + "[2024-05-29 17:28:57,703][root][INFO] - 
Training finished.\n", + "[2024-05-29 17:28:57,706][HYDRA] \t#12 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3835046 hp_config.initial_epsilon=0.9627983191463305 hp_config.learning_rate=0.014557916623227784 hp_config.learning_starts=819 hp_config.target_epsilon=0.12834428324417724 hp_config.use_target_network=True hp_config.target_update_interval=671 hp_config.tau=0.3700736632331964 autorl.n_total_timesteps=12500.0 seed=2\n", + "[2024-05-29 17:28:57,798][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:28:57,798][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:28:57,798][root][INFO] - Training started.\n", + "[2024-05-29 17:29:00,666][root][INFO] - Training finished.\n", + "[2024-05-29 17:29:00,670][HYDRA] \t#13 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3835046 hp_config.initial_epsilon=0.9627983191463305 hp_config.learning_rate=0.014557916623227784 hp_config.learning_starts=819 hp_config.target_epsilon=0.12834428324417724 hp_config.use_target_network=True hp_config.target_update_interval=671 hp_config.tau=0.3700736632331964 autorl.n_total_timesteps=12500.0 seed=3\n", + "[2024-05-29 17:29:00,750][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:00,751][root][INFO] - seed: ${seed}\n", + "env_framework: 
${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:00,751][root][INFO] - Training started.\n", + "[2024-05-29 17:29:03,673][root][INFO] - Training finished.\n", + "[2024-05-29 17:29:03,677][HYDRA] \t#14 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3835046 hp_config.initial_epsilon=0.9627983191463305 hp_config.learning_rate=0.014557916623227784 hp_config.learning_starts=819 hp_config.target_epsilon=0.12834428324417724 hp_config.use_target_network=True hp_config.target_update_interval=671 hp_config.tau=0.3700736632331964 autorl.n_total_timesteps=12500.0 seed=4\n", + "[2024-05-29 17:29:03,758][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:03,759][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:03,759][root][INFO] - 
Training started.\n", + "[2024-05-29 17:29:06,587][root][INFO] - Training finished.\n", + "[2024-05-29 17:29:06,590][HYDRA] \t#15 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3835046 hp_config.initial_epsilon=0.9627983191463305 hp_config.learning_rate=0.014557916623227784 hp_config.learning_starts=819 hp_config.target_epsilon=0.12834428324417724 hp_config.use_target_network=True hp_config.target_update_interval=671 hp_config.tau=0.3700736632331964 autorl.n_total_timesteps=12500.0 seed=5\n", + "[2024-05-29 17:29:06,671][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:06,672][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:06,672][root][INFO] - Training started.\n", + "[2024-05-29 17:29:09,637][root][INFO] - Training finished.\n", + "[2024-05-29 17:29:09,641][HYDRA] \t#16 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3835046 hp_config.initial_epsilon=0.9627983191463305 hp_config.learning_rate=0.014557916623227784 hp_config.learning_starts=819 hp_config.target_epsilon=0.12834428324417724 hp_config.use_target_network=True hp_config.target_update_interval=671 hp_config.tau=0.3700736632331964 autorl.n_total_timesteps=12500.0 seed=6\n", + "[2024-05-29 17:29:09,730][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:09,730][root][INFO] 
- seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:09,730][root][INFO] - Training started.\n", + "[2024-05-29 17:29:12,583][root][INFO] - Training finished.\n", + "[2024-05-29 17:29:12,587][HYDRA] \t#17 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3835046 hp_config.initial_epsilon=0.9627983191463305 hp_config.learning_rate=0.014557916623227784 hp_config.learning_starts=819 hp_config.target_epsilon=0.12834428324417724 hp_config.use_target_network=True hp_config.target_update_interval=671 hp_config.tau=0.3700736632331964 autorl.n_total_timesteps=12500.0 seed=7\n", + "[2024-05-29 17:29:12,669][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:12,669][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + 
"[2024-05-29 17:29:12,669][root][INFO] - Training started.\n", + "[2024-05-29 17:29:15,464][root][INFO] - Training finished.\n", + "[2024-05-29 17:29:15,468][HYDRA] \t#18 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3835046 hp_config.initial_epsilon=0.9627983191463305 hp_config.learning_rate=0.014557916623227784 hp_config.learning_starts=819 hp_config.target_epsilon=0.12834428324417724 hp_config.use_target_network=True hp_config.target_update_interval=671 hp_config.tau=0.3700736632331964 autorl.n_total_timesteps=12500.0 seed=8\n", + "[2024-05-29 17:29:15,548][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:15,549][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:15,549][root][INFO] - Training started.\n", + "[2024-05-29 17:29:18,538][root][INFO] - Training finished.\n", + "[2024-05-29 17:29:18,541][HYDRA] \t#19 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3835046 hp_config.initial_epsilon=0.9627983191463305 hp_config.learning_rate=0.014557916623227784 hp_config.learning_starts=819 hp_config.target_epsilon=0.12834428324417724 hp_config.use_target_network=True hp_config.target_update_interval=671 hp_config.tau=0.3700736632331964 autorl.n_total_timesteps=12500.0 seed=9\n", + "[2024-05-29 17:29:18,623][root][INFO] - Your AutoRL config is:\n", 
+ "[2024-05-29 17:29:18,624][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:18,624][root][INFO] - Training started.\n", + "[2024-05-29 17:29:21,475][root][INFO] - Training finished.\n", + "[2024-05-29 17:29:21,479][HYDRA] Finished Iteration 2!\n", + "[2024-05-29 17:29:21,479][HYDRA] Current incumbent has a performance of 35.34000015258789.\n", + "[2024-05-29 17:29:21,481][HYDRA] Launching 10 jobs locally\n", + "[2024-05-29 17:29:21,481][HYDRA] \t#20 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7917464 hp_config.initial_epsilon=0.5355180290989434 hp_config.learning_rate=0.0077764924510783885 hp_config.learning_starts=473 hp_config.target_epsilon=0.029527304194400236 hp_config.use_target_network=True hp_config.buffer_alpha=0.028601902431991594 hp_config.buffer_beta=0.9443105977294779 hp_config.buffer_epsilon=6.173405204074314e-05 hp_config.target_update_interval=211 hp_config.tau=0.5744948027137008 autorl.n_total_timesteps=12500.0 seed=0\n", + "[2024-05-29 17:29:21,567][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:21,568][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: 
${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:21,568][root][INFO] - Training started.\n", + "[2024-05-29 17:29:24,755][root][INFO] - Training finished.\n", + "[2024-05-29 17:29:24,764][HYDRA] \t#21 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7917464 hp_config.initial_epsilon=0.5355180290989434 hp_config.learning_rate=0.0077764924510783885 hp_config.learning_starts=473 hp_config.target_epsilon=0.029527304194400236 hp_config.use_target_network=True hp_config.buffer_alpha=0.028601902431991594 hp_config.buffer_beta=0.9443105977294779 hp_config.buffer_epsilon=6.173405204074314e-05 hp_config.target_update_interval=211 hp_config.tau=0.5744948027137008 autorl.n_total_timesteps=12500.0 seed=1\n", + "[2024-05-29 17:29:24,905][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:24,907][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:24,909][root][INFO] - Training started.\n", + "[2024-05-29 17:29:28,211][root][INFO] - Training 
finished.\n", + "[2024-05-29 17:29:28,216][HYDRA] \t#22 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7917464 hp_config.initial_epsilon=0.5355180290989434 hp_config.learning_rate=0.0077764924510783885 hp_config.learning_starts=473 hp_config.target_epsilon=0.029527304194400236 hp_config.use_target_network=True hp_config.buffer_alpha=0.028601902431991594 hp_config.buffer_beta=0.9443105977294779 hp_config.buffer_epsilon=6.173405204074314e-05 hp_config.target_update_interval=211 hp_config.tau=0.5744948027137008 autorl.n_total_timesteps=12500.0 seed=2\n", + "[2024-05-29 17:29:28,308][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:28,309][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:28,309][root][INFO] - Training started.\n", + "[2024-05-29 17:29:31,655][root][INFO] - Training finished.\n", + "[2024-05-29 17:29:31,661][HYDRA] \t#23 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7917464 hp_config.initial_epsilon=0.5355180290989434 hp_config.learning_rate=0.0077764924510783885 hp_config.learning_starts=473 hp_config.target_epsilon=0.029527304194400236 hp_config.use_target_network=True hp_config.buffer_alpha=0.028601902431991594 hp_config.buffer_beta=0.9443105977294779 hp_config.buffer_epsilon=6.173405204074314e-05 
hp_config.target_update_interval=211 hp_config.tau=0.5744948027137008 autorl.n_total_timesteps=12500.0 seed=3\n", + "[2024-05-29 17:29:31,756][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:31,756][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:31,756][root][INFO] - Training started.\n", + "[2024-05-29 17:29:35,036][root][INFO] - Training finished.\n", + "[2024-05-29 17:29:35,042][HYDRA] \t#24 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7917464 hp_config.initial_epsilon=0.5355180290989434 hp_config.learning_rate=0.0077764924510783885 hp_config.learning_starts=473 hp_config.target_epsilon=0.029527304194400236 hp_config.use_target_network=True hp_config.buffer_alpha=0.028601902431991594 hp_config.buffer_beta=0.9443105977294779 hp_config.buffer_epsilon=6.173405204074314e-05 hp_config.target_update_interval=211 hp_config.tau=0.5744948027137008 autorl.n_total_timesteps=12500.0 seed=4\n", + "[2024-05-29 17:29:35,135][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:35,136][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: 
${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:35,136][root][INFO] - Training started.\n", + "[2024-05-29 17:29:38,656][root][INFO] - Training finished.\n", + "[2024-05-29 17:29:38,663][HYDRA] \t#25 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7917464 hp_config.initial_epsilon=0.5355180290989434 hp_config.learning_rate=0.0077764924510783885 hp_config.learning_starts=473 hp_config.target_epsilon=0.029527304194400236 hp_config.use_target_network=True hp_config.buffer_alpha=0.028601902431991594 hp_config.buffer_beta=0.9443105977294779 hp_config.buffer_epsilon=6.173405204074314e-05 hp_config.target_update_interval=211 hp_config.tau=0.5744948027137008 autorl.n_total_timesteps=12500.0 seed=5\n", + "[2024-05-29 17:29:38,749][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:38,749][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:38,749][root][INFO] - Training started.\n", + "[2024-05-29 17:29:42,113][root][INFO] - Training finished.\n", + "[2024-05-29 
17:29:42,120][HYDRA] \t#26 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7917464 hp_config.initial_epsilon=0.5355180290989434 hp_config.learning_rate=0.0077764924510783885 hp_config.learning_starts=473 hp_config.target_epsilon=0.029527304194400236 hp_config.use_target_network=True hp_config.buffer_alpha=0.028601902431991594 hp_config.buffer_beta=0.9443105977294779 hp_config.buffer_epsilon=6.173405204074314e-05 hp_config.target_update_interval=211 hp_config.tau=0.5744948027137008 autorl.n_total_timesteps=12500.0 seed=6\n", + "[2024-05-29 17:29:42,206][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:42,207][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:42,207][root][INFO] - Training started.\n", + "[2024-05-29 17:29:45,213][root][INFO] - Training finished.\n", + "[2024-05-29 17:29:45,219][HYDRA] \t#27 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7917464 hp_config.initial_epsilon=0.5355180290989434 hp_config.learning_rate=0.0077764924510783885 hp_config.learning_starts=473 hp_config.target_epsilon=0.029527304194400236 hp_config.use_target_network=True hp_config.buffer_alpha=0.028601902431991594 hp_config.buffer_beta=0.9443105977294779 hp_config.buffer_epsilon=6.173405204074314e-05 hp_config.target_update_interval=211 
hp_config.tau=0.5744948027137008 autorl.n_total_timesteps=12500.0 seed=7\n", + "[2024-05-29 17:29:45,335][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:45,336][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:45,336][root][INFO] - Training started.\n", + "[2024-05-29 17:29:48,212][root][INFO] - Training finished.\n", + "[2024-05-29 17:29:48,217][HYDRA] \t#28 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7917464 hp_config.initial_epsilon=0.5355180290989434 hp_config.learning_rate=0.0077764924510783885 hp_config.learning_starts=473 hp_config.target_epsilon=0.029527304194400236 hp_config.use_target_network=True hp_config.buffer_alpha=0.028601902431991594 hp_config.buffer_beta=0.9443105977294779 hp_config.buffer_epsilon=6.173405204074314e-05 hp_config.target_update_interval=211 hp_config.tau=0.5744948027137008 autorl.n_total_timesteps=12500.0 seed=8\n", + "[2024-05-29 17:29:48,302][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:48,303][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: 
${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:48,303][root][INFO] - Training started.\n", + "[2024-05-29 17:29:51,361][root][INFO] - Training finished.\n", + "[2024-05-29 17:29:51,367][HYDRA] \t#29 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7917464 hp_config.initial_epsilon=0.5355180290989434 hp_config.learning_rate=0.0077764924510783885 hp_config.learning_starts=473 hp_config.target_epsilon=0.029527304194400236 hp_config.use_target_network=True hp_config.buffer_alpha=0.028601902431991594 hp_config.buffer_beta=0.9443105977294779 hp_config.buffer_epsilon=6.173405204074314e-05 hp_config.target_update_interval=211 hp_config.tau=0.5744948027137008 autorl.n_total_timesteps=12500.0 seed=9\n", + "[2024-05-29 17:29:51,455][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:51,456][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:51,456][root][INFO] - Training started.\n", + "[2024-05-29 17:29:54,303][root][INFO] - Training finished.\n", + "[2024-05-29 17:29:54,311][HYDRA] Finished Iteration 3!\n", + 
"[2024-05-29 17:29:54,311][HYDRA] Current incumbent has a performance of 35.34000015258789.\n", + "[2024-05-29 17:29:54,312][HYDRA] Launching 10 jobs locally\n", + "[2024-05-29 17:29:54,312][HYDRA] \t#30 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=5289432 hp_config.initial_epsilon=0.5435646498507704 hp_config.learning_rate=0.022390342721683703 hp_config.learning_starts=800 hp_config.target_epsilon=0.1889891144928672 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=0\n", + "[2024-05-29 17:29:54,395][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:54,396][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:54,396][root][INFO] - Training started.\n", + "[2024-05-29 17:29:57,197][root][INFO] - Training finished.\n", + "[2024-05-29 17:29:57,202][HYDRA] \t#31 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=5289432 hp_config.initial_epsilon=0.5435646498507704 hp_config.learning_rate=0.022390342721683703 hp_config.learning_starts=800 hp_config.target_epsilon=0.1889891144928672 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=1\n", + "[2024-05-29 17:29:57,285][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:29:57,286][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", 
+ "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:29:57,286][root][INFO] - Training started.\n", + "[2024-05-29 17:30:00,044][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:00,050][HYDRA] \t#32 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=5289432 hp_config.initial_epsilon=0.5435646498507704 hp_config.learning_rate=0.022390342721683703 hp_config.learning_starts=800 hp_config.target_epsilon=0.1889891144928672 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=2\n", + "[2024-05-29 17:30:00,134][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:00,134][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:00,135][root][INFO] - Training started.\n", + "[2024-05-29 17:30:02,948][root][INFO] - Training finished.\n", + 
"[2024-05-29 17:30:02,955][HYDRA] \t#33 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=5289432 hp_config.initial_epsilon=0.5435646498507704 hp_config.learning_rate=0.022390342721683703 hp_config.learning_starts=800 hp_config.target_epsilon=0.1889891144928672 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=3\n", + "[2024-05-29 17:30:03,042][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:03,043][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:03,043][root][INFO] - Training started.\n", + "[2024-05-29 17:30:05,762][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:05,769][HYDRA] \t#34 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=5289432 hp_config.initial_epsilon=0.5435646498507704 hp_config.learning_rate=0.022390342721683703 hp_config.learning_starts=800 hp_config.target_epsilon=0.1889891144928672 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=4\n", + "[2024-05-29 17:30:05,849][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:05,849][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: 
${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:05,849][root][INFO] - Training started.\n", + "[2024-05-29 17:30:08,853][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:08,859][HYDRA] \t#35 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=5289432 hp_config.initial_epsilon=0.5435646498507704 hp_config.learning_rate=0.022390342721683703 hp_config.learning_starts=800 hp_config.target_epsilon=0.1889891144928672 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=5\n", + "[2024-05-29 17:30:08,942][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:08,943][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:08,943][root][INFO] - Training started.\n", + "[2024-05-29 17:30:11,639][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:11,655][HYDRA] \t#36 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=5289432 
hp_config.initial_epsilon=0.5435646498507704 hp_config.learning_rate=0.022390342721683703 hp_config.learning_starts=800 hp_config.target_epsilon=0.1889891144928672 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=6\n", + "[2024-05-29 17:30:11,738][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:11,738][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:11,738][root][INFO] - Training started.\n", + "[2024-05-29 17:30:14,413][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:14,420][HYDRA] \t#37 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=5289432 hp_config.initial_epsilon=0.5435646498507704 hp_config.learning_rate=0.022390342721683703 hp_config.learning_starts=800 hp_config.target_epsilon=0.1889891144928672 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=7\n", + "[2024-05-29 17:30:14,501][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:14,502][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 
12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:14,502][root][INFO] - Training started.\n", + "[2024-05-29 17:30:17,193][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:17,199][HYDRA] \t#38 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=5289432 hp_config.initial_epsilon=0.5435646498507704 hp_config.learning_rate=0.022390342721683703 hp_config.learning_starts=800 hp_config.target_epsilon=0.1889891144928672 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=8\n", + "[2024-05-29 17:30:17,280][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:17,281][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:17,281][root][INFO] - Training started.\n", + "[2024-05-29 17:30:20,027][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:20,033][HYDRA] \t#39 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=False hp_config.buffer_size=5289432 hp_config.initial_epsilon=0.5435646498507704 hp_config.learning_rate=0.022390342721683703 hp_config.learning_starts=800 hp_config.target_epsilon=0.1889891144928672 
hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=9\n", + "[2024-05-29 17:30:20,115][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:20,116][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:20,116][root][INFO] - Training started.\n", + "[2024-05-29 17:30:22,849][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:22,856][HYDRA] Finished Iteration 4!\n", + "[2024-05-29 17:30:22,856][HYDRA] Current incumbent has a performance of 35.34000015258789.\n", + "[2024-05-29 17:30:22,858][HYDRA] Launching 10 jobs locally\n", + "[2024-05-29 17:30:22,858][HYDRA] \t#40 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=True hp_config.buffer_size=2089577 hp_config.initial_epsilon=0.5806547589424982 hp_config.learning_rate=0.0018430691395340175 hp_config.learning_starts=259 hp_config.target_epsilon=0.09379584379840496 hp_config.use_target_network=True hp_config.buffer_alpha=0.16737988780906454 hp_config.buffer_beta=0.11927138975266208 hp_config.buffer_epsilon=4.2200573971950144e-05 hp_config.target_update_interval=139 hp_config.tau=0.20461653806325297 autorl.n_total_timesteps=12500.0 seed=0\n", + "[2024-05-29 17:30:22,948][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:22,949][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", 
+ "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:22,949][root][INFO] - Training started.\n", + "[2024-05-29 17:30:25,965][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:25,968][HYDRA] \t#41 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=True hp_config.buffer_size=2089577 hp_config.initial_epsilon=0.5806547589424982 hp_config.learning_rate=0.0018430691395340175 hp_config.learning_starts=259 hp_config.target_epsilon=0.09379584379840496 hp_config.use_target_network=True hp_config.buffer_alpha=0.16737988780906454 hp_config.buffer_beta=0.11927138975266208 hp_config.buffer_epsilon=4.2200573971950144e-05 hp_config.target_update_interval=139 hp_config.tau=0.20461653806325297 autorl.n_total_timesteps=12500.0 seed=1\n", + "[2024-05-29 17:30:26,379][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:26,379][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + 
"n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:26,379][root][INFO] - Training started.\n", + "[2024-05-29 17:30:29,422][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:29,426][HYDRA] \t#42 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=True hp_config.buffer_size=2089577 hp_config.initial_epsilon=0.5806547589424982 hp_config.learning_rate=0.0018430691395340175 hp_config.learning_starts=259 hp_config.target_epsilon=0.09379584379840496 hp_config.use_target_network=True hp_config.buffer_alpha=0.16737988780906454 hp_config.buffer_beta=0.11927138975266208 hp_config.buffer_epsilon=4.2200573971950144e-05 hp_config.target_update_interval=139 hp_config.tau=0.20461653806325297 autorl.n_total_timesteps=12500.0 seed=2\n", + "[2024-05-29 17:30:29,515][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:29,516][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:29,516][root][INFO] - Training started.\n", + "[2024-05-29 17:30:32,683][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:32,685][HYDRA] \t#43 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=True hp_config.buffer_size=2089577 hp_config.initial_epsilon=0.5806547589424982 hp_config.learning_rate=0.0018430691395340175 hp_config.learning_starts=259 hp_config.target_epsilon=0.09379584379840496 hp_config.use_target_network=True 
hp_config.buffer_alpha=0.16737988780906454 hp_config.buffer_beta=0.11927138975266208 hp_config.buffer_epsilon=4.2200573971950144e-05 hp_config.target_update_interval=139 hp_config.tau=0.20461653806325297 autorl.n_total_timesteps=12500.0 seed=3\n", + "[2024-05-29 17:30:32,772][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:32,773][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:32,773][root][INFO] - Training started.\n", + "[2024-05-29 17:30:35,749][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:35,752][HYDRA] \t#44 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=True hp_config.buffer_size=2089577 hp_config.initial_epsilon=0.5806547589424982 hp_config.learning_rate=0.0018430691395340175 hp_config.learning_starts=259 hp_config.target_epsilon=0.09379584379840496 hp_config.use_target_network=True hp_config.buffer_alpha=0.16737988780906454 hp_config.buffer_beta=0.11927138975266208 hp_config.buffer_epsilon=4.2200573971950144e-05 hp_config.target_update_interval=139 hp_config.tau=0.20461653806325297 autorl.n_total_timesteps=12500.0 seed=4\n", + "[2024-05-29 17:30:35,839][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:35,840][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + 
"eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:35,840][root][INFO] - Training started.\n", + "[2024-05-29 17:30:39,326][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:39,329][HYDRA] \t#45 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=True hp_config.buffer_size=2089577 hp_config.initial_epsilon=0.5806547589424982 hp_config.learning_rate=0.0018430691395340175 hp_config.learning_starts=259 hp_config.target_epsilon=0.09379584379840496 hp_config.use_target_network=True hp_config.buffer_alpha=0.16737988780906454 hp_config.buffer_beta=0.11927138975266208 hp_config.buffer_epsilon=4.2200573971950144e-05 hp_config.target_update_interval=139 hp_config.tau=0.20461653806325297 autorl.n_total_timesteps=12500.0 seed=5\n", + "[2024-05-29 17:30:39,418][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:39,418][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 
17:30:39,418][root][INFO] - Training started.\n", + "[2024-05-29 17:30:42,640][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:42,643][HYDRA] \t#46 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=True hp_config.buffer_size=2089577 hp_config.initial_epsilon=0.5806547589424982 hp_config.learning_rate=0.0018430691395340175 hp_config.learning_starts=259 hp_config.target_epsilon=0.09379584379840496 hp_config.use_target_network=True hp_config.buffer_alpha=0.16737988780906454 hp_config.buffer_beta=0.11927138975266208 hp_config.buffer_epsilon=4.2200573971950144e-05 hp_config.target_update_interval=139 hp_config.tau=0.20461653806325297 autorl.n_total_timesteps=12500.0 seed=6\n", + "[2024-05-29 17:30:42,725][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:42,726][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:42,726][root][INFO] - Training started.\n", + "[2024-05-29 17:30:45,636][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:45,639][HYDRA] \t#47 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=True hp_config.buffer_size=2089577 hp_config.initial_epsilon=0.5806547589424982 hp_config.learning_rate=0.0018430691395340175 hp_config.learning_starts=259 hp_config.target_epsilon=0.09379584379840496 hp_config.use_target_network=True hp_config.buffer_alpha=0.16737988780906454 
hp_config.buffer_beta=0.11927138975266208 hp_config.buffer_epsilon=4.2200573971950144e-05 hp_config.target_update_interval=139 hp_config.tau=0.20461653806325297 autorl.n_total_timesteps=12500.0 seed=7\n", + "[2024-05-29 17:30:45,763][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:45,763][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:45,763][root][INFO] - Training started.\n", + "[2024-05-29 17:30:48,721][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:48,724][HYDRA] \t#48 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=True hp_config.buffer_size=2089577 hp_config.initial_epsilon=0.5806547589424982 hp_config.learning_rate=0.0018430691395340175 hp_config.learning_starts=259 hp_config.target_epsilon=0.09379584379840496 hp_config.use_target_network=True hp_config.buffer_alpha=0.16737988780906454 hp_config.buffer_beta=0.11927138975266208 hp_config.buffer_epsilon=4.2200573971950144e-05 hp_config.target_update_interval=139 hp_config.tau=0.20461653806325297 autorl.n_total_timesteps=12500.0 seed=8\n", + "[2024-05-29 17:30:48,810][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:48,810][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + 
"n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:48,810][root][INFO] - Training started.\n", + "[2024-05-29 17:30:52,290][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:52,293][HYDRA] \t#49 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=True hp_config.buffer_size=2089577 hp_config.initial_epsilon=0.5806547589424982 hp_config.learning_rate=0.0018430691395340175 hp_config.learning_starts=259 hp_config.target_epsilon=0.09379584379840496 hp_config.use_target_network=True hp_config.buffer_alpha=0.16737988780906454 hp_config.buffer_beta=0.11927138975266208 hp_config.buffer_epsilon=4.2200573971950144e-05 hp_config.target_update_interval=139 hp_config.tau=0.20461653806325297 autorl.n_total_timesteps=12500.0 seed=9\n", + "[2024-05-29 17:30:52,379][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:52,379][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:52,380][root][INFO] - Training started.\n", + 
"[2024-05-29 17:30:55,471][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:55,475][HYDRA] Finished Iteration 5!\n", + "[2024-05-29 17:30:55,475][HYDRA] Current incumbent has a performance of 35.34000015258789.\n", + "[2024-05-29 17:30:55,477][HYDRA] Launching 10 jobs locally\n", + "[2024-05-29 17:30:55,477][HYDRA] \t#50 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=0\n", + "[2024-05-29 17:30:55,564][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:30:55,565][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:55,565][root][INFO] - Training started.\n", + "[2024-05-29 17:30:58,417][root][INFO] - Training finished.\n", + "[2024-05-29 17:30:58,419][HYDRA] \t#51 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=1\n", + "[2024-05-29 17:30:58,508][root][INFO] - Your 
AutoRL config is:\n", + "[2024-05-29 17:30:58,508][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:30:58,509][root][INFO] - Training started.\n", + "[2024-05-29 17:31:01,297][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:01,299][HYDRA] \t#52 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=2\n", + "[2024-05-29 17:31:01,380][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:01,381][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + 
"[2024-05-29 17:31:01,381][root][INFO] - Training started.\n", + "[2024-05-29 17:31:04,314][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:04,316][HYDRA] \t#53 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=3\n", + "[2024-05-29 17:31:04,404][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:04,405][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:04,405][root][INFO] - Training started.\n", + "[2024-05-29 17:31:07,307][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:07,310][HYDRA] \t#54 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=4\n", + "[2024-05-29 17:31:07,392][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:07,392][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: 
${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:07,392][root][INFO] - Training started.\n", + "[2024-05-29 17:31:10,327][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:10,329][HYDRA] \t#55 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=5\n", + "[2024-05-29 17:31:10,412][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:10,412][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:10,412][root][INFO] - Training started.\n", + "[2024-05-29 17:31:13,194][root][INFO] - Training finished.\n", + "[2024-05-29 
17:31:13,196][HYDRA] \t#56 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=6\n", + "[2024-05-29 17:31:13,804][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:13,804][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:13,804][root][INFO] - Training started.\n", + "[2024-05-29 17:31:16,697][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:16,699][HYDRA] \t#57 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=7\n", + "[2024-05-29 17:31:16,813][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:16,813][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + 
"algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:16,813][root][INFO] - Training started.\n", + "[2024-05-29 17:31:19,872][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:19,874][HYDRA] \t#58 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=8\n", + "[2024-05-29 17:31:19,966][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:19,967][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:19,967][root][INFO] - Training started.\n", + "[2024-05-29 17:31:23,150][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:23,152][HYDRA] \t#59 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 
hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=12500.0 seed=9\n", + "[2024-05-29 17:31:23,238][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:23,239][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:23,239][root][INFO] - Training started.\n", + "[2024-05-29 17:31:26,307][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:26,311][HYDRA] Finished Iteration 6!\n", + "[2024-05-29 17:31:26,311][HYDRA] Current incumbent has a performance of 35.34000015258789.\n", + "[2024-05-29 17:31:26,312][HYDRA] Launching 10 jobs locally\n", + "[2024-05-29 17:31:26,313][HYDRA] \t#60 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=12500.0 seed=0\n", + "[2024-05-29 17:31:26,404][root][INFO] - Your 
AutoRL config is:\n", + "[2024-05-29 17:31:26,405][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:26,405][root][INFO] - Training started.\n", + "[2024-05-29 17:31:29,677][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:29,682][HYDRA] \t#61 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=12500.0 seed=1\n", + "[2024-05-29 17:31:29,776][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:29,776][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: 
/tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:29,776][root][INFO] - Training started.\n", + "[2024-05-29 17:31:32,770][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:32,773][HYDRA] \t#62 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=12500.0 seed=2\n", + "[2024-05-29 17:31:32,867][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:32,868][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:32,868][root][INFO] - Training started.\n", + "[2024-05-29 17:31:35,891][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:35,895][HYDRA] \t#63 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 
hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=12500.0 seed=3\n", + "[2024-05-29 17:31:35,983][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:35,984][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:35,984][root][INFO] - Training started.\n", + "[2024-05-29 17:31:39,071][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:39,075][HYDRA] \t#64 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=12500.0 seed=4\n", + "[2024-05-29 17:31:39,162][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:39,162][root][INFO] - seed: 
${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:39,162][root][INFO] - Training started.\n", + "[2024-05-29 17:31:42,616][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:42,620][HYDRA] \t#65 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=12500.0 seed=5\n", + "[2024-05-29 17:31:42,700][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:42,701][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- 
reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:42,701][root][INFO] - Training started.\n", + "[2024-05-29 17:31:45,513][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:45,517][HYDRA] \t#66 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=12500.0 seed=6\n", + "[2024-05-29 17:31:45,603][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:45,603][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:45,603][root][INFO] - Training started.\n", + "[2024-05-29 17:31:48,535][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:48,539][HYDRA] \t#67 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 
hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=12500.0 seed=7\n", + "[2024-05-29 17:31:48,623][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:48,624][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:48,624][root][INFO] - Training started.\n", + "[2024-05-29 17:31:51,893][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:51,896][HYDRA] \t#68 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=12500.0 seed=8\n", + "[2024-05-29 17:31:51,982][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:51,982][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: 
${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:51,983][root][INFO] - Training started.\n", + "[2024-05-29 17:31:54,903][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:54,906][HYDRA] \t#69 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=12500.0 seed=9\n", + "[2024-05-29 17:31:54,992][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:54,993][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + 
"n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:54,993][root][INFO] - Training started.\n", + "[2024-05-29 17:31:57,856][root][INFO] - Training finished.\n", + "[2024-05-29 17:31:57,861][HYDRA] Added config b8886b and rejected config 3ae65a as incumbent because it is not better than the incumbents on 1 instances:\n", + "[2024-05-29 17:31:57,862][HYDRA] Finished Iteration 7!\n", + "[2024-05-29 17:31:57,862][HYDRA] Current incumbent has a performance of 34.900001525878906.\n", + "[2024-05-29 17:31:57,864][HYDRA] Launching 10 jobs locally\n", + "[2024-05-29 17:31:57,864][HYDRA] \t#70 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=12500.0 seed=0\n", + "[2024-05-29 17:31:57,949][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:31:57,950][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:31:57,950][root][INFO] - Training started.\n", + "[2024-05-29 17:32:01,171][root][INFO] - Training finished.\n", + 
"[2024-05-29 17:32:01,178][HYDRA] \t#71 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=12500.0 seed=1\n", + "[2024-05-29 17:32:01,272][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:32:01,273][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:01,273][root][INFO] - Training started.\n", + "[2024-05-29 17:32:04,446][root][INFO] - Training finished.\n", + "[2024-05-29 17:32:04,454][HYDRA] \t#72 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=12500.0 seed=2\n", + "[2024-05-29 17:32:04,547][root][INFO] - Your AutoRL config is:\n", + 
"[2024-05-29 17:32:04,547][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:04,547][root][INFO] - Training started.\n", + "[2024-05-29 17:32:08,691][root][INFO] - Training finished.\n", + "[2024-05-29 17:32:08,697][HYDRA] \t#73 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=12500.0 seed=3\n", + "[2024-05-29 17:32:08,856][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:32:08,857][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + 
"optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:08,858][root][INFO] - Training started.\n", + "[2024-05-29 17:32:12,030][root][INFO] - Training finished.\n", + "[2024-05-29 17:32:12,037][HYDRA] \t#74 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=12500.0 seed=4\n", + "[2024-05-29 17:32:12,139][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:32:12,140][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:12,141][root][INFO] - Training started.\n", + "[2024-05-29 17:32:15,103][root][INFO] - Training finished.\n", + "[2024-05-29 17:32:15,110][HYDRA] \t#75 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False 
hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=12500.0 seed=5\n", + "[2024-05-29 17:32:15,214][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:32:15,215][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:15,215][root][INFO] - Training started.\n", + "[2024-05-29 17:32:18,113][root][INFO] - Training finished.\n", + "[2024-05-29 17:32:18,119][HYDRA] \t#76 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=12500.0 seed=6\n", + "[2024-05-29 17:32:18,248][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:32:18,249][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: 
${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:18,249][root][INFO] - Training started.\n", + "[2024-05-29 17:32:21,230][root][INFO] - Training finished.\n", + "[2024-05-29 17:32:21,236][HYDRA] \t#77 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=12500.0 seed=7\n", + "[2024-05-29 17:32:21,322][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:32:21,322][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:21,322][root][INFO] - Training started.\n", + "[2024-05-29 17:32:24,228][root][INFO] - Training finished.\n", + "[2024-05-29 17:32:24,234][HYDRA] \t#78 : hp_config.buffer_batch_size=16 
hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=12500.0 seed=8\n", + "[2024-05-29 17:32:24,322][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:32:24,322][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:24,322][root][INFO] - Training started.\n", + "[2024-05-29 17:32:27,192][root][INFO] - Training finished.\n", + "[2024-05-29 17:32:27,198][HYDRA] \t#79 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=12500.0 seed=9\n", + "[2024-05-29 17:32:27,283][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:32:27,284][root][INFO] - seed: ${seed}\n", + 
"env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 12500.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:27,284][root][INFO] - Training started.\n", + "[2024-05-29 17:32:30,562][root][INFO] - Training finished.\n", + "[2024-05-29 17:32:30,570][HYDRA] Finished Iteration 8!\n", + "[2024-05-29 17:32:30,570][HYDRA] Current incumbent has a performance of 34.900001525878906.\n", + "[2024-05-29 17:32:30,573][HYDRA] Launching 10 jobs locally\n", + "[2024-05-29 17:32:30,573][HYDRA] \t#80 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=25000.0 seed=0\n", + "[2024-05-29 17:32:30,662][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:32:30,662][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + 
"nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:30,662][root][INFO] - Training started.\n", + "[2024-05-29 17:32:33,857][root][INFO] - Training finished.\n", + "[2024-05-29 17:32:33,861][HYDRA] \t#81 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=25000.0 seed=1\n", + "[2024-05-29 17:32:33,949][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:32:33,950][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:33,950][root][INFO] - Training started.\n", + "[2024-05-29 17:32:37,014][root][INFO] - Training finished.\n", + "[2024-05-29 17:32:37,019][HYDRA] \t#82 : 
hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=25000.0 seed=2\n", + "[2024-05-29 17:32:37,114][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:32:37,114][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:37,115][root][INFO] - Training started.\n", + "[2024-05-29 17:32:41,240][root][INFO] - Training finished.\n", + "[2024-05-29 17:32:41,244][HYDRA] \t#83 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 
autorl.n_total_timesteps=25000.0 seed=3\n", + "[2024-05-29 17:32:41,335][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:32:41,336][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:41,336][root][INFO] - Training started.\n", + "[2024-05-29 17:32:44,592][root][INFO] - Training finished.\n", + "[2024-05-29 17:32:44,606][HYDRA] \t#84 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=25000.0 seed=4\n", + "[2024-05-29 17:32:44,792][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:32:44,792][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 
25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:44,792][root][INFO] - Training started.\n", + "[2024-05-29 17:32:47,796][root][INFO] - Training finished.\n", + "[2024-05-29 17:32:47,799][HYDRA] \t#85 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=25000.0 seed=5\n", + "[2024-05-29 17:32:47,887][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:32:47,887][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:47,887][root][INFO] - Training started.\n", + "[2024-05-29 17:32:51,006][root][INFO] - Training finished.\n", + "[2024-05-29 17:32:51,010][HYDRA] \t#86 : hp_config.buffer_batch_size=8 
hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=25000.0 seed=6\n", + "[2024-05-29 17:32:51,099][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:32:51,099][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:51,099][root][INFO] - Training started.\n", + "[2024-05-29 17:32:54,077][root][INFO] - Training finished.\n", + "[2024-05-29 17:32:54,081][HYDRA] \t#87 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=25000.0 seed=7\n", + 
"[2024-05-29 17:32:54,178][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:32:54,179][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:54,179][root][INFO] - Training started.\n", + "[2024-05-29 17:32:57,241][root][INFO] - Training finished.\n", + "[2024-05-29 17:32:57,245][HYDRA] \t#88 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=25000.0 seed=8\n", + "[2024-05-29 17:32:57,335][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:32:57,336][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + 
"checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:32:57,336][root][INFO] - Training started.\n", + "[2024-05-29 17:33:00,303][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:00,307][HYDRA] \t#89 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=25000.0 seed=9\n", + "[2024-05-29 17:33:00,395][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:00,396][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:00,396][root][INFO] - Training started.\n", + "[2024-05-29 17:33:03,407][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:03,415][HYDRA] Finished Iteration 9!\n", + "[2024-05-29 17:33:03,415][HYDRA] Current incumbent has a performance of 
34.900001525878906.\n", + "[2024-05-29 17:33:03,418][HYDRA] Launching 10 jobs locally\n", + "[2024-05-29 17:33:03,418][HYDRA] \t#90 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=25000.0 seed=0\n", + "[2024-05-29 17:33:03,504][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:03,505][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:03,505][root][INFO] - Training started.\n", + "[2024-05-29 17:33:06,582][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:06,591][HYDRA] \t#91 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 
autorl.n_total_timesteps=25000.0 seed=1\n", + "[2024-05-29 17:33:06,685][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:06,686][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:06,686][root][INFO] - Training started.\n", + "[2024-05-29 17:33:10,043][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:10,049][HYDRA] \t#92 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=25000.0 seed=2\n", + "[2024-05-29 17:33:10,139][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:10,140][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: 
default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:10,140][root][INFO] - Training started.\n", + "[2024-05-29 17:33:14,078][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:14,087][HYDRA] \t#93 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=25000.0 seed=3\n", + "[2024-05-29 17:33:14,176][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:14,177][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:14,177][root][INFO] - Training started.\n", + "[2024-05-29 17:33:17,144][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:17,152][HYDRA] \t#94 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 
hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=25000.0 seed=4\n", + "[2024-05-29 17:33:17,240][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:17,241][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:17,241][root][INFO] - Training started.\n", + "[2024-05-29 17:33:20,343][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:20,351][HYDRA] \t#95 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=25000.0 seed=5\n", + "[2024-05-29 17:33:20,442][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:20,442][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + 
"eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:20,442][root][INFO] - Training started.\n", + "[2024-05-29 17:33:23,574][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:23,580][HYDRA] \t#96 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=25000.0 seed=6\n", + "[2024-05-29 17:33:23,668][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:23,669][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:23,669][root][INFO] - Training started.\n", + "[2024-05-29 
17:33:26,643][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:26,653][HYDRA] \t#97 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=25000.0 seed=7\n", + "[2024-05-29 17:33:26,742][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:26,743][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:26,743][root][INFO] - Training started.\n", + "[2024-05-29 17:33:29,764][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:29,772][HYDRA] \t#98 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=25000.0 seed=8\n", + "[2024-05-29 
17:33:29,863][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:29,864][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:29,864][root][INFO] - Training started.\n", + "[2024-05-29 17:33:33,017][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:33,024][HYDRA] \t#99 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=25000.0 seed=9\n", + "[2024-05-29 17:33:33,112][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:33,113][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", 
+ "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:33,113][root][INFO] - Training started.\n", + "[2024-05-29 17:33:36,179][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:36,189][HYDRA] Finished Iteration 10!\n", + "[2024-05-29 17:33:36,189][HYDRA] Current incumbent has a performance of 34.900001525878906.\n", + "[2024-05-29 17:33:36,191][HYDRA] Launching 10 jobs locally\n", + "[2024-05-29 17:33:36,191][HYDRA] \t#100 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=25000.0 seed=0\n", + "[2024-05-29 17:33:36,277][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:36,277][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:36,281][root][INFO] - Training started.\n", + "[2024-05-29 17:33:39,203][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:39,206][HYDRA] \t#101 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 
hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=25000.0 seed=1\n", + "[2024-05-29 17:33:39,294][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:39,294][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:39,295][root][INFO] - Training started.\n", + "[2024-05-29 17:33:42,325][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:42,327][HYDRA] \t#102 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=25000.0 seed=2\n", + "[2024-05-29 17:33:42,414][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:42,415][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + 
"checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:42,415][root][INFO] - Training started.\n", + "[2024-05-29 17:33:45,307][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:45,309][HYDRA] \t#103 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=25000.0 seed=3\n", + "[2024-05-29 17:33:45,396][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:45,396][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:45,396][root][INFO] - Training started.\n", + "[2024-05-29 17:33:49,305][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:49,308][HYDRA] \t#104 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False 
autorl.n_total_timesteps=25000.0 seed=4\n", + "[2024-05-29 17:33:49,394][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:49,395][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:49,395][root][INFO] - Training started.\n", + "[2024-05-29 17:33:52,435][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:52,437][HYDRA] \t#105 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=25000.0 seed=5\n", + "[2024-05-29 17:33:52,526][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:52,526][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: 
upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:52,526][root][INFO] - Training started.\n", + "[2024-05-29 17:33:55,475][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:55,478][HYDRA] \t#106 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=25000.0 seed=6\n", + "[2024-05-29 17:33:55,576][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:55,576][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:55,576][root][INFO] - Training started.\n", + "[2024-05-29 17:33:58,564][root][INFO] - Training finished.\n", + "[2024-05-29 17:33:58,570][HYDRA] \t#107 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=25000.0 seed=7\n", + "[2024-05-29 17:33:58,663][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:33:58,664][root][INFO] - 
seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:33:58,664][root][INFO] - Training started.\n", + "[2024-05-29 17:34:01,623][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:01,625][HYDRA] \t#108 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=25000.0 seed=8\n", + "[2024-05-29 17:34:01,713][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:01,714][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:01,714][root][INFO] - Training started.\n", + 
"[2024-05-29 17:34:04,668][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:04,670][HYDRA] \t#109 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=False hp_config.buffer_size=971937 hp_config.initial_epsilon=0.918972453749402 hp_config.learning_rate=3.0233751521472095e-06 hp_config.learning_starts=1000 hp_config.target_epsilon=0.09426158912789262 hp_config.use_target_network=False autorl.n_total_timesteps=25000.0 seed=9\n", + "[2024-05-29 17:34:04,757][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:04,757][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:04,757][root][INFO] - Training started.\n", + "[2024-05-29 17:34:07,642][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:07,647][HYDRA] Finished Iteration 11!\n", + "[2024-05-29 17:34:07,647][HYDRA] Current incumbent has a performance of 34.900001525878906.\n", + "[2024-05-29 17:34:07,649][HYDRA] Launching 10 jobs locally\n", + "[2024-05-29 17:34:07,649][HYDRA] \t#110 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 
hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=25000.0 seed=0\n", + "[2024-05-29 17:34:07,737][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:07,738][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:07,738][root][INFO] - Training started.\n", + "[2024-05-29 17:34:10,856][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:10,863][HYDRA] \t#111 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=25000.0 seed=1\n", + "[2024-05-29 17:34:10,949][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:10,949][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + 
"checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:10,949][root][INFO] - Training started.\n", + "[2024-05-29 17:34:14,021][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:14,030][HYDRA] \t#112 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=25000.0 seed=2\n", + "[2024-05-29 17:34:14,118][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:14,119][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:14,119][root][INFO] - Training started.\n", + "[2024-05-29 17:34:17,185][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:17,193][HYDRA] \t#113 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 
hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=25000.0 seed=3\n", + "[2024-05-29 17:34:17,293][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:17,294][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:17,294][root][INFO] - Training started.\n", + "[2024-05-29 17:34:20,272][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:20,280][HYDRA] \t#114 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=25000.0 seed=4\n", + "[2024-05-29 17:34:20,367][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:20,368][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", 
+ "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:20,368][root][INFO] - Training started.\n", + "[2024-05-29 17:34:23,344][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:23,351][HYDRA] \t#115 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=25000.0 seed=5\n", + "[2024-05-29 17:34:23,440][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:23,441][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:23,441][root][INFO] - Training started.\n", + "[2024-05-29 
17:34:27,484][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:27,493][HYDRA] \t#116 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=25000.0 seed=6\n", + "[2024-05-29 17:34:27,602][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:27,606][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:27,606][root][INFO] - Training started.\n", + "[2024-05-29 17:34:30,680][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:30,687][HYDRA] \t#117 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=25000.0 seed=7\n", + "[2024-05-29 
17:34:30,774][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:30,775][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:30,775][root][INFO] - Training started.\n", + "[2024-05-29 17:34:34,003][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:34,012][HYDRA] \t#118 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=25000.0 seed=8\n", + "[2024-05-29 17:34:34,103][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:34,103][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: 
[]\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:34,103][root][INFO] - Training started.\n", + "[2024-05-29 17:34:37,096][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:37,104][HYDRA] \t#119 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=7163563 hp_config.initial_epsilon=0.6447030464736005 hp_config.learning_rate=8.24056159732675e-06 hp_config.learning_starts=601 hp_config.target_epsilon=0.005001401691311217 hp_config.use_target_network=False hp_config.buffer_alpha=0.014648521430621595 hp_config.buffer_beta=0.6810383714282678 hp_config.buffer_epsilon=1.2023527271263151e-06 autorl.n_total_timesteps=25000.0 seed=9\n", + "[2024-05-29 17:34:37,190][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:37,191][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:37,191][root][INFO] - Training started.\n", + "[2024-05-29 17:34:40,149][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:40,160][HYDRA] Finished Iteration 12!\n", + "[2024-05-29 17:34:40,160][HYDRA] Current incumbent has a performance of 34.900001525878906.\n", + "[2024-05-29 17:34:40,162][HYDRA] Launching 10 jobs locally\n", + "[2024-05-29 17:34:40,162][HYDRA] \t#120 : 
hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=50000.0 seed=0\n", + "[2024-05-29 17:34:40,251][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:40,252][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:40,252][root][INFO] - Training started.\n", + "[2024-05-29 17:34:43,239][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:43,243][HYDRA] \t#121 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 
autorl.n_total_timesteps=50000.0 seed=1\n", + "[2024-05-29 17:34:43,332][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:43,332][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:43,332][root][INFO] - Training started.\n", + "[2024-05-29 17:34:46,342][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:46,346][HYDRA] \t#122 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=50000.0 seed=2\n", + "[2024-05-29 17:34:46,433][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:46,434][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 
50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:46,434][root][INFO] - Training started.\n", + "[2024-05-29 17:34:50,142][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:50,146][HYDRA] \t#123 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=50000.0 seed=3\n", + "[2024-05-29 17:34:50,237][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:50,238][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:50,238][root][INFO] - Training started.\n", + "[2024-05-29 17:34:53,250][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:53,254][HYDRA] \t#124 : hp_config.buffer_batch_size=8 
hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=50000.0 seed=4\n", + "[2024-05-29 17:34:53,342][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:53,343][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:53,343][root][INFO] - Training started.\n", + "[2024-05-29 17:34:56,604][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:56,608][HYDRA] \t#125 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=50000.0 seed=5\n", + 
"[2024-05-29 17:34:56,702][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:56,703][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:56,703][root][INFO] - Training started.\n", + "[2024-05-29 17:34:59,657][root][INFO] - Training finished.\n", + "[2024-05-29 17:34:59,661][HYDRA] \t#126 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=50000.0 seed=6\n", + "[2024-05-29 17:34:59,750][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:34:59,750][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + 
"checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:34:59,750][root][INFO] - Training started.\n", + "[2024-05-29 17:35:02,851][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:02,868][HYDRA] \t#127 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=50000.0 seed=7\n", + "[2024-05-29 17:35:02,958][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:35:02,958][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:35:02,959][root][INFO] - Training started.\n", + "[2024-05-29 17:35:07,058][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:07,063][HYDRA] \t#128 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 
hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=50000.0 seed=8\n", + "[2024-05-29 17:35:07,150][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:35:07,151][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:35:07,151][root][INFO] - Training started.\n", + "[2024-05-29 17:35:10,116][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:10,120][HYDRA] \t#129 : hp_config.buffer_batch_size=8 hp_config.buffer_prio_sampling=True hp_config.buffer_size=3180530 hp_config.initial_epsilon=0.707131497257335 hp_config.learning_rate=2.0928470088911824e-06 hp_config.learning_starts=709 hp_config.target_epsilon=0.11375368938710846 hp_config.use_target_network=True hp_config.buffer_alpha=0.5280155729320327 hp_config.buffer_beta=0.10300110565085725 hp_config.buffer_epsilon=2.0127321428268346e-05 hp_config.target_update_interval=930 hp_config.tau=0.3253832629268104 autorl.n_total_timesteps=50000.0 seed=9\n", + "[2024-05-29 17:35:10,207][root][INFO] - Your AutoRL config is:\n", + 
"[2024-05-29 17:35:10,207][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:35:10,207][root][INFO] - Training started.\n", + "[2024-05-29 17:35:13,146][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:13,153][HYDRA] Finished Iteration 13!\n", + "[2024-05-29 17:35:13,153][HYDRA] Current incumbent has a performance of 34.900001525878906.\n", + "[2024-05-29 17:35:13,156][HYDRA] Launching 10 jobs locally\n", + "[2024-05-29 17:35:13,156][HYDRA] \t#130 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=50000.0 seed=0\n", + "[2024-05-29 17:35:13,241][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:35:13,242][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + 
"nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:35:13,242][root][INFO] - Training started.\n", + "[2024-05-29 17:35:16,217][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:16,223][HYDRA] \t#131 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=50000.0 seed=1\n", + "[2024-05-29 17:35:16,314][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:35:16,314][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:35:16,314][root][INFO] - Training started.\n", + "[2024-05-29 17:35:19,244][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:19,251][HYDRA] \t#132 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True 
hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=50000.0 seed=2\n", + "[2024-05-29 17:35:19,339][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:35:19,340][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:35:19,340][root][INFO] - Training started.\n", + "[2024-05-29 17:35:22,392][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:22,398][HYDRA] \t#133 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=50000.0 seed=3\n", + "[2024-05-29 17:35:22,482][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:35:22,483][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + 
"env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:35:22,483][root][INFO] - Training started.\n", + "[2024-05-29 17:35:25,442][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:25,449][HYDRA] \t#134 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=50000.0 seed=4\n", + "[2024-05-29 17:35:25,539][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:35:25,539][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + 
"[2024-05-29 17:35:25,539][root][INFO] - Training started.\n", + "[2024-05-29 17:35:28,498][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:28,504][HYDRA] \t#135 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=50000.0 seed=5\n", + "[2024-05-29 17:35:28,589][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:35:28,589][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:35:28,589][root][INFO] - Training started.\n", + "[2024-05-29 17:35:31,524][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:31,531][HYDRA] \t#136 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 
hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=50000.0 seed=6\n", + "[2024-05-29 17:35:31,617][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:35:31,618][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:35:31,618][root][INFO] - Training started.\n", + "[2024-05-29 17:35:34,544][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:34,551][HYDRA] \t#137 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=50000.0 seed=7\n", + "[2024-05-29 17:35:34,634][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:35:34,635][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + 
"checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:35:34,635][root][INFO] - Training started.\n", + "[2024-05-29 17:35:37,597][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:37,606][HYDRA] \t#138 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=50000.0 seed=8\n", + "[2024-05-29 17:35:37,689][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:35:37,689][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:35:37,689][root][INFO] - Training started.\n", + "[2024-05-29 17:35:40,570][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:40,580][HYDRA] \t#139 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 
hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=50000.0 seed=9\n", + "[2024-05-29 17:35:40,666][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:35:40,667][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 50000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:35:40,667][root][INFO] - Training started.\n", + "[2024-05-29 17:35:43,606][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:43,615][HYDRA] Added config 3ae65a and rejected config b8886b as incumbent because it is not better than the incumbents on 1 instances:\n", + "[2024-05-29 17:35:43,617][HYDRA] Finished Iteration 14!\n", + "[2024-05-29 17:35:43,617][HYDRA] Current incumbent has a performance of 34.900001525878906.\n", + "[2024-05-29 17:35:43,619][HYDRA] Launching 10 jobs locally\n", + "[2024-05-29 17:35:43,619][HYDRA] \t#140 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 
hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=100000.0 seed=0\n", + "[2024-05-29 17:35:43,703][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:35:43,704][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 100000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:35:43,704][root][INFO] - Training started.\n", + "[2024-05-29 17:35:47,904][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:47,909][HYDRA] \t#141 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=100000.0 seed=1\n", + "[2024-05-29 17:35:47,997][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:35:47,998][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + 
"n_total_timesteps: 100000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:35:47,998][root][INFO] - Training started.\n", + "[2024-05-29 17:35:51,052][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:51,060][HYDRA] \t#142 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=100000.0 seed=2\n", + "[2024-05-29 17:35:51,149][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:35:51,150][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 100000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:35:51,150][root][INFO] - Training started.\n", + "[2024-05-29 17:35:54,240][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:54,245][HYDRA] \t#143 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 
hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=100000.0 seed=3\n", + "[2024-05-29 17:35:54,334][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:35:54,335][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 100000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:35:54,335][root][INFO] - Training started.\n", + "[2024-05-29 17:35:57,504][root][INFO] - Training finished.\n", + "[2024-05-29 17:35:57,512][HYDRA] \t#144 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=100000.0 seed=4\n", + "[2024-05-29 17:35:57,599][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:35:57,600][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: 
${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 100000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:35:57,600][root][INFO] - Training started.\n", + "[2024-05-29 17:36:00,640][root][INFO] - Training finished.\n", + "[2024-05-29 17:36:00,647][HYDRA] \t#145 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=100000.0 seed=5\n", + "[2024-05-29 17:36:00,746][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:36:00,747][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 100000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 
17:36:00,747][root][INFO] - Training started.\n", + "[2024-05-29 17:36:03,752][root][INFO] - Training finished.\n", + "[2024-05-29 17:36:03,758][HYDRA] \t#146 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=100000.0 seed=6\n", + "[2024-05-29 17:36:03,846][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:36:03,847][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 100000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:36:03,847][root][INFO] - Training started.\n", + "[2024-05-29 17:36:06,938][root][INFO] - Training finished.\n", + "[2024-05-29 17:36:06,944][HYDRA] \t#147 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 
hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=100000.0 seed=7\n", + "[2024-05-29 17:36:07,030][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:36:07,031][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 100000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:36:07,031][root][INFO] - Training started.\n", + "[2024-05-29 17:36:10,247][root][INFO] - Training finished.\n", + "[2024-05-29 17:36:10,253][HYDRA] \t#148 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=100000.0 seed=8\n", + "[2024-05-29 17:36:10,340][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:36:10,341][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 100000.0\n", + 
"checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:36:10,341][root][INFO] - Training started.\n", + "[2024-05-29 17:36:13,464][root][INFO] - Training finished.\n", + "[2024-05-29 17:36:13,473][HYDRA] \t#149 : hp_config.buffer_batch_size=16 hp_config.buffer_prio_sampling=True hp_config.buffer_size=9636665 hp_config.initial_epsilon=0.7840222805469661 hp_config.learning_rate=1.2620948285169311e-06 hp_config.learning_starts=1003 hp_config.target_epsilon=0.024536610747917714 hp_config.use_target_network=False hp_config.buffer_alpha=0.4615888288943831 hp_config.buffer_beta=0.6159747654951973 hp_config.buffer_epsilon=2.741773675242096e-06 autorl.n_total_timesteps=100000.0 seed=9\n", + "[2024-05-29 17:36:13,563][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:36:13,564][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 100000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:36:13,564][root][INFO] - Training started.\n", + "[2024-05-29 17:36:16,627][root][INFO] - Training finished.\n", + "[2024-05-29 17:36:16,637][HYDRA] Finished Iteration 15!\n", + "[2024-05-29 17:36:16,637][HYDRA] Current incumbent has a performance of 34.900001525878906.\n", + "[2024-05-29 
17:36:16,835][HYDRA] Launching 10 jobs locally\n", + "[2024-05-29 17:36:16,835][HYDRA] \t#150 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3016463 hp_config.initial_epsilon=0.7954363806240866 hp_config.learning_rate=0.02881487733166084 hp_config.learning_starts=889 hp_config.target_epsilon=0.014764232095572472 hp_config.use_target_network=True hp_config.target_update_interval=435 hp_config.tau=0.45915987611048487 autorl.n_total_timesteps=25000.0 seed=0\n", + "[2024-05-29 17:36:16,920][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:36:16,921][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:36:16,921][root][INFO] - Training started.\n", + "[2024-05-29 17:36:20,276][root][INFO] - Training finished.\n", + "[2024-05-29 17:36:20,284][HYDRA] \t#151 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3016463 hp_config.initial_epsilon=0.7954363806240866 hp_config.learning_rate=0.02881487733166084 hp_config.learning_starts=889 hp_config.target_epsilon=0.014764232095572472 hp_config.use_target_network=True hp_config.target_update_interval=435 hp_config.tau=0.45915987611048487 autorl.n_total_timesteps=25000.0 seed=1\n", + "[2024-05-29 17:36:20,373][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:36:20,374][root][INFO] - seed: ${seed}\n", + 
"env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:36:20,374][root][INFO] - Training started.\n", + "[2024-05-29 17:36:23,820][root][INFO] - Training finished.\n", + "[2024-05-29 17:36:23,824][HYDRA] \t#152 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3016463 hp_config.initial_epsilon=0.7954363806240866 hp_config.learning_rate=0.02881487733166084 hp_config.learning_starts=889 hp_config.target_epsilon=0.014764232095572472 hp_config.use_target_network=True hp_config.target_update_interval=435 hp_config.tau=0.45915987611048487 autorl.n_total_timesteps=25000.0 seed=2\n", + "[2024-05-29 17:36:23,918][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:36:23,918][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 
17:36:23,918][root][INFO] - Training started.\n", + "[2024-05-29 17:36:27,141][root][INFO] - Training finished.\n", + "[2024-05-29 17:36:27,148][HYDRA] \t#153 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3016463 hp_config.initial_epsilon=0.7954363806240866 hp_config.learning_rate=0.02881487733166084 hp_config.learning_starts=889 hp_config.target_epsilon=0.014764232095572472 hp_config.use_target_network=True hp_config.target_update_interval=435 hp_config.tau=0.45915987611048487 autorl.n_total_timesteps=25000.0 seed=3\n", + "[2024-05-29 17:36:27,237][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:36:27,238][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:36:27,238][root][INFO] - Training started.\n", + "[2024-05-29 17:36:30,528][root][INFO] - Training finished.\n", + "[2024-05-29 17:36:30,532][HYDRA] \t#154 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3016463 hp_config.initial_epsilon=0.7954363806240866 hp_config.learning_rate=0.02881487733166084 hp_config.learning_starts=889 hp_config.target_epsilon=0.014764232095572472 hp_config.use_target_network=True hp_config.target_update_interval=435 hp_config.tau=0.45915987611048487 autorl.n_total_timesteps=25000.0 seed=4\n", + "[2024-05-29 17:36:30,623][root][INFO] - Your AutoRL config is:\n", + 
"[2024-05-29 17:36:30,624][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:36:30,624][root][INFO] - Training started.\n", + "[2024-05-29 17:36:33,694][root][INFO] - Training finished.\n", + "[2024-05-29 17:36:33,699][HYDRA] \t#155 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3016463 hp_config.initial_epsilon=0.7954363806240866 hp_config.learning_rate=0.02881487733166084 hp_config.learning_starts=889 hp_config.target_epsilon=0.014764232095572472 hp_config.use_target_network=True hp_config.target_update_interval=435 hp_config.tau=0.45915987611048487 autorl.n_total_timesteps=25000.0 seed=5\n", + "[2024-05-29 17:36:33,786][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:36:33,787][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + 
"n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:36:33,787][root][INFO] - Training started.\n", + "[2024-05-29 17:36:38,744][root][INFO] - Training finished.\n", + "[2024-05-29 17:36:38,751][HYDRA] \t#156 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3016463 hp_config.initial_epsilon=0.7954363806240866 hp_config.learning_rate=0.02881487733166084 hp_config.learning_starts=889 hp_config.target_epsilon=0.014764232095572472 hp_config.use_target_network=True hp_config.target_update_interval=435 hp_config.tau=0.45915987611048487 autorl.n_total_timesteps=25000.0 seed=6\n", + "[2024-05-29 17:36:38,842][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:36:38,843][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:36:38,843][root][INFO] - Training started.\n", + "[2024-05-29 17:36:42,143][root][INFO] - Training finished.\n", + "[2024-05-29 17:36:42,147][HYDRA] \t#157 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3016463 hp_config.initial_epsilon=0.7954363806240866 hp_config.learning_rate=0.02881487733166084 hp_config.learning_starts=889 hp_config.target_epsilon=0.014764232095572472 hp_config.use_target_network=True hp_config.target_update_interval=435 hp_config.tau=0.45915987611048487 autorl.n_total_timesteps=25000.0 seed=7\n", + "[2024-05-29 
17:36:42,238][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:36:42,239][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:36:42,239][root][INFO] - Training started.\n", + "[2024-05-29 17:36:45,385][root][INFO] - Training finished.\n", + "[2024-05-29 17:36:45,390][HYDRA] \t#158 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3016463 hp_config.initial_epsilon=0.7954363806240866 hp_config.learning_rate=0.02881487733166084 hp_config.learning_starts=889 hp_config.target_epsilon=0.014764232095572472 hp_config.use_target_network=True hp_config.target_update_interval=435 hp_config.tau=0.45915987611048487 autorl.n_total_timesteps=25000.0 seed=8\n", + "[2024-05-29 17:36:45,480][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:36:45,481][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + 
"optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:36:45,481][root][INFO] - Training started.\n", + "[2024-05-29 17:36:48,560][root][INFO] - Training finished.\n", + "[2024-05-29 17:36:48,564][HYDRA] \t#159 : hp_config.buffer_batch_size=32 hp_config.buffer_prio_sampling=False hp_config.buffer_size=3016463 hp_config.initial_epsilon=0.7954363806240866 hp_config.learning_rate=0.02881487733166084 hp_config.learning_starts=889 hp_config.target_epsilon=0.014764232095572472 hp_config.use_target_network=True hp_config.target_update_interval=435 hp_config.tau=0.45915987611048487 autorl.n_total_timesteps=25000.0 seed=9\n", + "[2024-05-29 17:36:48,652][root][INFO] - Your AutoRL config is:\n", + "[2024-05-29 17:36:48,653][root][INFO] - seed: ${seed}\n", + "env_framework: ${environment.framework}\n", + "env_name: ${environment.name}\n", + "env_kwargs: ${environment.kwargs}\n", + "eval_env_kwargs: ${environment.eval_kwargs}\n", + "n_envs: ${environment.n_envs}\n", + "algorithm: ${algorithm}\n", + "cnn_policy: ${environment.cnn_policy}\n", + "nas_config: ${nas_config}\n", + "n_total_timesteps: 25000.0\n", + "checkpoint: []\n", + "checkpoint_name: default_checkpoint\n", + "checkpoint_dir: /tmp\n", + "state_features: []\n", + "objectives:\n", + "- reward_mean\n", + "optimize_objectives: upper\n", + "n_steps: 10\n", + "n_eval_steps: 10\n", + "n_eval_episodes: 128\n", + "\n", + "[2024-05-29 17:36:48,653][root][INFO] - Training started.\n", + "[2024-05-29 17:36:51,964][root][INFO] - Training finished.\n", + "[2024-05-29 17:36:51,976][HYDRA] Finished Iteration 16!\n", + "[2024-05-29 17:36:51,977][HYDRA] Current incumbent has a performance of 34.900001525878906.\n", + "[2024-05-29 17:36:51,978][HYDRA] Finished Sweep! 
Total duration was 512.1s, incumbent had a performance of 34.900001525878906\n", + "[2024-05-29 17:36:51,978][HYDRA] The incumbent configuration is Configuration(values={\n", + " 'hp_config.buffer_alpha': 0.5280155729320327,\n", + " 'hp_config.buffer_batch_size': 8,\n", + " 'hp_config.buffer_beta': 0.10300110565085725,\n", + " 'hp_config.buffer_epsilon': 2.0127321428268346e-05,\n", + " 'hp_config.buffer_prio_sampling': True,\n", + " 'hp_config.buffer_size': 3180530,\n", + " 'hp_config.initial_epsilon': 0.707131497257335,\n", + " 'hp_config.learning_rate': 2.0928470088911824e-06,\n", + " 'hp_config.learning_starts': 709,\n", + " 'hp_config.target_epsilon': 0.11375368938710846,\n", + " 'hp_config.target_update_interval': 930,\n", + " 'hp_config.tau': 0.3253832629268104,\n", + " 'hp_config.use_target_network': True,\n", + "})\n", + "\n" + ] + } + ], + "source": [ + "!python run_arlbench.py --config-name=smac -m" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "How did this one do compared to random search?" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results_file_smac = 'results/smac/dqn_CartPole-v1/0/42/runhistory.csv'\n", + "runhistory_smac = pd.read_csv(results_file_smac)\n", + "ax = runhistory.plot(x='run_id', y='performance', kind='line', title='Configuration Performance over Time')\n", + "runhistory_smac.plot(x='run_id', y='performance', kind='line', title='Configuration Performance over Time', ax=ax)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best performance: -34.9\n", + "\n", + "run_id 6\n", + "budget NaN\n", + "performance 126.84375\n", + "hp_config.buffer_batch_size 8\n", + "hp_config.buffer_prio_sampling True\n", + "hp_config.buffer_size 3180530\n", + "hp_config.initial_epsilon 0.707131\n", + "hp_config.learning_rate 0.000002\n", + "hp_config.learning_starts 709\n", + "hp_config.target_epsilon 0.113754\n", + "hp_config.use_target_network True\n", + "hp_config.buffer_alpha 0.528016\n", + "hp_config.buffer_beta 0.103001\n", + "hp_config.buffer_epsilon 0.00002\n", + "hp_config.target_update_interval 930.0\n", + "hp_config.tau 0.325383\n", + "Name: 6, dtype: object\n" + ] + } + ], + "source": [ + "print(\"Best performance: \", np.round(max(runhistory_smac['performance']), decimals=2))\n", + "print(\"\")\n", + "print(runhistory_smac.loc[runhistory_smac['performance'].idxmax()])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "arlbench", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/run_heuristic_schedule.py b/examples/run_heuristic_schedule.py index 
3de2808ee..f3023e870 100644 --- a/examples/run_heuristic_schedule.py +++ b/examples/run_heuristic_schedule.py @@ -9,6 +9,7 @@ import sys import traceback from typing import TYPE_CHECKING +import json import hydra import jax @@ -27,7 +28,11 @@ def run(cfg: DictConfig, logger: logging.Logger): # Reset environment and run for 10 steps _ = env.reset() + + rewards = [] + epsilons = [] for _ in range(10): + epsilons.append(cfg.hp_config.initial_epsilon) # The objectives are configured to return the mean reward _, objectives, _, _, _ = env.step(cfg.hp_config) if objectives["reward_mean"] > 30 and cfg.hp_config.initial_epsilon > 0.7: @@ -35,8 +40,12 @@ def run(cfg: DictConfig, logger: logging.Logger): cfg.hp_config.target_epsilon = 0.7 cfg.hp_config.initial_epsilon = 0.7 logger.info("Agent reached performance threshold, decreasing epsilon to 0.7") + rewards.append(float(objectives["reward_mean"])) logger.info(f"Training finished with a total reward of {objectives['reward_mean']}") + output = {"rewards": rewards, "epsilons": epsilons} + with open("output.json", "w") as f: + json.dump(output, f) @hydra.main(version_base=None, config_path="configs", config_name="epsilon_heuristic") def execute(cfg: DictConfig): diff --git a/examples/run_reactive_schedule.py b/examples/run_reactive_schedule.py index ac72d2d75..63792ed2c 100644 --- a/examples/run_reactive_schedule.py +++ b/examples/run_reactive_schedule.py @@ -9,6 +9,7 @@ import sys import traceback from typing import TYPE_CHECKING +import json import hydra import jax @@ -32,16 +33,15 @@ def run(cfg: DictConfig, logger: logging.Logger): # define a tolerance for the gradient norm tolerance = 1e-4 + rewards = [] + lrs = [] for i in range(100): - + lrs.append(cfg.hp_config.learning_rate) # Statistics here contain the number of steps and gradient information statistics, objectives, te, tr, _ = env.step(cfg.hp_config) grad_norm, _ = statistics["grad_info"] # If grad norm doesn't change much, spike the learning rate - if 
last_grad_norm is not None: - print(i) - print(abs(grad_norm - last_grad_norm)) if last_grad_norm is not None and abs(grad_norm - last_grad_norm) < tolerance: last_lr = cfg.hp_config.learning_rate cfg.hp_config.learning_rate *= 10 @@ -54,7 +54,12 @@ def run(cfg: DictConfig, logger: logging.Logger): spiked = False logger.info(f"Resetting learning rate to {cfg.hp_config.learning_rate}") last_grad_norm = grad_norm + rewards.append(float(objectives["reward_mean"])) + logger.info(f"Training finished with a total reward of {objectives['reward_mean']}") + output = {"rewards": rewards, "lr": lrs} + with open("output.json", "w") as f: + json.dump(output, f) @hydra.main(version_base=None, config_path="configs", config_name="gradient_lr") diff --git a/examples/schedules.ipynb b/examples/schedules.ipynb new file mode 100644 index 000000000..5be2d1b0a --- /dev/null +++ b/examples/schedules.ipynb @@ -0,0 +1,214 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Custom Schedules With ARLBench\n", + "\n", + "These examples will show you how to create and run schedules using ARLBench. First, let's look at how this looks.\n", + "To make any sort of schedule, you want to first create an instance of the AutoRLEnv:\n", + "\n", + "```python\n", + "# Initialize environment with general config\n", + "env = AutoRLEnv(cfg.autorl)\n", + "\n", + "# Reset environment\n", + "_ = env.reset()\n", + "```\n", + "\n", + "This looks like an RL env, so you should already be familiar with what comes next:\n", + "\n", + "```python\n", + "# Run for 10 steps\n", + "for _ in range(10):\n", + " # The objectives are configured to return the mean reward\n", + " _, objectives, _, _, _ = env.step(cfg.hp_config)\n", + "```\n", + "\n", + "Here we do 10 steps, the length of which is configured via the config.\n", + "Since the 'hp_config' is given at each step, we can adapt the hyperparameters for the next one." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can check out a really simple schedule heuristic we prepared beforehand, epsilon decay tied to the reward for DQN on CartPole:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Couldn't import CARP-S, the CARP-S Hypersweeper will not be available.\n", + "Couldn't import DEHB, the DEHB Hypersweeper will not be available.\n", + "Couldn't import HEBO, the HEBO Hypersweeper will not be available.\n", + "Couldn't import Nevergrad, the Nevergrad Hypersweeper will not be available.\n", + "[2024-05-29 16:30:58,241][root][INFO] - Starting run with epsilon value 0.9\n", + "[2024-05-29 16:30:58,255][jax._src.xla_bridge][INFO] - Unable to initialize backend 'cuda': \n", + "[2024-05-29 16:30:58,255][jax._src.xla_bridge][INFO] - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'\n", + "[2024-05-29 16:30:58,256][jax._src.xla_bridge][INFO] - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: dlopen(libtpu.so, 0x0001): tried: 'libtpu.so' (no such file), '/System/Volumes/Preboot/Cryptexes/OSlibtpu.so' (no such file), '/Users/theeimer/anaconda3/envs/arlbench/bin/../lib/libtpu.so' (no such file), '/usr/lib/libtpu.so' (no such file, not in dyld cache), 'libtpu.so' (no such file), '/usr/local/lib/libtpu.so' (no such file), '/usr/lib/libtpu.so' (no such file, not in dyld cache)\n", + "[2024-05-29 16:31:02,434][root][INFO] - Agent reached performance threshold, decreasing epsilon to 0.7\n", + "[2024-05-29 16:31:24,264][root][INFO] - Training finished with a total reward of 225.0\n" + ] + } + ], + "source": [ + "!python run_heuristic_schedule.py" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import seaborn as sns\n", + "import 
matplotlib.pyplot as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "result_dir = \"results/heuristic_schedule_dqn_CartPole-v1/42\"\n", + "with open(result_dir + \"/output.json\") as f:\n", + " results = json.load(f)\n", + "rewards = results[\"rewards\"]\n", + "epsilons = results[\"epsilons\"]\n", + "\n", + "f, (ax1, ax2) = plt.subplots(1, 2, sharey=False)\n", + "sns.lineplot({\"epsilon\": epsilons}, ax=ax1)\n", + "sns.lineplot({\"reward\": rewards}, ax=ax2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since we're not only returning objectives, but also some optional information about the RL state, we also tried to adjust the learning rate according to the grad norm:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Couldn't import CARP-S, the CARP-S Hypersweeper will not be available.\n", + "Couldn't import DEHB, the DEHB Hypersweeper will not be available.\n", + "Couldn't import HEBO, the HEBO Hypersweeper will not be available.\n", + "Couldn't import Nevergrad, the Nevergrad Hypersweeper will not be available.\n", + "[2024-05-29 16:31:28,017][jax._src.xla_bridge][INFO] - Unable to initialize backend 'cuda': \n", + "[2024-05-29 16:31:28,017][jax._src.xla_bridge][INFO] - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'\n", + "[2024-05-29 16:31:28,017][jax._src.xla_bridge][INFO] - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: dlopen(libtpu.so, 0x0001): tried: 'libtpu.so' (no such file), '/System/Volumes/Preboot/Cryptexes/OSlibtpu.so' (no such file), '/Users/theeimer/anaconda3/envs/arlbench/bin/../lib/libtpu.so' (no such file), '/usr/lib/libtpu.so' (no such file, not in dyld cache), 'libtpu.so' (no such file), '/usr/local/lib/libtpu.so' (no such file), '/usr/lib/libtpu.so' (no such file, not in dyld cache)\n", + "[2024-05-29 
16:35:28,381][root][INFO] - Training finished with a total reward of 276.8999938964844\n" + ] + } + ], + "source": [ + "!python run_reactive_schedule.py" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "result_dir = \"results/reactive_schedule_dqn_CartPole-v1/42\"\n", + "with open(result_dir + \"/output.json\") as f:\n", + " results = json.load(f)\n", + "rewards = results[\"rewards\"]\n", + "lrs = results[\"lr\"]\n", + "\n", + "f, (ax1, ax2) = plt.subplots(1, 2, sharey=False)\n", + "sns.lineplot({\"learning rate\": lrs}, ax=ax1)\n", + "sns.lineplot({\"reward\": rewards}, ax=ax2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see, these are maybe not the best schedule ideas ever, but they show you what you can do with schedules in ARLBench!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "arlbench", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}