diff --git a/scripts/load_agent_example.json b/scripts/load_agent_example.json index 2120ce538a..b0ada8edc4 100644 --- a/scripts/load_agent_example.json +++ b/scripts/load_agent_example.json @@ -1,28 +1,12 @@ { "id": "LoadExample", - "environment": "Eplus-5Zone-hot-continuous-stochastic-v1", + "environment": "Eplus-5zone-hot-continuous-stochastic-v1", "episodes": 5, "algorithm": { "name": "SB3-PPO" }, "env_params": { - "reward": "LinearReward", - "reward_kwargs": { - "temperature_variable": [ - "Zone Air Temperature(SPACE1-1)", - "Zone Air Temperature(SPACE1-2)" - ], - "energy_variable": "Facility Total HVAC Electricity Demand Rate(Whole Building)", - "range_comfort_winter": [ - 20.0, - 23.5 - ], - "range_comfort_summer": [ - 23.0, - 26.0 - ] - }, - "act_repeat": 1 + "reward": "LinearReward" }, "seed": 3, "model": "alex_ugr/sinergym/training:latest", diff --git a/scripts/train_agent_example.json b/scripts/train_agent_example.json index c04d90de01..147f90414e 100644 --- a/scripts/train_agent_example.json +++ b/scripts/train_agent_example.json @@ -21,23 +21,7 @@ } }, "env_params": { - "reward": "LinearReward", - "reward_kwargs": { - "temperature_variable": [ - "Zone Air Temperature(SPACE1-1)", - "Zone Air Temperature(SPACE1-2)" - ], - "energy_variable": "Facility Total HVAC Electricity Demand Rate(Whole Building)", - "range_comfort_winter": [ - 20.0, - 23.5 - ], - "range_comfort_summer": [ - 23.0, - 26.0 - ] - }, - "act_repeat": 1 + "reward": "LinearReward" }, "seed": 3, "model": null, diff --git a/sinergym/utils/evaluation.py b/sinergym/utils/evaluation.py index ffad12502d..d342681fb3 100644 --- a/sinergym/utils/evaluation.py +++ b/sinergym/utils/evaluation.py @@ -1,9 +1,10 @@ """Custom policy evaluations for Evaluation Callbacks.""" -from typing import Any, Callable, Dict, Optional, Union, Tuple, List +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import gymnasium as gym import numpy as np +from stable_baselines3.common import type_aliases from stable_baselines3.common.vec_env import VecEnv @@ -17,7 +18,7 @@ def evaluate_policy( reward_threshold: Optional[float] = None, return_episode_rewards: bool = False, warn: bool = True, -) -> Union[Tuple[float, float], Tuple[List[float], List[int]]]: +) -> Dict[str, list]: """ Runs policy for ``n_eval_episodes`` episodes and returns average reward and other Sinergym metrics.