From 19af55e60846ccb1e44a5c71c4456d744f0b46bd Mon Sep 17 00:00:00 2001
From: Nico Christianson <nicolas.christianson@gmail.com>
Date: Mon, 30 Oct 2023 20:18:43 -0700
Subject: [PATCH] fixed TODOs in CogenEnv (#22)

* fixed TODOs in CogenEnv

* Resolving requested edits on CogenEnv and load_ambients.py for PR

* Resolving dtype issue

* Update env_cogen.yml

Better docs for cogen env.py

* Update CogenEnv to target PettingZoo v1.24

---------

Co-authored-by: Christopher Yeh <chrisyeh96@users.noreply.github.com>
---
 env_cogen.yml                           | 20 ++++++++++++-----
 sustaingym/data/cogen/load_ambients.py  | 14 +-----------
 sustaingym/envs/cogen/env.py            | 29 +++++++++++++++----------
 sustaingym/envs/cogen/multiagent_env.py | 24 +++++++++++---------
 tests/test_cogen.py                     |  9 +++-----
 5 files changed, 50 insertions(+), 46 deletions(-)

diff --git a/env_cogen.yml b/env_cogen.yml
index d7a0031..2f52286 100755
--- a/env_cogen.yml
+++ b/env_cogen.yml
@@ -5,8 +5,8 @@
 # conda remove --name sustaingym_cogen --all
 #
 # Notes
-# - ray[rllib]==2.7.0 only supports gymnasium 0.28.1, pettingzoo 0.24.*
-# - last updated: September 27, 2023
+# - ray[rllib]==2.7 only supports gymnasium 0.28.1, pettingzoo 1.24.*
+# - last updated: October 30, 2023
 name: sustaingym_cogen
 channels:
 - pytorch           # for pytorch
@@ -14,7 +14,6 @@ channels:
 - conda-forge
 dependencies:
 - python=3.11
-- cudatoolkit=11.8.0  # for TensorFlow 2.12
 - flake8
 - ipympl              # for Jupyter / VSCode notebooks
 - ipykernel           # for Jupyter / VSCode notebooks
@@ -25,16 +24,25 @@ dependencies:
 - pandas
 - pip
 - pytorch=2.0.1
-- pytorch-cuda=11.8   # for PyTorch 2.0
 - pytz=2023.3
 - seaborn
 - tqdm
 - xlrd                # for reading Excel files
 
+# for GPU
+- cudatoolkit=11.8.0  # for TensorFlow 2.14
+- pytorch-cuda=11.8   # for PyTorch 2.0
+
+
 - pip:
   - gymnasium==0.28.1
   - pettingzoo==1.24.1
-  - "ray[rllib]==2.7.0"
+  - "ray[rllib]==2.7.1"
   - tensorflow==2.14.*
+
+  # uncomment for CPU-only
+  # - onnxruntime
+
+  # for GPU
   - nvidia-cudnn-cu11==8.6.0.163  # for TensorFlow 2.12
-  - onnxruntime
\ No newline at end of file
+  - onnxruntime-gpu
\ No newline at end of file
diff --git a/sustaingym/data/cogen/load_ambients.py b/sustaingym/data/cogen/load_ambients.py
index 3af6be7..661a535 100644
--- a/sustaingym/data/cogen/load_ambients.py
+++ b/sustaingym/data/cogen/load_ambients.py
@@ -37,7 +37,6 @@ def wind_curve(x):
 def construct_df(renewables_magnitude: float = 0.) -> list[pd.DataFrame]:
     """
     Constructs the dataframe of all ambient conditions
-    Adding renewables (scaled by magnitude input) is currently not implemented TODO?
     """
     renewables_magnitude = float(renewables_magnitude)
 
@@ -114,15 +113,6 @@ def construct_df(renewables_magnitude: float = 0.) -> list[pd.DataFrame]:
         # get the wind power data
         wind_data = load_wind_data(renewables_magnitude)[:len(df)]
         df['Target Net Power'] = np.maximum(df['Target Net Power'] - wind_data, 0)
-        # for l in range(len(dfs)):
-        #     try:
-
-        #         dfs[l]['Target Net Power'] = np.maximum(dfs[l]['Target Net Power'] - wind_data[l],
-        #                                                 np.zeros_like(wind_data[l]))
-        #     except:
-        #         # if the wind data is not the same length as the ambient data,
-        #         # then we're just going to throw away this day anyway
-        #         pass
 
         try:
             path = os.path.join(DATA_DIR, f'ambients_wind={renewables_magnitude}.pkl')
@@ -136,9 +126,7 @@ def construct_df(renewables_magnitude: float = 0.) -> list[pd.DataFrame]:
     dates = df['Timestamp'].dt.date.unique()
     # drop the first and last days so each day has 96 datapoints
     dfs = [df[df['Timestamp'].dt.date == val] for val in dates][1:-1]
-    # exclude any day that has more or fewer than 96 intervals
-    # since this means the row is corrupted
-    # TODO: fix this later. Culprit is daylight savings.
+    # exclude days that do not have exactly 96 intervals (caused by daylight saving time transitions)
     dfs = [df for df in dfs if len(df) == 96]
 
     return dfs
diff --git a/sustaingym/envs/cogen/env.py b/sustaingym/envs/cogen/env.py
index 124d273..944aee9 100644
--- a/sustaingym/envs/cogen/env.py
+++ b/sustaingym/envs/cogen/env.py
@@ -65,12 +65,12 @@ class CogenEnv(gym.Env):
         Natural gas price ($/MMBtu)   0                   7
 
     Args:
-        renewables_magnitude: TODO
-        ramp_penalty: TODO
-        supply_imbalance_penalty: TODO
-        constraint_violation_penalty: TODO
-        forecast_horizon: TODO
-        forecast_noise_std: TODO
+        renewables_magnitude: wind generation capacity
+        ramp_penalty: magnitude of penalty for generator ramping
+        supply_imbalance_penalty: magnitude of penalty for energy/steam supply-demand imbalance
+        constraint_violation_penalty: magnitude of penalty for other constraint violations
+        forecast_horizon: number of forecast steps to include in observation
+        forecast_noise_std: standard deviation of noise on future forecast steps
     """
     def __init__(self,
                  renewables_magnitude: float = 0.,
@@ -78,12 +78,13 @@ def __init__(self,
                  supply_imbalance_penalty: float = 1000,
                  constraint_violation_penalty: float = 1000,
                  forecast_horizon: int = 3,
-                 forecast_noise_std: float = 0.1,
+                 forecast_noise_std: float = 0.0,
                  ):
         self.ramp_penalty = ramp_penalty
         self.supply_imbalance_penalty = supply_imbalance_penalty
         self.constraint_violation_penalty = constraint_violation_penalty
         self.forecast_horizon = forecast_horizon
+        self.forecast_noise_std = forecast_noise_std
         # load the ambient conditions dataframes
         self.ambients_dfs = load_ambients.construct_df(renewables_magnitude=renewables_magnitude)
         self.n_days = len(self.ambients_dfs)
@@ -150,17 +151,23 @@ def __init__(self,
         })
 
     def _forecast_from_time(self, day: int, time_step: int) -> pd.DataFrame:
-        """Returns the forecast values starting at the given day and time step
-        for the following self.forecast_horizon + 1 time steps."""
+        """Gets forecast values starting at the given day and time step for
+        the following self.forecast_horizon + 1 time steps.
+
+        Returns:
+            forecast: DataFrame of shape (forecast_horizon + 1, 7), dtype float32
+        """
         slice_df = self.ambients_dfs[day].iloc[time_step:min(time_step+self.forecast_horizon+1, self.timesteps_per_day)]
         # fix so that if the slice_df is not long enough, it will take the first values of the next day
-        # TODO: figure out what to do if we're on the last day and there is no next day
         if len(slice_df) < self.forecast_horizon + 1:
             slice_df = pd.concat([slice_df, self.ambients_dfs[day+1].iloc[:self.forecast_horizon + 1 - len(slice_df)]])
         cols = ['Ambient Temperature', 'Ambient Pressure',
                 'Ambient rel. Humidity', 'Target Net Power',
                 'Target Process Steam', 'Energy Price', 'Gas Price']
-        return slice_df[cols].astype(np.float32)
+        forecast = slice_df[cols]
+        # add i.i.d. Gaussian noise to the future forecast steps only; row 0 (current step) stays exact
+        forecast.iloc[1:] += self.forecast_noise_std * self.np_random.normal(size=(self.forecast_horizon, 7))
+        return forecast.astype(np.float32)
 
     def _get_obs(self) -> dict[str, Any]:
         """Get the current observation.
diff --git a/sustaingym/envs/cogen/multiagent_env.py b/sustaingym/envs/cogen/multiagent_env.py
index d52eb6b..6ad16e7 100644
--- a/sustaingym/envs/cogen/multiagent_env.py
+++ b/sustaingym/envs/cogen/multiagent_env.py
@@ -18,7 +18,7 @@ class MultiAgentCogenEnv(ParallelEnv):
     """
 
     # PettingZoo API
-    metadata = {}
+    metadata: dict[str, Any] = {}
 
     def __init__(self,
                  renewables_magnitude: float = 0.,
@@ -26,8 +26,7 @@ def __init__(self,
                  supply_imbalance_penalty: float = 1000,
                  constraint_violation_penalty: float = 1000,
                  forecast_horizon: int = 12,
-                 forecast_noise_std: float = 0.1,
-                 ):
+                 forecast_noise_std: float = 0.1):
         super().__init__()
 
         self.single_env = CogenEnv(
@@ -44,6 +43,7 @@ def __init__(self,
 
         # every agent gets the same flattened observation space
         flat_observation_space = spaces.flatten_space(self.single_env.observation_space)
+        assert isinstance(flat_observation_space, spaces.Box)
         self.observation_spaces = {
             agent: flat_observation_space for agent in self.agents
         }
@@ -69,7 +69,7 @@ def step(self, actions: dict[str, dict[str, np.ndarray]]
                 dict[str, float],
                 dict[str, bool],
                 dict[str, bool],
-                dict[str, dict[str, Any]]]:
+                dict[str, dict]]:
         """Run one timestep of the Cogen environment's dynamics.
 
         Args:
@@ -82,13 +82,14 @@ def step(self, actions: dict[str, dict[str, np.ndarray]]
             truncateds: dict mapping agent_id to truncated
             infos: dict mapping agent_id to info
         """
-        action = {}
+        action: dict[str, np.ndarray] = {}
         for agent in self.agents:
             action |= actions[agent]
 
         # Use internal single-agent environment
         obs, _, terminated, truncated, info = self.single_env.step(action)
         flat_obs = spaces.flatten(self.single_env.observation_space, obs)
+        assert isinstance(flat_obs, np.ndarray)
 
         obss, rewards, terminateds, truncateds, infos = {}, {}, {}, {}, {}
         for agent in self.agents:
@@ -110,13 +111,12 @@ def step(self, actions: dict[str, dict[str, np.ndarray]]
 
         return obss, rewards, terminateds, truncateds, infos
 
-    # TODO: once we update to a newer version of PettingZoo (>=1.23), the
-    # reset() function definition may need to change
     def reset(self, seed: int | None = None, options: dict | None = None
               ) -> tuple[dict[str, np.ndarray], dict[str, dict[str, Any]]]:
         """Resets the environment."""
         obs, info = self.single_env.reset(seed=seed, options=options)
         flat_obs = spaces.flatten(self.single_env.observation_space, obs)
+        assert isinstance(flat_obs, np.ndarray)
 
         self.agents = self.possible_agents[:]
         obss = {agent: flat_obs for agent in self.agents}
@@ -131,16 +131,20 @@ def close(self) -> None:
         """Close the environment."""
         self.single_env.close()
 
-    def observation_space(self, agent: str) -> spaces.Space:
+    def observation_space(self, agent: str) -> spaces.Box:
         return self.observation_spaces[agent]
 
-    def action_space(self, agent: str) -> spaces.Box | spaces.Discrete:
+    def action_space(self, agent: str) -> spaces.Dict:
         return self.action_spaces[agent]
 
 
 class MultiAgentRLLibCogenEnv(MultiAgentCogenEnv, MultiAgentEnv):
     """MultiAgentRLLibCogenEnv extends MultiAgentCogenEnv to support the RLLib
-    MultiAgentEnv API (RLLib v2.6.3).
+    MultiAgentEnv API (RLLib v2.6.3, v2.7).
+
+    This class should be removed once
+    https://github.com/ray-project/ray/pull/39459
+    is included in a Ray RLLib release (likely v2.8).
     """
     def __init__(self,
                  renewables_magnitude: float = 0.,
diff --git a/tests/test_cogen.py b/tests/test_cogen.py
index f1d366e..066fab4 100644
--- a/tests/test_cogen.py
+++ b/tests/test_cogen.py
@@ -4,7 +4,7 @@
 
 import gymnasium.utils.env_checker
 from pettingzoo.test import parallel_api_test
-# from pettingzoo.test.seed_test import parallel_seed_test
+from pettingzoo.test.seed_test import parallel_seed_test
 import ray.rllib.utils
 
 from sustaingym.envs.cogen import (
@@ -35,11 +35,8 @@ def tearDown(self) -> None:
     def test_pettingzoo_parallel_api(self) -> None:
         parallel_api_test(self.env, num_cycles=1000)
 
-    # this test fails in PettingZoo v1.22.3 due to a PettingZoo bug
-    # https://github.com/Farama-Foundation/PettingZoo/issues/939
-    # TODO: uncomment once we upgrade to PettingZoo >= 1.23
-    # def test_pettingzoo_parallel_seed(self) -> None:
-    #     parallel_seed_test(MultiAgentCogenEnv)
+    def test_pettingzoo_parallel_seed(self) -> None:
+        parallel_seed_test(MultiAgentCogenEnv)
 
 
 class TestMultiAgentRLLibEnv(unittest.TestCase):