Merge pull request #660 from BDonnot/bd_dev

Some other additions in GreedyAgent and FromEpisodeData
Grid2op · Nov 8, 2024 · 14e0bdb · 14e0bdb
2 parents bd1679d + 7acf066
commit 14e0bdb
Show file tree

Hide file tree

Showing 14 changed files with 273 additions and 123 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -99,6 +99,8 @@ Native multi agents support:
 
 [1.11.0] - 202x-yy-zz
 -----------------------
+- [BREAKING] Change for `FromMultiEpisodeData` that disables the caching by default
+  when creating the data.
 - [FIXED] issue https://github.com/Grid2op/grid2op/issues/657
 - [FIXED] missing an import on the `MaskedEnvironment` class
 - [ADDED] possibility to set the "thermal limits" when calling `env.reset(..., options={"thermal limit": xxx})`
@@ -113,6 +115,12 @@ Native multi agents support:
   "chronics_handler" in the ObsEnv behaves (it now fully implements the public interface of 
   a "real" chronic_handler)
 - [IMPROVED] error message in the `FromNPY` class when the backend is checked
+- [IMPROVED] the `FromMultiEpisodeData` class with the addition of the `caching` 
+  kwarg to enable / disable caching (which was the default behavior in previous versions) 
+- [IMPROVED] the `FromMultiEpisodeData` class that now returns also the path of the data
+- [IMPROVED] the classes inherited from `GreedyAgent` with the added possibility to 
+  do the `obs.simulate` on a different time horizon (kwarg `simulated_time_step`)
+- [IMPROVED] some type hints for some agent class
 
 [1.10.4] - 2024-10-15
 -------------------------

diff --git a/grid2op/Action/baseAction.py b/grid2op/Action/baseAction.py
@@ -1859,9 +1859,7 @@ def __call__(self) -> Tuple[dict, np.ndarray, np.ndarray, np.ndarray, np.ndarray
         )
 
     def _digest_shunt(self, dict_):
-        if not type(self).shunts_data_available:
-            return
-
+        cls = type(self)
         if "shunt" in dict_:
             ddict_ = dict_["shunt"]
 
@@ -1884,7 +1882,6 @@ def _digest_shunt(self, dict_):
                         vect_self[:] = tmp
                     elif isinstance(tmp, list):
                         # expected a list: (id shunt, new bus)
-                        cls = type(self)
                         for (sh_id, new_bus) in tmp:
                             if sh_id < 0:
                                 raise AmbiguousAction(
@@ -2380,18 +2377,36 @@ def update(self,
 
         """
         self._reset_vect()
-
+        cls = type(self)
+
         if dict_ is not None:
             for kk in dict_.keys():
-                if kk not in self.authorized_keys:
+                if kk not in cls.authorized_keys:
+                    if kk == "shunt" and not cls.shunts_data_available:
+                        # no warnings are raised in this case because if a warning
+                        # were raised it could crash some environment
+                        # with shunt in "init_state.json" with a backend that does not
+                        # handle shunt
+                        continue
+                    if kk == "set_storage" and cls.n_storage == 0:
+                        # no warnings are raised in this case because if a warning
+                        # were raised it could crash some environment
+                        # with storage in "init_state.json" with a backend that does not
+                        # handle storage units
+                        continue
                     warn = 'The key "{}" used to update an action will be ignored. Valid keys are {}'
-                    warn = warn.format(kk, self.authorized_keys)
+                    warn = warn.format(kk, cls.authorized_keys)
                     warnings.warn(warn)
 
-            self._digest_shunt(dict_)
+            if cls.shunts_data_available:
+                # do not digest shunt when backend does not support it
+                self._digest_shunt(dict_)
             self._digest_injection(dict_)
             self._digest_redispatching(dict_)
-            self._digest_storage(dict_)  # ADDED for battery
+            if cls.n_storage > 0:
+                # do not digest storage when backend does not
+                # support it
+                self._digest_storage(dict_)  # ADDED for battery
             self._digest_curtailment(dict_)  # ADDED for curtailment
             self._digest_setbus(dict_)
             self._digest_change_bus(dict_)

diff --git a/grid2op/Agent/baseAgent.py b/grid2op/Agent/baseAgent.py
@@ -32,7 +32,7 @@ class BaseAgent(RandomObject, ABC):
 
     def __init__(self, action_space: ActionSpace):
         RandomObject.__init__(self)
-        self.action_space = copy.deepcopy(action_space)
+        self.action_space : ActionSpace = copy.deepcopy(action_space)
 
     def reset(self, obs: BaseObservation):
         """

diff --git a/grid2op/Agent/greedyAgent.py b/grid2op/Agent/greedyAgent.py
@@ -7,10 +7,14 @@
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
 
 from abc import abstractmethod
+from typing import List
 import numpy as np
-from grid2op.Agent.baseAgent import BaseAgent
+from grid2op.Action import BaseAction, ActionSpace
+from grid2op.Observation import BaseObservation
 from grid2op.dtypes import dt_float
 
+from grid2op.Agent.baseAgent import BaseAgent
+
 
 class GreedyAgent(BaseAgent):
     """
@@ -23,12 +27,13 @@ class GreedyAgent(BaseAgent):
     override this class. Examples are provided with :class:`PowerLineSwitch` and :class:`TopologyGreedy`.
     """
 
-    def __init__(self, action_space):
+    def __init__(self, action_space: ActionSpace, simulated_time_step : int =1):
         BaseAgent.__init__(self, action_space)
         self.tested_action = None
         self.resulting_rewards = None
+        self.simulated_time_step = int(simulated_time_step)
 
-    def act(self, observation, reward, done=False):
+    def act(self, observation: BaseObservation, reward: float, done : bool=False) -> BaseAction:
         """
         By definition, all "greedy" agents are acting the same way. The only thing that can differentiate multiple
         agents is the actions that are tested.
@@ -64,7 +69,7 @@ def act(self, observation, reward, done=False):
                     simul_reward,
                     simul_has_error,
                     simul_info,
-                ) = observation.simulate(action)
+                ) = observation.simulate(action, time_step=self.simulated_time_step)
                 self.resulting_rewards[i] = simul_reward
             reward_idx = int(
                 np.argmax(self.resulting_rewards)
@@ -75,7 +80,7 @@ def act(self, observation, reward, done=False):
         return best_action
 
     @abstractmethod
-    def _get_tested_action(self, observation):
+    def _get_tested_action(self, observation: BaseObservation) -> List[BaseAction]:
         """
         Returns the list of all the candidate actions.
 

diff --git a/grid2op/Agent/powerlineSwitch.py b/grid2op/Agent/powerlineSwitch.py
@@ -6,9 +6,13 @@
 # SPDX-License-Identifier: MPL-2.0
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
 
+from typing import List
 import numpy as np
 
 from grid2op.dtypes import dt_bool
+from grid2op.Observation import BaseObservation
+from grid2op.Action import BaseAction, ActionSpace
+
 from grid2op.Agent.greedyAgent import GreedyAgent
 
 
@@ -27,20 +31,14 @@ class PowerLineSwitch(GreedyAgent):
 
     """
 
-    def __init__(self, action_space):
-        GreedyAgent.__init__(self, action_space)
+    def __init__(self, action_space: ActionSpace, simulated_time_step : int =1):
+        GreedyAgent.__init__(self, action_space, simulated_time_step=simulated_time_step)
 
-    def _get_tested_action(self, observation):
+    def _get_tested_action(self, observation: BaseObservation) -> List[BaseAction]:
         res = [self.action_space({})]  # add the do nothing
         for i in range(self.action_space.n_line):
             tmp = np.full(self.action_space.n_line, fill_value=False, dtype=dt_bool)
             tmp[i] = True
             action = self.action_space({"change_line_status": tmp})
-            if not observation.line_status[i]:
-                # so the action consisted in reconnecting the powerline
-                # i need to say on which bus (always on bus 1 for this type of agent)
-                action = action.update(
-                    {"set_bus": {"lines_or_id": [(i, 1)], "lines_ex_id": [(i, 1)]}}
-                )
             res.append(action)
         return res
diff --git a/grid2op/Agent/recoPowerlineAgent.py b/grid2op/Agent/recoPowerlineAgent.py
@@ -5,7 +5,11 @@
 # you can obtain one at http://mozilla.org/MPL/2.0/.
 # SPDX-License-Identifier: MPL-2.0
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
-import numpy as np
+
+from typing import List
+from grid2op.Observation import BaseObservation
+from grid2op.Action import BaseAction, ActionSpace
+
 from grid2op.Agent.greedyAgent import GreedyAgent
 
 
@@ -17,10 +21,10 @@ class RecoPowerlineAgent(GreedyAgent):
 
     """
 
-    def __init__(self, action_space):
-        GreedyAgent.__init__(self, action_space)
+    def __init__(self, action_space: ActionSpace, simulated_time_step : int =1):
+        GreedyAgent.__init__(self, action_space, simulated_time_step=simulated_time_step)
 
-    def _get_tested_action(self, observation):
+    def _get_tested_action(self, observation: BaseObservation) -> List[BaseAction]:
         res = [self.action_space({})]  # add the do nothing
         line_stat_s = observation.line_status
         cooldown = observation.time_before_cooldown_line

diff --git a/grid2op/Agent/topologyGreedy.py b/grid2op/Agent/topologyGreedy.py
@@ -6,6 +6,9 @@
 # SPDX-License-Identifier: MPL-2.0
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
 
+from typing import List
+from grid2op.Observation import BaseObservation
+from grid2op.Action import BaseAction, ActionSpace
 from grid2op.Agent.greedyAgent import GreedyAgent
 
 
@@ -22,11 +25,11 @@ class TopologyGreedy(GreedyAgent):
 
     """
 
-    def __init__(self, action_space):
-        GreedyAgent.__init__(self, action_space)
-        self.tested_action = None
+    def __init__(self, action_space: ActionSpace, simulated_time_step : int =1):
+        GreedyAgent.__init__(self, action_space, simulated_time_step=simulated_time_step)
+        self.tested_action : List[BaseAction]= None
 
-    def _get_tested_action(self, observation):
+    def _get_tested_action(self, observation: BaseObservation) -> List[BaseAction]:
         if self.tested_action is None:
             res = [self.action_space({})]  # add the do nothing
             # better use "get_all_unitary_topologies_set" and not "get_all_unitary_topologies_change"

diff --git a/grid2op/Chronics/fromMultiEpisodeData.py b/grid2op/Chronics/fromMultiEpisodeData.py
@@ -7,16 +7,9 @@
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
 
 from datetime import datetime, timedelta
-import os
-import numpy as np
-import copy
-import warnings
 from typing import Optional, Union, List, Dict, Literal
-from pathlib import Path
 
-from grid2op.Exceptions import (
-    ChronicsError, ChronicsNotFoundError
-)
+from grid2op.Exceptions import ChronicsError
 
 from grid2op.Chronics.gridValue import GridValue
 
@@ -40,6 +33,17 @@ class FromMultiEpisodeData(GridValue):
         - to make sure you are running the exact same episode, you need to create the environment
           with the :class:`grid2op.Opponent.FromEpisodeDataOpponent` opponent
 
+    .. versionchanged:: 1.11.0
+        Before version 1.11.0 this class would load all the data in memory at the creation of the environment,
+        which could take lots of time and memory, but once done a call to `env.reset` would be really fast.
+        
+        From grid2op >= 1.11.0 a kwarg `caching` has been added (default value is ``False``). By default the
+        data are no longer all loaded in memory, which makes it more memory efficient and (maybe) faster
+        (if some data happened to be loaded but never used). The default behaviour has therefore
+        changed.
+        
+        You can still benefit from previous behaviour by loading with `caching=True`
+        
     Examples
     ---------
     You can use this class this way:
@@ -110,21 +114,38 @@ def __init__(self,
                  max_iter=-1,
                  start_datetime=datetime(year=2019, month=1, day=1),
                  chunk_size=None,
-                 list_perfect_forecasts=None,  # TODO
+                 list_perfect_forecasts=None,
+                 caching : bool=False,
                  **kwargs,  # unused
                  ):
         super().__init__(time_interval, max_iter, start_datetime, chunk_size)
-        self.li_ep_data = [FromOneEpisodeData(path,
-                                              ep_data=el,
-                                              time_interval=time_interval,
-                                              max_iter=max_iter,
-                                              chunk_size=chunk_size,
-                                              list_perfect_forecasts=list_perfect_forecasts,
-                                              start_datetime=start_datetime)
-                           for el in li_ep_data
-                           ]
+        self._caching : bool = bool(caching)
+        self._path = path
+        self._chunk_size = chunk_size
+        self._list_perfect_forecasts = list_perfect_forecasts
+        self._input_li_ep_data = li_ep_data
+        if self._caching:
+            self.li_ep_data = [FromOneEpisodeData(path,
+                                                  ep_data=el,
+                                                  time_interval=time_interval,
+                                                  max_iter=max_iter,
+                                                  chunk_size=chunk_size,
+                                                  list_perfect_forecasts=list_perfect_forecasts,
+                                                  start_datetime=start_datetime)
+                               for el in li_ep_data
+                              ]
+        else:
+            self.li_ep_data = [None for _ in li_ep_data]
         self._prev_cache_id = len(self.li_ep_data) - 1
         self.data = self.li_ep_data[self._prev_cache_id]
+        if self.data is None:
+            self.data = FromOneEpisodeData(self._path,
+                                           ep_data=self._input_li_ep_data[self._prev_cache_id],
+                                           time_interval=self.time_interval,
+                                           max_iter=self.max_iter,
+                                           chunk_size=self._chunk_size,
+                                           list_perfect_forecasts=self._list_perfect_forecasts,
+                                           start_datetime=self.start_datetime)
         self._episode_data = self.data._episode_data  # used by the fromEpisodeDataOpponent
 
     def next_chronics(self):
@@ -144,6 +165,15 @@ def initialize(
     ):
 
         self.data = self.li_ep_data[self._prev_cache_id]
+        if self.data is None:
+            # data was not in cache:
+            self.data = FromOneEpisodeData(self._path,
+                                           ep_data=self._input_li_ep_data[self._prev_cache_id],
+                                           time_interval=self.time_interval,
+                                           max_iter=self.max_iter,
+                                           chunk_size=self._chunk_size,
+                                           list_perfect_forecasts=self._list_perfect_forecasts,
+                                           start_datetime=self.start_datetime)
         self.data.initialize(
             order_backend_loads,
             order_backend_prods,
@@ -168,12 +198,19 @@ def check_validity(self, backend):
     def forecasts(self):
         return self.data.forecasts()
 
-    def tell_id(self, id_num, previous=False):
-        id_num = int(id_num)
-        if not isinstance(id_num, (int, dt_int)):
+    def tell_id(self, id_num: str, previous=False):
+        try:
+            id_num = int(id_num)
+            path_ = None
+        except ValueError:
+            path_, id_num = id_num.split("@")
+            id_num = int(id_num)
+
+        if path_ is not None and path_ != self._path:
             raise ChronicsError("FromMultiEpisodeData can only be used with `tell_id` being an integer "
-                                "at the moment. Feel free to write a feature request if you want more.")
-
+                                "or if tell_id has the same path as the original file. "
+                                "Feel free to write a feature request if you want more.")
+
         self._prev_cache_id = id_num
         self._prev_cache_id %= len(self.li_ep_data)
 
@@ -182,7 +219,7 @@ def tell_id(self, id_num, previous=False):
             self._prev_cache_id %= len(self.li_ep_data)
 
     def get_id(self) -> str:
-        return f'{self._prev_cache_id }'
+        return f'{self._path}@{self._prev_cache_id}'
 
     def max_timestep(self):
         return self.data.max_timestep()

diff --git a/grid2op/Chronics/fromOneEpisodeData.py b/grid2op/Chronics/fromOneEpisodeData.py
@@ -177,20 +177,19 @@ def __init__(
         if self.path is not None:
             # logger: this has no impact
             pass
-
         if isinstance(ep_data, EpisodeData):
             self._episode_data = ep_data
         elif isinstance(ep_data, (str, Path)):
             try:
-                self._episode_data = EpisodeData.from_disk(*os.path.split(ep_data))
+                self._episode_data = EpisodeData.from_disk(*os.path.split(ep_data), _only_act_obs=True)
             except Exception as exc_:
                 raise ChronicsError("Impossible to build the FromOneEpisodeData with the `ep_data` provided.") from exc_
         elif isinstance(ep_data, (tuple, list)):
             if len(ep_data) != 2:
                 raise ChronicsError("When you provide a tuple, or a list, FromOneEpisodeData can only be used if this list has length 2. "
                                     f"Length {len(ep_data)} found.")
             try:
-                self._episode_data = EpisodeData.from_disk(*ep_data)
+                self._episode_data = EpisodeData.from_disk(*ep_data, _only_act_obs=True)
             except Exception as exc_:
                 raise ChronicsError("Impossible to build the FromOneEpisodeData with the `ep_data` provided.") from exc_
         else: