some improvements for base agents
Signed-off-by: DONNOT Benjamin <[email protected]>
BDonnot committed Nov 8, 2024
1 parent 4e858dc commit 1a8c605
Showing 7 changed files with 61 additions and 23 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.rst
@@ -118,6 +118,9 @@ Native multi agents support:
 - [IMPROVED] the `FromMultiEpisodeData` class with the addition of the `caching`
   kwarg to enable / disable caching (which was the default behavior in previous versions)
 - [IMPROVED] the `FromMultiEpisodeData` class that now also returns the path of the data
+- [IMPROVED] the classes inherited from `GreedyAgent` with the added possibility to
+  do the `obs.simulate` on a different time horizon (kwarg `simulated_time_step`)
+- [IMPROVED] some type hints for some agent classes
 
 [1.10.4] - 2024-10-15
 -------------------------
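To illustrate the new kwarg described in the changelog entry above, here is a minimal usage sketch (the environment name is only an example; any locally available grid2op environment works):

```python
import grid2op
from grid2op.Agent import PowerLineSwitch

# "l2rpn_case14_sandbox" is an example environment, not part of this commit
env = grid2op.make("l2rpn_case14_sandbox")

# candidate actions are now simulated 2 steps ahead instead of the default 1
agent = PowerLineSwitch(env.action_space, simulated_time_step=2)

obs = env.reset()
reward, done = 0.0, False
while not done:
    action = agent.act(obs, reward, done)
    obs, reward, done, info = env.step(action)
env.close()
```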
2 changes: 1 addition & 1 deletion grid2op/Agent/baseAgent.py
@@ -32,7 +32,7 @@ class BaseAgent(RandomObject, ABC):
 
     def __init__(self, action_space: ActionSpace):
         RandomObject.__init__(self)
-        self.action_space = copy.deepcopy(action_space)
+        self.action_space: ActionSpace = copy.deepcopy(action_space)
 
     def reset(self, obs: BaseObservation):
         """
15 changes: 10 additions & 5 deletions grid2op/Agent/greedyAgent.py
@@ -7,10 +7,14 @@
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
 
 from abc import abstractmethod
+from typing import List
 import numpy as np
-from grid2op.Agent.baseAgent import BaseAgent
+from grid2op.Action import BaseAction, ActionSpace
+from grid2op.Observation import BaseObservation
+from grid2op.dtypes import dt_float
+
+from grid2op.Agent.baseAgent import BaseAgent
 
 
 class GreedyAgent(BaseAgent):
     """
@@ -23,12 +27,13 @@ class GreedyAgent(BaseAgent):
     override this class. Examples are provided with :class:`PowerLineSwitch` and :class:`TopologyGreedy`.
     """
 
-    def __init__(self, action_space):
+    def __init__(self, action_space: ActionSpace, simulated_time_step: int = 1):
         BaseAgent.__init__(self, action_space)
         self.tested_action = None
         self.resulting_rewards = None
+        self.simulated_time_step = int(simulated_time_step)
 
-    def act(self, observation, reward, done=False):
+    def act(self, observation: BaseObservation, reward: float, done: bool = False) -> BaseAction:
         """
         By definition, all "greedy" agents are acting the same way. The only thing that can differentiate multiple
         agents is the actions that are tested.
@@ -64,7 +69,7 @@ def act(self, observation, reward, done=False):
                     simul_reward,
                     simul_has_error,
                     simul_info,
-                ) = observation.simulate(action)
+                ) = observation.simulate(action, time_step=self.simulated_time_step)
                 self.resulting_rewards[i] = simul_reward
             reward_idx = int(
                 np.argmax(self.resulting_rewards)
@@ -75,7 +80,7 @@
         return best_action
 
     @abstractmethod
-    def _get_tested_action(self, observation):
+    def _get_tested_action(self, observation: BaseObservation) -> List[BaseAction]:
         """
         Returns the list of all the candidate actions.
16 changes: 7 additions & 9 deletions grid2op/Agent/powerlineSwitch.py
@@ -6,9 +6,13 @@
 # SPDX-License-Identifier: MPL-2.0
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
 
+from typing import List
 import numpy as np
 
 from grid2op.dtypes import dt_bool
+from grid2op.Observation import BaseObservation
+from grid2op.Action import BaseAction, ActionSpace
+
 from grid2op.Agent.greedyAgent import GreedyAgent
 
 
@@ -27,20 +31,14 @@ class PowerLineSwitch(GreedyAgent):
     """
 
-    def __init__(self, action_space):
-        GreedyAgent.__init__(self, action_space)
+    def __init__(self, action_space: ActionSpace, simulated_time_step: int = 1):
+        GreedyAgent.__init__(self, action_space, simulated_time_step=simulated_time_step)
 
-    def _get_tested_action(self, observation):
+    def _get_tested_action(self, observation: BaseObservation) -> List[BaseAction]:
         res = [self.action_space({})]  # add the do nothing
         for i in range(self.action_space.n_line):
             tmp = np.full(self.action_space.n_line, fill_value=False, dtype=dt_bool)
             tmp[i] = True
             action = self.action_space({"change_line_status": tmp})
-            if not observation.line_status[i]:
-                # so the action consisted in reconnecting the powerline
-                # i need to say on which bus (always on bus 1 for this type of agent)
-                action = action.update(
-                    {"set_bus": {"lines_or_id": [(i, 1)], "lines_ex_id": [(i, 1)]}}
-                )
             res.append(action)
         return res
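Note that the hunk above also drops the special case that pinned a reconnected powerline to bus 1, so `PowerLineSwitch` now tests the bare `change_line_status` action even for disconnected lines. If the previous behavior is needed, a subclass can restore it; a sketch reusing the removed snippet (the class name is hypothetical):

```python
from typing import List

from grid2op.Action import BaseAction
from grid2op.Agent import PowerLineSwitch
from grid2op.Observation import BaseObservation


class PowerLineSwitchBus1(PowerLineSwitch):
    """Hypothetical variant keeping the pre-commit 'reconnect on bus 1' behavior."""

    def _get_tested_action(self, observation: BaseObservation) -> List[BaseAction]:
        res = super()._get_tested_action(observation)
        # res[0] is the do-nothing action, res[i + 1] toggles line i
        for i in range(self.action_space.n_line):
            if not observation.line_status[i]:
                # toggling a disconnected line reconnects it:
                # pin both of its ends to bus 1, as the old code did
                res[i + 1] = res[i + 1].update(
                    {"set_bus": {"lines_or_id": [(i, 1)], "lines_ex_id": [(i, 1)]}}
                )
        return res
```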
12 changes: 8 additions & 4 deletions grid2op/Agent/recoPowerlineAgent.py
@@ -5,7 +5,11 @@
 # you can obtain one at http://mozilla.org/MPL/2.0/.
 # SPDX-License-Identifier: MPL-2.0
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
 import numpy as np
 
+from typing import List
+from grid2op.Observation import BaseObservation
+from grid2op.Action import BaseAction, ActionSpace
+
 from grid2op.Agent.greedyAgent import GreedyAgent
 
 
@@ -17,10 +21,10 @@ class RecoPowerlineAgent(GreedyAgent):
     """
 
-    def __init__(self, action_space):
-        GreedyAgent.__init__(self, action_space)
+    def __init__(self, action_space: ActionSpace, simulated_time_step: int = 1):
+        GreedyAgent.__init__(self, action_space, simulated_time_step=simulated_time_step)
 
-    def _get_tested_action(self, observation):
+    def _get_tested_action(self, observation: BaseObservation) -> List[BaseAction]:
         res = [self.action_space({})]  # add the do nothing
         line_stat_s = observation.line_status
         cooldown = observation.time_before_cooldown_line
11 changes: 7 additions & 4 deletions grid2op/Agent/topologyGreedy.py
@@ -6,6 +6,9 @@
 # SPDX-License-Identifier: MPL-2.0
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
 
+from typing import List
+from grid2op.Observation import BaseObservation
+from grid2op.Action import BaseAction, ActionSpace
 from grid2op.Agent.greedyAgent import GreedyAgent
 
 
@@ -22,11 +25,11 @@ class TopologyGreedy(GreedyAgent):
     """
 
-    def __init__(self, action_space):
-        GreedyAgent.__init__(self, action_space)
-        self.tested_action = None
+    def __init__(self, action_space: ActionSpace, simulated_time_step: int = 1):
+        GreedyAgent.__init__(self, action_space, simulated_time_step=simulated_time_step)
+        self.tested_action: List[BaseAction] = None
 
-    def _get_tested_action(self, observation):
+    def _get_tested_action(self, observation: BaseObservation) -> List[BaseAction]:
         if self.tested_action is None:
             res = [self.action_space({})]  # add the do nothing
             # better use "get_all_unitary_topologies_set" and not "get_all_unitary_topologies_change"
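The `if self.tested_action is None:` guard above means the candidate topologies are enumerated once and then cached, since they do not depend on the observation. A standalone sketch of that enumeration, following the comment in the diff (the helper name is illustrative, not part of this commit):

```python
from typing import List

from grid2op.Action import ActionSpace, BaseAction


def build_topology_candidates(action_space: ActionSpace) -> List[BaseAction]:
    """Enumerate the actions a TopologyGreedy-style agent would test (computed once)."""
    res = [action_space({})]  # the do-nothing action
    # "set" topologies are preferred over "change" ones, as the diff comment notes
    res += ActionSpace.get_all_unitary_topologies_set(action_space)
    return res
```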
25 changes: 25 additions & 0 deletions grid2op/tests/test_Agent.py
@@ -131,6 +131,20 @@ def test_1_powerlineswitch(self):
             np.abs(cum_reward - expected_reward) <= self.tol_one
         ), f"The reward has not been properly computed {cum_reward} instead of {expected_reward}"
 
+    def test_1_powerlineswitch2(self):
+        agent = PowerLineSwitch(self.env.action_space, simulated_time_step=0)
+        with warnings.catch_warnings():
+            warnings.filterwarnings("error")
+            i, cum_reward, all_acts = self._aux_test_agent(agent, i_max=5)
+        assert (
+            i == 6
+        ), "The powerflow diverged before step 6 for powerline switch agent"
+        # switching to dt_float in the reward changes the results
+        expected_reward = dt_float(541.0180053710938)
+        assert (
+            np.abs(cum_reward - expected_reward) <= self.tol_one
+        ), f"The reward has not been properly computed {cum_reward} instead of {expected_reward}"
+
     def test_2_busswitch(self):
         agent = TopologyGreedy(self.env.action_space)
         with warnings.catch_warnings():
@@ -148,6 +162,17 @@
         assert (
             np.abs(cum_reward - expected_reward) <= self.tol_one
         ), f"The reward has not been properly computed {cum_reward} instead of {expected_reward}"
+
+    def test_2_busswitch2(self):
+        agent = TopologyGreedy(self.env.action_space, simulated_time_step=0)
+        with warnings.catch_warnings():
+            warnings.filterwarnings("error")
+            i, cum_reward, all_acts = self._aux_test_agent(agent, i_max=5)
+        assert i == 6, "The powerflow diverged before step 6 for greedy agent"
+        expected_reward = dt_float(541.0657348632812)
+        assert (
+            np.abs(cum_reward - expected_reward) <= self.tol_one
+        ), f"The reward has not been properly computed {cum_reward} instead of {expected_reward}"


class TestMake2Agents(HelperTests, unittest.TestCase):
