some improvements for base agents
Signed-off-by: DONNOT Benjamin <[email protected]>
BDonnot committed Nov 8, 2024
1 parent 4e858dc commit 1a8c605
Showing 7 changed files with 61 additions and 23 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.rst
@@ -118,6 +118,9 @@ Native multi agents support:
 - [IMPROVED] the `FromMultiEpisodeData` class with the addition of the `caching`
   kwarg to enable / disable caching (which was the default behavior in previous versions)
 - [IMPROVED] the `FromMultiEpisodeData` class that now also returns the path of the data
+- [IMPROVED] the classes inherited from `GreedyAgent` with the added possibility to
+  do the `obs.simulate` on a different time horizon (kwarg `simulated_time_step`)
+- [IMPROVED] some type hints for some agent classes
 
 [1.10.4] - 2024-10-15
 -------------------------
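To illustrate the new kwarg described in the changelog entry above, here is a minimal usage sketch (the environment name is only an example; any locally available grid2op environment works):

```python
import grid2op
from grid2op.Agent import PowerLineSwitch

# "l2rpn_case14_sandbox" is an example environment, not part of this commit
env = grid2op.make("l2rpn_case14_sandbox")

# candidate actions are now simulated 2 steps ahead instead of the default 1
agent = PowerLineSwitch(env.action_space, simulated_time_step=2)

obs = env.reset()
reward, done = 0.0, False
while not done:
    action = agent.act(obs, reward, done)
    obs, reward, done, info = env.step(action)
env.close()
```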
2 changes: 1 addition & 1 deletion grid2op/Agent/baseAgent.py
@@ -32,7 +32,7 @@ class BaseAgent(RandomObject, ABC):
 
     def __init__(self, action_space: ActionSpace):
         RandomObject.__init__(self)
-        self.action_space = copy.deepcopy(action_space)
+        self.action_space: ActionSpace = copy.deepcopy(action_space)
 
     def reset(self, obs: BaseObservation):
         """
15 changes: 10 additions & 5 deletions grid2op/Agent/greedyAgent.py
@@ -7,10 +7,14 @@
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
 
 from abc import abstractmethod
+from typing import List
 import numpy as np
-from grid2op.Agent.baseAgent import BaseAgent
+from grid2op.Action import BaseAction, ActionSpace
+from grid2op.Observation import BaseObservation
+from grid2op.dtypes import dt_float
+
+from grid2op.Agent.baseAgent import BaseAgent
 
 
 class GreedyAgent(BaseAgent):
     """
@@ -23,12 +27,13 @@ class GreedyAgent(BaseAgent):
     override this class. Examples are provided with :class:`PowerLineSwitch` and :class:`TopologyGreedy`.
     """
 
-    def __init__(self, action_space):
+    def __init__(self, action_space: ActionSpace, simulated_time_step: int = 1):
         BaseAgent.__init__(self, action_space)
         self.tested_action = None
         self.resulting_rewards = None
+        self.simulated_time_step = int(simulated_time_step)
 
-    def act(self, observation, reward, done=False):
+    def act(self, observation: BaseObservation, reward: float, done: bool = False) -> BaseAction:
         """
         By definition, all "greedy" agents are acting the same way. The only thing that can differentiate multiple
         agents is the actions that are tested.
@@ -64,7 +69,7 @@ def act(self, observation, reward, done=False):
                     simul_reward,
                     simul_has_error,
                     simul_info,
-                ) = observation.simulate(action)
+                ) = observation.simulate(action, time_step=self.simulated_time_step)
                 self.resulting_rewards[i] = simul_reward
             reward_idx = int(
                 np.argmax(self.resulting_rewards)
@@ -75,7 +80,7 @@
         return best_action
 
     @abstractmethod
-    def _get_tested_action(self, observation):
+    def _get_tested_action(self, observation: BaseObservation) -> List[BaseAction]:
         """
         Returns the list of all the candidate actions.
16 changes: 7 additions & 9 deletions grid2op/Agent/powerlineSwitch.py
@@ -6,9 +6,13 @@
 # SPDX-License-Identifier: MPL-2.0
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
 
+from typing import List
 import numpy as np
 
 from grid2op.dtypes import dt_bool
+from grid2op.Observation import BaseObservation
+from grid2op.Action import BaseAction, ActionSpace
+
 from grid2op.Agent.greedyAgent import GreedyAgent
 
 
@@ -27,20 +31,14 @@ class PowerLineSwitch(GreedyAgent):
     """
 
-    def __init__(self, action_space):
-        GreedyAgent.__init__(self, action_space)
+    def __init__(self, action_space: ActionSpace, simulated_time_step: int = 1):
+        GreedyAgent.__init__(self, action_space, simulated_time_step=simulated_time_step)
 
-    def _get_tested_action(self, observation):
+    def _get_tested_action(self, observation: BaseObservation) -> List[BaseAction]:
         res = [self.action_space({})]  # add the do nothing
         for i in range(self.action_space.n_line):
             tmp = np.full(self.action_space.n_line, fill_value=False, dtype=dt_bool)
             tmp[i] = True
             action = self.action_space({"change_line_status": tmp})
-            if not observation.line_status[i]:
-                # so the action consisted in reconnecting the powerline
-                # i need to say on which bus (always on bus 1 for this type of agent)
-                action = action.update(
-                    {"set_bus": {"lines_or_id": [(i, 1)], "lines_ex_id": [(i, 1)]}}
-                )
             res.append(action)
         return res
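Note that the hunk above also drops the special case that pinned a reconnected powerline to bus 1, so `PowerLineSwitch` now tests the bare `change_line_status` action even for disconnected lines. If the previous behavior is needed, a subclass can restore it; a sketch reusing the removed snippet (the class name is hypothetical):

```python
from typing import List

from grid2op.Action import BaseAction
from grid2op.Agent import PowerLineSwitch
from grid2op.Observation import BaseObservation


class PowerLineSwitchBus1(PowerLineSwitch):
    """Hypothetical variant keeping the pre-commit 'reconnect on bus 1' behavior."""

    def _get_tested_action(self, observation: BaseObservation) -> List[BaseAction]:
        res = super()._get_tested_action(observation)
        # res[0] is the do-nothing action, res[i + 1] toggles line i
        for i in range(self.action_space.n_line):
            if not observation.line_status[i]:
                # toggling a disconnected line reconnects it:
                # pin both of its ends to bus 1, as the old code did
                res[i + 1] = res[i + 1].update(
                    {"set_bus": {"lines_or_id": [(i, 1)], "lines_ex_id": [(i, 1)]}}
                )
        return res
```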
12 changes: 8 additions & 4 deletions grid2op/Agent/recoPowerlineAgent.py
@@ -5,7 +5,11 @@
 # you can obtain one at http://mozilla.org/MPL/2.0/.
 # SPDX-License-Identifier: MPL-2.0
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
 import numpy as np
 
+from typing import List
+from grid2op.Observation import BaseObservation
+from grid2op.Action import BaseAction, ActionSpace
+
 from grid2op.Agent.greedyAgent import GreedyAgent
 
 
@@ -17,10 +21,10 @@ class RecoPowerlineAgent(GreedyAgent):
     """
 
-    def __init__(self, action_space):
-        GreedyAgent.__init__(self, action_space)
+    def __init__(self, action_space: ActionSpace, simulated_time_step: int = 1):
+        GreedyAgent.__init__(self, action_space, simulated_time_step=simulated_time_step)
 
-    def _get_tested_action(self, observation):
+    def _get_tested_action(self, observation: BaseObservation) -> List[BaseAction]:
         res = [self.action_space({})]  # add the do nothing
         line_stat_s = observation.line_status
         cooldown = observation.time_before_cooldown_line
11 changes: 7 additions & 4 deletions grid2op/Agent/topologyGreedy.py
@@ -6,6 +6,9 @@
 # SPDX-License-Identifier: MPL-2.0
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
 
+from typing import List
+from grid2op.Observation import BaseObservation
+from grid2op.Action import BaseAction, ActionSpace
 from grid2op.Agent.greedyAgent import GreedyAgent
 
 
@@ -22,11 +25,11 @@ class TopologyGreedy(GreedyAgent):
     """
 
-    def __init__(self, action_space):
-        GreedyAgent.__init__(self, action_space)
-        self.tested_action = None
+    def __init__(self, action_space: ActionSpace, simulated_time_step: int = 1):
+        GreedyAgent.__init__(self, action_space, simulated_time_step=simulated_time_step)
+        self.tested_action: List[BaseAction] = None
 
-    def _get_tested_action(self, observation):
+    def _get_tested_action(self, observation: BaseObservation) -> List[BaseAction]:
         if self.tested_action is None:
             res = [self.action_space({})]  # add the do nothing
             # better use "get_all_unitary_topologies_set" and not "get_all_unitary_topologies_change"
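The `if self.tested_action is None:` guard above means the candidate topologies are enumerated once and then cached, since they do not depend on the observation. A standalone sketch of that enumeration, following the comment in the diff (the helper name is illustrative, not part of this commit):

```python
from typing import List

from grid2op.Action import ActionSpace, BaseAction


def build_topology_candidates(action_space: ActionSpace) -> List[BaseAction]:
    """Enumerate the actions a TopologyGreedy-style agent would test (computed once)."""
    res = [action_space({})]  # the do-nothing action
    # "set" topologies are preferred over "change" ones, as the diff comment notes
    res += ActionSpace.get_all_unitary_topologies_set(action_space)
    return res
```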
25 changes: 25 additions & 0 deletions grid2op/tests/test_Agent.py
@@ -131,6 +131,20 @@ def test_1_powerlineswitch(self):
             np.abs(cum_reward - expected_reward) <= self.tol_one
         ), f"The reward has not been properly computed {cum_reward} instead of {expected_reward}"
 
+    def test_1_powerlineswitch2(self):
+        agent = PowerLineSwitch(self.env.action_space, simulated_time_step=0)
+        with warnings.catch_warnings():
+            warnings.filterwarnings("error")
+            i, cum_reward, all_acts = self._aux_test_agent(agent, i_max=5)
+        assert (
+            i == 6
+        ), "The powerflow diverged before step 6 for powerline switch agent"
+        # switching to dt_float in the reward changes the results
+        expected_reward = dt_float(541.0180053710938)
+        assert (
+            np.abs(cum_reward - expected_reward) <= self.tol_one
+        ), f"The reward has not been properly computed {cum_reward} instead of {expected_reward}"
+
     def test_2_busswitch(self):
         agent = TopologyGreedy(self.env.action_space)
         with warnings.catch_warnings():
@@ -148,6 +162,17 @@
         assert (
             np.abs(cum_reward - expected_reward) <= self.tol_one
         ), f"The reward has not been properly computed {cum_reward} instead of {expected_reward}"
+
+    def test_2_busswitch2(self):
+        agent = TopologyGreedy(self.env.action_space, simulated_time_step=0)
+        with warnings.catch_warnings():
+            warnings.filterwarnings("error")
+            i, cum_reward, all_acts = self._aux_test_agent(agent, i_max=5)
+        assert i == 6, "The powerflow diverged before step 6 for greedy agent"
+        expected_reward = dt_float(541.0657348632812)
+        assert (
+            np.abs(cum_reward - expected_reward) <= self.tol_one
+        ), f"The reward has not been properly computed {cum_reward} instead of {expected_reward}"


class TestMake2Agents(HelperTests, unittest.TestCase):
