diff --git a/examples/multi_agents/ray_example3.py b/examples/multi_agents/ray_example3.py
new file mode 100644
index 00000000..91815a2f
--- /dev/null
+++ b/examples/multi_agents/ray_example3.py
@@ -0,0 +1,280 @@
+# Copyright (c) 2019-2022, RTE (https://www.rte-france.com)
+# See AUTHORS.txt
+# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
+# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
+# you can obtain one at http://mozilla.org/MPL/2.0/.
+# SPDX-License-Identifier: MPL-2.0
+# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
+
+"""Example with local observations and local actions."""
+
+import warnings
+import numpy as np
+import copy
+
+from gym.spaces import Discrete, Box
+
+from ray.rllib.env.multi_agent_env import MultiAgentEnv as MAEnv
+from ray.rllib.policy.policy import PolicySpec, Policy
+
+import grid2op
+from grid2op.Action import PlayableAction
+from grid2op.multi_agent.multiAgentEnv import MultiAgentEnv
+from grid2op.gym_compat import GymEnv, BoxGymObsSpace, DiscreteActSpace
+
+from lightsim2grid import LightSimBackend
+from grid2op.gym_compat.utils import ALL_ATTR_FOR_DISCRETE
+from grid2op.multi_agent import ClusterUtils
+
+ENV_NAME = "l2rpn_case14_sandbox"
+DO_NOTHING_EPISODES = -1  # 200
+
+env_for_cls = grid2op.make(ENV_NAME,
+                           action_class=PlayableAction,
+                           backend=LightSimBackend())
+
+
+# Get ACTION_DOMAINS by clustering the substations
+ACTION_DOMAINS = ClusterUtils.cluster_substations(env_for_cls)
+
+# Get OBSERVATION_DOMAINS by clustering the substations
+OBSERVATION_DOMAINS = ClusterUtils.cluster_substations(env_for_cls)
+
+
+# wrapper for the gym env
+class MAEnvWrapper(MAEnv):
+    def __init__(self, env_config=None):
+        super().__init__()
+        if env_config is None:
+            env_config = {}
+
+        env = grid2op.make(ENV_NAME,
+                           action_class=PlayableAction,
+                           backend=LightSimBackend())
+
+        action_domains = copy.deepcopy(ACTION_DOMAINS)
+        if "action_domains" in env_config:
+            action_domains = env_config["action_domains"]
+        observation_domains = copy.deepcopy(OBSERVATION_DOMAINS)
+        if "observation_domains" in env_config:
+            observation_domains = env_config["observation_domains"]
+        self.ma_env = MultiAgentEnv(env,
+                                    action_domains,
+                                    observation_domains)
+
+        self._agent_ids = set(self.ma_env.agents)
+        self.ma_env.seed(0)
+
+        # see the grid2op doc on how to customize the observation space
+        # with the grid2op / gym interface.
+        self._gym_env = GymEnv(env)
+        self._gym_env.observation_space.close()
+        obs_attr_to_keep = ["gen_p", "rho"]
+        if "obs_attr_to_keep" in env_config:
+            obs_attr_to_keep = copy.deepcopy(env_config["obs_attr_to_keep"])
+        self._gym_env.observation_space = BoxGymObsSpace(env.observation_space,
+                                                         attr_to_keep=obs_attr_to_keep,
+                                                         replace_nan_by_0=True  # replace NaN by 0.
+                                                         )
+
+        # in this example each agent has its own (local) observation domain,
+        # obtained above by clustering the substations: each agent only
+        # observes the part of the grid assigned to it
+        self._aux_observation_space = {
+            agent_id: BoxGymObsSpace(self.ma_env.observation_spaces[agent_id],
+                                     attr_to_keep=obs_attr_to_keep,
+                                     replace_nan_by_0=True  # replace NaN by 0.
+                                     )
+            for agent_id in self.ma_env.agents
+        }
+        # to avoid "weird" pickle issues
+        self.observation_space = {
+            agent_id: Box(low=self._aux_observation_space[agent_id].low,
+                          high=self._aux_observation_space[agent_id].high,
+                          dtype=self._aux_observation_space[agent_id].dtype)
+            for agent_id in self.ma_env.agents
+        }
+
+        # we represent the actions as discrete actions for now.
+        # It should work to encode them differently using the
+        # gym_compat module for example
+        act_attr_to_keep = ALL_ATTR_FOR_DISCRETE
+        if "act_attr_to_keep" in env_config:
+            act_attr_to_keep = copy.deepcopy(env_config["act_attr_to_keep"])
+
+        self._conv_action_space = {
+            agent_id: DiscreteActSpace(self.ma_env.action_spaces[agent_id], attr_to_keep=act_attr_to_keep)
+            for agent_id in self.ma_env.agents
+        }
+
+        # to avoid "weird" pickle issues
+        # (the number of discrete actions is the one exposed by the converted action space)
+        self.action_space = {
+            agent_id: Discrete(n=self._conv_action_space[agent_id].n)
+            for agent_id in self.ma_env.agents
+        }
+
+    def reset(self, *, seed=None, options=None):
+        if seed is not None:
+            self.seed(seed)
+
+        # reset the underlying multi agent environment
+        obs = self.ma_env.reset()
+
+        return self._format_obs(obs), {}
+
+    def seed(self, seed):
+        return self.ma_env.seed(seed)
+
+    def _format_obs(self, grid2op_obs):
+        # grid2op_obs is a dictionary representing a "multi agent grid2op observation"
+
+        # convert the observation to a gym one
+        # and return the proper dictionary
+        return {
+            agent_id: self._aux_observation_space[agent_id].to_gym(grid2op_obs[agent_id])
+            for agent_id in self.ma_env.agents
+        }
+
+    def step(self, actions):
+        # convert the actions to grid2op actions
+        if actions:
+            grid2op_act = {
+                agent_id: self._conv_action_space[agent_id].from_gym(actions[agent_id])
+                for agent_id in self.ma_env.agents
+            }
+        else:
+            # no action provided: fall back to action 0 for every agent
+            grid2op_act = {
+                agent_id: self._conv_action_space[agent_id].from_gym(0)
+                for agent_id in self.ma_env.agents
+            }
+
+        # just to retrieve the first agent id...
+        first_agent_id = next(iter(self.ma_env.agents))
+
+        # do a step in the underlying multi agent environment
+        obs, r, done, info = self.ma_env.step(grid2op_act)
+
+        # all agents have the same flag "done"
+        done['__all__'] = done[first_agent_id]
+
+        # now retrieve the observation in the proper form
+        gym_obs = self._format_obs(obs)
+
+        # ignored for now
+        info = {}
+        truncateds = {k: False for k in self.ma_env.agents}
+        truncateds['__all__'] = truncateds[first_agent_id]
+        return gym_obs, r, done, truncateds, info
+
+
+def policy_mapping_fn(agent_id, episode, worker, **kwargs):
+    return agent_id
+
+
+if __name__ == "__main__":
+    import ray
+    # from ray.rllib.agents.ppo import ppo
+    from ray.rllib.algorithms.ppo import PPO, PPOConfig
+    import json
+    import os
+    import shutil
+
+    ray_ma_env = MAEnvWrapper()
+
+    checkpoint_root = "./ma_ppo_test_2ndsetting"
+
+    # Where checkpoints are written:
+    shutil.rmtree(checkpoint_root, ignore_errors=True, onerror=None)
+
+    # Where some data will be written and used by Tensorboard below:
+    ray_results = f'{os.getenv("HOME")}/ray_results/'
+    shutil.rmtree(ray_results, ignore_errors=True, onerror=None)
+
+    info = ray.init(ignore_reinit_error=True)
+    print("Dashboard URL: http://{}".format(info.address_info["webui_url"]))
+
+    # Configs (see ray's doc for more information)
+    SELECT_ENV = MAEnvWrapper  # Specifies the environment class used by the trainer
+    N_ITER = 1000              # Number of training iterations.
+
+    # config = ppo.DEFAULT_CONFIG.copy()  # PPO's default configuration. See the next code cell.
+ # config["log_level"] = "WARN" # Suppress too many messages, but try "INFO" to see what can be printed. + + # # Other settings we might adjust: + # config["num_workers"] = 1 # Use > 1 for using more CPU cores, including over a cluster + # config["num_sgd_iter"] = 10 # Number of SGD (stochastic gradient descent) iterations per training minibatch. + # # I.e., for each minibatch of data, do this many passes over it to train. + # config["sgd_minibatch_size"] = 64 # The amount of data records per minibatch + # config["model"]["fcnet_hiddens"] = [100, 50] # + # config["num_cpus_per_worker"] = 0 # This avoids running out of resources in the notebook environment when this cell is re-executed + # config["vf_clip_param"] = 100 + + # # multi agent specific config + # config["multiagent"] = { + # "policies" : { + # "agent_0" : PolicySpec( + # action_space=ray_ma_env.action_space["agent_0"], + # observation_space=ray_ma_env.observation_space["agent_0"], + # ), + # "agent_1" : PolicySpec( + # action_space=ray_ma_env.action_space["agent_1"], + # observation_space=ray_ma_env.observation_space["agent_1"], + # ) + # }, + # "policy_mapping_fn": policy_mapping_fn, + # "policies_to_train": ["agent_0", "agent_1"], + # } + + # #Trainer + # agent = ppo.PPOTrainer(config, env=SELECT_ENV) + + + # see ray doc for this... + # syntax changes every ray major version apparently... + config = PPOConfig() + config = config.training(gamma=0.9, lr=0.01, kl_coeff=0.3, train_batch_size=128) + config = config.resources(num_gpus=0) + config = config.rollouts(num_rollout_workers=1) + + # multi agent parts + config.multi_agent(policies={ + "agent_0" : PolicySpec( + action_space=ray_ma_env.action_space["agent_0"], + observation_space=ray_ma_env.observation_space["agent_0"] + ), + "agent_1" : PolicySpec( + action_space=ray_ma_env.action_space["agent_1"], + observation_space=ray_ma_env.observation_space["agent_1"], + ) + }, + policy_mapping_fn = policy_mapping_fn, + policies_to_train= ["agent_0", "agent_1"]) + + #Trainer + agent = PPO(config=config, env=SELECT_ENV) + + results = [] + episode_data = [] + episode_json = [] + + for n in range(N_ITER): + result = agent.train() + results.append(result) + + episode = {'n': n, + 'episode_reward_min': result['episode_reward_min'], + 'episode_reward_mean': result['episode_reward_mean'], + 'episode_reward_max': result['episode_reward_max'], + 'episode_len_mean': result['episode_len_mean'] + } + + episode_data.append(episode) + episode_json.append(json.dumps(episode)) + file_name = agent.save(checkpoint_root) + + print(f'{n:3d}: Min/Mean/Max reward: {result["episode_reward_min"]:8.4f}/{result["episode_reward_mean"]:8.4f}/{result["episode_reward_max"]:8.4f}. 
+              f'Checkpoint saved to {file_name}')
+
+    with open(f'{ray_results}/rewards.json', 'w') as outfile:
+        json.dump(episode_json, outfile)
diff --git a/grid2op/multi_agent/__init__.py b/grid2op/multi_agent/__init__.py
index be787cd4..b438db11 100644
--- a/grid2op/multi_agent/__init__.py
+++ b/grid2op/multi_agent/__init__.py
@@ -9,7 +9,8 @@ __all__ = ["SubGridAction",
            "SubGridObservation",
            "MultiAgentEnv",
-           "SubGridObjects"]
+           "SubGridObjects",
+           "ClusterUtils"]
 
 import warnings
 from grid2op.multi_agent.ma_exceptions import MultiAgentStillBeta
@@ -25,3 +26,4 @@ from grid2op.multi_agent.subgridObservation import SubGridObservation
 from grid2op.multi_agent.multiAgentEnv import MultiAgentEnv
 from grid2op.multi_agent.subGridObjects import SubGridObjects
+from grid2op.multi_agent.utils import ClusterUtils
\ No newline at end of file
diff --git a/grid2op/multi_agent/utils.py b/grid2op/multi_agent/utils.py
index e6dcfb2e..fef758eb 100644
--- a/grid2op/multi_agent/utils.py
+++ b/grid2op/multi_agent/utils.py
@@ -6,8 +6,11 @@
 # SPDX-License-Identifier: MPL-2.0
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
 
+from grid2op.Environment import Environment
 from numpy.random import shuffle
-
+import numpy as np
+from sknetwork.clustering import Louvain
+from scipy.sparse import csr_matrix
 
 def random_order(agents : list, *args, **kwargs):
     """Returns the random order
@@ -74,4 +77,74 @@ def __eq__(self, other):
             and self._current_agent == other._current_agent
             and self.selected_agent == other.selected_agent
         )
-        
\ No newline at end of file
+
+
+class ClusterUtils:
+    """
+    Groups the substations of a grid environment into clusters, based on the Louvain graph clustering method.
+    """
+
+    # Create connectivity matrix
+    @staticmethod
+    def create_connectivity_matrix(env: Environment):
+        """
+        Creates a connectivity matrix for the given grid environment.
+
+        The connectivity matrix is a 2D NumPy array where the element at position (i, j) is 1 if there is a direct
+        connection between substation i and substation j, and 0 otherwise. The diagonal elements are set to 1 to
+        indicate self-connections.
+
+        Args:
+            env (grid2op.Environment): The grid environment for which the connectivity matrix is to be created.
+
+        Returns:
+            connectivity_matrix: A 2D NumPy array of dimension (env.n_sub, env.n_sub) representing the substation
+            connectivity of the grid environment.
+        """
+        connectivity_matrix = np.zeros((env.n_sub, env.n_sub))
+        for line_id in range(env.n_line):
+            orig_sub = env.line_or_to_subid[line_id]
+            extrem_sub = env.line_ex_to_subid[line_id]
+            connectivity_matrix[orig_sub, extrem_sub] = 1
+            connectivity_matrix[extrem_sub, orig_sub] = 1
+        return connectivity_matrix + np.eye(env.n_sub)
+
+    # Cluster substations
+    @staticmethod
+    def cluster_substations(env: Environment):
+        """
+        Clusters substations in a power grid environment using the Louvain community detection algorithm.
+
+        This function generates a connectivity matrix representing the connections between substations in the given
+        environment, and applies the Louvain algorithm to cluster the substations into communities. The resulting
+        clusters are formatted into a dictionary where each key corresponds to an agent and the value is the list of
+        substations assigned to that agent.
+
+        Args:
+            env (grid2op.Environment): The grid environment whose substations are to be clustered.
+
+        Returns:
+            (MADict):
+                - keys: agents' names
+                - values: list of substation ids under the control of the agent.
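+
+        Examples
+        --------
+        A minimal usage sketch (the environment name below is only an illustration, any grid2op
+        environment should work):
+
+        .. code-block:: python
+
+            import grid2op
+            from grid2op.multi_agent import ClusterUtils
+
+            env = grid2op.make("l2rpn_case14_sandbox")
+            clusters = ClusterUtils.cluster_substations(env)
+            # expected shape of the result (actual ids depend on the grid), e.g.:
+            # {'agent_0': [0, 1, 2, ...], 'agent_1': [...], ...}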
+ """ + + # Generate the connectivity matrix + matrix = ClusterUtils.create_connectivity_matrix(env) + + # Perform clustering using Louvain algorithm + louvain = Louvain() + adjacency = csr_matrix(matrix) + labels = louvain.fit_predict(adjacency) + + # Group substations into clusters + clusters = {} + for node, label in enumerate(labels): + if label not in clusters: + clusters[label] = [] + clusters[label].append(node) + + # Format the clusters + formatted_clusters = {f'agent_{i}': nodes for i, nodes in enumerate(clusters.values())} + + return formatted_clusters \ No newline at end of file diff --git a/setup.py b/setup.py index 976d4ff1..f003e8dd 100644 --- a/setup.py +++ b/setup.py @@ -30,6 +30,7 @@ def my_test_suite(): "tqdm>=4.45.0", "networkx>=2.4", "requests>=2.23.0", + "scikit-network>=0.32.1", "packaging", # because gym changes the way it uses numpy prng in version 0.26 and i need both gym before and after... "typing_extensions" ],