
Optimal Clustering of Substations for Topology Optimization Using the Louvain Algorithm #620

Open · wants to merge 10 commits into dev_multiagent
280 changes: 280 additions & 0 deletions examples/multi_agents/ray_example3.py
@@ -0,0 +1,280 @@
# Copyright (c) 2019-2022, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.

"""example with local observation and local actions"""

import warnings
import numpy as np
import copy

from gym.spaces import Discrete, Box

from ray.rllib.env.multi_agent_env import MultiAgentEnv as MAEnv
from ray.rllib.policy.policy import PolicySpec, Policy

import grid2op
from grid2op.Action import PlayableAction
from grid2op.multi_agent.multiAgentEnv import MultiAgentEnv
from grid2op.gym_compat import GymEnv, BoxGymObsSpace, DiscreteActSpace

from lightsim2grid import LightSimBackend
from grid2op.gym_compat.utils import ALL_ATTR_FOR_DISCRETE
from grid2op.multi_agent import ClusterUtils

ENV_NAME = "l2rpn_case14_sandbox"
DO_NOTHING_EPISODES = -1 # 200

env_for_cls = grid2op.make(ENV_NAME,
                           action_class=PlayableAction,
                           backend=LightSimBackend())


# Get ACTION_DOMAINS by clustering the substations
ACTION_DOMAINS = ClusterUtils.cluster_substations(env_for_cls)

# Get OBSERVATION_DOMAINS by clustering the substations
OBSERVATION_DOMAINS = ClusterUtils.cluster_substations(env_for_cls)
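# NOTE: both domains use the same partition: each agent acts on and observes the
# substations of its own Louvain community. The result is a dict of the
# (hypothetical) shape {'agent_0': [sub ids...], 'agent_1': [sub ids...], ...};
# the exact communities depend on the Louvain partition of the grid graph.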

# wrapper making the grid2op multi-agent environment usable with ray / rllib
class MAEnvWrapper(MAEnv):
    def __init__(self, env_config=None):
        super().__init__()
        if env_config is None:
            env_config = {}

        env = grid2op.make(ENV_NAME,
                           action_class=PlayableAction,
                           backend=LightSimBackend())

        action_domains = copy.deepcopy(ACTION_DOMAINS)
        if "action_domains" in env_config:
            action_domains = env_config["action_domains"]
        observation_domains = copy.deepcopy(OBSERVATION_DOMAINS)
        if "observation_domains" in env_config:
            observation_domains = env_config["observation_domains"]
        self.ma_env = MultiAgentEnv(env,
                                    action_domains,
                                    observation_domains)

        self.ma_env.seed(0)
        self._agent_ids = set(self.ma_env.agents)

        # see the grid2op doc on how to customize the observation space
        # with the grid2op / gym interface.
        self._gym_env = GymEnv(env)
        self._gym_env.observation_space.close()
        obs_attr_to_keep = ["gen_p", "rho"]
        if "obs_attr_to_keep" in env_config:
            obs_attr_to_keep = copy.deepcopy(env_config["obs_attr_to_keep"])
        self._gym_env.observation_space = BoxGymObsSpace(env.observation_space,
                                                         attr_to_keep=obs_attr_to_keep,
                                                         replace_nan_by_0=True  # replace NaN by 0.
                                                         )

        # we did not experiment yet with the "partially observable" setting,
        # so for now we suppose all agents see the same observation,
        # which is the full grid
        self._aux_observation_space = {
            agent_id: BoxGymObsSpace(self.ma_env.observation_spaces[agent_id],
                                     attr_to_keep=obs_attr_to_keep,
                                     replace_nan_by_0=True  # replace NaN by 0.
                                     )
            for agent_id in self.ma_env.agents
        }
        # to avoid "weird" pickle issues
        self.observation_space = {
            agent_id: Box(low=self._aux_observation_space[agent_id].low,
                          high=self._aux_observation_space[agent_id].high,
                          dtype=self._aux_observation_space[agent_id].dtype)
            for agent_id in self.ma_env.agents
        }

        # we represent the actions as discrete actions for now.
        # It should also work to encode them differently, using the
        # gym_compat module for example.
        act_attr_to_keep = ALL_ATTR_FOR_DISCRETE
        if "act_attr_to_keep" in env_config:
            act_attr_to_keep = copy.deepcopy(env_config["act_attr_to_keep"])

        self._conv_action_space = {
            agent_id: DiscreteActSpace(self.ma_env.action_spaces[agent_id], attr_to_keep=act_attr_to_keep)
            for agent_id in self.ma_env.agents
        }

        # to avoid "weird" pickle issues
        self.action_space = {
            agent_id: Discrete(n=self.ma_env.action_spaces[agent_id].n)
            for agent_id in self.ma_env.agents
        }
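
        # A continuous encoding would, hypothetically, look like this instead
        # (a sketch, assuming the kept attributes are continuous ones such as
        # "redispatch"):
        #   from grid2op.gym_compat import BoxGymActSpace
        #   self._conv_action_space = {
        #       agent_id: BoxGymActSpace(self.ma_env.action_spaces[agent_id],
        #                                attr_to_keep=["redispatch"])
        #       for agent_id in self.ma_env.agents
        #   }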

    def reset(self, *, seed=None, options=None):
        if seed is not None:
            self.seed(seed)

        # reset the underlying multi agent environment
        obs = self.ma_env.reset()

        return self._format_obs(obs), {}

    def seed(self, seed):
        return self.ma_env.seed(seed)

    def _format_obs(self, grid2op_obs):
        # grid2op_obs is a dictionary representing a "multi agent grid2op observation"

        # convert each agent's grid2op observation to a gym one
        # and return the proper dictionary
        return {
            agent_id: self._aux_observation_space[agent_id].to_gym(grid2op_obs[agent_id])
            for agent_id in self.ma_env.agents
        }

    def step(self, actions):
        # convert the actions to grid2op actions
        if actions:
            grid2op_act = {
                agent_id: self._conv_action_space[agent_id].from_gym(actions[agent_id])
                for agent_id in self.ma_env.agents
            }
        else:
            # no actions provided: every agent plays action 0
            grid2op_act = {
                agent_id: self._conv_action_space[agent_id].from_gym(0)
                for agent_id in self.ma_env.agents
            }

        # just to retrieve the first agent id...
        first_agent_id = next(iter(self.ma_env.agents))

        # do a step in the underlying multi agent environment
        obs, r, done, info = self.ma_env.step(grid2op_act)

        # all agents share the same "done" flag
        done['__all__'] = done[first_agent_id]

        # now retrieve the observation in the proper form
        gym_obs = self._format_obs(obs)

        # ignored for now
        info = {}
        truncateds = {k: False for k in self.ma_env.agents}
        truncateds['__all__'] = truncateds[first_agent_id]
        return gym_obs, r, done, truncateds, info


def policy_mapping_fn(agent_id, episode, worker, **kwargs):
    # each agent is trained with the policy bearing its own name
    return agent_id


if __name__ == "__main__":
    import ray
    # from ray.rllib.agents.ppo import ppo
    from ray.rllib.algorithms.ppo import PPO, PPOConfig
    import json
    import os
    import shutil

    ray_ma_env = MAEnvWrapper()

    checkpoint_root = "./ma_ppo_test_2ndsetting"

    # Where checkpoints are written:
    shutil.rmtree(checkpoint_root, ignore_errors=True, onerror=None)

    # Where some data will be written and used by Tensorboard below:
    ray_results = f'{os.getenv("HOME")}/ray_results/'
    shutil.rmtree(ray_results, ignore_errors=True, onerror=None)

    info = ray.init(ignore_reinit_error=True)
    print("Dashboard URL: http://{}".format(info.address_info["webui_url"]))

    # Configs (see ray's doc for more information)
    SELECT_ENV = MAEnvWrapper  # the environment class to train on
    N_ITER = 1000              # number of training iterations

    # config = ppo.DEFAULT_CONFIG.copy()  # PPO's default configuration.
    # config["log_level"] = "WARN"        # Suppress too many messages, but try "INFO" to see what can be printed.

    # # Other settings we might adjust:
    # config["num_workers"] = 1           # Use > 1 for using more CPU cores, including over a cluster
    # config["num_sgd_iter"] = 10         # Number of SGD (stochastic gradient descent) iterations per training minibatch,
    #                                     # i.e., for each minibatch of data, do this many passes over it to train.
    # config["sgd_minibatch_size"] = 64   # The number of data records per minibatch
    # config["model"]["fcnet_hiddens"] = [100, 50]
    # config["num_cpus_per_worker"] = 0   # This avoids running out of resources in the notebook environment when this cell is re-executed
    # config["vf_clip_param"] = 100

    # # multi agent specific config
    # config["multiagent"] = {
    #     "policies": {
    #         "agent_0": PolicySpec(
    #             action_space=ray_ma_env.action_space["agent_0"],
    #             observation_space=ray_ma_env.observation_space["agent_0"],
    #         ),
    #         "agent_1": PolicySpec(
    #             action_space=ray_ma_env.action_space["agent_1"],
    #             observation_space=ray_ma_env.observation_space["agent_1"],
    #         )
    #     },
    #     "policy_mapping_fn": policy_mapping_fn,
    #     "policies_to_train": ["agent_0", "agent_1"],
    # }

    # # Trainer
    # agent = ppo.PPOTrainer(config, env=SELECT_ENV)

    # see ray doc for this...
    # syntax changes with every ray major version apparently...
    config = PPOConfig()
    config = config.training(gamma=0.9, lr=0.01, kl_coeff=0.3, train_batch_size=128)
    config = config.resources(num_gpus=0)
    config = config.rollouts(num_rollout_workers=1)
    # multi agent parts
    config = config.multi_agent(policies={
            "agent_0": PolicySpec(
                action_space=ray_ma_env.action_space["agent_0"],
                observation_space=ray_ma_env.observation_space["agent_0"]
            ),
            "agent_1": PolicySpec(
                action_space=ray_ma_env.action_space["agent_1"],
                observation_space=ray_ma_env.observation_space["agent_1"],
            )
        },
        policy_mapping_fn=policy_mapping_fn,
        policies_to_train=["agent_0", "agent_1"])
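
    # Note: the two hard-coded policies above assume the Louvain clustering
    # yields exactly two communities. A more general (hypothetical) sketch
    # would build one PolicySpec per discovered agent:
    #   policies = {
    #       agent_id: PolicySpec(action_space=ray_ma_env.action_space[agent_id],
    #                            observation_space=ray_ma_env.observation_space[agent_id])
    #       for agent_id in ray_ma_env.action_space
    #   }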

    # Trainer
    agent = PPO(config=config, env=SELECT_ENV)

    results = []
    episode_data = []
    episode_json = []

    for n in range(N_ITER):
        result = agent.train()
        results.append(result)

        episode = {'n': n,
                   'episode_reward_min': result['episode_reward_min'],
                   'episode_reward_mean': result['episode_reward_mean'],
                   'episode_reward_max': result['episode_reward_max'],
                   'episode_len_mean': result['episode_len_mean']
                   }

        episode_data.append(episode)
        episode_json.append(json.dumps(episode))
        file_name = agent.save(checkpoint_root)

        print(f'{n:3d}: Min/Mean/Max reward: {result["episode_reward_min"]:8.4f}/{result["episode_reward_mean"]:8.4f}/{result["episode_reward_max"]:8.4f}. Checkpoint saved to {file_name}')

    with open(f'{ray_results}/rewards.json', 'w') as outfile:
        json.dump(episode_json, outfile)
4 changes: 3 additions & 1 deletion grid2op/multi_agent/__init__.py
@@ -9,7 +9,8 @@
 __all__ = ["SubGridAction",
            "SubGridObservation",
            "MultiAgentEnv",
-           "SubGridObjects"]
+           "SubGridObjects",
+           "ClusterUtils"]
 import warnings

 from grid2op.multi_agent.ma_exceptions import MultiAgentStillBeta
@@ -25,3 +26,4 @@
 from grid2op.multi_agent.subgridObservation import SubGridObservation
 from grid2op.multi_agent.multiAgentEnv import MultiAgentEnv
 from grid2op.multi_agent.subGridObjects import SubGridObjects
+from grid2op.multi_agent.utils import ClusterUtils
77 changes: 75 additions & 2 deletions grid2op/multi_agent/utils.py
@@ -6,8 +6,11 @@
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.

from grid2op.Environment import Environment
from numpy.random import shuffle

import numpy as np
from sknetwork.clustering import Louvain
from scipy.sparse import csr_matrix

def random_order(agents : list, *args, **kwargs):
"""Returns the random order
@@ -74,4 +77,74 @@ def __eq__(self, other):
            and self._current_agent == other._current_agent
            and self.selected_agent == other.selected_agent
        )



class ClusterUtils:
    """
    Groups the substations of a grid into clusters using the Louvain graph clustering method.
    """

    # Create connectivity matrix
    @staticmethod
    def create_connectivity_matrix(env: Environment):
        """
        Creates the connectivity matrix of the given grid environment.

        The connectivity matrix is a 2D NumPy array where the element at position (i, j) is 1 if there is a direct
        connection between substation i and substation j, and 0 otherwise. The diagonal elements are set to 1 to
        indicate self-connections.

        Args:
            env (grid2op.Environment): The grid environment for which the connectivity matrix is to be created.

        Returns:
            connectivity_matrix: A 2D NumPy array of dimension (env.n_sub, env.n_sub) representing the substation
            connectivity of the grid environment.
        """
        connectivity_matrix = np.zeros((env.n_sub, env.n_sub))
        for line_id in range(env.n_line):
            # substations at the two ends ("origin" and "extremity") of the line
            orig_sub = env.line_or_to_subid[line_id]
            extrem_sub = env.line_ex_to_subid[line_id]
            connectivity_matrix[orig_sub, extrem_sub] = 1
            connectivity_matrix[extrem_sub, orig_sub] = 1
        # add the self-connections on the diagonal
        return connectivity_matrix + np.eye(env.n_sub)
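
    # Illustrative sketch of the output, for a hypothetical 3-substation grid
    # with one line between substations 0 and 1 and one between 1 and 2:
    #
    #   create_connectivity_matrix(env) == np.array([[1., 1., 0.],
    #                                                [1., 1., 1.],
    #                                                [0., 1., 1.]])
    #
    # i.e. the line adjacency plus the identity matrix for the self-connections.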



    # Cluster substations
    @staticmethod
    def cluster_substations(env: Environment):
        """
        Clusters the substations of a power grid environment using the Louvain community detection algorithm.

        This function generates a connectivity matrix representing the connections between substations in the given
        environment, and applies the Louvain algorithm to cluster the substations into communities. The resulting
        clusters are formatted into a dictionary where each key corresponds to an agent and the value is the list
        of substations assigned to that agent.

        Args:
            env (grid2op.Environment): The grid environment whose substations are to be clustered.

        Returns:
            (MADict):
                - keys : agents' names
                - values : list of ids of the substations under the control of this agent.
        """

        # Generate the connectivity matrix
        matrix = ClusterUtils.create_connectivity_matrix(env)

        # Perform clustering using the Louvain algorithm
        louvain = Louvain()
        adjacency = csr_matrix(matrix)
        labels = louvain.fit_predict(adjacency)

        # Group substations into clusters
        clusters = {}
        for node, label in enumerate(labels):
            if label not in clusters:
                clusters[label] = []
            clusters[label].append(node)

        # Format the clusters
        formatted_clusters = {f'agent_{i}': nodes for i, nodes in enumerate(clusters.values())}

        return formatted_clusters
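
    # Example usage (hypothetical output: the exact partition depends on the
    # grid and on the Louvain implementation):
    #
    #     import grid2op
    #     from grid2op.multi_agent import ClusterUtils
    #
    #     env = grid2op.make("l2rpn_case14_sandbox")
    #     clusters = ClusterUtils.cluster_substations(env)
    #     # clusters could look like:
    #     # {'agent_0': [0, 1, 2, 3, 4], 'agent_1': [5, 6, 10, 11, 12], 'agent_2': [7, 8, 9, 13]}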
1 change: 1 addition & 0 deletions setup.py
@@ -30,6 +30,7 @@ def my_test_suite():
"tqdm>=4.45.0",
"networkx>=2.4",
"requests>=2.23.0",
"scikit-network>=0.32.1",
"packaging", # because gym changes the way it uses numpy prng in version 0.26 and i need both gym before and after...
"typing_extensions"
],