From a2d03f99e587af153ae0ac50fb94ba6272e4fff2 Mon Sep 17 00:00:00 2001 From: Maxwell Standen Date: Thu, 13 Oct 2022 16:29:59 +1030 Subject: [PATCH] updating RLLib Wrapper and associated documentation --- CybORG/Agents/Wrappers/ChallengeWrapper.py | 1 - CybORG/Agents/Wrappers/RLLIBWrapper.py | 50 -------------- CybORG/Agents/Wrappers/RLLibWrapper.py | 79 ++++++++++++++++++++++ CybORG/Agents/Wrappers/rl_libWrapper.py | 50 -------------- README.md | 73 ++++++++++---------- 5 files changed, 115 insertions(+), 138 deletions(-) delete mode 100644 CybORG/Agents/Wrappers/RLLIBWrapper.py create mode 100644 CybORG/Agents/Wrappers/RLLibWrapper.py delete mode 100644 CybORG/Agents/Wrappers/rl_libWrapper.py diff --git a/CybORG/Agents/Wrappers/ChallengeWrapper.py b/CybORG/Agents/Wrappers/ChallengeWrapper.py index dec76de4..06bcecbf 100644 --- a/CybORG/Agents/Wrappers/ChallengeWrapper.py +++ b/CybORG/Agents/Wrappers/ChallengeWrapper.py @@ -15,7 +15,6 @@ def __init__(self, agent_name: str, env, raise ValueError('Invalid Agent Name') env = table_wrapper(env, output_mode='vector') - env = EnumActionWrapper(env) env = OpenAIGymWrapper(agent_name=agent_name, env=env) self.env = env diff --git a/CybORG/Agents/Wrappers/RLLIBWrapper.py b/CybORG/Agents/Wrappers/RLLIBWrapper.py deleted file mode 100644 index 5d15d572..00000000 --- a/CybORG/Agents/Wrappers/RLLIBWrapper.py +++ /dev/null @@ -1,50 +0,0 @@ -import inspect -import numpy as np -from ray.rllib.agents import ppo -from ray.tune import register_env -from CybORG import CybORG -from CybORG.agents import B_lineAgent, GreenAgent -from CybORG.agents.wrappers import ChallengeWrapper - -class RLlibWrapper(ChallengeWrapper): - def init(self, agent_name, env, reward_threshold=None, max_steps=None): - super().__init__(agent_name, env, reward_threshold, max_steps) - - def step(self, action=None): - obs, reward, done, info = self.env.step(action=action) - self.step_counter += 1 - if self.max_steps is not None and self.step_counter >= self.max_steps: - done = True - return np.float32(obs), reward, done, info - - def reset(self): - self.step_counter = 0 - obs = self.env.reset() - return np.float32(obs) - -def env_creator(env_config: dict): - path = str(inspect.getfile(CybORG)) - path = path[:-7] + f'/Shared/Scenarios/Scenario1b.yaml' - agents = {"Red": B_lineAgent, "Green": GreenAgent} - cyborg = CybORG(scenario_file=path, environment='sim', agents=agents) - env = RLlibWrapper(env=cyborg, agent_name="Blue,", max_steps=100) - return env - -def print_results(results_dict): - train_iter = results_dict["training_iteration"] - r_mean = results_dict["episode_reward_mean"] - r_max = results_dict["episode_reward_max"] - r_min = results_dict["episode_reward_min"] - print(f"{train_iter:4d} \tr_mean: {r_mean:.1f} \tr_max: {r_max:.1f} \tr_min: {r_min: .1f}") - -if __name__ == "__main__": - register_env(name="CybORG", env_creator=env_creator) - config = ppo.DEFAULT_CONFIG.copy() - agent = ppo.PPOTrainer(config=config, env="CybORG") - - train_steps = 1e6 - total_steps = 0 - while total_steps < train_steps: - results = agent.train() - print_results(results) - total_steps = results["timesteps_total"] diff --git a/CybORG/Agents/Wrappers/RLLibWrapper.py b/CybORG/Agents/Wrappers/RLLibWrapper.py new file mode 100644 index 00000000..5c865b89 --- /dev/null +++ b/CybORG/Agents/Wrappers/RLLibWrapper.py @@ -0,0 +1,79 @@ +import inspect +import numpy as np +from ray.rllib.agents import ppo +from ray.rllib.env import ParallelPettingZooEnv +from ray.tune import register_env +from CybORG import CybORG 
+from CybORG.Agents import B_lineAgent, GreenAgent
+from CybORG.Agents.Wrappers import ChallengeWrapper
+
+from CybORG.Agents.Wrappers.PettingZooParallelWrapper import PettingZooParallelWrapper
+from CybORG.Simulator.Scenarios import FileReaderScenarioGenerator, DroneSwarmScenarioGenerator
+
+
+class RLLibWrapper(ChallengeWrapper):
+    def __init__(self, agent_name, env, reward_threshold=None, max_steps=None):
+        super().__init__(agent_name=agent_name, env=env, reward_threshold=reward_threshold, max_steps=max_steps)
+
+    def step(self, action=None):
+        obs, reward, done, info = self.env.step(action=action)
+        self.step_counter += 1
+        if self.max_steps is not None and self.step_counter >= self.max_steps:
+            done = True  # truncate the episode once max_steps is reached
+        return np.float32(obs), reward, done, info
+
+    def reset(self):
+        self.step_counter = 0
+        obs = self.env.reset()
+        return np.float32(obs)
+
+
+def env_creator_CC1(env_config: dict):
+    path = str(inspect.getfile(CybORG))
+    path = path[:-7] + '/Simulator/Scenarios/scenario_files/Scenario1b.yaml'
+    sg = FileReaderScenarioGenerator(path)
+    agents = {"Red": B_lineAgent(), "Green": GreenAgent()}
+    cyborg = CybORG(scenario_generator=sg, environment='sim', agents=agents)
+    env = RLLibWrapper(env=cyborg, agent_name="Blue", max_steps=100)
+    return env
+
+
+def env_creator_CC2(env_config: dict):
+    path = str(inspect.getfile(CybORG))
+    path = path[:-7] + '/Simulator/Scenarios/scenario_files/Scenario2.yaml'
+    sg = FileReaderScenarioGenerator(path)
+    agents = {"Red": B_lineAgent(), "Green": GreenAgent()}
+    cyborg = CybORG(scenario_generator=sg, environment='sim', agents=agents)
+    env = RLLibWrapper(env=cyborg, agent_name="Blue", max_steps=100)
+    return env
+
+
+def env_creator_CC3(env_config: dict):
+    sg = DroneSwarmScenarioGenerator()
+    cyborg = CybORG(scenario_generator=sg, environment='sim')
+    env = ParallelPettingZooEnv(PettingZooParallelWrapper(env=cyborg))
+    return env
+
+
+def print_results(results_dict):
+    train_iter = results_dict["training_iteration"]
+    r_mean = results_dict["episode_reward_mean"]
+    r_max = results_dict["episode_reward_max"]
+    r_min = results_dict["episode_reward_min"]
+    print(f"{train_iter:4d} \tr_mean: {r_mean:.1f} \tr_max: {r_max:.1f} \tr_min: {r_min:.1f}")
+
+
+if __name__ == "__main__":
+    register_env(name="CC1", env_creator=env_creator_CC1)
+    register_env(name="CC2", env_creator=env_creator_CC2)
+    register_env(name="CC3", env_creator=env_creator_CC3)
+    config = ppo.DEFAULT_CONFIG.copy()
+    for env in ['CC1', 'CC2', 'CC3']:
+        agent = ppo.PPOTrainer(config=config, env=env)
+
+        train_steps = 1e2  # small step budget; intended as a quick smoke test
+        total_steps = 0
+        while total_steps < train_steps:
+            results = agent.train()
+            print_results(results)
+            total_steps = results["timesteps_total"]
diff --git a/CybORG/Agents/Wrappers/rl_libWrapper.py b/CybORG/Agents/Wrappers/rl_libWrapper.py
deleted file mode 100644
index 5d15d572..00000000
--- a/CybORG/Agents/Wrappers/rl_libWrapper.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import inspect
-import numpy as np
-from ray.rllib.agents import ppo
-from ray.tune import register_env
-from CybORG import CybORG
-from CybORG.agents import B_lineAgent, GreenAgent
-from CybORG.agents.wrappers import ChallengeWrapper
-
-class RLlibWrapper(ChallengeWrapper):
-    def init(self, agent_name, env, reward_threshold=None, max_steps=None):
-        super().__init__(agent_name, env, reward_threshold, max_steps)
-
-    def step(self, action=None):
-        obs, reward, done, info = self.env.step(action=action)
-        self.step_counter += 1
-        if self.max_steps is not None and self.step_counter >= self.max_steps:
-            done = True
-        return np.float32(obs), reward, done, info
-
-    def reset(self):
-        self.step_counter = 0
-        obs = self.env.reset()
-        return np.float32(obs)
-
-def env_creator(env_config: dict):
-    path = str(inspect.getfile(CybORG))
-    path = path[:-7] + f'/Shared/Scenarios/Scenario1b.yaml'
-    agents = {"Red": B_lineAgent, "Green": GreenAgent}
-    cyborg = CybORG(scenario_file=path, environment='sim', agents=agents)
-    env = RLlibWrapper(env=cyborg, agent_name="Blue,", max_steps=100)
-    return env
-
-def print_results(results_dict):
-    train_iter = results_dict["training_iteration"]
-    r_mean = results_dict["episode_reward_mean"]
-    r_max = results_dict["episode_reward_max"]
-    r_min = results_dict["episode_reward_min"]
-    print(f"{train_iter:4d} \tr_mean: {r_mean:.1f} \tr_max: {r_max:.1f} \tr_min: {r_min: .1f}")
-
-if __name__ == "__main__":
-    register_env(name="CybORG", env_creator=env_creator)
-    config = ppo.DEFAULT_CONFIG.copy()
-    agent = ppo.PPOTrainer(config=config, env="CybORG")
-
-    train_steps = 1e6
-    total_steps = 0
-    while total_steps < train_steps:
-        results = agent.train()
-        print_results(results)
-        total_steps = results["timesteps_total"]
diff --git a/README.md b/README.md
index 6f853972..992e1322 100644
--- a/README.md
+++ b/README.md
@@ -14,61 +14,39 @@ pip install -e .
 
 ## Creating the environment
 
-Import the necessary classes:
-```
+
+Create a CybORG environment with the DroneSwarm scenario used in CAGE Challenge 3:
+
+```python
 from CybORG import CybORG
-from CybORG.Agents import RedMeanderAgent, B_lineAgent, SleepAgent
-from CybORG.Agents.Wrappers import OpenAIGymWrapper, FixedFlatWrapper
-from CybORG.Agents.Wrappers.PettingZooParallelWrapper import PettingZooParallelWrapper
 from CybORG.Simulator.Scenarios.DroneSwarmScenarioGenerator import DroneSwarmScenarioGenerator
-```
-Create a CybORG environment with:
-```python
 sg = DroneSwarmScenarioGenerator()
 cyborg = CybORG(sg, 'sim')
 ```
-
-
-
-To create an environment where the red agent has preexisting knowledge of the network and attempts to beeline to the Operational Server use:
-
-
+The default_red_agent parameter of the DroneSwarmScenarioGenerator allows you to alter the red agent's behaviour. Here is an example of a red agent that randomly selects a drone to exploit and seize control of:
 ```python
-red_agent = B_lineAgent()
-cyborg = CybORG(sg, 'sim', agents={'Red': red_agent})
-```
-To create an environment where the red agent meanders through the network and attempts to take control of all hosts in the network use:
-
-
+from CybORG import CybORG
+from CybORG.Simulator.Scenarios.DroneSwarmScenarioGenerator import DroneSwarmScenarioGenerator
+from CybORG.Agents.SimpleAgents.DroneRedAgent import DroneRedAgent
 
-```python
-red_agent = RedMeanderAgent()
-cyborg = CybORG(sg, 'sim', agents={'Red': red_agent})
-```
-To create an environment where the red agent always takes the sleep action use:
-```python
-red_agent = SleepAgent()
-cyborg = CybORG(sg, 'sim', agents={'Red': red_agent})
+red_agent = DroneRedAgent
+sg = DroneSwarmScenarioGenerator(default_red_agent=red_agent)
+cyborg = CybORG(sg, 'sim')
 ```
-
 ## Wrappers
 
-
 To alter the interface with CybORG, [wrappers](CybORG/Agents/Wrappers) are available.
 
-* [OpenAIGymWrapper](CybORG/Agents/Wrappers/OpenAIGymWrapper.py) - alters the interface to conform to the OpenAI Gym specification.
+* [OpenAIGymWrapper](CybORG/Agents/Wrappers/OpenAIGymWrapper.py) - alters the interface to conform to the OpenAI Gym specification. Requires the observation to be converted into a fixed-size array.
* [FixedFlatWrapper](CybORG/Agents/Wrappers/FixedFlatWrapper.py) - converts the observation from a dictionary format into a fixed size 1-dimensional vector of floats -* [EnumActionWrapper](CybORG/Agents/Wrappers/EnumActionWrapper.py) - converts the action space into a single integer -* [IntListToActionWrapper](CybORG/Agents/Wrappers/IntListToAction.py) - converts the action classes and parameters into a list of integers -* [BlueTableWrapper](CybORG/Agents/Wrappers/BlueTableWrapper.py) - aggregates information from observations and converts into a 1-dimensional vector of integers * [PettingZooParallelWrapper](CybORG/Agents/Wrappers/PettingZooParallelWrapper.py) - alters the interface to conform to the PettingZoo Parallel specification * [ActionsCommsPettingZooParallelWrapper](CybORG/Agents/Wrappers/CommsPettingZooParallelWrapper.py) - Extends the PettingZoo Parallel interface to automatically communicate what action an agent performed to other agents * [ObsCommsPettingZooParallelWrapper](CybORG/Agents/Wrappers/CommsPettingZooParallelWrapper.py) - Extends the PettingZoo Parallel interface to automatically communicate elements of an agent's observation to other agents @@ -81,6 +59,11 @@ To alter the interface with CybORG, [wrappers](CybORG/Agents/Wrappers) are avali The OpenAI Gym Wrapper allows interaction with a single external agent. The name of that external agent must be specified at the creation of the OpenAI Gym Wrapper. ```python +from CybORG import CybORG +from CybORG.Simulator.Scenarios.DroneSwarmScenarioGenerator import DroneSwarmScenarioGenerator +from CybORG.Agents.Wrappers.OpenAIGymWrapper import OpenAIGymWrapper +from CybORG.Agents.Wrappers.FixedFlatWrapper import FixedFlatWrapper + sg = DroneSwarmScenarioGenerator() cyborg = CybORG(sg, 'sim') agent_name = 'blue_agent_0' @@ -93,6 +76,10 @@ observation, reward, done, info = open_ai_wrapped_cyborg.step(0) The PettingZoo Parallel Wrapper allows multiple agents to interact with the environment simultaneously. ```python +from CybORG import CybORG +from CybORG.Simulator.Scenarios.DroneSwarmScenarioGenerator import DroneSwarmScenarioGenerator +from CybORG.Agents.Wrappers.PettingZooParallelWrapper import PettingZooParallelWrapper + sg = DroneSwarmScenarioGenerator() cyborg = CybORG(sg, 'sim') open_ai_wrapped_cyborg = PettingZooParallelWrapper(cyborg) @@ -101,7 +88,19 @@ observations, rewards, dones, infos = open_ai_wrapped_cyborg.step({'blue_agent_0 ### Ray/RLLib wrapper ```python -# TODO +from CybORG import CybORG +from CybORG.Simulator.Scenarios.DroneSwarmScenarioGenerator import DroneSwarmScenarioGenerator +from CybORG.Agents.Wrappers.PettingZooParallelWrapper import PettingZooParallelWrapper +from ray.rllib.env import ParallelPettingZooEnv +from ray.tune import register_env + +def env_creator_CC3(env_config: dict): + sg = DroneSwarmScenarioGenerator() + cyborg = CybORG(scenario_generator=sg, environment='sim') + env = ParallelPettingZooEnv(PettingZooParallelWrapper(env=cyborg)) + return env + +register_env(name="CC3", env_creator=env_creator_CC3) ``` @@ -122,10 +121,10 @@ def wrap(env): ``` The agent under evaluation is defined on line 35. To evaluate an agent, extend the [BaseAgent](CybORG/Agents/SimpleAgents/BaseAgent.py). -We have included the [BlueLoadAgent](CybORG/Agents/SimpleAgents/BlueLoadAgent.py) as an example of an agent that uses the stable_baselines3 library. +We have included the [RandomAgent](CybORG/Agents/SimpleAgents/RandomAgent.py) as an example of an agent that performs random actions. 
``` # Change this line to load your agent -agent = BlueLoadAgent() +agents = {agent: RandomAgent() for agent in wrapped_cyborg.possible_agents} ``` ## Additional Readings
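
Once the wrapped environment is registered (as in RLLibWrapper.py above), training runs through the standard RLlib loop. A minimal sketch, assuming ray[rllib] 1.x, where the `ray.rllib.agents.ppo` API imported by this patch is still available (later Ray releases moved it):

```python
from ray.rllib.agents import ppo
from ray.rllib.env import ParallelPettingZooEnv
from ray.tune import register_env

from CybORG import CybORG
from CybORG.Agents.Wrappers.PettingZooParallelWrapper import PettingZooParallelWrapper
from CybORG.Simulator.Scenarios import DroneSwarmScenarioGenerator


def env_creator_CC3(env_config: dict):
    # Build the CAGE Challenge 3 drone-swarm environment and expose it
    # to RLlib through the PettingZoo Parallel interface.
    sg = DroneSwarmScenarioGenerator()
    cyborg = CybORG(scenario_generator=sg, environment='sim')
    return ParallelPettingZooEnv(PettingZooParallelWrapper(env=cyborg))


register_env(name="CC3", env_creator=env_creator_CC3)
trainer = ppo.PPOTrainer(config=ppo.DEFAULT_CONFIG.copy(), env="CC3")
for _ in range(3):  # a few iterations as a quick smoke test
    results = trainer.train()
    print(results["episode_reward_mean"])
```

With no `multiagent` section in the config, RLlib falls back to a single policy shared by all blue agents, which keeps the sketch short; per-agent policies would need an explicit policy mapping.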