From a2d03f99e587af153ae0ac50fb94ba6272e4fff2 Mon Sep 17 00:00:00 2001 From: Maxwell Standen Date: Thu, 13 Oct 2022 16:29:59 +1030 Subject: [PATCH] updating RLLib Wrapper and associated documentation --- CybORG/Agents/Wrappers/ChallengeWrapper.py | 1 - CybORG/Agents/Wrappers/RLLIBWrapper.py | 50 -------------- CybORG/Agents/Wrappers/RLLibWrapper.py | 79 ++++++++++++++++++++++ CybORG/Agents/Wrappers/rl_libWrapper.py | 50 -------------- README.md | 73 ++++++++++---------- 5 files changed, 115 insertions(+), 138 deletions(-) delete mode 100644 CybORG/Agents/Wrappers/RLLIBWrapper.py create mode 100644 CybORG/Agents/Wrappers/RLLibWrapper.py delete mode 100644 CybORG/Agents/Wrappers/rl_libWrapper.py diff --git a/CybORG/Agents/Wrappers/ChallengeWrapper.py b/CybORG/Agents/Wrappers/ChallengeWrapper.py index dec76de4..06bcecbf 100644 --- a/CybORG/Agents/Wrappers/ChallengeWrapper.py +++ b/CybORG/Agents/Wrappers/ChallengeWrapper.py @@ -15,7 +15,6 @@ def __init__(self, agent_name: str, env, raise ValueError('Invalid Agent Name') env = table_wrapper(env, output_mode='vector') - env = EnumActionWrapper(env) env = OpenAIGymWrapper(agent_name=agent_name, env=env) self.env = env diff --git a/CybORG/Agents/Wrappers/RLLIBWrapper.py b/CybORG/Agents/Wrappers/RLLIBWrapper.py deleted file mode 100644 index 5d15d572..00000000 --- a/CybORG/Agents/Wrappers/RLLIBWrapper.py +++ /dev/null @@ -1,50 +0,0 @@ -import inspect -import numpy as np -from ray.rllib.agents import ppo -from ray.tune import register_env -from CybORG import CybORG -from CybORG.agents import B_lineAgent, GreenAgent -from CybORG.agents.wrappers import ChallengeWrapper - -class RLlibWrapper(ChallengeWrapper): - def init(self, agent_name, env, reward_threshold=None, max_steps=None): - super().__init__(agent_name, env, reward_threshold, max_steps) - - def step(self, action=None): - obs, reward, done, info = self.env.step(action=action) - self.step_counter += 1 - if self.max_steps is not None and self.step_counter >= self.max_steps: - done = True - return np.float32(obs), reward, done, info - - def reset(self): - self.step_counter = 0 - obs = self.env.reset() - return np.float32(obs) - -def env_creator(env_config: dict): - path = str(inspect.getfile(CybORG)) - path = path[:-7] + f'/Shared/Scenarios/Scenario1b.yaml' - agents = {"Red": B_lineAgent, "Green": GreenAgent} - cyborg = CybORG(scenario_file=path, environment='sim', agents=agents) - env = RLlibWrapper(env=cyborg, agent_name="Blue,", max_steps=100) - return env - -def print_results(results_dict): - train_iter = results_dict["training_iteration"] - r_mean = results_dict["episode_reward_mean"] - r_max = results_dict["episode_reward_max"] - r_min = results_dict["episode_reward_min"] - print(f"{train_iter:4d} \tr_mean: {r_mean:.1f} \tr_max: {r_max:.1f} \tr_min: {r_min: .1f}") - -if __name__ == "__main__": - register_env(name="CybORG", env_creator=env_creator) - config = ppo.DEFAULT_CONFIG.copy() - agent = ppo.PPOTrainer(config=config, env="CybORG") - - train_steps = 1e6 - total_steps = 0 - while total_steps < train_steps: - results = agent.train() - print_results(results) - total_steps = results["timesteps_total"] diff --git a/CybORG/Agents/Wrappers/RLLibWrapper.py b/CybORG/Agents/Wrappers/RLLibWrapper.py new file mode 100644 index 00000000..5c865b89 --- /dev/null +++ b/CybORG/Agents/Wrappers/RLLibWrapper.py @@ -0,0 +1,79 @@ +import inspect +import numpy as np +from ray.rllib.agents import ppo +from ray.rllib.env import ParallelPettingZooEnv +from ray.tune import register_env +from CybORG import CybORG 
+from CybORG.Agents import B_lineAgent, GreenAgent
+from CybORG.Agents.Wrappers import ChallengeWrapper
+
+from CybORG.Agents.Wrappers.PettingZooParallelWrapper import PettingZooParallelWrapper
+from CybORG.Simulator.Scenarios import FileReaderScenarioGenerator, DroneSwarmScenarioGenerator
+
+
+class RLLibWrapper(ChallengeWrapper):
+    def __init__(self, agent_name, env, reward_threshold=None, max_steps=None):
+        super().__init__(agent_name=agent_name, env=env, reward_threshold=reward_threshold, max_steps=max_steps)
+
+    def step(self, action=None):
+        obs, reward, done, info = self.env.step(action=action)
+        self.step_counter += 1
+        if self.max_steps is not None and self.step_counter >= self.max_steps:
+            done = True  # truncate the episode once max_steps is reached
+        return np.float32(obs), reward, done, info
+
+    def reset(self):
+        self.step_counter = 0
+        obs = self.env.reset()
+        return np.float32(obs)
+
+
+def env_creator_CC1(env_config: dict):
+    path = str(inspect.getfile(CybORG))
+    path = path[:-7] + '/Simulator/Scenarios/scenario_files/Scenario1b.yaml'
+    sg = FileReaderScenarioGenerator(path)
+    agents = {"Red": B_lineAgent(), "Green": GreenAgent()}
+    cyborg = CybORG(scenario_generator=sg, environment='sim', agents=agents)
+    env = RLLibWrapper(env=cyborg, agent_name="Blue", max_steps=100)
+    return env
+
+
+def env_creator_CC2(env_config: dict):
+    path = str(inspect.getfile(CybORG))
+    path = path[:-7] + '/Simulator/Scenarios/scenario_files/Scenario2.yaml'
+    sg = FileReaderScenarioGenerator(path)
+    agents = {"Red": B_lineAgent(), "Green": GreenAgent()}
+    cyborg = CybORG(scenario_generator=sg, environment='sim', agents=agents)
+    env = RLLibWrapper(env=cyborg, agent_name="Blue", max_steps=100)
+    return env
+
+
+def env_creator_CC3(env_config: dict):
+    sg = DroneSwarmScenarioGenerator()
+    cyborg = CybORG(scenario_generator=sg, environment='sim')
+    env = ParallelPettingZooEnv(PettingZooParallelWrapper(env=cyborg))
+    return env
+
+
+def print_results(results_dict):
+    train_iter = results_dict["training_iteration"]
+    r_mean = results_dict["episode_reward_mean"]
+    r_max = results_dict["episode_reward_max"]
+    r_min = results_dict["episode_reward_min"]
+    print(f"{train_iter:4d} \tr_mean: {r_mean:.1f} \tr_max: {r_max:.1f} \tr_min: {r_min:.1f}")
+
+
+if __name__ == "__main__":
+    register_env(name="CC1", env_creator=env_creator_CC1)
+    register_env(name="CC2", env_creator=env_creator_CC2)
+    register_env(name="CC3", env_creator=env_creator_CC3)
+    config = ppo.DEFAULT_CONFIG.copy()
+    for env in ['CC1', 'CC2', 'CC3']:
+        agent = ppo.PPOTrainer(config=config, env=env)
+
+        train_steps = 1e2  # small step budget; intended as a quick smoke test
+        total_steps = 0
+        while total_steps < train_steps:
+            results = agent.train()
+            print_results(results)
+            total_steps = results["timesteps_total"]
diff --git a/CybORG/Agents/Wrappers/rl_libWrapper.py b/CybORG/Agents/Wrappers/rl_libWrapper.py
deleted file mode 100644
index 5d15d572..00000000
--- a/CybORG/Agents/Wrappers/rl_libWrapper.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import inspect
-import numpy as np
-from ray.rllib.agents import ppo
-from ray.tune import register_env
-from CybORG import CybORG
-from CybORG.agents import B_lineAgent, GreenAgent
-from CybORG.agents.wrappers import ChallengeWrapper
-
-class RLlibWrapper(ChallengeWrapper):
-    def init(self, agent_name, env, reward_threshold=None, max_steps=None):
-        super().__init__(agent_name, env, reward_threshold, max_steps)
-
-    def step(self, action=None):
-        obs, reward, done, info = self.env.step(action=action)
-        self.step_counter += 1
-        if self.max_steps is not None and self.step_counter >= self.max_steps:
-            done = True
-        return np.float32(obs), reward, done, info
-
-    def reset(self):
-        self.step_counter = 0
-        obs = self.env.reset()
-        return np.float32(obs)
-
-def env_creator(env_config: dict):
-    path = str(inspect.getfile(CybORG))
-    path = path[:-7] + f'/Shared/Scenarios/Scenario1b.yaml'
-    agents = {"Red": B_lineAgent, "Green": GreenAgent}
-    cyborg = CybORG(scenario_file=path, environment='sim', agents=agents)
-    env = RLlibWrapper(env=cyborg, agent_name="Blue,", max_steps=100)
-    return env
-
-def print_results(results_dict):
-    train_iter = results_dict["training_iteration"]
-    r_mean = results_dict["episode_reward_mean"]
-    r_max = results_dict["episode_reward_max"]
-    r_min = results_dict["episode_reward_min"]
-    print(f"{train_iter:4d} \tr_mean: {r_mean:.1f} \tr_max: {r_max:.1f} \tr_min: {r_min: .1f}")
-
-if __name__ == "__main__":
-    register_env(name="CybORG", env_creator=env_creator)
-    config = ppo.DEFAULT_CONFIG.copy()
-    agent = ppo.PPOTrainer(config=config, env="CybORG")
-
-    train_steps = 1e6
-    total_steps = 0
-    while total_steps < train_steps:
-        results = agent.train()
-        print_results(results)
-        total_steps = results["timesteps_total"]
diff --git a/README.md b/README.md
index 6f853972..992e1322 100644
--- a/README.md
+++ b/README.md
@@ -14,61 +14,39 @@ pip install -e .
 
 ## Creating the environment
 
-Import the necessary classes:
-```
+
+Create a CybORG environment with the DroneSwarm scenario used in CAGE Challenge 3:
+
+```python
 from CybORG import CybORG
-from CybORG.Agents import RedMeanderAgent, B_lineAgent, SleepAgent
-from CybORG.Agents.Wrappers import OpenAIGymWrapper, FixedFlatWrapper
-from CybORG.Agents.Wrappers.PettingZooParallelWrapper import PettingZooParallelWrapper
 from CybORG.Simulator.Scenarios.DroneSwarmScenarioGenerator import DroneSwarmScenarioGenerator
-```
-Create a CybORG environment with:
-```python
 sg = DroneSwarmScenarioGenerator()
 cyborg = CybORG(sg, 'sim')
 ```
-
-
-
-To create an environment where the red agent has preexisting knowledge of the network and attempts to beeline to the Operational Server use:
-
-
+The default_red_agent parameter of the DroneSwarmScenarioGenerator allows you to alter the red agent's behaviour. Here is an example of a red agent that randomly selects a drone to exploit and seize control of:
 ```python
-red_agent = B_lineAgent()
-cyborg = CybORG(sg, 'sim', agents={'Red': red_agent})
-```
-To create an environment where the red agent meanders through the network and attempts to take control of all hosts in the network use:
-
-
+from CybORG import CybORG
+from CybORG.Simulator.Scenarios.DroneSwarmScenarioGenerator import DroneSwarmScenarioGenerator
+from CybORG.Agents.SimpleAgents.DroneRedAgent import DroneRedAgent
 
-```python
-red_agent = RedMeanderAgent()
-cyborg = CybORG(sg, 'sim', agents={'Red': red_agent})
-```
-To create an environment where the red agent always takes the sleep action use:
-```python
-red_agent = SleepAgent()
-cyborg = CybORG(sg, 'sim', agents={'Red': red_agent})
+red_agent = DroneRedAgent
+sg = DroneSwarmScenarioGenerator(default_red_agent=red_agent)
+cyborg = CybORG(sg, 'sim')
 ```
-
 ## Wrappers
 
-
 To alter the interface with CybORG, [wrappers](CybORG/Agents/Wrappers) are available.
 
-* [OpenAIGymWrapper](CybORG/Agents/Wrappers/OpenAIGymWrapper.py) - alters the interface to conform to the OpenAI Gym specification.
+* [OpenAIGymWrapper](CybORG/Agents/Wrappers/OpenAIGymWrapper.py) - alters the interface to conform to the OpenAI Gym specification. Requires the observation to be converted into a fixed-size array.
* [FixedFlatWrapper](CybORG/Agents/Wrappers/FixedFlatWrapper.py) - converts the observation from a dictionary format into a fixed size 1-dimensional vector of floats -* [EnumActionWrapper](CybORG/Agents/Wrappers/EnumActionWrapper.py) - converts the action space into a single integer -* [IntListToActionWrapper](CybORG/Agents/Wrappers/IntListToAction.py) - converts the action classes and parameters into a list of integers -* [BlueTableWrapper](CybORG/Agents/Wrappers/BlueTableWrapper.py) - aggregates information from observations and converts into a 1-dimensional vector of integers * [PettingZooParallelWrapper](CybORG/Agents/Wrappers/PettingZooParallelWrapper.py) - alters the interface to conform to the PettingZoo Parallel specification * [ActionsCommsPettingZooParallelWrapper](CybORG/Agents/Wrappers/CommsPettingZooParallelWrapper.py) - Extends the PettingZoo Parallel interface to automatically communicate what action an agent performed to other agents * [ObsCommsPettingZooParallelWrapper](CybORG/Agents/Wrappers/CommsPettingZooParallelWrapper.py) - Extends the PettingZoo Parallel interface to automatically communicate elements of an agent's observation to other agents @@ -81,6 +59,11 @@ To alter the interface with CybORG, [wrappers](CybORG/Agents/Wrappers) are avali The OpenAI Gym Wrapper allows interaction with a single external agent. The name of that external agent must be specified at the creation of the OpenAI Gym Wrapper. ```python +from CybORG import CybORG +from CybORG.Simulator.Scenarios.DroneSwarmScenarioGenerator import DroneSwarmScenarioGenerator +from CybORG.Agents.Wrappers.OpenAIGymWrapper import OpenAIGymWrapper +from CybORG.Agents.Wrappers.FixedFlatWrapper import FixedFlatWrapper + sg = DroneSwarmScenarioGenerator() cyborg = CybORG(sg, 'sim') agent_name = 'blue_agent_0' @@ -93,6 +76,10 @@ observation, reward, done, info = open_ai_wrapped_cyborg.step(0) The PettingZoo Parallel Wrapper allows multiple agents to interact with the environment simultaneously. ```python +from CybORG import CybORG +from CybORG.Simulator.Scenarios.DroneSwarmScenarioGenerator import DroneSwarmScenarioGenerator +from CybORG.Agents.Wrappers.PettingZooParallelWrapper import PettingZooParallelWrapper + sg = DroneSwarmScenarioGenerator() cyborg = CybORG(sg, 'sim') open_ai_wrapped_cyborg = PettingZooParallelWrapper(cyborg) @@ -101,7 +88,19 @@ observations, rewards, dones, infos = open_ai_wrapped_cyborg.step({'blue_agent_0 ### Ray/RLLib wrapper ```python -# TODO +from CybORG import CybORG +from CybORG.Simulator.Scenarios.DroneSwarmScenarioGenerator import DroneSwarmScenarioGenerator +from CybORG.Agents.Wrappers.PettingZooParallelWrapper import PettingZooParallelWrapper +from ray.rllib.env import ParallelPettingZooEnv +from ray.tune import register_env + +def env_creator_CC3(env_config: dict): + sg = DroneSwarmScenarioGenerator() + cyborg = CybORG(scenario_generator=sg, environment='sim') + env = ParallelPettingZooEnv(PettingZooParallelWrapper(env=cyborg)) + return env + +register_env(name="CC3", env_creator=env_creator_CC3) ``` @@ -122,10 +121,10 @@ def wrap(env): ``` The agent under evaluation is defined on line 35. To evaluate an agent, extend the [BaseAgent](CybORG/Agents/SimpleAgents/BaseAgent.py). -We have included the [BlueLoadAgent](CybORG/Agents/SimpleAgents/BlueLoadAgent.py) as an example of an agent that uses the stable_baselines3 library. +We have included the [RandomAgent](CybORG/Agents/SimpleAgents/RandomAgent.py) as an example of an agent that performs random actions. 
``` # Change this line to load your agent -agent = BlueLoadAgent() +agents = {agent: RandomAgent() for agent in wrapped_cyborg.possible_agents} ``` ## Additional Readings
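
Once the wrapped environment is registered (as in RLLibWrapper.py above), training runs through the standard RLlib loop. A minimal sketch, assuming ray[rllib] 1.x, where the `ray.rllib.agents.ppo` API imported by this patch is still available (later Ray releases moved it):

```python
from ray.rllib.agents import ppo
from ray.rllib.env import ParallelPettingZooEnv
from ray.tune import register_env

from CybORG import CybORG
from CybORG.Agents.Wrappers.PettingZooParallelWrapper import PettingZooParallelWrapper
from CybORG.Simulator.Scenarios import DroneSwarmScenarioGenerator


def env_creator_CC3(env_config: dict):
    # Build the CAGE Challenge 3 drone-swarm environment and expose it
    # to RLlib through the PettingZoo Parallel interface.
    sg = DroneSwarmScenarioGenerator()
    cyborg = CybORG(scenario_generator=sg, environment='sim')
    return ParallelPettingZooEnv(PettingZooParallelWrapper(env=cyborg))


register_env(name="CC3", env_creator=env_creator_CC3)
trainer = ppo.PPOTrainer(config=ppo.DEFAULT_CONFIG.copy(), env="CC3")
for _ in range(3):  # a few iterations as a quick smoke test
    results = trainer.train()
    print(results["episode_reward_mean"])
```

With no `multiagent` section in the config, RLlib falls back to a single policy shared by all blue agents, which keeps the sketch short; per-agent policies would need an explicit policy mapping.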