updating RLLib Wrapper and associated documentation

cage-challenge · Oct 13, 2022 · a2d03f9 · a2d03f9
1 parent e562614
commit a2d03f9
Show file tree

Hide file tree

Showing 5 changed files with 115 additions and 138 deletions.
diff --git a/CybORG/Agents/Wrappers/ChallengeWrapper.py b/CybORG/Agents/Wrappers/ChallengeWrapper.py
@@ -15,7 +15,6 @@ def __init__(self, agent_name: str, env,
             raise ValueError('Invalid Agent Name')
 
         env = table_wrapper(env, output_mode='vector')
-        env = EnumActionWrapper(env)
         env = OpenAIGymWrapper(agent_name=agent_name, env=env)
 
         self.env = env

diff --git a/CybORG/Agents/Wrappers/RLLIBWrapper.py b/CybORG/Agents/Wrappers/RLLIBWrapper.py
diff --git a/CybORG/Agents/Wrappers/RLLibWrapper.py b/CybORG/Agents/Wrappers/RLLibWrapper.py
@@ -0,0 +1,79 @@
+import inspect
+import numpy as np
+from ray.rllib.agents import ppo
+from ray.rllib.env import ParallelPettingZooEnv
+from ray.tune import register_env
+from CybORG import CybORG
+from CybORG.Agents import B_lineAgent, GreenAgent
+from CybORG.Agents.Wrappers import ChallengeWrapper
+
+from CybORG.Agents.Wrappers.PettingZooParallelWrapper import PettingZooParallelWrapper
+from CybORG.Simulator.Scenarios import FileReaderScenarioGenerator, DroneSwarmScenarioGenerator
+
+
+class RLLibWrapper(ChallengeWrapper):
+    """ChallengeWrapper variant that returns observations as ``np.float32``.
+
+    ``step`` and ``reset`` delegate to the wrapped ``self.env`` and convert
+    the observation with ``np.float32`` before returning it.
+
+    NOTE(review): ``self.step_counter`` and ``self.max_steps`` are presumably
+    initialised by ``ChallengeWrapper.__init__`` -- confirm against the parent.
+    """
+
+    def __init__(self, agent_name, env, reward_threshold=None, max_steps=None):
+        """Forward every constructor argument to ``ChallengeWrapper``.
+
+        This method was previously named ``init`` and was therefore never
+        invoked as a constructor.
+
+        Args:
+            agent_name: Name of the agent this wrapper acts for.
+            env: Environment to wrap.
+            reward_threshold: Passed through unchanged to the parent.
+            max_steps: Step limit after which ``step`` reports ``done``;
+                ``None`` disables the limit.
+        """
+        # Keyword forwarding avoids depending on the parent's positional order.
+        super().__init__(agent_name=agent_name, env=env,
+                         reward_threshold=reward_threshold, max_steps=max_steps)
+
+    def step(self, action=None):
+        """Apply ``action`` to the wrapped environment.
+
+        Returns:
+            A ``(observation, reward, done, info)`` tuple. The observation is
+            converted with ``np.float32``; ``done`` is forced to ``True`` once
+            ``step_counter`` reaches ``max_steps``.
+        """
+        obs, reward, done, info = self.env.step(action=action)
+        self.step_counter += 1
+        if self.max_steps is not None and self.step_counter >= self.max_steps:
+            done = True
+        return np.float32(obs), reward, done, info
+
+    def reset(self):
+        """Zero the step counter, reset the wrapped env, return a float32 observation."""
+        self.step_counter = 0
+        obs = self.env.reset()
+        return np.float32(obs)
+
+
+def env_creator_CC1(env_config: dict):
+    """Build the environment that is registered under the name "CC1".
+
+    Loads ``Scenario1b.yaml`` from the directory containing the module that
+    defines ``CybORG``, runs it in simulation with a ``B_lineAgent`` as Red
+    and a ``GreenAgent`` as Green, and wraps it in an ``RLLibWrapper`` for the
+    "Blue" agent with a 100-step limit.
+
+    Args:
+        env_config: Accepted to match the env-creator signature; not read.
+
+    Returns:
+        The wrapped environment.
+    """
+    import os  # local import: only needed to locate the packaged scenario file
+
+    # Take the directory of the defining module instead of slicing a
+    # hard-coded 7 characters off the path, which silently breaks if that
+    # file is ever renamed.
+    package_dir = os.path.dirname(str(inspect.getfile(CybORG)))
+    path = package_dir + '/Simulator/Scenarios/scenario_files/Scenario1b.yaml'
+    sg = FileReaderScenarioGenerator(path)
+    agents = {"Red": B_lineAgent(), "Green": GreenAgent()}
+    cyborg = CybORG(scenario_generator=sg, environment='sim', agents=agents)
+    env = RLLibWrapper(env=cyborg, agent_name="Blue", max_steps=100)
+    return env
+
+
+def env_creator_CC2(env_config: dict):
+    """Build the environment that is registered under the name "CC2".
+
+    Loads ``Scenario2.yaml`` from the directory containing the module that
+    defines ``CybORG``, runs it in simulation with a ``B_lineAgent`` as Red
+    and a ``GreenAgent`` as Green, and wraps it in an ``RLLibWrapper`` for the
+    "Blue" agent with a 100-step limit.
+
+    Args:
+        env_config: Accepted to match the env-creator signature; not read.
+
+    Returns:
+        The wrapped environment.
+    """
+    import os  # local import: only needed to locate the packaged scenario file
+
+    # Take the directory of the defining module instead of slicing a
+    # hard-coded 7 characters off the path, which silently breaks if that
+    # file is ever renamed.
+    package_dir = os.path.dirname(str(inspect.getfile(CybORG)))
+    path = package_dir + '/Simulator/Scenarios/scenario_files/Scenario2.yaml'
+    sg = FileReaderScenarioGenerator(path)
+    agents = {"Red": B_lineAgent(), "Green": GreenAgent()}
+    cyborg = CybORG(scenario_generator=sg, environment='sim', agents=agents)
+    env = RLLibWrapper(env=cyborg, agent_name="Blue", max_steps=100)
+    return env
+
+
+def env_creator_CC3(env_config: dict):
+    """Build the environment that is registered under the name "CC3".
+
+    Creates a simulated CybORG instance from a default
+    ``DroneSwarmScenarioGenerator`` and exposes it through
+    ``PettingZooParallelWrapper`` inside a ``ParallelPettingZooEnv``.
+
+    Args:
+        env_config: Accepted to match the env-creator signature; not read.
+
+    Returns:
+        The ``ParallelPettingZooEnv`` wrapping the simulation.
+    """
+    scenario = DroneSwarmScenarioGenerator()
+    simulation = CybORG(scenario_generator=scenario, environment='sim')
+    parallel_env = PettingZooParallelWrapper(env=simulation)
+    return ParallelPettingZooEnv(parallel_env)
+
+
+def print_results(results_dict):
+    """Print a one-line summary of a training result.
+
+    Args:
+        results_dict: Mapping that provides "training_iteration",
+            "episode_reward_mean", "episode_reward_max" and
+            "episode_reward_min"; a missing key raises ``KeyError``.
+    """
+    # Keys are looked up in the same order as the fields appear in the output.
+    train_iter, r_mean, r_max, r_min = (
+        results_dict[key]
+        for key in (
+            "training_iteration",
+            "episode_reward_mean",
+            "episode_reward_max",
+            "episode_reward_min",
+        )
+    )
+    print(f"{train_iter:4d} \tr_mean: {r_mean:.1f} \tr_max: {r_max:.1f} \tr_min: {r_min: .1f}")
+
+
+if __name__ == "__main__":
+    # Smoke test: register each environment creator, then train a PPO agent
+    # on every registered environment until the step budget is reached.
+    register_env(name="CC1", env_creator=env_creator_CC1)
+    register_env(name="CC2", env_creator=env_creator_CC2)
+    register_env(name="CC3", env_creator=env_creator_CC3)
+    config = ppo.DEFAULT_CONFIG.copy()
+    train_steps = 1e2  # same budget for every environment, so set it once
+    for env in ['CC1', 'CC2', 'CC3']:
+        agent = ppo.PPOTrainer(config=config, env=env)
+        try:
+            total_steps = 0
+            while total_steps < train_steps:
+                results = agent.train()
+                print_results(results)
+                total_steps = results["timesteps_total"]
+        finally:
+            # Release this trainer's resources before the next one is built,
+            # even if training raised.
+            agent.stop()
diff --git a/CybORG/Agents/Wrappers/rl_libWrapper.py b/CybORG/Agents/Wrappers/rl_libWrapper.py
diff --git a/README.md b/README.md
@@ -14,61 +14,39 @@ pip install -e .
 
 
 ## Creating the environment
-Import the necessary classes:
-```
+
+Create a CybORG environment with the DroneSwarm Scenario that is used for CAGE Challenge 3:
+
+```python
 from CybORG import CybORG
-from CybORG.Agents import RedMeanderAgent, B_lineAgent, SleepAgent
-from CybORG.Agents.Wrappers import OpenAIGymWrapper, FixedFlatWrapper
-from CybORG.Agents.Wrappers.PettingZooParallelWrapper import PettingZooParallelWrapper
 from CybORG.Simulator.Scenarios.DroneSwarmScenarioGenerator import DroneSwarmScenarioGenerator
-```
 
-Create a CybORG environment with:
-```python
 sg = DroneSwarmScenarioGenerator()
 cyborg = CybORG(sg, 'sim')
 ```
 
-
-
-
-To create an environment where the red agent has preexisting knowledge of the network and attempts to beeline to the Operational Server use:
-
-
+The default_red_agent parameter of the DroneSwarmScenarioGenerator allows you to alter the red agent behaviour. Here is an example of a red agent that randomly selects a drone to exploit and seize control of:
 
 ```python
-red_agent = B_lineAgent()
-cyborg = CybORG(sg, 'sim', agents={'Red': red_agent})
-```
-To create an environment where the red agent meanders through the network and attempts to take control of all hosts in the network use:
-
-
+from CybORG import CybORG
+from CybORG.Simulator.Scenarios.DroneSwarmScenarioGenerator import DroneSwarmScenarioGenerator
+from CybORG.Agents.SimpleAgents.DroneRedAgent import DroneRedAgent
 
-```python
-red_agent = RedMeanderAgent()
-cyborg = CybORG(sg, 'sim', agents={'Red': red_agent})
-```
-To create an environment where the red agent always takes the sleep action use:
-```python
-red_agent = SleepAgent()
-cyborg = CybORG(sg, 'sim', agents={'Red': red_agent})
+red_agent = DroneRedAgent
+sg = DroneSwarmScenarioGenerator(default_red_agent=red_agent)
+cyborg = CybORG(sg, 'sim')
 ```
 
-
 
 ## Wrappers
 
-
 
 To alter the interface with CybORG, [wrappers](CybORG/Agents/Wrappers) are available.
 
 
 
-* [OpenAIGymWrapper](CybORG/Agents/Wrappers/OpenAIGymWrapper.py) - alters the interface to conform to the OpenAI Gym specification.
+* [OpenAIGymWrapper](CybORG/Agents/Wrappers/OpenAIGymWrapper.py) - alters the interface to conform to the OpenAI Gym specification. Requires the observation to be changed into a fixed size array.
 * [FixedFlatWrapper](CybORG/Agents/Wrappers/FixedFlatWrapper.py) - converts the observation from a dictionary format into a fixed size 1-dimensional vector of floats
-* [EnumActionWrapper](CybORG/Agents/Wrappers/EnumActionWrapper.py) - converts the action space into a single integer
-* [IntListToActionWrapper](CybORG/Agents/Wrappers/IntListToAction.py) - converts the action classes and parameters into a list of integers
-* [BlueTableWrapper](CybORG/Agents/Wrappers/BlueTableWrapper.py) - aggregates information from observations and converts into a 1-dimensional vector of integers
 * [PettingZooParallelWrapper](CybORG/Agents/Wrappers/PettingZooParallelWrapper.py) - alters the interface to conform to the PettingZoo Parallel specification
     * [ActionsCommsPettingZooParallelWrapper](CybORG/Agents/Wrappers/CommsPettingZooParallelWrapper.py) - Extends the PettingZoo Parallel interface to automatically communicate what action an agent performed to other agents
     * [ObsCommsPettingZooParallelWrapper](CybORG/Agents/Wrappers/CommsPettingZooParallelWrapper.py) - Extends the PettingZoo Parallel interface to automatically communicate elements of an agent's observation to other agents
@@ -81,6 +59,11 @@ To alter the interface with CybORG, [wrappers](CybORG/Agents/Wrappers) are avali
 The OpenAI Gym Wrapper allows interaction with a single external agent. The name of that external agent must be specified at the creation of the OpenAI Gym Wrapper.
 
 ```python
+from CybORG import CybORG
+from CybORG.Simulator.Scenarios.DroneSwarmScenarioGenerator import DroneSwarmScenarioGenerator
+from CybORG.Agents.Wrappers.OpenAIGymWrapper import OpenAIGymWrapper
+from CybORG.Agents.Wrappers.FixedFlatWrapper import FixedFlatWrapper
+
 sg = DroneSwarmScenarioGenerator()
 cyborg = CybORG(sg, 'sim')
 agent_name = 'blue_agent_0'
@@ -93,6 +76,10 @@ observation, reward, done, info = open_ai_wrapped_cyborg.step(0)
 The PettingZoo Parallel Wrapper allows multiple agents to interact with the environment simultaneously.
 
 ```python
+from CybORG import CybORG
+from CybORG.Simulator.Scenarios.DroneSwarmScenarioGenerator import DroneSwarmScenarioGenerator
+from CybORG.Agents.Wrappers.PettingZooParallelWrapper import PettingZooParallelWrapper
+
 sg = DroneSwarmScenarioGenerator()
 cyborg = CybORG(sg, 'sim')
 open_ai_wrapped_cyborg = PettingZooParallelWrapper(cyborg)
@@ -101,7 +88,19 @@ observations, rewards, dones, infos = open_ai_wrapped_cyborg.step({'blue_agent_0
 
 ### Ray/RLLib wrapper  
 ```python
-# TODO
+from CybORG import CybORG
+from CybORG.Simulator.Scenarios.DroneSwarmScenarioGenerator import DroneSwarmScenarioGenerator
+from CybORG.Agents.Wrappers.PettingZooParallelWrapper import PettingZooParallelWrapper
+from ray.rllib.env import ParallelPettingZooEnv
+from ray.tune import register_env
+
+def env_creator_CC3(env_config: dict):
+    sg = DroneSwarmScenarioGenerator()
+    cyborg = CybORG(scenario_generator=sg, environment='sim')
+    env = ParallelPettingZooEnv(PettingZooParallelWrapper(env=cyborg))
+    return env
+
+register_env(name="CC3", env_creator=env_creator_CC3)
 ```
 
 
@@ -122,10 +121,10 @@ def wrap(env):
 ```
 The agent under evaluation is defined on line 35. 
 To evaluate an agent, extend the [BaseAgent](CybORG/Agents/SimpleAgents/BaseAgent.py). 
-We have included the [BlueLoadAgent](CybORG/Agents/SimpleAgents/BlueLoadAgent.py) as an example of an agent that uses the stable_baselines3 library.
+We have included the [RandomAgent](CybORG/Agents/SimpleAgents/RandomAgent.py) as an example of an agent that performs random actions.
 ```
 # Change this line to load your agent
-agent = BlueLoadAgent()
+agents = {agent: RandomAgent() for agent in wrapped_cyborg.possible_agents}
 ```
 
 ## Additional Readings