From 37a13f4a76fecf161184bd528d094f9f27ceeee1 Mon Sep 17 00:00:00 2001
From: Alessandro Palmas <alessandropalmas.mail@gmail.com>
Date: Thu, 27 Jul 2023 22:40:44 -0400
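Subject: [PATCH] Update engine_mock to use timer

Replace the mock engine's per-round step counter (steps_per_round /
n_steps) with a round timer. The timer is reset to the upper bound of
the game's "timer" RAM state and is decreased by step_ratio / 60 at
every step; a round is now done when a health value reaches its
minimum or the timer drops to zero.

Related changes in this patch:
- add a "timer" RAM state to the game entries in integratedGames.json
- rename n_stages to current_stage_number in the mock engine
- add a "render" argument to the test execution interface and to the
  test scripts that build its arguments
- pass "-s" to pytest in the VS Code settings
- comment each step of the diambra_arena_gist.py example
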
---
 .vscode/settings.json                    |  3 +-
 diambra/arena/utils/engine_mock.py       | 77 ++++++++++++------------
 diambra/arena/utils/integratedGames.json | 18 ++++--
 examples/diambra_arena_gist.py           |  9 +++
 tests/env_exec_interface.py              |  7 ++-
 tests/man_test_random.py                 |  8 ++-
 tests/test_integration.py                |  2 +-
 tests/test_random.py                     |  1 +
 8 files changed, 74 insertions(+), 51 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 26822786..157c40ab 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -2,7 +2,8 @@
     "python.testing.cwd": "${workspaceFolder}/tests/",
     "python.testing.pytestArgs": [
         "--rootdir",
-        "${workspaceFolder}/tests"
+        "${workspaceFolder}/tests",
+        "-s"
     ],
     "python.testing.unittestEnabled": false,
     "python.testing.pytestEnabled": true
diff --git a/diambra/arena/utils/engine_mock.py b/diambra/arena/utils/engine_mock.py
index 33cd74f6..ab5164fb 100644
--- a/diambra/arena/utils/engine_mock.py
+++ b/diambra/arena/utils/engine_mock.py
@@ -6,11 +6,10 @@
 
 class DiambraEngineMock:
 
-    def __init__(self, steps_per_round=20, fps=1000):
+    def __init__(self, fps=1000):
 
         # Game features
         self.game_data = None
-        self.steps_per_round = steps_per_round
         self.fps = fps
 
         # Random seed
@@ -18,10 +17,10 @@ def __init__(self, steps_per_round=20, fps=1000):
         random.seed(time_dep_seed)
 
         # Class state variables initialization
-        self.n_steps = 0
+        self.timer = 0
         self.n_rounds_won = 0
         self.n_rounds_lost = 0
-        self.n_stages = 0
+        self.current_stage_number = 0
         self.n_continue = 0
         self.side_p1 = 0
         self.side_p2 = 1
@@ -42,22 +41,22 @@ def generate_ram_states(self):
             self.ram_states[k][3] = random.choices(range(v[1], v[2] + 1))[0]
 
         # Setting meaningful values to ram states
-        self.ram_states["stage"][3] = self.n_stages + 1
+        self.ram_states["stage"][3] = self.current_stage_number + 1
         self.ram_states["SideP1"][3] = self.side_p1
         self.ram_states["SideP2"][3] = self.side_p2
         self.ram_states["WinsP1"][3] = self.n_rounds_won
         self.ram_states["WinsP2"][3] = self.n_rounds_lost
 
-        self.ram_states["CharP1"][3] = self.char_p1
-        self.ram_states["CharP2"][3] = self.char_p2
+        values = [[self.char_p1, self.char_p2], [self.health_p1, self.health_p2]]
 
-        if self.game_data["number_of_chars_per_round"] == 1:
-            self.ram_states["HealthP1"][3] = self.health_p1
-            self.ram_states["HealthP2"][3] = self.health_p2
-        else:
-            for idx in range(self.game_data["number_of_chars_per_round"]):
-                self.ram_states["Health{}P1".format(idx+1)][3] = self.health_p1
-                self.ram_states["Health{}P2".format(idx+1)][3] = self.health_p2
+        for idx, state in enumerate(["Char", "Health"]):
+            for jdx, text in enumerate(["", "1", "2", "3"]):
+                for kdx, player in enumerate(["P1", "P2"]):
+                    key = "{}{}{}".format(state, text, player)
+                    if (key in self.ram_states):
+                        self.ram_states[key][3] = values[idx][kdx]
+
+        self.ram_states["timer"][3] = int(self.timer)
 
     # Send env settings, retrieve env info and int variables list [pb low level]
     def _mock_env_init(self, env_settings_pb):
@@ -91,7 +90,8 @@ def _mock_env_init(self, env_settings_pb):
 
         self.continue_per_episode = - int(self.settings.continue_game) if self.settings.continue_game < 0.0 else int(self.settings.continue_game*10)
         self.delta_health = self.game_data["health"][1] - self.game_data["health"][0]
-        self.base_hit = int(self.delta_health * (self.game_data["n_actions"][0] + self.game_data["n_actions"][0]) / (self.game_data["n_actions"][1] * (self.steps_per_round - 1)))
+        self.base_hit = int(self.delta_health * self.game_data["n_actions"][1] /
+                              ((self.game_data["n_actions"][0] + self.game_data["n_actions"][2]) * (self.game_data["ram_states"]["timer"][2] / self.settings.step_ratio)))
 
         # Generate the ram states map
         self.ram_states = self.game_data["ram_states"]
@@ -137,7 +137,7 @@ def _mock_env_init(self, env_settings_pb):
         return response
 
     def generate_frame(self):
-        frame = np.ones((self.frame_shape), dtype=np.int8) * ((self.n_stages * self.game_data["rounds_per_stage"] + self.n_steps) % 255)
+        frame = np.ones((self.frame_shape), dtype=np.int8) * ((self.current_stage_number * self.game_data["rounds_per_stage"] + int(self.timer)) % 255)
         return frame.tobytes()
 
     # Set delta health
@@ -147,10 +147,9 @@ def set_perfect_chance(self):
     # Reset game state
     def reset_state(self):
         # Reset class state
-        self.n_steps = 0
         self.n_rounds_won = 0
         self.n_rounds_lost = 0
-        self.n_stages = 0
+        self.current_stage_number = 0
         self.n_continue = 0
 
         # Actions
@@ -179,6 +178,7 @@ def reset_state(self):
         self.side_p2 = 1
         self.health_p1 = self.game_data["health"][1]
         self.health_p2 = self.game_data["health"][1]
+        self.timer = self.game_data["ram_states"]["timer"][2]
 
         self.reward = 0
 
@@ -195,14 +195,14 @@ def reset_state(self):
                 self.char_p2 = self.game_data["char_list"].index(self.settings.characters.p2[0])
 
         elif self.player == "P1":
-            self.char_p2 = self.n_stages
+            self.char_p2 = self.current_stage_number
             if (self.settings.characters.p1[0] == "Random"):
                 self.char_p1 = random.choices(range(len(self.game_data["char_list"])))[0]
             else:
                 self.char_p1 = self.game_data["char_list"].index(self.settings.characters.p1[0])
 
         else:
-            self.char_p1 = self.n_stages
+            self.char_p1 = self.current_stage_number
             if (self.settings.characters.p2[0] == "Random"):
                 self.char_p2 = random.choices(range(len(self.game_data["char_list"])))[0]
             else:
@@ -212,7 +212,7 @@ def reset_state(self):
     def new_game_state(self, mov_p1=0, att_p1=0, mov_p2=0, att_p2=0):
 
         # Sleep to simulate computer time elapsed
-        time.sleep(1.0/self.fps)
+        time.sleep(1.0/(self.settings.step_ratio * self.fps))
 
         # Actions
         self.mov_p1 = mov_p1
@@ -227,30 +227,34 @@ def new_game_state(self, mov_p1=0, att_p1=0, mov_p2=0, att_p2=0):
         self.episode_done_ = False
         self.env_done_ = False
 
-        self.n_steps += 1
+        self.timer -= (1.0 * self.settings.step_ratio) / 60.0
 
         starting_health_p1 = self.health_p1
         starting_health_p2 = self.health_p2
 
         # Health evolution
-        hit_prob = self.base_round_winning_probability ** self.n_stages
+        hit_prob = self.base_round_winning_probability ** self.current_stage_number
 
-        if self.player == "P2":
-            if not self.perfect:
-                self.health_p2 -= random.choices([self.base_hit, 0], [1.0 - hit_prob, hit_prob])[0]
+        if self.player == "P1P2":
             if att_p1 != 0:
+                self.health_p2 -= random.choices([self.base_hit, 0], [hit_prob, 1.0 - hit_prob])[0]
+            if att_p2 != 0:
                 self.health_p1 -= random.choices([self.base_hit, 0], [hit_prob, 1.0 - hit_prob])[0]
-        else:
-            self.health_p1 -= random.choices([self.base_hit, 0], [1.0 - hit_prob, hit_prob])[0]
+        elif self.player == "P1":
             if att_p1 != 0:
                 self.health_p2 -= random.choices([self.base_hit, 0], [hit_prob, 1.0 - hit_prob])[0]
-            if (self.player == "P1P2" and att_p2 == 0) or self.perfect:
-                self.health_p1 = starting_health_p1
+            if not self.perfect:
+                self.health_p1 -= random.choices([self.base_hit, 0], [1.0 - hit_prob, hit_prob])[0]
+        else:
+            if att_p1 != 0:
+                self.health_p1 -= random.choices([self.base_hit, 0], [hit_prob, 1.0 - hit_prob])[0]
+            if not self.perfect:
+                self.health_p2 -= random.choices([self.base_hit, 0], [1.0 - hit_prob, hit_prob])[0]
 
         self.health_p1 = max(self.health_p1, self.game_data["health"][0])
         self.health_p2 = max(self.health_p2, self.game_data["health"][0])
 
-        if (min(self.health_p1, self.health_p2) == self.game_data["health"][0]) or ((self.n_steps % self.steps_per_round) == 0):
+        if (min(self.health_p1, self.health_p2) == self.game_data["health"][0]) or (self.timer <= 0):
             self.round_done_ = True
 
             if self.health_p1 > self.health_p2:
@@ -280,16 +284,16 @@ def new_game_state(self, mov_p1=0, att_p1=0, mov_p2=0, att_p2=0):
 
         if self.n_rounds_won == self.game_data["rounds_per_stage"]:
             self.stage_done_ = True
-            self.n_stages += 1
+            self.current_stage_number += 1
             self.n_rounds_won = 0
             self.n_rounds_lost = 0
             if self.player == "P1P2":
                 self.game_done_ = True
                 self.episode_done_ = True
             elif self.player == "P1":
-                self.char_p2 = self.n_stages
+                self.char_p2 = self.current_stage_number
             else:
-                self.char_p1 = self.n_stages
+                self.char_p1 = self.current_stage_number
 
         if self.n_rounds_lost == self.game_data["rounds_per_stage"]:
             self.game_done_ = True
@@ -300,7 +304,7 @@ def new_game_state(self, mov_p1=0, att_p1=0, mov_p2=0, att_p2=0):
                 self.n_rounds_won = 0
                 self.n_rounds_lost = 0
 
-        if self.n_stages == self.game_data["stages_per_game"]:
+        if self.current_stage_number == self.game_data["stages_per_game"]:
             self.game_done_ = True
             self.episode_done_ = True
 
@@ -312,12 +316,11 @@ def new_game_state(self, mov_p1=0, att_p1=0, mov_p2=0, att_p2=0):
 
         if np.any([self.round_done_, self.stage_done_, self.game_done_]):
 
-            self.n_steps = 0
-
             self.side_p1 = 0
             self.side_p2 = 1
             self.health_p1 = self.game_data["health"][1]
             self.health_p2 = self.game_data["health"][1]
+            self.timer = self.game_data["ram_states"]["timer"][2]
 
             # Set delta healths
             self.set_perfect_chance()
diff --git a/diambra/arena/utils/integratedGames.json b/diambra/arena/utils/integratedGames.json
index eeff1f30..df5b5db6 100644
--- a/diambra/arena/utils/integratedGames.json
+++ b/diambra/arena/utils/integratedGames.json
@@ -39,7 +39,8 @@
                 "CharP1": [2, 0, 10],
                 "CharP2": [2, 0, 10],
                 "HealthP1": [1, 0, 208],
-                "HealthP2": [1, 0, 208]
+                "HealthP2": [1, 0, 208],
+                "timer": [1, 0, 40]
             },
             "cfg": {"H": "But6", "P": "But1", "K": "But2"}
         },
@@ -99,7 +100,8 @@
                 "SuperMaxCountP1": [1, 1, 3],
                 "SuperMaxCountP2": [1, 1, 3],
                 "HealthP1": [1, -1, 160],
-                "HealthP2": [1, -1, 160]
+                "HealthP2": [1, -1, 160],
+                "timer": [1, 0, 99]
             },
             "cfg": {"LP": "But4", "MP": "But1", "HP": "But5", "LK": "But3", "MK": "But2", "HK": "But6"}
         },
@@ -162,7 +164,8 @@
                 "ActiveCharP1": [0, 0, 1],
                 "ActiveCharP2": [0, 0, 1],
                 "BarStatusP1": [2, 0, 4],
-                "BarStatusP2": [2, 0, 4]
+                "BarStatusP2": [2, 0, 4],
+                "timer": [1, 0, 60]
             },
             "cfg": {"LP": "But4", "RP": "But1", "LK": "But3", "RK": "But2", "TAG": "But6"}
         },
@@ -211,7 +214,8 @@
                 "AggressorBarP1": [1, 0, 48],
                 "AggressorBarP2": [1, 0, 48],
                 "HealthP1": [1, 0, 166],
-                "HealthP2": [1, 0, 166]
+                "HealthP2": [1, 0, 166],
+                "timer": [1, 0, 100]
             },
             "cfg": {"HP": "But1", "HK": "But2", "LK": "But3",  "LP": "But4", "RUN": "But5", "BLK": "But6"}
         },
@@ -275,7 +279,8 @@
                 "PowerBarP1": [1, 0, 64],
                 "PowerBarP2": [1, 0, 64],
                 "HealthP1": [1, 0, 125],
-                "HealthP2": [1, 0, 125]
+                "HealthP2": [1, 0, 125],
+                "timer": [1, 0, 60]
             },
             "cfg": {"WS": "But1", "MS": "But2", "K": "But3", "M": "But4"}
         },
@@ -337,7 +342,8 @@
                 "WinsP1": [1, 0, 3],
                 "WinsP2": [1, 0, 3],
                 "BarTypeP1": [2, 0, 7],
-                "BarTypeP2": [2, 0, 7]
+                "BarTypeP2": [2, 0, 7],
+                "timer": [1, 0, 60]
             },
             "cfg": {"WP": "But1", "WK": "But2", "SP": "But3", "SK": "But4"}
         }
diff --git a/examples/diambra_arena_gist.py b/examples/diambra_arena_gist.py
index 63ea3fd3..0529f4c6 100755
--- a/examples/diambra_arena_gist.py
+++ b/examples/diambra_arena_gist.py
@@ -3,18 +3,27 @@
 
 if __name__ == '__main__':
 
+    # Environment creation
     env = diambra.arena.make("doapp")
+
+    # Environment reset
     observation = env.reset()
 
+    # Agent-Environment interaction loop
     while True:
+        # (Optional) Environment rendering
         env.render()
 
+        # Random action sampling
         actions = env.action_space.sample()
 
+        # Environment stepping
         observation, reward, done, info = env.step(actions)
 
+        # Episode end (Done condition) check
         if done:
             observation = env.reset()
             break
 
+    # Environment shutdown
     env.close()
diff --git a/tests/env_exec_interface.py b/tests/env_exec_interface.py
index 0876d27a..d834a474 100755
--- a/tests/env_exec_interface.py
+++ b/tests/env_exec_interface.py
@@ -8,7 +8,8 @@
 default_args = {
     "interactive_viz": False,
     "n_episodes": 1,
-    "no_action": False
+    "no_action": False,
+    "render": False
 }
 
 def env_exec(settings, wrappers_settings, traj_rec_settings, args=default_args):
@@ -92,7 +93,7 @@ def env_exec(settings, wrappers_settings, traj_rec_settings, args=default_args):
             print("done =", done)
             for k, v in info.items():
                 print("info[\"{}\"] = {}".format(k, v))
-            env.show_obs(observation, wait_key)
+            env.show_obs(observation, wait_key, args["render"])
             print("--")
             print("Current Cumulative Reward =", cumulative_ep_rew)
 
@@ -107,7 +108,7 @@ def env_exec(settings, wrappers_settings, traj_rec_settings, args=default_args):
                 cumulative_ep_rew = 0.0
 
                 observation = env.reset()
-                env.show_obs(observation, wait_key)
+                env.show_obs(observation, wait_key, args["render"])
 
             if np.any([info["round_done"], info["stage_done"], info["game_done"], info["ep_done"]]):
 
diff --git a/tests/man_test_random.py b/tests/man_test_random.py
index 8aa2a918..4202561e 100644
--- a/tests/man_test_random.py
+++ b/tests/man_test_random.py
@@ -21,11 +21,12 @@
     parser.add_argument("--nEpisodes", type=int, default=1, help="Number of episodes")
     parser.add_argument("--continueGame", type=float, default=-1.0, help="ContinueGame flag (-inf,+1.0]")
     parser.add_argument("--actionSpace", type=str, default="discrete", help="discrete/multi_discrete")
-    parser.add_argument("--attButComb", type=bool, default=False, help="Use attack button combinations (0=F)/1=T")
+    parser.add_argument("--attButComb", type=bool, default=False, help="Use attack button combinations")
     parser.add_argument("--noAction", type=int, default=0, help="If to use no action policy (0=False)")
     parser.add_argument("--recordTraj", type=bool, default=False, help="If to record trajectories")
     parser.add_argument("--hardcore", type=bool, default=False, help="Hard core mode")
-    parser.add_argument("--interactiveViz", type=int, default=0, help="Interactive Visualization (0=False)")
+    parser.add_argument("--interactiveViz", type=bool, default=False, help="Interactive Visualization (False)")
+    parser.add_argument("--render", type=bool, default=True, help="Render frame (False)")
     parser.add_argument("--envAddress", type=str, default="", help="diambraEngine Address")
     parser.add_argument("--wrappers", type=bool, default=False, help="If to use wrappers")
     opt = parser.parse_args()
@@ -89,8 +90,9 @@
 
     # Args
     args = {}
-    args["interactive_viz"] = bool(opt.interactiveViz)
+    args["interactive_viz"] = opt.interactiveViz
     args["no_action"] = True if opt.noAction == 1 else False
     args["n_episodes"] = opt.nEpisodes
+    args["render"] = opt.render
 
     env_exec(settings, wrappers_settings, traj_rec_settings, args)
diff --git a/tests/test_integration.py b/tests/test_integration.py
index b42bcf5a..d2eab600 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -19,7 +19,7 @@ def func(game_id, player, continue_game, action_space, attack_buttons_combinatio
     args = {}
     args["interactive_viz"] = False
     args["n_episodes"] = 1
-
+    args["render"] = False
     args["no_action"] = random.choices([True, False], [no_action_probability, 1.0 - no_action_probability])[0]
 
     try:
diff --git a/tests/test_random.py b/tests/test_random.py
index 4d31eb26..78473709 100755
--- a/tests/test_random.py
+++ b/tests/test_random.py
@@ -22,6 +22,7 @@ def func(player, continue_game, action_space, attack_buttons_combination, frame_
     args["interactive_viz"] = False
     args["n_episodes"] = 1
     args["no_action"] = random.choices([True, False], [no_action_prob, 1.0 - no_action_prob])[0]
+    args["render"] = False
 
     diambra_engine_mock = DiambraEngineMock()