From 37a13f4a76fecf161184bd528d094f9f27ceeee1 Mon Sep 17 00:00:00 2001 From: Alessandro Palmas Date: Thu, 27 Jul 2023 22:40:44 -0400 Subject: [PATCH] Update engine_mock to use timer --- .vscode/settings.json | 3 +- diambra/arena/utils/engine_mock.py | 77 ++++++++++++------------ diambra/arena/utils/integratedGames.json | 18 ++++-- examples/diambra_arena_gist.py | 9 +++ tests/env_exec_interface.py | 7 ++- tests/man_test_random.py | 8 ++- tests/test_integration.py | 2 +- tests/test_random.py | 1 + 8 files changed, 74 insertions(+), 51 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 26822786..157c40ab 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,7 +2,8 @@ "python.testing.cwd": "${workspaceFolder}/tests/", "python.testing.pytestArgs": [ "--rootdir", - "${workspaceFolder}/tests" + "${workspaceFolder}/tests", + "-s" ], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true diff --git a/diambra/arena/utils/engine_mock.py b/diambra/arena/utils/engine_mock.py index 33cd74f6..ab5164fb 100644 --- a/diambra/arena/utils/engine_mock.py +++ b/diambra/arena/utils/engine_mock.py @@ -6,11 +6,10 @@ class DiambraEngineMock: - def __init__(self, steps_per_round=20, fps=1000): + def __init__(self, fps=1000): # Game features self.game_data = None - self.steps_per_round = steps_per_round self.fps = fps # Random seed @@ -18,10 +17,10 @@ def __init__(self, steps_per_round=20, fps=1000): random.seed(time_dep_seed) # Class state variables initialization - self.n_steps = 0 + self.timer = 0 self.n_rounds_won = 0 self.n_rounds_lost = 0 - self.n_stages = 0 + self.current_stage_number = 0 self.n_continue = 0 self.side_p1 = 0 self.side_p2 = 1 @@ -42,22 +41,22 @@ def generate_ram_states(self): self.ram_states[k][3] = random.choices(range(v[1], v[2] + 1))[0] # Setting meaningful values to ram states - self.ram_states["stage"][3] = self.n_stages + 1 + self.ram_states["stage"][3] = self.current_stage_number + 1 self.ram_states["SideP1"][3] = self.side_p1 self.ram_states["SideP2"][3] = self.side_p2 self.ram_states["WinsP1"][3] = self.n_rounds_won self.ram_states["WinsP2"][3] = self.n_rounds_lost - self.ram_states["CharP1"][3] = self.char_p1 - self.ram_states["CharP2"][3] = self.char_p2 + values = [[self.char_p1, self.char_p2], [self.health_p1, self.health_p2]] - if self.game_data["number_of_chars_per_round"] == 1: - self.ram_states["HealthP1"][3] = self.health_p1 - self.ram_states["HealthP2"][3] = self.health_p2 - else: - for idx in range(self.game_data["number_of_chars_per_round"]): - self.ram_states["Health{}P1".format(idx+1)][3] = self.health_p1 - self.ram_states["Health{}P2".format(idx+1)][3] = self.health_p2 + for idx, state in enumerate(["Char", "Health"]): + for jdx, text in enumerate(["", "1", "2", "3"]): + for kdx, player in enumerate(["P1", "P2"]): + key = "{}{}{}".format(state, text, player) + if (key in self.ram_states): + self.ram_states[key][3] = values[idx][kdx] + + self.ram_states["timer"][3] = int(self.timer) # Send env settings, retrieve env info and int variables list [pb low level] def _mock_env_init(self, env_settings_pb): @@ -91,7 +90,8 @@ def _mock_env_init(self, env_settings_pb): self.continue_per_episode = - int(self.settings.continue_game) if self.settings.continue_game < 0.0 else int(self.settings.continue_game*10) self.delta_health = self.game_data["health"][1] - self.game_data["health"][0] - self.base_hit = int(self.delta_health * (self.game_data["n_actions"][0] + self.game_data["n_actions"][0]) / (self.game_data["n_actions"][1] * (self.steps_per_round - 1))) + self.base_hit = int(self.delta_health * self.game_data["n_actions"][1] / + ((self.game_data["n_actions"][0] + self.game_data["n_actions"][2]) * (self.game_data["ram_states"]["timer"][2] / self.settings.step_ratio))) # Generate the ram states map self.ram_states = self.game_data["ram_states"] @@ -137,7 +137,7 @@ def _mock_env_init(self, env_settings_pb): return response def generate_frame(self): - frame = np.ones((self.frame_shape), dtype=np.int8) * ((self.n_stages * self.game_data["rounds_per_stage"] + self.n_steps) % 255) + frame = np.ones((self.frame_shape), dtype=np.int8) * ((self.current_stage_number * self.game_data["rounds_per_stage"] + int(self.timer)) % 255) return frame.tobytes() # Set delta health @@ -147,10 +147,9 @@ def set_perfect_chance(self): # Reset game state def reset_state(self): # Reset class state - self.n_steps = 0 self.n_rounds_won = 0 self.n_rounds_lost = 0 - self.n_stages = 0 + self.current_stage_number = 0 self.n_continue = 0 # Actions @@ -179,6 +178,7 @@ def reset_state(self): self.side_p2 = 1 self.health_p1 = self.game_data["health"][1] self.health_p2 = self.game_data["health"][1] + self.timer = self.game_data["ram_states"]["timer"][2] self.reward = 0 @@ -195,14 +195,14 @@ def reset_state(self): self.char_p2 = self.game_data["char_list"].index(self.settings.characters.p2[0]) elif self.player == "P1": - self.char_p2 = self.n_stages + self.char_p2 = self.current_stage_number if (self.settings.characters.p1[0] == "Random"): self.char_p1 = random.choices(range(len(self.game_data["char_list"])))[0] else: self.char_p1 = self.game_data["char_list"].index(self.settings.characters.p1[0]) else: - self.char_p1 = self.n_stages + self.char_p1 = self.current_stage_number if (self.settings.characters.p2[0] == "Random"): self.char_p2 = random.choices(range(len(self.game_data["char_list"])))[0] else: @@ -212,7 +212,7 @@ def reset_state(self): def new_game_state(self, mov_p1=0, att_p1=0, mov_p2=0, att_p2=0): # Sleep to simulate computer time elapsed - time.sleep(1.0/self.fps) + time.sleep(1.0/(self.settings.step_ratio * self.fps)) # Actions self.mov_p1 = mov_p1 @@ -227,30 +227,34 @@ def new_game_state(self, mov_p1=0, att_p1=0, mov_p2=0, att_p2=0): self.episode_done_ = False self.env_done_ = False - self.n_steps += 1 + self.timer -= (1.0 * self.settings.step_ratio) / 60.0 starting_health_p1 = self.health_p1 starting_health_p2 = self.health_p2 # Health evolution - hit_prob = self.base_round_winning_probability ** self.n_stages + hit_prob = self.base_round_winning_probability ** self.current_stage_number - if self.player == "P2": - if not self.perfect: - self.health_p2 -= random.choices([self.base_hit, 0], [1.0 - hit_prob, hit_prob])[0] + if self.player == "P1P2": if att_p1 != 0: + self.health_p2 -= random.choices([self.base_hit, 0], [hit_prob, 1.0 - hit_prob])[0] + if att_p2 != 0: self.health_p1 -= random.choices([self.base_hit, 0], [hit_prob, 1.0 - hit_prob])[0] - else: - self.health_p1 -= random.choices([self.base_hit, 0], [1.0 - hit_prob, hit_prob])[0] + elif self.player == "P1": if att_p1 != 0: self.health_p2 -= random.choices([self.base_hit, 0], [hit_prob, 1.0 - hit_prob])[0] - if (self.player == "P1P2" and att_p2 == 0) or self.perfect: - self.health_p1 = starting_health_p1 + if not self.perfect: + self.health_p1 -= random.choices([self.base_hit, 0], [1.0 - hit_prob, hit_prob])[0] + else: + if att_p1 != 0: + self.health_p1 -= random.choices([self.base_hit, 0], [hit_prob, 1.0 - hit_prob])[0] + if not self.perfect: + self.health_p2 -= random.choices([self.base_hit, 0], [1.0 - hit_prob, hit_prob])[0] self.health_p1 = max(self.health_p1, self.game_data["health"][0]) self.health_p2 = max(self.health_p2, self.game_data["health"][0]) - if (min(self.health_p1, self.health_p2) == self.game_data["health"][0]) or ((self.n_steps % self.steps_per_round) == 0): + if (min(self.health_p1, self.health_p2) == self.game_data["health"][0]) or (self.timer <= 0): self.round_done_ = True if self.health_p1 > self.health_p2: @@ -280,16 +284,16 @@ def new_game_state(self, mov_p1=0, att_p1=0, mov_p2=0, att_p2=0): if self.n_rounds_won == self.game_data["rounds_per_stage"]: self.stage_done_ = True - self.n_stages += 1 + self.current_stage_number += 1 self.n_rounds_won = 0 self.n_rounds_lost = 0 if self.player == "P1P2": self.game_done_ = True self.episode_done_ = True elif self.player == "P1": - self.char_p2 = self.n_stages + self.char_p2 = self.current_stage_number else: - self.char_p1 = self.n_stages + self.char_p1 = self.current_stage_number if self.n_rounds_lost == self.game_data["rounds_per_stage"]: self.game_done_ = True @@ -300,7 +304,7 @@ def new_game_state(self, mov_p1=0, att_p1=0, mov_p2=0, att_p2=0): self.n_rounds_won = 0 self.n_rounds_lost = 0 - if self.n_stages == self.game_data["stages_per_game"]: + if self.current_stage_number == self.game_data["stages_per_game"]: self.game_done_ = True self.episode_done_ = True @@ -312,12 +316,11 @@ def new_game_state(self, mov_p1=0, att_p1=0, mov_p2=0, att_p2=0): if np.any([self.round_done_, self.stage_done_, self.game_done_]): - self.n_steps = 0 - self.side_p1 = 0 self.side_p2 = 1 self.health_p1 = self.game_data["health"][1] self.health_p2 = self.game_data["health"][1] + self.timer = self.game_data["ram_states"]["timer"][2] # Set delta healths self.set_perfect_chance() diff --git a/diambra/arena/utils/integratedGames.json b/diambra/arena/utils/integratedGames.json index eeff1f30..df5b5db6 100644 --- a/diambra/arena/utils/integratedGames.json +++ b/diambra/arena/utils/integratedGames.json @@ -39,7 +39,8 @@ "CharP1": [2, 0, 10], "CharP2": [2, 0, 10], "HealthP1": [1, 0, 208], - "HealthP2": [1, 0, 208] + "HealthP2": [1, 0, 208], + "timer": [1, 0, 40] }, "cfg": {"H": "But6", "P": "But1", "K": "But2"} }, @@ -99,7 +100,8 @@ "SuperMaxCountP1": [1, 1, 3], "SuperMaxCountP2": [1, 1, 3], "HealthP1": [1, -1, 160], - "HealthP2": [1, -1, 160] + "HealthP2": [1, -1, 160], + "timer": [1, 0, 99] }, "cfg": {"LP": "But4", "MP": "But1", "HP": "But5", "LK": "But3", "MK": "But2", "HK": "But6"} }, @@ -162,7 +164,8 @@ "ActiveCharP1": [0, 0, 1], "ActiveCharP2": [0, 0, 1], "BarStatusP1": [2, 0, 4], - "BarStatusP2": [2, 0, 4] + "BarStatusP2": [2, 0, 4], + "timer": [1, 0, 60] }, "cfg": {"LP": "But4", "RP": "But1", "LK": "But3", "RK": "But2", "TAG": "But6"} }, @@ -211,7 +214,8 @@ "AggressorBarP1": [1, 0, 48], "AggressorBarP2": [1, 0, 48], "HealthP1": [1, 0, 166], - "HealthP2": [1, 0, 166] + "HealthP2": [1, 0, 166], + "timer": [1, 0, 100] }, "cfg": {"HP": "But1", "HK": "But2", "LK": "But3", "LP": "But4", "RUN": "But5", "BLK": "But6"} }, @@ -275,7 +279,8 @@ "PowerBarP1": [1, 0, 64], "PowerBarP2": [1, 0, 64], "HealthP1": [1, 0, 125], - "HealthP2": [1, 0, 125] + "HealthP2": [1, 0, 125], + "timer": [1, 0, 60] }, "cfg": {"WS": "But1", "MS": "But2", "K": "But3", "M": "But4"} }, @@ -337,7 +342,8 @@ "WinsP1": [1, 0, 3], "WinsP2": [1, 0, 3], "BarTypeP1": [2, 0, 7], - "BarTypeP2": [2, 0, 7] + "BarTypeP2": [2, 0, 7], + "timer": [1, 0, 60] }, "cfg": {"WP": "But1", "WK": "But2", "SP": "But3", "SK": "But4"} } diff --git a/examples/diambra_arena_gist.py b/examples/diambra_arena_gist.py index 63ea3fd3..0529f4c6 100755 --- a/examples/diambra_arena_gist.py +++ b/examples/diambra_arena_gist.py @@ -3,18 +3,27 @@ if __name__ == '__main__': + # Environment creation env = diambra.arena.make("doapp") + + # Environment reset observation = env.reset() + # Agent-Environment interaction loop while True: + # (Optional) Environment rendering env.render() + # Action random sampling actions = env.action_space.sample() + # Environment stepping observation, reward, done, info = env.step(actions) + # Episode end (Done condition) check if done: observation = env.reset() break + # Environment shutdown env.close() diff --git a/tests/env_exec_interface.py b/tests/env_exec_interface.py index 0876d27a..d834a474 100755 --- a/tests/env_exec_interface.py +++ b/tests/env_exec_interface.py @@ -8,7 +8,8 @@ default_args = { "interactive_viz": False, "n_episodes": 1, - "no_action": False + "no_action": False, + "render": False } def env_exec(settings, wrappers_settings, traj_rec_settings, args=default_args): @@ -92,7 +93,7 @@ def env_exec(settings, wrappers_settings, traj_rec_settings, args=default_args): print("done =", done) for k, v in info.items(): print("info[\"{}\"] = {}".format(k, v)) - env.show_obs(observation, wait_key) + env.show_obs(observation, wait_key, args["render"]) print("--") print("Current Cumulative Reward =", cumulative_ep_rew) @@ -107,7 +108,7 @@ def env_exec(settings, wrappers_settings, traj_rec_settings, args=default_args): cumulative_ep_rew = 0.0 observation = env.reset() - env.show_obs(observation, wait_key) + env.show_obs(observation, wait_key, args["render"]) if np.any([info["round_done"], info["stage_done"], info["game_done"], info["ep_done"]]): diff --git a/tests/man_test_random.py b/tests/man_test_random.py index 8aa2a918..4202561e 100644 --- a/tests/man_test_random.py +++ b/tests/man_test_random.py @@ -21,11 +21,12 @@ parser.add_argument("--nEpisodes", type=int, default=1, help="Number of episodes") parser.add_argument("--continueGame", type=float, default=-1.0, help="ContinueGame flag (-inf,+1.0]") parser.add_argument("--actionSpace", type=str, default="discrete", help="discrete/multi_discrete") - parser.add_argument("--attButComb", type=bool, default=False, help="Use attack button combinations (0=F)/1=T") + parser.add_argument("--attButComb", type=bool, default=False, help="Use attack button combinations") parser.add_argument("--noAction", type=int, default=0, help="If to use no action policy (0=False)") parser.add_argument("--recordTraj", type=bool, default=False, help="If to record trajectories") parser.add_argument("--hardcore", type=bool, default=False, help="Hard core mode") - parser.add_argument("--interactiveViz", type=int, default=0, help="Interactive Visualization (0=False)") + parser.add_argument("--interactiveViz", type=bool, default=False, help="Interactive Visualization (False)") + parser.add_argument("--render", type=bool, default=True, help="Render frame (False)") parser.add_argument("--envAddress", type=str, default="", help="diambraEngine Address") parser.add_argument("--wrappers", type=bool, default=False, help="If to use wrappers") opt = parser.parse_args() @@ -89,8 +90,9 @@ # Args args = {} - args["interactive_viz"] = bool(opt.interactiveViz) + args["interactive_viz"] = opt.interactiveViz args["no_action"] = True if opt.noAction == 1 else False args["n_episodes"] = opt.nEpisodes + args["render"] = opt.render env_exec(settings, wrappers_settings, traj_rec_settings, args) diff --git a/tests/test_integration.py b/tests/test_integration.py index b42bcf5a..d2eab600 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -19,7 +19,7 @@ def func(game_id, player, continue_game, action_space, attack_buttons_combinatio args = {} args["interactive_viz"] = False args["n_episodes"] = 1 - + args["render"] = False args["no_action"] = random.choices([True, False], [no_action_probability, 1.0 - no_action_probability])[0] try: diff --git a/tests/test_random.py b/tests/test_random.py index 4d31eb26..78473709 100755 --- a/tests/test_random.py +++ b/tests/test_random.py @@ -22,6 +22,7 @@ def func(player, continue_game, action_space, attack_buttons_combination, frame_ args["interactive_viz"] = False args["n_episodes"] = 1 args["no_action"] = random.choices([True, False], [no_action_prob, 1.0 - no_action_prob])[0] + args["render"] = False diambra_engine_mock = DiambraEngineMock()