Update engine_mock to use timer

diambra committed on Jul 28, 2023
commit 37a13f4 (1 parent: 7a69884)
Show file tree

Hide file tree

Showing 8 changed files with 74 additions and 51 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -2,7 +2,8 @@
  "python.testing.cwd": "${workspaceFolder}/tests/",
  "python.testing.pytestArgs": [
  "--rootdir",
- "${workspaceFolder}/tests"
+ "${workspaceFolder}/tests",
+ "-s"
  ],
  "python.testing.unittestEnabled": false,
  "python.testing.pytestEnabled": true

diff --git a/diambra/arena/utils/engine_mock.py b/diambra/arena/utils/engine_mock.py
@@ -6,22 +6,21 @@
 
 class DiambraEngineMock:
 
- def __init__(self, steps_per_round=20, fps=1000):
+ def __init__(self, fps=1000):
 
  # Game features
  self.game_data = None
- self.steps_per_round = steps_per_round
  self.fps = fps
 
  # Random seed
  time_dep_seed = int((time.time() - int(time.time() - 0.5)) * 1000)
  random.seed(time_dep_seed)
 
  # Class state variables initialization
- self.n_steps = 0
+ self.timer = 0
  self.n_rounds_won = 0
  self.n_rounds_lost = 0
- self.n_stages = 0
+ self.current_stage_number = 0
  self.n_continue = 0
  self.side_p1 = 0
  self.side_p2 = 1
@@ -42,22 +41,22 @@ def generate_ram_states(self):
  self.ram_states[k][3] = random.choices(range(v[1], v[2] + 1))[0]
 
  # Setting meaningful values to ram states
- self.ram_states["stage"][3] = self.n_stages + 1
+ self.ram_states["stage"][3] = self.current_stage_number + 1
  self.ram_states["SideP1"][3] = self.side_p1
  self.ram_states["SideP2"][3] = self.side_p2
  self.ram_states["WinsP1"][3] = self.n_rounds_won
  self.ram_states["WinsP2"][3] = self.n_rounds_lost
 
- self.ram_states["CharP1"][3] = self.char_p1
- self.ram_states["CharP2"][3] = self.char_p2
+ values = [[self.char_p1, self.char_p2], [self.health_p1, self.health_p2]]
 
- if self.game_data["number_of_chars_per_round"] == 1:
- self.ram_states["HealthP1"][3] = self.health_p1
- self.ram_states["HealthP2"][3] = self.health_p2
- else:
- for idx in range(self.game_data["number_of_chars_per_round"]):
- self.ram_states["Health{}P1".format(idx+1)][3] = self.health_p1
- self.ram_states["Health{}P2".format(idx+1)][3] = self.health_p2
+ for idx, state in enumerate(["Char", "Health"]):
+ for jdx, text in enumerate(["", "1", "2", "3"]):
+ for kdx, player in enumerate(["P1", "P2"]):
+ key = "{}{}{}".format(state, text, player)
+ if (key in self.ram_states):
+ self.ram_states[key][3] = values[idx][kdx]
+
+ self.ram_states["timer"][3] = int(self.timer)
 
  # Send env settings, retrieve env info and int variables list [pb low level]
  def _mock_env_init(self, env_settings_pb):
@@ -91,7 +90,8 @@ def _mock_env_init(self, env_settings_pb):
 
  self.continue_per_episode = - int(self.settings.continue_game) if self.settings.continue_game < 0.0 else int(self.settings.continue_game*10)
  self.delta_health = self.game_data["health"][1] - self.game_data["health"][0]
- self.base_hit = int(self.delta_health * (self.game_data["n_actions"][0] + self.game_data["n_actions"][0]) / (self.game_data["n_actions"][1] * (self.steps_per_round - 1)))
+ self.base_hit = int(self.delta_health * self.game_data["n_actions"][1] /
+ ((self.game_data["n_actions"][0] + self.game_data["n_actions"][2]) * (self.game_data["ram_states"]["timer"][2] / self.settings.step_ratio)))
 
  # Generate the ram states map
  self.ram_states = self.game_data["ram_states"]
@@ -137,7 +137,7 @@ def _mock_env_init(self, env_settings_pb):
  return response
 
  def generate_frame(self):
- frame = np.ones((self.frame_shape), dtype=np.int8) * ((self.n_stages * self.game_data["rounds_per_stage"] + self.n_steps) % 255)
+ frame = np.ones((self.frame_shape), dtype=np.int8) * ((self.current_stage_number * self.game_data["rounds_per_stage"] + int(self.timer)) % 255)
  return frame.tobytes()
 
  # Set delta health
@@ -147,10 +147,9 @@ def set_perfect_chance(self):
  # Reset game state
  def reset_state(self):
  # Reset class state
- self.n_steps = 0
  self.n_rounds_won = 0
  self.n_rounds_lost = 0
- self.n_stages = 0
+ self.current_stage_number = 0
  self.n_continue = 0
 
  # Actions
@@ -179,6 +178,7 @@ def reset_state(self):
  self.side_p2 = 1
  self.health_p1 = self.game_data["health"][1]
  self.health_p2 = self.game_data["health"][1]
+ self.timer = self.game_data["ram_states"]["timer"][2]
 
  self.reward = 0
 
@@ -195,14 +195,14 @@ def reset_state(self):
  self.char_p2 = self.game_data["char_list"].index(self.settings.characters.p2[0])
 
  elif self.player == "P1":
- self.char_p2 = self.n_stages
+ self.char_p2 = self.current_stage_number
  if (self.settings.characters.p1[0] == "Random"):
  self.char_p1 = random.choices(range(len(self.game_data["char_list"])))[0]
  else:
  self.char_p1 = self.game_data["char_list"].index(self.settings.characters.p1[0])
 
  else:
- self.char_p1 = self.n_stages
+ self.char_p1 = self.current_stage_number
  if (self.settings.characters.p2[0] == "Random"):
  self.char_p2 = random.choices(range(len(self.game_data["char_list"])))[0]
  else:
@@ -212,7 +212,7 @@ def reset_state(self):
  def new_game_state(self, mov_p1=0, att_p1=0, mov_p2=0, att_p2=0):
 
  # Sleep to simulate computer time elapsed
- time.sleep(1.0/self.fps)
+ time.sleep(1.0/(self.settings.step_ratio * self.fps))
 
  # Actions
  self.mov_p1 = mov_p1
@@ -227,30 +227,34 @@ def new_game_state(self, mov_p1=0, att_p1=0, mov_p2=0, att_p2=0):
  self.episode_done_ = False
  self.env_done_ = False
 
- self.n_steps += 1
+ self.timer -= (1.0 * self.settings.step_ratio) / 60.0
 
  starting_health_p1 = self.health_p1
  starting_health_p2 = self.health_p2
 
  # Health evolution
- hit_prob = self.base_round_winning_probability ** self.n_stages
+ hit_prob = self.base_round_winning_probability ** self.current_stage_number
 
- if self.player == "P2":
- if not self.perfect:
- self.health_p2 -= random.choices([self.base_hit, 0], [1.0 - hit_prob, hit_prob])[0]
+ if self.player == "P1P2":
  if att_p1 != 0:
+ self.health_p2 -= random.choices([self.base_hit, 0], [hit_prob, 1.0 - hit_prob])[0]
+ if att_p2 != 0:
  self.health_p1 -= random.choices([self.base_hit, 0], [hit_prob, 1.0 - hit_prob])[0]
- else:
- self.health_p1 -= random.choices([self.base_hit, 0], [1.0 - hit_prob, hit_prob])[0]
+ elif self.player == "P1":
  if att_p1 != 0:
  self.health_p2 -= random.choices([self.base_hit, 0], [hit_prob, 1.0 - hit_prob])[0]
- if (self.player == "P1P2" and att_p2 == 0) or self.perfect:
- self.health_p1 = starting_health_p1
+ if not self.perfect:
+ self.health_p1 -= random.choices([self.base_hit, 0], [1.0 - hit_prob, hit_prob])[0]
+ else:
+ if att_p1 != 0:
+ self.health_p1 -= random.choices([self.base_hit, 0], [hit_prob, 1.0 - hit_prob])[0]
+ if not self.perfect:
+ self.health_p2 -= random.choices([self.base_hit, 0], [1.0 - hit_prob, hit_prob])[0]
 
  self.health_p1 = max(self.health_p1, self.game_data["health"][0])
  self.health_p2 = max(self.health_p2, self.game_data["health"][0])
 
- if (min(self.health_p1, self.health_p2) == self.game_data["health"][0]) or ((self.n_steps % self.steps_per_round) == 0):
+ if (min(self.health_p1, self.health_p2) == self.game_data["health"][0]) or (self.timer <= 0):
  self.round_done_ = True
 
  if self.health_p1 > self.health_p2:
@@ -280,16 +284,16 @@ def new_game_state(self, mov_p1=0, att_p1=0, mov_p2=0, att_p2=0):
 
  if self.n_rounds_won == self.game_data["rounds_per_stage"]:
  self.stage_done_ = True
- self.n_stages += 1
+ self.current_stage_number += 1
  self.n_rounds_won = 0
  self.n_rounds_lost = 0
  if self.player == "P1P2":
  self.game_done_ = True
  self.episode_done_ = True
  elif self.player == "P1":
- self.char_p2 = self.n_stages
+ self.char_p2 = self.current_stage_number
  else:
- self.char_p1 = self.n_stages
+ self.char_p1 = self.current_stage_number
 
  if self.n_rounds_lost == self.game_data["rounds_per_stage"]:
  self.game_done_ = True
@@ -300,7 +304,7 @@ def new_game_state(self, mov_p1=0, att_p1=0, mov_p2=0, att_p2=0):
  self.n_rounds_won = 0
  self.n_rounds_lost = 0
 
- if self.n_stages == self.game_data["stages_per_game"]:
+ if self.current_stage_number == self.game_data["stages_per_game"]:
  self.game_done_ = True
  self.episode_done_ = True
 
@@ -312,12 +316,11 @@ def new_game_state(self, mov_p1=0, att_p1=0, mov_p2=0, att_p2=0):
 
  if np.any([self.round_done_, self.stage_done_, self.game_done_]):
 
- self.n_steps = 0
-
  self.side_p1 = 0
  self.side_p2 = 1
  self.health_p1 = self.game_data["health"][1]
  self.health_p2 = self.game_data["health"][1]
+ self.timer = self.game_data["ram_states"]["timer"][2]
 
  # Set delta healths
  self.set_perfect_chance()

diff --git a/diambra/arena/utils/integratedGames.json b/diambra/arena/utils/integratedGames.json
@@ -39,7 +39,8 @@
  "CharP1": [2, 0, 10],
  "CharP2": [2, 0, 10],
  "HealthP1": [1, 0, 208],
- "HealthP2": [1, 0, 208]
+ "HealthP2": [1, 0, 208],
+ "timer": [1, 0, 40]
  },
  "cfg": {"H": "But6", "P": "But1", "K": "But2"}
  },
@@ -99,7 +100,8 @@
  "SuperMaxCountP1": [1, 1, 3],
  "SuperMaxCountP2": [1, 1, 3],
  "HealthP1": [1, -1, 160],
- "HealthP2": [1, -1, 160]
+ "HealthP2": [1, -1, 160],
+ "timer": [1, 0, 99]
  },
  "cfg": {"LP": "But4", "MP": "But1", "HP": "But5", "LK": "But3", "MK": "But2", "HK": "But6"}
  },
@@ -162,7 +164,8 @@
  "ActiveCharP1": [0, 0, 1],
  "ActiveCharP2": [0, 0, 1],
  "BarStatusP1": [2, 0, 4],
- "BarStatusP2": [2, 0, 4]
+ "BarStatusP2": [2, 0, 4],
+ "timer": [1, 0, 60]
  },
  "cfg": {"LP": "But4", "RP": "But1", "LK": "But3", "RK": "But2", "TAG": "But6"}
  },
@@ -211,7 +214,8 @@
  "AggressorBarP1": [1, 0, 48],
  "AggressorBarP2": [1, 0, 48],
  "HealthP1": [1, 0, 166],
- "HealthP2": [1, 0, 166]
+ "HealthP2": [1, 0, 166],
+ "timer": [1, 0, 100]
  },
  "cfg": {"HP": "But1", "HK": "But2", "LK": "But3", "LP": "But4", "RUN": "But5", "BLK": "But6"}
  },
@@ -275,7 +279,8 @@
  "PowerBarP1": [1, 0, 64],
  "PowerBarP2": [1, 0, 64],
  "HealthP1": [1, 0, 125],
- "HealthP2": [1, 0, 125]
+ "HealthP2": [1, 0, 125],
+ "timer": [1, 0, 60]
  },
  "cfg": {"WS": "But1", "MS": "But2", "K": "But3", "M": "But4"}
  },
@@ -337,7 +342,8 @@
  "WinsP1": [1, 0, 3],
  "WinsP2": [1, 0, 3],
  "BarTypeP1": [2, 0, 7],
- "BarTypeP2": [2, 0, 7]
+ "BarTypeP2": [2, 0, 7],
+ "timer": [1, 0, 60]
  },
  "cfg": {"WP": "But1", "WK": "But2", "SP": "But3", "SK": "But4"}
  }

diff --git a/examples/diambra_arena_gist.py b/examples/diambra_arena_gist.py
@@ -3,18 +3,27 @@
 
 if __name__ == '__main__':
 
+ # Environment creation
  env = diambra.arena.make("doapp")
+
+ # Environment reset
  observation = env.reset()
 
+ # Agent-Environment interaction loop
  while True:
+ # (Optional) Environment rendering
  env.render()
 
+ # Action random sampling
  actions = env.action_space.sample()
 
+ # Environment stepping
  observation, reward, done, info = env.step(actions)
 
+ # Episode end (Done condition) check
  if done:
  observation = env.reset()
  break
 
+ # Environment shutdown
  env.close()
diff --git a/tests/env_exec_interface.py b/tests/env_exec_interface.py
@@ -8,7 +8,8 @@
 default_args = {
  "interactive_viz": False,
  "n_episodes": 1,
- "no_action": False
+ "no_action": False,
+ "render": False
 }
 
 def env_exec(settings, wrappers_settings, traj_rec_settings, args=default_args):
@@ -92,7 +93,7 @@ def env_exec(settings, wrappers_settings, traj_rec_settings, args=default_args):
  print("done =", done)
  for k, v in info.items():
  print("info[\"{}\"] = {}".format(k, v))
- env.show_obs(observation, wait_key)
+ env.show_obs(observation, wait_key, args["render"])
  print("--")
  print("Current Cumulative Reward =", cumulative_ep_rew)
 
@@ -107,7 +108,7 @@ def env_exec(settings, wrappers_settings, traj_rec_settings, args=default_args):
  cumulative_ep_rew = 0.0
 
  observation = env.reset()
- env.show_obs(observation, wait_key)
+ env.show_obs(observation, wait_key, args["render"])
 
  if np.any([info["round_done"], info["stage_done"], info["game_done"], info["ep_done"]]):
 

diff --git a/tests/man_test_random.py b/tests/man_test_random.py
@@ -21,11 +21,12 @@
  parser.add_argument("--nEpisodes", type=int, default=1, help="Number of episodes")
  parser.add_argument("--continueGame", type=float, default=-1.0, help="ContinueGame flag (-inf,+1.0]")
  parser.add_argument("--actionSpace", type=str, default="discrete", help="discrete/multi_discrete")
- parser.add_argument("--attButComb", type=bool, default=False, help="Use attack button combinations (0=F)/1=T")
+ parser.add_argument("--attButComb", type=bool, default=False, help="Use attack button combinations")
  parser.add_argument("--noAction", type=int, default=0, help="If to use no action policy (0=False)")
  parser.add_argument("--recordTraj", type=bool, default=False, help="If to record trajectories")
  parser.add_argument("--hardcore", type=bool, default=False, help="Hard core mode")
- parser.add_argument("--interactiveViz", type=int, default=0, help="Interactive Visualization (0=False)")
+ parser.add_argument("--interactiveViz", type=bool, default=False, help="Interactive Visualization (False)")
+ parser.add_argument("--render", type=bool, default=True, help="Render frame (False)")
  parser.add_argument("--envAddress", type=str, default="", help="diambraEngine Address")
  parser.add_argument("--wrappers", type=bool, default=False, help="If to use wrappers")
  opt = parser.parse_args()
@@ -89,8 +90,9 @@
 
  # Args
  args = {}
- args["interactive_viz"] = bool(opt.interactiveViz)
+ args["interactive_viz"] = opt.interactiveViz
  args["no_action"] = True if opt.noAction == 1 else False
  args["n_episodes"] = opt.nEpisodes
+ args["render"] = opt.render
 
  env_exec(settings, wrappers_settings, traj_rec_settings, args)
diff --git a/tests/test_integration.py b/tests/test_integration.py
@@ -19,7 +19,7 @@ def func(game_id, player, continue_game, action_space, attack_buttons_combinatio
  args = {}
  args["interactive_viz"] = False
  args["n_episodes"] = 1
-
+ args["render"] = False
  args["no_action"] = random.choices([True, False], [no_action_probability, 1.0 - no_action_probability])[0]
 
  try:

diff --git a/tests/test_random.py b/tests/test_random.py
@@ -22,6 +22,7 @@ def func(player, continue_game, action_space, attack_buttons_combination, frame_
  args["interactive_viz"] = False
  args["n_episodes"] = 1
  args["no_action"] = random.choices([True, False], [no_action_prob, 1.0 - no_action_prob])[0]
+ args["render"] = False
 
  diambra_engine_mock = DiambraEngineMock()