From e66c1812405ac886a5bab397734b869b4ef4ff40 Mon Sep 17 00:00:00 2001 From: thatguy11325 <148832074+thatguy11325@users.noreply.github.com> Date: Sun, 13 Oct 2024 10:46:01 -0400 Subject: [PATCH] More tuning to the sweep config --- config.yaml | 2 +- pokemonred_puffer/environment.py | 2 ++ pokemonred_puffer/sweep.py | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/config.yaml b/config.yaml index 1a47b43..cb01e79 100644 --- a/config.yaml +++ b/config.yaml @@ -93,7 +93,7 @@ train: compile: True compile_mode: "reduce-overhead" float32_matmul_precision: "high" - total_timesteps: 500_000_000 # 100_000_000_000 for full games + total_timesteps: 1_000_000_000 # 100_000_000_000 for full games batch_size: 65536 minibatch_size: 2048 learning_rate: 2.0e-4 diff --git a/pokemonred_puffer/environment.py b/pokemonred_puffer/environment.py index 11ee2cf..77ddd81 100644 --- a/pokemonred_puffer/environment.py +++ b/pokemonred_puffer/environment.py @@ -1435,6 +1435,8 @@ def agent_stats(self, action): "in_battle": self.read_m("wIsInBattle") > 0, "event": self.progress_reward["event"], "max_steps": self.get_max_steps(), + # redundant but this is so we don't interfere with the swarm logic + "required_count": len(self.required_events) + len(self.required_items), } | { "exploration": { diff --git a/pokemonred_puffer/sweep.py b/pokemonred_puffer/sweep.py index 8a4b988..65661b4 100644 --- a/pokemonred_puffer/sweep.py +++ b/pokemonred_puffer/sweep.py @@ -140,13 +140,13 @@ def launch_sweep( finished.add(run["name"]) summary_metrics = json.loads(run["summaryMetrics"]) if ( - "environment/stats/event" in summary_metrics + "environment/stats/required_count" in summary_metrics and "performance/uptime" in summary_metrics ): obs_in = ObservationInParam( input=json.loads(run["config"])["x"]["value"], # TODO: try out other stats like required count - output=summary_metrics["environment/stats/event"], + output=summary_metrics["environment/stats/required_count"], cost=summary_metrics["performance/uptime"], ) carbs.observe(obs_in)