added default game
kywch committed May 16, 2024
1 parent 5f1a2f9 commit 7d6835a
Showing 6 changed files with 260 additions and 26 deletions.
27 changes: 24 additions & 3 deletions evaluate.py
@@ -3,6 +3,7 @@
import random
import logging
import argparse
from collections import defaultdict

import numpy as np

@@ -17,11 +18,12 @@

NUM_PVP_EVAL_EPISODE = 200
GAME_CLS = {
"survive": environment.DefaultGame,
"battle": environment.TeamBattle,
"task": environment.AgentTaskEval,
"race": environment.RacetoCenter,
"koh": environment.EasyKingoftheHill,
"sandwich": environment.Sandwich,
# "radio": environment.RadioRaid,
}

ENV_CONFIG = pufferlib.namespace(
@@ -35,6 +37,7 @@
"num_agents_per_team": 8,
"resilient_population": 0,
"spawn_immunity": 20,
"curriculum_file_path": "curriculum/neurips_curriculum_with_embedding.pkl",
}
)

@@ -170,11 +173,12 @@ def perform_eval(self, game, seed, num_eval_episode, save_file_prefix):
}

game_results = []
task_results = {}
cnt_episode = 0
while cnt_episode < num_eval_episode:
_, infos = clean_pufferl.evaluate(pufferl_data)

- for vals in infos.values():
+ for pol, vals in infos.items():
cnt_episode += sum(vals["episode_done"])
if "game_scores" in vals:
for episode in vals["game_scores"]:
@@ -189,12 +193,28 @@
}
)

# task_results is for the task-level info, used in AgentTaskEval
if game == "task":
if pol not in task_results:
task_results[pol] = defaultdict(list)
for k, v in vals.items():
if k == "length":
task_results[pol][k] += v # length is a plain list
if k.startswith("curriculum"):
task_results[pol][k] += [vv[0] for vv in v]

pufferl_data.sort_keys = [] # TODO: check if this solves memory leak

print(f"\nSeed: {seed}, evaluated {cnt_episode} episodes.\n")

file_name = f"{save_file_prefix}_{seed}.json"
self._save_results(game_results, file_name)

if game == "task":
# Individual task completion info
file_name = f"curriculum_info_{seed}.json"
self._save_results(task_results, file_name)

clean_pufferl.close(pufferl_data)
return game_results, file_name

@@ -228,7 +248,7 @@ def run(self, game, seed=None, num_episode=None, save_file_prefix=None):
"--game",
type=str,
default="all",
- choices="all battle race koh sandwich".split(),
+ choices="all battle race koh sandwich task".split(),
help="Game to evaluate/replay",
)
parser.add_argument("-s", "--seed", type=int, default=1, help="Random seed")
@@ -247,6 +267,7 @@ def run(self, game, seed=None, num_episode=None, save_file_prefix=None):

if args.game == "all":
game_list = list(GAME_CLS.keys())
game_list.remove("task") # task is only for AgentTaskEval
elif args.game in GAME_CLS:
game_list = [args.game]
else:
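For reference, the task_results payload assembled in perform_eval above is keyed by policy, then by metric. A minimal sketch of its shape, with illustrative policy and task names (not taken from the repo):

task_results = {
    "learner": {
        "length": [512, 1023, 730],  # plain list of episode lengths
        "curriculum/TickGE_900": [0.85, 1.0, 0.4],  # one progress float per episode
    }
}

This is the structure that proc_task_eval.py below expects to find inside each curriculum_info_<seed>.json.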
183 changes: 183 additions & 0 deletions proc_task_eval.py
@@ -0,0 +1,183 @@
import os
import json
import logging
import argparse

import numpy as np
import polars as pl

# Make the table output simpler
pl.Config.set_tbl_hide_dataframe_shape(True)
pl.Config.set_tbl_formatting("NOTHING")
pl.Config.set_tbl_hide_column_data_types(True)

# string matching for task names
WEIGHT_DICT = {
"TickGE": ("survival", 100 / 6), # 1 survival task
"PLAYER_KILL": ("combat", 100 / (6 * 3)), # 3 combat tasks
"DefeatEntity": ("combat", 100 / (6 * 3)),
"GO_FARTHEST": ("exploration", 100 / (6 * 2)), # 2 exploration tasks
"OccupyTile": ("exploration", 100 / (6 * 2)),
"AttainSkill": ("skill", 100 / (6 * 8)), # 8 skill tasks
"HarvestItem": ("item", 100 / (6 * 44)), # 44 item tasks
"ConsumeItem": ("item", 100 / (6 * 44)),
"EquipItem": ("item", 100 / (6 * 44)),
"FullyArmed": ("item", 100 / (6 * 44)),
"EARN_GOLD": ("market", 100 / (6 * 5)), # 5 market tasks
"BUY_ITEM": ("market", 100 / (6 * 5)),
"EarnGold": ("market", 100 / (6 * 5)),
"HoardGold": ("market", 100 / (6 * 5)),
"MakeProfit": ("market", 100 / (6 * 5)),
}


def get_task_weight(task_name):
for key, val in WEIGHT_DICT.items():
if key in task_name:
return val
logging.warning(f"Task name {task_name} not found in weight dict")
return "etc", 0


def get_summary_dict(progress, key):
# progress = vals if key == "length" else [v[0] for v in vals]
summ = {"count": len(progress), "mean": np.mean(progress), "median": np.median(progress)}

if key == "length":
progress = np.array(progress) / 1023 # full episode length

summ["completed"] = np.mean([1 if v >= 1 else 0 for v in progress])
summ["over30pcnt"] = np.mean([1 if v >= 0.3 else 0 for v in progress])
return summ
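
A worked example with made-up progress values for a curriculum task (progress is a fraction in [0, 1], so the length normalization does not apply):

get_summary_dict([1.0, 0.5, 0.2], key="curriculum/example")
# -> {"count": 3, "mean": 0.5666..., "median": 0.5,
#     "completed": 0.3333..., "over30pcnt": 0.6666...}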


def summarize_single_eval(data, weighted_score=False):
summary = {}

# task-level info
for key, vals in data.items():
if key.startswith("curriculum") or key == "length":
summary[key] = get_summary_dict(vals, key)

if weighted_score and key.startswith("curriculum"):
category, weight = get_task_weight(key)
summary[key]["category"] = category
summary[key]["weight"] = weight
summary[key]["weighted_score"] = summary[key]["mean"] * weight

# meta info
summary["avg_progress"] = np.mean(
[v["mean"] for k, v in summary.items() if k.startswith("curriculum")]
)
if weighted_score:
summary["weighted_score"] = np.sum(
[v["weighted_score"] for k, v in summary.items() if k.startswith("curriculum")]
)
return summary


def process_eval_files(policy_store_dir, eval_prefix):
summ_policy = []
summ_task = []

for file in os.listdir(policy_store_dir):
# NOTE: assumes the file naming convention is 'curriculum_info_<seed>.json'
if not file.startswith(eval_prefix) or not file.endswith(".json"):
continue

random_seed = file.split("_")[2].replace(".json", "")

with open(os.path.join(policy_store_dir, file), "r") as f:
data = json.load(f)

for pol_name, pol_data in data.items():
if len(pol_data) == 0:
continue

mode = "pvp" if len(pol_data) > 1 else "pve"

summary = summarize_single_eval(pol_data, weighted_score=True)
summ_policy.append(
{
"policy_name": pol_name,
"mode": mode,
"seed": random_seed,
"count": summary["length"]["count"],
"length": summary["length"]["mean"],
"score": summary["avg_progress"],
"weighted_score": summary["weighted_score"],
}
)

# also gather the results across random seeds for each task, then average
for task_name, task_data in summary.items():
if not task_name.startswith("curriculum"):
continue
summ_task.append(
{
"category": task_data["category"],
"task_name": task_name,
"weight": task_data["weight"],
"policy_name": pol_name,
"mode": mode,
"seed": random_seed,
"count": task_data["count"],
"score": task_data["mean"],
}
)

summ_df = pl.DataFrame(summ_policy).sort(["policy_name", "mode", "seed"])
summ_grp = summ_df.group_by(["policy_name", "mode"]).agg(
pl.col("length").mean(),
pl.col("score").mean(),
pl.col("weighted_score").mean(),
)
summ_grp = summ_grp.sort("weighted_score", descending=True)
summ_grp.write_csv(
os.path.join(policy_store_dir, "score_summary.tsv"), separator="\t", float_precision=6
)
print("\nPolicy score summary, sorted by weighted_score:")
print(summ_grp)

task_df = pl.DataFrame(summ_task).sort(["mode", "category", "task_name", "policy_name", "seed"])
task_grp = task_df.group_by(["mode", "category", "task_name", "policy_name"]).agg(
pl.col("score").mean()
)
task_grp = task_grp.sort(["mode", "category", "task_name", "policy_name"])
task_grp.write_csv(
os.path.join(policy_store_dir, "score_task_summary.tsv"), separator="\t", float_precision=6
)
cate_grp = task_df.group_by(["mode", "category", "policy_name"]).agg(pl.col("score").mean())
cate_grp = cate_grp.sort(["mode", "category", "policy_name"])
cate_grp.write_csv(
os.path.join(policy_store_dir, "score_category_summary.tsv"),
separator="\t",
float_precision=6,
)

if len(summ_df["seed"].unique()) > 1:
summ_df.write_csv(
os.path.join(policy_store_dir, "score_by_seed.tsv"), separator="\t", float_precision=6
)
task_df.write_csv(
os.path.join(policy_store_dir, "score_by_task_seed.tsv"),
separator="\t",
float_precision=6,
)

return summ_df, summ_grp, task_df, task_grp, cate_grp


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Process the evaluation result files")
parser.add_argument("policy_store_dir", type=str, help="Path to the policy directory")
parser.add_argument(
"-p",
"--prefix",
type=str,
default="curriculum_",
help="Prefix of the evaluation result files",
)
args = parser.parse_args()

process_eval_files(args.policy_store_dir, args.prefix)
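
The same processing can be driven from Python; a minimal sketch, assuming the curriculum_info_<seed>.json files produced by evaluate.py's task mode sit in a local directory (the path below is illustrative):

from proc_task_eval import process_eval_files

summ_df, summ_grp, task_df, task_grp, cate_grp = process_eval_files(
    "runs/policy_store", eval_prefix="curriculum_"
)
# Equivalent CLI: python proc_task_eval.py runs/policy_store -p curriculum_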
27 changes: 19 additions & 8 deletions reinforcement_learning/environment.py
@@ -29,7 +29,7 @@ def get_team_dict(num_agents, num_agents_per_team):
}


- class TeamBattle(ng.TeamBattle):
+ class DefaultGame(ng.DefaultGame):
_next_fog_onset = None
_next_fog_speed = None
_next_num_npc = None
@@ -60,7 +60,18 @@ def _set_config(self):
self.config.set_for_episode("NPC_N", npc_num)


class TeamBattle(ng.TeamBattle, DefaultGame):
pass
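
The empty-bodied TeamBattle above relies on Python's method resolution order: assuming ng.TeamBattle does not define _set_config itself, the subclass inherits DefaultGame's randomized fog and NPC settings while keeping ng.TeamBattle's game rules. A self-contained sketch of the pattern, with toy class names:

class Base:
    def _set_config(self):
        return "base"

class LocalDefault(Base):  # stands in for the DefaultGame subclass above
    def _set_config(self):
        return "randomized"

class LocalBattle(Base):  # stands in for ng.TeamBattle
    pass

class Battle(LocalBattle, LocalDefault):  # mirrors TeamBattle's bases
    pass

assert Battle()._set_config() == "randomized"  # the mixin's override wins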


class AgentTraining(ng.AgentTraining):
_next_num_npc = None

def _set_config(self):
self.config.reset()
npc_num = self._next_num_npc or self._np_random.integers(64, 256)
self.config.set_for_episode("NPC_N", npc_num)

def _get_candidate_tasks(self, eval_mode=False):
with open(self.config.CURRICULUM_FILE_PATH, "rb") as f:
# curriculum file may have been changed, so read the file when sampling
@@ -95,7 +106,7 @@ def _define_tasks(self):
return self._make_agent_tasks(cand_specs)


- class AmmoTraining(ng.AgentTraining):
+ class AmmoTraining(AgentTraining):
def is_compatible(self):
return self.config.are_systems_enabled(["COMBAT", "EQUIPMENT", "PROFESSION"])

@@ -198,19 +209,17 @@ def __init__(self, env_args: Namespace):
self.set("HORIZON", env_args.max_episode_length)
self.set("MAP_N", env_args.num_maps)
self.set("TEAMS", get_team_dict(env_args.num_agents, env_args.num_agents_per_team))
# self.set(
# "DEATH_FOG_ONSET",
# env_args.death_fog_tick if isinstance(env_args.death_fog_tick, int) else None,
# )
self.set("PATH_MAPS", f"{env_args.maps_path}/{env_args.map_size}/")
self.set("MAP_CENTER", env_args.map_size)
# self.set("NPC_N", env_args.num_npcs)
self.set("NPC_LEVEL_MULTIPLIER", 0.5) # make the high-level npcs weaker

self.set("RESOURCE_RESILIENT_POPULATION", env_args.resilient_population)
self.set("COMBAT_SPAWN_IMMUNITY", env_args.spawn_immunity)

self.set("CURRICULUM_FILE_PATH", env_args.curriculum_file_path)

# Make the high-level npcs weaker. Huge impact on the difficulty
self.set("NPC_LEVEL_MULTIPLIER", 0.5)


class FullGameConfig(
MiniGameConfig,
@@ -249,6 +258,8 @@ def make_env_creator(
game_packs = [(TeamBattle, 5), (AmmoTraining, 1)]
elif train_flag == "tb_curr":
game_packs = [(TeamBattle, 1), (AgentTraining, 1)]
elif train_flag == "full_surv":
game_packs = [(DefaultGame, 1), (TeamBattle, 1), (AgentTraining, 1)]
else:
raise ValueError(f"Invalid train_flag: {train_flag}")

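A hypothetical use of the new flag (make_env_creator's remaining parameters are not shown in this hunk, and treating the second tuple element as a sampling weight is an assumption):

env_creator = make_env_creator(train_flag="full_surv")
# episodes would then be drawn evenly from DefaultGame, TeamBattle,
# and AgentTraining, since each game pack carries weight 1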
(Diffs for the remaining three changed files are not shown.)
