added default game
kywch committed May 16, 2024
1 parent 5f1a2f9 commit 7d6835a
Showing 6 changed files with 260 additions and 26 deletions.
27 changes: 24 additions & 3 deletions evaluate.py
@@ -3,6 +3,7 @@
import random
import logging
import argparse
from collections import defaultdict

import numpy as np

@@ -17,11 +18,12 @@

NUM_PVP_EVAL_EPISODE = 200
GAME_CLS = {
"survive": environment.DefaultGame,
"battle": environment.TeamBattle,
"task": environment.AgentTaskEval,
"race": environment.RacetoCenter,
"koh": environment.EasyKingoftheHill,
"sandwich": environment.Sandwich,
# "radio": environment.RadioRaid,
}

ENV_CONFIG = pufferlib.namespace(
@@ -35,6 +37,7 @@
"num_agents_per_team": 8,
"resilient_population": 0,
"spawn_immunity": 20,
"curriculum_file_path": "curriculum/neurips_curriculum_with_embedding.pkl",
}
)

@@ -170,11 +173,12 @@ def perform_eval(self, game, seed, num_eval_episode, save_file_prefix):
}

game_results = []
task_results = {}
cnt_episode = 0
while cnt_episode < num_eval_episode:
_, infos = clean_pufferl.evaluate(pufferl_data)

- for vals in infos.values():
+ for pol, vals in infos.items():
cnt_episode += sum(vals["episode_done"])
if "game_scores" in vals:
for episode in vals["game_scores"]:
@@ -189,12 +193,28 @@
}
)

# task_results is for the task-level info, used in AgentTaskEval
if game == "task":
if pol not in task_results:
task_results[pol] = defaultdict(list)
for k, v in vals.items():
if k == "length":
task_results[pol][k] += v # length is a plain list
if k.startswith("curriculum"):
task_results[pol][k] += [vv[0] for vv in v]

pufferl_data.sort_keys = [] # TODO: check if this solves memory leak

print(f"\nSeed: {seed}, evaluated {cnt_episode} episodes.\n")

file_name = f"{save_file_prefix}_{seed}.json"
self._save_results(game_results, file_name)

if game == "task":
# Individual task completion info
file_name = f"curriculum_info_{seed}.json"
self._save_results(task_results, file_name)

clean_pufferl.close(pufferl_data)
return game_results, file_name

@@ -228,7 +248,7 @@ def run(self, game, seed=None, num_episode=None, save_file_prefix=None):
"--game",
type=str,
default="all",
- choices="all battle race koh sandwich".split(),
+ choices="all battle race koh sandwich task".split(),
help="Game to evaluate/replay",
)
parser.add_argument("-s", "--seed", type=int, default=1, help="Random seed")
@@ -247,6 +267,7 @@ def run(self, game, seed=None, num_episode=None, save_file_prefix=None):

if args.game == "all":
game_list = list(GAME_CLS.keys())
game_list.remove("task") # task is only for AgentTaskEval
elif args.game in GAME_CLS:
game_list = [args.game]
else:
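For reference, the task_results payload assembled in perform_eval above is keyed by policy, then by metric. A minimal sketch of its shape, with illustrative policy and task names (not taken from the repo):

task_results = {
    "learner": {
        "length": [512, 1023, 730],  # plain list of episode lengths
        "curriculum/TickGE_900": [0.85, 1.0, 0.4],  # one progress float per episode
    }
}

This is the structure that proc_task_eval.py below expects to find inside each curriculum_info_<seed>.json.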
183 changes: 183 additions & 0 deletions proc_task_eval.py
@@ -0,0 +1,183 @@
import os
import json
import logging
import argparse

import numpy as np
import polars as pl

# Make the table output simpler
pl.Config.set_tbl_hide_dataframe_shape(True)
pl.Config.set_tbl_formatting("NOTHING")
pl.Config.set_tbl_hide_column_data_types(True)

# string matching for task names
WEIGHT_DICT = {
"TickGE": ("survival", 100 / 6), # 1 survival task
"PLAYER_KILL": ("combat", 100 / (6 * 3)), # 3 combat tasks
"DefeatEntity": ("combat", 100 / (6 * 3)),
"GO_FARTHEST": ("exploration", 100 / (6 * 2)), # 2 exploration tasks
"OccupyTile": ("exploration", 100 / (6 * 2)),
"AttainSkill": ("skill", 100 / (6 * 8)), # 8 skill tasks
"HarvestItem": ("item", 100 / (6 * 44)), # 44 item tasks
"ConsumeItem": ("item", 100 / (6 * 44)),
"EquipItem": ("item", 100 / (6 * 44)),
"FullyArmed": ("item", 100 / (6 * 44)),
"EARN_GOLD": ("market", 100 / (6 * 5)), # 5 market tasks
"BUY_ITEM": ("market", 100 / (6 * 5)),
"EarnGold": ("market", 100 / (6 * 5)),
"HoardGold": ("market", 100 / (6 * 5)),
"MakeProfit": ("market", 100 / (6 * 5)),
}


def get_task_weight(task_name):
for key, val in WEIGHT_DICT.items():
if key in task_name:
return val
logging.warning(f"Task name {task_name} not found in weight dict")
return "etc", 0


def get_summary_dict(progress, key):
# progress = vals if key == "length" else [v[0] for v in vals]
summ = {"count": len(progress), "mean": np.mean(progress), "median": np.median(progress)}

if key == "length":
progress = np.array(progress) / 1023 # full episode length

summ["completed"] = np.mean([1 if v >= 1 else 0 for v in progress])
summ["over30pcnt"] = np.mean([1 if v >= 0.3 else 0 for v in progress])
return summ
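
A worked example with made-up progress values for a curriculum task (progress is a fraction in [0, 1], so the length normalization does not apply):

get_summary_dict([1.0, 0.5, 0.2], key="curriculum/example")
# -> {"count": 3, "mean": 0.5666..., "median": 0.5,
#     "completed": 0.3333..., "over30pcnt": 0.6666...}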


def summarize_single_eval(data, weighted_score=False):
summary = {}

# task-level info
for key, vals in data.items():
if key.startswith("curriculum") or key == "length":
summary[key] = get_summary_dict(vals, key)

if weighted_score and key.startswith("curriculum"):
category, weight = get_task_weight(key)
summary[key]["category"] = category
summary[key]["weight"] = weight
summary[key]["weighted_score"] = summary[key]["mean"] * weight

# meta info
summary["avg_progress"] = np.mean(
[v["mean"] for k, v in summary.items() if k.startswith("curriculum")]
)
if weighted_score:
summary["weighted_score"] = np.sum(
[v["weighted_score"] for k, v in summary.items() if k.startswith("curriculum")]
)
return summary


def process_eval_files(policy_store_dir, eval_prefix):
summ_policy = []
summ_task = []

for file in os.listdir(policy_store_dir):
# NOTE: assumes the file naming convention is 'curriculum_info_<seed>.json'
if not file.startswith(eval_prefix) or not file.endswith(".json"):
continue

random_seed = file.split("_")[2].replace(".json", "")

with open(os.path.join(policy_store_dir, file), "r") as f:
data = json.load(f)

for pol_name, pol_data in data.items():
if len(pol_data) == 0:
continue

mode = "pvp" if len(pol_data) > 1 else "pve"

summary = summarize_single_eval(pol_data, weighted_score=True)
summ_policy.append(
{
"policy_name": pol_name,
"mode": mode,
"seed": random_seed,
"count": summary["length"]["count"],
"length": summary["length"]["mean"],
"score": summary["avg_progress"],
"weighted_score": summary["weighted_score"],
}
)

# also gather the results across random seeds for each task, then average
for task_name, task_data in summary.items():
if not task_name.startswith("curriculum"):
continue
summ_task.append(
{
"category": task_data["category"],
"task_name": task_name,
"weight": task_data["weight"],
"policy_name": pol_name,
"mode": mode,
"seed": random_seed,
"count": task_data["count"],
"score": task_data["mean"],
}
)

summ_df = pl.DataFrame(summ_policy).sort(["policy_name", "mode", "seed"])
summ_grp = summ_df.group_by(["policy_name", "mode"]).agg(
pl.col("length").mean(),
pl.col("score").mean(),
pl.col("weighted_score").mean(),
)
summ_grp = summ_grp.sort("weighted_score", descending=True)
summ_grp.write_csv(
os.path.join(policy_store_dir, "score_summary.tsv"), separator="\t", float_precision=6
)
print("\nPolicy score summary, sorted by weighted_score:")
print(summ_grp)

task_df = pl.DataFrame(summ_task).sort(["mode", "category", "task_name", "policy_name", "seed"])
task_grp = task_df.group_by(["mode", "category", "task_name", "policy_name"]).agg(
pl.col("score").mean()
)
task_grp = task_grp.sort(["mode", "category", "task_name", "policy_name"])
task_grp.write_csv(
os.path.join(policy_store_dir, "score_task_summary.tsv"), separator="\t", float_precision=6
)
cate_grp = task_df.group_by(["mode", "category", "policy_name"]).agg(pl.col("score").mean())
cate_grp = cate_grp.sort(["mode", "category", "policy_name"])
cate_grp.write_csv(
os.path.join(policy_store_dir, "score_category_summary.tsv"),
separator="\t",
float_precision=6,
)

if len(summ_df["seed"].unique()) > 1:
summ_df.write_csv(
os.path.join(policy_store_dir, "score_by_seed.tsv"), separator="\t", float_precision=6
)
task_df.write_csv(
os.path.join(policy_store_dir, "score_by_task_seed.tsv"),
separator="\t",
float_precision=6,
)

return summ_df, summ_grp, task_df, task_grp, cate_grp


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Process the evaluation result files")
parser.add_argument("policy_store_dir", type=str, help="Path to the policy directory")
parser.add_argument(
"-p",
"--prefix",
type=str,
default="curriculum_",
help="Prefix of the evaluation result files",
)
args = parser.parse_args()

process_eval_files(args.policy_store_dir, args.prefix)
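
The same processing can be driven from Python; a minimal sketch, assuming the curriculum_info_<seed>.json files produced by evaluate.py's task mode sit in a local directory (the path below is illustrative):

from proc_task_eval import process_eval_files

summ_df, summ_grp, task_df, task_grp, cate_grp = process_eval_files(
    "runs/policy_store", eval_prefix="curriculum_"
)
# Equivalent CLI: python proc_task_eval.py runs/policy_store -p curriculum_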
27 changes: 19 additions & 8 deletions reinforcement_learning/environment.py
@@ -29,7 +29,7 @@ def get_team_dict(num_agents, num_agents_per_team):
}


- class TeamBattle(ng.TeamBattle):
+ class DefaultGame(ng.DefaultGame):
_next_fog_onset = None
_next_fog_speed = None
_next_num_npc = None
@@ -60,7 +60,18 @@ def _set_config(self):
self.config.set_for_episode("NPC_N", npc_num)


class TeamBattle(ng.TeamBattle, DefaultGame):
pass
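
The empty-bodied TeamBattle above relies on Python's method resolution order: assuming ng.TeamBattle does not define _set_config itself, the subclass inherits DefaultGame's randomized fog and NPC settings while keeping ng.TeamBattle's game rules. A self-contained sketch of the pattern, with toy class names:

class Base:
    def _set_config(self):
        return "base"

class LocalDefault(Base):  # stands in for the DefaultGame subclass above
    def _set_config(self):
        return "randomized"

class LocalBattle(Base):  # stands in for ng.TeamBattle
    pass

class Battle(LocalBattle, LocalDefault):  # mirrors TeamBattle's bases
    pass

assert Battle()._set_config() == "randomized"  # the mixin's override wins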


class AgentTraining(ng.AgentTraining):
_next_num_npc = None

def _set_config(self):
self.config.reset()
npc_num = self._next_num_npc or self._np_random.integers(64, 256)
self.config.set_for_episode("NPC_N", npc_num)

def _get_candidate_tasks(self, eval_mode=False):
with open(self.config.CURRICULUM_FILE_PATH, "rb") as f:
# curriculum file may have been changed, so read the file when sampling
@@ -95,7 +106,7 @@ def _define_tasks(self):
return self._make_agent_tasks(cand_specs)


- class AmmoTraining(ng.AgentTraining):
+ class AmmoTraining(AgentTraining):
def is_compatible(self):
return self.config.are_systems_enabled(["COMBAT", "EQUIPMENT", "PROFESSION"])

@@ -198,19 +209,17 @@ def __init__(self, env_args: Namespace):
self.set("HORIZON", env_args.max_episode_length)
self.set("MAP_N", env_args.num_maps)
self.set("TEAMS", get_team_dict(env_args.num_agents, env_args.num_agents_per_team))
# self.set(
# "DEATH_FOG_ONSET",
# env_args.death_fog_tick if isinstance(env_args.death_fog_tick, int) else None,
# )
self.set("PATH_MAPS", f"{env_args.maps_path}/{env_args.map_size}/")
self.set("MAP_CENTER", env_args.map_size)
# self.set("NPC_N", env_args.num_npcs)
self.set("NPC_LEVEL_MULTIPLIER", 0.5) # make the high-level npcs weaker

self.set("RESOURCE_RESILIENT_POPULATION", env_args.resilient_population)
self.set("COMBAT_SPAWN_IMMUNITY", env_args.spawn_immunity)

self.set("CURRICULUM_FILE_PATH", env_args.curriculum_file_path)

# Make the high-level npcs weaker. Huge impact on the difficulty
self.set("NPC_LEVEL_MULTIPLIER", 0.5)


class FullGameConfig(
MiniGameConfig,
@@ -249,6 +258,8 @@ def make_env_creator(
game_packs = [(TeamBattle, 5), (AmmoTraining, 1)]
elif train_flag == "tb_curr":
game_packs = [(TeamBattle, 1), (AgentTraining, 1)]
elif train_flag == "full_surv":
game_packs = [(DefaultGame, 1), (TeamBattle, 1), (AgentTraining, 1)]
else:
raise ValueError(f"Invalid train_flag: {train_flag}")

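A hypothetical use of the new flag (make_env_creator's remaining parameters are not shown in this hunk, and treating the second tuple element as a sampling weight is an assumption):

env_creator = make_env_creator(train_flag="full_surv")
# episodes would then be drawn evenly from DefaultGame, TeamBattle,
# and AgentTraining, since each game pack carries weight 1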
(Diffs for the remaining three changed files are not shown.)
