From eb028eaad12df15484edc9a5f87a0682f3d561e8 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Mon, 4 Nov 2024 14:12:30 -0700 Subject: [PATCH 01/61] update generate_hdim_and_seed for population methods --- oai_agents/common/population.py | 60 +++++++++++++++++------- tests/test_oai_agents/test_population.py | 39 +++++++++++++++ 2 files changed, 82 insertions(+), 17 deletions(-) create mode 100644 tests/test_oai_agents/test_population.py diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index bdd1176d..3ddc35bd 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -7,6 +7,8 @@ from .curriculum import Curriculum +import random + def train_agent_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_dim, serialize): ''' @@ -77,28 +79,52 @@ def ensure_we_will_have_enough_agents_in_population(teammates_len, f" num_SPs_to_train: {num_SPs_to_train}." -def generate_hdim_and_seed(num_SPs_to_train): - ''' - (hidden_dim, seed) = reward of selfplay - (256, 68)=362, (64, 14)=318 - (256, 13)=248, (64, 0)=230 - (256, 48)=20, (64, 30)=0 +def generate_hdim_and_seed(num_of_required_agents): ''' - # Tested in 3-chefs-small-kitchen: - good_seeds = [68, 14, 13, 0] - good_hdims = [256, 64, 256, 64] + Generates lists of seeds and hidden dimensions for a given number of agents. - # Not tested: - other_seeds_copied_from_HAHA = [2907, 2907, 105, 105, 8, 32, 128, 512] - other_hdims_copied_from_HAHA = [64, 256, 64, 256, 16, 64, 256, 1024] + Each setting is a pair (hidden_dim, seed). If the number of required agents + is less than or equal to the number of predefined settings, it selects from + the predefined seeds and hidden dimensions. Otherwise, it generates random + seeds and hidden dimensions to fill the remaining number of agents. - all_seeds = good_seeds + other_seeds_copied_from_HAHA - all_hdims = good_hdims + other_hdims_copied_from_HAHA + Arguments: + num_of_required_agents -- the number of (hidden_dim, seed) pairs to generate. 
- selected_seeds = all_seeds[:num_SPs_to_train] - selected_hdims = all_hdims[:num_SPs_to_train] - return selected_seeds, selected_hdims + Returns: + selected_seeds -- list of selected seeds + selected_hdims -- list of selected hidden dimensions + ''' + + # Predefined seeds and hidden dimensions + seeds = [1010, 2020, 2602, 13, 68, 2907, 105, 128] + hdims = [256] * len(seeds) + + # Initialize selected lists + selected_seeds = [] + selected_hdims = [] + + # Check if we have enough predefined pairs + if num_of_required_agents <= len(seeds): + # Select predefined seeds and hdims + selected_seeds = seeds[:num_of_required_agents] + selected_hdims = hdims[:num_of_required_agents] + else: + # Use all predefined settings + selected_seeds = seeds[:] + selected_hdims = hdims[:] + + # Generate additional random settings if more agents are needed + remaining = num_of_required_agents - len(seeds) + available_seeds = set(range(0, 5000)) - set(selected_seeds) + random_seeds = random.sample(available_seeds, remaining) # Generate random seeds + random_hdims = random.choices([256, 512], k=remaining) # Generate random hidden dimensions + + # Append randomly generated settings to selected lists + selected_seeds += random_seeds + selected_hdims += random_hdims + return selected_seeds, selected_hdims def save_population(args, population): name_prefix = 'pop' diff --git a/tests/test_oai_agents/test_population.py b/tests/test_oai_agents/test_population.py new file mode 100644 index 00000000..9b3d0004 --- /dev/null +++ b/tests/test_oai_agents/test_population.py @@ -0,0 +1,39 @@ +# test_population.py + +from oai_agents.common.population import generate_hdim_and_seed + +def test_generate_hdim_and_seed(): + ''' + Test function for generate_hdim_and_seed to ensure: + 1. The number of (hidden_dim, seed) pairs matches the number of required agents. + 2. All generated seeds are unique. + 3. Hidden dimensions are as expected (either 64 or 256). + ''' + + # Test cases + test_cases = [3, 5, 8, 10] # Testing for fewer than, equal to, and more than predefined settings + + for num_agents in test_cases: + print(f"\nTesting with {num_agents} agents:") + + # Generate (hidden_dim, seed) pairs + selected_seeds, selected_hdims = generate_hdim_and_seed(num_agents) + + # Check that the correct number of agents is generated + assert len(selected_seeds) == num_agents, f"Expected {num_agents} seeds, got {len(selected_seeds)}" + assert len(selected_hdims) == num_agents, f"Expected {num_agents} hidden dims, got {len(selected_hdims)}" + + # Check that all seeds are unique + assert len(set(selected_seeds)) == num_agents, "Duplicate seeds found in the generated seeds." + + # Check that hidden dims are from the valid set (64, 256) + assert all(hdim in [256, 512] for hdim in selected_hdims), "Invalid hidden dimension found. Only 64 and 256 are allowed." 
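        # Illustrative expectation, based on the implementation above: for num_agents <= 8 the
        # pairs come straight from the predefined lists (e.g. generate_hdim_and_seed(3) should
        # give seeds [1010, 2020, 2602] with hidden dims [256, 256, 256]), while num_agents = 10
        # pads the 8 predefined seeds with 2 random seeds drawn from range(5000) and hidden
        # dims sampled from {256, 512}.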
+ + print(f"Test passed for {num_agents} agents.") + print("Selected seeds:", selected_seeds) + print("Selected hidden dimensions:", selected_hdims) + +# Ensure that this test script only runs when executed directly +if __name__ == "__main__": + print("Running tests in population.py...") + test_generate_hdim_and_seed() From eb1d84e8e9c88360bd0212292f60664957bb5193 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Mon, 4 Nov 2024 14:29:55 -0700 Subject: [PATCH 02/61] Replace aamas25 in population.py by tag.CheckedPoints.FINAL_TRAINED_MODEL --- oai_agents/common/population.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 3ddc35bd..8e7f361c 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -3,7 +3,7 @@ import dill from oai_agents.agents.rl import RLAgentTrainer -from oai_agents.common.tags import AgentPerformance, TeamType +from oai_agents.common.tags import AgentPerformance, TeamType, CheckedPoints from .curriculum import Curriculum @@ -142,7 +142,7 @@ def save_population(args, population): seed=None, ) rt.agents = population[layout_name] - rt.save_agents(tag='aamas25') + rt.save_agents(tag=CheckedPoints.FINAL_TRAINED_MODEL) def get_population(args, @@ -153,7 +153,7 @@ def get_population(args, num_SPs_to_train, unseen_teammates_len=0, force_training=False, - tag='aamas25', + tag=CheckedPoints.FINAL_TRAINED_MODEL, ): population = {layout_name: [] for layout_name in args.layout_names} @@ -202,4 +202,4 @@ def get_population(args, save_population(args=args, population=population) - return population + return population \ No newline at end of file From 2a0febc14fe4d626aacaccec61ab9bfb5e2c9f3c Mon Sep 17 00:00:00 2001 From: ttopiac Date: Tue, 5 Nov 2024 09:58:42 -0700 Subject: [PATCH 03/61] Replace the first loop in get_best_SP_agent by a line of code. 
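A rough sketch of the selection logic this commit collapses into a single pass (the
population dict and the per-agent layout_scores attribute follow their usage in the
diff below; the helper name best_sp_agent is illustrative and not part of the codebase):

    def best_sp_agent(layout_names, population):
        # Every agent stored under the first layout carries scores for all layouts,
        # so iterating population[layout_names[0]] once is enough.
        agents = population[layout_names[0]]
        return max(
            agents,
            key=lambda a: sum(a.layout_scores[l] for l in layout_names) / len(layout_names)
        )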
--- oai_agents/common/teammates_collection.py | 3 +-- scripts/utils/train_helper.py | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/oai_agents/common/teammates_collection.py b/oai_agents/common/teammates_collection.py index 415c45d6..b0594d91 100644 --- a/oai_agents/common/teammates_collection.py +++ b/oai_agents/common/teammates_collection.py @@ -176,8 +176,7 @@ def generate_TC(args, def get_best_SP_agent(args, population): agents_scores_averaged_over_layouts = [] - for layout_name in args.layout_names: - all_agents = [agent for agent in population[layout_name]] + all_agents = [agent for agent in population[args.layout_names[0]]] for agent in all_agents: scores = [agent.layout_scores[layout_name] for layout_name in args.layout_names] diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index 43129ccc..e369a60d 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -1,6 +1,6 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.tags import TeamType -from oai_agents.common.population import get_population +from oai_agents.common.population import get_population, generate_hdim_and_seed from oai_agents.common.teammates_collection import generate_TC, get_best_SP_agent, generate_TC_for_ADV_agent, update_TC_w_ADV_teammates from oai_agents.common.curriculum import Curriculum from .common import load_agents, generate_name @@ -8,6 +8,7 @@ from oai_agents.common.tags import CheckedPoints + def get_SP_agent(args, train_types, eval_types, curriculum, tag=None): name = generate_name(args, prefix=Prefix.SELF_PLAY, From b592e6cc54e13a2984ec2ae11cfe8f68de5ee8d8 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Tue, 5 Nov 2024 15:52:05 -0700 Subject: [PATCH 04/61] Add two CheckedPoints tags, including CheckedModelPrefix and REWARD_SUBSTR --- oai_agents/agents/rl.py | 58 +++++++++---------- oai_agents/common/population.py | 3 +- oai_agents/common/tags.py | 20 ++++--- oai_agents/common/teammates_collection.py | 8 ++- .../fix_pop_ck_list_after_continued_run.py | 8 +-- 5 files changed, 51 insertions(+), 46 deletions(-) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index a2971ddf..5f611e0b 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -17,30 +17,30 @@ class RLAgentTrainer(OAITrainer): ''' Train an RL agent to play with a teammates_collection of agents.''' - def __init__(self, teammates_collection, args, + def __init__(self, teammates_collection, args, agent, epoch_timesteps, n_envs, - seed, learner_type, + seed, learner_type, train_types=[], eval_types=[], - curriculum=None, num_layers=2, hidden_dim=256, + curriculum=None, num_layers=2, hidden_dim=256, checkpoint_rate=None, name=None, env=None, eval_envs=None, use_cnn=False, use_lstm=False, use_frame_stack=False, taper_layers=False, use_policy_clone=False, deterministic=False): - + name = name or 'rl_agent' super(RLAgentTrainer, self).__init__(name, args, seed=seed) - + self.args = args self.device = args.device self.teammates_len = self.args.teammates_len self.num_players = self.args.num_players self.curriculum = curriculum - + self.epoch_timesteps = epoch_timesteps self.n_envs = n_envs self.hidden_dim = hidden_dim self.num_layers = num_layers - + self.seed = seed self.checkpoint_rate = checkpoint_rate self.encoding_fn = ENCODING_SCHEMES[args.encoding_fn] @@ -53,7 +53,7 @@ def __init__(self, teammates_collection, args, self.learner_type = learner_type self.env, self.eval_envs = self.get_envs(env, eval_envs, deterministic, learner_type) 
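        # get_envs (defined below) is expected to return a vectorized training env built with
        # make_vec_env over args.n_envs copies of OvercookedGymEnv, plus one evaluation env
        # per layout created with is_eval_env=True and a 400-step horizon.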
- + self.learning_agent, self.agents = self.get_learning_agent(agent) self.teammates_collection, self.eval_teammates_collection = self.get_teammates_collection(_tms_clctn = teammates_collection, learning_agent = self.learning_agent, @@ -109,7 +109,7 @@ def get_learning_agent(self, agent): def get_teammates_collection(self, _tms_clctn, learning_agent, train_types=[], eval_types=[]): ''' Returns a dictionary of teammates_collection for training and evaluation - dict + dict teammates_collection = { 'layout_name': { 'TeamType.HIGH_FIRST': [[agent1, agent2], ...], @@ -122,18 +122,18 @@ def get_teammates_collection(self, _tms_clctn, learning_agent, train_types=[], e if _tms_clctn == {}: _tms_clctn = { TeammatesCollection.TRAIN: { - layout_name: + layout_name: {TeamType.SELF_PLAY: [[learning_agent for _ in range(self.teammates_len)]]} for layout_name in self.args.layout_names }, TeammatesCollection.EVAL: { - layout_name: + layout_name: {TeamType.SELF_PLAY: [[learning_agent for _ in range(self.teammates_len)]]} for layout_name in self.args.layout_names } } - else: + else: for layout in self.args.layout_names: for tt in _tms_clctn[TeammatesCollection.TRAIN][layout]: if tt == TeamType.SELF_PLAY: @@ -181,7 +181,7 @@ def get_envs(self, _env, _eval_envs, deterministic, learner_type): 'deterministic': deterministic,'args': self.args, 'learner_type': learner_type} env = make_vec_env(OvercookedGymEnv, n_envs=self.args.n_envs, seed=self.seed, vec_env_cls=VEC_ENV_CLS, env_kwargs=env_kwargs) - + eval_envs_kwargs = {'is_eval_env': True, 'horizon': 400, 'stack_frames': self.use_frame_stack, 'deterministic': deterministic, 'args': self.args, 'learner_type': learner_type} eval_envs = [OvercookedGymEnv(**{'env_index': i, **eval_envs_kwargs}) for i in range(self.n_layouts)] @@ -195,7 +195,7 @@ def get_envs(self, _env, _eval_envs, deterministic, learner_type): def get_sb3_agent(self): - layers = [self.hidden_dim // (2**i) for i in range(self.num_layers)] if self.taper_layers else [self.hidden_dim] * self.num_layers + layers = [self.hidden_dim // (2**i) for i in range(self.num_layers)] if self.taper_layers else [self.hidden_dim] * self.num_layers policy_kwargs = dict(net_arch=dict(pi=layers, vf=layers)) if self.use_cnn: @@ -226,7 +226,7 @@ def get_sb3_agent(self): def check_teammates_collection_structure(self, teammates_collection): - ''' + ''' teammates_collection = { 'layout_name': { 'high_perf_first': [[agent1, agent2], ...], @@ -236,7 +236,7 @@ def check_teammates_collection_structure(self, teammates_collection): }, } ''' - for layout in teammates_collection: + for layout in teammates_collection: for team_type in teammates_collection[layout]: for teammates in teammates_collection[layout][team_type]: assert len(teammates) == self.teammates_len,\ @@ -246,7 +246,7 @@ def check_teammates_collection_structure(self, teammates_collection): def _get_constructor_parameters(self): - return dict(args=self.args, name=self.name, use_lstm=self.use_lstm, + return dict(args=self.args, name=self.name, use_lstm=self.use_lstm, use_frame_stack=self.use_frame_stack, hidden_dim=self.hidden_dim, seed=self.seed) @@ -266,7 +266,7 @@ def should_evaluate(self, steps): steps_divisable_by_5 = (steps + 1) % 5 == 0 mean_rew_greater_than_best = mean_training_rew > self.best_training_rew and self.learning_agent.num_timesteps >= 5e6 checkpoint_rate_reached = self.checkpoint_rate and self.learning_agent.num_timesteps // self.checkpoint_rate > (len(self.ck_list) - 1) - + return steps_divisable_by_5 or mean_rew_greater_than_best or 
checkpoint_rate_reached def log_details(self, experiment_name, total_train_timesteps): @@ -296,11 +296,11 @@ def train_agents(self, total_train_timesteps, exp_name=None): if self.checkpoint_rate is not None: self.ck_list = [] - path, tag = self.save_agents(tag=f'ck_{len(self.ck_list)}') + path, tag = self.save_agents(tag=f'{CheckedPoints.FIRST_CHECKED_MODEL}') self.ck_list.append(({k: 0 for k in self.args.layout_names}, path, tag)) best_path, best_tag = None, None - + steps = 0 curr_timesteps = 0 prev_timesteps = self.learning_agent.num_timesteps @@ -308,18 +308,18 @@ def train_agents(self, total_train_timesteps, exp_name=None): while curr_timesteps < total_train_timesteps: self.curriculum.update(current_step=steps) - # TODO: eventually, teammates_collection should be turned into its own class with 'select' - # and 'update' functions that can be leveraged during training so the teammates_collection + # TODO: eventually, teammates_collection should be turned into its own class with 'select' + # and 'update' functions that can be leveraged during training so the teammates_collection # doesn't need to be created before training begins, this would allow us to generate a random # TC each round of training (like in the original FCP paper), until then, we have to leverage - # the ALL_MIX TeamType to achieve random teammate selection + # the ALL_MIX TeamType to achieve random teammate selection self.set_new_teammates(curriculum=self.curriculum) # In each iteration the agent collects n_envs * n_steps experiences # This continues until self.learning_agent.num_timesteps > epoch_timesteps is reached. self.learning_agent.learn(self.epoch_timesteps) - - + + curr_timesteps += self.learning_agent.num_timesteps - prev_timesteps prev_timesteps = self.learning_agent.num_timesteps @@ -332,7 +332,7 @@ def train_agents(self, total_train_timesteps, exp_name=None): if self.checkpoint_rate: if self.learning_agent.num_timesteps // self.checkpoint_rate > (len(self.ck_list) - 1): - path, tag = self.save_agents(tag=f'ck_{len(self.ck_list)}_rew_{mean_reward}') + path, tag = self.save_agents(tag=f'{CheckedPoints.CHECKED_MODEL_PREFIX}{len(self.ck_list)}{CheckedPoints.REWARD_SUBSTR}{mean_reward}') self.ck_list.append((rew_per_layout, path, tag)) if mean_reward >= self.best_score: @@ -353,7 +353,7 @@ def find_closest_score_path_tag(target_score, all_score_path_tag): closest_score = abs(score - target_score) closest_score_path_tag = (score, path, tag) return closest_score_path_tag - + @staticmethod def get_agents_and_set_score_and_perftag(args, layout_name, scores_path_tag, performance_tag, ck_list): score, path, tag = scores_path_tag @@ -361,7 +361,7 @@ def get_agents_and_set_score_and_perftag(args, layout_name, scores_path_tag, per for agent in all_agents: agent.layout_scores[layout_name] = score agent.layout_performance_tags[layout_name] = performance_tag - + # set other layouts's scores. 
Can't set their performance tags because we don't know it but it doesn't matter, we don't use the perftag for agent in all_agents: for scores, ck_path, ck_tag in ck_list: @@ -379,7 +379,7 @@ def get_checkedpoints_agents(args, ck_list, layout_name): AgentPerformance.HIGH_MEDIUM AgentPerformance.MEDIUM AgentPerformance.MEDIUM_LOW - AgentPerformance.LOW + AgentPerformance.LOW It categorizes by setting their score and performance tag: OAIAgent.layout_scores OAIAgent.layout_performance_tags diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 8e7f361c..ae55c23c 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -10,7 +10,7 @@ import random -def train_agent_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_dim, serialize): +def train_agent_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_dim, serialize, force_training): ''' Returns ckeckpoints_list either serialized or not based on serialize flag @@ -179,6 +179,7 @@ def get_population(args, (args, total_training_timesteps, ck_rate, seed[i], h_dim[i], True) for i in range(num_SPs_to_train) ] + if args.parallel: with concurrent.futures.ProcessPoolExecutor(max_workers=args.max_concurrent_jobs) as executor: arg_lists = list(zip(*inputs)) diff --git a/oai_agents/common/tags.py b/oai_agents/common/tags.py index 2da41a12..bbbb1cfb 100644 --- a/oai_agents/common/tags.py +++ b/oai_agents/common/tags.py @@ -1,10 +1,10 @@ from enum import Enum class AgentPerformance: ''' - Agent performance refers to the reward an agent receives after playing in - self-play scenarios. For example, consider an agent, X, with + Agent performance refers to the reward an agent receives after playing in + self-play scenarios. For example, consider an agent, X, with AgentPerformance.HIGH.This means X has participated in an Overcooked game - with multiple copies of itself, and the self-play team achieved a total + with multiple copies of itself, and the self-play team achieved a total reward categorized as high performance. ''' HIGH = 'H' @@ -14,7 +14,7 @@ class AgentPerformance: LOW = 'L' ALL = [HIGH, HIGH_MEDIUM, MEDIUM, MEDIUM_LOW, LOW] - + NOTSET = 'NS' @@ -24,13 +24,13 @@ class TeamType: For example if teammates_len is 2, and the team type is HIGH_PRIORITY Then the list of agents are sorted based on score in a descending order and the first 2 agents are selected. - + SP: All agents are the same agent SPL: N-1 agents are the same agent, 1 agent is a low performing agent SPM: ... ''' - HIGH_FIRST = 'H' + HIGH_FIRST = 'H' MEDIUM_FIRST = 'M' MIDDLE_FIRST = 'MID' LOW_FIRST = 'L' @@ -41,8 +41,8 @@ class TeamType: HIGH_LOW_RANDOM = 'HLR' # Used to create a list of all possible permutations of agents from the teammate population - # TODO: eventually, teammates_collection should be turned into its own class with 'select' - # and 'update' functions that can be leveraged during training so the teammates_collection + # TODO: eventually, teammates_collection should be turned into its own class with 'select' + # and 'update' functions that can be leveraged during training so the teammates_collection # doesn't need to be created before training begins, once that happens we can remove the AMX # type ALL_MIX = 'AMX' @@ -82,7 +82,7 @@ class TeammatesCollection: EVAL = 'eval' class CheckedPoints(): - # During training, we saved models if it reaches the best eval/training reward, worst eval/training reward. 
+ # During training, we saved models if it reaches the best eval/training reward, worst eval/training reward. # In addition, we also save the very last one. # For all of them, we assign them a tag so that we can use them in the future. # For example, if the training reaches best evaluation reward, we saved it with a tag BEST_EVAL_REWARD. @@ -92,6 +92,8 @@ class CheckedPoints(): WORST_TRAIN_REWARD ='worst_train_reward' FINAL_TRAINED_MODEL = 'last' FIRST_CHECKED_MODEL = 'ck_0' + CHECKED_MODEL_PREFIX = 'ck_' + REWARD_SUBSTR = '_rew_' class Prefix: SELF_PLAY = 'SP' diff --git a/oai_agents/common/teammates_collection.py b/oai_agents/common/teammates_collection.py index b0594d91..57ab7821 100644 --- a/oai_agents/common/teammates_collection.py +++ b/oai_agents/common/teammates_collection.py @@ -174,17 +174,19 @@ def generate_TC(args, def get_best_SP_agent(args, population): + # all_agents = [agent for agent in population[args.layout_names[0]]] + all_agents = get_all_agents( args.layout_names[0], population) agents_scores_averaged_over_layouts = [] - all_agents = [agent for agent in population[args.layout_names[0]]] - for agent in all_agents: scores = [agent.layout_scores[layout_name] for layout_name in args.layout_names] agents_scores_averaged_over_layouts.append((agent, sum(scores)/len(scores))) best_agent = max(agents_scores_averaged_over_layouts, key=lambda x: x[1]) return best_agent[0] - +def get_all_agents(layout_name, population): + all_agents = [agent for agent in population[layout_name]] + return all_agents def update_eval_collection_with_eval_types_from_file(args, agent, unseen_teammates_len, eval_types, eval_collection): for teammates in eval_types: diff --git a/sandbox/fix_pop_ck_list_after_continued_run.py b/sandbox/fix_pop_ck_list_after_continued_run.py index 2c9ddd36..fdf54db2 100644 --- a/sandbox/fix_pop_ck_list_after_continued_run.py +++ b/sandbox/fix_pop_ck_list_after_continued_run.py @@ -4,7 +4,7 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.arguments import get_arguments -from oai_agents.common.tags import TeamType +from oai_agents.common.tags import TeamType, CheckedPoints from oai_agents.common.learner import LearnerType def fix_ck_list(initial_run_root, continued_run_root, corrected_run_root, ck_starts_from): @@ -64,7 +64,7 @@ def fix_pop(args, initial_run_root, continued_run_root, corrected_run_root): corrected_run_exp = re.search(r'agent_models/(.*)', corrected_run_root).group(1) population_initial = {layout_name: [] for layout_name in args.layout_names} - population_continued = {layout_name: [] for layout_name in args.layout_names} + population_continued = {layout_name: [] for layout_name in args.layout_names} for layout_name in args.layout_names: name = f'pop_{layout_name}' @@ -75,7 +75,7 @@ def fix_pop(args, initial_run_root, continued_run_root, corrected_run_root): args.exp_dir = continued_run_exp population_continued[layout_name] = RLAgentTrainer.load_agents(args, name=name, tag='aamas25') print(f"Loaded {name} in {continued_run_exp}, size: {len(population_continued[layout_name])}") - + all_agents = population_initial[layout_name] + population_continued[layout_name] rt = RLAgentTrainer( @@ -98,7 +98,7 @@ def fix_pop(args, initial_run_root, continued_run_root, corrected_run_root): def set_input(): args = get_arguments() args.teammates_len = 4 - args.num_players = args.teammates_len + 1 + args.num_players = args.teammates_len + 1 args.layout_names = ['selected_5_chefs_counter_circuit', 'selected_5_chefs_secret_coordination_ring', 
'selected_5_chefs_storage_room'] From 9bc2e37b7c37b1100049275e1b21ba206464fb32 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Tue, 5 Nov 2024 20:48:14 -0700 Subject: [PATCH 05/61] Add a function for us to list agent's checked tags and also a test function to test it. --- oai_agents/agents/base_agent.py | 66 ++++++++++++++---- tests/test_oai_agents/test_base_agent.py | 85 ++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 12 deletions(-) create mode 100644 tests/test_oai_agents/test_base_agent.py diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index db6dc8e0..c50ec1ed 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -23,6 +23,7 @@ import wandb import os import random +import re class OAIAgent(nn.Module, ABC): """ @@ -46,7 +47,7 @@ def __init__(self, name, args): self.prev_subtask = Subtasks.SUBTASKS_TO_IDS['unknown'] self.use_hrl_obs = False self.on_reset = True - + self.layout_scores = { layout_name: -1 for layout_name in args.layout_names } @@ -180,7 +181,7 @@ def load(cls, path: str, args: argparse.Namespace) -> 'OAIAgent': class SB3Wrapper(OAIAgent): - + def __init__(self, agent, name, args): super(SB3Wrapper, self).__init__(name, args) self.agent = agent @@ -363,7 +364,7 @@ def __init__(self, name, args, seed=None): if th.cuda.is_available(): th.cuda.manual_seed_all(seed) th.backends.cudnn.deterministic = True - + self.eval_teammates_collection = {} self.teammates_collection = {} @@ -395,14 +396,14 @@ def linear_anneal(progress_remaining: float) -> float: def evaluate(self, eval_agent, num_eps_per_layout_per_tm=5, visualize=False, timestep=None, log_wandb=True, deterministic=False): - + timestep = timestep if timestep is not None else eval_agent.num_timesteps tot_mean_reward = [] rew_per_layout_per_teamtype = {} rew_per_layout = {} ''' - dict + dict teammates_collection = { 'layout_name': { 'TeamType.HIGH_FIRST': [[agent1, agent2], ...], @@ -412,7 +413,7 @@ def evaluate(self, eval_agent, num_eps_per_layout_per_tm=5, visualize=False, tim }, } ''' - for _, env in enumerate(self.eval_envs): + for _, env in enumerate(self.eval_envs): rew_per_layout_per_teamtype[env.layout_name] = { teamtype: [] for teamtype in self.eval_teammates_collection[env.layout_name] } @@ -430,8 +431,8 @@ def evaluate(self, eval_agent, num_eps_per_layout_per_tm=5, visualize=False, tim deterministic=deterministic, warn=False, render=visualize) tot_mean_reward.append(mean_reward) rew_per_layout_per_teamtype[env.layout_name][teamtype].append(mean_reward) - - + + rew_per_layout_per_teamtype[env.layout_name] = {teamtype: np.mean(rew_per_layout_per_teamtype[env.layout_name][teamtype]) for teamtype in rew_per_layout_per_teamtype[env.layout_name]} rew_per_layout[env.layout_name] = np.mean([rew_per_layout_per_teamtype[env.layout_name][teamtype] for teamtype in rew_per_layout_per_teamtype[env.layout_name]]) @@ -439,7 +440,7 @@ def evaluate(self, eval_agent, num_eps_per_layout_per_tm=5, visualize=False, tim wandb.log({f'eval_mean_reward_{env.layout_name}': rew_per_layout[env.layout_name], 'timestep': timestep}) for teamtype in rew_per_layout_per_teamtype[env.layout_name]: wandb.log({f'eval_mean_reward_{env.layout_name}_teamtype_{teamtype}': rew_per_layout_per_teamtype[env.layout_name][teamtype], 'timestep': timestep}) - + if log_wandb: wandb.log({f'eval_mean_reward': np.mean(tot_mean_reward), 'timestep': timestep}) return np.mean(tot_mean_reward), rew_per_layout @@ -451,13 +452,13 @@ def set_new_teammates(self, curriculum): population_teamtypes = 
self.teammates_collection[layout_name] teammates = curriculum.select_teammates(population_teamtypes=population_teamtypes) - + assert len(teammates) == self.args.teammates_len assert type(teammates) == list for teammate in teammates: assert isinstance(teammate, SB3Wrapper) - + self.env.env_method('set_teammates', teammates, indices=i) @@ -475,7 +476,7 @@ def save_agents(self, path: Union[Path, None] = None, tag: Union[str, None] = No path = self.args.base_dir / 'agent_models' / self.args.exp_dir / self.name else: path = self.args.base_dir / 'agent_models'/ self.name - + tag = tag or self.args.exp_name save_path = path / tag / 'trainer_file' agent_path = path / tag / 'agents_dir' @@ -510,3 +511,44 @@ def load_agents(args, name: str=None, path: Union[Path, None] = None, tag: Union agent.to(device) agents.append(agent) return agents + + @staticmethod + def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None) -> List[str]: + ''' + Lists only tags that start with CheckedPoints.CHECKED_MODEL_PREFIX, followed by an integer. + If the integer is greater than 0, it must be followed by CheckedPoints.REWARD_SUBSTR and a floating-point number. + + Parameters: + - args: Experiment arguments containing base directory info. + - name: The name of the agent (or experiment) for which tags should be listed. + - path: Optional. If provided, it overrides the default path to the agents directory. + + Returns: + - A list of tags (directories) that match the specified pattern. + ''' + if not path: + if args.exp_dir: + path = args.base_dir / 'agent_models' / args.exp_dir / name + else: + path = args.base_dir / 'agent_models' / name + + # Ensure the directory exists + if not path.exists() or not path.is_dir(): + raise FileNotFoundError(f"Agent directory not found: {path}") + + # Define the prefix and the regular expression to match the pattern + prefix = CheckedPoints.CHECKED_MODEL_PREFIX + reward_substr = CheckedPoints.REWARD_SUBSTR + pattern = re.compile(f"^{re.escape(prefix)}(\\d+)(?:{re.escape(reward_substr)}[\\d.]+)?$") + + # List all subdirectories (tags) that match the pattern + tags = [] + for tag in path.iterdir(): + if tag.is_dir() and pattern.match(tag.name): + match = pattern.match(tag.name) + integer_part = int(match.group(1)) + # Only add tags that either have no reward substring for integer 0, or have it when integer > 0 + if integer_part == 0 or (integer_part > 0 and reward_substr in tag.name): + tags.append(tag.name) + + return tags diff --git a/tests/test_oai_agents/test_base_agent.py b/tests/test_oai_agents/test_base_agent.py new file mode 100644 index 00000000..9c18603f --- /dev/null +++ b/tests/test_oai_agents/test_base_agent.py @@ -0,0 +1,85 @@ +from oai_agents.agents.base_agent import OAITrainer +from pathlib import Path +from oai_agents.common.tags import CheckedPoints +import shutil + +def test_list_agent_checked_tags(): + # Define base directory based on the current working directory + base_dir = Path.cwd() + + # Set up the directory structure for testing + # This will create the following structure within the current working directory: + # + # / + # └── agent_models/ + # └── test_agents_folder/ + # └── test_agent/ + # ├── ck_0/ + # ├── ck_1_rew_59.5/ + # ├── ck_2_rew_140.0/ + # ├── ck_10_rew_336.8888888888889/ + # ├── ck_3_invalid/ # Should not match + # ├── ck_4_rew_invalid/ # Should not match + # ├── unrelated_tag/ # Should not match + # ├── best/ # Should not match + # └── last/ # Should not match + # + # Only `ck_0`, `ck_1_rew_59.5`, `ck_2_rew_140.0`, and 
`ck_10_rew_336.8888888888889` + # should be returned by the function. + + test_dir = base_dir / "agent_models" / "test_agents_folder" / "test_agent" + test_dir.mkdir(parents=True, exist_ok=True) # Ensure all parent directories are created + + # Simulate directory structure with various tags + tag_names = [ + "ck_0", + "ck_1_rew_59.5", + "ck_2_rew_140.0", + "ck_10_rew_336.8888888888889", + "ck_3_invalid", # Invalid because it doesn't have a valid float after the integer + "ck_4_rew_invalid", # Invalid because reward value is not a float + "unrelated_tag", # Invalid because it doesn't start with `CheckedPoints.CHECKED_MODEL_PREFIX` + "best", + "last" + ] + + # Create these tag directories within the test directory + for tag_name in tag_names: + (test_dir / tag_name).mkdir(parents=True, exist_ok=True) + + # Mock args object with base_dir and exp_dir pointing to the test directory + class MockArgs: + def __init__(self, base_dir, exp_dir, layout_names=[]): + self.base_dir = base_dir + self.exp_dir = "test_agents_folder" + self.layout_names = layout_names + + args = MockArgs(base_dir=base_dir, exp_dir="test_agents_folder") + + # Call the function to test + checked_tags = OAITrainer.list_agent_checked_tags(args, name="test_agent") + + # Expected tags should only include those that match the pattern + expected_tags = [ + "ck_0", + "ck_1_rew_59.5", + "ck_2_rew_140.0", + "ck_10_rew_336.8888888888889" + ] + + # Print results for verification + if sorted(checked_tags) == sorted(expected_tags): + print("Test passed: Tags returned as expected.") + else: + print(f"Test failed: Expected {expected_tags}, but got {checked_tags}") + + # Clean up the test directories after the test + # This will remove the entire "agent_models/test_agents_folder" structure created for testing + shutil.rmtree(base_dir / "agent_models" / "test_agents_folder") + +# Run the test function +test_list_agent_checked_tags() + + + + From 4bf7932b797e1170e6f5fc0d39c77e538e313d63 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Tue, 5 Nov 2024 20:50:58 -0700 Subject: [PATCH 06/61] rewrite a comment --- oai_agents/agents/base_agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index c50ec1ed..b3303830 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -519,8 +519,8 @@ def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None If the integer is greater than 0, it must be followed by CheckedPoints.REWARD_SUBSTR and a floating-point number. Parameters: - - args: Experiment arguments containing base directory info. - - name: The name of the agent (or experiment) for which tags should be listed. + - args: Experiment arguments containing base directory info and experiment directory info. + - name: The name of the agent for which tags should be listed. - path: Optional. If provided, it overrides the default path to the agents directory. 
Returns: From a9a874eb65d2f8326b28c3da15508a607bcc91a4 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Mon, 4 Nov 2024 14:12:30 -0700 Subject: [PATCH 07/61] update generate_hdim_and_seed for population methods --- oai_agents/common/population.py | 60 +++++++++++++++++------- tests/test_oai_agents/test_population.py | 39 +++++++++++++++ 2 files changed, 82 insertions(+), 17 deletions(-) create mode 100644 tests/test_oai_agents/test_population.py diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 25dc75fc..0f3bfdb0 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -7,6 +7,8 @@ from .curriculum import Curriculum +import random + def train_agent_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_dim, serialize): ''' @@ -77,28 +79,52 @@ def ensure_we_will_have_enough_agents_in_population(teammates_len, f" num_SPs_to_train: {num_SPs_to_train}." -def generate_hdim_and_seed(num_SPs_to_train): - ''' - (hidden_dim, seed) = reward of selfplay - (256, 68)=362, (64, 14)=318 - (256, 13)=248, (64, 0)=230 - (256, 48)=20, (64, 30)=0 +def generate_hdim_and_seed(num_of_required_agents): ''' - # Tested in 3-chefs-small-kitchen: - good_seeds = [68, 14, 13, 0] - good_hdims = [256, 64, 256, 64] + Generates lists of seeds and hidden dimensions for a given number of agents. - # Not tested: - other_seeds_copied_from_HAHA = [2907, 2907, 105, 105, 8, 32, 128, 512] - other_hdims_copied_from_HAHA = [64, 256, 64, 256, 16, 64, 256, 1024] + Each setting is a pair (hidden_dim, seed). If the number of required agents + is less than or equal to the number of predefined settings, it selects from + the predefined seeds and hidden dimensions. Otherwise, it generates random + seeds and hidden dimensions to fill the remaining number of agents. - all_seeds = good_seeds + other_seeds_copied_from_HAHA - all_hdims = good_hdims + other_hdims_copied_from_HAHA + Arguments: + num_of_required_agents -- the number of (hidden_dim, seed) pairs to generate. 
- selected_seeds = all_seeds[:num_SPs_to_train] - selected_hdims = all_hdims[:num_SPs_to_train] - return selected_seeds, selected_hdims + Returns: + selected_seeds -- list of selected seeds + selected_hdims -- list of selected hidden dimensions + ''' + + # Predefined seeds and hidden dimensions + seeds = [1010, 2020, 2602, 13, 68, 2907, 105, 128] + hdims = [256] * len(seeds) + + # Initialize selected lists + selected_seeds = [] + selected_hdims = [] + + # Check if we have enough predefined pairs + if num_of_required_agents <= len(seeds): + # Select predefined seeds and hdims + selected_seeds = seeds[:num_of_required_agents] + selected_hdims = hdims[:num_of_required_agents] + else: + # Use all predefined settings + selected_seeds = seeds[:] + selected_hdims = hdims[:] + + # Generate additional random settings if more agents are needed + remaining = num_of_required_agents - len(seeds) + available_seeds = set(range(0, 5000)) - set(selected_seeds) + random_seeds = random.sample(available_seeds, remaining) # Generate random seeds + random_hdims = random.choices([256, 512], k=remaining) # Generate random hidden dimensions + + # Append randomly generated settings to selected lists + selected_seeds += random_seeds + selected_hdims += random_hdims + return selected_seeds, selected_hdims def save_population(args, population): name_prefix = 'pop' diff --git a/tests/test_oai_agents/test_population.py b/tests/test_oai_agents/test_population.py new file mode 100644 index 00000000..9b3d0004 --- /dev/null +++ b/tests/test_oai_agents/test_population.py @@ -0,0 +1,39 @@ +# test_population.py + +from oai_agents.common.population import generate_hdim_and_seed + +def test_generate_hdim_and_seed(): + ''' + Test function for generate_hdim_and_seed to ensure: + 1. The number of (hidden_dim, seed) pairs matches the number of required agents. + 2. All generated seeds are unique. + 3. Hidden dimensions are as expected (either 64 or 256). + ''' + + # Test cases + test_cases = [3, 5, 8, 10] # Testing for fewer than, equal to, and more than predefined settings + + for num_agents in test_cases: + print(f"\nTesting with {num_agents} agents:") + + # Generate (hidden_dim, seed) pairs + selected_seeds, selected_hdims = generate_hdim_and_seed(num_agents) + + # Check that the correct number of agents is generated + assert len(selected_seeds) == num_agents, f"Expected {num_agents} seeds, got {len(selected_seeds)}" + assert len(selected_hdims) == num_agents, f"Expected {num_agents} hidden dims, got {len(selected_hdims)}" + + # Check that all seeds are unique + assert len(set(selected_seeds)) == num_agents, "Duplicate seeds found in the generated seeds." + + # Check that hidden dims are from the valid set (64, 256) + assert all(hdim in [256, 512] for hdim in selected_hdims), "Invalid hidden dimension found. Only 64 and 256 are allowed." 
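        # Seed uniqueness holds by construction in generate_hdim_and_seed: any extra seeds are
        # drawn with random.sample from range(5000) minus the already-selected predefined seeds,
        # and the extra hidden dims come from random.choices([256, 512]).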
+ + print(f"Test passed for {num_agents} agents.") + print("Selected seeds:", selected_seeds) + print("Selected hidden dimensions:", selected_hdims) + +# Ensure that this test script only runs when executed directly +if __name__ == "__main__": + print("Running tests in population.py...") + test_generate_hdim_and_seed() From e418ebfb4252ff94cc0b2c9cc4ff606be3393a4e Mon Sep 17 00:00:00 2001 From: ttopiac Date: Mon, 4 Nov 2024 14:29:55 -0700 Subject: [PATCH 08/61] Replace aamas25 in population.py by tag.CheckedPoints.FINAL_TRAINED_MODEL --- oai_agents/common/population.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 0f3bfdb0..967258c9 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -5,6 +5,7 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.tags import AgentPerformance, KeyCheckpoints, TeamType + from .curriculum import Curriculum import random @@ -202,4 +203,4 @@ def get_population(args, save_population(args=args, population=population) - return population + return population \ No newline at end of file From b7511637a84a9c0010352126a78fc43074463dc3 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Tue, 5 Nov 2024 09:58:42 -0700 Subject: [PATCH 09/61] Replace the first loop in get_best_SP_agent by a line of code. --- oai_agents/common/teammates_collection.py | 3 +-- scripts/utils/train_helper.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/oai_agents/common/teammates_collection.py b/oai_agents/common/teammates_collection.py index 415c45d6..b0594d91 100644 --- a/oai_agents/common/teammates_collection.py +++ b/oai_agents/common/teammates_collection.py @@ -176,8 +176,7 @@ def generate_TC(args, def get_best_SP_agent(args, population): agents_scores_averaged_over_layouts = [] - for layout_name in args.layout_names: - all_agents = [agent for agent in population[layout_name]] + all_agents = [agent for agent in population[args.layout_names[0]]] for agent in all_agents: scores = [agent.layout_scores[layout_name] for layout_name in args.layout_names] diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index ef085710..f8ed99da 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -1,6 +1,6 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.tags import TeamType -from oai_agents.common.population import get_population +from oai_agents.common.population import get_population, generate_hdim_and_seed from oai_agents.common.teammates_collection import generate_TC, get_best_SP_agent, generate_TC_for_ADV_agent, update_TC_w_ADV_teammates from oai_agents.common.curriculum import Curriculum from .common import load_agents, generate_name From 0d4f99d0b5f0488ddde7b5784f8a1afaba2dc1ce Mon Sep 17 00:00:00 2001 From: ttopiac Date: Thu, 7 Nov 2024 09:08:08 -0700 Subject: [PATCH 10/61] Fix merge conflicts --- oai_agents/agents/rl.py | 6 +++--- oai_agents/common/population.py | 3 ++- oai_agents/common/tags.py | 2 ++ oai_agents/common/teammates_collection.py | 8 +++++--- sandbox/fix_pop_ck_list_after_continued_run.py | 2 +- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index 8850c870..1bc66ff3 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -265,7 +265,7 @@ def should_evaluate(self, steps): steps_divisable_by_15 = (steps + 1) % 15 == 0 mean_rew_greater_than_best = mean_training_rew > 
self.best_training_rew and self.learning_agent.num_timesteps >= 5e6 checkpoint_rate_reached = self.checkpoint_rate and self.learning_agent.num_timesteps // self.checkpoint_rate > (len(self.ck_list) - 1) - + return steps_divisable_by_15 or mean_rew_greater_than_best or checkpoint_rate_reached def log_details(self, experiment_name, total_train_timesteps): @@ -295,7 +295,7 @@ def train_agents(self, total_train_timesteps, tag, exp_name=None): if self.checkpoint_rate is not None: self.ck_list = [] - path, tag = self.save_agents(tag=f'ck_{len(self.ck_list)}') + path, tag = self.save_agents(tag=f'{KeyCheckpoints.CHECKED_MODEL_PREFIX}{len(self.ck_list)}') self.ck_list.append(({k: 0 for k in self.args.layout_names}, path, tag)) best_path, best_tag = None, None @@ -331,7 +331,7 @@ def train_agents(self, total_train_timesteps, tag, exp_name=None): if self.checkpoint_rate: if self.learning_agent.num_timesteps // self.checkpoint_rate > (len(self.ck_list) - 1): - path, tag = self.save_agents(tag=f'ck_{len(self.ck_list)}_rew_{mean_reward}') + path, tag = self.save_agents(tag=f'{KeyCheckpoints.CHECKED_MODEL_PREFIX}{len(self.ck_list)}{KeyCheckpoints.REWARD_SUBSTR}{mean_reward}') self.ck_list.append((rew_per_layout, path, tag)) if mean_reward >= self.best_score: diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 967258c9..4ea9304d 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -11,7 +11,7 @@ import random -def train_agent_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_dim, serialize): +def train_agent_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_dim, serialize, force_training): ''' Returns ckeckpoints_list either serialized or not based on serialize flag @@ -180,6 +180,7 @@ def get_population(args, (args, total_training_timesteps, ck_rate, seed[i], h_dim[i], True) for i in range(num_SPs_to_train) ] + if args.parallel: with concurrent.futures.ProcessPoolExecutor(max_workers=args.max_concurrent_jobs) as executor: arg_lists = list(zip(*inputs)) diff --git a/oai_agents/common/tags.py b/oai_agents/common/tags.py index 7e70b9cb..a123ef65 100644 --- a/oai_agents/common/tags.py +++ b/oai_agents/common/tags.py @@ -84,6 +84,8 @@ class TeammatesCollection: class KeyCheckpoints: # Tags to identify the type of model checkpoint to save/load BEST_EVAL_REWARD = 'best' # Use only for evaluation MOST_RECENT_TRAINED_MODEL = 'last' # Use only for training + CHECKED_MODEL_PREFIX = 'ck_' + REWARD_SUBSTR = '_rew_' class Prefix: SELF_PLAY = 'SP' diff --git a/oai_agents/common/teammates_collection.py b/oai_agents/common/teammates_collection.py index b0594d91..57ab7821 100644 --- a/oai_agents/common/teammates_collection.py +++ b/oai_agents/common/teammates_collection.py @@ -174,17 +174,19 @@ def generate_TC(args, def get_best_SP_agent(args, population): + # all_agents = [agent for agent in population[args.layout_names[0]]] + all_agents = get_all_agents( args.layout_names[0], population) agents_scores_averaged_over_layouts = [] - all_agents = [agent for agent in population[args.layout_names[0]]] - for agent in all_agents: scores = [agent.layout_scores[layout_name] for layout_name in args.layout_names] agents_scores_averaged_over_layouts.append((agent, sum(scores)/len(scores))) best_agent = max(agents_scores_averaged_over_layouts, key=lambda x: x[1]) return best_agent[0] - +def get_all_agents(layout_name, population): + all_agents = [agent for agent in population[layout_name]] + return all_agents def 
update_eval_collection_with_eval_types_from_file(args, agent, unseen_teammates_len, eval_types, eval_collection): for teammates in eval_types: diff --git a/sandbox/fix_pop_ck_list_after_continued_run.py b/sandbox/fix_pop_ck_list_after_continued_run.py index eece6e0b..5e1ad08d 100644 --- a/sandbox/fix_pop_ck_list_after_continued_run.py +++ b/sandbox/fix_pop_ck_list_after_continued_run.py @@ -4,7 +4,7 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.arguments import get_arguments -from oai_agents.common.tags import TeamType +from oai_agents.common.tags import TeamType, CheckedPoints from oai_agents.common.learner import LearnerType from oai_agents.common.tags import KeyCheckpoints From c15969004a30c10f49872fdaad6d597e4a39b8ef Mon Sep 17 00:00:00 2001 From: ttopiac Date: Tue, 5 Nov 2024 20:48:14 -0700 Subject: [PATCH 11/61] Add a function for us to list agent's checked tags and also a test function to test it. --- oai_agents/agents/base_agent.py | 44 +++++++++++- tests/test_oai_agents/test_base_agent.py | 85 ++++++++++++++++++++++++ 2 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 tests/test_oai_agents/test_base_agent.py diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index 7aa3ec79..74a1d90e 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -23,6 +23,7 @@ import wandb import os import random +import re class OAIAgent(nn.Module, ABC): """ @@ -405,7 +406,7 @@ def evaluate(self, eval_agent, num_eps_per_layout_per_tm=5, visualize=False, tim # This is outside of the for loop, meaning that each time we evaluate the same player positions across all layouts for a fair comparison selected_p_indexes = random.sample(range(self.args.num_players), min(3, self.args.num_players)) - for _, env in enumerate(self.eval_envs): + for _, env in enumerate(self.eval_envs): rew_per_layout_per_teamtype[env.layout_name] = { teamtype: [] for teamtype in self.eval_teammates_collection[env.layout_name] } @@ -503,3 +504,44 @@ def load_agents(args, tag, name: str=None, path: Union[Path, None] = None): agent.to(device) agents.append(agent) return agents + + @staticmethod + def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None) -> List[str]: + ''' + Lists only tags that start with CheckedPoints.CHECKED_MODEL_PREFIX, followed by an integer. + If the integer is greater than 0, it must be followed by CheckedPoints.REWARD_SUBSTR and a floating-point number. + + Parameters: + - args: Experiment arguments containing base directory info. + - name: The name of the agent (or experiment) for which tags should be listed. + - path: Optional. If provided, it overrides the default path to the agents directory. + + Returns: + - A list of tags (directories) that match the specified pattern. 
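        - Example (assuming the ck_ naming used by train_agents): ["ck_0", "ck_1_rew_59.5", "ck_2_rew_140.0"].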
+ ''' + if not path: + if args.exp_dir: + path = args.base_dir / 'agent_models' / args.exp_dir / name + else: + path = args.base_dir / 'agent_models' / name + + # Ensure the directory exists + if not path.exists() or not path.is_dir(): + raise FileNotFoundError(f"Agent directory not found: {path}") + + # Define the prefix and the regular expression to match the pattern + prefix = KeyCheckpoints.CHECKED_MODEL_PREFIX + reward_substr = KeyCheckpoints.REWARD_SUBSTR + pattern = re.compile(f"^{re.escape(prefix)}(\\d+)(?:{re.escape(reward_substr)}[\\d.]+)?$") + + # List all subdirectories (tags) that match the pattern + tags = [] + for tag in path.iterdir(): + if tag.is_dir() and pattern.match(tag.name): + match = pattern.match(tag.name) + integer_part = int(match.group(1)) + # Only add tags that either have no reward substring for integer 0, or have it when integer > 0 + if integer_part == 0 or (integer_part > 0 and reward_substr in tag.name): + tags.append(tag.name) + + return tags diff --git a/tests/test_oai_agents/test_base_agent.py b/tests/test_oai_agents/test_base_agent.py new file mode 100644 index 00000000..9c18603f --- /dev/null +++ b/tests/test_oai_agents/test_base_agent.py @@ -0,0 +1,85 @@ +from oai_agents.agents.base_agent import OAITrainer +from pathlib import Path +from oai_agents.common.tags import CheckedPoints +import shutil + +def test_list_agent_checked_tags(): + # Define base directory based on the current working directory + base_dir = Path.cwd() + + # Set up the directory structure for testing + # This will create the following structure within the current working directory: + # + # / + # └── agent_models/ + # └── test_agents_folder/ + # └── test_agent/ + # ├── ck_0/ + # ├── ck_1_rew_59.5/ + # ├── ck_2_rew_140.0/ + # ├── ck_10_rew_336.8888888888889/ + # ├── ck_3_invalid/ # Should not match + # ├── ck_4_rew_invalid/ # Should not match + # ├── unrelated_tag/ # Should not match + # ├── best/ # Should not match + # └── last/ # Should not match + # + # Only `ck_0`, `ck_1_rew_59.5`, `ck_2_rew_140.0`, and `ck_10_rew_336.8888888888889` + # should be returned by the function. 
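    # For reference, the pattern these names exercise (built from CHECKED_MODEL_PREFIX and
    # REWARD_SUBSTR) is effectively ^ck_(\d+)(?:_rew_[\d.]+)?$: an integer index, optionally
    # followed by "_rew_" and a float. That is why "ck_3_invalid" and "ck_4_rew_invalid"
    # should be rejected while "ck_10_rew_336.8888888888889" is accepted.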
+ + test_dir = base_dir / "agent_models" / "test_agents_folder" / "test_agent" + test_dir.mkdir(parents=True, exist_ok=True) # Ensure all parent directories are created + + # Simulate directory structure with various tags + tag_names = [ + "ck_0", + "ck_1_rew_59.5", + "ck_2_rew_140.0", + "ck_10_rew_336.8888888888889", + "ck_3_invalid", # Invalid because it doesn't have a valid float after the integer + "ck_4_rew_invalid", # Invalid because reward value is not a float + "unrelated_tag", # Invalid because it doesn't start with `CheckedPoints.CHECKED_MODEL_PREFIX` + "best", + "last" + ] + + # Create these tag directories within the test directory + for tag_name in tag_names: + (test_dir / tag_name).mkdir(parents=True, exist_ok=True) + + # Mock args object with base_dir and exp_dir pointing to the test directory + class MockArgs: + def __init__(self, base_dir, exp_dir, layout_names=[]): + self.base_dir = base_dir + self.exp_dir = "test_agents_folder" + self.layout_names = layout_names + + args = MockArgs(base_dir=base_dir, exp_dir="test_agents_folder") + + # Call the function to test + checked_tags = OAITrainer.list_agent_checked_tags(args, name="test_agent") + + # Expected tags should only include those that match the pattern + expected_tags = [ + "ck_0", + "ck_1_rew_59.5", + "ck_2_rew_140.0", + "ck_10_rew_336.8888888888889" + ] + + # Print results for verification + if sorted(checked_tags) == sorted(expected_tags): + print("Test passed: Tags returned as expected.") + else: + print(f"Test failed: Expected {expected_tags}, but got {checked_tags}") + + # Clean up the test directories after the test + # This will remove the entire "agent_models/test_agents_folder" structure created for testing + shutil.rmtree(base_dir / "agent_models" / "test_agents_folder") + +# Run the test function +test_list_agent_checked_tags() + + + + From 1763e961e048fc082e348946b6911b305b8413f9 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Tue, 5 Nov 2024 20:50:58 -0700 Subject: [PATCH 12/61] rewrite a comment --- oai_agents/agents/base_agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index 74a1d90e..6a18695a 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -512,8 +512,8 @@ def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None If the integer is greater than 0, it must be followed by CheckedPoints.REWARD_SUBSTR and a floating-point number. Parameters: - - args: Experiment arguments containing base directory info. - - name: The name of the agent (or experiment) for which tags should be listed. + - args: Experiment arguments containing base directory info and experiment directory info. + - name: The name of the agent for which tags should be listed. - path: Optional. If provided, it overrides the default path to the agents directory. Returns: From b4cb9e8a05d0ff375b738706c9630d5d697910f3 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Tue, 5 Nov 2024 09:58:42 -0700 Subject: [PATCH 13/61] Replace the first loop in get_best_SP_agent by a line of code. 
--- oai_agents/common/teammates_collection.py | 2 ++ scripts/utils/train_helper.py | 1 + 2 files changed, 3 insertions(+) diff --git a/oai_agents/common/teammates_collection.py b/oai_agents/common/teammates_collection.py index 57ab7821..d64d44d4 100644 --- a/oai_agents/common/teammates_collection.py +++ b/oai_agents/common/teammates_collection.py @@ -178,6 +178,8 @@ def get_best_SP_agent(args, population): all_agents = get_all_agents( args.layout_names[0], population) agents_scores_averaged_over_layouts = [] + all_agents = [agent for agent in population[args.layout_names[0]]] + for agent in all_agents: scores = [agent.layout_scores[layout_name] for layout_name in args.layout_names] agents_scores_averaged_over_layouts.append((agent, sum(scores)/len(scores))) diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index f8ed99da..ea39c966 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -1,6 +1,7 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.tags import TeamType from oai_agents.common.population import get_population, generate_hdim_and_seed +from oai_agents.common.population import get_population, generate_hdim_and_seed from oai_agents.common.teammates_collection import generate_TC, get_best_SP_agent, generate_TC_for_ADV_agent, update_TC_w_ADV_teammates from oai_agents.common.curriculum import Curriculum from .common import load_agents, generate_name From ed9153d67c08b1d265555c7485a1842f2f1ddc6b Mon Sep 17 00:00:00 2001 From: ttopiac Date: Tue, 5 Nov 2024 15:52:05 -0700 Subject: [PATCH 14/61] Add two CheckedPoints tags, including CheckedModelPrefix and REWARD_SUBSTR --- oai_agents/agents/rl.py | 1 + oai_agents/common/teammates_collection.py | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index 1bc66ff3..56d1ef40 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -29,6 +29,7 @@ def __init__(self, teammates_collection, args, name = name or 'rl_agent' super(RLAgentTrainer, self).__init__(name, args, seed=seed) + self.args = args self.device = args.device self.teammates_len = self.args.teammates_len diff --git a/oai_agents/common/teammates_collection.py b/oai_agents/common/teammates_collection.py index d64d44d4..57ab7821 100644 --- a/oai_agents/common/teammates_collection.py +++ b/oai_agents/common/teammates_collection.py @@ -178,8 +178,6 @@ def get_best_SP_agent(args, population): all_agents = get_all_agents( args.layout_names[0], population) agents_scores_averaged_over_layouts = [] - all_agents = [agent for agent in population[args.layout_names[0]]] - for agent in all_agents: scores = [agent.layout_scores[layout_name] for layout_name in args.layout_names] agents_scores_averaged_over_layouts.append((agent, sum(scores)/len(scores))) From 1f27549395e524cc48c4402b57c1eb12420c3f93 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Thu, 7 Nov 2024 09:45:31 -0700 Subject: [PATCH 15/61] Replace CheckedPoints by KeyCheckpoints --- oai_agents/agents/base_agent.py | 8 ++++---- oai_agents/agents/rl.py | 6 +++--- oai_agents/common/population.py | 4 ++-- sandbox/fix_pop_ck_list_after_continued_run.py | 2 +- scripts/train_agents.py | 4 ++-- tests/test_oai_agents/test_base_agent.py | 4 ++-- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index 102cb553..bd353bac 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -522,8 +522,8 @@ def 
load_agents(args, tag, name: str=None, path: Union[Path, None] = None): @staticmethod def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None) -> List[str]: ''' - Lists only tags that start with CheckedPoints.CHECKED_MODEL_PREFIX, followed by an integer. - If the integer is greater than 0, it must be followed by CheckedPoints.REWARD_SUBSTR and a floating-point number. + Lists only tags that start with KeyCheckpoints.CHECKED_MODEL_PREFIX, followed by an integer. + If the integer is greater than 0, it must be followed by KeyCheckpoints.REWARD_SUBSTR and a floating-point number. Parameters: - args: Experiment arguments containing base directory info and experiment directory info. @@ -548,8 +548,8 @@ def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None prefix = KeyCheckpoints.CHECKED_MODEL_PREFIX reward_substr = KeyCheckpoints.REWARD_SUBSTR ======= - prefix = CheckedPoints.CHECKED_MODEL_PREFIX - reward_substr = CheckedPoints.REWARD_SUBSTR + prefix = KeyCheckpoints.CHECKED_MODEL_PREFIX + reward_substr = KeyCheckpoints.REWARD_SUBSTR >>>>>>> 4bf7932b797e1170e6f5fc0d39c77e538e313d63 pattern = re.compile(f"^{re.escape(prefix)}(\\d+)(?:{re.escape(reward_substr)}[\\d.]+)?$") diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index f27254f6..835052ba 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -303,7 +303,7 @@ def train_agents(self, total_train_timesteps, tag, exp_name=None): <<<<<<< HEAD path, tag = self.save_agents(tag=f'{KeyCheckpoints.CHECKED_MODEL_PREFIX}{len(self.ck_list)}') ======= - path, tag = self.save_agents(tag=f'{CheckedPoints.FIRST_CHECKED_MODEL}') + path, tag = self.save_agents(tag=f'{KeyCheckpoints.FIRST_CHECKED_MODEL}') >>>>>>> 4bf7932b797e1170e6f5fc0d39c77e538e313d63 self.ck_list.append(({k: 0 for k in self.args.layout_names}, path, tag)) @@ -343,7 +343,7 @@ def train_agents(self, total_train_timesteps, tag, exp_name=None): <<<<<<< HEAD path, tag = self.save_agents(tag=f'{KeyCheckpoints.CHECKED_MODEL_PREFIX}{len(self.ck_list)}{KeyCheckpoints.REWARD_SUBSTR}{mean_reward}') ======= - path, tag = self.save_agents(tag=f'{CheckedPoints.CHECKED_MODEL_PREFIX}{len(self.ck_list)}{CheckedPoints.REWARD_SUBSTR}{mean_reward}') + path, tag = self.save_agents(tag=f'{KeyCheckpoints.CHECKED_MODEL_PREFIX}{len(self.ck_list)}{KeyCheckpoints.REWARD_SUBSTR}{mean_reward}') >>>>>>> 4bf7932b797e1170e6f5fc0d39c77e538e313d63 self.ck_list.append((rew_per_layout, path, tag)) @@ -384,7 +384,7 @@ def get_agents_and_set_score_and_perftag(args, layout_name, scores_path_tag, per return all_agents @staticmethod - def get_checkedpoints_agents(args, ck_list, layout_name): + def get_KeyCheckpoints_agents(args, ck_list, layout_name): ''' categorizes agents using performance tags based on the checkpoint list AgentPerformance.HIGH diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 4ea9304d..86807daf 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -188,7 +188,7 @@ def get_population(args, for dilled_res in dilled_results: checkpoints_list = dill.loads(dilled_res) for layout_name in args.layout_names: - layout_pop = RLAgentTrainer.get_checkedpoints_agents(args, checkpoints_list, layout_name) + layout_pop = RLAgentTrainer.get_KeyCheckpoints_agents(args, checkpoints_list, layout_name) population[layout_name].extend(layout_pop) else: for inp in inputs: @@ -199,7 +199,7 @@ def get_population(args, h_dim=inp[4], serialize=False) for layout_name in args.layout_names: - 
layout_pop = RLAgentTrainer.get_checkedpoints_agents(args, checkpoints_list, layout_name) + layout_pop = RLAgentTrainer.get_KeyCheckpoints_agents(args, checkpoints_list, layout_name) population[layout_name].extend(layout_pop) save_population(args=args, population=population) diff --git a/sandbox/fix_pop_ck_list_after_continued_run.py b/sandbox/fix_pop_ck_list_after_continued_run.py index 5e1ad08d..4dd1345c 100644 --- a/sandbox/fix_pop_ck_list_after_continued_run.py +++ b/sandbox/fix_pop_ck_list_after_continued_run.py @@ -4,7 +4,7 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.arguments import get_arguments -from oai_agents.common.tags import TeamType, CheckedPoints +from oai_agents.common.tags import TeamType, KeyCheckpoints from oai_agents.common.learner import LearnerType from oai_agents.common.tags import KeyCheckpoints diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 52218307..1a053c8e 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -62,8 +62,8 @@ def SPN_1ADV_XSPCKP(args) -> None: ''' In N-agents games, a randomly initialized agent will be trained with N-X copies of itself and X unseen teammates. X unseen teammates can be composed by either one of the two conditions: - (a) 1 adversary and X-1 self-play checkedpoints. - (b) X self-play checkedpoints. + (a) 1 adversary and X-1 self-play KeyCheckpoints. + (b) X self-play KeyCheckpoints. e.g. when N is 4 and X is 1, the team can be composed by [SP, SP, SP, ADV] or [SP, SP, SP, H] or [SP, SP, SP, M] or [SP, SP, SP, L] in a 4-chef layout. when N is 4 and X is 2, the team can be composed diff --git a/tests/test_oai_agents/test_base_agent.py b/tests/test_oai_agents/test_base_agent.py index 9c18603f..1ea13e6a 100644 --- a/tests/test_oai_agents/test_base_agent.py +++ b/tests/test_oai_agents/test_base_agent.py @@ -1,6 +1,6 @@ from oai_agents.agents.base_agent import OAITrainer from pathlib import Path -from oai_agents.common.tags import CheckedPoints +from oai_agents.common.tags import KeyCheckpoints import shutil def test_list_agent_checked_tags(): @@ -38,7 +38,7 @@ def test_list_agent_checked_tags(): "ck_10_rew_336.8888888888889", "ck_3_invalid", # Invalid because it doesn't have a valid float after the integer "ck_4_rew_invalid", # Invalid because reward value is not a float - "unrelated_tag", # Invalid because it doesn't start with `CheckedPoints.CHECKED_MODEL_PREFIX` + "unrelated_tag", # Invalid because it doesn't start with `KeyCheckpoints.CHECKED_MODEL_PREFIX` "best", "last" ] From 706a84ce0694fe6f0f38427bab055cc7fde9432f Mon Sep 17 00:00:00 2001 From: ttopiac Date: Thu, 7 Nov 2024 09:47:54 -0700 Subject: [PATCH 16/61] Fix uncommited merged conflicts in rl.py --- oai_agents/agents/rl.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index 835052ba..9df11e92 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -267,11 +267,7 @@ def should_evaluate(self, steps): mean_rew_greater_than_best = mean_training_rew > self.best_training_rew and self.learning_agent.num_timesteps >= 5e6 checkpoint_rate_reached = self.checkpoint_rate and self.learning_agent.num_timesteps // self.checkpoint_rate > (len(self.ck_list) - 1) -<<<<<<< HEAD return steps_divisable_by_15 or mean_rew_greater_than_best or checkpoint_rate_reached -======= - return steps_divisable_by_5 or mean_rew_greater_than_best or checkpoint_rate_reached ->>>>>>> 4bf7932b797e1170e6f5fc0d39c77e538e313d63 def log_details(self, experiment_name, 
total_train_timesteps): print("Training agent: " + self.name + ", for experiment: " + experiment_name) @@ -300,11 +296,7 @@ def train_agents(self, total_train_timesteps, tag, exp_name=None): if self.checkpoint_rate is not None: self.ck_list = [] -<<<<<<< HEAD path, tag = self.save_agents(tag=f'{KeyCheckpoints.CHECKED_MODEL_PREFIX}{len(self.ck_list)}') -======= - path, tag = self.save_agents(tag=f'{KeyCheckpoints.FIRST_CHECKED_MODEL}') ->>>>>>> 4bf7932b797e1170e6f5fc0d39c77e538e313d63 self.ck_list.append(({k: 0 for k in self.args.layout_names}, path, tag)) best_path, best_tag = None, None @@ -340,11 +332,7 @@ def train_agents(self, total_train_timesteps, tag, exp_name=None): if self.checkpoint_rate: if self.learning_agent.num_timesteps // self.checkpoint_rate > (len(self.ck_list) - 1): -<<<<<<< HEAD path, tag = self.save_agents(tag=f'{KeyCheckpoints.CHECKED_MODEL_PREFIX}{len(self.ck_list)}{KeyCheckpoints.REWARD_SUBSTR}{mean_reward}') -======= - path, tag = self.save_agents(tag=f'{KeyCheckpoints.CHECKED_MODEL_PREFIX}{len(self.ck_list)}{KeyCheckpoints.REWARD_SUBSTR}{mean_reward}') ->>>>>>> 4bf7932b797e1170e6f5fc0d39c77e538e313d63 self.ck_list.append((rew_per_layout, path, tag)) if mean_reward >= self.best_score: From a75825114fe0e7b266029de112184c117bd79d7b Mon Sep 17 00:00:00 2001 From: ttopiac Date: Thu, 7 Nov 2024 09:51:02 -0700 Subject: [PATCH 17/61] Fix conflicts in base_agent.py --- oai_agents/agents/base_agent.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index bd353bac..5df17f5d 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -402,24 +402,10 @@ def evaluate(self, eval_agent, num_eps_per_layout_per_tm=5, visualize=False, tim rew_per_layout_per_teamtype = {} rew_per_layout = {} -<<<<<<< HEAD # To reduce evaluation time: instead of evaluating all players, we randomly select three of player positions for evaluation # This is outside of the for loop, meaning that each time we evaluate the same player positions across all layouts for a fair comparison selected_p_indexes = random.sample(range(self.args.num_players), min(3, self.args.num_players)) -======= - ''' - dict - teammates_collection = { - 'layout_name': { - 'TeamType.HIGH_FIRST': [[agent1, agent2], ...], - 'TeamType.MEDIUM_FIRST': [[agent3, agent4], ...], - 'TeamType.LOW_FIRST': [[agent5, agent6], ...], - 'TeamType.RANDOM': [[agent7, agent8], ...], - }, - } - ''' ->>>>>>> 4bf7932b797e1170e6f5fc0d39c77e538e313d63 for _, env in enumerate(self.eval_envs): rew_per_layout_per_teamtype[env.layout_name] = { teamtype: [] for teamtype in self.eval_teammates_collection[env.layout_name] @@ -544,13 +530,8 @@ def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None raise FileNotFoundError(f"Agent directory not found: {path}") # Define the prefix and the regular expression to match the pattern -<<<<<<< HEAD - prefix = KeyCheckpoints.CHECKED_MODEL_PREFIX - reward_substr = KeyCheckpoints.REWARD_SUBSTR -======= prefix = KeyCheckpoints.CHECKED_MODEL_PREFIX reward_substr = KeyCheckpoints.REWARD_SUBSTR ->>>>>>> 4bf7932b797e1170e6f5fc0d39c77e538e313d63 pattern = re.compile(f"^{re.escape(prefix)}(\\d+)(?:{re.escape(reward_substr)}[\\d.]+)?$") # List all subdirectories (tags) that match the pattern From a5a1fcbc1e29dfd17b202a2e0bb212ca8c1dc0fa Mon Sep 17 00:00:00 2001 From: ttopiac Date: Thu, 7 Nov 2024 09:56:17 -0700 Subject: [PATCH 18/61] Replace get_KeyCheckpoints_agents by get_checkedpoints_agents 
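
Only the identifier changes back to the original lower-case spelling; the helper's behaviour is
untouched. For reference, it works off the ck_list built in train_agents(), whose entries are
(rew_per_layout, path, tag) tuples, and sorts the saved checkpoints into AgentPerformance.HIGH /
MEDIUM / LOW agents per layout. A loose sketch of that idea (the bucketing below is an assumption
for illustration, not the actual implementation):

    # ck_list entries look like ({layout_name: reward, ...}, path, tag), as appended in train_agents()
    def sketch_bucket_checkpoints(ck_list, layout_name):
        ranked = sorted(ck_list, key=lambda ck: ck[0][layout_name])
        low, medium, high = ranked[0], ranked[len(ranked) // 2], ranked[-1]
        return {'low': low, 'medium': medium, 'high': high}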
--- oai_agents/agents/rl.py | 2 +- oai_agents/common/population.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index 9df11e92..56d1ef40 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -372,7 +372,7 @@ def get_agents_and_set_score_and_perftag(args, layout_name, scores_path_tag, per return all_agents @staticmethod - def get_KeyCheckpoints_agents(args, ck_list, layout_name): + def get_checkedpoints_agents(args, ck_list, layout_name): ''' categorizes agents using performance tags based on the checkpoint list AgentPerformance.HIGH diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 86807daf..4ea9304d 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -188,7 +188,7 @@ def get_population(args, for dilled_res in dilled_results: checkpoints_list = dill.loads(dilled_res) for layout_name in args.layout_names: - layout_pop = RLAgentTrainer.get_KeyCheckpoints_agents(args, checkpoints_list, layout_name) + layout_pop = RLAgentTrainer.get_checkedpoints_agents(args, checkpoints_list, layout_name) population[layout_name].extend(layout_pop) else: for inp in inputs: @@ -199,7 +199,7 @@ def get_population(args, h_dim=inp[4], serialize=False) for layout_name in args.layout_names: - layout_pop = RLAgentTrainer.get_KeyCheckpoints_agents(args, checkpoints_list, layout_name) + layout_pop = RLAgentTrainer.get_checkedpoints_agents(args, checkpoints_list, layout_name) population[layout_name].extend(layout_pop) save_population(args=args, population=population) From 1858053b011f60787d7ecc8b6a41ea85cd867c75 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Thu, 7 Nov 2024 10:00:05 -0700 Subject: [PATCH 19/61] ? --- oai_agents/common/tags.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/oai_agents/common/tags.py b/oai_agents/common/tags.py index c03c39d1..a123ef65 100644 --- a/oai_agents/common/tags.py +++ b/oai_agents/common/tags.py @@ -81,23 +81,9 @@ class TeammatesCollection: TRAIN = 'train' EVAL = 'eval' -<<<<<<< HEAD class KeyCheckpoints: # Tags to identify the type of model checkpoint to save/load BEST_EVAL_REWARD = 'best' # Use only for evaluation MOST_RECENT_TRAINED_MODEL = 'last' # Use only for training -======= -class CheckedPoints(): - # During training, we saved models if it reaches the best eval/training reward, worst eval/training reward. - # In addition, we also save the very last one. - # For all of them, we assign them a tag so that we can use them in the future. - # For example, if the training reaches best evaluation reward, we saved it with a tag BEST_EVAL_REWARD. 
- BEST_EVAL_REWARD = 'best' - WORST_EVAL_REWARD ='worst' - BEST_TRAIN_REWARD = 'best_train_reward' - WORST_TRAIN_REWARD ='worst_train_reward' - FINAL_TRAINED_MODEL = 'last' - FIRST_CHECKED_MODEL = 'ck_0' ->>>>>>> 4bf7932b797e1170e6f5fc0d39c77e538e313d63 CHECKED_MODEL_PREFIX = 'ck_' REWARD_SUBSTR = '_rew_' From 821a46e55ca1e773daa803ee9945985c410747fa Mon Sep 17 00:00:00 2001 From: ttopiac Date: Thu, 7 Nov 2024 10:03:34 -0700 Subject: [PATCH 20/61] Fix train_helper.py --- scripts/utils/train_helper.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index 6b9507dc..f8ed99da 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -1,7 +1,6 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.tags import TeamType from oai_agents.common.population import get_population, generate_hdim_and_seed -from oai_agents.common.population import get_population, generate_hdim_and_seed from oai_agents.common.teammates_collection import generate_TC, get_best_SP_agent, generate_TC_for_ADV_agent, update_TC_w_ADV_teammates from oai_agents.common.curriculum import Curriculum from .common import load_agents, generate_name @@ -9,12 +8,7 @@ from oai_agents.common.tags import KeyCheckpoints -<<<<<<< HEAD def get_SP_agent(args, train_types, eval_types, curriculum, tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL): -======= - -def get_SP_agent(args, train_types, eval_types, curriculum, tag=None): ->>>>>>> 4bf7932b797e1170e6f5fc0d39c77e538e313d63 name = generate_name(args, prefix=Prefix.SELF_PLAY, seed=args.SP_seed, From 610eb96c1159a822f9f5815fd0e49195d5d11284 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Thu, 7 Nov 2024 15:42:44 -0700 Subject: [PATCH 21/61] little things --- scripts/train_agents.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 1a053c8e..55fd2e2b 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -312,7 +312,7 @@ def set_input(args): args.fcp_total_training_timesteps = int(5e6 * args.how_long) args.n_x_fcp_total_training_timesteps = int(2 * args.fcp_total_training_timesteps * args.how_long) - args.SP_seed, args.SP_h_dim = 68, 256 + args.SP_seed, args.SP_h_dim = 1010, 256 args.N_X_SP_seed, args.N_X_SP_h_dim = 1010, 256 args.FCP_seed, args.FCP_h_dim = 2020, 256 args.N_X_FCP_seed, args.N_X_FCP_h_dim = 2602, 256 @@ -348,14 +348,14 @@ def set_input(args): args.adversary_force_training = False args.primary_force_training = False - args.teammates_len = 2 - args.how_long = 6 # Not effective in quick_test mode + args.teammates_len = 3 + args.how_long = 6*4 # Not effective in quick_test mode set_input(args=args) - SPN_1ADV_XSPCKP(args=args) + # SPN_1ADV_XSPCKP(args=args) - # SP(args) + SP(args) # FCP_traditional(args=args) From 4d5780bd1bb260f11b5745eb2a5073249cf07956 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Thu, 7 Nov 2024 15:54:52 -0700 Subject: [PATCH 22/61] Add SPN_XSPCKP_HP_TYPE --- scripts/train_agents.py | 44 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 55fd2e2b..9f2e915b 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -161,6 +161,46 @@ def SPN_XSPCKP(args) -> None: unseen_teammates_len=unseen_teammates_len, ) +def SPN_XSPCKP_HP_TYPE(args) -> None: + ''' + In N-agents games, a randomly initialized agent will be trained with N-X copies of itself + and X 
homogeneous unseen teammates, which are checkpoints saved during a previous self-play process.
+    These saved checkpoints are categorized into High, Medium, Low performance.
+    e.g.
+    when N is 4 and X is 1, the team can be composed by [SP, SP, SP, H], [SP, SP, SP, M], [SP, SP, SP, L] in a 4-chef layout.
+    when N is 4 and X is 2, the team can be composed by [SP, SP, H, H], [SP, SP, M, M], [SP, SP, L, L] in a 4-chef layout.
+
+
+    Please note that
+    - X is the number of unseen teammates.
+    - X is assigned by the variable unseen_teammates_len in the function.
+
+
+    :param pop_force_training: Boolean that, if true, indicates population should be generated, otherwise load it from file
+    :param primary_force_training: Boolean that, if true, indicates the SP agent teammates_collection should be trained instead of loaded from file.
+    '''
+    unseen_teammates_len = 1
+    primary_train_types = [TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_HIGH,
+                           # TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_MEDIUM,
+                           # TeamType.SELF_PLAY_LOW, TeamType.SELF_PLAY_LOW, TeamType.SELF_PLAY_LOW, TeamType.SELF_PLAY_LOW,
+                        ]
+    primary_eval_types = {
+        'generate': [TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_LOW],
+        'load': []
+    }
+
+    curriculum = Curriculum(train_types = primary_train_types,
+                            is_random=True,
+                            )
+
+    get_N_X_SP_agents(
+        args,
+        n_x_sp_train_types = curriculum.train_types,
+        n_x_sp_eval_types=primary_eval_types,
+        curriculum=curriculum,
+        unseen_teammates_len=unseen_teammates_len,
+    )
+

 def FCP_mhri(args):
     '''

From 9d95ed471fb3e67f2def3d9d1dc3401f924d448b Mon Sep 17 00:00:00 2001
From: ttopiac
Date: Mon, 11 Nov 2024 11:53:07 -0700
Subject: [PATCH 23/61] Rename get_population to get_categorized_population

---
 oai_agents/common/population.py | 2 +-
 scripts/utils/train_helper.py   | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py
index 4ea9304d..0d373887 100644
--- a/oai_agents/common/population.py
+++ b/oai_agents/common/population.py
@@ -146,7 +146,7 @@ def save_population(args, population):
             rt.save_agents(tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL)
 
 
-def get_population(args,
+def get_categorized_population(args,
                    ck_rate,
                    total_training_timesteps,
                    train_types,
diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py
index f8ed99da..aa7d2858 100644
--- a/scripts/utils/train_helper.py
+++ b/scripts/utils/train_helper.py
@@ -1,6 +1,6 @@
 from oai_agents.agents.rl import RLAgentTrainer
 from oai_agents.common.tags import TeamType
-from oai_agents.common.population import get_population, generate_hdim_and_seed
+from oai_agents.common.population import get_categorized_population, generate_hdim_and_seed
 from oai_agents.common.teammates_collection import generate_TC, get_best_SP_agent, generate_TC_for_ADV_agent, update_TC_w_ADV_teammates
 from oai_agents.common.curriculum import Curriculum
 from .common import load_agents, generate_name
@@ -74,7 +74,7 @@ def get_N_X_SP_agents(args,
     if agents:
         return agents[0]
 
-    population = get_population(
+    population = get_categorized_population(
         args=args,
         ck_rate=args.pop_total_training_timesteps // args.num_of_ckpoints,
         total_training_timesteps=args.pop_total_training_timesteps,
@@ -268,7 +268,7 @@ def get_FCP_agent_w_pop(args,
                         train_types=fcp_train_types,
                         has_curriculum = not fcp_curriculum.is_random)
 
-    population = get_population(
+    population = get_categorized_population(
         args=args,
         ck_rate=args.pop_total_training_timesteps // args.num_of_ckpoints,
         total_training_timesteps=args.pop_total_training_timesteps,

From 0b49c5cf4463f988c97e818e452f7fb21f4055b5 Mon Sep 17 00:00:00 2001
From: ttopiac
Date: Mon, 11 Nov 2024 11:54:54 -0700
Subject: [PATCH 24/61] Rename save_population to save_categorized_population

---
 oai_agents/common/population.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py
index 0d373887..a6d2e648 100644
--- a/oai_agents/common/population.py
+++ b/oai_agents/common/population.py
@@ -127,7 +127,7 @@ def generate_hdim_and_seed(num_of_required_agents):
     return selected_seeds, selected_hdims
 
 
-def save_population(args, population):
+def save_categorized_population(args, population):
     name_prefix = 'pop'
     for layout_name in args.layout_names:
         rt = RLAgentTrainer(
@@ -202,6 +202,6 @@ def get_categorized_population(args,
             layout_pop = RLAgentTrainer.get_checkedpoints_agents(args, checkpoints_list, layout_name)
             population[layout_name].extend(layout_pop)
 
-    save_population(args=args, population=population)
+    save_categorized_population(args=args, population=population)
 
     return population
\ No newline at end of file

From 59b9824c97804e829e25597e589198daf21b33cf Mon Sep 17 00:00:00 2001
From: ttopiac
Date: Mon, 11 Nov 2024 12:52:10 -0700
Subject: [PATCH 25/61] Ensure we save the last model with the last tag.

---
 oai_agents/agents/base_agent.py | 2 +-
 oai_agents/agents/rl.py         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py
index 5df17f5d..ea199c98 100644
--- a/oai_agents/agents/base_agent.py
+++ b/oai_agents/agents/base_agent.py
@@ -513,7 +513,7 @@ def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None
 
         Parameters:
         - args: Experiment arguments containing base directory info and experiment directory info.
-        - name: The name of the agent for which tags should be listed.
+        - name: The name of the agent, for which tags should be listed.
         - path: Optional. If provided, it overrides the default path to the agents directory.
Returns: diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index 56d1ef40..bcb072ee 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -340,7 +340,7 @@ def train_agents(self, total_train_timesteps, tag, exp_name=None): print(f'New best evaluation score of {mean_reward} reached, model saved to {best_path}/{best_tag}') self.best_score = mean_reward steps += 1 - self.save_agents() + self.save_agents(tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL) self.agents = RLAgentTrainer.load_agents(args=self.args, name=self.name, tag=tag) run.finish() From a59aa60c192aa05e8371f9a719a5b29ffdcbc961 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Mon, 11 Nov 2024 13:15:53 -0700 Subject: [PATCH 26/61] Remove tag from RLAgentTrainer.train_agents --- oai_agents/agents/rl.py | 2 +- oai_agents/common/population.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index bcb072ee..ff38c382 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -286,7 +286,7 @@ def log_details(self, experiment_name, total_train_timesteps): print("Final sparse reward ratio: ", self.args.final_sparse_r_ratio) - def train_agents(self, total_train_timesteps, tag, exp_name=None): + def train_agents(self, total_train_timesteps, exp_name=None): experiment_name = self.get_experiment_name(exp_name) run = wandb.init(project="overcooked_ai", entity=self.args.wandb_ent, dir=str(self.args.base_dir / 'wandb'), reinit=True, name=experiment_name, mode=self.args.wandb_mode, diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index a6d2e648..cb018ceb 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -36,8 +36,7 @@ def train_agent_with_checkpoints(args, total_training_timesteps, ck_rate, seed, For curriculum, whenever we don't care about the order of the training types, we can set is_random=True. For SP agents, they only are trained with themselves so the order doesn't matter. 
''' - - rlat.train_agents(total_train_timesteps=total_training_timesteps, tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL) + rlat.train_agents(total_train_timesteps=total_training_timesteps) checkpoints_list = rlat.ck_list if serialize: From 9dfdea00d0305c8c65bf1f8b9f49a4d3471ff34a Mon Sep 17 00:00:00 2001 From: ttopiac Date: Mon, 11 Nov 2024 14:54:33 -0700 Subject: [PATCH 27/61] use checked_model_name_handler.generate_checked_model --- oai_agents/agents/base_agent.py | 23 +---- oai_agents/agents/rl.py | 6 +- .../common/checked_model_name_handler.py | 86 +++++++++++++++++++ 3 files changed, 93 insertions(+), 22 deletions(-) create mode 100644 oai_agents/common/checked_model_name_handler.py diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index ea199c98..979a2aa0 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -3,6 +3,7 @@ from oai_agents.common.state_encodings import ENCODING_SCHEMES from oai_agents.common.subtasks import calculate_completed_subtask, get_doable_subtasks, Subtasks from oai_agents.common.tags import AgentPerformance, TeamType, KeyCheckpoints +from oai_agents.common.checked_model_name_handler import CheckedModelNameHandler from oai_agents.gym_environments.base_overcooked_env import USEABLE_COUNTERS from overcooked_ai_py.mdp.overcooked_mdp import Action @@ -525,23 +526,5 @@ def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None else: path = args.base_dir / 'agent_models' / name - # Ensure the directory exists - if not path.exists() or not path.is_dir(): - raise FileNotFoundError(f"Agent directory not found: {path}") - - # Define the prefix and the regular expression to match the pattern - prefix = KeyCheckpoints.CHECKED_MODEL_PREFIX - reward_substr = KeyCheckpoints.REWARD_SUBSTR - pattern = re.compile(f"^{re.escape(prefix)}(\\d+)(?:{re.escape(reward_substr)}[\\d.]+)?$") - - # List all subdirectories (tags) that match the pattern - tags = [] - for tag in path.iterdir(): - if tag.is_dir() and pattern.match(tag.name): - match = pattern.match(tag.name) - integer_part = int(match.group(1)) - # Only add tags that either have no reward substring for integer 0, or have it when integer > 0 - if integer_part == 0 or (integer_part > 0 and reward_substr in tag.name): - tags.append(tag.name) - - return tags + handler = CheckedModelNameHandler() + return handler.get_checked_model_tags(path=path) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index ff38c382..1ec51986 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -4,6 +4,7 @@ from oai_agents.common.state_encodings import ENCODING_SCHEMES from oai_agents.common.tags import AgentPerformance, TeamType, TeammatesCollection, KeyCheckpoints from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv +from oai_agents.common.checked_model_name_handler import CheckedModelNameHandler import numpy as np import random @@ -294,9 +295,10 @@ def train_agents(self, total_train_timesteps, exp_name=None): self.log_details(experiment_name, total_train_timesteps) + ckname_handler = CheckedModelNameHandler() if self.checkpoint_rate is not None: self.ck_list = [] - path, tag = self.save_agents(tag=f'{KeyCheckpoints.CHECKED_MODEL_PREFIX}{len(self.ck_list)}') + path, tag = self.save_agents(tag=ckname_handler.generate_checked_model_name(id=len(self.ck_list))) self.ck_list.append(({k: 0 for k in self.args.layout_names}, path, tag)) best_path, best_tag = None, None @@ -332,7 +334,7 @@ def train_agents(self, 
total_train_timesteps, exp_name=None): if self.checkpoint_rate: if self.learning_agent.num_timesteps // self.checkpoint_rate > (len(self.ck_list) - 1): - path, tag = self.save_agents(tag=f'{KeyCheckpoints.CHECKED_MODEL_PREFIX}{len(self.ck_list)}{KeyCheckpoints.REWARD_SUBSTR}{mean_reward}') + path, tag = self.save_agents(tag=ckname_handler.generate_checked_model_name(id=len(self.ck_list), mean_reward=mean_reward)) self.ck_list.append((rew_per_layout, path, tag)) if mean_reward >= self.best_score: diff --git a/oai_agents/common/checked_model_name_handler.py b/oai_agents/common/checked_model_name_handler.py new file mode 100644 index 00000000..7e13cabf --- /dev/null +++ b/oai_agents/common/checked_model_name_handler.py @@ -0,0 +1,86 @@ +from oai_agents.common.tags import KeyCheckpoints +import re +from typing import Optional + +from pathlib import Path +from typing import List, Union +from oai_agents.common.tags import KeyCheckpoints + +class CheckedModelNameHandler: + def __init__(self): + """ + Initializes the CheckedModelNameHandler with optional custom prefix and reward substring. + + :param prefix: Custom prefix for model names. + :param reward_substr: Custom reward substring for model names. + """ + self.prefix = KeyCheckpoints.CHECKED_MODEL_PREFIX + self.reward_substr = KeyCheckpoints.REWARD_SUBSTR + self.pattern = re.compile(f"^{re.escape(self.prefix)}(\\d+)(?:{re.escape(self.reward_substr)}[\\d.]+)?$") + + def generate_checked_model_name(self, id: int, mean_reward: Optional[float] = None) -> str: + """ + Generate a checked model name based on the given id and mean reward. + + :param id: The identifier for the model, used as a numeric suffix. + :param mean_reward: The mean reward to include in the model name, if applicable. + :return: A string representing the generated checked model name. + :raises ValueError: If id is negative or if mean_reward is not provided for ids greater than 0. + """ + # Validate id + if id < 0: + raise ValueError("id must be a non-negative integer.") + + # When id is 0, mean_reward can be None + if id == 0: + return f"{self.prefix}{id}" + + # For id > 0, mean_reward must be provided + if mean_reward is None: + raise ValueError("mean_reward must be provided for ids greater than 0.") + + # Return the model name including mean_reward + return f"{self.prefix}{id}{self.reward_substr}{mean_reward}" + + + + def is_valid_checked_model_name(self, model_name: str) -> bool: + """ + Check if a model name matches the required pattern for checked models. + + :param model_name: The model name to validate. + :return: True if the model name matches the pattern; otherwise, False. + """ + return bool(self.pattern.match(model_name)) + + def get_checked_model_tags(self, path: Union[Path, None] = None) -> List[str]: + """ + Retrieve all valid checked model tags (subdirectories) under the specified path that match the pattern. + + :param path: The directory path to search for valid checked model tags. Can be a Path object or None. + :return: A list of valid checked model tag names. + :raises ValueError: If the path is None. + :raises FileNotFoundError: If the specified path does not exist. + :raises NotADirectoryError: If the specified path is not a directory. 
+ """ + if path is None: + raise ValueError("The path cannot be None.") + + # # Convert to Path if not already a Path object + path = Path(path) if not isinstance(path, Path) else path + + if not path.exists(): + raise FileNotFoundError(f"The specified path '{path}' does not exist.") + if not path.is_dir(): + raise NotADirectoryError(f"The specified path '{path}' is not a directory.") + + tags = [] + for tag in path.iterdir(): + if tag.is_dir() and self.pattern.match(tag.name): + match = self.pattern.match(tag.name) + integer_part = int(match.group(1)) + # Only add tags that either have no reward substring for integer 0, or have it when integer > 0 + if integer_part == 0 or (integer_part > 0 and self.reward_substr in tag.name): + tags.append(tag.name) + return tags + From eee6bf1633848beda23ce9de95d6fbc98ad5517e Mon Sep 17 00:00:00 2001 From: ttopiac Date: Mon, 11 Nov 2024 15:08:36 -0700 Subject: [PATCH 28/61] Clean checked_model_name_handler.py --- oai_agents/agents/base_agent.py | 2 +- oai_agents/agents/rl.py | 4 +- .../common/checked_model_name_handler.py | 48 +++++++------------ 3 files changed, 20 insertions(+), 34 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index 979a2aa0..4ee184e9 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -527,4 +527,4 @@ def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None path = args.base_dir / 'agent_models' / name handler = CheckedModelNameHandler() - return handler.get_checked_model_tags(path=path) + return handler.get_all_checked_tags(path=path) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index 1ec51986..6dbbd81c 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -298,7 +298,7 @@ def train_agents(self, total_train_timesteps, exp_name=None): ckname_handler = CheckedModelNameHandler() if self.checkpoint_rate is not None: self.ck_list = [] - path, tag = self.save_agents(tag=ckname_handler.generate_checked_model_name(id=len(self.ck_list))) + path, tag = self.save_agents(tag=ckname_handler.generate_tag(id=len(self.ck_list))) self.ck_list.append(({k: 0 for k in self.args.layout_names}, path, tag)) best_path, best_tag = None, None @@ -334,7 +334,7 @@ def train_agents(self, total_train_timesteps, exp_name=None): if self.checkpoint_rate: if self.learning_agent.num_timesteps // self.checkpoint_rate > (len(self.ck_list) - 1): - path, tag = self.save_agents(tag=ckname_handler.generate_checked_model_name(id=len(self.ck_list), mean_reward=mean_reward)) + path, tag = self.save_agents(tag=ckname_handler.generate_tag(id=len(self.ck_list), mean_reward=mean_reward)) self.ck_list.append((rew_per_layout, path, tag)) if mean_reward >= self.best_score: diff --git a/oai_agents/common/checked_model_name_handler.py b/oai_agents/common/checked_model_name_handler.py index 7e13cabf..65f93a4c 100644 --- a/oai_agents/common/checked_model_name_handler.py +++ b/oai_agents/common/checked_model_name_handler.py @@ -1,59 +1,47 @@ from oai_agents.common.tags import KeyCheckpoints import re -from typing import Optional - from pathlib import Path -from typing import List, Union -from oai_agents.common.tags import KeyCheckpoints +from typing import Optional, List, Union class CheckedModelNameHandler: def __init__(self): """ - Initializes the CheckedModelNameHandler with optional custom prefix and reward substring. - - :param prefix: Custom prefix for model names. - :param reward_substr: Custom reward substring for model names. 
+ Initializes the CheckedModelNameHandler with default prefix and reward substring. """ self.prefix = KeyCheckpoints.CHECKED_MODEL_PREFIX self.reward_substr = KeyCheckpoints.REWARD_SUBSTR self.pattern = re.compile(f"^{re.escape(self.prefix)}(\\d+)(?:{re.escape(self.reward_substr)}[\\d.]+)?$") - def generate_checked_model_name(self, id: int, mean_reward: Optional[float] = None) -> str: + def generate_tag(self, id: int, mean_reward: Optional[float] = None) -> str: """ Generate a checked model name based on the given id and mean reward. :param id: The identifier for the model, used as a numeric suffix. - :param mean_reward: The mean reward to include in the model name, if applicable. + :param mean_reward: Optional mean reward to include in the model name, required for ids greater than 0. :return: A string representing the generated checked model name. :raises ValueError: If id is negative or if mean_reward is not provided for ids greater than 0. """ - # Validate id if id < 0: - raise ValueError("id must be a non-negative integer.") + raise ValueError("ID must be a non-negative integer.") - # When id is 0, mean_reward can be None if id == 0: return f"{self.prefix}{id}" - # For id > 0, mean_reward must be provided if mean_reward is None: - raise ValueError("mean_reward must be provided for ids greater than 0.") + raise ValueError("Mean reward must be provided for IDs greater than 0.") - # Return the model name including mean_reward return f"{self.prefix}{id}{self.reward_substr}{mean_reward}" - - - def is_valid_checked_model_name(self, model_name: str) -> bool: + def is_valid_checked_tag(self, tag: str) -> bool: """ - Check if a model name matches the required pattern for checked models. + Check if a tag name matches the required pattern for checked models. - :param model_name: The model name to validate. - :return: True if the model name matches the pattern; otherwise, False. + :param tag: The tag name to validate. + :return: True if the tag name matches the pattern; otherwise, False. """ - return bool(self.pattern.match(model_name)) + return bool(self.pattern.match(tag)) - def get_checked_model_tags(self, path: Union[Path, None] = None) -> List[str]: + def get_all_checked_tags(self, path: Union[Path, None] = None) -> List[str]: """ Retrieve all valid checked model tags (subdirectories) under the specified path that match the pattern. 
@@ -66,7 +54,6 @@ def get_checked_model_tags(self, path: Union[Path, None] = None) -> List[str]: if path is None: raise ValueError("The path cannot be None.") - # # Convert to Path if not already a Path object path = Path(path) if not isinstance(path, Path) else path if not path.exists(): @@ -75,12 +62,11 @@ def get_checked_model_tags(self, path: Union[Path, None] = None) -> List[str]: raise NotADirectoryError(f"The specified path '{path}' is not a directory.") tags = [] - for tag in path.iterdir(): - if tag.is_dir() and self.pattern.match(tag.name): - match = self.pattern.match(tag.name) + for tag_path in path.iterdir(): + if tag_path.is_dir() and self.pattern.match(tag_path.name): + match = self.pattern.match(tag_path.name) integer_part = int(match.group(1)) # Only add tags that either have no reward substring for integer 0, or have it when integer > 0 - if integer_part == 0 or (integer_part > 0 and self.reward_substr in tag.name): - tags.append(tag.name) + if integer_part == 0 or (integer_part > 0 and self.reward_substr in tag_path.name): + tags.append(tag_path.name) return tags - From f8797f61acc2e96f7dcdfc302673bb1b783e8ffd Mon Sep 17 00:00:00 2001 From: ttopiac Date: Thu, 14 Nov 2024 11:51:52 -0700 Subject: [PATCH 29/61] Add new layout and new agents info to evaluate_agents.py and eval_constants.py --- scripts/evaluate_agents.py | 182 +++++++++++++++++++++++++++----- scripts/utils/eval_constants.py | 104 +++++++++++++++--- 2 files changed, 246 insertions(+), 40 deletions(-) diff --git a/scripts/evaluate_agents.py b/scripts/evaluate_agents.py index 03690d10..89b1ebab 100644 --- a/scripts/evaluate_agents.py +++ b/scripts/evaluate_agents.py @@ -1,7 +1,7 @@ import multiprocessing as mp import os from pathlib import Path -mp.set_start_method('spawn', force=True) +mp.set_start_method('spawn', force=True) import hashlib import sys @@ -27,6 +27,9 @@ THREE_PLAYERS_LOW_EVAL, THREE_PLAYERS_MEDIUM_EVAL, THREE_PLAYERS_HIGH_EVAL, + FOUR_PLAYERS_LOW_EVAL, + FOUR_PLAYERS_MEDIUM_EVAL, + FOUR_PLAYERS_HIGH_EVAL, FIVE_PLAYERS_LOW_EVAL, FIVE_PLAYERS_MEDIUM_FOR_ALL_BESIDES_STORAGE_ROOM_EVAL, FIVE_PLAYERS_HIGH_FOR_ALL_BESIDES_STORAGE_ROOM_EVAL, @@ -46,17 +49,27 @@ class Eval: } LAYOUT_NAMES_PATHs = { - 'selected_2_chefs_coordination_ring': { + 'selected_2_chefs_double_counter_circuit': { Eval.LOW: TWO_PLAYERS_LOW_EVAL, Eval.MEDIUM: TWO_PLAYERS_MEDIUM_EVAL, Eval.HIGH:TWO_PLAYERS_HIGH_EVAL }, - 'selected_2_chefs_counter_circuit': { + 'selected_2_chefs_secret_coordination_ring': { Eval.LOW: TWO_PLAYERS_LOW_EVAL, Eval.MEDIUM: TWO_PLAYERS_MEDIUM_EVAL, Eval.HIGH:TWO_PLAYERS_HIGH_EVAL }, - 'selected_2_chefs_cramped_room': { + 'selected_2_chefs_spacious_room_few_resources': { + Eval.LOW: TWO_PLAYERS_LOW_EVAL, + Eval.MEDIUM: TWO_PLAYERS_MEDIUM_EVAL, + Eval.HIGH:TWO_PLAYERS_HIGH_EVAL + }, + 'selected_2_chefs_spacious_room_no_counter_space': { + Eval.LOW: TWO_PLAYERS_LOW_EVAL, + Eval.MEDIUM: TWO_PLAYERS_MEDIUM_EVAL, + Eval.HIGH:TWO_PLAYERS_HIGH_EVAL + }, + 'selected_2_chefs_storage_room': { Eval.LOW: TWO_PLAYERS_LOW_EVAL, Eval.MEDIUM: TWO_PLAYERS_MEDIUM_EVAL, Eval.HIGH:TWO_PLAYERS_HIGH_EVAL @@ -78,6 +91,33 @@ class Eval: Eval.HIGH: THREE_PLAYERS_HIGH_EVAL, }, + 'selected_4_chefs_double_counter_circuit': { + Eval.LOW: FOUR_PLAYERS_LOW_EVAL, + Eval.MEDIUM: FOUR_PLAYERS_MEDIUM_EVAL, + Eval.HIGH: FOUR_PLAYERS_HIGH_EVAL + }, + 'selected_4_chefs_secret_coordination_ring': { + Eval.LOW: FOUR_PLAYERS_LOW_EVAL, + Eval.MEDIUM: FOUR_PLAYERS_MEDIUM_EVAL, + Eval.HIGH: FOUR_PLAYERS_HIGH_EVAL + }, + 
'selected_4_chefs_spacious_room_few_resources': { + Eval.LOW: FOUR_PLAYERS_LOW_EVAL, + Eval.MEDIUM: FOUR_PLAYERS_MEDIUM_EVAL, + Eval.HIGH: FOUR_PLAYERS_HIGH_EVAL + }, + 'selected_4_chefs_spacious_room_no_counter_space': { + Eval.LOW: FOUR_PLAYERS_LOW_EVAL, + Eval.MEDIUM: FOUR_PLAYERS_MEDIUM_EVAL, + Eval.HIGH: FOUR_PLAYERS_HIGH_EVAL + }, + 'selected_4_chefs_storage_room': { + Eval.LOW: FOUR_PLAYERS_LOW_EVAL, + Eval.MEDIUM: FOUR_PLAYERS_MEDIUM_EVAL, + Eval.HIGH: FOUR_PLAYERS_HIGH_EVAL + }, + + 'selected_5_chefs_counter_circuit': { Eval.LOW: FIVE_PLAYERS_LOW_EVAL, Eval.MEDIUM: FIVE_PLAYERS_MEDIUM_FOR_ALL_BESIDES_STORAGE_ROOM_EVAL, @@ -217,7 +257,7 @@ def process_reward(reward): for idx, agent_name in enumerate(all_mean_rewards): mean_values = [v / num_teamsets for v in cross_exp_mean[agent_name]] std_values = [v / num_teamsets for v in cross_exp_std[agent_name]] - + x = x_values + idx * width - width * (num_agents - 1) / 2 ax.bar(x, mean_values, width, yerr=std_values, label=f"Agent: {agent_name}", capsize=5) @@ -338,7 +378,8 @@ def evaluate_agent(args, def evaluate_agent_for_layout(agent_name, path, layout_names, p_idxes, args, deterministic, max_num_teams_per_layout_per_x, number_of_eps, teammate_lvl_set: Sequence[Eval]): - fn_args = (agent_name, path, tuple(layout_names), tuple(p_idxes), tuple([(k, tuple(v) if isinstance(v, list) else v) for k,v in vars(args).items()]), deterministic, max_num_teams_per_layout_per_x, number_of_eps, tuple(teammate_lvl_set)) + # Including the file content in hash generation to avoid incorrect cache reuse + fn_args = (agent_name, tuple(layout_names), tuple(p_idxes), tuple([(k, tuple(v) if isinstance(v, list) else v) for k,v in vars(args).items()]), deterministic, max_num_teams_per_layout_per_x, number_of_eps, tuple(teammate_lvl_set)) m = hashlib.md5() for s in fn_args: m.update(str(s).encode()) @@ -403,33 +444,124 @@ def run_parallel_evaluation(args, all_agents_paths, layout_names, p_idxes, deter def get_2_player_input(args): args.num_players = 2 - layout_names = ['selected_2_chefs_coordination_ring', - 'selected_2_chefs_counter_circuit', - 'selected_2_chefs_cramped_room'] + layout_names = [ + # 'selected_2_chefs_coordination_ring', + # 'selected_2_chefs_counter_circuit', + # 'selected_2_chefs_cramped_room', + 'selected_2_chefs_double_counter_circuit', + 'selected_2_chefs_secret_coordination_ring', + 'selected_2_chefs_spacious_room_few_resources', + 'selected_2_chefs_spacious_room_no_counter_space', + 'selected_2_chefs_storage_room' + ] + + p_idxes = [0, 1] + + all_agents_paths = { + 'SP': 'agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/best', + 'LMH CUR': 'agent_models/Final/2/N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL]_cur_originaler/best', + 'LAST ALMH RAN REUSED 3A 60M': 'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_ran_originaler_attack2/best', + 'LAST ALMH CUR REUSED 3A 60M': 'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_cur_originaler_attack2/best', + # 'LAST ALMH-SP RAN REUSED 3A': 'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV_SP]_cur_originaler_attack2/best', + # 'LAST A-SP RAN REUSED 3A': 'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SP_SPADV]_ran_originaler_attack2/best', + 'LAST AMH CUR 3A':'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack2/best', + # 'LAST AMH CUR 
2A':'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack1/best', + # 'LAST AMH CUR 1A':'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack0/best', + 'BEST AMH CUR 3A':'agent_models/Final/2-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack2/best', + # 'BEST AMH CUR 2A':'agent_models/Final/2-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack1/best', + # 'BEST AMH CUR 1A':'agent_models/Final/2-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack0/best', + 'H': 'agent_models/Final/2/N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH]_ran_originaler/best', + + } + + teammate_lvl_sets = [ + [Eval.LOW], + [Eval.MEDIUM], + [Eval.HIGH] + ] + + return layout_names, p_idxes, all_agents_paths, teammate_lvl_sets, args + + +def get_2_player_input(args): + args.num_players = 2 + layout_names = [ + # 'selected_2_chefs_coordination_ring', + # 'selected_2_chefs_counter_circuit', + # 'selected_2_chefs_cramped_room', + 'selected_2_chefs_double_counter_circuit', + 'selected_2_chefs_secret_coordination_ring', + 'selected_2_chefs_spacious_room_few_resources', + 'selected_2_chefs_spacious_room_no_counter_space', + 'selected_2_chefs_storage_room' + ] + p_idxes = [0, 1] all_agents_paths = { - # 'N-1-SP FCP CUR': 'agent_models/Result/2/N-1-SP_s1010_h256_tr(SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL)_cur/best', - # 'N-1-SP FCP RAN': 'agent_models/Result/2/N-1-SP_s1010_h256_tr(SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL)_ran/best', - 'SP': 'agent_models/Result/2/SP_hd64_seed14/best', - 'FCP corrected': 'agent_models/FCP_correct/2/FCP_s2020_h256_tr(AMX)_ran/best', - # 'N-1-SP ADV': 'agent_models/Result/2/MAP_SP_hd64_seed14/originaler-selfisherplay/2/pwadv_s14_h64_tr(SP_SPADV)_ran/best', - - # 'N-1-SP FCP + ADV CUR [attack 2]': 'agent_models/Result/2/PWADV-N-1-SP_s1010_h256_tr(SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV)_cur_supporter_attack2/best', - # These reuse CH's ADV, the first one has SP, the second one doesn't - # 'N-1-SP FCP + ADV RAN CH [attack 2]': 'agent_models/Result/2/adv_reused_sp/PWADV-N-1-SP_s1010_h256_tr(SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV_SP)_ran_originaler_attack2/best', - # 'N-1-SP FCP + ADV CUR CH NO SP [attack 2]': 'agent_models/Result/2/adv_reused_no_sp/PWADV-N-1-SP_s1010_h256_tr(SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV)_cur_originaler_attack2/best', - 'N-1-SP SPCKP + ADV MAP [attack 2]': 'agent_models/Result/2/rerun/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack2/best', - # 'N-1-SP FCP + ADV CUR [attack 0]': 'agent_models/Result/2/PWADV-N-1-SP_s1010_h256_tr(SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV)_cur_supporter_attack0/best', - # 'N-1-SP FCP + ADV CUR [attack 1]': 'agent_models/Result/2/PWADV-N-1-SP_s1010_h256_tr(SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV)_cur_supporter_attack1/best', + 'SP': 'agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/best', + 'LMH CUR': 'agent_models/Final/2/N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL]_cur_originaler/best', + 'LAST ALMH RAN REUSED 3A 60M': 'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_ran_originaler_attack2/best', + 'LAST ALMH CUR REUSED 3A 60M': 
'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_cur_originaler_attack2/best', + # 'LAST ALMH-SP RAN REUSED 3A': 'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV_SP]_cur_originaler_attack2/best', + # 'LAST A-SP RAN REUSED 3A': 'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SP_SPADV]_ran_originaler_attack2/best', + 'LAST AMH CUR 3A':'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack2/best', + # 'LAST AMH CUR 2A':'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack1/best', + # 'LAST AMH CUR 1A':'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack0/best', + 'BEST AMH CUR 3A':'agent_models/Final/2-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack2/best', + # 'BEST AMH CUR 2A':'agent_models/Final/2-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack1/best', + # 'BEST AMH CUR 1A':'agent_models/Final/2-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack0/best', + 'H': 'agent_models/Final/2/N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH]_ran_originaler/best', + } + teammate_lvl_sets = [ [Eval.LOW], [Eval.MEDIUM], [Eval.HIGH] ] + return layout_names, p_idxes, all_agents_paths, teammate_lvl_sets, args +def get_4_player_input(args): + args.num_players = 4 + layout_names = [ + # 'selected_4_chefs_coordination_ring', + # 'selected_4_chefs_counter_circuit', + # 'selected_4_chefs_cramped_room', + 'selected_4_chefs_double_counter_circuit', + 'selected_4_chefs_secret_coordination_ring', + 'selected_4_chefs_spacious_room_few_resources', + 'selected_4_chefs_spacious_room_no_counter_space', + 'selected_4_chefs_storage_room' + ] + + p_idxes = [0, 1, 2, 3] + + all_agents_paths = { + 'SP': 'agent_models/Final/4/SP_s1010_h256_tr[SP]_ran/best', + 'LMH CUR': 'agent_models/Final/4/N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL]_cur_originaler/best', + # 'LAST ALMH RAN REUSED 3A 60M': 'agent_models/Final/4/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_ran_originaler_attack2/best', + # 'LAST ALMH CUR REUSED 3A 60M': 'agent_models/Final/4/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_cur_originaler_attack2/best', + # 'LAST ALMH-SP RAN REUSED 3A': 'agent_models/Final/4/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV_SP]_cur_originaler_attack2/best', + # 'LAST A-SP RAN REUSED 3A': 'agent_models/Final/4/PWADV-N-1-SP_s1010_h256_tr[SP_SPADV]_ran_originaler_attack2/best', + # 'LAST AMH CUR 3A':'agent_models/Final/4/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack2/best', + 'LAST AMH CUR 2A':'agent_models/Final/4/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack1/best', + 'LAST AMH CUR 1A':'agent_models/Final/4/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack0/best', + 'BEST AMH CUR 3A':'agent_models/Final/4-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack2/best', + # 'BEST AMH CUR 
2A':'agent_models/Final/4-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack1/best', + # 'BEST AMH CUR 1A':'agent_models/Final/4-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack0/best', + # 'H': 'agent_models/Final/4/N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH]_ran_originaler/best', + + } + + teammate_lvl_sets = [ + [Eval.LOW], + [Eval.MEDIUM], + [Eval.HIGH] + ] + + return layout_names, p_idxes, all_agents_paths, teammate_lvl_sets, args def get_3_player_input(args): args.num_players = 3 @@ -473,7 +605,7 @@ def get_5_player_input(args): if __name__ == "__main__": args = get_arguments() - layout_names, p_idxes, all_agents_paths, teammate_lvl_sets, args = get_2_player_input(args) + layout_names, p_idxes, all_agents_paths, teammate_lvl_sets, args = get_4_player_input(args) # layout_names, p_idxes, all_agents_paths, teammate_lvl_sets, args = get_3_player_input(args) # layout_names, p_idxes, all_agents_paths, teammate_lvl_sets, args = get_5_player_input(args) @@ -519,7 +651,7 @@ def get_5_player_input(args): unseen_counts=unseen_counts, # display_delivery=show_delivery_num, plot_name=plot_name) - + # plot_evaluation_results_line(all_mean_rewards=all_mean_rewards, # all_std_rewards=all_std_rewards, @@ -527,5 +659,5 @@ def get_5_player_input(args): # teammate_lvl_sets=teammate_lvl_sets, # num_players=args.num_players, # plot_name=plot_name) - + diff --git a/scripts/utils/eval_constants.py b/scripts/utils/eval_constants.py index a7e6dcfa..edda23b7 100644 --- a/scripts/utils/eval_constants.py +++ b/scripts/utils/eval_constants.py @@ -1,24 +1,98 @@ +# TWO_PLAYERS_LOW_EVAL = [ +# 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_0', +# 'agent_models/Result/Eval/2/SP_hd256_seed42/ck_1_rew_18.666666666666668', +# 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_1_rew_22.0', +# 'agent_models/Result/Eval/2/SP_hd256_seed7/ck_1_rew_38.0', +# 'agent_models/Result/Eval/2/SP_hd64_seed1995/ck_0', +# ] +# TWO_PLAYERS_MEDIUM_EVAL = [ +# 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_2_rew_108.66666666666667', +# 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_3_rew_170.66666666666666', +# 'agent_models/Result/Eval/2/SP_hd256_seed7/ck_2_rew_106.66666666666667', +# 'agent_models/Result/Eval/2/SP_hd256_seed7/ck_3_rew_192.66666666666666', +# 'agent_models/Result/Eval/2/SP_hd256_seed42/ck_2_rew_110.66666666666667', +# ] +# TWO_PLAYERS_HIGH_EVAL = [ +# 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_24_rew_298.0', +# 'agent_models/Result/Eval/2/SP_hd64_seed1995/ck_20_rew_286.6666666666667', +# 'agent_models/Result/Eval/2/SP_hd256_seed7/ck_24_rew_258.0', +# 'agent_models/Result/Eval/2/SP_hd256_seed42/ck_22_rew_262.0', +# 'agent_models/Result/Eval/2/SP_hd256_seed42/ck_13_rew_274.0', +# ] + TWO_PLAYERS_LOW_EVAL = [ - 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_0', - 'agent_models/Result/Eval/2/SP_hd256_seed42/ck_1_rew_18.666666666666668', - 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_1_rew_22.0', - 'agent_models/Result/Eval/2/SP_hd256_seed7/ck_1_rew_38.0', - 'agent_models/Result/Eval/2/SP_hd64_seed1995/ck_0', + "agent_models/Final/2/SP_hd64_seed0/ck_0", + "agent_models/Final/2/SP_hd64_seed14/ck_0", + "agent_models/Final/2/SP_hd256_seed13/ck_0", + "agent_models/Final/2/SP_hd256_seed68/ck_0", + "agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/ck_0", + "agent_models/Final/2/SP_hd64_seed14/ck_1_rew_18.4", + "agent_models/Final/2/SP_hd64_seed0/ck_1_rew_28.8", + "agent_models/Final/2/SP_hd256_seed13/ck_1_rew_30.8", + 
"agent_models/Final/2/SP_hd256_seed68/ck_1_rew_56.8" ] + TWO_PLAYERS_MEDIUM_EVAL = [ - 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_2_rew_108.66666666666667', - 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_3_rew_170.66666666666666', - 'agent_models/Result/Eval/2/SP_hd256_seed7/ck_2_rew_106.66666666666667', - 'agent_models/Result/Eval/2/SP_hd256_seed7/ck_3_rew_192.66666666666666', - 'agent_models/Result/Eval/2/SP_hd256_seed42/ck_2_rew_110.66666666666667', + "agent_models/Final/2/SP_hd64_seed14/ck_2_rew_88.4", + "agent_models/Final/2/SP_hd64_seed0/ck_2_rew_122.8", + "agent_models/Final/2/SP_hd256_seed13/ck_2_rew_128.8", + "agent_models/Final/2/SP_hd256_seed68/ck_2_rew_156.0", + "agent_models/Final/2/SP_hd64_seed14/ck_3_rew_152.8", + "agent_models/Final/2/SP_hd64_seed0/ck_3_rew_171.6" ] + TWO_PLAYERS_HIGH_EVAL = [ - 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_24_rew_298.0', - 'agent_models/Result/Eval/2/SP_hd64_seed1995/ck_20_rew_286.6666666666667', - 'agent_models/Result/Eval/2/SP_hd256_seed7/ck_24_rew_258.0', - 'agent_models/Result/Eval/2/SP_hd256_seed42/ck_22_rew_262.0', - 'agent_models/Result/Eval/2/SP_hd256_seed42/ck_13_rew_274.0', + "agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/ck_10_rew_238.0", + "agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/ck_9_rew_232.8", + "agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/ck_8_rew_234.0", + "agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/ck_7_rew_230.8", + "agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/ck_6_rew_230.8", + "agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/ck_5_rew_230.8", + "agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/ck_4_rew_224.8", + "agent_models/Final/2/SP_hd64_seed14/ck_10_rew_226.8", + "agent_models/Final/2/SP_hd64_seed14/ck_9_rew_224.0", + "agent_models/Final/2/SP_hd256_seed13/ck_5_rew_217.2", + "agent_models/Final/2/SP_hd64_seed0/ck_10_rew_221.6", + "agent_models/Final/2/SP_hd256_seed68/ck_10_rew_209.2", + "agent_models/Final/2/SP_hd64_seed14/ck_5_rew_212.0", + "agent_models/Final/2/SP_hd256_seed68/ck_9_rew_213.6" +] + +# Define the paths for four-player evaluation in three different lists +FOUR_PLAYERS_LOW_EVAL = [ + "agent_models/Final/4/SP_s1010_h256_tr[SP]_ran/ck_0", + "agent_models/Final/4/SP_hd64_seed0/ck_0", + "agent_models/Final/4/SP_hd64_seed14/ck_0", + "agent_models/Final/4/SP_hd256_seed13/ck_0", + "agent_models/Final/4/SP_hd256_seed68/ck_0", + "agent_models/Final/4/SP_hd64_seed0/ck_1_rew_54.2", + "agent_models/Final/4/SP_hd256_seed68/ck_1_rew_66.0", + "agent_models/Final/4/SP_hd256_seed13/ck_1_rew_79.0", + "agent_models/Final/4/SP_hd64_seed14/ck_1_rew_44.0", + "agent_models/Final/4/SP_hd256_seed68/ck_2_rew_142.0" +] + +FOUR_PLAYERS_MEDIUM_EVAL = [ + "agent_models/Final/4/SP_hd64_seed14/ck_2_rew_122.2", + "agent_models/Final/4/SP_hd256_seed13/ck_2_rew_197.2", + "agent_models/Final/4/SP_hd64_seed0/ck_3_rew_168.0", + "agent_models/Final/4/SP_hd256_seed68/ck_3_rew_214.0", + "agent_models/Final/4/SP_hd64_seed0/ck_4_rew_204.6", + "agent_models/Final/4/SP_hd64_seed14/ck_4_rew_243.6" +] + +FOUR_PLAYERS_HIGH_EVAL = [ + "agent_models/Final/4/SP_s1010_h256_tr[SP]_ran/ck_8_rew_308.0", + "agent_models/Final/4/SP_s1010_h256_tr[SP]_ran/ck_6_rew_309.6", + "agent_models/Final/4/SP_s1010_h256_tr[SP]_ran/ck_5_rew_299.7", + "agent_models/Final/4/SP_hd64_seed0/ck_10_rew_302.4", + "agent_models/Final/4/SP_hd64_seed14/ck_10_rew_295.6", + "agent_models/Final/4/SP_hd256_seed68/ck_9_rew_296.8", + "agent_models/Final/4/SP_hd256_seed68/ck_8_rew_296.2", + "agent_models/Final/4/SP_hd64_seed14/ck_9_rew_289.0", + 
"agent_models/Final/4/SP_hd256_seed13/ck_9_rew_299.2", + "agent_models/Final/4/SP_hd256_seed13/ck_10_rew_290.8" ] THREE_PLAYERS_LOW_EVAL = [ From 1391d244409fdeae1f7b6520755c44d48b606b5b Mon Sep 17 00:00:00 2001 From: ttopiac Date: Mon, 4 Nov 2024 14:12:30 -0700 Subject: [PATCH 30/61] update generate_hdim_and_seed for population methods --- oai_agents/common/population.py | 58 +++++++++++++++++------- tests/test_oai_agents/test_population.py | 39 ++++++++++++++++ 2 files changed, 81 insertions(+), 16 deletions(-) create mode 100644 tests/test_oai_agents/test_population.py diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 278c6bc4..d16dbd80 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -8,6 +8,8 @@ from .curriculum import Curriculum +import random + def _get_most_recent_checkpoint(args, name: str) -> str: if args.exp_dir: @@ -105,28 +107,52 @@ def ensure_we_will_have_enough_agents_in_population(teammates_len, f" num_SPs_to_train: {num_SPs_to_train}." -def generate_hdim_and_seed(num_SPs_to_train): +def generate_hdim_and_seed(num_of_required_agents): ''' - (hidden_dim, seed) = reward of selfplay - (256, 68)=362, (64, 14)=318 - (256, 13)=248, (64, 0)=230 - (256, 48)=20, (64, 30)=0 + Generates lists of seeds and hidden dimensions for a given number of agents. + + Each setting is a pair (hidden_dim, seed). If the number of required agents + is less than or equal to the number of predefined settings, it selects from + the predefined seeds and hidden dimensions. Otherwise, it generates random + seeds and hidden dimensions to fill the remaining number of agents. + + Arguments: + num_of_required_agents -- the number of (hidden_dim, seed) pairs to generate. + + Returns: + selected_seeds -- list of selected seeds + selected_hdims -- list of selected hidden dimensions ''' - # Tested in 3-chefs-small-kitchen: - good_seeds = [68, 14, 13, 0] - good_hdims = [256, 64, 256, 64] - # Not tested: - other_seeds_copied_from_HAHA = [2907, 2907, 105, 105, 8, 32, 128, 512] - other_hdims_copied_from_HAHA = [64, 256, 64, 256, 16, 64, 256, 1024] + # Predefined seeds and hidden dimensions + seeds = [1010, 2020, 2602, 13, 68, 2907, 105, 128] + hdims = [256] * len(seeds) - all_seeds = good_seeds + other_seeds_copied_from_HAHA - all_hdims = good_hdims + other_hdims_copied_from_HAHA + # Initialize selected lists + selected_seeds = [] + selected_hdims = [] - selected_seeds = all_seeds[:num_SPs_to_train] - selected_hdims = all_hdims[:num_SPs_to_train] - return selected_seeds, selected_hdims + # Check if we have enough predefined pairs + if num_of_required_agents <= len(seeds): + # Select predefined seeds and hdims + selected_seeds = seeds[:num_of_required_agents] + selected_hdims = hdims[:num_of_required_agents] + else: + # Use all predefined settings + selected_seeds = seeds[:] + selected_hdims = hdims[:] + + # Generate additional random settings if more agents are needed + remaining = num_of_required_agents - len(seeds) + available_seeds = set(range(0, 5000)) - set(selected_seeds) + random_seeds = random.sample(available_seeds, remaining) # Generate random seeds + random_hdims = random.choices([256, 512], k=remaining) # Generate random hidden dimensions + # Append randomly generated settings to selected lists + selected_seeds += random_seeds + selected_hdims += random_hdims + + return selected_seeds, selected_hdims def save_population(args, population): name_prefix = 'pop' diff --git a/tests/test_oai_agents/test_population.py 
b/tests/test_oai_agents/test_population.py new file mode 100644 index 00000000..9b3d0004 --- /dev/null +++ b/tests/test_oai_agents/test_population.py @@ -0,0 +1,39 @@ +# test_population.py + +from oai_agents.common.population import generate_hdim_and_seed + +def test_generate_hdim_and_seed(): + ''' + Test function for generate_hdim_and_seed to ensure: + 1. The number of (hidden_dim, seed) pairs matches the number of required agents. + 2. All generated seeds are unique. + 3. Hidden dimensions are as expected (either 64 or 256). + ''' + + # Test cases + test_cases = [3, 5, 8, 10] # Testing for fewer than, equal to, and more than predefined settings + + for num_agents in test_cases: + print(f"\nTesting with {num_agents} agents:") + + # Generate (hidden_dim, seed) pairs + selected_seeds, selected_hdims = generate_hdim_and_seed(num_agents) + + # Check that the correct number of agents is generated + assert len(selected_seeds) == num_agents, f"Expected {num_agents} seeds, got {len(selected_seeds)}" + assert len(selected_hdims) == num_agents, f"Expected {num_agents} hidden dims, got {len(selected_hdims)}" + + # Check that all seeds are unique + assert len(set(selected_seeds)) == num_agents, "Duplicate seeds found in the generated seeds." + + # Check that hidden dims are from the valid set (64, 256) + assert all(hdim in [256, 512] for hdim in selected_hdims), "Invalid hidden dimension found. Only 64 and 256 are allowed." + + print(f"Test passed for {num_agents} agents.") + print("Selected seeds:", selected_seeds) + print("Selected hidden dimensions:", selected_hdims) + +# Ensure that this test script only runs when executed directly +if __name__ == "__main__": + print("Running tests in population.py...") + test_generate_hdim_and_seed() From cff8151cc44d7dad06798a7a81174cf09ace185d Mon Sep 17 00:00:00 2001 From: ttopiac Date: Mon, 4 Nov 2024 14:29:55 -0700 Subject: [PATCH 31/61] Replace aamas25 in population.py by tag.CheckedPoints.FINAL_TRAINED_MODEL --- oai_agents/common/population.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index d16dbd80..581038bd 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -6,6 +6,7 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.tags import AgentPerformance, KeyCheckpoints, TeamType + from .curriculum import Curriculum import random @@ -230,4 +231,4 @@ def get_population(args, save_population(args=args, population=population) - return population + return population \ No newline at end of file From d1ebd7f30ce62f0ee3630761531f5e5d7d7430f0 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Tue, 5 Nov 2024 09:58:42 -0700 Subject: [PATCH 32/61] Replace the first loop in get_best_SP_agent by a line of code. 
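
The dropped loop rebuilt all_agents once per layout, so only the last assignment ever took
effect; a single lookup keeps the same behavior, assuming every layout key in the population
maps to the same pool of agents. A minimal sketch of the resulting selection logic
(illustrative only; attribute names follow teammates_collection.py):

    def best_sp_agent(population, layout_names):
        agents = population[layout_names[0]]   # any layout key works under the assumption above
        def avg_score(agent):
            return sum(agent.layout_scores[l] for l in layout_names) / len(layout_names)
        return max(agents, key=avg_score)      # agent with the highest mean score across layouts
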
--- oai_agents/common/teammates_collection.py | 3 +-- scripts/utils/train_helper.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/oai_agents/common/teammates_collection.py b/oai_agents/common/teammates_collection.py index d54134bb..6af8298e 100644 --- a/oai_agents/common/teammates_collection.py +++ b/oai_agents/common/teammates_collection.py @@ -269,8 +269,7 @@ def generate_TC(args, def get_best_SP_agent(args, population): agents_scores_averaged_over_layouts = [] - for layout_name in args.layout_names: - all_agents = [agent for agent in population[layout_name]] + all_agents = [agent for agent in population[args.layout_names[0]]] for agent in all_agents: scores = [agent.layout_scores[layout_name] for layout_name in args.layout_names] diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index 441514cd..d968a1e3 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -1,6 +1,6 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.tags import TeamType -from oai_agents.common.population import get_population +from oai_agents.common.population import get_population, generate_hdim_and_seed from oai_agents.common.teammates_collection import generate_TC, get_best_SP_agent, generate_TC_for_ADV_agent, update_TC_w_ADV_teammates from oai_agents.common.curriculum import Curriculum from .common import load_agents, generate_name From a4b1fdf46cbf5ea063a6af9287e5eec3103de98e Mon Sep 17 00:00:00 2001 From: ttopiac Date: Thu, 7 Nov 2024 09:08:08 -0700 Subject: [PATCH 33/61] Fix merge conflicts --- oai_agents/agents/rl.py | 2 +- oai_agents/common/population.py | 1 + oai_agents/common/tags.py | 2 ++ oai_agents/common/teammates_collection.py | 8 +++++--- sandbox/fix_pop_ck_list_after_continued_run.py | 2 +- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index acb3aec3..0468005b 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -347,7 +347,7 @@ def train_agents(self, total_train_timesteps, tag_for_returning_agent, exp_name= if self.checkpoint_rate: if self.learning_agent.num_timesteps // self.checkpoint_rate > (len(self.ck_list) - 1): - path, tag = self.save_agents(tag=f'ck_{len(self.ck_list)}_rew_{mean_reward}') + path, tag = self.save_agents(tag=f'{KeyCheckpoints.CHECKED_MODEL_PREFIX}{len(self.ck_list)}{KeyCheckpoints.REWARD_SUBSTR}{mean_reward}') self.ck_list.append((rew_per_layout, path, tag)) if mean_reward >= self.best_score: diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 581038bd..404ba2bf 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -208,6 +208,7 @@ def get_population(args, (args, total_training_timesteps, ck_rate, seed[i], h_dim[i], True) for i in range(num_SPs_to_train) ] + if args.parallel: with concurrent.futures.ProcessPoolExecutor(max_workers=args.max_concurrent_jobs) as executor: arg_lists = list(zip(*inputs)) diff --git a/oai_agents/common/tags.py b/oai_agents/common/tags.py index 9f8a0b75..308f05b7 100644 --- a/oai_agents/common/tags.py +++ b/oai_agents/common/tags.py @@ -83,6 +83,8 @@ class TeammatesCollection: class KeyCheckpoints: # Tags to identify the type of model checkpoint to save/load BEST_EVAL_REWARD = 'best' # Use only for evaluation MOST_RECENT_TRAINED_MODEL = 'last' # Use only for training + CHECKED_MODEL_PREFIX = 'ck_' + REWARD_SUBSTR = '_rew_' class Prefix: SELF_PLAY = 'SP' diff --git a/oai_agents/common/teammates_collection.py 
b/oai_agents/common/teammates_collection.py index 6af8298e..6f53f403 100644 --- a/oai_agents/common/teammates_collection.py +++ b/oai_agents/common/teammates_collection.py @@ -267,17 +267,19 @@ def generate_TC(args, def get_best_SP_agent(args, population): + # all_agents = [agent for agent in population[args.layout_names[0]]] + all_agents = get_all_agents( args.layout_names[0], population) agents_scores_averaged_over_layouts = [] - all_agents = [agent for agent in population[args.layout_names[0]]] - for agent in all_agents: scores = [agent.layout_scores[layout_name] for layout_name in args.layout_names] agents_scores_averaged_over_layouts.append((agent, sum(scores)/len(scores))) best_agent = max(agents_scores_averaged_over_layouts, key=lambda x: x[1]) return best_agent[0] - +def get_all_agents(layout_name, population): + all_agents = [agent for agent in population[layout_name]] + return all_agents def update_eval_collection_with_eval_types_from_file(args, agent, unseen_teammates_len, eval_types, eval_collection): for teammates in eval_types: diff --git a/sandbox/fix_pop_ck_list_after_continued_run.py b/sandbox/fix_pop_ck_list_after_continued_run.py index 04093777..92c0303d 100644 --- a/sandbox/fix_pop_ck_list_after_continued_run.py +++ b/sandbox/fix_pop_ck_list_after_continued_run.py @@ -4,7 +4,7 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.arguments import get_arguments -from oai_agents.common.tags import TeamType +from oai_agents.common.tags import TeamType, CheckedPoints from oai_agents.common.learner import LearnerType from oai_agents.common.tags import KeyCheckpoints From 4c0f0d188af17816a4202352ff12086517ef19db Mon Sep 17 00:00:00 2001 From: ttopiac Date: Tue, 5 Nov 2024 20:48:14 -0700 Subject: [PATCH 34/61] Add a function for us to list agent's checked tags and also a test function to test it. --- oai_agents/agents/base_agent.py | 42 ++++++++++++ tests/test_oai_agents/test_base_agent.py | 85 ++++++++++++++++++++++++ 2 files changed, 127 insertions(+) create mode 100644 tests/test_oai_agents/test_base_agent.py diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index 9f959316..b262f5dc 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -24,6 +24,7 @@ import os import random import pickle as pkl +import re class OAIAgent(nn.Module, ABC): """ @@ -519,3 +520,44 @@ def load_agents(args, tag, name: str=None, path: Union[Path, None] = None): env_info = pkl.load(f) return agents, env_info, saved_variables + + @staticmethod + def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None) -> List[str]: + ''' + Lists only tags that start with CheckedPoints.CHECKED_MODEL_PREFIX, followed by an integer. + If the integer is greater than 0, it must be followed by CheckedPoints.REWARD_SUBSTR and a floating-point number. + + Parameters: + - args: Experiment arguments containing base directory info. + - name: The name of the agent (or experiment) for which tags should be listed. + - path: Optional. If provided, it overrides the default path to the agents directory. + + Returns: + - A list of tags (directories) that match the specified pattern. 
+ ''' + if not path: + if args.exp_dir: + path = args.base_dir / 'agent_models' / args.exp_dir / name + else: + path = args.base_dir / 'agent_models' / name + + # Ensure the directory exists + if not path.exists() or not path.is_dir(): + raise FileNotFoundError(f"Agent directory not found: {path}") + + # Define the prefix and the regular expression to match the pattern + prefix = KeyCheckpoints.CHECKED_MODEL_PREFIX + reward_substr = KeyCheckpoints.REWARD_SUBSTR + pattern = re.compile(f"^{re.escape(prefix)}(\\d+)(?:{re.escape(reward_substr)}[\\d.]+)?$") + + # List all subdirectories (tags) that match the pattern + tags = [] + for tag in path.iterdir(): + if tag.is_dir() and pattern.match(tag.name): + match = pattern.match(tag.name) + integer_part = int(match.group(1)) + # Only add tags that either have no reward substring for integer 0, or have it when integer > 0 + if integer_part == 0 or (integer_part > 0 and reward_substr in tag.name): + tags.append(tag.name) + + return tags diff --git a/tests/test_oai_agents/test_base_agent.py b/tests/test_oai_agents/test_base_agent.py new file mode 100644 index 00000000..9c18603f --- /dev/null +++ b/tests/test_oai_agents/test_base_agent.py @@ -0,0 +1,85 @@ +from oai_agents.agents.base_agent import OAITrainer +from pathlib import Path +from oai_agents.common.tags import CheckedPoints +import shutil + +def test_list_agent_checked_tags(): + # Define base directory based on the current working directory + base_dir = Path.cwd() + + # Set up the directory structure for testing + # This will create the following structure within the current working directory: + # + # / + # └── agent_models/ + # └── test_agents_folder/ + # └── test_agent/ + # ├── ck_0/ + # ├── ck_1_rew_59.5/ + # ├── ck_2_rew_140.0/ + # ├── ck_10_rew_336.8888888888889/ + # ├── ck_3_invalid/ # Should not match + # ├── ck_4_rew_invalid/ # Should not match + # ├── unrelated_tag/ # Should not match + # ├── best/ # Should not match + # └── last/ # Should not match + # + # Only `ck_0`, `ck_1_rew_59.5`, `ck_2_rew_140.0`, and `ck_10_rew_336.8888888888889` + # should be returned by the function. 
+ + test_dir = base_dir / "agent_models" / "test_agents_folder" / "test_agent" + test_dir.mkdir(parents=True, exist_ok=True) # Ensure all parent directories are created + + # Simulate directory structure with various tags + tag_names = [ + "ck_0", + "ck_1_rew_59.5", + "ck_2_rew_140.0", + "ck_10_rew_336.8888888888889", + "ck_3_invalid", # Invalid because it doesn't have a valid float after the integer + "ck_4_rew_invalid", # Invalid because reward value is not a float + "unrelated_tag", # Invalid because it doesn't start with `CheckedPoints.CHECKED_MODEL_PREFIX` + "best", + "last" + ] + + # Create these tag directories within the test directory + for tag_name in tag_names: + (test_dir / tag_name).mkdir(parents=True, exist_ok=True) + + # Mock args object with base_dir and exp_dir pointing to the test directory + class MockArgs: + def __init__(self, base_dir, exp_dir, layout_names=[]): + self.base_dir = base_dir + self.exp_dir = "test_agents_folder" + self.layout_names = layout_names + + args = MockArgs(base_dir=base_dir, exp_dir="test_agents_folder") + + # Call the function to test + checked_tags = OAITrainer.list_agent_checked_tags(args, name="test_agent") + + # Expected tags should only include those that match the pattern + expected_tags = [ + "ck_0", + "ck_1_rew_59.5", + "ck_2_rew_140.0", + "ck_10_rew_336.8888888888889" + ] + + # Print results for verification + if sorted(checked_tags) == sorted(expected_tags): + print("Test passed: Tags returned as expected.") + else: + print(f"Test failed: Expected {expected_tags}, but got {checked_tags}") + + # Clean up the test directories after the test + # This will remove the entire "agent_models/test_agents_folder" structure created for testing + shutil.rmtree(base_dir / "agent_models" / "test_agents_folder") + +# Run the test function +test_list_agent_checked_tags() + + + + From 48f7afc9ca754b51dc2af40f730ba31c60679410 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Tue, 5 Nov 2024 20:50:58 -0700 Subject: [PATCH 35/61] rewrite a comment --- oai_agents/agents/base_agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index b262f5dc..bb3575b6 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -528,8 +528,8 @@ def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None If the integer is greater than 0, it must be followed by CheckedPoints.REWARD_SUBSTR and a floating-point number. Parameters: - - args: Experiment arguments containing base directory info. - - name: The name of the agent (or experiment) for which tags should be listed. + - args: Experiment arguments containing base directory info and experiment directory info. + - name: The name of the agent for which tags should be listed. - path: Optional. If provided, it overrides the default path to the agents directory. Returns: From 25fa6c387d3d66407f48ae57466f8d4a2cc0fe90 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Tue, 5 Nov 2024 09:58:42 -0700 Subject: [PATCH 36/61] Replace the first loop in get_best_SP_agent by a line of code. 
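
For reference, the comprehension added back below is equivalent to the get_all_agents helper
introduced in the merge-conflict fix, so the two lines produce the same list (sketch, assuming
population maps each layout name to a list of agents):

    def get_all_agents(layout_name, population):
        # same result as [agent for agent in population[layout_name]]
        return list(population[layout_name])
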
--- oai_agents/common/teammates_collection.py | 2 ++ scripts/utils/train_helper.py | 1 + 2 files changed, 3 insertions(+) diff --git a/oai_agents/common/teammates_collection.py b/oai_agents/common/teammates_collection.py index 6f53f403..d6de428b 100644 --- a/oai_agents/common/teammates_collection.py +++ b/oai_agents/common/teammates_collection.py @@ -271,6 +271,8 @@ def get_best_SP_agent(args, population): all_agents = get_all_agents( args.layout_names[0], population) agents_scores_averaged_over_layouts = [] + all_agents = [agent for agent in population[args.layout_names[0]]] + for agent in all_agents: scores = [agent.layout_scores[layout_name] for layout_name in args.layout_names] agents_scores_averaged_over_layouts.append((agent, sum(scores)/len(scores))) diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index d968a1e3..6c8964a1 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -1,6 +1,7 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.tags import TeamType from oai_agents.common.population import get_population, generate_hdim_and_seed +from oai_agents.common.population import get_population, generate_hdim_and_seed from oai_agents.common.teammates_collection import generate_TC, get_best_SP_agent, generate_TC_for_ADV_agent, update_TC_w_ADV_teammates from oai_agents.common.curriculum import Curriculum from .common import load_agents, generate_name From d5e726342252caf039379ff6120139093e77e07f Mon Sep 17 00:00:00 2001 From: ttopiac Date: Tue, 5 Nov 2024 15:52:05 -0700 Subject: [PATCH 37/61] Add two CheckedPoints tags, including CheckedModelPrefix and REWARD_SUBSTR --- oai_agents/agents/rl.py | 1 + oai_agents/common/teammates_collection.py | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index 0468005b..b0270412 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -31,6 +31,7 @@ def __init__(self, teammates_collection, args, name = name or 'rl_agent' super(RLAgentTrainer, self).__init__(name, args, seed=seed) + self.args = args self.device = args.device self.teammates_len = self.args.teammates_len diff --git a/oai_agents/common/teammates_collection.py b/oai_agents/common/teammates_collection.py index d6de428b..6f53f403 100644 --- a/oai_agents/common/teammates_collection.py +++ b/oai_agents/common/teammates_collection.py @@ -271,8 +271,6 @@ def get_best_SP_agent(args, population): all_agents = get_all_agents( args.layout_names[0], population) agents_scores_averaged_over_layouts = [] - all_agents = [agent for agent in population[args.layout_names[0]]] - for agent in all_agents: scores = [agent.layout_scores[layout_name] for layout_name in args.layout_names] agents_scores_averaged_over_layouts.append((agent, sum(scores)/len(scores))) From 40e96eae697a3e2ebcb50f98fd7686c8b2148d29 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Mon, 4 Nov 2024 14:12:30 -0700 Subject: [PATCH 38/61] update generate_hdim_and_seed for population methods --- oai_agents/common/population.py | 1 - 1 file changed, 1 deletion(-) diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 404ba2bf..d7cd697b 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -8,7 +8,6 @@ from .curriculum import Curriculum - import random From 060db240748288c9f38d5bccea7b6a8715131a4b Mon Sep 17 00:00:00 2001 From: ttopiac Date: Tue, 5 Nov 2024 09:58:42 -0700 Subject: [PATCH 39/61] Replace the first loop in 
get_best_SP_agent by a line of code. --- scripts/utils/train_helper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index 6c8964a1..d968a1e3 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -1,7 +1,6 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.tags import TeamType from oai_agents.common.population import get_population, generate_hdim_and_seed -from oai_agents.common.population import get_population, generate_hdim_and_seed from oai_agents.common.teammates_collection import generate_TC, get_best_SP_agent, generate_TC_for_ADV_agent, update_TC_w_ADV_teammates from oai_agents.common.curriculum import Curriculum from .common import load_agents, generate_name From b66f06b3bd9495ef7aea08c6d15aaaa3b7d6a101 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Tue, 5 Nov 2024 15:52:05 -0700 Subject: [PATCH 40/61] Add two CheckedPoints tags, including CheckedModelPrefix and REWARD_SUBSTR --- oai_agents/agents/rl.py | 1 - 1 file changed, 1 deletion(-) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index b0270412..0468005b 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -31,7 +31,6 @@ def __init__(self, teammates_collection, args, name = name or 'rl_agent' super(RLAgentTrainer, self).__init__(name, args, seed=seed) - self.args = args self.device = args.device self.teammates_len = self.args.teammates_len From f90ccde61beb029b01a7f213bf5beb55769e68cc Mon Sep 17 00:00:00 2001 From: ttopiac Date: Thu, 7 Nov 2024 09:45:31 -0700 Subject: [PATCH 41/61] Replace CheckedPoints by KeyCheckpoints --- oai_agents/agents/base_agent.py | 4 ++-- oai_agents/agents/rl.py | 5 +++-- oai_agents/common/population.py | 4 ++-- sandbox/fix_pop_ck_list_after_continued_run.py | 2 +- scripts/train_agents.py | 4 ++-- tests/test_oai_agents/test_base_agent.py | 4 ++-- 6 files changed, 12 insertions(+), 11 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index bb3575b6..817060fe 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -524,8 +524,8 @@ def load_agents(args, tag, name: str=None, path: Union[Path, None] = None): @staticmethod def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None) -> List[str]: ''' - Lists only tags that start with CheckedPoints.CHECKED_MODEL_PREFIX, followed by an integer. - If the integer is greater than 0, it must be followed by CheckedPoints.REWARD_SUBSTR and a floating-point number. + Lists only tags that start with KeyCheckpoints.CHECKED_MODEL_PREFIX, followed by an integer. + If the integer is greater than 0, it must be followed by KeyCheckpoints.REWARD_SUBSTR and a floating-point number. Parameters: - args: Experiment arguments containing base directory info and experiment directory info. 
diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index 0468005b..e1dd1188 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -311,9 +311,10 @@ def train_agents(self, total_train_timesteps, tag_for_returning_agent, exp_name= self.ck_list = [(c[0], path, c[2]) for c in resume_ck_list] if resume_ck_list else [({k: 0 for k in self.args.layout_names}, path, ck) for ck in ckpts] else: self.ck_list = [] - path, tag = self.save_agents(tag=f'ck_{len(self.ck_list)}') + path, tag = self.save_agents(tag=f'{KeyCheckpoints.CHECKED_MODEL_PREFIX}{len(self.ck_list)}') self.ck_list.append(({k: 0 for k in self.args.layout_names}, path, tag)) + best_path, best_tag = None, None self.steps = self.start_step @@ -387,7 +388,7 @@ def get_agents_and_set_score_and_perftag(args, layout_name, scores_path_tag, per return all_agents @staticmethod - def get_checkedpoints_agents(args, ck_list, layout_name): + def get_KeyCheckpoints_agents(args, ck_list, layout_name): ''' categorizes agents using performance tags based on the checkpoint list AgentPerformance.HIGH diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index d7cd697b..c8ea4953 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -215,7 +215,7 @@ def get_population(args, for dilled_res in dilled_results: checkpoints_list = dill.loads(dilled_res) for layout_name in args.layout_names: - layout_pop = RLAgentTrainer.get_checkedpoints_agents(args, checkpoints_list, layout_name) + layout_pop = RLAgentTrainer.get_KeyCheckpoints_agents(args, checkpoints_list, layout_name) population[layout_name].extend(layout_pop) else: for inp in inputs: @@ -226,7 +226,7 @@ def get_population(args, h_dim=inp[4], serialize=False) for layout_name in args.layout_names: - layout_pop = RLAgentTrainer.get_checkedpoints_agents(args, checkpoints_list, layout_name) + layout_pop = RLAgentTrainer.get_KeyCheckpoints_agents(args, checkpoints_list, layout_name) population[layout_name].extend(layout_pop) save_population(args=args, population=population) diff --git a/sandbox/fix_pop_ck_list_after_continued_run.py b/sandbox/fix_pop_ck_list_after_continued_run.py index 92c0303d..7adfa929 100644 --- a/sandbox/fix_pop_ck_list_after_continued_run.py +++ b/sandbox/fix_pop_ck_list_after_continued_run.py @@ -4,7 +4,7 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.arguments import get_arguments -from oai_agents.common.tags import TeamType, CheckedPoints +from oai_agents.common.tags import TeamType, KeyCheckpoints from oai_agents.common.learner import LearnerType from oai_agents.common.tags import KeyCheckpoints diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 21b3a319..d44b0fd4 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -61,8 +61,8 @@ def SPN_1ADV_XSPCKP(args) -> None: ''' In N-agents games, a randomly initialized agent will be trained with N-X copies of itself and X unseen teammates. X unseen teammates can be composed by either one of the two conditions: - (a) 1 adversary and X-1 self-play checkedpoints. - (b) X self-play checkedpoints. + (a) 1 adversary and X-1 self-play KeyCheckpoints. + (b) X self-play KeyCheckpoints. e.g. when N is 4 and X is 1, the team can be composed by [SP, SP, SP, ADV] or [SP, SP, SP, H] or [SP, SP, SP, M] or [SP, SP, SP, L] in a 4-chef layout. 
when N is 4 and X is 2, the team can be composed diff --git a/tests/test_oai_agents/test_base_agent.py b/tests/test_oai_agents/test_base_agent.py index 9c18603f..1ea13e6a 100644 --- a/tests/test_oai_agents/test_base_agent.py +++ b/tests/test_oai_agents/test_base_agent.py @@ -1,6 +1,6 @@ from oai_agents.agents.base_agent import OAITrainer from pathlib import Path -from oai_agents.common.tags import CheckedPoints +from oai_agents.common.tags import KeyCheckpoints import shutil def test_list_agent_checked_tags(): @@ -38,7 +38,7 @@ def test_list_agent_checked_tags(): "ck_10_rew_336.8888888888889", "ck_3_invalid", # Invalid because it doesn't have a valid float after the integer "ck_4_rew_invalid", # Invalid because reward value is not a float - "unrelated_tag", # Invalid because it doesn't start with `CheckedPoints.CHECKED_MODEL_PREFIX` + "unrelated_tag", # Invalid because it doesn't start with `KeyCheckpoints.CHECKED_MODEL_PREFIX` "best", "last" ] From 2af78f832bd5d9e3eec8c9ea27cb30652890a3cb Mon Sep 17 00:00:00 2001 From: ttopiac Date: Thu, 7 Nov 2024 09:56:17 -0700 Subject: [PATCH 42/61] Replace get_KeyCheckpoints_agents by get_checkedpoints_agents --- oai_agents/agents/rl.py | 2 +- oai_agents/common/population.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index e1dd1188..da243010 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -388,7 +388,7 @@ def get_agents_and_set_score_and_perftag(args, layout_name, scores_path_tag, per return all_agents @staticmethod - def get_KeyCheckpoints_agents(args, ck_list, layout_name): + def get_checkedpoints_agents(args, ck_list, layout_name): ''' categorizes agents using performance tags based on the checkpoint list AgentPerformance.HIGH diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index c8ea4953..d7cd697b 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -215,7 +215,7 @@ def get_population(args, for dilled_res in dilled_results: checkpoints_list = dill.loads(dilled_res) for layout_name in args.layout_names: - layout_pop = RLAgentTrainer.get_KeyCheckpoints_agents(args, checkpoints_list, layout_name) + layout_pop = RLAgentTrainer.get_checkedpoints_agents(args, checkpoints_list, layout_name) population[layout_name].extend(layout_pop) else: for inp in inputs: @@ -226,7 +226,7 @@ def get_population(args, h_dim=inp[4], serialize=False) for layout_name in args.layout_names: - layout_pop = RLAgentTrainer.get_KeyCheckpoints_agents(args, checkpoints_list, layout_name) + layout_pop = RLAgentTrainer.get_checkedpoints_agents(args, checkpoints_list, layout_name) population[layout_name].extend(layout_pop) save_population(args=args, population=population) From 06c8631c816acbc2d6cac44f5a5868f9001f90a9 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Thu, 7 Nov 2024 15:42:44 -0700 Subject: [PATCH 43/61] little things --- scripts/train_agents.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/train_agents.py b/scripts/train_agents.py index d44b0fd4..cacd0d61 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -305,7 +305,7 @@ def set_input(args): args.fcp_total_training_timesteps = int(5e6 * args.how_long) args.n_x_fcp_total_training_timesteps = int(2 * args.fcp_total_training_timesteps * args.how_long) - args.SP_seed, args.SP_h_dim = 68, 256 + args.SP_seed, args.SP_h_dim = 1010, 256 args.N_X_SP_seed, args.N_X_SP_h_dim = 1010, 256 args.FCP_seed, args.FCP_h_dim = 
2020, 256
     args.N_X_FCP_seed, args.N_X_FCP_h_dim = 2602, 256
 
@@ -341,12 +341,12 @@
     args.adversary_force_training = False
     args.primary_force_training = False
 
-    args.teammates_len = 2
-    args.how_long = 6 # Not effective in quick_test mode
+    args.teammates_len = 3
+    args.how_long = 6*4 # Not effective in quick_test mode
 
     set_input(args=args)
 
-    SPN_1ADV_XSPCKP(args=args)
+    # SPN_1ADV_XSPCKP(args=args)
 
     #SP(args)
 
From a8793a726e34bb7f83ec0bf8ec2e5790953fb8b7 Mon Sep 17 00:00:00 2001
From: ttopiac
Date: Thu, 7 Nov 2024 15:54:52 -0700
Subject: [PATCH 44/61] Add SPN_XSPCKP_HP_TYPE

---
 scripts/train_agents.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/scripts/train_agents.py b/scripts/train_agents.py
index cacd0d61..413da171 100644
--- a/scripts/train_agents.py
+++ b/scripts/train_agents.py
@@ -154,6 +154,46 @@ def SPN_XSPCKP(args) -> None:
         unseen_teammates_len=unseen_teammates_len,
     )
 
+def SPN_XSPCKP_HP_TYPE(args) -> None:
+    '''
+    In N-agents games, a randomly initialized agent will be trained with N-X copies of itself
+    and X homogeneous unseen teammates, which are checkpoints saved during a previous self-play process.
+    These saved checkpoints are categorized into High, Medium, and Low performance.
+    e.g.
+    when N is 4 and X is 1, the team can be composed by [SP, SP, SP, H], [SP, SP, SP, M], [SP, SP, SP, L] in a 4-chef layout.
+    when N is 4 and X is 2, the team can be composed by [SP, SP, H, H], [SP, SP, M, M], [SP, SP, L, L] in a 4-chef layout.
+
+
+    Please note that
+    - X is the number of unseen teammates.
+    - X is assigned by the variable unseen_teammates_len in the function.
+
+
+    :param pop_force_training: Boolean that, if true, indicates population should be generated, otherwise load it from file
+    :param primary_force_training: Boolean that, if true, indicates the SP agent teammates_collection should be trained instead of loaded from file.
+    '''
+    unseen_teammates_len = 1
+    primary_train_types = [TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_HIGH,
+                           # TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_MEDIUM,
+                           # TeamType.SELF_PLAY_LOW, TeamType.SELF_PLAY_LOW, TeamType.SELF_PLAY_LOW, TeamType.SELF_PLAY_LOW,
+                           ]
+    primary_eval_types = {
+        'generate': [TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_LOW],
+        'load': []
+    }
+
+    curriculum = Curriculum(train_types = primary_train_types,
+                            is_random=True,
+                            )
+
+    get_N_X_SP_agents(
+        args,
+        n_x_sp_train_types = curriculum.train_types,
+        n_x_sp_eval_types=primary_eval_types,
+        curriculum=curriculum,
+        unseen_teammates_len=unseen_teammates_len,
+    )
+
 
 def FCP_mhri(args):
     '''

From 4338577ca9855ca754b51dc2af40f730ba31c60679410 Mon Sep 17 00:00:00 2001
From: ttopiac
Date: Mon, 11 Nov 2024 11:53:07 -0700
Subject: [PATCH 45/61] Rename get_population to get_categorized_population

---
 oai_agents/common/population.py | 2 +-
 scripts/utils/train_helper.py   | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py
index d7cd697b..c7907f52 100644
--- a/oai_agents/common/population.py
+++ b/oai_agents/common/population.py
@@ -173,7 +173,7 @@ def save_population(args, population):
             rt.save_agents(tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL)
 
 
-def get_population(args,
+def get_categorized_population(args,
                    ck_rate,
                    total_training_timesteps,
                    train_types,
diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py
index d968a1e3..556de25d 100644
--- a/scripts/utils/train_helper.py
+++ b/scripts/utils/train_helper.py
@@ -1,6 +1,6 @@
 from oai_agents.agents.rl import RLAgentTrainer
 from oai_agents.common.tags import TeamType
-from oai_agents.common.population import get_population, generate_hdim_and_seed
+from oai_agents.common.population import get_categorized_population, generate_hdim_and_seed
 from oai_agents.common.teammates_collection import generate_TC, get_best_SP_agent, generate_TC_for_ADV_agent, update_TC_w_ADV_teammates
 from oai_agents.common.curriculum import Curriculum
 from .common import load_agents, generate_name
@@ -74,7 +74,7 @@ def get_N_X_SP_agents(args,
     if agents:
         return agents[0]
 
-    population = get_population(
+    population = get_categorized_population(
         args=args,
         ck_rate=args.pop_total_training_timesteps // args.num_of_ckpoints,
         total_training_timesteps=args.pop_total_training_timesteps,
@@ -269,7 +269,7 @@ def get_FCP_agent_w_pop(args,
                                  train_types=fcp_train_types,
                                  has_curriculum = not fcp_curriculum.is_random)
 
-    population = get_population(
+    population = get_categorized_population(
         args=args,
         ck_rate=args.pop_total_training_timesteps // args.num_of_ckpoints,
         total_training_timesteps=args.pop_total_training_timesteps,

From 7d55c9f1d30ce62f0ee3630761531f5e5d7d7430f0 Mon Sep 17 00:00:00 2001
From: ttopiac
Date: Mon, 11 Nov 2024 11:54:54 -0700
Subject: [PATCH 46/61] Rename save_population to save_categorized_population

---
 oai_agents/common/population.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py
index c7907f52..5da869f6 100644
--- a/oai_agents/common/population.py
+++ b/oai_agents/common/population.py
@@ -154,7 +154,7 @@ def generate_hdim_and_seed(num_of_required_agents):
     return selected_seeds, selected_hdims
 
 
-def save_population(args, population):
+def save_categorized_population(args, population):
     name_prefix = 'pop'
     for layout_name in args.layout_names:
         rt = RLAgentTrainer(
@@ -229,6 +229,6 @@ def get_categorized_population(args,
                 layout_pop = RLAgentTrainer.get_checkedpoints_agents(args, checkpoints_list, layout_name)
                 population[layout_name].extend(layout_pop)
 
-    save_population(args=args, population=population)
+    save_categorized_population(args=args, population=population)
 
     return population
\ No newline at end of file

From 93ed176d493d188a88a42e9963af8d40a2e1a5e1 Mon Sep 17 00:00:00 2001
From: ttopiac
Date: Mon, 11 Nov 2024 12:52:10 -0700
Subject: [PATCH 47/61] Ensure the last model is saved with the last tag.

---
 oai_agents/agents/base_agent.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py
index 817060fe..056a5caf 100644
--- a/oai_agents/agents/base_agent.py
+++ b/oai_agents/agents/base_agent.py
@@ -529,7 +529,7 @@ def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None
 
         Parameters:
         - args: Experiment arguments containing base directory info and experiment directory info.
-        - name: The name of the agent for which tags should be listed.
+        - name: The name of the agent, for which tags should be listed.
         - path: Optional. If provided, it overrides the default path to the agents directory.
 
         Returns:

From 9af6a21428ccacb4b15583da38a3f922fddbadd9 Mon Sep 17 00:00:00 2001
From: ttopiac
Date: Mon, 11 Nov 2024 14:54:33 -0700
Subject: [PATCH 48/61] Use checked_model_name_handler.generate_checked_model_name

---
 oai_agents/agents/base_agent.py          | 23 +----
 oai_agents/agents/rl.py                  |  4 +-
 .../common/checked_model_name_handler.py | 86 +++++++++++++++++++
 3 files changed, 92 insertions(+), 21 deletions(-)
 create mode 100644 oai_agents/common/checked_model_name_handler.py

diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py
index 056a5caf..0eab54d9 100644
--- a/oai_agents/agents/base_agent.py
+++ b/oai_agents/agents/base_agent.py
@@ -3,6 +3,7 @@ from oai_agents.common.state_encodings import ENCODING_SCHEMES
 from oai_agents.common.subtasks import calculate_completed_subtask, get_doable_subtasks, Subtasks
 from oai_agents.common.tags import AgentPerformance, TeamType, KeyCheckpoints
+from oai_agents.common.checked_model_name_handler import CheckedModelNameHandler
 from oai_agents.gym_environments.base_overcooked_env import USEABLE_COUNTERS
 
 from overcooked_ai_py.mdp.overcooked_mdp import Action
@@ -541,23 +542,5 @@ def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None
             else:
                 path = args.base_dir / 'agent_models' / name
 
-        # Ensure the directory exists
-        if not path.exists() or not path.is_dir():
-            raise FileNotFoundError(f"Agent directory not found: {path}")
-
-        # Define the prefix and the regular expression to match the pattern
-        prefix = KeyCheckpoints.CHECKED_MODEL_PREFIX
-        reward_substr = KeyCheckpoints.REWARD_SUBSTR
-        pattern = re.compile(f"^{re.escape(prefix)}(\\d+)(?:{re.escape(reward_substr)}[\\d.]+)?$")
-
-        # List all subdirectories (tags) that match the pattern
-        tags = []
-        for tag in path.iterdir():
-            if tag.is_dir() and pattern.match(tag.name):
-                match = pattern.match(tag.name)
-                integer_part = int(match.group(1))
-                # Only add tags that either have no reward substring for integer 0, or have it when integer > 0
-                if integer_part == 0 or (integer_part > 0 and 
reward_substr in tag.name): - tags.append(tag.name) - - return tags + handler = CheckedModelNameHandler() + return handler.get_checked_model_tags(path=path) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index da243010..6511d39c 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -4,6 +4,7 @@ from oai_agents.common.state_encodings import ENCODING_SCHEMES from oai_agents.common.tags import AgentPerformance, TeamType, TeammatesCollection, KeyCheckpoints from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv +from oai_agents.common.checked_model_name_handler import CheckedModelNameHandler import numpy as np import random @@ -300,6 +301,7 @@ def train_agents(self, total_train_timesteps, tag_for_returning_agent, exp_name= self.log_details(experiment_name, total_train_timesteps) + ckname_handler = CheckedModelNameHandler() if self.checkpoint_rate is not None: if self.args.resume: path = self.args.base_dir / 'agent_models' / experiment_name @@ -348,7 +350,7 @@ def train_agents(self, total_train_timesteps, tag_for_returning_agent, exp_name= if self.checkpoint_rate: if self.learning_agent.num_timesteps // self.checkpoint_rate > (len(self.ck_list) - 1): - path, tag = self.save_agents(tag=f'{KeyCheckpoints.CHECKED_MODEL_PREFIX}{len(self.ck_list)}{KeyCheckpoints.REWARD_SUBSTR}{mean_reward}') + path, tag = self.save_agents(tag=ckname_handler.generate_checked_model_name(id=len(self.ck_list), mean_reward=mean_reward)) self.ck_list.append((rew_per_layout, path, tag)) if mean_reward >= self.best_score: diff --git a/oai_agents/common/checked_model_name_handler.py b/oai_agents/common/checked_model_name_handler.py new file mode 100644 index 00000000..7e13cabf --- /dev/null +++ b/oai_agents/common/checked_model_name_handler.py @@ -0,0 +1,86 @@ +from oai_agents.common.tags import KeyCheckpoints +import re +from typing import Optional + +from pathlib import Path +from typing import List, Union +from oai_agents.common.tags import KeyCheckpoints + +class CheckedModelNameHandler: + def __init__(self): + """ + Initializes the CheckedModelNameHandler with optional custom prefix and reward substring. + + :param prefix: Custom prefix for model names. + :param reward_substr: Custom reward substring for model names. + """ + self.prefix = KeyCheckpoints.CHECKED_MODEL_PREFIX + self.reward_substr = KeyCheckpoints.REWARD_SUBSTR + self.pattern = re.compile(f"^{re.escape(self.prefix)}(\\d+)(?:{re.escape(self.reward_substr)}[\\d.]+)?$") + + def generate_checked_model_name(self, id: int, mean_reward: Optional[float] = None) -> str: + """ + Generate a checked model name based on the given id and mean reward. + + :param id: The identifier for the model, used as a numeric suffix. + :param mean_reward: The mean reward to include in the model name, if applicable. + :return: A string representing the generated checked model name. + :raises ValueError: If id is negative or if mean_reward is not provided for ids greater than 0. 
+ """ + # Validate id + if id < 0: + raise ValueError("id must be a non-negative integer.") + + # When id is 0, mean_reward can be None + if id == 0: + return f"{self.prefix}{id}" + + # For id > 0, mean_reward must be provided + if mean_reward is None: + raise ValueError("mean_reward must be provided for ids greater than 0.") + + # Return the model name including mean_reward + return f"{self.prefix}{id}{self.reward_substr}{mean_reward}" + + + + def is_valid_checked_model_name(self, model_name: str) -> bool: + """ + Check if a model name matches the required pattern for checked models. + + :param model_name: The model name to validate. + :return: True if the model name matches the pattern; otherwise, False. + """ + return bool(self.pattern.match(model_name)) + + def get_checked_model_tags(self, path: Union[Path, None] = None) -> List[str]: + """ + Retrieve all valid checked model tags (subdirectories) under the specified path that match the pattern. + + :param path: The directory path to search for valid checked model tags. Can be a Path object or None. + :return: A list of valid checked model tag names. + :raises ValueError: If the path is None. + :raises FileNotFoundError: If the specified path does not exist. + :raises NotADirectoryError: If the specified path is not a directory. + """ + if path is None: + raise ValueError("The path cannot be None.") + + # # Convert to Path if not already a Path object + path = Path(path) if not isinstance(path, Path) else path + + if not path.exists(): + raise FileNotFoundError(f"The specified path '{path}' does not exist.") + if not path.is_dir(): + raise NotADirectoryError(f"The specified path '{path}' is not a directory.") + + tags = [] + for tag in path.iterdir(): + if tag.is_dir() and self.pattern.match(tag.name): + match = self.pattern.match(tag.name) + integer_part = int(match.group(1)) + # Only add tags that either have no reward substring for integer 0, or have it when integer > 0 + if integer_part == 0 or (integer_part > 0 and self.reward_substr in tag.name): + tags.append(tag.name) + return tags + From 64756b903a5e70a3ace3394fe76a6da191970f72 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Mon, 11 Nov 2024 15:08:36 -0700 Subject: [PATCH 49/61] Clean checked_model_name_handler.py --- oai_agents/agents/base_agent.py | 2 +- oai_agents/agents/rl.py | 2 +- .../common/checked_model_name_handler.py | 48 +++++++------------ 3 files changed, 19 insertions(+), 33 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index 0eab54d9..6c0a6cdf 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -543,4 +543,4 @@ def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None path = args.base_dir / 'agent_models' / name handler = CheckedModelNameHandler() - return handler.get_checked_model_tags(path=path) + return handler.get_all_checked_tags(path=path) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index 6511d39c..190a911c 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -350,7 +350,7 @@ def train_agents(self, total_train_timesteps, tag_for_returning_agent, exp_name= if self.checkpoint_rate: if self.learning_agent.num_timesteps // self.checkpoint_rate > (len(self.ck_list) - 1): - path, tag = self.save_agents(tag=ckname_handler.generate_checked_model_name(id=len(self.ck_list), mean_reward=mean_reward)) + path, tag = self.save_agents(tag=ckname_handler.generate_tag(id=len(self.ck_list), mean_reward=mean_reward)) 
self.ck_list.append((rew_per_layout, path, tag)) if mean_reward >= self.best_score: diff --git a/oai_agents/common/checked_model_name_handler.py b/oai_agents/common/checked_model_name_handler.py index 7e13cabf..65f93a4c 100644 --- a/oai_agents/common/checked_model_name_handler.py +++ b/oai_agents/common/checked_model_name_handler.py @@ -1,59 +1,47 @@ from oai_agents.common.tags import KeyCheckpoints import re -from typing import Optional - from pathlib import Path -from typing import List, Union -from oai_agents.common.tags import KeyCheckpoints +from typing import Optional, List, Union class CheckedModelNameHandler: def __init__(self): """ - Initializes the CheckedModelNameHandler with optional custom prefix and reward substring. - - :param prefix: Custom prefix for model names. - :param reward_substr: Custom reward substring for model names. + Initializes the CheckedModelNameHandler with default prefix and reward substring. """ self.prefix = KeyCheckpoints.CHECKED_MODEL_PREFIX self.reward_substr = KeyCheckpoints.REWARD_SUBSTR self.pattern = re.compile(f"^{re.escape(self.prefix)}(\\d+)(?:{re.escape(self.reward_substr)}[\\d.]+)?$") - def generate_checked_model_name(self, id: int, mean_reward: Optional[float] = None) -> str: + def generate_tag(self, id: int, mean_reward: Optional[float] = None) -> str: """ Generate a checked model name based on the given id and mean reward. :param id: The identifier for the model, used as a numeric suffix. - :param mean_reward: The mean reward to include in the model name, if applicable. + :param mean_reward: Optional mean reward to include in the model name, required for ids greater than 0. :return: A string representing the generated checked model name. :raises ValueError: If id is negative or if mean_reward is not provided for ids greater than 0. """ - # Validate id if id < 0: - raise ValueError("id must be a non-negative integer.") + raise ValueError("ID must be a non-negative integer.") - # When id is 0, mean_reward can be None if id == 0: return f"{self.prefix}{id}" - # For id > 0, mean_reward must be provided if mean_reward is None: - raise ValueError("mean_reward must be provided for ids greater than 0.") + raise ValueError("Mean reward must be provided for IDs greater than 0.") - # Return the model name including mean_reward return f"{self.prefix}{id}{self.reward_substr}{mean_reward}" - - - def is_valid_checked_model_name(self, model_name: str) -> bool: + def is_valid_checked_tag(self, tag: str) -> bool: """ - Check if a model name matches the required pattern for checked models. + Check if a tag name matches the required pattern for checked models. - :param model_name: The model name to validate. - :return: True if the model name matches the pattern; otherwise, False. + :param tag: The tag name to validate. + :return: True if the tag name matches the pattern; otherwise, False. """ - return bool(self.pattern.match(model_name)) + return bool(self.pattern.match(tag)) - def get_checked_model_tags(self, path: Union[Path, None] = None) -> List[str]: + def get_all_checked_tags(self, path: Union[Path, None] = None) -> List[str]: """ Retrieve all valid checked model tags (subdirectories) under the specified path that match the pattern. 
@@ -66,7 +54,6 @@ def get_checked_model_tags(self, path: Union[Path, None] = None) -> List[str]: if path is None: raise ValueError("The path cannot be None.") - # # Convert to Path if not already a Path object path = Path(path) if not isinstance(path, Path) else path if not path.exists(): @@ -75,12 +62,11 @@ def get_checked_model_tags(self, path: Union[Path, None] = None) -> List[str]: raise NotADirectoryError(f"The specified path '{path}' is not a directory.") tags = [] - for tag in path.iterdir(): - if tag.is_dir() and self.pattern.match(tag.name): - match = self.pattern.match(tag.name) + for tag_path in path.iterdir(): + if tag_path.is_dir() and self.pattern.match(tag_path.name): + match = self.pattern.match(tag_path.name) integer_part = int(match.group(1)) # Only add tags that either have no reward substring for integer 0, or have it when integer > 0 - if integer_part == 0 or (integer_part > 0 and self.reward_substr in tag.name): - tags.append(tag.name) + if integer_part == 0 or (integer_part > 0 and self.reward_substr in tag_path.name): + tags.append(tag_path.name) return tags - From c914ddf09f556e3f9eae2780b3b9ce6ee38bfaf5 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Thu, 14 Nov 2024 11:51:52 -0700 Subject: [PATCH 50/61] Add new layout and new agents info to evaluate_agents.py and eval_constants.py --- scripts/evaluate_agents.py | 197 ++++++++++++++++++++++++++------ scripts/utils/eval_constants.py | 104 ++++++++++++++--- 2 files changed, 251 insertions(+), 50 deletions(-) diff --git a/scripts/evaluate_agents.py b/scripts/evaluate_agents.py index 71a03c1a..004f4a99 100644 --- a/scripts/evaluate_agents.py +++ b/scripts/evaluate_agents.py @@ -1,7 +1,7 @@ import multiprocessing as mp import os from pathlib import Path -mp.set_start_method('spawn', force=True) +mp.set_start_method('spawn', force=True) import hashlib import sys @@ -27,6 +27,9 @@ THREE_PLAYERS_LOW_EVAL, THREE_PLAYERS_MEDIUM_EVAL, THREE_PLAYERS_HIGH_EVAL, + FOUR_PLAYERS_LOW_EVAL, + FOUR_PLAYERS_MEDIUM_EVAL, + FOUR_PLAYERS_HIGH_EVAL, FIVE_PLAYERS_LOW_EVAL, FIVE_PLAYERS_MEDIUM_FOR_ALL_BESIDES_STORAGE_ROOM_EVAL, FIVE_PLAYERS_HIGH_FOR_ALL_BESIDES_STORAGE_ROOM_EVAL, @@ -46,17 +49,27 @@ class Eval: } LAYOUT_NAMES_PATHs = { - 'selected_2_chefs_coordination_ring': { + 'selected_2_chefs_double_counter_circuit': { Eval.LOW: TWO_PLAYERS_LOW_EVAL, Eval.MEDIUM: TWO_PLAYERS_MEDIUM_EVAL, Eval.HIGH:TWO_PLAYERS_HIGH_EVAL }, - 'selected_2_chefs_counter_circuit': { + 'selected_2_chefs_secret_coordination_ring': { Eval.LOW: TWO_PLAYERS_LOW_EVAL, Eval.MEDIUM: TWO_PLAYERS_MEDIUM_EVAL, Eval.HIGH:TWO_PLAYERS_HIGH_EVAL }, - 'selected_2_chefs_cramped_room': { + 'selected_2_chefs_spacious_room_few_resources': { + Eval.LOW: TWO_PLAYERS_LOW_EVAL, + Eval.MEDIUM: TWO_PLAYERS_MEDIUM_EVAL, + Eval.HIGH:TWO_PLAYERS_HIGH_EVAL + }, + 'selected_2_chefs_spacious_room_no_counter_space': { + Eval.LOW: TWO_PLAYERS_LOW_EVAL, + Eval.MEDIUM: TWO_PLAYERS_MEDIUM_EVAL, + Eval.HIGH:TWO_PLAYERS_HIGH_EVAL + }, + 'selected_2_chefs_storage_room': { Eval.LOW: TWO_PLAYERS_LOW_EVAL, Eval.MEDIUM: TWO_PLAYERS_MEDIUM_EVAL, Eval.HIGH:TWO_PLAYERS_HIGH_EVAL @@ -78,6 +91,33 @@ class Eval: Eval.HIGH: THREE_PLAYERS_HIGH_EVAL, }, + 'selected_4_chefs_double_counter_circuit': { + Eval.LOW: FOUR_PLAYERS_LOW_EVAL, + Eval.MEDIUM: FOUR_PLAYERS_MEDIUM_EVAL, + Eval.HIGH: FOUR_PLAYERS_HIGH_EVAL + }, + 'selected_4_chefs_secret_coordination_ring': { + Eval.LOW: FOUR_PLAYERS_LOW_EVAL, + Eval.MEDIUM: FOUR_PLAYERS_MEDIUM_EVAL, + Eval.HIGH: FOUR_PLAYERS_HIGH_EVAL + }, + 
'selected_4_chefs_spacious_room_few_resources': { + Eval.LOW: FOUR_PLAYERS_LOW_EVAL, + Eval.MEDIUM: FOUR_PLAYERS_MEDIUM_EVAL, + Eval.HIGH: FOUR_PLAYERS_HIGH_EVAL + }, + 'selected_4_chefs_spacious_room_no_counter_space': { + Eval.LOW: FOUR_PLAYERS_LOW_EVAL, + Eval.MEDIUM: FOUR_PLAYERS_MEDIUM_EVAL, + Eval.HIGH: FOUR_PLAYERS_HIGH_EVAL + }, + 'selected_4_chefs_storage_room': { + Eval.LOW: FOUR_PLAYERS_LOW_EVAL, + Eval.MEDIUM: FOUR_PLAYERS_MEDIUM_EVAL, + Eval.HIGH: FOUR_PLAYERS_HIGH_EVAL + }, + + 'selected_5_chefs_counter_circuit': { Eval.LOW: FIVE_PLAYERS_LOW_EVAL, Eval.MEDIUM: FIVE_PLAYERS_MEDIUM_FOR_ALL_BESIDES_STORAGE_ROOM_EVAL, @@ -218,7 +258,7 @@ def process_reward(reward): for idx, agent_name in enumerate(all_mean_rewards): mean_values = [v / num_teamsets for v in cross_exp_mean[agent_name]] std_values = [v / num_teamsets for v in cross_exp_std[agent_name]] - + x = x_values + idx * width - width * (num_agents - 1) / 2 ax.bar(x, mean_values, width, yerr=std_values, label=f"Agent: {agent_name}", capsize=5) @@ -345,7 +385,7 @@ def evaluate_agent_for_layout(agent_name, path, layout_names, p_idxes, args, det m.update(str(s).encode()) arg_hash = m.hexdigest() cached_eval = Path(f"eval_cache/eval_{arg_hash}.pkl") - + if cached_eval.is_file(): print(f"Loading cached evaluation for agent {agent_name}") with open(cached_eval, "rb") as f: @@ -405,37 +445,124 @@ def run_parallel_evaluation(args, all_agents_paths, layout_names, p_idxes, deter def get_2_player_input(args): args.num_players = 2 - layout_names = ['selected_2_chefs_coordination_ring', - 'selected_2_chefs_counter_circuit', - 'selected_2_chefs_cramped_room'] + layout_names = [ + # 'selected_2_chefs_coordination_ring', + # 'selected_2_chefs_counter_circuit', + # 'selected_2_chefs_cramped_room', + 'selected_2_chefs_double_counter_circuit', + 'selected_2_chefs_secret_coordination_ring', + 'selected_2_chefs_spacious_room_few_resources', + 'selected_2_chefs_spacious_room_no_counter_space', + 'selected_2_chefs_storage_room' + ] + p_idxes = [0, 1] - all_agents_paths = { - 'SP': 'agent_models/Result/2/SP_hd64_seed14/best', - 'FCP': 'agent_models/FCP_correct/2/FCP_s2020_h256_tr(AMX)_ran/best', + all_agents_paths = { + 'SP': 'agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/best', + 'LMH CUR': 'agent_models/Final/2/N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL]_cur_originaler/best', + 'LAST ALMH RAN REUSED 3A 60M': 'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_ran_originaler_attack2/best', + 'LAST ALMH CUR REUSED 3A 60M': 'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_cur_originaler_attack2/best', + # 'LAST ALMH-SP RAN REUSED 3A': 'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV_SP]_cur_originaler_attack2/best', + # 'LAST A-SP RAN REUSED 3A': 'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SP_SPADV]_ran_originaler_attack2/best', + 'LAST AMH CUR 3A':'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack2/best', + # 'LAST AMH CUR 2A':'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack1/best', + # 'LAST AMH CUR 1A':'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack0/best', + 'BEST AMH CUR 
3A':'agent_models/Final/2-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack2/best', + # 'BEST AMH CUR 2A':'agent_models/Final/2-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack1/best', + # 'BEST AMH CUR 1A':'agent_models/Final/2-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack0/best', + 'H': 'agent_models/Final/2/N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH]_ran_originaler/best', + + } + + teammate_lvl_sets = [ + [Eval.LOW], + [Eval.MEDIUM], + [Eval.HIGH] + ] + + return layout_names, p_idxes, all_agents_paths, teammate_lvl_sets, args + + +def get_2_player_input(args): + args.num_players = 2 + layout_names = [ + # 'selected_2_chefs_coordination_ring', + # 'selected_2_chefs_counter_circuit', + # 'selected_2_chefs_cramped_room', + 'selected_2_chefs_double_counter_circuit', + 'selected_2_chefs_secret_coordination_ring', + 'selected_2_chefs_spacious_room_few_resources', + 'selected_2_chefs_spacious_room_no_counter_space', + 'selected_2_chefs_storage_room' + ] - 'ALMH CUR 3A': 'agent_models/ALMH_CUR/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_cur_originaler_attack2/best', - 'ALMH RAN 3A': 'agent_models/ALMH_RAN/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_ran_originaler_attack2/best', - 'AMH CUR 3A': 'agent_models/AMH_CUR/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack2/best', - 'AMH RAN 3A': 'agent_models/AMH_RAN/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_ran_originaler_attack2/best', + p_idxes = [0, 1] - 'ALMH CUR 2A': 'agent_models/ALMH_CUR/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_cur_originaler_attack1/best', - 'ALMH RAN 2A': 'agent_models/ALMH_RAN/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_ran_originaler_attack1/best', - 'AMH CUR 2A': 'agent_models/AMH_CUR/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack1/best', - 'AMH RAN 2A': 'agent_models/AMH_RAN/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_ran_originaler_attack1/best', + all_agents_paths = { + 'SP': 'agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/best', + 'LMH CUR': 'agent_models/Final/2/N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL]_cur_originaler/best', + 'LAST ALMH RAN REUSED 3A 60M': 'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_ran_originaler_attack2/best', + 'LAST ALMH CUR REUSED 3A 60M': 'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_cur_originaler_attack2/best', + # 'LAST ALMH-SP RAN REUSED 3A': 'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV_SP]_cur_originaler_attack2/best', + # 'LAST A-SP RAN REUSED 3A': 'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SP_SPADV]_ran_originaler_attack2/best', + 'LAST AMH CUR 3A':'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack2/best', + # 'LAST AMH CUR 2A':'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack1/best', + # 'LAST AMH CUR 
1A':'agent_models/Final/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack0/best', + 'BEST AMH CUR 3A':'agent_models/Final/2-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack2/best', + # 'BEST AMH CUR 2A':'agent_models/Final/2-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack1/best', + # 'BEST AMH CUR 1A':'agent_models/Final/2-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack0/best', + 'H': 'agent_models/Final/2/N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH]_ran_originaler/best', - 'ALMH CUR 1A': 'agent_models/ALMH_CUR/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_cur_originaler_attack0/best', - 'ALMH RAN 1A': 'agent_models/ALMH_RAN/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_ran_originaler_attack0/best', - 'AMH CUR 1A': 'agent_models/AMH_CUR/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack0/best', - 'AMH RAN 1A': 'agent_models/AMH_RAN/2/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_ran_originaler_attack0/best' } + teammate_lvl_sets = [ [Eval.LOW], [Eval.MEDIUM], [Eval.HIGH] ] + return layout_names, p_idxes, all_agents_paths, teammate_lvl_sets, args +def get_4_player_input(args): + args.num_players = 4 + layout_names = [ + # 'selected_4_chefs_coordination_ring', + # 'selected_4_chefs_counter_circuit', + # 'selected_4_chefs_cramped_room', + 'selected_4_chefs_double_counter_circuit', + 'selected_4_chefs_secret_coordination_ring', + 'selected_4_chefs_spacious_room_few_resources', + 'selected_4_chefs_spacious_room_no_counter_space', + 'selected_4_chefs_storage_room' + ] + + p_idxes = [0, 1, 2, 3] + + all_agents_paths = { + 'SP': 'agent_models/Final/4/SP_s1010_h256_tr[SP]_ran/best', + 'LMH CUR': 'agent_models/Final/4/N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL]_cur_originaler/best', + # 'LAST ALMH RAN REUSED 3A 60M': 'agent_models/Final/4/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_ran_originaler_attack2/best', + # 'LAST ALMH CUR REUSED 3A 60M': 'agent_models/Final/4/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV]_cur_originaler_attack2/best', + # 'LAST ALMH-SP RAN REUSED 3A': 'agent_models/Final/4/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPL_SPL_SPL_SPL_SPADV_SP]_cur_originaler_attack2/best', + # 'LAST A-SP RAN REUSED 3A': 'agent_models/Final/4/PWADV-N-1-SP_s1010_h256_tr[SP_SPADV]_ran_originaler_attack2/best', + # 'LAST AMH CUR 3A':'agent_models/Final/4/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack2/best', + 'LAST AMH CUR 2A':'agent_models/Final/4/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack1/best', + 'LAST AMH CUR 1A':'agent_models/Final/4/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack0/best', + 'BEST AMH CUR 3A':'agent_models/Final/4-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack2/best', + # 'BEST AMH CUR 2A':'agent_models/Final/4-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack1/best', + # 'BEST AMH CUR 
1A':'agent_models/Final/4-ego-play-with-best-adv/PWADV-N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH_SPM_SPM_SPM_SPM_SPADV]_cur_originaler_attack0/best', + # 'H': 'agent_models/Final/4/N-1-SP_s1010_h256_tr[SPH_SPH_SPH_SPH]_ran_originaler/best', + + } + + teammate_lvl_sets = [ + [Eval.LOW], + [Eval.MEDIUM], + [Eval.HIGH] + ] + + return layout_names, p_idxes, all_agents_paths, teammate_lvl_sets, args def get_3_player_input(args): args.num_players = 3 @@ -489,8 +616,8 @@ def get_5_player_input(args): if __name__ == "__main__": args = get_arguments() - # layout_names, p_idxes, all_agents_paths, teammate_lvl_sets, args = get_2_player_input(args) - layout_names, p_idxes, all_agents_paths, teammate_lvl_sets, args = get_3_player_input(args) + layout_names, p_idxes, all_agents_paths, teammate_lvl_sets, args = get_4_player_input(args) + # layout_names, p_idxes, all_agents_paths, teammate_lvl_sets, args = get_3_player_input(args) # layout_names, p_idxes, all_agents_paths, teammate_lvl_sets, args = get_5_player_input(args) deterministic = False # deterministic = True does not actually work :sweat_smile: @@ -529,13 +656,13 @@ def get_5_player_input(args): unseen_counts=unseen_counts, display_delivery=show_delivery_num, plot_name=plot_name) - - - plot_evaluation_results_line(all_mean_rewards=all_mean_rewards, - all_std_rewards=all_std_rewards, - layout_names=layout_names, - teammate_lvl_sets=teammate_lvl_sets, - num_players=args.num_players, - plot_name=plot_name) - + + + # plot_evaluation_results_line(all_mean_rewards=all_mean_rewards, + # all_std_rewards=all_std_rewards, + # layout_names=layout_names, + # teammate_lvl_sets=teammate_lvl_sets, + # num_players=args.num_players, + # plot_name=plot_name) + diff --git a/scripts/utils/eval_constants.py b/scripts/utils/eval_constants.py index a7e6dcfa..edda23b7 100644 --- a/scripts/utils/eval_constants.py +++ b/scripts/utils/eval_constants.py @@ -1,24 +1,98 @@ +# TWO_PLAYERS_LOW_EVAL = [ +# 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_0', +# 'agent_models/Result/Eval/2/SP_hd256_seed42/ck_1_rew_18.666666666666668', +# 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_1_rew_22.0', +# 'agent_models/Result/Eval/2/SP_hd256_seed7/ck_1_rew_38.0', +# 'agent_models/Result/Eval/2/SP_hd64_seed1995/ck_0', +# ] +# TWO_PLAYERS_MEDIUM_EVAL = [ +# 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_2_rew_108.66666666666667', +# 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_3_rew_170.66666666666666', +# 'agent_models/Result/Eval/2/SP_hd256_seed7/ck_2_rew_106.66666666666667', +# 'agent_models/Result/Eval/2/SP_hd256_seed7/ck_3_rew_192.66666666666666', +# 'agent_models/Result/Eval/2/SP_hd256_seed42/ck_2_rew_110.66666666666667', +# ] +# TWO_PLAYERS_HIGH_EVAL = [ +# 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_24_rew_298.0', +# 'agent_models/Result/Eval/2/SP_hd64_seed1995/ck_20_rew_286.6666666666667', +# 'agent_models/Result/Eval/2/SP_hd256_seed7/ck_24_rew_258.0', +# 'agent_models/Result/Eval/2/SP_hd256_seed42/ck_22_rew_262.0', +# 'agent_models/Result/Eval/2/SP_hd256_seed42/ck_13_rew_274.0', +# ] + TWO_PLAYERS_LOW_EVAL = [ - 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_0', - 'agent_models/Result/Eval/2/SP_hd256_seed42/ck_1_rew_18.666666666666668', - 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_1_rew_22.0', - 'agent_models/Result/Eval/2/SP_hd256_seed7/ck_1_rew_38.0', - 'agent_models/Result/Eval/2/SP_hd64_seed1995/ck_0', + "agent_models/Final/2/SP_hd64_seed0/ck_0", + "agent_models/Final/2/SP_hd64_seed14/ck_0", + "agent_models/Final/2/SP_hd256_seed13/ck_0", + 
"agent_models/Final/2/SP_hd256_seed68/ck_0", + "agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/ck_0", + "agent_models/Final/2/SP_hd64_seed14/ck_1_rew_18.4", + "agent_models/Final/2/SP_hd64_seed0/ck_1_rew_28.8", + "agent_models/Final/2/SP_hd256_seed13/ck_1_rew_30.8", + "agent_models/Final/2/SP_hd256_seed68/ck_1_rew_56.8" ] + TWO_PLAYERS_MEDIUM_EVAL = [ - 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_2_rew_108.66666666666667', - 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_3_rew_170.66666666666666', - 'agent_models/Result/Eval/2/SP_hd256_seed7/ck_2_rew_106.66666666666667', - 'agent_models/Result/Eval/2/SP_hd256_seed7/ck_3_rew_192.66666666666666', - 'agent_models/Result/Eval/2/SP_hd256_seed42/ck_2_rew_110.66666666666667', + "agent_models/Final/2/SP_hd64_seed14/ck_2_rew_88.4", + "agent_models/Final/2/SP_hd64_seed0/ck_2_rew_122.8", + "agent_models/Final/2/SP_hd256_seed13/ck_2_rew_128.8", + "agent_models/Final/2/SP_hd256_seed68/ck_2_rew_156.0", + "agent_models/Final/2/SP_hd64_seed14/ck_3_rew_152.8", + "agent_models/Final/2/SP_hd64_seed0/ck_3_rew_171.6" ] + TWO_PLAYERS_HIGH_EVAL = [ - 'agent_models/Result/Eval/2/SP_hd64_seed11/ck_24_rew_298.0', - 'agent_models/Result/Eval/2/SP_hd64_seed1995/ck_20_rew_286.6666666666667', - 'agent_models/Result/Eval/2/SP_hd256_seed7/ck_24_rew_258.0', - 'agent_models/Result/Eval/2/SP_hd256_seed42/ck_22_rew_262.0', - 'agent_models/Result/Eval/2/SP_hd256_seed42/ck_13_rew_274.0', + "agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/ck_10_rew_238.0", + "agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/ck_9_rew_232.8", + "agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/ck_8_rew_234.0", + "agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/ck_7_rew_230.8", + "agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/ck_6_rew_230.8", + "agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/ck_5_rew_230.8", + "agent_models/Final/2/SP_s1010_h256_tr[SP]_ran/ck_4_rew_224.8", + "agent_models/Final/2/SP_hd64_seed14/ck_10_rew_226.8", + "agent_models/Final/2/SP_hd64_seed14/ck_9_rew_224.0", + "agent_models/Final/2/SP_hd256_seed13/ck_5_rew_217.2", + "agent_models/Final/2/SP_hd64_seed0/ck_10_rew_221.6", + "agent_models/Final/2/SP_hd256_seed68/ck_10_rew_209.2", + "agent_models/Final/2/SP_hd64_seed14/ck_5_rew_212.0", + "agent_models/Final/2/SP_hd256_seed68/ck_9_rew_213.6" +] + +# Define the paths for four-player evaluation in three different lists +FOUR_PLAYERS_LOW_EVAL = [ + "agent_models/Final/4/SP_s1010_h256_tr[SP]_ran/ck_0", + "agent_models/Final/4/SP_hd64_seed0/ck_0", + "agent_models/Final/4/SP_hd64_seed14/ck_0", + "agent_models/Final/4/SP_hd256_seed13/ck_0", + "agent_models/Final/4/SP_hd256_seed68/ck_0", + "agent_models/Final/4/SP_hd64_seed0/ck_1_rew_54.2", + "agent_models/Final/4/SP_hd256_seed68/ck_1_rew_66.0", + "agent_models/Final/4/SP_hd256_seed13/ck_1_rew_79.0", + "agent_models/Final/4/SP_hd64_seed14/ck_1_rew_44.0", + "agent_models/Final/4/SP_hd256_seed68/ck_2_rew_142.0" +] + +FOUR_PLAYERS_MEDIUM_EVAL = [ + "agent_models/Final/4/SP_hd64_seed14/ck_2_rew_122.2", + "agent_models/Final/4/SP_hd256_seed13/ck_2_rew_197.2", + "agent_models/Final/4/SP_hd64_seed0/ck_3_rew_168.0", + "agent_models/Final/4/SP_hd256_seed68/ck_3_rew_214.0", + "agent_models/Final/4/SP_hd64_seed0/ck_4_rew_204.6", + "agent_models/Final/4/SP_hd64_seed14/ck_4_rew_243.6" +] + +FOUR_PLAYERS_HIGH_EVAL = [ + "agent_models/Final/4/SP_s1010_h256_tr[SP]_ran/ck_8_rew_308.0", + "agent_models/Final/4/SP_s1010_h256_tr[SP]_ran/ck_6_rew_309.6", + "agent_models/Final/4/SP_s1010_h256_tr[SP]_ran/ck_5_rew_299.7", + 
"agent_models/Final/4/SP_hd64_seed0/ck_10_rew_302.4", + "agent_models/Final/4/SP_hd64_seed14/ck_10_rew_295.6", + "agent_models/Final/4/SP_hd256_seed68/ck_9_rew_296.8", + "agent_models/Final/4/SP_hd256_seed68/ck_8_rew_296.2", + "agent_models/Final/4/SP_hd64_seed14/ck_9_rew_289.0", + "agent_models/Final/4/SP_hd256_seed13/ck_9_rew_299.2", + "agent_models/Final/4/SP_hd256_seed13/ck_10_rew_290.8" ] THREE_PLAYERS_LOW_EVAL = [ From ea8776855def29cba9214379c60d383acf017ea8 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Thu, 14 Nov 2024 14:03:11 -0700 Subject: [PATCH 51/61] Add tag_for_returning_agent input to a call on train_agents() --- scripts/generate_agents_for_eval.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/generate_agents_for_eval.py b/scripts/generate_agents_for_eval.py index 848f8157..3f69a708 100644 --- a/scripts/generate_agents_for_eval.py +++ b/scripts/generate_agents_for_eval.py @@ -3,7 +3,7 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.arguments import get_arguments -from oai_agents.common.tags import TeamType, TeammatesCollection +from oai_agents.common.tags import TeamType, TeammatesCollection, KeyCheckpoints from scripts.utils import get_fcp_population @@ -18,7 +18,7 @@ def train_FCP(args, name, teammates_collection, train_types, total_training_time train_types=train_types, seed=2602, ) - fcp_trainer.train_agents(total_train_timesteps=total_training_timesteps) + fcp_trainer.train_agents(total_train_timesteps=total_training_timesteps, tag_for_returning_agent=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL) def set_input(args, quick_test=False): @@ -26,8 +26,8 @@ def set_input(args, quick_test=False): args.teammates_len = 2 args.num_players = args.teammates_len + 1 # 3 players = 1 agent + 2 teammates args.exp_dir = f'eval/{args.teammates_len+1}_chefs' - - if not quick_test: + + if not quick_test: args.n_envs = 50 args.epoch_timesteps = 1e5 args.pop_total_training_timesteps = 5e6 @@ -76,7 +76,7 @@ def set_input(args, quick_test=False): ) teammates_collection[TeammatesCollection.EVAL] = teammates_collection[TeammatesCollection.TRAIN] - + # TODO: run this in parallel for fcp_train_types in all_FCP_train_types: vb = '_'.join(fcp_train_types) From b4e535e7706dada20a1f090b2b77c1913a33b04d Mon Sep 17 00:00:00 2001 From: ttopiac Date: Thu, 14 Nov 2024 16:27:22 -0700 Subject: [PATCH 52/61] Add get_model_path and move get_most_recent_ckeckpints to base_agent.py and also make sure anything related to it using them properly.. 
--- oai_agents/agents/base_agent.py | 63 +++++++++++++++++------ oai_agents/agents/il.py | 4 +- oai_agents/agents/rl.py | 15 +++--- oai_agents/common/population.py | 14 +---- oai_agents/common/teammates_collection.py | 2 +- 5 files changed, 58 insertions(+), 40 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index 6c0a6cdf..d65d7069 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -17,7 +17,7 @@ import numpy as np import torch as th import torch.nn as nn -from typing import List, Tuple, Union +from typing import List, Tuple, Union, Optional import stable_baselines3.common.distributions as sb3_distributions from stable_baselines3.common.evaluation import evaluate_policy from stable_baselines3.common.vec_env.stacked_observations import StackedObservations @@ -466,11 +466,9 @@ def get_agents(self) -> List[OAIAgent]: def save_agents(self, path: Union[Path, None] = None, tag: Union[str, None] = None): ''' Saves each agent that the trainer is training ''' - if not path: - if self.args.exp_dir: - path = self.args.base_dir / 'agent_models' / self.args.exp_dir / self.name - else: - path = self.args.base_dir / 'agent_models'/ self.name + path = path or OAITrainer.get_model_path(base_dir=self.args.base_dir, + exp_folder=self.args.exp_dir, + model_name=self.name) tag = tag or self.args.exp_name save_path = path / tag / 'trainer_file' @@ -497,11 +495,9 @@ def save_agents(self, path: Union[Path, None] = None, tag: Union[str, None] = No @staticmethod def load_agents(args, tag, name: str=None, path: Union[Path, None] = None): ''' Loads each agent that the trainer is training ''' - if not path: - if args.exp_dir: - path = args.base_dir / 'agent_models' / args.exp_dir / name - else: - path = args.base_dir / 'agent_models'/ name + path = path or OAITrainer.get_model_path(base_dir=args.base_dir, + exp_folder=args.exp_dir, + model_name=name) tag = tag or args.exp_name load_path = path / tag / 'trainer_file' @@ -536,11 +532,46 @@ def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None Returns: - A list of tags (directories) that match the specified pattern. ''' - if not path: - if args.exp_dir: - path = args.base_dir / 'agent_models' / args.exp_dir / name - else: - path = args.base_dir / 'agent_models' / name + path = path or OAITrainer.get_model_path(base_dir=args.base_dir, + exp_folder=args.exp_dir, + model_name=name) handler = CheckedModelNameHandler() return handler.get_all_checked_tags(path=path) + + @staticmethod + def get_most_recent_checkpoint(args, name: str) -> str: + path = OAITrainer.get_model_path(base_dir=args.base_dir, + exp_folder=args.exp_dir, + model_name=name) + + ckpts = [name for name in os.listdir(path) if name.startswith(KeyCheckpoints.CHECKED_MODEL_PREFIX)] + ckpts_nums = [int(c.split('_')[1]) for c in ckpts] + last_ckpt_num = max(ckpts_nums) + return [c for c in ckpts if c.startswith(f"{KeyCheckpoints.CHECKED_MODEL_PREFIX}{last_ckpt_num}")][0] + + @staticmethod + def get_model_path(base_dir: Union[str, Path], exp_folder: Optional[str], model_name: str) -> Path: + """ + Constructs a path for saving or loading an agent model. + + Parameters: + base_dir (str or Path): The base directory where models are stored. + exp_folder (str or None): The experiment folder name, or None if not applicable. + model_name (str): The name of the model. + + Returns: + Path: A Path object representing the constructed path. 
+ """ + # Ensure base_dir is a Path object + base_dir = Path(base_dir) if isinstance(base_dir, str) else base_dir + + experiment_name = OAITrainer.get_experiment_name(exp_folder=exp_folder, model_name=model_name) + + path = base_dir / 'agent_models' /experiment_name + + return path + + @staticmethod + def get_experiment_name(self, exp_folder: Optional[str], model_name: str): + return f"{exp_folder}/{model_name}" if exp_folder else model_name \ No newline at end of file diff --git a/oai_agents/agents/il.py b/oai_agents/agents/il.py index 6c0e87e2..7cb40381 100644 --- a/oai_agents/agents/il.py +++ b/oai_agents/agents/il.py @@ -184,11 +184,11 @@ def run_epoch(self, agent_idx): self.agents[agent_idx].eval() return np.mean(losses) - def train_agents(self, epochs=100, exp_name=None): + def train_agents(self, epochs=100): """ Training routine """ if self.datasets is None: self.setup_datasets() - exp_name = exp_name or self.args.exp_name + exp_name = self.args.exp_name run = wandb.init(project="overcooked_ai", entity=self.args.wandb_ent, dir=str(self.args.base_dir / 'wandb'), reinit=True, name=exp_name + '_' + self.name, mode=self.args.wandb_mode) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index e0e210cf..f768c2ba 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -14,6 +14,7 @@ from sb3_contrib import RecurrentPPO, MaskablePPO import wandb import os +from typing import Optional VEC_ENV_CLS = DummyVecEnv # @@ -263,9 +264,6 @@ def wrap_agent(self, sb3_agent, name): return SB3LSTMWrapper(sb3_agent, name, self.args) return SB3Wrapper(sb3_agent, name, self.args) - def get_experiment_name(self, exp_name): - return exp_name or str(self.args.exp_dir) + '/' + self.name - def should_evaluate(self, steps): mean_training_rew = np.mean([ep_info["r"] for ep_info in self.learning_agent.agent.ep_info_buffer]) @@ -294,18 +292,19 @@ def log_details(self, experiment_name, total_train_timesteps): print("Final sparse reward ratio: ", self.args.final_sparse_r_ratio) - def train_agents(self, total_train_timesteps, tag_for_returning_agent, exp_name=None, resume_ck_list=None): - experiment_name = self.get_experiment_name(exp_name) + def train_agents(self, total_train_timesteps, tag_for_returning_agent, resume_ck_list=None): + experiment_name = RLAgentTrainer.get_experiment_name(exp_folder=self.args.exp_dir, model_name=self.name) run = wandb.init(project="overcooked_ai", entity=self.args.wandb_ent, dir=str(self.args.base_dir / 'wandb'), reinit=True, name=experiment_name, mode=self.args.wandb_mode, resume="allow") self.log_details(experiment_name, total_train_timesteps) - ckname_handler = CheckedModelNameHandler() if self.checkpoint_rate is not None: if self.args.resume: - path = self.args.base_dir / 'agent_models' / experiment_name + path = RLAgentTrainer.get_model_path(base_dir=self.args_base_dir, + exp_folder=self.args.exp_dir, + model_name=self.name) ckpts = [name for name in os.listdir(path) if name.startswith("ck")] ckpts_nums = [int(c.split('_')[1]) for c in ckpts] @@ -351,7 +350,7 @@ def train_agents(self, total_train_timesteps, tag_for_returning_agent, exp_name= if self.checkpoint_rate: if self.learning_agent.num_timesteps // self.checkpoint_rate > (len(self.ck_list) - 1): - path, tag = self.save_agents(tag=ckname_handler.generate_tag(id=len(self.ck_list), mean_reward=mean_reward)) + path, tag = self.save_agents(tag=CheckedModelNameHandler.generate_tag(id=len(self.ck_list), mean_reward=mean_reward)) self.ck_list.append((rew_per_layout, path, tag)) if mean_reward >= 
self.best_score: diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index ec35ccb2..be09d47d 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -11,18 +11,6 @@ import random -def _get_most_recent_checkpoint(args, name: str) -> str: - if args.exp_dir: - path = args.base_dir / 'agent_models' / args.exp_dir / name - else: - path = args.base_dir / 'agent_models' / name - - - ckpts = [name for name in os.listdir(path) if name.startswith("ck")] - ckpts_nums = [int(c.split('_')[1]) for c in ckpts] - last_ckpt_num = max(ckpts_nums) - return [c for c in ckpts if c.startswith(f"ck_{last_ckpt_num}")][0] - def train_agent_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_dim, serialize, force_training): ''' Returns ckeckpoints_list @@ -35,7 +23,7 @@ def train_agent_with_checkpoints(args, total_training_timesteps, ck_rate, seed, start_timestep = 0 ck_rewards = None if args.resume: - last_ckpt = _get_most_recent_checkpoint(args, name) + last_ckpt = RLAgentTrainer.get_most_recent_checkpoint(args, name=name) agent_ckpt_info, env_info, training_info = RLAgentTrainer.load_agents(args, name=name, tag=last_ckpt) agent_ckpt = agent_ckpt_info[0] start_step = env_info["step_count"] diff --git a/oai_agents/common/teammates_collection.py b/oai_agents/common/teammates_collection.py index 6f53f403..6f69fb39 100644 --- a/oai_agents/common/teammates_collection.py +++ b/oai_agents/common/teammates_collection.py @@ -285,7 +285,7 @@ def update_eval_collection_with_eval_types_from_file(args, agent, unseen_teammat for teammates in eval_types: if teammates.team_type not in eval_collection[teammates.layout_name]: eval_collection[teammates.layout_name][teammates.team_type] = [] - tms_path = Path.cwd() / 'agent_models' / teammates.names[0] + tms_path = RLAgentTrainer.get_model_path(base_dir=Path.cwd(), model_name=teammates.names[0]) if teammates.load_from_pop_structure: layout_population, _, _ = RLAgentTrainer.load_agents(args, path=tms_path, tag=teammates.tags[0]) agents_perftag_score_all = [(agent, From ac0f68aca11c328714d4851fd466fb1bc7575daa Mon Sep 17 00:00:00 2001 From: ttopiac Date: Fri, 15 Nov 2024 09:07:53 -0700 Subject: [PATCH 53/61] Add feature to allow user to use generate_hdim_and_seed for not only training but also evaluation.. --- oai_agents/common/population.py | 69 +++++++++++++++--------- tests/test_oai_agents/test_population.py | 36 +++++++------ 2 files changed, 64 insertions(+), 41 deletions(-) diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index be09d47d..09ab4e84 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -11,7 +11,7 @@ import random -def train_agent_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_dim, serialize, force_training): +def train_agent_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_dim, serialize): ''' Returns ckeckpoints_list either serialized or not based on serialize flag @@ -52,7 +52,9 @@ def train_agent_with_checkpoints(args, total_training_timesteps, ck_rate, seed, For SP agents, they only are trained with themselves so the order doesn't matter. 
''' - rlat.train_agents(total_train_timesteps=total_training_timesteps, tag_for_returning_agent=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL, resume_ck_list=ck_rewards) + rlat.train_agents(total_train_timesteps=total_training_timesteps, + tag_for_returning_agent=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL, + resume_ck_list=ck_rewards) checkpoints_list = rlat.ck_list if serialize: @@ -95,9 +97,9 @@ def ensure_we_will_have_enough_agents_in_population(teammates_len, f" num_SPs_to_train: {num_SPs_to_train}." -def generate_hdim_and_seed(num_of_required_agents): +def generate_hdim_and_seed(for_training: bool, num_of_required_agents: int): ''' - Generates lists of seeds and hidden dimensions for a given number of agents. + Generates lists of seeds and hidden dimensions for a given number of agents for training or evaluation. Each setting is a pair (hidden_dim, seed). If the number of required agents is less than or equal to the number of predefined settings, it selects from @@ -105,6 +107,7 @@ def generate_hdim_and_seed(num_of_required_agents): seeds and hidden dimensions to fill the remaining number of agents. Arguments: + for_training -- a boolean indicating whether to generate settings for training (True) or evaluation (False). num_of_required_agents -- the number of (hidden_dim, seed) pairs to generate. Returns: @@ -112,9 +115,25 @@ def generate_hdim_and_seed(num_of_required_agents): selected_hdims -- list of selected hidden dimensions ''' - # Predefined seeds and hidden dimensions - seeds = [1010, 2020, 2602, 13, 68, 2907, 105, 128] - hdims = [256] * len(seeds) + # Predefined seeds and hidden dimensions for training + training_seeds = [1010, 2020, 2602, 13, 68, 2907, 105, 128] + training_hdims = [256] * len(training_seeds) + + # Predefined seeds and hidden dimensions for evaluation + evaluation_seeds = [3031, 4041, 5051, 3708, 3809, 3910, 4607, 5506] + evaluation_hdims = [256] * len(evaluation_seeds) + + # Select appropriate predefined settings based on the input setting + if for_training: + seeds = training_seeds + hdims = training_hdims + min_seed = 0 + max_seed = 2999 + else: + seeds = evaluation_seeds + hdims = evaluation_hdims + min_seed, max_seed = 3000, 5999 + # Initialize selected lists selected_seeds = [] @@ -132,9 +151,9 @@ def generate_hdim_and_seed(num_of_required_agents): # Generate additional random settings if more agents are needed remaining = num_of_required_agents - len(seeds) - available_seeds = set(range(0, 5000)) - set(selected_seeds) + available_seeds = list(set(range(min_seed, max_seed)) - set(selected_seeds)) random_seeds = random.sample(available_seeds, remaining) # Generate random seeds - random_hdims = random.choices([256, 512], k=remaining) # Generate random hidden dimensions + random_hdims = [256] * remaining # Generate random hidden dimensions # Append randomly generated settings to selected lists selected_seeds += random_seeds @@ -161,16 +180,16 @@ def save_categorized_population(args, population): rt.save_agents(tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL) -def get_categorized_population(args, - ck_rate, - total_training_timesteps, - train_types, - eval_types, - num_SPs_to_train, - unseen_teammates_len=0, - force_training=False, - tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL, - ): +def get_categorized_population( args, + ck_rate, + total_training_timesteps, + train_types, + eval_types, + num_SPs_to_train, + unseen_teammates_len=0, + force_training=False, + tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL, + ): population = {layout_name: [] for layout_name in 
args.layout_names} @@ -190,7 +209,7 @@ def get_categorized_population(args, eval_types=eval_types, num_SPs_to_train=num_SPs_to_train) - seed, h_dim = generate_hdim_and_seed(num_SPs_to_train) + seed, h_dim = generate_hdim_and_seed(for_training=True, num_of_required_agents=num_SPs_to_train) inputs = [ (args, total_training_timesteps, ck_rate, seed[i], h_dim[i], True) for i in range(num_SPs_to_train) ] @@ -208,11 +227,11 @@ def get_categorized_population(args, else: for inp in inputs: checkpoints_list = train_agent_with_checkpoints(args=inp[0], - total_training_timesteps = inp[1], - ck_rate=inp[2], - seed=inp[3], - h_dim=inp[4], - serialize=False) + total_training_timesteps = inp[1], + ck_rate=inp[2], + seed=inp[3], + h_dim=inp[4], + serialize=False) for layout_name in args.layout_names: layout_pop = RLAgentTrainer.get_checkedpoints_agents(args, checkpoints_list, layout_name) population[layout_name].extend(layout_pop) diff --git a/tests/test_oai_agents/test_population.py b/tests/test_oai_agents/test_population.py index 9b3d0004..df3bc219 100644 --- a/tests/test_oai_agents/test_population.py +++ b/tests/test_oai_agents/test_population.py @@ -7,33 +7,37 @@ def test_generate_hdim_and_seed(): Test function for generate_hdim_and_seed to ensure: 1. The number of (hidden_dim, seed) pairs matches the number of required agents. 2. All generated seeds are unique. - 3. Hidden dimensions are as expected (either 64 or 256). + 3. Hidden dimensions are as expected (256). ''' # Test cases test_cases = [3, 5, 8, 10] # Testing for fewer than, equal to, and more than predefined settings - for num_agents in test_cases: - print(f"\nTesting with {num_agents} agents:") + for for_training in [True, False]: + setting_type = "training" if for_training else "evaluation" + print(f"\nTesting for {setting_type} settings:") - # Generate (hidden_dim, seed) pairs - selected_seeds, selected_hdims = generate_hdim_and_seed(num_agents) + for num_agents in test_cases: + print(f"\nTesting with {num_agents} agents:") - # Check that the correct number of agents is generated - assert len(selected_seeds) == num_agents, f"Expected {num_agents} seeds, got {len(selected_seeds)}" - assert len(selected_hdims) == num_agents, f"Expected {num_agents} hidden dims, got {len(selected_hdims)}" + # Generate (hidden_dim, seed) pairs + selected_seeds, selected_hdims = generate_hdim_and_seed(for_training=for_training, num_of_required_agents=num_agents) - # Check that all seeds are unique - assert len(set(selected_seeds)) == num_agents, "Duplicate seeds found in the generated seeds." + # Check that the correct number of agents is generated + assert len(selected_seeds) == num_agents, f"Expected {num_agents} seeds, got {len(selected_seeds)}" + assert len(selected_hdims) == num_agents, f"Expected {num_agents} hidden dims, got {len(selected_hdims)}" - # Check that hidden dims are from the valid set (64, 256) - assert all(hdim in [256, 512] for hdim in selected_hdims), "Invalid hidden dimension found. Only 64 and 256 are allowed." + # Check that all seeds are unique + assert len(set(selected_seeds)) == num_agents, "Duplicate seeds found in the generated seeds." - print(f"Test passed for {num_agents} agents.") - print("Selected seeds:", selected_seeds) - print("Selected hidden dimensions:", selected_hdims) + # Check that hidden dims are from the valid set (256) + assert all(hdim == 256 for hdim in selected_hdims), "Invalid hidden dimension found. Only 256 is allowed." 
+ + print(f"Test passed for {num_agents} agents.") + print("Selected seeds:", selected_seeds) + print("Selected hidden dimensions:", selected_hdims) # Ensure that this test script only runs when executed directly if __name__ == "__main__": - print("Running tests in population.py...") + print("Running tests for generate_hdim_and_seed...") test_generate_hdim_and_seed() From c824b64ff62188950f7f2b694332e87e173a1035 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Fri, 15 Nov 2024 10:26:59 -0700 Subject: [PATCH 54/61] Rename methods for generating SP agent populations to distinguish them from methods for generating other agent populations --- oai_agents/common/population.py | 16 ++++++++-------- scripts/utils/train_helper.py | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 09ab4e84..e1d5081a 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -11,7 +11,7 @@ import random -def train_agent_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_dim, serialize): +def train_SP_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_dim, serialize): ''' Returns ckeckpoints_list either serialized or not based on serialize flag @@ -62,7 +62,7 @@ def train_agent_with_checkpoints(args, total_training_timesteps, ck_rate, seed, return checkpoints_list -def ensure_we_will_have_enough_agents_in_population(teammates_len, +def ensure_enough_SP_agents(teammates_len, train_types, eval_types, num_SPs_to_train, @@ -161,7 +161,7 @@ def generate_hdim_and_seed(for_training: bool, num_of_required_agents: int): return selected_seeds, selected_hdims -def save_categorized_population(args, population): +def save_categorized_SP_population(args, population): name_prefix = 'pop' for layout_name in args.layout_names: rt = RLAgentTrainer( @@ -180,7 +180,7 @@ def save_categorized_population(args, population): rt.save_agents(tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL) -def get_categorized_population( args, +def get_categorized_SP_population(args, ck_rate, total_training_timesteps, train_types, @@ -203,7 +203,7 @@ def get_categorized_population( args, except FileNotFoundError as e: print(f'Could not find saved population, creating them from scratch...\nFull Error: {e}') - ensure_we_will_have_enough_agents_in_population(teammates_len=args.teammates_len, + ensure_enough_SP_agents(teammates_len=args.teammates_len, unseen_teammates_len=unseen_teammates_len, train_types=train_types, eval_types=eval_types, @@ -218,7 +218,7 @@ def get_categorized_population( args, if args.parallel: with concurrent.futures.ProcessPoolExecutor(max_workers=args.max_concurrent_jobs) as executor: arg_lists = list(zip(*inputs)) - dilled_results = list(executor.map(train_agent_with_checkpoints, *arg_lists)) + dilled_results = list(executor.map(train_SP_with_checkpoints, *arg_lists)) for dilled_res in dilled_results: checkpoints_list = dill.loads(dilled_res) for layout_name in args.layout_names: @@ -226,7 +226,7 @@ def get_categorized_population( args, population[layout_name].extend(layout_pop) else: for inp in inputs: - checkpoints_list = train_agent_with_checkpoints(args=inp[0], + checkpoints_list = train_SP_with_checkpoints(args=inp[0], total_training_timesteps = inp[1], ck_rate=inp[2], seed=inp[3], @@ -236,6 +236,6 @@ def get_categorized_population( args, layout_pop = RLAgentTrainer.get_checkedpoints_agents(args, checkpoints_list, layout_name) population[layout_name].extend(layout_pop) - 
save_categorized_population(args=args, population=population) + save_categorized_SP_population(args=args, population=population) return population \ No newline at end of file diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index 556de25d..a3cba198 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -1,6 +1,6 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.tags import TeamType -from oai_agents.common.population import get_categorized_population, generate_hdim_and_seed +from oai_agents.common.population import get_categorized_SP_population, generate_hdim_and_seed from oai_agents.common.teammates_collection import generate_TC, get_best_SP_agent, generate_TC_for_ADV_agent, update_TC_w_ADV_teammates from oai_agents.common.curriculum import Curriculum from .common import load_agents, generate_name @@ -74,7 +74,7 @@ def get_N_X_SP_agents(args, if agents: return agents[0] - population = get_categorized_population( + population = get_categorized_SP_population( args=args, ck_rate=args.pop_total_training_timesteps // args.num_of_ckpoints, total_training_timesteps=args.pop_total_training_timesteps, @@ -269,7 +269,7 @@ def get_FCP_agent_w_pop(args, train_types=fcp_train_types, has_curriculum = not fcp_curriculum.is_random) - population = get_categorized_population( + population = get_categorized_SP_population( args=args, ck_rate=args.pop_total_training_timesteps // args.num_of_ckpoints, total_training_timesteps=args.pop_total_training_timesteps, From 6d291e6f40e6d3a58dbb86d5ccd6b38ab06f0850 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Fri, 15 Nov 2024 16:12:02 -0700 Subject: [PATCH 55/61] Update resume: train agent if no checkpoint exists --- oai_agents/agents/base_agent.py | 7 ++++++- oai_agents/common/population.py | 13 +++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index d65d7069..d59e0452 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -544,8 +544,13 @@ def get_most_recent_checkpoint(args, name: str) -> str: path = OAITrainer.get_model_path(base_dir=args.base_dir, exp_folder=args.exp_dir, model_name=name) - + if not path.exists(): + print(f"Warning: The directory {path} does not exist.") + return None ckpts = [name for name in os.listdir(path) if name.startswith(KeyCheckpoints.CHECKED_MODEL_PREFIX)] + if not ckpts: + print(f"Warning: No checkpoints found in {path} with prefix '{KeyCheckpoints.CHECKED_MODEL_PREFIX}'.") + return None ckpts_nums = [int(c.split('_')[1]) for c in ckpts] last_ckpt_num = max(ckpts_nums) return [c for c in ckpts if c.startswith(f"{KeyCheckpoints.CHECKED_MODEL_PREFIX}{last_ckpt_num}")][0] diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index e1d5081a..8b08ab01 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -24,12 +24,13 @@ def train_SP_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_d ck_rewards = None if args.resume: last_ckpt = RLAgentTrainer.get_most_recent_checkpoint(args, name=name) - agent_ckpt_info, env_info, training_info = RLAgentTrainer.load_agents(args, name=name, tag=last_ckpt) - agent_ckpt = agent_ckpt_info[0] - start_step = env_info["step_count"] - start_timestep = env_info["timestep_count"] - ck_rewards = training_info["ck_list"] - print(f"Restarting training from step: {start_step} (timestep: {start_timestep})") + if last_ckpt: + agent_ckpt_info, env_info, 
training_info = RLAgentTrainer.load_agents(args, name=name, tag=last_ckpt) + agent_ckpt = agent_ckpt_info[0] + start_step = env_info["step_count"] + start_timestep = env_info["timestep_count"] + ck_rewards = training_info["ck_list"] + print(f"Restarting training from step: {start_step} (timestep: {start_timestep})") rlat = RLAgentTrainer( From 54750c0f88a6aa48d5ed50987ec30aa3f1e3c5aa Mon Sep 17 00:00:00 2001 From: ttopiac Date: Sat, 16 Nov 2024 11:26:08 -0700 Subject: [PATCH 56/61] Remove a bug in ensure_enough_SP_agents --- oai_agents/common/population.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 8b08ab01..58e235e6 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -64,11 +64,11 @@ def train_SP_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_d def ensure_enough_SP_agents(teammates_len, - train_types, - eval_types, - num_SPs_to_train, - unseen_teammates_len=0, # only used for SPX teamtypes - ): + train_types, + eval_types, + num_SPs_to_train, + unseen_teammates_len=0, # only used for SPX teamtypes + ): total_population_len = len(AgentPerformance.ALL) * num_SPs_to_train @@ -85,8 +85,8 @@ def ensure_enough_SP_agents(teammates_len, for eval_type in eval_types: if eval_type in TeamType.ALL_TYPES_BESIDES_SP: eval_agents_len += teammates_len - elif train_type == TeamType.SELF_PLAY or train_type == TeamType.SELF_PLAY_ADVERSARY: - train_agents_len += 0 + elif eval_type == TeamType.SELF_PLAY or eval_type == TeamType.SELF_PLAY_ADVERSARY: + eval_agents_len += 0 else: eval_agents_len += unseen_teammates_len @@ -205,10 +205,10 @@ def get_categorized_SP_population(args, print(f'Could not find saved population, creating them from scratch...\nFull Error: {e}') ensure_enough_SP_agents(teammates_len=args.teammates_len, - unseen_teammates_len=unseen_teammates_len, - train_types=train_types, - eval_types=eval_types, - num_SPs_to_train=num_SPs_to_train) + unseen_teammates_len=unseen_teammates_len, + train_types=train_types, + eval_types=eval_types, + num_SPs_to_train=num_SPs_to_train) seed, h_dim = generate_hdim_and_seed(for_training=True, num_of_required_agents=num_SPs_to_train) inputs = [ From bd4a6fa45d781622ffddca06ef68d566814f6435 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Sun, 17 Nov 2024 17:10:53 -0700 Subject: [PATCH 57/61] Increase readability for functions with long arugments. 
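
The reformatting applies one convention throughout: calls and definitions with long
argument lists are broken onto one argument per line, with the closing parenthesis on
its own line. A short illustrative sketch (argument names are taken from existing
generate_name call sites; the exact combination shown here is hypothetical):

    name = generate_name(
        args,
        prefix=Prefix.SELF_PLAY,
        seed=args.SP_seed,
        train_types=curriculum.train_types,
        has_curriculum=not curriculum.is_random,
    )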
--- oai_agents/common/multi_setup_trainer.py | 0 scripts/train_agents.py | 208 +++++---- scripts/utils/train_helper.py | 519 ++++++++++++++--------- 3 files changed, 416 insertions(+), 311 deletions(-) create mode 100644 oai_agents/common/multi_setup_trainer.py diff --git a/oai_agents/common/multi_setup_trainer.py b/oai_agents/common/multi_setup_trainer.py new file mode 100644 index 00000000..e69de29b diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 413da171..4a306b55 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -6,11 +6,12 @@ from oai_agents.common.learner import LearnerType from oai_agents.common.curriculum import Curriculum -from scripts.utils import (get_SP_agent, - get_FCP_agent_w_pop, - get_N_X_FCP_agents, - get_N_X_SP_agents, - ) +from scripts.utils import ( + get_SP_agent, + get_FCP_agent_w_pop, + get_N_X_FCP_agents, + get_N_X_SP_agents, +) def SP(args): primary_train_types = [TeamType.SELF_PLAY] @@ -20,11 +21,12 @@ def SP(args): } curriculum = Curriculum(train_types=primary_train_types, is_random=True) - get_SP_agent(args=args, - train_types=curriculum.train_types, - eval_types=primary_eval_types, - curriculum=curriculum - ) + get_SP_agent( + args=args, + train_types=curriculum.train_types, + eval_types=primary_eval_types, + curriculum=curriculum + ) def SPN_1ADV(args) -> None: @@ -41,8 +43,14 @@ def SPN_1ADV(args) -> None: adversary_play_config = AdversaryPlayConfig.MAP primary_train_types = [TeamType.SELF_PLAY, TeamType.SELF_PLAY_ADVERSARY] - primary_eval_types = {'generate': [TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_LOW, TeamType.SELF_PLAY_ADVERSARY], - 'load': []} + primary_eval_types = { + 'generate': [ + TeamType.SELF_PLAY_HIGH, + TeamType.SELF_PLAY_LOW, + TeamType.SELF_PLAY_ADVERSARY + ], + 'load': [] + } curriculum = Curriculum(train_types = primary_train_types, is_random = True) @@ -77,21 +85,29 @@ def SPN_1ADV_XSPCKP(args) -> None: adversary_play_config = AdversaryPlayConfig.MAP primary_train_types = [TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_ADVERSARY] - primary_eval_types = {'generate': [TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_LOW, TeamType.SELF_PLAY_ADVERSARY], - 'load': []} + primary_eval_types = { + 'generate': [ + TeamType.SELF_PLAY_HIGH, + TeamType.SELF_PLAY_LOW, + TeamType.SELF_PLAY_ADVERSARY + ], + 'load': [] + } - curriculum = Curriculum(train_types = primary_train_types, - is_random = False, - total_steps = args.n_x_sp_total_training_timesteps//args.epoch_timesteps, - training_phases_durations_in_order={ - (TeamType.SELF_PLAY_ADVERSARY): 0.5, - }, - rest_of_the_training_probabilities={ - TeamType.SELF_PLAY_MEDIUM: 0.3, - TeamType.SELF_PLAY_HIGH: 0.3, - TeamType.SELF_PLAY_ADVERSARY: 0.4, - }, - probabilities_decay_over_time=0) + curriculum = Curriculum( + train_types = primary_train_types, + is_random = False, + total_steps = args.n_x_sp_total_training_timesteps//args.epoch_timesteps, + training_phases_durations_in_order={ + (TeamType.SELF_PLAY_ADVERSARY): 0.5, + }, + rest_of_the_training_probabilities={ + TeamType.SELF_PLAY_MEDIUM: 0.3, + TeamType.SELF_PLAY_HIGH: 0.3, + TeamType.SELF_PLAY_ADVERSARY: 0.4, + }, + probabilities_decay_over_time=0 + ) get_N_X_SP_agents( args, n_x_sp_train_types=curriculum.train_types, @@ -126,9 +142,9 @@ def SPN_XSPCKP(args) -> None: unseen_teammates_len = 1 primary_train_types = [TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_LOW] primary_eval_types = { - 'generate': [TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_LOW], - 'load': 
[] - } + 'generate': [TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_LOW], + 'load': [] + } curriculum = Curriculum(train_types = primary_train_types, is_random=False, @@ -154,46 +170,6 @@ def SPN_XSPCKP(args) -> None: unseen_teammates_len=unseen_teammates_len, ) -def SPN_XSPCKP_HP_TYPE(args) -> None: - ''' - In N-agents games, a randomly initialized agent will be trained with N-X copies of itself - and X homogeneous unseen teammates, which are checkpoints saved during a previous self-play process. - These saved checkpoints are cateogorized into High, Medium, Low performance. - e.g. - when N is 4 and X is 1, the team can be composed by [SP, SP, SP, H], [SP, SP, SP, M], [SP, SP, SP, L] in a 4-chef layout. - when N is 4 and X is 2, the team can be composed [SP, SP, H, H], [SP, SP, M, M], [SP, SP, L, L] in a 4-chef layout. - - - Please note that - - X is the number of unseen teammate. - - X is assigned by the variable, unseen_teammates_len, in the funciton. - - - :param pop_force_training: Boolean that, if true, indicates population should be generated, otherwise load it from file - :param primary_force_training: Boolean that, if true, indicates the SP agent teammates_collection should be trained instead of loaded from file. - ''' - unseen_teammates_len = 1 - primary_train_types = [TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_HIGH, - # TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_MEDIUM, - # TeamType.SELF_PLAY_LOW, TeamType.SELF_PLAY_LOW, TeamType.SELF_PLAY_LOW, TeamType.SELF_PLAY_LOW, - ] - primary_eval_types = { - 'generate': [TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_LOW], - 'load': [] - } - - curriculum = Curriculum(train_types = primary_train_types, - is_random=True, - ) - - get_N_X_SP_agents( - args, - n_x_sp_train_types = curriculum.train_types, - n_x_sp_eval_types=primary_eval_types, - curriculum=curriculum, - unseen_teammates_len=unseen_teammates_len, - ) - def FCP_mhri(args): ''' @@ -206,26 +182,29 @@ def FCP_mhri(args): primary_eval_types = {'generate' : [TeamType.HIGH_FIRST], 'load': []} - fcp_curriculum = Curriculum(train_types = primary_train_types, - is_random=False, - total_steps = args.fcp_total_training_timesteps//args.epoch_timesteps, - training_phases_durations_in_order={ - (TeamType.LOW_FIRST): 0.5, - (TeamType.MEDIUM_FIRST): 0.125, - (TeamType.HIGH_FIRST): 0.125, - }, - rest_of_the_training_probabilities={ - TeamType.LOW_FIRST: 0.4, - TeamType.MEDIUM_FIRST: 0.3, - TeamType.HIGH_FIRST: 0.3, - }, - probabilities_decay_over_time=0 - ) + fcp_curriculum = Curriculum( + train_types = primary_train_types, + is_random=False, + total_steps = args.fcp_total_training_timesteps//args.epoch_timesteps, + training_phases_durations_in_order={ + (TeamType.LOW_FIRST): 0.5, + (TeamType.MEDIUM_FIRST): 0.125, + (TeamType.HIGH_FIRST): 0.125, + }, + rest_of_the_training_probabilities={ + TeamType.LOW_FIRST: 0.4, + TeamType.MEDIUM_FIRST: 0.3, + TeamType.HIGH_FIRST: 0.3, + }, + probabilities_decay_over_time=0 + ) - _, _ = get_FCP_agent_w_pop(args, - fcp_train_types = fcp_curriculum.train_types, - fcp_eval_types=primary_eval_types, - fcp_curriculum=fcp_curriculum) + _, _ = get_FCP_agent_w_pop( + args, + fcp_train_types = fcp_curriculum.train_types, + fcp_eval_types=primary_eval_types, + fcp_curriculum=fcp_curriculum + ) @@ -236,15 +215,18 @@ def FCP_traditional(args): ''' primary_train_types = [TeamType.ALL_MIX] - primary_eval_types = {'generate' : 
[TeamType.HIGH_FIRST, TeamType.LOW_FIRST], - 'load': []} + primary_eval_types = { + 'generate' : [TeamType.HIGH_FIRST, TeamType.LOW_FIRST], + 'load': [] + } fcp_curriculum = Curriculum(train_types=primary_train_types, is_random=True) - _, _ = get_FCP_agent_w_pop(args, - fcp_train_types=fcp_curriculum.train_types, - fcp_eval_types=primary_eval_types, - fcp_curriculum=fcp_curriculum, - ) + _, _ = get_FCP_agent_w_pop( + args, + fcp_train_types=fcp_curriculum.train_types, + fcp_eval_types=primary_eval_types, + fcp_curriculum=fcp_curriculum, + ) def N_1_FCP(args): @@ -254,19 +236,31 @@ def N_1_FCP(args): fcp_eval_types = {'generate' : [], 'load': []} fcp_curriculum = Curriculum(train_types=fcp_train_types, is_random=True) - primary_train_types = [TeamType.SELF_PLAY_LOW, TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_HIGH] - primary_eval_types = {'generate': [TeamType.SELF_PLAY_LOW, TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_HIGH], - 'load': []} + primary_train_types = [ + TeamType.SELF_PLAY_LOW, + TeamType.SELF_PLAY_MEDIUM, + TeamType.SELF_PLAY_HIGH + ] + primary_eval_types = { + 'generate': [ + TeamType.SELF_PLAY_LOW, + TeamType.SELF_PLAY_MEDIUM, + TeamType.SELF_PLAY_HIGH + ], + 'load': [] + } n_1_fcp_curriculum = Curriculum(train_types=primary_train_types, is_random=True) - get_N_X_FCP_agents(args=args, - fcp_train_types=fcp_curriculum.train_types, - fcp_eval_types=fcp_eval_types, - n_1_fcp_train_types=n_1_fcp_curriculum.train_types, - n_1_fcp_eval_types=primary_eval_types, - fcp_curriculum=fcp_curriculum, - n_1_fcp_curriculum=n_1_fcp_curriculum, - unseen_teammates_len=unseen_teammates_len) + get_N_X_FCP_agents( + args=args, + fcp_train_types=fcp_curriculum.train_types, + fcp_eval_types=fcp_eval_types, + n_1_fcp_train_types=n_1_fcp_curriculum.train_types, + n_1_fcp_eval_types=primary_eval_types, + fcp_curriculum=fcp_curriculum, + n_1_fcp_curriculum=n_1_fcp_curriculum, + unseen_teammates_len=unseen_teammates_len + ) def set_input(args): @@ -386,8 +380,6 @@ def set_input(args): set_input(args=args) - SPN_XSPCKP_HP_TYPE(args=args) - # SPN_1ADV_XSPCKP(args=args) #SP(args) diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index a3cba198..290cbcdf 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -8,7 +8,13 @@ from oai_agents.common.tags import KeyCheckpoints -def get_SP_agent(args, train_types, eval_types, curriculum, tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL): +def get_SP_agent( + args, + train_types, + eval_types, + curriculum, + tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL + ): name = generate_name(args, prefix=Prefix.SELF_PLAY, seed=args.SP_seed, @@ -34,25 +40,34 @@ def get_SP_agent(args, train_types, eval_types, curriculum, tag=KeyCheckpoints.M checkpoint_rate=args.pop_total_training_timesteps // args.num_of_ckpoints, ) - selfplay_trainer.train_agents(total_train_timesteps=args.pop_total_training_timesteps, tag_for_returning_agent=tag) + selfplay_trainer.train_agents( + total_train_timesteps=args.pop_total_training_timesteps, + tag_for_returning_agent=tag + ) return selfplay_trainer.get_agents()[0] -def get_N_X_SP_agents(args, - unseen_teammates_len:int, - n_x_sp_train_types:list, - n_x_sp_eval_types:dict, - curriculum:Curriculum, - tag:str=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL, - attack_rounds:int=-1, - adversary_play_config:str=None) -> tuple: - - curriculum.validate_curriculum_types(expected_types = [TeamType.SELF_PLAY_HIGH, - TeamType.SELF_PLAY_MEDIUM, - TeamType.SELF_PLAY_LOW, - TeamType.SELF_PLAY, - 
TeamType.SELF_PLAY_ADVERSARY], - unallowed_types = TeamType.ALL_TYPES_BESIDES_SP) +def get_N_X_SP_agents( + args, + unseen_teammates_len:int, + n_x_sp_train_types:list, + n_x_sp_eval_types:dict, + curriculum:Curriculum, + tag:str=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL, + attack_rounds:int=-1, + adversary_play_config:str=None + ): + + curriculum.validate_curriculum_types( + expected_types = [ + TeamType.SELF_PLAY_HIGH, + TeamType.SELF_PLAY_MEDIUM, + TeamType.SELF_PLAY_LOW, + TeamType.SELF_PLAY, + TeamType.SELF_PLAY_ADVERSARY + ], + unallowed_types = TeamType.ALL_TYPES_BESIDES_SP + ) if TeamType.SELF_PLAY_ADVERSARY in n_x_sp_train_types: @@ -62,14 +77,15 @@ def get_N_X_SP_agents(args, prefix = 'N-' + str(unseen_teammates_len) + '-SP' suffix = args.primary_learner_type - name = generate_name(args, - prefix = prefix, - seed = args.N_X_SP_seed, - h_dim = args.N_X_SP_h_dim, - train_types = n_x_sp_train_types, - has_curriculum = not curriculum.is_random, - suffix=suffix, - ) + name = generate_name( + args, + prefix = prefix, + seed = args.N_X_SP_seed, + h_dim = args.N_X_SP_h_dim, + train_types = n_x_sp_train_types, + has_curriculum = not curriculum.is_random, + suffix=suffix, + ) agents = load_agents(args, name=name, tag=tag, force_training=args.primary_force_training) if agents: return agents[0] @@ -87,187 +103,252 @@ def get_N_X_SP_agents(args, ) if TeamType.SELF_PLAY_ADVERSARY in n_x_sp_train_types: - joint_ADV_N_X_SP(args=args, - population=population, - curriculum=curriculum, - unseen_teammates_len=unseen_teammates_len, - adversary_play_config=adversary_play_config, - attack_rounds=attack_rounds, - n_x_sp_eval_types=n_x_sp_eval_types - ) + joint_ADV_N_X_SP( + args=args, + population=population, + curriculum=curriculum, + unseen_teammates_len=unseen_teammates_len, + adversary_play_config=adversary_play_config, + attack_rounds=attack_rounds, + n_x_sp_eval_types=n_x_sp_eval_types + ) else: - no_ADV_N_X_SP(args=args, - population=population, - curriculum=curriculum, - unseen_teammates_len=unseen_teammates_len, - n_x_sp_eval_types=n_x_sp_eval_types - ) - - -def joint_ADV_N_X_SP(args, population, curriculum, unseen_teammates_len, adversary_play_config, attack_rounds, n_x_sp_eval_types, tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL): + no_ADV_N_X_SP( + args=args, + population=population, + curriculum=curriculum, + unseen_teammates_len=unseen_teammates_len, + n_x_sp_eval_types=n_x_sp_eval_types + ) + + +def joint_ADV_N_X_SP( + args, + population, + curriculum, + unseen_teammates_len, + adversary_play_config, + attack_rounds, + n_x_sp_eval_types, + tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL + ): assert TeamType.SELF_PLAY_ADVERSARY in curriculum.train_types agent_to_be_attacked = get_best_SP_agent(args=args, population=population) adversary_agents = [] for attack_round in range(attack_rounds): - adversary_agent = get_adversary_agent(args=args, - agent_to_be_attacked=agent_to_be_attacked, - attack_round=attack_round) + adversary_agent = get_adversary_agent( + args=args, + agent_to_be_attacked=agent_to_be_attacked, + attack_round=attack_round + ) adversary_agents.append(adversary_agent) - name = generate_name(args, - prefix = f'PWADV-N-{unseen_teammates_len}-SP', - seed = args.N_X_SP_seed, - h_dim = args.N_X_SP_h_dim, - train_types = curriculum.train_types, - has_curriculum = not curriculum.is_random, - suffix=args.primary_learner_type + '_attack' + str(attack_round), - ) - - agents = load_agents(args, name=name, tag=tag, force_training=args.primary_force_training) + name = generate_name( + args, + 
prefix = f'PWADV-N-{unseen_teammates_len}-SP', + seed = args.N_X_SP_seed, + h_dim = args.N_X_SP_h_dim, + train_types = curriculum.train_types, + has_curriculum = not curriculum.is_random, + suffix=args.primary_learner_type + '_attack' + str(attack_round), + ) + + agents = load_agents( + args, + name=name, + tag=tag, + force_training=args.primary_force_training + ) if agents: agent_to_be_attacked = agents[0] continue - random_init_agent = RLAgentTrainer.generate_randomly_initialized_agent(args=args, - name=name, - learner_type=args.primary_learner_type, - hidden_dim=args.N_X_SP_h_dim, - seed=args.N_X_SP_seed) - - teammates_collection = generate_TC(args=args, - population=population, - agent=random_init_agent, - train_types=curriculum.train_types, - eval_types_to_generate=n_x_sp_eval_types['generate'], - eval_types_to_read_from_file=n_x_sp_eval_types['load'], - unseen_teammates_len=unseen_teammates_len, - use_entire_population_for_train_types_teammates=True) - - teammates_collection = update_TC_w_ADV_teammates(args=args, - teammates_collection=teammates_collection, - primary_agent=random_init_agent, - adversaries=adversary_agents, - adversary_play_config=adversary_play_config) + random_init_agent = RLAgentTrainer.generate_randomly_initialized_agent( + args=args, + name=name, + learner_type=args.primary_learner_type, + hidden_dim=args.N_X_SP_h_dim, + seed=args.N_X_SP_seed + ) + + teammates_collection = generate_TC( + args=args, + population=population, + agent=random_init_agent, + train_types=curriculum.train_types, + eval_types_to_generate=n_x_sp_eval_types['generate'], + eval_types_to_read_from_file=n_x_sp_eval_types['load'], + unseen_teammates_len=unseen_teammates_len, + use_entire_population_for_train_types_teammates=True + ) + + teammates_collection = update_TC_w_ADV_teammates( + args=args, + teammates_collection=teammates_collection, + primary_agent=random_init_agent, + adversaries=adversary_agents, + adversary_play_config=adversary_play_config + ) if attack_round == attack_rounds-1: total_train_timesteps = 4*args.n_x_sp_total_training_timesteps else: total_train_timesteps = args.n_x_sp_total_training_timesteps - n_x_sp_types_trainer = RLAgentTrainer(name=name, - args=args, - agent=random_init_agent, - teammates_collection=teammates_collection, - epoch_timesteps=args.epoch_timesteps, - n_envs=args.n_envs, - curriculum=curriculum, - seed=args.N_X_SP_seed, - hidden_dim=args.N_X_SP_h_dim, - learner_type=args.primary_learner_type, - checkpoint_rate=total_train_timesteps // args.num_of_ckpoints, - ) + n_x_sp_types_trainer = RLAgentTrainer( + name=name, + args=args, + agent=random_init_agent, + teammates_collection=teammates_collection, + epoch_timesteps=args.epoch_timesteps, + n_envs=args.n_envs, + curriculum=curriculum, + seed=args.N_X_SP_seed, + hidden_dim=args.N_X_SP_h_dim, + learner_type=args.primary_learner_type, + checkpoint_rate=total_train_timesteps // args.num_of_ckpoints, + ) n_x_sp_types_trainer.train_agents(total_train_timesteps=total_train_timesteps, tag_for_returning_agent=tag) agent_to_be_attacked = n_x_sp_types_trainer.get_agents()[0] -def no_ADV_N_X_SP(args, population, curriculum, unseen_teammates_len, n_x_sp_eval_types, tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL): +def no_ADV_N_X_SP( + args, + population, + curriculum, + unseen_teammates_len, + n_x_sp_eval_types, + tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL + ): assert TeamType.SELF_PLAY_ADVERSARY not in curriculum.train_types - name = generate_name(args, - prefix = f'N-{unseen_teammates_len}-SP', - seed = 
args.N_X_SP_seed, - h_dim = args.N_X_SP_h_dim, - train_types = curriculum.train_types, - has_curriculum = not curriculum.is_random, - suffix=args.primary_learner_type, - ) + name = generate_name( + args, + prefix = f'N-{unseen_teammates_len}-SP', + seed = args.N_X_SP_seed, + h_dim = args.N_X_SP_h_dim, + train_types = curriculum.train_types, + has_curriculum = not curriculum.is_random, + suffix=args.primary_learner_type, + ) agents = load_agents(args, name=name, tag=tag, force_training=args.primary_force_training) if agents: return agents[0] - random_init_agent = RLAgentTrainer.generate_randomly_initialized_agent(args=args, - name=name, - learner_type=args.primary_learner_type, - hidden_dim=args.N_X_SP_h_dim, - seed=args.N_X_SP_seed) - - teammates_collection = generate_TC(args=args, - population=population, - agent=random_init_agent, - train_types=curriculum.train_types, - eval_types_to_generate=n_x_sp_eval_types['generate'], - eval_types_to_read_from_file=n_x_sp_eval_types['load'], - unseen_teammates_len=unseen_teammates_len, - use_entire_population_for_train_types_teammates=True) - - n_x_sp_types_trainer = RLAgentTrainer(name=name, - args=args, - agent=random_init_agent, - teammates_collection=teammates_collection, - epoch_timesteps=args.epoch_timesteps, - n_envs=args.n_envs, - curriculum=curriculum, - seed=args.N_X_SP_seed, - hidden_dim=args.N_X_SP_h_dim, - learner_type=args.primary_learner_type, - checkpoint_rate=args.n_x_sp_total_training_timesteps // args.num_of_ckpoints, - ) - n_x_sp_types_trainer.train_agents(total_train_timesteps=args.n_x_sp_total_training_timesteps, tag_for_returning_agent=tag) - - - -def get_adversary_agent(args, agent_to_be_attacked, attack_round, tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL): + random_init_agent = RLAgentTrainer.generate_randomly_initialized_agent( + args=args, + name=name, + learner_type=args.primary_learner_type, + hidden_dim=args.N_X_SP_h_dim, + seed=args.N_X_SP_seed + ) + + teammates_collection = generate_TC( + args=args, + population=population, + agent=random_init_agent, + train_types=curriculum.train_types, + eval_types_to_generate=n_x_sp_eval_types['generate'], + eval_types_to_read_from_file=n_x_sp_eval_types['load'], + unseen_teammates_len=unseen_teammates_len, + use_entire_population_for_train_types_teammates=True + ) + + n_x_sp_types_trainer = RLAgentTrainer( + name=name, + args=args, + agent=random_init_agent, + teammates_collection=teammates_collection, + epoch_timesteps=args.epoch_timesteps, + n_envs=args.n_envs, + curriculum=curriculum, + seed=args.N_X_SP_seed, + hidden_dim=args.N_X_SP_h_dim, + learner_type=args.primary_learner_type, + checkpoint_rate=args.n_x_sp_total_training_timesteps // args.num_of_ckpoints, + ) + n_x_sp_types_trainer.train_agents( + total_train_timesteps=args.n_x_sp_total_training_timesteps, + tag_for_returning_agent=tag + ) + + + +def get_adversary_agent( + args, + agent_to_be_attacked, + attack_round, + tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL + ): # It doesn't matter what we set the variable, adversary_teammates_teamtype, # the purpose of it is to maintain consistent naming and correct TC/curriculum creation adversary_teammates_teamtype = TeamType.HIGH_FIRST - teammates_collection = generate_TC_for_ADV_agent(args=args, - agent_to_be_attacked=agent_to_be_attacked, - teamtype=adversary_teammates_teamtype) + teammates_collection = generate_TC_for_ADV_agent( + args=args, + agent_to_be_attacked=agent_to_be_attacked, + teamtype=adversary_teammates_teamtype + ) + + name = generate_name( + args, + 
prefix='ADV', + seed=args.ADV_seed, + h_dim=args.ADV_h_dim, + train_types=[adversary_teammates_teamtype], + has_curriculum=False, + suffix=args.adversary_learner_type +'_attack'+ str(attack_round) + ) - name = generate_name(args, - prefix='ADV', - seed=args.ADV_seed, - h_dim=args.ADV_h_dim, - train_types=[adversary_teammates_teamtype], - has_curriculum=False, - suffix=args.adversary_learner_type +'_attack'+ str(attack_round)) - - agents = load_agents(args, name=name, tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL, force_training=args.adversary_force_training) + agents = load_agents( + args, + name=name, + tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL, + force_training=args.adversary_force_training + ) if agents: return agents[0] - adversary_trainer = RLAgentTrainer(name=name, - args=args, - agent=None, - teammates_collection=teammates_collection, - epoch_timesteps=args.epoch_timesteps, - n_envs=args.n_envs, - curriculum=Curriculum(train_types=[adversary_teammates_teamtype], is_random=True), - seed=args.ADV_seed, - hidden_dim=args.ADV_h_dim, - learner_type=args.adversary_learner_type, - checkpoint_rate=args.adversary_total_training_timesteps // args.num_of_ckpoints) - adversary_trainer.train_agents(total_train_timesteps=args.adversary_total_training_timesteps, tag_for_returning_agent=tag) + adversary_trainer = RLAgentTrainer( + name=name, + args=args, + agent=None, + teammates_collection=teammates_collection, + epoch_timesteps=args.epoch_timesteps, + n_envs=args.n_envs, + curriculum=Curriculum(train_types=[adversary_teammates_teamtype], is_random=True), + seed=args.ADV_seed, + hidden_dim=args.ADV_h_dim, + learner_type=args.adversary_learner_type, + checkpoint_rate=args.adversary_total_training_timesteps // args.num_of_ckpoints + ) + adversary_trainer.train_agents( + total_train_timesteps=args.adversary_total_training_timesteps, + tag_for_returning_agent=tag + ) return adversary_trainer.get_agents()[0] -def get_FCP_agent_w_pop(args, - fcp_train_types, - fcp_eval_types, - fcp_curriculum, - tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL): +def get_FCP_agent_w_pop( + args, + fcp_train_types, + fcp_eval_types, + fcp_curriculum, + tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL + ): - name = generate_name(args, - prefix=Prefix.FICTITIOUS_CO_PLAY, - seed=args.FCP_seed, - h_dim=args.FCP_h_dim, - train_types=fcp_train_types, - has_curriculum = not fcp_curriculum.is_random) + name = generate_name( + args, + prefix=Prefix.FICTITIOUS_CO_PLAY, + seed=args.FCP_seed, + h_dim=args.FCP_h_dim, + train_types=fcp_train_types, + has_curriculum = not fcp_curriculum.is_random + ) population = get_categorized_SP_population( args=args, @@ -280,14 +361,21 @@ def get_FCP_agent_w_pop(args, tag=tag ) - teammates_collection = generate_TC(args=args, - population=population, - train_types=fcp_train_types, - eval_types_to_generate=fcp_eval_types['generate'], - eval_types_to_read_from_file=fcp_eval_types['load'], - use_entire_population_for_train_types_teammates=False) + teammates_collection = generate_TC( + args=args, + population=population, + train_types=fcp_train_types, + eval_types_to_generate=fcp_eval_types['generate'], + eval_types_to_read_from_file=fcp_eval_types['load'], + use_entire_population_for_train_types_teammates=False + ) - agents = load_agents(args, name=name, tag=tag, force_training=args.primary_force_training) + agents = load_agents( + args, + name=name, + tag=tag, + force_training=args.primary_force_training + ) if agents: return agents[0], population @@ -305,48 +393,70 @@ def get_FCP_agent_w_pop(args, 
checkpoint_rate=args.fcp_total_training_timesteps // args.num_of_ckpoints, ) - fcp_trainer.train_agents(total_train_timesteps=args.fcp_total_training_timesteps, tag_for_returning_agent=tag) + fcp_trainer.train_agents( + total_train_timesteps=args.fcp_total_training_timesteps, + tag_for_returning_agent=tag + ) return fcp_trainer.get_agents()[0], population -def get_N_X_FCP_agents(args, - fcp_train_types, - fcp_eval_types, - n_1_fcp_train_types, - n_1_fcp_eval_types, - fcp_curriculum, - n_1_fcp_curriculum, - unseen_teammates_len, - tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL): - - n_1_fcp_curriculum.validate_curriculum_types(expected_types = [TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_LOW], - unallowed_types= TeamType.ALL_TYPES_BESIDES_SP) +def get_N_X_FCP_agents( + args, + fcp_train_types, + fcp_eval_types, + n_1_fcp_train_types, + n_1_fcp_eval_types, + fcp_curriculum, + n_1_fcp_curriculum, + unseen_teammates_len, + tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL + ): + + n_1_fcp_curriculum.validate_curriculum_types( + expected_types = [ + TeamType.SELF_PLAY_HIGH, + TeamType.SELF_PLAY_MEDIUM, + TeamType.SELF_PLAY_LOW + ], + unallowed_types= TeamType.ALL_TYPES_BESIDES_SP + ) - name = generate_name(args, - prefix=f'N-{unseen_teammates_len}-FCP', - seed=args.N_X_FCP_seed, - h_dim=args.N_X_FCP_h_dim, - train_types=n_1_fcp_curriculum.train_types, - has_curriculum = not fcp_curriculum.is_random) + name = generate_name( + args, + prefix=f'N-{unseen_teammates_len}-FCP', + seed=args.N_X_FCP_seed, + h_dim=args.N_X_FCP_h_dim, + train_types=n_1_fcp_curriculum.train_types, + has_curriculum = not fcp_curriculum.is_random + ) - agents = load_agents(args, name=name, tag=tag, force_training=args.primary_force_training) + agents = load_agents( + args, + name=name, + tag=tag, + force_training=args.primary_force_training + ) if agents: return agents[0] - fcp_agent, population = get_FCP_agent_w_pop(args, - fcp_train_types=fcp_train_types, - fcp_eval_types=fcp_eval_types, - fcp_curriculum=fcp_curriculum) + fcp_agent, population = get_FCP_agent_w_pop( + args, + fcp_train_types=fcp_train_types, + fcp_eval_types=fcp_eval_types, + fcp_curriculum=fcp_curriculum + ) - teammates_collection = generate_TC(args=args, - population=population, - agent=fcp_agent, - train_types=n_1_fcp_train_types, - eval_types_to_generate=n_1_fcp_eval_types['generate'], - eval_types_to_read_from_file=n_1_fcp_eval_types['load'], - unseen_teammates_len=unseen_teammates_len, - use_entire_population_for_train_types_teammates=False) + teammates_collection = generate_TC( + args=args, + population=population, + agent=fcp_agent, + train_types=n_1_fcp_train_types, + eval_types_to_generate=n_1_fcp_eval_types['generate'], + eval_types_to_read_from_file=n_1_fcp_eval_types['load'], + unseen_teammates_len=unseen_teammates_len, + use_entire_population_for_train_types_teammates=False + ) fcp_trainer = RLAgentTrainer( name=name, @@ -362,5 +472,8 @@ def get_N_X_FCP_agents(args, checkpoint_rate=args.n_x_fcp_total_training_timesteps // args.num_of_ckpoints, ) - fcp_trainer.train_agents(total_train_timesteps=args.n_x_fcp_total_training_timesteps, tag_for_returning_agent=tag) + fcp_trainer.train_agents( + total_train_timesteps=args.n_x_fcp_total_training_timesteps, + tag_for_returning_agent=tag + ) return fcp_trainer.get_agents()[0], teammates_collection From 92a55bf531cd691f31bcf4b680a4db769e0669c5 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Tue, 19 Nov 2024 18:16:42 -0700 Subject: [PATCH 58/61] Add a MultiSetupTrainer --- 
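Note for reviewers (kept below the "---" so it is not part of the committed message): this patch moves per-variant seed and hidden-dim selection into MultiSetupTrainer.generate_hdim_and_seed, with separate training and evaluation pools drawn from disjoint seed ranges. The sketch below restates that policy in isolation as a rough aid for review; the function name and signature are illustrative placeholders, not the trainer's actual method, which reads num_of_training_variants and for_evaluation from args.

import random

def pick_seeds_and_hdims(num_variants, for_evaluation=False):
    # Predefined pools: evaluation seeds live in 3000-5999, training seeds in 0-2999,
    # so agents trained for evaluating others never reuse a training seed.
    if for_evaluation:
        seeds, lo, hi = [3031, 4041, 5051, 3708, 3809, 3910, 4607, 5506], 3000, 5999
    else:
        seeds, lo, hi = [1010, 2020, 2602, 13, 68, 2907, 105, 128], 0, 2999
    selected = seeds[:num_variants]
    if num_variants > len(seeds):
        # Top up with unused random seeds drawn from the same range.
        pool = list(set(range(lo, hi + 1)) - set(selected))
        selected += random.sample(pool, num_variants - len(seeds))
    return selected, [256] * len(selected)  # hidden_dim stays 256 for every variant

# Example: pick_seeds_and_hdims(3) -> ([1010, 2020, 2602], [256, 256, 256])
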
oai_agents/agents/base_agent.py | 10 +- oai_agents/agents/rl.py | 93 +++++++----- oai_agents/common/arguments.py | 3 + oai_agents/common/multi_setup_trainer.py | 183 +++++++++++++++++++++++ oai_agents/common/population.py | 67 +++++---- scripts/train_agents.py | 34 +++-- scripts/utils/__init__.py | 2 +- scripts/utils/train_helper.py | 28 +++- 8 files changed, 331 insertions(+), 89 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index d59e0452..7186a6e7 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -541,9 +541,11 @@ def list_agent_checked_tags(args, name: str=None, path: Union[Path, None] = None @staticmethod def get_most_recent_checkpoint(args, name: str) -> str: - path = OAITrainer.get_model_path(base_dir=args.base_dir, - exp_folder=args.exp_dir, - model_name=name) + path = OAITrainer.get_model_path( + base_dir=args.base_dir, + exp_folder=args.exp_dir, + model_name=name + ) if not path.exists(): print(f"Warning: The directory {path} does not exist.") return None @@ -578,5 +580,5 @@ def get_model_path(base_dir: Union[str, Path], exp_folder: Optional[str], model_ return path @staticmethod - def get_experiment_name(self, exp_folder: Optional[str], model_name: str): + def get_experiment_name(exp_folder: Optional[str], model_name: str): return f"{exp_folder}/{model_name}" if exp_folder else model_name \ No newline at end of file diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index f768c2ba..a1afe6e0 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -20,14 +20,16 @@ class RLAgentTrainer(OAITrainer): ''' Train an RL agent to play with a teammates_collection of agents.''' - def __init__(self, teammates_collection, args, - agent, epoch_timesteps, n_envs, - seed, learner_type, - train_types=[], eval_types=[], - curriculum=None, num_layers=2, hidden_dim=256, - checkpoint_rate=None, name=None, env=None, eval_envs=None, - use_cnn=False, use_lstm=False, use_frame_stack=False, - taper_layers=False, use_policy_clone=False, deterministic=False, start_step: int=0, start_timestep: int=0): + def __init__( + self, teammates_collection, args, + agent, epoch_timesteps, n_envs, + seed, learner_type, + train_types=[], eval_types=[], + curriculum=None, num_layers=2, hidden_dim=256, + checkpoint_rate=None, name=None, env=None, eval_envs=None, + use_cnn=False, use_lstm=False, use_frame_stack=False, + taper_layers=False, use_policy_clone=False, deterministic=False, start_step: int=0, start_timestep: int=0 + ): name = name or 'rl_agent' @@ -65,20 +67,23 @@ def __init__(self, teammates_collection, args, self.start_timestep = start_timestep self.learning_agent, self.agents = self.get_learning_agent(agent) - self.teammates_collection, self.eval_teammates_collection = self.get_teammates_collection(_tms_clctn = teammates_collection, - learning_agent = self.learning_agent, - train_types = train_types, - eval_types = eval_types) + self.teammates_collection, self.eval_teammates_collection = self.get_teammates_collection( + _tms_clctn = teammates_collection, + learning_agent = self.learning_agent, + train_types = train_types, + eval_types = eval_types + ) self.best_score, self.best_training_rew = -1, float('-inf') @classmethod - def generate_randomly_initialized_agent(cls, - args, - learner_type:str, - name:str, - seed:int, - hidden_dim:int, - ) -> OAIAgent: + def generate_randomly_initialized_agent( + cls, + args, + learner_type:str, + name:str, + seed:int, + hidden_dim:int, + ) -> OAIAgent: ''' Generate a 
randomly initialized learning agent using the RLAgentTrainer class This function does not perform any learning @@ -87,16 +92,17 @@ def generate_randomly_initialized_agent(cls, :param seed: Random seed :returns: An untrained, randomly inititalized RL agent ''' - trainer = cls(name=name, - args=args, - agent=None, - teammates_collection={}, - epoch_timesteps=args.epoch_timesteps, - n_envs=args.n_envs, - seed=seed, - hidden_dim=hidden_dim, - learner_type=learner_type, - ) + trainer = cls( + name=name, + args=args, + agent=None, + teammates_collection={}, + epoch_timesteps=args.epoch_timesteps, + n_envs=args.n_envs, + seed=seed, + hidden_dim=hidden_dim, + learner_type=learner_type, + ) learning_agent, _ = trainer.get_learning_agent(None) return learning_agent @@ -267,13 +273,13 @@ def wrap_agent(self, sb3_agent, name): def should_evaluate(self, steps): mean_training_rew = np.mean([ep_info["r"] for ep_info in self.learning_agent.agent.ep_info_buffer]) - self.best_training_rew *= 0.98 + self.best_training_rew *= 1.00 - steps_divisable_by_15 = (steps + 1) % 15 == 0 + steps_divisible_by_x = (steps + 1) % 15 == 0 mean_rew_greater_than_best = mean_training_rew > self.best_training_rew and self.learning_agent.num_timesteps >= 5e6 checkpoint_rate_reached = self.checkpoint_rate and self.learning_agent.num_timesteps // self.checkpoint_rate > (len(self.ck_list) - 1) - return steps_divisable_by_15 or mean_rew_greater_than_best or checkpoint_rate_reached + return steps_divisible_by_x or mean_rew_greater_than_best or checkpoint_rate_reached def log_details(self, experiment_name, total_train_timesteps): print("Training agent: " + self.name + ", for experiment: " + experiment_name) @@ -302,15 +308,23 @@ def train_agents(self, total_train_timesteps, tag_for_returning_agent, resume_ck if self.checkpoint_rate is not None: if self.args.resume: - path = RLAgentTrainer.get_model_path(base_dir=self.args_base_dir, - exp_folder=self.args.exp_dir, - model_name=self.name) - - ckpts = [name for name in os.listdir(path) if name.startswith("ck")] + path = RLAgentTrainer.get_model_path( + base_dir=self.args.base_dir, + exp_folder=self.args.exp_dir, + model_name=self.name + ) + if not path.exists(): + print(f"Warning: The directory {path} does not exist.") + return None + ckpts = [name for name in os.listdir(path) if name.startswith(KeyCheckpoints.CHECKED_MODEL_PREFIX)] + if not ckpts: + print(f"Warning: No checkpoints found in {path} with prefix '{KeyCheckpoints.CHECKED_MODEL_PREFIX}'.") + return None ckpts_nums = [int(c.split('_')[1]) for c in ckpts] sorted_idxs = np.argsort(ckpts_nums) ckpts = [ckpts[i] for i in sorted_idxs] - self.ck_list = [(c[0], path, c[2]) for c in resume_ck_list] if resume_ck_list else [({k: 0 for k in self.args.layout_names}, path, ck) for ck in ckpts] + self.ck_list = [(c[0], path, c[2]) for c in resume_ck_list] if resume_ck_list else [ + ({k: 0 for k in self.args.layout_names}, path, ck) for ck in ckpts] else: self.ck_list = [] path, tag = self.save_agents(tag=f'{KeyCheckpoints.CHECKED_MODEL_PREFIX}{len(self.ck_list)}') @@ -322,6 +336,7 @@ def train_agents(self, total_train_timesteps, tag_for_returning_agent, resume_ck self.steps = self.start_step curr_timesteps = self.start_timestep prev_timesteps = self.learning_agent.num_timesteps + ck_name_handler = CheckedModelNameHandler() while curr_timesteps < total_train_timesteps: self.curriculum.update(current_step=self.steps) @@ -350,7 +365,7 @@ def train_agents(self, total_train_timesteps, tag_for_returning_agent, resume_ck if self.checkpoint_rate: 
if self.learning_agent.num_timesteps // self.checkpoint_rate > (len(self.ck_list) - 1): - path, tag = self.save_agents(tag=CheckedModelNameHandler.generate_tag(id=len(self.ck_list), mean_reward=mean_reward)) + path, tag = self.save_agents(tag=ck_name_handler.generate_tag(id=len(self.ck_list), mean_reward=mean_reward)) self.ck_list.append((rew_per_layout, path, tag)) if mean_reward >= self.best_score: diff --git a/oai_agents/common/arguments.py b/oai_agents/common/arguments.py index 1185bfd6..388e3dac 100644 --- a/oai_agents/common/arguments.py +++ b/oai_agents/common/arguments.py @@ -77,6 +77,9 @@ def get_arguments(additional_args=[]): parser.add_argument("--num-of-ckpoints", type=int, default=10) parser.add_argument("--resume", action="store_true", default=False, help="Restart from last checkpoint for population training only") + parser.add_argument("--for-evaluation", action="store_true", default=False, help="The trained agents are used for evaluating other agents. Please note that seeds and h_dim are different when agents are trained for evaluating others.)") + parser.add_argument("--num-of-training-variants", type=int, default=4) + for parser_arg, parser_kwargs in additional_args: parser.add_argument(parser_arg, **parser_kwargs) diff --git a/oai_agents/common/multi_setup_trainer.py b/oai_agents/common/multi_setup_trainer.py index e69de29b..52899ace 100644 --- a/oai_agents/common/multi_setup_trainer.py +++ b/oai_agents/common/multi_setup_trainer.py @@ -0,0 +1,183 @@ +import concurrent.futures +import random +from scripts.utils.common import generate_name +from oai_agents.common.learner import LearnerType +from oai_agents.common.tags import Prefix, KeyCheckpoints +from oai_agents.agents.rl import RLAgentTrainer +import dill + + +class MultiSetupTrainer: + def __init__( + self, + args, + train_types, + eval_types, + curriculum, + tag_for_returning_agent + ): + self.args = args + self.train_types = train_types + self.eval_types = eval_types + self.curriculum = curriculum + self.tag_for_returning_agent = tag_for_returning_agent + + self.parallel = args.parallel + self.num_of_training_variants = args.num_of_training_variants + self.for_evaluation = args.for_evaluation + + def generate_hdim_and_seed(self): + training_seeds = [1010, 2020, 2602, 13, 68, 2907, 105, 128] + training_hdims = [256] * len(training_seeds) + + evaluation_seeds = [3031, 4041, 5051, 3708, 3809, 3910, 4607, 5506] + evaluation_hdims = [256] * len(evaluation_seeds) + + if self.for_evaluation: + seeds = evaluation_seeds + hdims = evaluation_hdims + min_seed, max_seed = 3000, 5999 + else: + seeds = training_seeds + hdims = training_hdims + min_seed, max_seed = 0, 2999 + + selected_seeds = [] + selected_hdims = [] + + if self.num_of_training_variants <= len(seeds): + selected_seeds = seeds[:self.num_of_training_variants] + selected_hdims = hdims[:self.num_of_training_variants] + else: + selected_seeds = seeds[:] + selected_hdims = hdims[:] + + remaining = self.num_of_training_variants - len(seeds) + available_seeds = list(set(range(min_seed, max_seed + 1)) - set(selected_seeds)) + random_seeds = random.sample(available_seeds, remaining) + random_hdims = [256] * remaining + + selected_seeds += random_seeds + selected_hdims += random_hdims + + return selected_seeds, selected_hdims + + def get_trained_agent(self, seed, h_dim): + raise NotImplementedError("This method should be implemented by subclasses.") + + def get_multiple_trained_agents(self): + agents = [] + + seeds, hdims = self.generate_hdim_and_seed() + inputs = [ + 
(seeds[i], hdims[i]) + for i in range(self.num_of_training_variants) + ] + + if self.args.parallel: + with concurrent.futures.ProcessPoolExecutor( + max_workers=self.args.max_concurrent_jobs) as executor: + arg_lists = list(zip(*inputs)) + executor.map(self.get_trained_agent, *arg_lists) + # dilled_results = list(executor.map(self.get_trained_agent, *arg_lists)) + # for dilled_res in dilled_results: + # agent = dill.loads(dilled_res) + # agents.append(agent) + else: + for i in range(self.num_of_training_variants): + agents.append(self.get_trained_agent(seed=seeds[i], h_dim=hdims[i])) + + # return agents + + def get_reinforcement_agent( + self, + name, + teammates_collection, + curriculum, + h_dim, + seed, + learner_type, + checkpoint_rate, + total_train_timesteps, + ): + agent_ckpt = None + start_step = 0 + start_timestep = 0 + ck_list = None + if self.args.resume: + last_ckpt = RLAgentTrainer.get_most_recent_checkpoint(args=self.args, name=name) + if last_ckpt: + agent_ckpt_info, env_info, training_info = RLAgentTrainer.load_agents(args=self.args, name=name, tag=last_ckpt) + agent_ckpt = agent_ckpt_info[0] + start_step = env_info["step_count"] + start_timestep = env_info["timestep_count"] + ck_list = training_info["ck_list"] + print(f"Restarting training from step: {start_step} (timestep: {start_timestep})") + + rlat = RLAgentTrainer( + args=self.args, + name=name, + teammates_collection=teammates_collection, + curriculum=curriculum, + hidden_dim=h_dim, + seed=seed, + checkpoint_rate=checkpoint_rate, + learner_type=learner_type, + agent=agent_ckpt, + epoch_timesteps=self.args.epoch_timesteps, + n_envs=self.args.n_envs, + start_step=start_step, + start_timestep=start_timestep + ) + + rlat.train_agents( + total_train_timesteps=total_train_timesteps, + tag_for_returning_agent=self.tag_for_returning_agent, + resume_ck_list=ck_list + ) + + agent = rlat.get_agents()[0] + + # if self.parallel: + # dill.dumps(agent) + + # return agent + + +class MultiSetupSPTrainer(MultiSetupTrainer): + def get_trained_agent(self, seed, h_dim): + name = generate_name( + args=self.args, + prefix=Prefix.SELF_PLAY, + seed=seed, + h_dim=h_dim, + train_types=self.train_types, + has_curriculum=not self.curriculum.is_random + ) + + return self.get_reinforcement_agent( + name=name, + teammates_collection={}, + curriculum=self.curriculum, + h_dim=h_dim, + seed=seed, + learner_type=self.args.primary_learner_type, + checkpoint_rate=self.args.pop_total_training_timesteps // self.args.num_of_ckpoints, + total_train_timesteps=self.args.pop_total_training_timesteps, + ) + +def get_SP_agents(args, train_types, eval_types, curriculum, tag_for_returning_agent): + sp_trainer = MultiSetupSPTrainer( + args=args, + train_types=train_types, + eval_types=eval_types, + curriculum=curriculum, + tag_for_returning_agent=tag_for_returning_agent, + ) + return sp_trainer.get_multiple_trained_agents() + +# Example usage: +# sp_trainer = MultiSetupSPTrainer(args=args, num_of_training_variants=4, train_types=train_types, eval_types=eval_types, curriculum=curriculum, tag=tag) +# trained_agents = sp_trainer.get_multiple_trained_agents() +# Alternatively: +# trained_agents = get_SP_agents(args=args, num_of_training_variants=4, train_types=train_types, eval_types=eval_types, curriculum=curriculum, parallel=True, tag=tag) diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 58e235e6..9ea72d55 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -53,9 +53,11 @@ def 
train_SP_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_d For SP agents, they only are trained with themselves so the order doesn't matter. ''' - rlat.train_agents(total_train_timesteps=total_training_timesteps, - tag_for_returning_agent=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL, - resume_ck_list=ck_rewards) + rlat.train_agents( + total_train_timesteps=total_training_timesteps, + tag_for_returning_agent=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL, + resume_ck_list=ck_rewards + ) checkpoints_list = rlat.ck_list if serialize: @@ -181,16 +183,17 @@ def save_categorized_SP_population(args, population): rt.save_agents(tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL) -def get_categorized_SP_population(args, - ck_rate, - total_training_timesteps, - train_types, - eval_types, - num_SPs_to_train, - unseen_teammates_len=0, - force_training=False, - tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL, - ): +def get_categorized_SP_population( + args, + ck_rate, + total_training_timesteps, + train_types, + eval_types, + num_SPs_to_train, + unseen_teammates_len=0, + force_training=False, + tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL, + ): population = {layout_name: [] for layout_name in args.layout_names} @@ -204,15 +207,19 @@ def get_categorized_SP_population(args, except FileNotFoundError as e: print(f'Could not find saved population, creating them from scratch...\nFull Error: {e}') - ensure_enough_SP_agents(teammates_len=args.teammates_len, - unseen_teammates_len=unseen_teammates_len, - train_types=train_types, - eval_types=eval_types, - num_SPs_to_train=num_SPs_to_train) + ensure_enough_SP_agents( + teammates_len=args.teammates_len, + unseen_teammates_len=unseen_teammates_len, + train_types=train_types, + eval_types=eval_types, + num_SPs_to_train=num_SPs_to_train + ) - seed, h_dim = generate_hdim_and_seed(for_training=True, num_of_required_agents=num_SPs_to_train) + seed, h_dim = generate_hdim_and_seed( + for_training=True, num_of_required_agents=num_SPs_to_train) inputs = [ - (args, total_training_timesteps, ck_rate, seed[i], h_dim[i], True) for i in range(num_SPs_to_train) + (args, total_training_timesteps, ck_rate, seed[i], h_dim[i], True) + for i in range(num_SPs_to_train) ] @@ -223,18 +230,22 @@ def get_categorized_SP_population(args, for dilled_res in dilled_results: checkpoints_list = dill.loads(dilled_res) for layout_name in args.layout_names: - layout_pop = RLAgentTrainer.get_checkedpoints_agents(args, checkpoints_list, layout_name) + layout_pop = RLAgentTrainer.get_checkedpoints_agents( + args, checkpoints_list, layout_name) population[layout_name].extend(layout_pop) else: for inp in inputs: - checkpoints_list = train_SP_with_checkpoints(args=inp[0], - total_training_timesteps = inp[1], - ck_rate=inp[2], - seed=inp[3], - h_dim=inp[4], - serialize=False) + checkpoints_list = train_SP_with_checkpoints( + args=inp[0], + total_training_timesteps = inp[1], + ck_rate=inp[2], + seed=inp[3], + h_dim=inp[4], + serialize=False + ) for layout_name in args.layout_names: - layout_pop = RLAgentTrainer.get_checkedpoints_agents(args, checkpoints_list, layout_name) + layout_pop = RLAgentTrainer.get_checkedpoints_agents( + args, checkpoints_list, layout_name) population[layout_name].extend(layout_pop) save_categorized_SP_population(args=args, population=population) diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 4a306b55..93e16363 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -7,6 +7,7 @@ from oai_agents.common.curriculum import Curriculum from 
scripts.utils import ( + get_SP_agents, get_SP_agent, get_FCP_agent_w_pop, get_N_X_FCP_agents, @@ -21,13 +22,21 @@ def SP(args): } curriculum = Curriculum(train_types=primary_train_types, is_random=True) - get_SP_agent( + get_SP_agents( args=args, train_types=curriculum.train_types, eval_types=primary_eval_types, - curriculum=curriculum + curriculum=curriculum, + tag_for_returning_agent=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL ) + # get_SP_agent( + # args=args, + # train_types=curriculum.train_types, + # eval_types=primary_eval_types, + # curriculum=curriculum + # ) + def SPN_1ADV(args) -> None: ''' @@ -52,8 +61,8 @@ def SPN_1ADV(args) -> None: 'load': [] } - curriculum = Curriculum(train_types = primary_train_types, - is_random = True) + curriculum = Curriculum( + train_types = primary_train_types, is_random = True) get_N_X_SP_agents( args, n_x_sp_train_types=curriculum.train_types, @@ -83,7 +92,11 @@ def SPN_1ADV_XSPCKP(args) -> None: attack_rounds = 3 unseen_teammates_len = 1 adversary_play_config = AdversaryPlayConfig.MAP - primary_train_types = [TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_ADVERSARY] + primary_train_types = [ + TeamType.SELF_PLAY_HIGH, + TeamType.SELF_PLAY_MEDIUM, + TeamType.SELF_PLAY_ADVERSARY + ] primary_eval_types = { 'generate': [ @@ -351,7 +364,8 @@ def set_input(args): else: # Used for doing quick tests args.num_of_ckpoints = 10 args.sb_verbose = 1 - args.wandb_mode = 'disabled' + # args.wandb_mode = 'disabled' + args.wandb_mode = 'online' args.n_envs = 2 args.epoch_timesteps = 2 @@ -370,19 +384,21 @@ def set_input(args): args = get_arguments() args.quick_test = False args.parallel = True + args.num_of_training_variants = 4 + args.device = 'cpu' args.pop_force_training = False args.adversary_force_training = False args.primary_force_training = False - args.teammates_len = 3 - args.how_long = 6*4 # Not effective in quick_test mode + args.teammates_len = 1 + args.how_long = 8 # Not effective in quick_test mode set_input(args=args) # SPN_1ADV_XSPCKP(args=args) - #SP(args) + SP(args) # FCP_traditional(args=args) diff --git a/scripts/utils/__init__.py b/scripts/utils/__init__.py index f03281e6..1d691992 100644 --- a/scripts/utils/__init__.py +++ b/scripts/utils/__init__.py @@ -1,4 +1,4 @@ -from .train_helper import get_SP_agent, get_FCP_agent_w_pop, get_N_X_FCP_agents, get_N_X_SP_agents +from .train_helper import get_SP_agents, get_SP_agent, get_FCP_agent_w_pop, get_N_X_FCP_agents, get_N_X_SP_agents from .eval_helper import get_eval_types_to_load from .eval_constants import * diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index 290cbcdf..e20ccd46 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -4,10 +4,20 @@ from oai_agents.common.teammates_collection import generate_TC, get_best_SP_agent, generate_TC_for_ADV_agent, update_TC_w_ADV_teammates from oai_agents.common.curriculum import Curriculum from .common import load_agents, generate_name -from oai_agents.common.tags import Prefix -from oai_agents.common.tags import KeyCheckpoints +from oai_agents.common.tags import Prefix, KeyCheckpoints +from oai_agents.common.multi_setup_trainer import MultiSetupSPTrainer +def get_SP_agents(args, train_types, eval_types, curriculum, tag_for_returning_agent): + sp_trainer = MultiSetupSPTrainer( + args=args, + train_types=train_types, + eval_types=eval_types, + curriculum=curriculum, + tag_for_returning_agent=tag_for_returning_agent, + ) + return sp_trainer.get_multiple_trained_agents() + def 
get_SP_agent( args, train_types, @@ -15,12 +25,14 @@ def get_SP_agent( curriculum, tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL ): - name = generate_name(args, - prefix=Prefix.SELF_PLAY, - seed=args.SP_seed, - h_dim=args.SP_h_dim, - train_types=train_types, - has_curriculum= not curriculum.is_random) + name = generate_name( + args, + prefix=Prefix.SELF_PLAY, + seed=args.SP_seed, + h_dim=args.SP_h_dim, + train_types=train_types, + has_curriculum=not curriculum.is_random + ) agents = load_agents(args, name=name, tag=tag, force_training=args.pop_force_training) if agents: From 8a0ac4f678c35c913773be4292c888d2121a0f8a Mon Sep 17 00:00:00 2001 From: ttopiac Date: Mon, 2 Dec 2024 14:53:25 -0700 Subject: [PATCH 59/61] Modify multi_setup_trainer methods and make them to be the same as what we have done in population.py --- oai_agents/common/multi_setup_trainer.py | 20 +++++++++++++------- scripts/train_agents.py | 4 ++-- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/oai_agents/common/multi_setup_trainer.py b/oai_agents/common/multi_setup_trainer.py index 52899ace..ebec6de7 100644 --- a/oai_agents/common/multi_setup_trainer.py +++ b/oai_agents/common/multi_setup_trainer.py @@ -78,14 +78,19 @@ def get_multiple_trained_agents(self): with concurrent.futures.ProcessPoolExecutor( max_workers=self.args.max_concurrent_jobs) as executor: arg_lists = list(zip(*inputs)) - executor.map(self.get_trained_agent, *arg_lists) - # dilled_results = list(executor.map(self.get_trained_agent, *arg_lists)) + # executor.map(self.get_trained_agent, *arg_lists) + dilled_results = list(executor.map(self.get_trained_agent, *arg_lists)) + for dilled_res in dilled_results: + checkpoints_list = dill.loads(dilled_res) # for dilled_res in dilled_results: # agent = dill.loads(dilled_res) # agents.append(agent) else: - for i in range(self.num_of_training_variants): - agents.append(self.get_trained_agent(seed=seeds[i], h_dim=hdims[i])) + for inp in inputs: + checkpoints_list = self.get_trained_agent(seed=seeds[i], h_dim=hdims[i]) + + # for i in range(self.num_of_training_variants): + # agents.append(self.get_trained_agent(seed=seeds[i], h_dim=hdims[i])) # return agents @@ -137,11 +142,12 @@ def get_reinforcement_agent( ) agent = rlat.get_agents()[0] + checkpoint_list = rlat.ck_list - # if self.parallel: - # dill.dumps(agent) + if self.parallel: + return dill.dumps(checkpoint_list) - # return agent + return checkpoint_list class MultiSetupSPTrainer(MultiSetupTrainer): diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 93e16363..90add793 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -384,8 +384,8 @@ def set_input(args): args = get_arguments() args.quick_test = False args.parallel = True - args.num_of_training_variants = 4 - args.device = 'cpu' + args.num_of_training_variants = 2 + # args.device = 'cpu' args.pop_force_training = False args.adversary_force_training = False From d4191521d32eb8a70e395017bda6e19cbf51a4c3 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Fri, 6 Dec 2024 18:11:07 -0700 Subject: [PATCH 60/61] Make sure saving the correct timestep, step and ck_list so that when resuming training, RLAgentTrainer can work properly --- oai_agents/agents/base_agent.py | 11 +++-- oai_agents/agents/rl.py | 54 ++++++++++++++++-------- oai_agents/common/multi_setup_trainer.py | 6 ++- scripts/train_agents.py | 9 ++-- scripts/utils/train_helper.py | 6 ++- 5 files changed, 57 insertions(+), 29 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py 
index 7186a6e7..9dab0d77 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -358,6 +358,7 @@ def __init__(self, name, args, seed=None): self.name = name self.args = args self.ck_list = [] + self.n_envs = args.n_envs if seed is not None: os.environ['PYTHONASHSEED'] = str(seed) th.manual_seed(seed) @@ -466,9 +467,11 @@ def get_agents(self) -> List[OAIAgent]: def save_agents(self, path: Union[Path, None] = None, tag: Union[str, None] = None): ''' Saves each agent that the trainer is training ''' - path = path or OAITrainer.get_model_path(base_dir=self.args.base_dir, - exp_folder=self.args.exp_dir, - model_name=self.name) + path = path or OAITrainer.get_model_path( + base_dir=self.args.base_dir, + exp_folder=self.args.exp_dir, + model_name=self.name + ) tag = tag or self.args.exp_name save_path = path / tag / 'trainer_file' @@ -481,6 +484,7 @@ def save_agents(self, path: Union[Path, None] = None, tag: Union[str, None] = No agent.save(agent_path_i) save_dict['agent_fns'].append(f'agent_{i}') save_dict["ck_list"] = self.ck_list + save_dict["n_envs"] = self.n_envs th.save(save_dict, save_path) with open(env_path, "wb") as f: step_counts = self.env.get_attr("step_count") @@ -490,6 +494,7 @@ def save_agents(self, path: Union[Path, None] = None, tag: Union[str, None] = No "timestep_count": timestep_count, "step_count": self.steps }, f) + print(f"we saved timestep_count: {timestep_count} and step_count:{self.steps}") return path, tag @staticmethod diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index a1afe6e0..b79c9f0d 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -83,6 +83,7 @@ def generate_randomly_initialized_agent( name:str, seed:int, hidden_dim:int, + n_envs: int ) -> OAIAgent: ''' Generate a randomly initialized learning agent using the RLAgentTrainer class @@ -98,7 +99,7 @@ def generate_randomly_initialized_agent( agent=None, teammates_collection={}, epoch_timesteps=args.epoch_timesteps, - n_envs=args.n_envs, + n_envs=n_envs, seed=seed, hidden_dim=hidden_dim, learner_type=learner_type, @@ -297,7 +298,10 @@ def log_details(self, experiment_name, total_train_timesteps): print("Dynamic Reward: ", self.args.dynamic_reward) print("Final sparse reward ratio: ", self.args.final_sparse_r_ratio) - + def save_init_model_and_cklist(self): + self.ck_list = [] + path, tag = self.save_agents(tag=f'{KeyCheckpoints.CHECKED_MODEL_PREFIX}{0}') + self.ck_list.append(({k: 0 for k in self.args.layout_names}, path, tag)) def train_agents(self, total_train_timesteps, tag_for_returning_agent, resume_ck_list=None): experiment_name = RLAgentTrainer.get_experiment_name(exp_folder=self.args.exp_dir, model_name=self.name) run = wandb.init(project="overcooked_ai", entity=self.args.wandb_ent, dir=str(self.args.base_dir / 'wandb'), @@ -315,27 +319,30 @@ def train_agents(self, total_train_timesteps, tag_for_returning_agent, resume_ck ) if not path.exists(): print(f"Warning: The directory {path} does not exist.") - return None - ckpts = [name for name in os.listdir(path) if name.startswith(KeyCheckpoints.CHECKED_MODEL_PREFIX)] - if not ckpts: - print(f"Warning: No checkpoints found in {path} with prefix '{KeyCheckpoints.CHECKED_MODEL_PREFIX}'.") - return None - ckpts_nums = [int(c.split('_')[1]) for c in ckpts] - sorted_idxs = np.argsort(ckpts_nums) - ckpts = [ckpts[i] for i in sorted_idxs] - self.ck_list = [(c[0], path, c[2]) for c in resume_ck_list] if resume_ck_list else [ - ({k: 0 for k in self.args.layout_names}, path, ck) for ck in ckpts] + 
self.save_init_model_and_cklist() + else: + ckpts = [name for name in os.listdir(path) if name.startswith(KeyCheckpoints.CHECKED_MODEL_PREFIX)] + if not ckpts: + print(f"Warning: No checkpoints found in {path} with prefix '{KeyCheckpoints.CHECKED_MODEL_PREFIX}'.") + self.save_init_model_and_cklist() + else: + ckpts_nums = [int(c.split('_')[1]) for c in ckpts] + sorted_idxs = np.argsort(ckpts_nums) + ckpts = [ckpts[i] for i in sorted_idxs] + self.ck_list = [(c[0], path, c[2]) for c in resume_ck_list] if resume_ck_list else [ + ({k: 0 for k in self.args.layout_names}, path, ck) for ck in ckpts] else: - self.ck_list = [] - path, tag = self.save_agents(tag=f'{KeyCheckpoints.CHECKED_MODEL_PREFIX}{len(self.ck_list)}') - self.ck_list.append(({k: 0 for k in self.args.layout_names}, path, tag)) + self.save_init_model_and_cklist() best_path, best_tag = None, None self.steps = self.start_step - curr_timesteps = self.start_timestep + self.learning_agent.num_timesteps = self.n_envs*self.start_timestep + curr_timesteps = self.n_envs*self.start_timestep prev_timesteps = self.learning_agent.num_timesteps + print(f"curr_timesteps: {curr_timesteps}") + print(f"prev_timesteps: {prev_timesteps}") ck_name_handler = CheckedModelNameHandler() while curr_timesteps < total_train_timesteps: @@ -356,6 +363,8 @@ def train_agents(self, total_train_timesteps, tag_for_returning_agent, resume_ck curr_timesteps += self.learning_agent.num_timesteps - prev_timesteps prev_timesteps = self.learning_agent.num_timesteps + self.steps += 1 + if self.should_evaluate(steps=self.steps): mean_training_rew = np.mean([ep_info["r"] for ep_info in self.learning_agent.agent.ep_info_buffer]) if mean_training_rew >= self.best_training_rew: @@ -365,14 +374,23 @@ def train_agents(self, total_train_timesteps, tag_for_returning_agent, resume_ck if self.checkpoint_rate: if self.learning_agent.num_timesteps // self.checkpoint_rate > (len(self.ck_list) - 1): - path, tag = self.save_agents(tag=ck_name_handler.generate_tag(id=len(self.ck_list), mean_reward=mean_reward)) + print(f"len(self.ck_list): {len(self.ck_list)}") + print(f"self.learning_agent.num_timesteps: {self.learning_agent.num_timesteps}") + print(f"curr_timesteps: {curr_timesteps}") + path = OAITrainer.get_model_path( + base_dir=self.args.base_dir, + exp_folder=self.args.exp_dir, + model_name=self.name + ) + tag = ck_name_handler.generate_tag(id=len(self.ck_list), mean_reward=mean_reward) self.ck_list.append((rew_per_layout, path, tag)) + path, tag = self.save_agents(path=path, tag=tag) + if mean_reward >= self.best_score: best_path, best_tag = self.save_agents(tag=KeyCheckpoints.BEST_EVAL_REWARD) print(f'New best evaluation score of {mean_reward} reached, model saved to {best_path}/{best_tag}') self.best_score = mean_reward - self.steps += 1 self.save_agents(tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL) self.agents, _, _ = RLAgentTrainer.load_agents(args=self.args, name=self.name, tag=tag_for_returning_agent) run.finish() diff --git a/oai_agents/common/multi_setup_trainer.py b/oai_agents/common/multi_setup_trainer.py index ebec6de7..a94cf0c5 100644 --- a/oai_agents/common/multi_setup_trainer.py +++ b/oai_agents/common/multi_setup_trainer.py @@ -109,6 +109,7 @@ def get_reinforcement_agent( start_step = 0 start_timestep = 0 ck_list = None + n_envs=self.args.n_envs if self.args.resume: last_ckpt = RLAgentTrainer.get_most_recent_checkpoint(args=self.args, name=name) if last_ckpt: @@ -117,7 +118,8 @@ def get_reinforcement_agent( start_step = env_info["step_count"] start_timestep = 
env_info["timestep_count"] ck_list = training_info["ck_list"] - print(f"Restarting training from step: {start_step} (timestep: {start_timestep})") + n_envs = training_info["n_envs"] + print(f"The model with {seed} Restarting training from step: {start_step} (timestep: {start_timestep})") rlat = RLAgentTrainer( args=self.args, @@ -130,7 +132,7 @@ def get_reinforcement_agent( learner_type=learner_type, agent=agent_ckpt, epoch_timesteps=self.args.epoch_timesteps, - n_envs=self.args.n_envs, + n_envs=n_envs, start_step=start_step, start_timestep=start_timestep ) diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 90add793..596f5340 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -362,14 +362,15 @@ def set_input(args): args.exp_dir = f'Final/{args.num_players}' else: # Used for doing quick tests - args.num_of_ckpoints = 10 + args.num_of_ckpoints = 5 args.sb_verbose = 1 # args.wandb_mode = 'disabled' args.wandb_mode = 'online' args.n_envs = 2 args.epoch_timesteps = 2 - args.pop_total_training_timesteps = 3500 + # args.pop_total_training_timesteps = 3500 + args.pop_total_training_timesteps = 1000*5 args.n_x_sp_total_training_timesteps = 1000 args.adversary_total_training_timesteps = 1000 @@ -382,9 +383,9 @@ def set_input(args): if __name__ == '__main__': args = get_arguments() - args.quick_test = False + args.quick_test = True args.parallel = True - args.num_of_training_variants = 2 + args.num_of_training_variants = 1 # args.device = 'cpu' args.pop_force_training = False diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index e20ccd46..e81d690f 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -182,7 +182,8 @@ def joint_ADV_N_X_SP( name=name, learner_type=args.primary_learner_type, hidden_dim=args.N_X_SP_h_dim, - seed=args.N_X_SP_seed + seed=args.N_X_SP_seed, + n_envs=args.n_envs ) teammates_collection = generate_TC( @@ -256,7 +257,8 @@ def no_ADV_N_X_SP( name=name, learner_type=args.primary_learner_type, hidden_dim=args.N_X_SP_h_dim, - seed=args.N_X_SP_seed + seed=args.N_X_SP_seed, + n_envs=args.n_envs ) teammates_collection = generate_TC( From a8179ed3192dc4a116ad6c9cfdfecd51ecb4d6b7 Mon Sep 17 00:00:00 2001 From: ttopiac Date: Sat, 7 Dec 2024 22:44:25 -0700 Subject: [PATCH 61/61] Let RLAgentTrainer in population.py use saved n_envs information when there is one. 
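When a run is resumed, the number of vectorized environments is now taken from the checkpoint's training_info instead of args.n_envs, so the restored timestep count (n_envs * per-env timesteps) matches how the checkpoint was produced. A minimal, self-contained sketch of that bookkeeping follows; the names are illustrative only and are not the real RLAgentTrainer API.

from dataclasses import dataclass, field

@dataclass
class SavedRun:
    step_count: int        # training-loop steps at save time
    timestep_count: int    # per-env environment steps at save time
    n_envs: int            # vectorized envs used when the checkpoint was written
    ck_list: list = field(default_factory=list)

def restore_counters(ckpt, default_n_envs):
    """Return (n_envs, start_step, total_timesteps, ck_list) for a fresh or resumed run."""
    if ckpt is None:
        return default_n_envs, 0, 0, []
    # Prefer the saved n_envs over the current default so the total timestep
    # count below stays consistent with the run that wrote the checkpoint.
    return ckpt.n_envs, ckpt.step_count, ckpt.n_envs * ckpt.timestep_count, ckpt.ck_list

# Fresh run with 4 envs: restore_counters(None, 4) -> (4, 0, 0, [])
# Resuming a run saved with 2 envs after 500 per-env steps:
#   restore_counters(SavedRun(step_count=7, timestep_count=500, n_envs=2), 4) -> (2, 7, 1000, [])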
--- oai_agents/agents/rl.py | 15 +++++++-------- oai_agents/common/population.py | 4 +++- scripts/train_agents.py | 2 +- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index b79c9f0d..f456ce67 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -339,13 +339,12 @@ def train_agents(self, total_train_timesteps, tag_for_returning_agent, resume_ck self.steps = self.start_step self.learning_agent.num_timesteps = self.n_envs*self.start_timestep - curr_timesteps = self.n_envs*self.start_timestep - prev_timesteps = self.learning_agent.num_timesteps - print(f"curr_timesteps: {curr_timesteps}") - print(f"prev_timesteps: {prev_timesteps}") + # curr_timesteps = self.n_envs*self.start_timestep + # prev_timesteps = self.learning_agent.num_timesteps + print(f"curr_timesteps: {self.learning_agent.num_timesteps}") ck_name_handler = CheckedModelNameHandler() - while curr_timesteps < total_train_timesteps: + while self.learning_agent.num_timesteps < total_train_timesteps: self.curriculum.update(current_step=self.steps) # TODO: eventually, teammates_collection should be turned into its own class with 'select' @@ -360,8 +359,8 @@ def train_agents(self, total_train_timesteps, tag_for_returning_agent, resume_ck self.learning_agent.learn(self.epoch_timesteps) - curr_timesteps += self.learning_agent.num_timesteps - prev_timesteps - prev_timesteps = self.learning_agent.num_timesteps + # curr_timesteps += self.learning_agent.num_timesteps - prev_timesteps + # prev_timesteps = self.learning_agent.num_timesteps self.steps += 1 @@ -376,7 +375,7 @@ def train_agents(self, total_train_timesteps, tag_for_returning_agent, resume_ck if self.learning_agent.num_timesteps // self.checkpoint_rate > (len(self.ck_list) - 1): print(f"len(self.ck_list): {len(self.ck_list)}") print(f"self.learning_agent.num_timesteps: {self.learning_agent.num_timesteps}") - print(f"curr_timesteps: {curr_timesteps}") + print(f"curr_timesteps: {self.learning_agent.num_timesteps}") path = OAITrainer.get_model_path( base_dir=self.args.base_dir, exp_folder=self.args.exp_dir, diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 9ea72d55..63f442b5 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -22,6 +22,7 @@ def train_SP_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_d start_step = 0 start_timestep = 0 ck_rewards = None + n_envs=args.n_envs if args.resume: last_ckpt = RLAgentTrainer.get_most_recent_checkpoint(args, name=name) if last_ckpt: @@ -30,6 +31,7 @@ def train_SP_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_d start_step = env_info["step_count"] start_timestep = env_info["timestep_count"] ck_rewards = training_info["ck_list"] + n_envs = training_info["n_envs"] print(f"Restarting training from step: {start_step} (timestep: {start_timestep})") @@ -39,7 +41,7 @@ def train_SP_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_d agent=agent_ckpt, teammates_collection={}, # automatically creates SP type epoch_timesteps=args.epoch_timesteps, - n_envs=args.n_envs, + n_envs=n_envs, hidden_dim=h_dim, seed=seed, checkpoint_rate=ck_rate, diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 596f5340..6cd7cefd 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -385,7 +385,7 @@ def set_input(args): args = get_arguments() args.quick_test = True args.parallel = True - args.num_of_training_variants = 1 + 
args.num_of_training_variants = 3 # args.device = 'cpu' args.pop_force_training = False