diff --git a/nmmo/core/env.py b/nmmo/core/env.py index f8367610..3cc41570 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -1,7 +1,7 @@ import functools import random -import copy -from typing import Any, Dict, List, Optional, Union, Tuple +from typing import Any, Dict, List, Callable +from collections import defaultdict from ordered_set import OrderedSet import gym @@ -16,8 +16,7 @@ from nmmo.entity.entity import Entity from nmmo.systems.item import Item from nmmo.task.game_state import GameStateGenerator -from nmmo.task.task_api import Task -from nmmo.task.scenario import default_task +from nmmo.task import task_api from scripted.baselines import Scripted class Env(ParallelEnv): @@ -41,15 +40,7 @@ def __init__(self, self._gamestate_generator = GameStateGenerator(self.realm, self.config) self.game_state = None - # Default task: rewards 1 each turn agent is alive - self.tasks: List[Tuple[Task,float]] = None - self._task_encoding = None - self._task_embedding_size = -1 - t = default_task(self.possible_agents) - self.change_task(t, - embedding_size=self._task_embedding_size, - task_encoding=self._task_encoding, - reset=False) + self.tasks = task_api.nmmo_default_task(self.possible_agents) # pylint: disable=method-cache-max-size-none @functools.lru_cache(maxsize=None) @@ -88,12 +79,6 @@ def box(rows, cols): if self.config.PROVIDE_ACTION_TARGETS: obs_space['ActionTargets'] = self.action_space(None) - if self._task_encoding: - obs_space['Task'] = gym.spaces.Box( - low=-2**20, high=2**20, - shape=(self._task_embedding_size,), - dtype=np.float32) - return gym.spaces.Dict(obs_space) def _init_random(self, seed): @@ -131,38 +116,18 @@ def action_space(self, agent): ############################################################################ # Core API - def change_task(self, - new_tasks: List[Union[Tuple[Task, float], Task]], - task_encoding: Optional[Dict[int, np.ndarray]] = None, - embedding_size: int=16, - reset: bool=True, - map_id=None, - seed=None, - 
options=None): - """ Changes the task given to each agent - - Args: - new_task: The task to complete and calculate rewards - task_encoding: A mapping from eid to encoded task - embedding_size: The size of each embedding - reset: Resets the environment - """ - self._tasks = [t if isinstance(t, Tuple) else (t,1) for t in new_tasks] - self._task_encoding = task_encoding - self._task_embedding_size = embedding_size - if reset: - self.reset(map_id=map_id, seed=seed, options=options) - # TODO: This doesn't conform to the PettingZoo API # pylint: disable=arguments-renamed - def reset(self, map_id=None, seed=None, options=None): + def reset(self, map_id=None, seed=None, options=None, + make_task_fn: Callable=None): '''OpenAI Gym API reset function Loads a new game map and returns initial observations Args: - idx: Map index to load. Selects a random map by default - + map_id: Map index to load. Selects a random map by default + seed: random seed to use + make_task_fn: A function to make tasks Returns: observations, as documented by _compute_observations() @@ -186,16 +151,16 @@ def reset(self, map_id=None, seed=None, options=None): if isinstance(ent.agent, Scripted): self.scripted_agents.add(eid) - self.tasks = copy.deepcopy(self._tasks) self.obs = self._compute_observations() self._gamestate_generator = GameStateGenerator(self.realm, self.config) - gym_obs = {} - for a, o in self.obs.items(): - gym_obs[a] = o.to_gym() - if self._task_encoding: - gym_obs[a]['Task'] = self._encode_goal().get(a,np.zeros(self._task_embedding_size)) - return gym_obs + if make_task_fn is not None: + self.tasks = make_task_fn() + else: + for task in self.tasks: + task.reset() + + return {a: o.to_gym() for a,o in self.obs.items()} def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): '''Simulates one game tick or timestep @@ -308,11 +273,7 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): # Store the observations, since actions reference them self.obs = 
self._compute_observations() - gym_obs = {} - for a, o in self.obs.items(): - gym_obs[a] = o.to_gym() - if self._task_encoding: - gym_obs[a]['Task'] = self._encode_goal()[a] + gym_obs = {a: o.to_gym() for a,o in self.obs.items()} rewards, infos = self._compute_rewards(self.obs.keys(), dones) @@ -321,8 +282,6 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): def _validate_actions(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): '''Deserialize action arg values and validate actions For now, it does a basic validation (e.g., value is not none). - - TODO(kywch): add sophisticated validation like use/sell/give on the same item ''' validated_actions = {} @@ -423,9 +382,6 @@ def _compute_observations(self): inventory, market) return obs - def _encode_goal(self): - return self._task_encoding - def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]): '''Computes the reward for the specified agent @@ -442,27 +398,23 @@ def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]): entity identified by ent_id. ''' # Initialization - self.game_state = self._gamestate_generator.generate(self.realm, self.obs) - infos = {} - for eid in agents: - infos[eid] = {} - infos[eid]['task'] = {} - rewards = {eid: 0 for eid in agents} + infos = {agent_id: {'task': {}} for agent_id in agents} + rewards = defaultdict(int) + agents = set(agents) + reward_cache = {} # Compute Rewards and infos - for task, weight in self.tasks: - task_rewards, task_infos = task.compute_rewards(self.game_state) - for eid, reward in task_rewards.items(): - # Rewards, weighted - rewards[eid] = rewards.get(eid,0) + reward * weight - # Infos - for eid, info in task_infos.items(): - if eid in infos: - infos[eid]['task'] = {**infos[eid]['task'], **info} - - # Remove rewards for dead agents (?) 
- for eid in dones: - rewards[eid] = 0 + self.game_state = self._gamestate_generator.generate(self.realm, self.obs) + for task in self.tasks: + if task in reward_cache: + task_rewards, task_infos = reward_cache[task] + else: + task_rewards, task_infos = task.compute_rewards(self.game_state) + reward_cache[task] = (task_rewards, task_infos) + for agent_id, reward in task_rewards.items(): + if agent_id in agents and agent_id not in dones: + rewards[agent_id] = rewards.get(agent_id,0) + reward + infos[agent_id]['task'][task.name] = task_infos[agent_id] # progress return rewards, infos diff --git a/nmmo/core/realm.py b/nmmo/core/realm.py index 52bcb557..8b3da598 100644 --- a/nmmo/core/realm.py +++ b/nmmo/core/realm.py @@ -76,9 +76,6 @@ def reset(self, map_id: int = None): self.log_helper.reset() self.event_log.reset() - if self._replay_helper is not None: - self._replay_helper.reset() - self.map.reset(map_id or np.random.randint(self.config.MAP_N) + 1) # EntityState and ItemState tables must be empty after players/npcs.reset() @@ -104,6 +101,9 @@ def reset(self, map_id: int = None): Item.INSTANCE_ID = 0 self.items = {} + if self._replay_helper is not None: + self._replay_helper.reset() + def packet(self): """Client packet""" return { diff --git a/nmmo/lib/spawn.py b/nmmo/lib/spawn.py index 97508266..8dffc750 100644 --- a/nmmo/lib/spawn.py +++ b/nmmo/lib/spawn.py @@ -135,5 +135,4 @@ def get_team_spawn_positions(config, num_teams): idx = int(len(side)*(i+1)/(teams_per_sides + 1)) team_spawn_positions.append(side[idx]) - np.random.shuffle(team_spawn_positions) return team_spawn_positions diff --git a/nmmo/lib/team_helper.py b/nmmo/lib/team_helper.py index caa81695..e624dca5 100644 --- a/nmmo/lib/team_helper.py +++ b/nmmo/lib/team_helper.py @@ -1,6 +1,5 @@ from typing import Dict, List - class TeamHelper(): def __init__(self, teams: Dict[int, List[int]]): self.teams = teams @@ -23,3 +22,16 @@ def agent_id(self, team_id: int, position: int) -> int: def 
is_agent_in_team(self, agent_id:int , team_id: int) -> bool: return agent_id in self.teams[team_id] + + def get_target_agent(self, team_id: int, target: str): + if target == 'left_team': + return self.teams[(team_id+1) % self.num_teams] + if target == 'left_team_leader': + return self.teams[(team_id+1) % self.num_teams][0] + if target == 'right_team': + return self.teams[(team_id-1) % self.num_teams] + if target == 'right_team_leader': + return self.teams[(team_id-1) % self.num_teams][0] + if target == 'my_team_leader': + return self.teams[team_id][0] + return None diff --git a/nmmo/render/replay_helper.py b/nmmo/render/replay_helper.py index bb58e6f0..a16564e7 100644 --- a/nmmo/render/replay_helper.py +++ b/nmmo/render/replay_helper.py @@ -37,6 +37,7 @@ def reset(self): self.packets = [] self.map = None self._i = 0 + self.update() # to capture the initial packet def __len__(self): return len(self.packets) diff --git a/nmmo/systems/skill.py b/nmmo/systems/skill.py index e8f43a0b..59e93c5c 100644 --- a/nmmo/systems/skill.py +++ b/nmmo/systems/skill.py @@ -265,13 +265,13 @@ def update(self): if not config.RESOURCE_SYSTEM_ENABLED: return + if config.IMMORTAL: + return + depletion = config.RESOURCE_DEPLETION_RATE water = self.entity.resources.water water.decrement(depletion) - if self.config.IMMORTAL: - return - if not self.harvest_adjacent(material.Water, deplete=False): return @@ -288,6 +288,9 @@ def update(self): if not config.RESOURCE_SYSTEM_ENABLED: return + if config.IMMORTAL: + return + depletion = config.RESOURCE_DEPLETION_RATE food = self.entity.resources.food food.decrement(depletion) diff --git a/nmmo/task/__init__.py b/nmmo/task/__init__.py index 4bd5626b..034e22ec 100644 --- a/nmmo/task/__init__.py +++ b/nmmo/task/__init__.py @@ -1,4 +1,3 @@ from .game_state import * +from .predicate_api import * from .task_api import * -from .scenario import * -from .team_helper import * diff --git a/nmmo/task/base_predicates.py b/nmmo/task/base_predicates.py index 
60e99ded..4f8dbaf1 100644 --- a/nmmo/task/base_predicates.py +++ b/nmmo/task/base_predicates.py @@ -1,9 +1,9 @@ #pylint: disable=invalid-name, unused-argument, no-value-for-parameter from __future__ import annotations +from typing import Iterable import numpy as np from numpy import count_nonzero as count -from nmmo.task.task_api import OR, define_predicate from nmmo.task.group import Group from nmmo.task.game_state import GameState from nmmo.task import constraint @@ -13,23 +13,23 @@ from nmmo.lib.material import Material from nmmo.lib import utils -@define_predicate +def norm(progress): + return max(min(progress, 1.0), 0.0) + def Success(gs: GameState, subject: Group): ''' Returns True. For debugging. ''' return True -@define_predicate def TickGE(gs: GameState, subject: Group = constraint.TEAM_GROUPS, num_tick: int = constraint.ScalarConstraint()): """True if the current tick is greater than or equal to the specified num_tick. Is progress counter. """ - return gs.current_tick / num_tick + return norm(gs.current_tick / num_tick) -@define_predicate def CanSeeTile(gs: GameState, subject: Group = constraint.TEAM_GROUPS, tile_type: type[Material]= constraint.MATERIAL_CONSTRAINT): @@ -37,21 +37,18 @@ def CanSeeTile(gs: GameState, """ return any(tile_type.index in t for t in subject.obs.tile.material_id) -@define_predicate def StayAlive(gs: GameState, subject: Group = constraint.TEAM_GROUPS): """True if all subjects are alive. """ return count(subject.health > 0) == len(subject) -@define_predicate def AllDead(gs: GameState, subject: Group = constraint.TEAM_GROUPS): """True if all subjects are dead. 
""" - return 1.0 - count(subject.health) / len(subject) + return norm(1.0 - count(subject.health) / len(subject)) -@define_predicate def OccupyTile(gs: GameState, subject: Group, row: int = constraint.COORDINATE_CONSTRAINT, @@ -60,7 +57,6 @@ def OccupyTile(gs: GameState, """ return np.any((subject.row == row) & (subject.col == col)) -@define_predicate def AllMembersWithinRange(gs: GameState, subject: Group = constraint.TEAM_GROUPS, dist: int = constraint.COORDINATE_CONSTRAINT): @@ -71,9 +67,8 @@ def AllMembersWithinRange(gs: GameState, subject.col.max()-subject.col.min()) if current_dist <= 0: return 1.0 - return dist / current_dist + return norm(dist / current_dist) -@define_predicate def CanSeeAgent(gs: GameState, subject: Group = constraint.TEAM_GROUPS, target: int = constraint.AGENT_NUMBER_CONSTRAINT): @@ -81,15 +76,13 @@ def CanSeeAgent(gs: GameState, """ return any(target in e.ids for e in subject.obs.entities) -@define_predicate def CanSeeGroup(gs: GameState, subject: Group = constraint.TEAM_GROUPS, - target: Group = constraint.TEAM_GROUPS): + target: Iterable[int] = constraint.AgentListConstraint): """ Returns True if subject can see any of target """ - return OR(*(CanSeeAgent(subject, agent) for agent in target.agents)) + return any(CanSeeAgent(gs, subject, agent) for agent in target) -@define_predicate def DistanceTraveled(gs: GameState, subject: Group = constraint.TEAM_GROUPS, dist: int = constraint.ScalarConstraint()): @@ -101,9 +94,8 @@ def DistanceTraveled(gs: GameState, r = subject.row c = subject.col dists = utils.linf(list(zip(r,c)),[gs.spawn_pos[id_] for id_ in subject.entity.id]) - return dists.sum() / dist + return norm(dists.sum() / dist) -@define_predicate def AttainSkill(gs: GameState, subject: Group = constraint.TEAM_GROUPS, skill: Skill = constraint.SKILL_CONSTRAINT, @@ -113,74 +105,83 @@ def AttainSkill(gs: GameState, is greather than or equal to num_agent """ skill_level = getattr(subject,skill.__name__.lower() + '_level') - return 
sum(skill_level >= level) / num_agent + return norm(sum(skill_level >= level) / num_agent) -@define_predicate def CountEvent(gs: GameState, subject: Group = constraint.TEAM_GROUPS, event: str = constraint.EVENTCODE_CONSTRAINT, - N: int = constraint.ScalarConstraint()): + N: int = constraint.EVENT_NUMBER_CONSTRAINT): """True if the number of events occured in subject corresponding to event >= N """ - return len(getattr(subject.event, event)) / N + return norm(len(getattr(subject.event, event)) / N) -@define_predicate def ScoreHit(gs: GameState, subject: Group = constraint.TEAM_GROUPS, combat_style: type[Skill] = constraint.COMBAT_SKILL_CONSTRAINT, - N: int = constraint.ScalarConstraint()): + N: int = constraint.EVENT_NUMBER_CONSTRAINT): """True if the number of hits scored in style combat_style >= count """ hits = subject.event.SCORE_HIT.combat_style == combat_style.SKILL_ID - return count(hits) / N + return norm(count(hits) / N) + +def DefeatEntity(gs: GameState, + subject: Group = constraint.TEAM_GROUPS, + agent_type: str = constraint.AGENT_TYPE_CONSTRAINT, + level: int = constraint.PROGRESSION_CONSTRAINT, + num_agent: int = constraint.AGENT_NUMBER_CONSTRAINT): + """True if the number of agents (agent_type, >= level) defeated + is greater than or equal to num_agent + """ + # NOTE: there is no way to tell if an agent is a teammate or an enemy + # so agents can get rewarded for killing their own teammates + defeated_type = subject.event.PLAYER_KILL.target_ent > 0 if agent_type == 'player' \ + else subject.event.PLAYER_KILL.target_ent < 0 + defeated = defeated_type & (subject.event.PLAYER_KILL.level >= level) + if num_agent > 0: + return norm(count(defeated) / num_agent) + return 1.0 -@define_predicate def HoardGold(gs: GameState, subject: Group = constraint.TEAM_GROUPS, - amount: int = constraint.ScalarConstraint()): + amount: int = constraint.GOLD_CONSTRAINT): """True iff the summed gold of all teammate is greater than or equal to amount. 
""" - return subject.gold.sum() / amount + return norm(subject.gold.sum() / amount) -@define_predicate def EarnGold(gs: GameState, subject: Group = constraint.TEAM_GROUPS, - amount: int = constraint.ScalarConstraint()): + amount: int = constraint.GOLD_CONSTRAINT): """ True if the total amount of gold earned is greater than or equal to amount. """ - return subject.event.EARN_GOLD.gold.sum() / amount + return norm(subject.event.EARN_GOLD.gold.sum() / amount) -@define_predicate def SpendGold(gs: GameState, subject: Group = constraint.TEAM_GROUPS, - amount: int = constraint.ScalarConstraint()): + amount: int = constraint.GOLD_CONSTRAINT): """ True if the total amount of gold spent is greater than or equal to amount. """ - return subject.event.BUY_ITEM.gold.sum() / amount + return norm(subject.event.BUY_ITEM.gold.sum() / amount) -@define_predicate def MakeProfit(gs: GameState, subject: Group = constraint.TEAM_GROUPS, - amount: int = constraint.ScalarConstraint()): + amount: int = constraint.GOLD_CONSTRAINT): """ True if the total amount of gold earned-spent is greater than or equal to amount. """ profits = subject.event.EARN_GOLD.gold.sum() costs = subject.event.BUY_ITEM.gold.sum() - return (profits-costs) / amount + return norm((profits-costs) / amount) -@define_predicate def InventorySpaceGE(gs: GameState, subject: Group = constraint.TEAM_GROUPS, - space: int = constraint.ScalarConstraint()): + space: int = constraint.INVENTORY_CONSTRAINT): """True if the inventory space of every subjects is greater than or equal to the space. Otherwise false. 
""" max_space = gs.config.ITEM_INVENTORY_CAPACITY return all(max_space - inv.len >= space for inv in subject.obs.inventory) -@define_predicate def OwnItem(gs: GameState, subject: Group = constraint.TEAM_GROUPS, item: type[Item] = constraint.ITEM_CONSTRAINT, @@ -191,12 +192,11 @@ def OwnItem(gs: GameState, """ owned = (subject.item.type_id == item.ITEM_TYPE_ID) & \ (subject.item.level >= level) - return sum(subject.item.quantity[owned]) / quantity + return norm(sum(subject.item.quantity[owned]) / quantity) -@define_predicate def EquipItem(gs: GameState, subject: Group = constraint.TEAM_GROUPS, - item: type[Item] = constraint.ITEM_CONSTRAINT, + item: type[Item] = constraint.EQUIPABLE_CONSTRAINT, level: int = constraint.PROGRESSION_CONSTRAINT, num_agent: int = constraint.AGENT_NUMBER_CONSTRAINT): """True if the number of agents that equip the item (_item_type, >=_level) @@ -206,10 +206,9 @@ def EquipItem(gs: GameState, (subject.item.level >= level) & \ (subject.item.equipped > 0) if num_agent > 0: - return count(equipped) / num_agent + return norm(count(equipped) / num_agent) return 1.0 -@define_predicate def FullyArmed(gs: GameState, subject: Group = constraint.TEAM_GROUPS, combat_style: type[Skill] = constraint.COMBAT_SKILL_CONSTRAINT, @@ -234,53 +233,49 @@ def FullyArmed(gs: GameState, _, equipment_numbers = np.unique(subject.item.owner_id[lvl_flt & type_flt], return_counts=True) if num_agent > 0: - return (equipment_numbers >= len(item_ids.items())).sum() / num_agent + return norm((equipment_numbers >= len(item_ids.items())).sum() / num_agent) return 1.0 -@define_predicate def ConsumeItem(gs: GameState, subject: Group = constraint.TEAM_GROUPS, item: type[Item] = constraint.CONSUMABLE_CONSTRAINT, level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.ScalarConstraint()): + quantity: int = constraint.EVENT_NUMBER_CONSTRAINT): """True if total quantity consumed of item type above level is >= quantity """ type_flt = 
subject.event.CONSUME_ITEM.type == item.ITEM_TYPE_ID lvl_flt = subject.event.CONSUME_ITEM.level >= level - return subject.event.CONSUME_ITEM.number[type_flt & lvl_flt].sum() / quantity + return norm(subject.event.CONSUME_ITEM.number[type_flt & lvl_flt].sum() / quantity) -@define_predicate def HarvestItem(gs: GameState, subject: Group = constraint.TEAM_GROUPS, - item: type[Item] = constraint.ITEM_CONSTRAINT, + item: type[Item] = constraint.HARVEST_CONSTRAINT, level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.ScalarConstraint()): + quantity: int = constraint.EVENT_NUMBER_CONSTRAINT): """True if total quantity harvested of item type above level is >= quantity """ type_flt = subject.event.HARVEST_ITEM.type == item.ITEM_TYPE_ID lvl_flt = subject.event.HARVEST_ITEM.level >= level - return subject.event.HARVEST_ITEM.number[type_flt & lvl_flt].sum() / quantity + return norm(subject.event.HARVEST_ITEM.number[type_flt & lvl_flt].sum() / quantity) -@define_predicate def ListItem(gs: GameState, subject: Group = constraint.TEAM_GROUPS, item: type[Item] = constraint.ITEM_CONSTRAINT, level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.ScalarConstraint()): + quantity: int = constraint.EVENT_NUMBER_CONSTRAINT): """True if total quantity listed of item type above level is >= quantity """ type_flt = subject.event.LIST_ITEM.type == item.ITEM_TYPE_ID lvl_flt = subject.event.LIST_ITEM.level >= level - return subject.event.LIST_ITEM.number[type_flt & lvl_flt].sum() / quantity + return norm(subject.event.LIST_ITEM.number[type_flt & lvl_flt].sum() / quantity) -@define_predicate def BuyItem(gs: GameState, subject: Group = constraint.TEAM_GROUPS, item: type[Item] = constraint.ITEM_CONSTRAINT, level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.ScalarConstraint()): + quantity: int = constraint.EVENT_NUMBER_CONSTRAINT): """True if total quantity purchased of item type above level is >= quantity """ type_flt = 
subject.event.BUY_ITEM.type == item.ITEM_TYPE_ID lvl_flt = subject.event.BUY_ITEM.level >= level - return subject.event.BUY_ITEM.number[type_flt & lvl_flt].sum() / quantity + return norm(subject.event.BUY_ITEM.number[type_flt & lvl_flt].sum() / quantity) diff --git a/nmmo/task/constraint.py b/nmmo/task/constraint.py index 5f2b48fc..d19686bc 100644 --- a/nmmo/task/constraint.py +++ b/nmmo/task/constraint.py @@ -2,13 +2,13 @@ import random from numbers import Number -from typing import Union, Callable +from typing import Union, Callable, Dict from abc import ABC, abstractmethod from nmmo.systems import skill, item from nmmo.lib import material +from nmmo.lib.log import EventCode from nmmo.core.config import Config -from nmmo.task.team_helper import TeamHelper class InvalidConstraint(Exception): pass @@ -43,11 +43,18 @@ def sample(self, config: Config): def __str__(self): return self.__class__.__name__ +# This is a dummy function for GroupConstraint +# NOTE: config does not have team info +def sample_one_big_team(config): + from nmmo.task.group import Group + team = list(range(1, config.PLAYER_N+1)) + return [Group(team, 'All')] + class GroupConstraint(Constraint): """ Ensures that all agents of a group exist in a config """ def __init__(self, - sample_fn = lambda c: TeamHelper.generate_from_config(c).all_teams, + sample_fn = sample_one_big_team, systems = None): """ Params @@ -68,6 +75,23 @@ def check(self, config, value): def sample(self, config): return random.choice(self._sample_fn(config)) + def sample_from_teams(self, teams: Dict[int, Dict]): + from nmmo.task.group import Group + team_id = random.choice(list(teams.keys())) + return Group(teams[team_id], str(team_id)) + +class AgentListConstraint(Constraint): + """ Ensures that all agents of the list exist in a config + """ + def check(self, config, value): + for agent in value: + if agent > config.PLAYER_N: + return False + return True + + def sample(self, config): + return None + class 
ScalarConstraint(Constraint): def __init__(self, low: Union[Callable, Number] = 0, @@ -97,7 +121,7 @@ def sample(self, config): class DiscreteConstraint(Constraint): def __init__(self, space, systems=None): super().__init__(systems) - self._space = space + self._space = set(space) def check(self, config: Config, value): if not super().check(config,value): @@ -105,37 +129,34 @@ def check(self, config: Config, value): return value in self._space def sample(self, config: Config): + # NOTE: this does NOT need to be deterministic - return random.choice(self._space) + return random.choice(list(self._space)) # Group Constraints TEAM_GROUPS = GroupConstraint() -INDIVIDUAL_GROUPS=GroupConstraint(sample_fn=lambda c:TeamHelper.generate_from_config(c).all_agents) +INDIVIDUAL_GROUPS=GroupConstraint() +AGENT_LIST_CONSTRAINT = AgentListConstraint() -# System Constraints +# Tile Constraints MATERIAL_CONSTRAINT = DiscreteConstraint(space=list(material.All.materials), systems=['TERRAIN_SYSTEM_ENABLED', 'RESOURCE_SYSTEM_ENABLED']) HABITABLE_CONSTRAINT = DiscreteConstraint(space=list(material.Habitable.materials), systems=['TERRAIN_SYSTEM_ENABLED']) + +# Event Constraints +event_names = [k for k, v in EventCode.__dict__.items() if isinstance(v,int)] +EVENTCODE_CONSTRAINT = DiscreteConstraint(space=event_names) + +# Skill Constraints combat_skills = [skill.Melee, skill.Mage, skill.Range] -basic_skills = [skill.Water, skill.Food] harvest_skills = [skill.Fishing, skill.Herbalism, skill.Prospecting, skill.Alchemy, skill.Carving] -SKILL_CONSTRAINT = DiscreteConstraint(space=combat_skills+basic_skills+harvest_skills, +SKILL_CONSTRAINT = DiscreteConstraint(space=combat_skills+harvest_skills, systems=['PROFESSION_SYSTEM_ENABLED']) COMBAT_SKILL_CONSTRAINT = DiscreteConstraint(space=combat_skills, systems=['PROFESSION_SYSTEM_ENABLED']) -EVENTCODE_CONSTRAINT = DiscreteConstraint(space=['EAT_FOOD', - 'DRINK_WATER', - 'SCORE_HIT', - 'PLAYER_KILL', - 'CONSUME_ITEM', - 'GIVE_ITEM', - 'DESTROY_ITEM', - 'HARVEST_ITEM', - 'GIVE_GOLD', - 
'LIST_ITEM', - 'EARN_GOLD', - 'BUY_ITEM']) + +# Item Constraints armour = [item.Hat, item.Top, item.Bottom] weapons = [item.Spear, item.Bow, item.Wand] tools = [item.Axe, item.Gloves, item.Rod, item.Pickaxe, item.Chisel] @@ -143,10 +164,20 @@ def sample(self, config: Config): consumables = [item.Potion, item.Ration] ITEM_CONSTRAINT = DiscreteConstraint(space=armour+weapons+tools+ammunition+consumables, systems=['ITEM_SYSTEM_ENABLED']) +EQUIPABLE_CONSTRAINT = DiscreteConstraint(space=armour+weapons+tools+ammunition, + systems=['ITEM_SYSTEM_ENABLED']) CONSUMABLE_CONSTRAINT = DiscreteConstraint(space=consumables, systems=['ITEM_SYSTEM_ENABLED']) +HARVEST_CONSTRAINT = DiscreteConstraint(space=weapons+ammunition+consumables, + systems=['ITEM_SYSTEM_ENABLED']) + # Config Constraints COORDINATE_CONSTRAINT = ScalarConstraint(high = lambda c: c.MAP_CENTER) PROGRESSION_CONSTRAINT = ScalarConstraint(high = lambda c: c.PROGRESSION_LEVEL_MAX+1) INVENTORY_CONSTRAINT = ScalarConstraint(high=lambda c: c.ITEM_INVENTORY_CAPACITY+1) AGENT_NUMBER_CONSTRAINT = ScalarConstraint(low = 1, high = lambda c: c.PLAYER_N+1) + +# Arbitrary Constraints +EVENT_NUMBER_CONSTRAINT = ScalarConstraint(low = 1, high = 110) +GOLD_CONSTRAINT = ScalarConstraint(low = 1, high = 1000) +AGENT_TYPE_CONSTRAINT = DiscreteConstraint(space=['npc','player']) diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index 583d0797..fb57ba4c 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -43,7 +43,7 @@ class GameState: def entity_or_none(self, ent_id): flt_ent = self.entity_data[:, EntityAttr['id']] == ent_id if np.any(flt_ent): - return EntityAttr.parse_array(self.entity_data[flt_ent][0]) + return EntityState.parse_array(self.entity_data[flt_ent][0]) return None diff --git a/nmmo/task/group.py b/nmmo/task/group.py index 29aa1b0b..442778c1 100644 --- a/nmmo/task/group.py +++ b/nmmo/task/group.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import Dict, Iterable, 
TYPE_CHECKING +from typing import Dict, Union, Iterable, TYPE_CHECKING from collections import OrderedDict from collections.abc import Set, Sequence @@ -10,9 +10,11 @@ class Group(Sequence, Set): ''' An immutable, ordered, unique group of agents involved in a task ''' def __init__(self, - agents: Iterable[int], + agents: Union[Iterable[int], int], name: str=None): + if isinstance(agents, int): + agents = (agents,) assert len(agents) > 0, "Team must have at least one agent" self.name = name if name else f"Agent({','.join([str(e) for e in agents])})" # Remove duplicates diff --git a/nmmo/task/predicate_api.py b/nmmo/task/predicate_api.py new file mode 100644 index 00000000..e71f2cc1 --- /dev/null +++ b/nmmo/task/predicate_api.py @@ -0,0 +1,299 @@ +from __future__ import annotations +from typing import Callable, List, Optional, Tuple, Union, Iterable, TYPE_CHECKING +from types import FunctionType +from abc import ABC, abstractmethod +import inspect +from numbers import Real + +from nmmo.core.config import Config +from nmmo.task.group import Group, union +from nmmo.task.game_state import GameState +from nmmo.task.constraint import Constraint, InvalidConstraint, GroupConstraint + +if TYPE_CHECKING: + from nmmo.task.task_api import Task + +class InvalidPredicateDefinition(Exception): + pass + +class Predicate(ABC): + """ A mapping from a game state to bounded [0, 1] float + """ + def __init__(self, + subject: Group, + *args, + constraints: Optional[List[Tuple[str,Optional[Constraint]]]] = None, + **kwargs): + self.name = self._make_name(self.__class__.__name__, args, kwargs) + + self._groups: List[Group] = [x for x in list(args) + list(kwargs.values()) + if isinstance(x, Group)] + + self._groups.append(subject) + + self._args = args + self._kwargs = kwargs + self._constraints = constraints + self._config = None + self._subject = subject + + def __call__(self, gs: GameState) -> float: + """ Calculates score + + Params: + gs: GameState + + Returns: + progress: float 
bounded between [0, 1], 1 is considered to be true + """ + if not self._config == gs.config: + # TODO(mark) should we make this explicitly called by environment + self._reset(gs.config) + # Update views + for group in self._groups: + group.update(gs) + # Calculate score + # cache = gs.cache_result + if self.name in gs.cache_result: + progress = gs.cache_result[self.name] + else: + progress = max(min(self._evaluate(gs)*1.0,1.0),0.0) + gs.cache_result[self.name] = progress + return progress + + def _reset(self, config: Config): + self._config = config + if not self.check(self._config): + raise InvalidConstraint() + + def check(self, config: Config): + """ Checks whether the predicate is valid + + A satisfiable predicate "makes sense" given a config + ie. Not trying to reach target off the map + """ + if not GroupConstraint().check(config, self._subject): + return False + for i, (name, constraint) in enumerate(self._constraints): + if constraint is None: + continue + if i < len(self._args): + if not constraint.check(config, self._args[i]): + return False + elif not constraint.check(config, self._kwargs[name]): + return False + return True + + def sample(self, config: Config, **overload): + """ Samples a concrete instance of a given task. + + Allows overloading of previous parameters. + """ + # Sample Constraint + nargs = [arg.sample(config) if isinstance(arg, Constraint) else arg + for arg in self._args] + nkwargs = {k : v.sample(config) if isinstance(v, Constraint) else v + for k,v in self._kwargs.items()} + for i, (name, _) in enumerate(self._constraints): + if i < len(nargs): + if name in nkwargs: + raise InvalidPredicateDefinition("Constraints should match arguments.") + nkwargs[name] = nargs[i] + else: + break + + for k, v in overload.items(): + nkwargs[k] = v + # Result + return self.__class__(**nkwargs) + + @abstractmethod + def _evaluate(self, gs: GameState) -> float: + """ A mapping from a game state to the desirability/progress of that state. 
+ __call__() will cap its value to [0, 1] + """ + raise NotImplementedError + + def _make_name(self, class_name, args, kwargs) -> str: + name = [class_name] + \ + list(map(arg_to_string, args)) + \ + [f"{arg_to_string(key)}:{arg_to_string(arg)}" for key, arg in kwargs.items()] + name = "("+'_'.join(name).replace(' ', '')+")" + return name + + def __str__(self): + return self.name + + @property + def subject(self): + return self._subject + + def create_task(self, task_cls: Task=None, + assignee: Union[Iterable[int], int]=None, + reward_multiplier=1.0) -> Task: + """ Creates a task from this predicate""" + if task_cls is None: + from nmmo.task.task_api import Task + task_cls = Task + + if assignee is None: + # the new task is assigned to this predicate's subject + assignee = self._subject.agents + + return task_cls(eval_fn=self, assignee=assignee, reward_multiplier=reward_multiplier) + + def __and__(self, other): + return AND(self, other) + def __or__(self, other): + return OR(self, other) + def __invert__(self): + return NOT(self) + def __add__(self, other): + return ADD(self, other) + def __radd__(self, other): + return ADD(self, other) + def __sub__(self, other): + return SUB(self, other) + def __rsub__(self, other): + return SUB(other, self) + def __mul__(self, other): + return MUL(self, other) + def __rmul__(self, other): + return MUL(self, other) + +# _make_name helper functions +def arg_to_string(arg): + if isinstance(arg, (type, FunctionType)): # class or function + return arg.__name__ + if arg is None: + return 'Any' + return str(arg) + +################################################ + +def make_predicate(fn: Callable) -> type[Predicate]: + """ Syntactic sugar API for defining predicates from function + """ + signature = inspect.signature(fn) + for i, param in enumerate(signature.parameters.values()): + if i == 0 and param.name != 'gs': + raise InvalidPredicateDefinition('First parameter must be gs: GameState') + if i == 1 and (param.name != 'subject'): + 
raise InvalidPredicateDefinition("Second parameter must be subject: Group") + + class FunctionPredicate(Predicate): + def __init__(self, *args, **kwargs) -> None: + constraints = [] + self._signature = signature + args = list(args) + for i, param in enumerate(self._signature.parameters.values()): + if i == 0: + continue + # Calculate list of constraints + if isinstance(param.default, Constraint): + constraints.append((param.name,param.default)) + else: + constraints.append((param.name,None)) + # Insert default values from function definition + if not param.name in kwargs and i-1 >= len(args): + if param.default == inspect.Parameter.empty: + args.append(param.default) + else: + kwargs[param.name] = param.default + super().__init__(*args, **kwargs, constraints=constraints) + self._args = args + self._kwargs = kwargs + self.name = self._make_name(fn.__name__, args, kwargs) + def _evaluate(self, gs: GameState) -> float: + # pylint: disable=redefined-builtin, unused-variable + __doc = fn.__doc__ + result = fn(gs, *self._args, **self._kwargs) + if isinstance(result, Predicate): + return result(gs) + return result + + return FunctionPredicate + + +################################################ +class PredicateOperator(Predicate): + def __init__(self, n, *predicates: Union[Predicate, Real], subject: Group=None): + if not n(len(predicates)): + raise InvalidPredicateDefinition(f"Need {n} arguments") + predicates = list(predicates) + self._subject_argument = subject + if subject is None: + try: + subject = union(*[p.subject + for p in filter(lambda p: isinstance(p, Predicate), predicates)]) + except AttributeError: + subject = GroupConstraint() + super().__init__(subject, *predicates) + + for i, p in enumerate(predicates): + if isinstance(p, Real): + predicates[i] = lambda _,v=predicates[i] : v + self._predicates = predicates + + def check(self, config: Config) -> bool: + return all((p.check(config) if isinstance(p, Predicate) + else True for p in self._predicates)) + + def 
sample(self, config: Config, cls: type[PredicateOperator], **kwargs): + subject = self._subject_argument if 'subject' not in kwargs else kwargs['subject'] + predicates = [p.sample(config, **kwargs) if isinstance(p, Predicate) + else p(None) for p in self._predicates] + return cls(*predicates, subject=subject) + +class OR(PredicateOperator, Predicate): + def __init__(self, *predicates: Predicate, subject: Group=None): + super().__init__(lambda n: n>0, *predicates, subject=subject) + def _evaluate(self, gs: GameState) -> float: + # using max as OR for the [0,1] float + return max(p(gs) for p in self._predicates) + def sample(self, config: Config, **kwargs): + return super().sample(config, OR, **kwargs) + +class AND(PredicateOperator, Predicate): + def __init__(self, *predicates: Predicate, subject: Group=None): + super().__init__(lambda n: n>0, *predicates, subject=subject) + def _evaluate(self, gs: GameState) -> float: + # using min as AND for the [0,1] float + return min(p(gs) for p in self._predicates) + def sample(self, config: Config, **kwargs): + return super().sample(config, AND, **kwargs) + +class NOT(PredicateOperator, Predicate): + def __init__(self, predicate: Predicate, subject: Group=None): + super().__init__(lambda n: n==1, predicate, subject=subject) + def _evaluate(self, gs: GameState) -> float: + return 1.0 - self._predicates[0](gs) + def sample(self, config: Config, **kwargs): + return super().sample(config, NOT, **kwargs) + +class ADD(PredicateOperator, Predicate): + def __init__(self, *predicate: Union[Predicate, Real], subject: Group=None): + super().__init__(lambda n: n>0, *predicate, subject=subject) + def _evaluate(self, gs: GameState) -> float: + return max(min(sum(p(gs) for p in self._predicates),1.0),0.0) + def sample(self, config: Config, **kwargs): + return super().sample(config, ADD, **kwargs) + +class SUB(PredicateOperator, Predicate): + def __init__(self, p: Predicate, q: Union[Predicate, Real], subject: Group=None): + 
super().__init__(lambda n: n==2, p,q, subject=subject) + def _evaluate(self, gs: GameState) -> float: + return max(min(self._predicates[0](gs)-self._predicates[1](gs),1.0),0.0) + def sample(self, config: Config, **kwargs): + return super().sample(config, SUB, **kwargs) + +class MUL(PredicateOperator, Predicate): + def __init__(self, *predicate: Union[Predicate, Real], subject: Group=None): + super().__init__(lambda n: n>0, *predicate, subject=subject) + def _evaluate(self, gs: GameState) -> float: + result = 1.0 + for p in self._predicates: + result = result * p(gs) + return max(min(result,1.0),0.0) + def sample(self, config: Config, **kwargs): + return super().sample(config, MUL, **kwargs) diff --git a/nmmo/task/scenario.py b/nmmo/task/scenario.py deleted file mode 100644 index 7efeb210..00000000 --- a/nmmo/task/scenario.py +++ /dev/null @@ -1,76 +0,0 @@ -from __future__ import annotations - -import copy -from typing import Callable, Union, Iterable, \ - Optional, List, Tuple -from nmmo.core.config import Config -from nmmo.task.group import Group -from nmmo.task.team_helper import TeamHelper -from nmmo.task.task_api import Task, Repeat -from nmmo.task.base_predicates import StayAlive - -class Scenario: - ''' Utility class to aid in defining common tasks - ''' - def __init__(self, config: Config): - config = copy.deepcopy(config) - self.team_helper = TeamHelper.generate_from_config(config) - self.config = config - self._tasks: List[Task] = [] - - def add_task(self, task: Task): - self._tasks.append(task) - - def add_tasks(self, - tasks: Union[Task, - Iterable[Task], - Callable[[Group], Task]], - groups: Optional[Union[str,Iterable[Group]]] = 'teams') -> None: - # pylint: disable=unnecessary-lambda-assignment - """ Utility function to define symmetric tasks - - Params: - - tasks: - Iterable[Task]: - For each Task in the iterable, add to scenario. - Callable[[Group], Task]: - A function taking in a group and return a task. 
- The result from applying this function to "groups" is added to - the scenario. - Task: - Mapped to Callable by overriding subject - - groups: - Foreach group in groups, add a task. - """ - # Tasks - if isinstance(tasks, Iterable): - for task in tasks: - self.add_task(task) - return - - # Functional Syntax - # Tasks - if isinstance(tasks, Task): - task_generator = lambda group: tasks.sample(config=self.config, subject=group) - else: - task_generator = tasks - # Groups - if isinstance(groups, str): - assert(groups in ['agents','teams']) - if groups == 'agents': - groups = self.team_helper.all_agents - elif groups == 'teams': - groups = self.team_helper.all_teams - # Create - self.add_tasks([task_generator(group) for group in groups]) - - @property - def tasks(self) -> List[Task]: - return self._tasks - -def default_task(agents) -> List[Tuple[Task, float]]: - '''Generates the default reward on env.init - ''' - return [Repeat(StayAlive(Group([agent]))) for agent in agents] diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index df18db83..8bc5d587 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -1,141 +1,85 @@ -from __future__ import annotations -from typing import Callable, Dict, List, Optional, Tuple, Union -from abc import ABC, abstractmethod -import inspect -from numbers import Real -import math +# pylint: disable=unused-import +from typing import Callable, Iterable, Dict, List, Union, Tuple +from types import FunctionType +from abc import ABC -from nmmo.core.config import Config -from nmmo.task.group import Group, union -from nmmo.task.game_state import GameState -from nmmo.task.constraint import Constraint, InvalidConstraint, GroupConstraint - -class InvalidTaskDefinition(Exception): - pass +from nmmo.task.group import Group +from nmmo.task.predicate_api import Predicate, make_predicate, arg_to_string +from nmmo.task import base_predicates as bp +from nmmo.lib.team_helper import TeamHelper class Task(ABC): - """ A task is used to 
calculate rewards for agents in "assignee" + """ A task is used to calculate rewards for agents in assignee + based on the predicate and game state """ def __init__(self, - subject: Group, - *args, - constraints: Optional[List[Tuple[str,Optional[Constraint]]]] = None, - **kwargs): - self.name = self._make_name(self.__class__.__name__, args, kwargs) - - def is_group(x): - return isinstance(x, Group) - self._groups: List[Group] = list(filter(is_group, args)) - self._groups = self._groups + list(filter(is_group, kwargs.values())) - self._groups.append(subject) - - self._args = args - self._kwargs = kwargs - self._constraints = constraints - self._config = None - self._score = 0.0 - self._subject = subject + eval_fn: Callable, + assignee: Union[Iterable[int], int], + reward_multiplier = 1.0): + if isinstance(assignee, int): + self._assignee = (assignee,) + else: + assert len(assignee) > 0, "Assignee cannot be empty" + self._assignee = tuple(set(assignee)) # dedup + self._eval_fn = eval_fn + self._progress = 0.0 + self._completed = False + self._reward_multiplier = reward_multiplier - def compute_rewards(self, gs) -> Tuple[Dict[int, float], Dict[int, Dict]]: - """ Environment facing API + self.name = self._make_name(self.__class__.__name__, + eval_fn=eval_fn, assignee=self._assignee) - Returns rewards and infos for all agents in subject - """ - reward = self(gs) - self._score - self._score += reward - rewards = {int(ent_id): reward for ent_id in self._subject} - infos = {int(ent_id): {self.name: self._score} - for ent_id in self._subject} - return rewards, infos + def reset(self): + self._progress = 0.0 + self._completed = False - def __call__(self, gs: GameState) -> float: - """ Calculates score + @property + def assignee(self) -> Tuple[int]: + return self._assignee - Params: - gs: GameState + @property + def completed(self) -> bool: + return self._completed - Returns: - score - """ - if not self._config == gs.config: - # TODO(mark) should we make this explicitly 
called by environment - self._reset(gs.config) - # Update views - for group in self._groups: - group.update(gs) - # Calculate score - cache = gs.cache_result - if self.name in cache: - score = cache[self.name] - else: - score = self._evaluate(gs) - cache[self.name] = score - # Calculate score - return score - - def _reset(self, config: Config): - self._score = 0.0 - self._config = config - if not self.check(self._config): - raise InvalidConstraint() - - def check(self, config: Config): - """ Checks whether the task is valid - - A satisfiable task "makes sense" given a config - ie. Not trying to reach target off the map - """ - if not GroupConstraint().check(config, self._subject): - return False - for i, (name, constraint) in enumerate(self._constraints): - if constraint is None: - continue - if i < len(self._args): - if not constraint.check(config, self._args[i]): - return False - elif not constraint.check(config, self._kwargs[name]): - return False - return True - - def sample(self, config: Config, **overload): - """ Samples a concrete instance of a given task. - - Allows overloading of previous parameters. - """ - # Sample Constraint - nargs = [arg.sample(config) if isinstance(arg, Constraint) else arg - for arg in self._args] - nkwargs = {k : v.sample(config) if isinstance(v, Constraint) else v - for k,v in self._kwargs.items()} - for i, (name, _) in enumerate(self._constraints): - if i < len(nargs): - if name in nkwargs: - raise InvalidTaskDefinition("Constraints should match arguments.") - nkwargs[name] = nargs[i] - else: - break + @property + def reward_multiplier(self) -> float: + return self._reward_multiplier - for k, v in overload.items(): - nkwargs[k] = v - # Result - return self.__class__(**nkwargs) + def _map_progress_to_reward(self, gs) -> float: + """ The default reward is the diff between the old and new progress. + Once the task is completed, no more reward is provided. 
- @abstractmethod - def _evaluate(self, gs: GameState) -> float: - """ A mapping from a game state to the desirability of that state. + Override this function to create a custom reward function """ - raise NotImplementedError + if self._completed: + return 0.0 + + new_progress = max(min(self._eval_fn(gs)*1.0,1.0),0.0) + diff = new_progress - self._progress + self._progress = new_progress + if self._progress >= 1: + self._completed = True - def _make_name(self, class_name, args, kwargs) -> str: - def arg_to_string(arg): - if isinstance(arg, type): # class - return arg.__name__ - if arg is None: - return 'Any' - return str(arg) + return diff + def compute_rewards(self, gs) -> Tuple[Dict[int, float], Dict[int, Dict]]: + """ Environment facing API + + Returns rewards and infos for all agents in subject + """ + reward = self._map_progress_to_reward(gs) * self._reward_multiplier + rewards = {int(ent_id): reward for ent_id in self._assignee} + infos = {int(ent_id): {'reward': reward, + 'progress': self._progress, + 'completed': self._completed} + for ent_id in self._assignee} + + # NOTE: tasks do not know whether assignee agents are alive or dead + # so the Env must check it before filling in rewards and infos + return rewards, infos + + def _make_name(self, class_name, **kwargs) -> str: name = [class_name] + \ - list(map(arg_to_string, args)) + \ [f"{arg_to_string(key)}:{arg_to_string(arg)}" for key, arg in kwargs.items()] name = "("+'_'.join(name).replace(' ', '')+")" return name @@ -143,233 +87,110 @@ def arg_to_string(arg): def __str__(self): return self.name - @property - def subject(self): - return self._subject - - def __add__(self, other): - return ADD(self, other) - def __radd__(self, other): - return ADD(self, other) - def __mul__(self, other): - return MUL(self, other) - def __rmul__(self, other): - return MUL(self, other) - def __and__(self, other): - return AND(self, other) - def __or__(self, other): - return OR(self, other) - def __invert__(self): - return 
NOT(self) - -class Predicate(Task): - """ A task with evaluate restricted to boolean values. - - True = 1.0 - False = 0.0 +class OngoingTask(Task): + def _map_progress_to_reward(self, gs) -> float: + """Keep returning the progress reward after the task is completed. + However, this task tracks the completion status in the same manner. + """ + self._progress = max(min(self._eval_fn(gs)*1.0,1.0),0.0) + if self._progress >= 1: + self._completed = True + return self._progress + + +###################################################################### + +# The same task is assigned each agent in agent_list individually +# with the agent as the predicate subject and task assignee +def make_same_task(predicate: Union[Predicate, Callable], + agent_list: Iterable[int], + task_cls = Task, **kwargs) -> List[Task]: + # if a function is provided, make it a predicate class + if isinstance(predicate, FunctionType): + predicate = make_predicate(predicate) + + return [predicate(Group(agent_id),**kwargs).create_task(task_cls=task_cls) + for agent_id in agent_list] + +def nmmo_default_task(agent_list: Iterable[int], test_mode=None) -> List[Task]: + # (almost) no overhead in env._compute_rewards() + if test_mode == 'no_task': + return [] + + # eval function on Predicate class, but does not use Group during eval + if test_mode == 'dummy_eval_fn': + # pylint: disable=unused-argument + return make_same_task(lambda gs, subject: True, agent_list, task_cls=OngoingTask) + + # the default is to use the predicate class + return make_same_task(bp.StayAlive, agent_list, task_cls=OngoingTask) + +###################################################################### +# TODO: a lot to improve below + +REWARD_TO = ['agent', 'team'] +VALID_TARGET = ['left_team', 'left_team_leader', + 'right_team', 'right_team_leader', + 'my_team_leader'] + +def make_team_tasks(teams, task_spec) -> List[Task]: """ - def __call__(self, gs: GameState) -> float: - if not self._config == gs.config: - 
self._reset(gs.config) - # Update views - for group in self._groups: - group.update(gs) - # Calculate score - cache = gs.cache_result - if self.name in cache: - score = cache[self.name] - else: - score = max(min(self._evaluate(gs)*1,1.0),0.0) - cache[self.name] = score - # Calculate score - return score - - def __and__(self, other): - return PAND(self, other) - def __or__(self, other): - return POR(self, other) - def __invert__(self): - return PNOT(self) - def __rshift__(self, other): - return IMPLY(self, other) - -################################################ - -def define_task(fn: Callable) -> type[Task]: - """ Syntactic sugar API for defining tasks - - See examples at base_predicates.py + task_spec: a list of tuples (reward_to, eval_fn, **kwargs) + + each tuple is assigned to the teams """ - signature = inspect.signature(fn) - for i, param in enumerate(signature.parameters.values()): - if i == 0 and param.name != 'gs': - raise InvalidTaskDefinition('First parameter must be gs: GameState') - if i == 1 and (param.name != 'subject'): - raise InvalidTaskDefinition("Second parameter must be subject: Group") - - class FunctionTask(Task): - def __init__(self, *args, **kwargs) -> None: - constraints = [] - self._signature = signature - args = list(args) - for i, param in enumerate(self._signature.parameters.values()): - if i == 0: - continue - # Calculate list of constraints - if isinstance(param.default, Constraint): - constraints.append((param.name,param.default)) - else: - constraints.append((param.name,None)) - # Insert default values from function definition - if not param.name in kwargs and i-1 >= len(args): - if param.default == inspect.Parameter.empty: - args.append(param.default) - else: - kwargs[param.name] = param.default - super().__init__(*args, **kwargs, constraints=constraints) - self._args = args - self._kwargs = kwargs - self.name = self._make_name(fn.__name__, args, kwargs) - def _evaluate(self, gs: GameState) -> float: - # pylint: 
disable=redefined-builtin, unused-variable - __doc = fn.__doc__ - result = fn(gs, *self._args, **self._kwargs) - if isinstance(result, Task): - return result(gs) - return result - - return FunctionTask - -def define_predicate(fn: Callable) -> type[Predicate]: - T = define_task(fn) - class FunctionPredicate(Predicate, T): - # pylint: disable=super-init-not-called - def __init__(self, *args, **kwargs) -> None: - T.__init__(self, *args, **kwargs) - return FunctionPredicate - -################################################ -class TaskOperator(Task): - def __init__(self, n, *tasks: Union[Task, Real] ,subject: Group=None): - if not n(len(tasks)): - raise InvalidTaskDefinition(f"Need {n} arguments") - tasks = list(tasks) - self._subject_argument = subject - if subject is None: - try: - subject = union(*[t.subject for t in filter(lambda t: isinstance(t, Task), tasks)]) - except AttributeError: - subject = GroupConstraint() - super().__init__(subject, *tasks) - - for i, t in enumerate(tasks): - if isinstance(t, Real): - tasks[i] = lambda _,v=tasks[i] : v - self._tasks = tasks - - def check(self, config: Config) -> bool: - return all((t.check(config) if isinstance(t, Task) else True for t in self._tasks)) - - def sample(self, config: Config, cls: type[TaskOperator], **kwargs): - subject = self._subject_argument if 'subject' not in kwargs else kwargs['subject'] - tasks = [t.sample(config, **kwargs) if isinstance(t, Task) else t(None) for t in self._tasks] - return cls(*tasks, subject=subject) -class OR(TaskOperator): - def __init__(self, *tasks: Union[Task, Real], subject: Group=None): - super().__init__(lambda n: n>0, *tasks, subject=subject) - def _evaluate(self, gs: GameState) -> float: - return max(t(gs) for t in self._tasks) - def sample(self, config: Config, **kwargs): - return super().sample(config, OR, **kwargs) - -class AND(TaskOperator): - def __init__(self, *tasks: Union[Task, Real], subject: Group=None): - super().__init__(lambda n: n>0, *tasks, subject=subject) 
- def _evaluate(self, gs: GameState) -> float: - return min(t(gs) for t in self._tasks) - def sample(self, config: Config, **kwargs): - return super().sample(config, AND, **kwargs) - -class NOT(TaskOperator): - def __init__(self, *tasks: Union[Task, Real], subject: Group=None): - super().__init__(lambda n: n>0, *tasks, subject=subject) - def _evaluate(self, gs: GameState) -> float: - return -sum(t(gs) for t in self._tasks) - def sample(self, config: Config, **kwargs): - return super().sample(config, NOT, **kwargs) - -class ADD(TaskOperator): - def __init__(self, *tasks: Union[Task, Real], subject: Group=None): - super().__init__(lambda n: n>0, *tasks, subject=subject) - def _evaluate(self, gs: GameState) -> float: - return sum(t(gs) for t in self._tasks) - def sample(self, config: Config, **kwargs): - return super().sample(config, ADD, **kwargs) - -class MUL(TaskOperator): - def __init__(self, *tasks: Union[Task, Real], subject: Group=None): - super().__init__(lambda n: n>0, *tasks, subject=subject) - def _evaluate(self, gs: GameState) -> float: - result = 1.0 - for t in self._tasks: - result = result * t(gs) - return result - def sample(self, config: Config, **kwargs): - return super().sample(config, MUL, **kwargs) - -class POR(TaskOperator, Predicate): - def __init__(self, *tasks: Predicate, subject: Group=None): - super().__init__(lambda n: n>0, *tasks, subject=subject) - def _evaluate(self, gs: GameState) -> float: - return any(t(gs) for t in self._tasks) - def sample(self, config: Config, **kwargs): - return super().sample(config, POR, **kwargs) - -class PAND(TaskOperator, Predicate): - def __init__(self, *tasks: Predicate, subject: Group=None): - super().__init__(lambda n: n>0, *tasks, subject=subject) - def _evaluate(self, gs: GameState) -> float: - return all(t(gs) for t in self._tasks) - def sample(self, config: Config, **kwargs): - return super().sample(config, PAND, **kwargs) - -class PNOT(TaskOperator, Predicate): - def __init__(self, task: Predicate, 
subject: Group=None): - super().__init__(lambda n: n==1, task, subject=subject) - def _evaluate(self, gs: GameState) -> float: - return not self._tasks[0](gs) - def sample(self, config: Config, **kwargs): - return super().sample(config, PNOT, **kwargs) - -class IMPLY(TaskOperator, Predicate): - def __init__(self, p: Predicate, q: Predicate, subject: Group=None): - super().__init__(lambda n: n==2, p,q, subject=subject) - def _evaluate(self, gs: GameState) -> float: - if self._tasks[0](gs): - return self._tasks[1](gs) - return True - def sample(self, config: Config, **kwargs): - return super().sample(config, IMPLY, **kwargs) - -class Once(TaskOperator): - def __init__(self, task: Task, subject: Group=None): - super().__init__(lambda n: n==1, task, subject=subject) - self._maximum_score = -math.inf - def _evaluate(self, gs: GameState) -> float: - self._maximum_score = max(self._maximum_score, self._tasks[0](gs)) - return self._maximum_score - def sample(self, config: Config, **kwargs): - return super().sample(config, Once, **kwargs) - -class Repeat(TaskOperator): - def __init__(self, task: Task, subject: Group=None): - super().__init__(lambda n: n==1, task, subject=subject) - self._current_score = 0 - def _evaluate(self, gs: GameState) -> float: - self._current_score += self._tasks[0](gs) - return self._current_score - def sample(self, config: Config, **kwargs): - return super().sample(config, Repeat, **kwargs) - -# TODO(mark) should we define the remaining available operators -# such as multiply, modulo... 
+ tasks = [] + team_list = list(teams.keys()) + team_helper = TeamHelper(teams) + for idx in range(min(len(team_list), len(task_spec))): + team_id = team_list[idx] + reward_to, pred_fn, kwargs = task_spec[team_id] + + assert reward_to in REWARD_TO, 'Wrong reward target' + + if 'task_cls' in kwargs: + task_cls = kwargs.pop('task_cls') + else: + task_cls = Task + + # reserve 'target' for relative agent mapping + if 'target' in kwargs: + target = kwargs.pop('target') + assert target in VALID_TARGET, 'Invalid target' + # translate target to specific agent ids using team_helper + target = team_helper.get_target_agent(team_id, target) + kwargs['target'] = target + + # handle some special cases and instantiate the predicate first + predicate = None + if isinstance(pred_fn, FunctionType): + # if a function is provided as a predicate + pred_cls = make_predicate(pred_fn) + + # TODO: should create a test for these + if pred_fn in [bp.AllDead]: + kwargs.pop('target') # remove target + predicate = pred_cls(Group(target), **kwargs) + if pred_fn in [bp.StayAlive] and 'target' in kwargs: + kwargs.pop('target') # remove target + predicate = pred_cls(Group(target), **kwargs) + + # create the task + if reward_to == 'team': + assignee = team_helper.teams[team_id] + if predicate is None: + tasks.append(pred_cls(Group(assignee), **kwargs).create_task(task_cls=task_cls)) + else: + # this branch is for the cases like AllDead, StayAlive + tasks.append(predicate.create_task(assignee=assignee, task_cls=task_cls)) + + elif reward_to == 'agent': + agent_list = team_helper.teams[team_id] + if predicate is None: + tasks += make_same_task(pred_cls, agent_list, task_cls=task_cls, **kwargs) + else: + # this branch is for the cases like AllDead, StayAlive + tasks += [predicate.create_task(assignee=agent_id, task_cls=task_cls) + for agent_id in agent_list] + + return tasks diff --git a/nmmo/task/team_helper.py b/nmmo/task/team_helper.py deleted file mode 100644 index a8b0aa67..00000000 --- 
a/nmmo/task/team_helper.py +++ /dev/null @@ -1,57 +0,0 @@ -from typing import List -from nmmo.task.group import Group - -class TeamHelper: - ''' Provides a mapping from ent_id to group as equivalent to the grouping - expected by the policy - ''' - - def __init__(self, agents: List[int], num_teams: int): - assert len(agents) % num_teams == 0 - self.team_size = len(agents) // num_teams - self._team_to_ent, self._ent_to_team = self._map_ent_team(agents, num_teams) - - def _map_ent_team(self, agents, num_teams): - _team_to_ent = {} - _ent_to_team = {} - for ent_id in agents: - # to assigne agent 1 to team 0, and so forth - pop_id = (ent_id - 1) % num_teams - _ent_to_team[ent_id] = pop_id - if pop_id in _team_to_ent: - _team_to_ent[pop_id].append(ent_id) - else: - _team_to_ent[pop_id] = [ent_id] - - return _team_to_ent, _ent_to_team - - def team(self, pop_id: int) -> Group: - assert pop_id in self._team_to_ent, "Wrong pop_id" - return Group(self._team_to_ent[pop_id], f"Team.{pop_id}") - - def own_team(self, ent_id: int) -> Group: - assert ent_id in self._ent_to_team, "Wrong ent_id" - pop_id = self._ent_to_team[ent_id] - return Group(self._team_to_ent[pop_id], f"Team.{pop_id}") - - def left_team(self, ent_id: int) -> Group: - assert ent_id in self._ent_to_team, "Wrong ent_id" - pop_id = (self._ent_to_team[ent_id] - 1) % len(self._team_to_ent) - return Group(self._team_to_ent[pop_id], f"Team.{pop_id}") - - def right_team(self, ent_id: int) -> Group: - assert ent_id in self._ent_to_team, "Wrong ent_id" - pop_id = (self._ent_to_team[ent_id] + 1) % len(self._team_to_ent) - return Group(self._team_to_ent[pop_id], f"Team.{pop_id}") - - @property - def all_agents(self) -> Group: - return Group(list(self._ent_to_team.keys()), "All") - - @property - def all_teams(self) -> List[Group]: - return list((Group(v,str(k)) for k,v in self._team_to_ent.items())) - - @staticmethod - def generate_from_config(config): - return TeamHelper(list(range(1, config.PLAYER_N+1)), 
len(config.PLAYERS)) diff --git a/tests/core/test_env.py b/tests/core/test_env.py index fb1bd7a4..1ddeb677 100644 --- a/tests/core/test_env.py +++ b/tests/core/test_env.py @@ -1,4 +1,3 @@ - import unittest from typing import List @@ -18,10 +17,8 @@ # 30 seems to be enough to test variety of agent actions TEST_HORIZON = 30 RANDOM_SEED = random.randint(0, 10000) -# TODO: We should check that milestones have been reached, to make -# sure that the agents aren't just dying + class Config(nmmo.config.Small, nmmo.config.AllGameSystems): - RENDER = False SPECIALIZE = True PLAYERS = [ baselines.Fisher, baselines.Herbalist, baselines.Prospector, diff --git a/tests/render/test_render_save.py b/tests/render/test_render_save.py index 21b463dd..f1f3801e 100644 --- a/tests/render/test_render_save.py +++ b/tests/render/test_render_save.py @@ -1,30 +1,82 @@ -'''Manual test for render client connectivity''' +'''Manual test for render client connectivity and save replay''' +import nmmo +from nmmo.core.config import (AllGameSystems, Combat, Communication, + Equipment, Exchange, Item, Medium, Profession, + Progression, Resource, Small, Terrain) +from nmmo.task.task_api import nmmo_default_task +from nmmo.render.render_client import WebsocketRenderer +from nmmo.render.replay_helper import FileReplayHelper +from scripted import baselines + +def create_config(base, nent, *systems): + # pylint: disable=redefined-outer-name + systems = (base, *systems) + name = '_'.join(cls.__name__ for cls in systems) + + conf = type(name, systems, {})() + + conf.TERRAIN_TRAIN_MAPS = 1 + conf.TERRAIN_EVAL_MAPS = 1 + conf.IMMORTAL = True + conf.PLAYER_N = nent + conf.PLAYERS = [baselines.Random] + + return conf + +no_npc_small_1_pop_conf = create_config(Small, 1, Terrain, Resource, + Combat, Progression, Item, Equipment, Profession, Exchange, Communication) + +no_npc_med_1_pop_conf = create_config(Medium, 1, Terrain, Resource, + Combat, Progression, Item, Equipment, Profession, Exchange, Communication) + 
+no_npc_med_100_pop_conf = create_config(Medium, 100, Terrain, Resource, + Combat, Progression, Item, Equipment, Profession, Exchange, Communication) + +all_small_1_pop_conf = create_config(Small, 1, AllGameSystems) + +all_med_1_pop_conf = create_config(Medium, 1, AllGameSystems) + +all_med_100_pop_conf = create_config(Medium, 100, AllGameSystems) + +conf_dict = { + 'no_npc_small_1_pop': no_npc_small_1_pop_conf, + 'no_npc_med_1_pop': no_npc_med_1_pop_conf, + 'no_npc_med_100_pop': no_npc_med_100_pop_conf, + 'all_small_1_pop': all_small_1_pop_conf, + 'all_med_1_pop': all_med_1_pop_conf, + 'all_med_100_pop': all_med_100_pop_conf +} if __name__ == '__main__': import random - import nmmo + from tqdm import tqdm - # pylint: disable=import-error - from nmmo.render.render_client import WebsocketRenderer from tests.testhelpers import ScriptedAgentTestConfig TEST_HORIZON = 100 RANDOM_SEED = random.randint(0, 9999) - # config.RENDER option is gone, - # RENDER can be done without setting any config config = ScriptedAgentTestConfig() config.NPC_SPAWN_ATTEMPTS = 8 - env = nmmo.Env(config) - env.reset(seed=RANDOM_SEED) + replay_helper = FileReplayHelper() + + for name, config in conf_dict.items(): + env = nmmo.Env(config) + + # to make replay, one should create replay_helper + # and run the below line + env.realm.record_replay(replay_helper) + + tasks = nmmo_default_task(env.possible_agents, 'no_task') + env.reset(seed=RANDOM_SEED, new_tasks=tasks) - # the renderer is external to the env, so need to manually initiate it - renderer = WebsocketRenderer(env.realm) + # the renderer is external to the env, so need to manually initiate it + renderer = WebsocketRenderer(env.realm) - for tick in range(TEST_HORIZON): - env.step({}) - renderer.render_realm() + for tick in tqdm(range(TEST_HORIZON)): + env.step({}) + renderer.render_realm() - # save the packet: this is possible because config.SAVE_REPLAY = True - env.realm.save_replay(f'replay_seed_{RANDOM_SEED:04d}.json', compress=False) + 
# NOTE: the web client has trouble loading the compressed replay file + replay_helper.save(f'replay_{name}_seed_{RANDOM_SEED:04d}.json', compress=False) diff --git a/tests/task/test_demo_task_creation.py b/tests/task/test_demo_task_creation.py index 4aaa1894..5f5e532c 100644 --- a/tests/task/test_demo_task_creation.py +++ b/tests/task/test_demo_task_creation.py @@ -1,14 +1,21 @@ +# pylint: disable=invalid-name,unused-argument,unused-variable import unittest from tests.testhelpers import ScriptedAgentTestConfig from nmmo.core.env import Env from nmmo.lib.log import EventCode from nmmo.systems import skill -from nmmo.task import base_predicates as p +from nmmo.task import predicate_api as p from nmmo.task import task_api as t +from nmmo.task import base_predicates as bp from nmmo.task.game_state import GameState from nmmo.task.group import Group -from nmmo.task.scenario import Scenario + +def rollout(env, tasks, steps=5): + env.reset(make_task_fn=lambda: tasks) + for _ in range(steps): + env.step({}) + return env.step({}) class TestDemoTask(unittest.TestCase): @@ -22,165 +29,230 @@ class Tier: NORMAL = 6 / REWARD_SCALE HARD = 11 / REWARD_SCALE - # Usage of inbuilt predicate - def player_kills(scenario: Scenario): - scenario.add_tasks(p.CountEvent(event='PLAYER_KILL',N=1)*Tier.EASY) - scenario.add_tasks(p.CountEvent(event='PLAYER_KILL',N=2)*Tier.NORMAL) - scenario.add_tasks(p.CountEvent(event='PLAYER_KILL',N=3)*Tier.HARD) - return scenario.tasks + # Predicates defined below can be evaluated over one agent or several agents, + # which are sepcified separately + # Reward multiplier is indendent from predicates and used by tasks. 
+ # The multipliers are just shown to indicate the difficulty level of predicates + + # Usage of base predicates (see nmmo/task/base_predicates.py) + player_kills = [ # (predicate, kwargs, reward_multiplier) + (bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 1}, Tier.EASY), + (bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 2}, Tier.NORMAL), + (bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 3}, Tier.HARD)] - def exploration(scenario: Scenario): - scenario.add_tasks(p.DistanceTraveled(dist=16)*Tier.EASY) - scenario.add_tasks(p.DistanceTraveled(dist=32)*Tier.NORMAL) - scenario.add_tasks(p.DistanceTraveled(dist=64)*Tier.HARD) - return scenario.tasks + exploration = [ # (predicate, reward_multiplier) + (bp.DistanceTraveled, {'dist': 16}, Tier.EASY), + (bp.DistanceTraveled, {'dist': 32}, Tier.NORMAL), + (bp.DistanceTraveled, {'dist': 64}, Tier.HARD)] # Demonstrates custom predicate - return float/boolean - @t.define_predicate def EquipmentLevel(gs: GameState, subject: Group, number: int): - equipped = (subject.item.equipped>0) + equipped = subject.item.equipped > 0 levels = subject.item.level[equipped] return levels.sum() >= number - def equipment(scenario: Scenario): - scenario.add_tasks(EquipmentLevel(number=1 )*Tier.EASY, groups='agents') - scenario.add_tasks(EquipmentLevel(number=5 )*Tier.NORMAL, groups='agents') - scenario.add_tasks(EquipmentLevel(number=10)*Tier.HARD, groups='agents') - return scenario.tasks + equipment = [ # (predicate, reward_multiplier) + (EquipmentLevel, {'number': 1}, Tier.EASY), + (EquipmentLevel, {'number': 5}, Tier.NORMAL), + (EquipmentLevel, {'number': 10}, Tier.HARD)] - @t.define_predicate def CombatSkill(gs, subject, lvl): - return t.OR(p.AttainSkill(subject, skill.Melee, lvl, 1), - p.AttainSkill(subject, skill.Range, lvl, 1), - p.AttainSkill(subject, skill.Mage, lvl, 1)) + # OR on predicate functions: max over all progress + return max(bp.AttainSkill(gs, subject, skill.Melee, lvl, 1), + bp.AttainSkill(gs, subject, skill.Range, lvl, 1), + 
bp.AttainSkill(gs, subject, skill.Mage, lvl, 1)) - def combat(scenario: Scenario): - scenario.add_tasks(CombatSkill(lvl=2)*Tier.EASY, groups='agents') - scenario.add_tasks(CombatSkill(lvl=3)*Tier.NORMAL, groups='agents') - scenario.add_tasks(CombatSkill(lvl=4)*Tier.HARD, groups='agents') - return scenario.tasks + combat = [ # (predicate, reward_multiplier) + (CombatSkill, {'lvl': 2}, Tier.EASY), + (CombatSkill, {'lvl': 3}, Tier.NORMAL), + (CombatSkill, {'lvl': 4}, Tier.HARD)] - @t.define_predicate def ForageSkill(gs, subject, lvl): - return t.OR(p.AttainSkill(subject, skill.Fishing, lvl, 1), - p.AttainSkill(subject, skill.Herbalism, lvl, 1), - p.AttainSkill(subject, skill.Prospecting, lvl, 1), - p.AttainSkill(subject, skill.Carving, lvl, 1), - p.AttainSkill(subject, skill.Alchemy, lvl, 1)) - - def foraging(scenario: Scenario): - scenario.add_tasks(ForageSkill(lvl=2)*Tier.EASY) - scenario.add_tasks(ForageSkill(lvl=3)*Tier.NORMAL) - scenario.add_tasks(ForageSkill(lvl=4)*Tier.HARD) - return scenario.tasks - - # Demonstrate task scenario definition API - def all_tasks(scenario: Scenario): - player_kills(scenario) - exploration(scenario) - equipment(scenario) - combat(scenario) - foraging(scenario) - return scenario.tasks + return max(bp.AttainSkill(gs, subject, skill.Fishing, lvl, 1), + bp.AttainSkill(gs, subject, skill.Herbalism, lvl, 1), + bp.AttainSkill(gs, subject, skill.Prospecting, lvl, 1), + bp.AttainSkill(gs, subject, skill.Carving, lvl, 1), + bp.AttainSkill(gs, subject, skill.Alchemy, lvl, 1)) + + foraging = [ # (predicate, reward_multiplier) + (ForageSkill, {'lvl': 2}, Tier.EASY), + (ForageSkill, {'lvl': 3}, Tier.NORMAL), + (ForageSkill, {'lvl': 4}, Tier.HARD)] # Test rollout - task_generators = [player_kills, exploration, equipment, combat, foraging, all_tasks] - for tg in task_generators: - config = ScriptedAgentTestConfig() - env = Env(config) - scenario = Scenario(config) - tasks = tg(scenario) - env.change_task(tasks) - for _ in range(10): - env.step({}) 
+ config = ScriptedAgentTestConfig() + env = Env(config) + + # Creating and testing "team" tasks + # i.e., predicates are evaluated over all team members, + # and all team members get the same reward from each task + + # The team mapping can come from anywhere. + # The below is an arbitrary example and doesn't even include all agents + teams = {0: [1, 2, 3, 4], 1: [5, 6, 7, 8]} + + # Making player_kills and exploration team tasks, + team_tasks = [] + for pred_fn, kwargs, weight in player_kills + exploration: + pred_cls = p.make_predicate(pred_fn) + for team in teams.values(): + team_tasks.append( + pred_cls(Group(team), **kwargs).create_task(reward_multiplier=weight)) + + # Run the environment with these tasks + # check rewards and infos for the task info + obs, rewards, dones, infos = rollout(env, team_tasks) + + # Creating and testing the same task for all agents + # i.e., each agent gets evaluated and rewarded individually + same_tasks = [] + for pred_fn, kwargs, weight in exploration + equipment + combat + foraging: + pred_cls = p.make_predicate(pred_fn) + for agent_id in env.possible_agents: + same_tasks.append( + pred_cls(Group([agent_id]), **kwargs).create_task(reward_multiplier=weight)) + + # Run the environment with these tasks + # check rewards and infos for the task info + obs, rewards, dones, infos = rollout(env, same_tasks) # DONE def test_player_kill_reward(self): - """ Reward 0.1 per player defeated, 1 for first and 3rd kills + # pylint: disable=no-value-for-parameter + """ Design a predicate with a complex progress scheme """ config = ScriptedAgentTestConfig() env = Env(config) - scenario = Scenario(config) # PARTICIPANT WRITES # ==================================== - @t.define_task - def KillTask(gs: GameState, - subject: Group): - """ Reward 0.1 per player defeated, with a bonus for the 1st and 3rd kills. 
+ def KillPredicate(gs: GameState, + subject: Group): + """The progress, the max of which is 1, should + * increase small for each player kill + * increase big for the 1st and 3rd kills + * reach 1 with 10 kills """ num_kills = len(subject.event.PLAYER_KILL) - score = num_kills * 0.1 + progress = num_kills * 0.06 if num_kills >= 1: - score += 1 + progress += .1 if num_kills >= 3: - score += 1 - return score + progress += .3 + return min(progress, 1.0) - scenario.add_tasks(lambda agent: KillTask(agent), groups='agents') - # ==================================== + # participants don't need to know about Predicate classes + kill_pred_cls = p.make_predicate(KillPredicate) + kill_tasks = [kill_pred_cls(Group(agent_id)).create_task() + for agent_id in env.possible_agents] # Test Reward - env.change_task(scenario.tasks) + env.reset(make_task_fn=lambda: kill_tasks) players = env.realm.players code = EventCode.PLAYER_KILL env.realm.event_log.record(code, players[1], target=players[3]) env.realm.event_log.record(code, players[2], target=players[4]) env.realm.event_log.record(code, players[2], target=players[5]) env.realm.event_log.record(EventCode.EAT_FOOD, players[2]) - # Award given as designed - # Agent 1 kills 1 - reward 1 + 0.1 - # Agent 2 kills 2 - reward 1 + 0.2 - # Agent 3 kills 0 - reward 0 - _, rewards, _, _ = env.step({}) - self.assertEqual(rewards[1],1.1) - self.assertEqual(rewards[2],1.2) - self.assertEqual(rewards[3],0) - # No reward when no changes - _, rewards, _, _ = env.step({}) - self.assertEqual(rewards[1],0) - self.assertEqual(rewards[2],0) - self.assertEqual(rewards[3],0) - # Test task reset on env reset - env.reset() + + # Award given as designed + # Agent 1 kills 1 - reward .06 + .1 + # Agent 2 kills 2 - reward .12 + .1 + # Agent 3 kills 0 - reward 0 _, rewards, _, _ = env.step({}) - self.assertEqual(env.tasks[0][0]._score,0) + self.assertEqual(rewards[1], 0.16) + self.assertEqual(rewards[2], 0.22) + self.assertEqual(rewards[3], 0) - # Test Rollout - 
env.change_task(scenario.tasks) - for _ in range(10): - env.step({}) + # No reward when no changes + _, rewards, _, _ = env.step({}) + self.assertEqual(rewards[1], 0) + self.assertEqual(rewards[2], 0) + self.assertEqual(rewards[3], 0) # DONE - def test_combination_task_reward(self): + def test_predicate_math(self): + # pylint: disable=no-value-for-parameter config = ScriptedAgentTestConfig() env = Env(config) - scenario = Scenario(config) - task = t.OR(p.CountEvent(event='PLAYER_KILL',N=5),p.TickGE(num_tick=5)) - task = task * 5 - scenario.add_tasks(task) + # each predicate function returns float, so one can do math on them + def PredicateMath(gs, subject): + progress = 0.8 * bp.CountEvent(gs, subject, event='PLAYER_KILL', N=7) + \ + 1.1 * bp.TickGE(gs, subject, num_tick=3) + # NOTE: the resulting progress will be bounded from [0, 1] afterwards + return progress + + # participants don't need to know about Predicate classes + pred_math_cls = p.make_predicate(PredicateMath) + task_for_agent_1 = pred_math_cls(Group(1)).create_task() # Test Reward - env.change_task(scenario.tasks) + env.reset(make_task_fn=lambda: [task_for_agent_1]) code = EventCode.PLAYER_KILL players = env.realm.players env.realm.event_log.record(code, players[1], target=players[2]) env.realm.event_log.record(code, players[1], target=players[3]) _, rewards, _, _ = env.step({}) - self.assertEqual(rewards[1],2) + self.assertAlmostEqual(rewards[1], 0.8*2/7 + 1.1*1/3) - for _ in range(4): + for _ in range(2): _, _, _, infos = env.step({}) - - self.assertEqual(list(infos[1]['task'].values())[0],5.0) + + # 0.8*2/7 + 1.1 > 1, but the progress is maxed at 1 + self.assertEqual(infos[1]['task'][env.tasks[0].name]['progress'], 1.0) + self.assertTrue(env.tasks[0].completed) # because progress >= 1 # DONE + def test_make_team_tasks_using_task_spec(self): + # NOTE: len(teams) and len(task_spec) don't need to match + teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]} + + """ task_spec is a list of tuple 
(reward_to, predicate class, kwargs) + + each tuple in the task_spec will create tasks for a team in teams + + reward_to: must be in ['team', 'agent'] + * 'team' create a single team task, in which all team members get rewarded + * 'agent' create a task for each agent, in which only the agent gets rewarded + + predicate class from the base predicates or custom predicates like above + + kwargs are the additional args that go into predicate. There are also special keys + * 'target' must be ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] + these str will be translated into the actual agent ids + * 'task_cls' is optional. If not provided, the standard Task is used. """ + task_spec = [ # (reward_to, predicate function, kwargs) + ('team', bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 1}), # one task + ('agent', bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 2}), + ('agent', bp.AllDead, {'target': 'left_team'}), + ('team', bp.CanSeeAgent, {'target': 'right_team_leader', 'task_cls': t.OngoingTask})] + + config = ScriptedAgentTestConfig() + env = Env(config) + + env.reset(make_task_fn=lambda: t.make_team_tasks(teams, task_spec)) + + self.assertEqual(len(env.tasks), 6) # 6 tasks were created + self.assertEqual(env.tasks[0].name, # team 0 task assigned to agents 1,2,3 + '(Task_eval_fn:(CountEvent_(1,2,3)_event:PLAYER_KILL_N:1)_assignee:(1,2,3))') + self.assertEqual(env.tasks[1].name, # team 1, agent task assigned to agent 4 + '(Task_eval_fn:(CountEvent_(4,)_event:PLAYER_KILL_N:2)_assignee:(4,))') + self.assertEqual(env.tasks[2].name, # team 1, agent task assigned to agent 5 + '(Task_eval_fn:(CountEvent_(5,)_event:PLAYER_KILL_N:2)_assignee:(5,))') + self.assertEqual(env.tasks[3].name, # team 2, agent 6 task, left_team is team 3 (agents 8,9) + '(Task_eval_fn:(AllDead_(8,9))_assignee:(6,))') + self.assertEqual(env.tasks[5].name, # team 3 task, right_team is team 2 (6,7), leader 6 + '(OngoingTask_eval_fn:(CanSeeAgent_(8,9)_target:6)_assignee:(8,9))') + + for _ 
in range(2): + env.step({}) + if __name__ == '__main__': unittest.main() diff --git a/tests/task/test_manual_curriculum.py b/tests/task/test_manual_curriculum.py new file mode 100644 index 00000000..5fcc6bbc --- /dev/null +++ b/tests/task/test_manual_curriculum.py @@ -0,0 +1,304 @@ +'''Manual test for creating learning curriculum manually''' +# pylint: disable=invalid-name,redefined-outer-name,bad-builtin + +import nmmo +import nmmo.lib.material as Material +from nmmo.task import base_predicates as bp +from nmmo.task.task_api import OngoingTask, make_team_tasks +from nmmo.task import constraint as c + + +EVENT_NUMBER_GOAL = [1, 2, 3, 4, 5, 7, 9, 12, 15, 20, 30, 50] +INFREQUENT_GOAL = list(range(1, 10)) +STAY_ALIVE_GOAL = [50, 100, 150, 200, 300, 500] +TEAM_NUMBER_GOAL = [10, 20, 30, 50, 70, 100] +LEVEL_GOAL = list(range(1, 10)) # TODO: get config +AGENT_NUM_GOAL = [1, 2, 3, 4, 5] # competition team size: 8 +ITEM_NUM_GOAL = AGENT_NUM_GOAL +TEAM_ITEM_GOAL = [1, 3, 5, 7, 10, 15, 20] +SKILLS = c.combat_skills + c.harvest_skills +COMBAT_STYLE = c.combat_skills +ALL_ITEM = c.armour + c.weapons + c.tools + c.ammunition + c.consumables +EQUIP_ITEM = c.armour + c.weapons + c.tools + c.ammunition +HARVEST_ITEM = c.weapons + c.ammunition + c.consumables + +""" task_spec is a list of tuple (reward_to, predicate class, kwargs) + + each tuple in the task_spec will create tasks for a team in teams + + reward_to: must be in ['team', 'agent'] + * 'team' create a single team task, in which all team members get rewarded + * 'agent' create a task for each agent, in which only the agent gets rewarded + + predicate class from the base predicates or custom predicates like above + + kwargs are the additional args that go into predicate. There are also special keys + * 'target' must be ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] + these str will be translated into the actual agent ids + * 'task_cls' is optional. If not provided, the standard Task is used. 
""" +task_spec = [] + +# explore, eat, drink, attack any agent, harvest any item, level up any skill +# which can happen frequently +essential_skills = ['GO_FARTHEST', 'EAT_FOOD', 'DRINK_WATER', + 'SCORE_HIT', 'HARVEST_ITEM', 'LEVEL_UP'] +for event_code in essential_skills: + task_spec += [('agent', bp.CountEvent, {'event': event_code, 'N': cnt}) + for cnt in EVENT_NUMBER_GOAL] + +# item/market skills, which happen less frequently or should not do too much +item_skills = ['CONSUME_ITEM', 'GIVE_ITEM', 'DESTROY_ITEM', 'EQUIP_ITEM', + 'GIVE_GOLD', 'LIST_ITEM', 'EARN_GOLD', 'BUY_ITEM'] +for event_code in item_skills: + task_spec += [('agent', bp.CountEvent, {'event': event_code, 'N': cnt}) + for cnt in INFREQUENT_GOAL] # less than 10 + +# find resource tiles +for resource in Material.Harvestable: + for reward_to in ['agent', 'team']: + task_spec.append((reward_to, bp.CanSeeTile, {'tile_type': resource})) + +# stay alive ... like ... for 300 ticks +# i.e., getting incremental reward for each tick alive as an individual or a team +for reward_to in ['agent', 'team']: + for num_tick in STAY_ALIVE_GOAL: + task_spec.append((reward_to, bp.TickGE, {'num_tick': num_tick})) + +# protect the leader: get reward for each tick the leader is alive +task_spec.append(('team', bp.StayAlive, {'target': 'my_team_leader', 'task_cls': OngoingTask})) + +# want the other team or team leader to die +for target in ['left_team', 'left_team_leader', 'right_team', 'right_team_leader']: + task_spec.append(('team', bp.AllDead, {'target': target})) + +# occupy the center tile, assuming the Medium map size +# TODO: it'd be better to have some intermediate targets toward the center +for reward_to in ['agent', 'team']: + task_spec.append((reward_to, bp.OccupyTile, {'row': 80, 'col': 80})) # TODO: get config + +# form a tight formation, for a certain number of ticks +def PracticeFormation(gs, subject, dist, num_tick): + return bp.AllMembersWithinRange(gs, subject, dist) * bp.TickGE(gs, subject, num_tick) 
+for dist in [1, 3, 5, 10]: + task_spec += [('team', PracticeFormation, {'dist': dist, 'num_tick': num_tick}) + for num_tick in STAY_ALIVE_GOAL] + +# find the other team leader +for reward_to in ['agent', 'team']: + for target in ['left_team_leader', 'right_team_leader']: + task_spec.append((reward_to, bp.CanSeeAgent, {'target': target})) + +# find the other team (any agent) +for reward_to in ['agent']: #, 'team']: + for target in ['left_team', 'right_team']: + task_spec.append((reward_to, bp.CanSeeGroup, {'target': target})) + +# explore the map -- sum the l-inf distance traveled by all subjects +for dist in [10, 20, 30, 50, 100]: # each agent + task_spec.append(('agent', bp.DistanceTraveled, {'dist': dist})) +for dist in [30, 50, 70, 100, 150, 200, 300, 500]: # summed over all team members + task_spec.append(('team', bp.DistanceTraveled, {'dist': dist})) + +# level up a skill +for skill in SKILLS: + for level in LEVEL_GOAL: + # since this is an agent task, num_agent must be 1 + task_spec.append(('agent', bp.AttainSkill, {'skill': skill, 'level': level, 'num_agent': 1})) + +# make attain skill a team task by varying the number of agents +for skill in SKILLS: + for level in LEVEL_GOAL: + for num_agent in AGENT_NUM_GOAL: + if level + num_agent <= 6 or num_agent == 1: # heuristic prune + task_spec.append(('team', bp.AttainSkill, + {'skill': skill, 'level': level,'num_agent': num_agent})) + +# practice specific combat style +for style in COMBAT_STYLE: + for cnt in EVENT_NUMBER_GOAL: + task_spec.append(('agent', bp.ScoreHit, {'combat_style': style, 'N': cnt})) + for cnt in TEAM_NUMBER_GOAL: + task_spec.append(('team', bp.ScoreHit, {'combat_style': style, 'N': cnt})) + +# defeat agents of a certain level as a team +for agent_type in ['player', 'npc']: # c.AGENT_TYPE_CONSTRAINT + for level in LEVEL_GOAL: + for num_agent in AGENT_NUM_GOAL: + if level + num_agent <= 6 or num_agent == 1: # heuristic prune + task_spec.append(('team', bp.DefeatEntity, + {'agent_type': 
agent_type, 'level': level, 'num_agent': num_agent})) + +# hoarding gold -- evaluated on the current gold +for amount in EVENT_NUMBER_GOAL: + task_spec.append(('agent', bp.HoardGold, {'amount': amount})) +for amount in TEAM_NUMBER_GOAL: + task_spec.append(('team', bp.HoardGold, {'amount': amount})) + +# earning gold -- evaluated on the total gold earned by selling items +# does NOT include looted gold +for amount in EVENT_NUMBER_GOAL: + task_spec.append(('agent', bp.EarnGold, {'amount': amount})) +for amount in TEAM_NUMBER_GOAL: + task_spec.append(('team', bp.EarnGold, {'amount': amount})) + +# spending gold, by buying items +for amount in EVENT_NUMBER_GOAL: + task_spec.append(('agent', bp.SpendGold, {'amount': amount})) +for amount in TEAM_NUMBER_GOAL: + task_spec.append(('team', bp.SpendGold, {'amount': amount})) + +# making profits by trading -- only buying and selling are counted +for amount in EVENT_NUMBER_GOAL: + task_spec.append(('agent', bp.MakeProfit, {'amount': amount})) +for amount in TEAM_NUMBER_GOAL: + task_spec.append(('team', bp.MakeProfit, {'amount': amount})) + +# managing inventory space +def PracticeInventoryManagement(gs, subject, space, num_tick): + return bp.InventorySpaceGE(gs, subject, space) * bp.TickGE(gs, subject, num_tick) +for space in [2, 4, 8]: + task_spec += [('agent', PracticeInventoryManagement, {'space': space, 'num_tick': num_tick}) + for num_tick in STAY_ALIVE_GOAL] + +# own item, evaluated on the current inventory +for item in ALL_ITEM: + for level in LEVEL_GOAL: + # agent task + for quantity in ITEM_NUM_GOAL: + if level + quantity <= 6 or quantity == 1: # heuristic prune + task_spec.append(('agent', bp.OwnItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # team task + for quantity in TEAM_ITEM_GOAL: + if level + quantity <= 10 or quantity == 1: # heuristic prune + task_spec.append(('team', bp.OwnItem, + {'item': item, 'level': level, 'quantity': quantity})) + +# equip item, evaluated on the current inventory 
and equipment status +for item in EQUIP_ITEM: + for level in LEVEL_GOAL: + # agent task + task_spec.append(('agent', bp.EquipItem, + {'item': item, 'level': level, 'num_agent': 1})) + + # team task + for num_agent in AGENT_NUM_GOAL: + if level + num_agent <= 6 or num_agent == 1: # heuristic prune + task_spec.append(('team', bp.EquipItem, + {'item': item, 'level': level, 'num_agent': num_agent})) + +# consume items (ration, potion), evaluated based on the event log +for item in c.consumables: + for level in LEVEL_GOAL: + # agent task + for quantity in ITEM_NUM_GOAL: + if level + quantity <= 6 or quantity == 1: # heuristic prune + task_spec.append(('agent', bp.ConsumeItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # team task + for quantity in TEAM_ITEM_GOAL: + if level + quantity <= 10 or quantity == 1: # heuristic prune + task_spec.append(('team', bp.ConsumeItem, + {'item': item, 'level': level, 'quantity': quantity})) + +# harvest items, evaluated based on the event log +for item in HARVEST_ITEM: + for level in LEVEL_GOAL: + # agent task + for quantity in ITEM_NUM_GOAL: + if level + quantity <= 6 or quantity == 1: # heuristic prune + task_spec.append(('agent', bp.HarvestItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # team task + for quantity in TEAM_ITEM_GOAL: + if level + quantity <= 10 or quantity == 1: # heuristic prune + task_spec.append(('team', bp.HarvestItem, + {'item': item, 'level': level, 'quantity': quantity})) + +# list items, evaluated based on the event log +for item in ALL_ITEM: + for level in LEVEL_GOAL: + # agent task + for quantity in ITEM_NUM_GOAL: + if level + quantity <= 6 or quantity == 1: # heuristic prune + task_spec.append(('agent', bp.ListItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # team task + for quantity in TEAM_ITEM_GOAL: + if level + quantity <= 10 or quantity == 1: # heuristic prune + task_spec.append(('team', bp.ListItem, + {'item': item, 'level': level, 'quantity': 
quantity})) + +# buy items, evaluated based on the event log +for item in ALL_ITEM: + for level in LEVEL_GOAL: + # agent task + for quantity in ITEM_NUM_GOAL: + if level + quantity <= 6 or quantity == 1: # heuristic prune + task_spec.append(('agent', bp.BuyItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # team task + for quantity in TEAM_ITEM_GOAL: + if level + quantity <= 10 or quantity == 1: # heuristic prune + task_spec.append(('team', bp.BuyItem, + {'item': item, 'level': level, 'quantity': quantity})) + +# fully armed, evaluated based on the current player/inventory status +for style in COMBAT_STYLE: + for level in LEVEL_GOAL: + for num_agent in AGENT_NUM_GOAL: + if level + num_agent <= 6 or num_agent == 1: # heuristic prune + task_spec.append(('team', bp.FullyArmed, + {'combat_style': style, 'level': level, 'num_agent': num_agent})) + + +if __name__ == '__main__': + # pylint: disable=bare-except + import psutil + from contextlib import contextmanager + import multiprocessing as mp + import numpy as np + import pickle + + @contextmanager + def create_pool(num_proc): + pool = mp.Pool(processes=num_proc) + yield pool + pool.close() + pool.join() + + def check_task_spec(spec_list): + teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]} + config = nmmo.config.Default() + env = nmmo.Env(config) + for idx, single_spec in enumerate(spec_list): + # pylint: disable=cell-var-from-loop + test_task = make_team_tasks(teams, [single_spec]) + try: + env.reset(make_task_fn=lambda: test_task) + for _ in range(3): + env.step({}) + except: + print('invalid task spec:', single_spec) + + if idx > 0 and idx % 50 == 0: + print(idx, 'task specs checked.') + + # 3590 task specs: divide the specs into chunks + num_cores = psutil.cpu_count(logical=False) + spec_chunks = np.array_split(task_spec, num_cores) + with create_pool(num_cores) as pool: + pool.map(check_task_spec, spec_chunks) + + # print(sample_task[0].name) + # if len(sample_task) > 1: + # 
print(sample_task[-1].name) + + # test if the task spec is picklable + with open('manual_curriculum.pkl', 'wb') as f: + pickle.dump(task_spec, f) diff --git a/tests/task/test_predicates.py b/tests/task/test_predicates.py index b1590dad..f2f61f0e 100644 --- a/tests/task/test_predicates.py +++ b/tests/task/test_predicates.py @@ -1,5 +1,5 @@ import unittest -from typing import List, Tuple +from typing import List, Tuple, Union, Iterable import random from tests.testhelpers import ScriptedAgentTestConfig, provide_item @@ -15,27 +15,21 @@ # pylint: disable=import-error from nmmo.core.env import Env -from nmmo.task.task_api import Task, TaskOperator +from nmmo.task.predicate_api import Predicate, make_predicate +from nmmo.task.task_api import OngoingTask from nmmo.task.group import Group import nmmo.task.base_predicates as bp # use the constant reward of 1 for testing predicates NUM_AGENT = 6 -ALL_AGENT = Group(list(range(1, NUM_AGENT+1)), 'All') +ALL_AGENT = list(range(1, NUM_AGENT+1)) -class Change(TaskOperator): - def __init__(self, task: Task, subject: Group=None): - super().__init__(lambda n: n==1, task, subject=subject) - def _evaluate(self, gs) -> float: - return self._tasks[0](gs) - def sample(self, config, **kwargs): - return super().sample(config, Change, **kwargs) class TestBasePredicate(unittest.TestCase): # pylint: disable=protected-access,invalid-name,no-member def _get_taskenv(self, - test_tasks: List[Tuple[Task, Group]], + test_preds: List[Tuple[Predicate, Union[Iterable[int], int]]], grass_map=False): config = ScriptedAgentTestConfig() @@ -43,10 +37,12 @@ def _get_taskenv(self, config.PLAYER_N = NUM_AGENT config.IMMORTAL = True - tasks = [Change(tsk, subject=team) for tsk, team in test_tasks] + # OngoingTask keeps evaluating and returns progress as the reward + # vs. 
Task stops evaluating once the task is completed, returns reward = delta(progress) + test_tasks = [OngoingTask(pred, assignee) for pred, assignee in test_preds] env = Env(config) - env.change_task(tasks) + env.reset(make_task_fn=lambda: test_tasks) if grass_map: MS = env.config.MAP_SIZE @@ -60,41 +56,46 @@ def _get_taskenv(self, return env - def _check_result(self, env, test_tasks, infos, true_task): - for tid, (task, assignee) in enumerate(test_tasks): + def _check_result(self, env, test_preds, infos, true_task): + for tid, (predicate, assignee) in enumerate(test_preds): # result is cached when at least one assignee is alive so that the task is evaled - if set(assignee).intersection(infos): - self.assertEqual(int(env.game_state.cache_result[task.name]), tid in true_task) + if len(set(assignee) & set(infos)) > 0: + self.assertEqual(int(env.game_state.cache_result[predicate.name]), + int(tid in true_task)) + for ent_id in infos: if ent_id in assignee: # the agents that are assigned the task get evaluated for reward - self.assertEqual(int(infos[ent_id]['task'][Change(task,assignee).name]), + self.assertEqual(int(infos[ent_id]['task'][env.tasks[tid].name]['reward']), int(tid in true_task)) else: # the agents that are not assigned the task are not evaluated - self.assertTrue(task.name not in infos[ent_id]['task']) + self.assertTrue(env.tasks[tid].name not in infos[ent_id]['task']) def _check_progress(self, task, infos, value): - """ Some predicates return a float in the range 0-1 indicating completion progress. + """ Tasks return a float in the range 0-1 indicating completion progress. 
""" - predicate, assignee = task[0], task[1] for ent_id in infos: - if ent_id in assignee: - self.assertAlmostEqual(infos[ent_id]['task'][Change(predicate,assignee).name],value) + if ent_id in task.assignee: + self.assertAlmostEqual(infos[ent_id]['task'][task.name]['progress'],value) def test_tickge_stay_alive_rip(self): + tickge_pred_cls = make_predicate(bp.TickGE) + stay_alive_pred_cls = make_predicate(bp.StayAlive) + all_dead_pred_cls = make_predicate(bp.AllDead) + tick_true = 5 death_note = [1, 2, 3] - test_tasks = [ # (Predicate, Team) - (bp.TickGE(Group([1]), tick_true), ALL_AGENT), - (bp.StayAlive(Group([1, 3])), ALL_AGENT), - (bp.StayAlive(Group([3, 4])), Group([1, 2])), - (bp.StayAlive(Group([4])), Group([5, 6])), - (bp.AllDead(Group([1, 3])), ALL_AGENT), - (bp.AllDead(Group([3, 4])), Group([1, 2])), - (bp.AllDead(Group([4])), Group([5, 6]))] + test_preds = [ # (instantiated predicate, task assignee) + (tickge_pred_cls(Group([1]), tick_true), ALL_AGENT), + (stay_alive_pred_cls(Group([1,3])), ALL_AGENT), + (stay_alive_pred_cls(Group([3,4])), [1,2]), + (stay_alive_pred_cls(Group([4])), [5,6]), + (all_dead_pred_cls(Group([1,3])), ALL_AGENT), + (all_dead_pred_cls(Group([3,4])), [1,2]), + (all_dead_pred_cls(Group([4])), [5,6])] - env = self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) for _ in range(tick_true-1): _, _, _, infos = env.step({}) @@ -104,8 +105,8 @@ def test_tickge_stay_alive_rip(self): # and all AllDead tasks (ti in [4, 5, 6]) are false true_task = [1, 2, 3] - self._check_result(env, test_tasks, infos, true_task) - self._check_progress(test_tasks[0], infos, (tick_true-1) / tick_true) + self._check_result(env, test_preds, infos, true_task) + self._check_progress(env.tasks[0], infos, (tick_true-1) / tick_true) # kill agents 1-3 for ent_id in death_note: @@ -132,25 +133,27 @@ def test_tickge_stay_alive_rip(self): # StayAlive(1,3) and StayAlive(3,4) are false, StayAlive(4) is true # AllDead(1,3) is true, AllDead(3,4) and AllDead(4) 
are false true_task = [0, 3, 4] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # 3 is dead but 4 is alive. Half of agents killed, 50% completion. - self._check_progress(test_tasks[5], infos, 0.5) + self._check_progress(env.tasks[5], infos, 0.5) # DONE def test_can_see_tile(self): + canseetile_pred_cls = make_predicate(bp.CanSeeTile) + a1_target = Material.Foilage a2_target = Material.Water - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.CanSeeTile(Group([1]), a1_target), ALL_AGENT), # True - (bp.CanSeeTile(Group([1,3,5]), a2_target), ALL_AGENT), # False - (bp.CanSeeTile(Group([2]), a2_target), Group([1,2,3])), # True - (bp.CanSeeTile(Group([2,5,6]), a1_target), ALL_AGENT), # False - (bp.CanSeeTile(ALL_AGENT, a2_target), Group([2,3,4]))] # True + test_preds = [ # (instantiated predicate, task assignee) + (canseetile_pred_cls(Group([1]), a1_target), ALL_AGENT), # True + (canseetile_pred_cls(Group([1,3,5]), a2_target), ALL_AGENT), # False + (canseetile_pred_cls(Group([2]), a2_target), [1,2,3]), # True + (canseetile_pred_cls(Group([2,5,6]), a1_target), ALL_AGENT), # False + (canseetile_pred_cls(Group(ALL_AGENT), a2_target), [2,3,4])] # True # setup env with all grass map - env = self._get_taskenv(test_tasks, grass_map=True) + env = self._get_taskenv(test_preds, grass_map=True) # Two corners to the target materials BORDER = env.config.MAP_BORDER @@ -172,7 +175,7 @@ def test_can_see_tile(self): # no target tiles are found, so all are false true_task = [] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # Team one to foilage, team two to water change_agent_pos(env.realm,1,(BORDER,MS-2)) # agent 1, team 0, foilage @@ -183,19 +186,22 @@ def test_can_see_tile(self): # t0, t2, t4 are true true_task = [0, 2, 4] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE 
def test_can_see_agent(self): + cansee_agent_pred_cls = make_predicate(bp.CanSeeAgent) + cansee_group_pred_cls = make_predicate(bp.CanSeeGroup) + search_target = 1 - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.CanSeeAgent(Group([1]), search_target), ALL_AGENT), # Always True - (bp.CanSeeAgent(Group([2]), search_target), Group([2,3,4])), # False -> True -> True - (bp.CanSeeAgent(Group([3,4,5]), search_target), Group([1,2,3])), # False -> False -> True - (bp.CanSeeGroup(Group([1]), Group([3,4])), ALL_AGENT)] # False -> False -> True + test_preds = [ # (Predicate, Team), the reward is 1 by default + (cansee_agent_pred_cls(Group([1]), search_target), ALL_AGENT), # Always True + (cansee_agent_pred_cls(Group([2]), search_target), [2,3,4]), # False -> True -> True + (cansee_agent_pred_cls(Group([3,4,5]), search_target), [1,2,3]), # False -> False -> True + (cansee_group_pred_cls(Group([1]), [3,4]), ALL_AGENT)] # False -> False -> True - env = self._get_taskenv(test_tasks, grass_map=True) + env = self._get_taskenv(test_preds, grass_map=True) # All agents to one corner BORDER = env.config.MAP_BORDER @@ -211,7 +217,7 @@ def test_can_see_agent(self): # Only CanSeeAgent(Group([1]), search_target) is true, others are false true_task = [0] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # Teleport agent 2 to agent 1's pos change_agent_pos(env.realm,2,(MS-2,MS-2)) @@ -221,7 +227,7 @@ def test_can_see_agent(self): # SearchAgent(Team([2]), search_target) is also true true_task = [0,1] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # Teleport agent 3 to agent 1s position change_agent_pos(env.realm,3,(MS-2,MS-2)) @@ -229,20 +235,22 @@ def test_can_see_agent(self): _, _, _, infos = env.step({}) true_task = [0,1,2,3] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE 
def test_occupy_tile(self): + occupy_tile_pred_cls = make_predicate(bp.OccupyTile) + target_tile = (30, 30) - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.OccupyTile(Group([1]), *target_tile), ALL_AGENT), # False -> True - (bp.OccupyTile(Group([1,2,3]), *target_tile), Group([4,5,6])), # False -> True - (bp.OccupyTile(Group([2]), *target_tile), Group([2,3,4])), # False - (bp.OccupyTile(Group([3,4,5]), *target_tile), Group([1,2,3]))] # False + test_preds = [ # (Predicate, Team), the reward is 1 by default + (occupy_tile_pred_cls(Group([1]), *target_tile), ALL_AGENT), # False -> True + (occupy_tile_pred_cls(Group([1,2,3]), *target_tile), [4,5,6]), # False -> True + (occupy_tile_pred_cls(Group([2]), *target_tile), [2,3,4]), # False + (occupy_tile_pred_cls(Group([3,4,5]), *target_tile), [1,2,3])] # False # make all tiles habitable - env = self._get_taskenv(test_tasks, grass_map=True) + env = self._get_taskenv(test_preds, grass_map=True) # All agents to one corner for ent_id in env.realm.players: @@ -253,7 +261,7 @@ def test_occupy_tile(self): # all tasks must be false true_task = [] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # teleport agent 1 to the target tile, agent 2 to the adjacent tile change_agent_pos(env.realm,1,target_tile) @@ -265,29 +273,31 @@ def test_occupy_tile(self): # tid 0 and 1 should be true: OccupyTile(Group([1]), *target_tile) # & OccupyTile(Group([1,2,3]), *target_tile) true_task = [0, 1] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_distance_traveled(self): + distance_traveled_pred_cls = make_predicate(bp.DistanceTraveled) + agent_dist = 6 team_dist = 10 # NOTE: when evaluating predicates, to whom tasks are assigned are irrelevant - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.DistanceTraveled(Group([1]), agent_dist), ALL_AGENT), # False -> True - 
(bp.DistanceTraveled(Group([2, 5]), agent_dist), ALL_AGENT), # False - (bp.DistanceTraveled(Group([3, 4]), agent_dist), ALL_AGENT), # False - (bp.DistanceTraveled(Group([1, 2, 3]), team_dist), ALL_AGENT), # False -> True - (bp.DistanceTraveled(Group([6]), agent_dist), ALL_AGENT)] # False + test_preds = [ # (Predicate, Team), the reward is 1 by default + (distance_traveled_pred_cls(Group([1]), agent_dist), ALL_AGENT), # False -> True + (distance_traveled_pred_cls(Group([2, 5]), agent_dist), ALL_AGENT), # False + (distance_traveled_pred_cls(Group([3, 4]), agent_dist), ALL_AGENT), # False + (distance_traveled_pred_cls(Group([1, 2, 3]), team_dist), ALL_AGENT), # False -> True + (distance_traveled_pred_cls(Group([6]), agent_dist), ALL_AGENT)] # False # make all tiles habitable - env = self._get_taskenv(test_tasks, grass_map=True) + env = self._get_taskenv(test_preds, grass_map=True) _, _, _, infos = env.step({}) # one cannot accomplish these goals in the first tick, so all false true_task = [] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # all are sleeper, so they all stay in the spawn pos spawn_pos = { ent_id: ent.pos for ent_id, ent in env.realm.players.items() } @@ -302,24 +312,26 @@ def test_distance_traveled(self): _,_,_, infos = env.step({}) true_task = [0, 3] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_all_members_within_range(self): + within_range_pred_cls = make_predicate(bp.AllMembersWithinRange) + dist_123 = 1 dist_135 = 5 - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.AllMembersWithinRange(Group([1]), dist_123), ALL_AGENT), # Always true for group of 1 - (bp.AllMembersWithinRange(Group([1,2]), dist_123), ALL_AGENT), # True - (bp.AllMembersWithinRange(Group([1,3]), dist_123), ALL_AGENT), # True - (bp.AllMembersWithinRange(Group([2,3]), dist_123), ALL_AGENT), # False - 
(bp.AllMembersWithinRange(Group([1,3,5]), dist_123), ALL_AGENT), # False - (bp.AllMembersWithinRange(Group([1,3,5]), dist_135), ALL_AGENT), # True - (bp.AllMembersWithinRange(Group([2,4,6]), dist_135), ALL_AGENT)] # False + test_preds = [ # (Predicate, Team), the reward is 1 by default + (within_range_pred_cls(Group([1]), dist_123), ALL_AGENT), # Always true for group of 1 + (within_range_pred_cls(Group([1,2]), dist_123), ALL_AGENT), # True + (within_range_pred_cls(Group([1,3]), dist_123), ALL_AGENT), # True + (within_range_pred_cls(Group([2,3]), dist_123), ALL_AGENT), # False + (within_range_pred_cls(Group([1,3,5]), dist_123), ALL_AGENT), # False + (within_range_pred_cls(Group([1,3,5]), dist_135), ALL_AGENT), # True + (within_range_pred_cls(Group([2,4,6]), dist_135), ALL_AGENT)] # False # make all tiles habitable - env = self._get_taskenv(test_tasks, grass_map=True) + env = self._get_taskenv(test_preds, grass_map=True) MS = env.config.MAP_SIZE @@ -337,21 +349,23 @@ def test_all_members_within_range(self): _, _, _, infos = env.step({}) true_task = [0, 1, 2, 5] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_attain_skill(self): + attain_skill_pred_cls = make_predicate(bp.AttainSkill) + goal_level = 5 - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.AttainSkill(Group([1]), Skill.Melee, goal_level, 1), ALL_AGENT), # False - (bp.AttainSkill(Group([2]), Skill.Melee, goal_level, 1), ALL_AGENT), # False - (bp.AttainSkill(Group([1]), Skill.Range, goal_level, 1), ALL_AGENT), # True - (bp.AttainSkill(Group([1,3]), Skill.Fishing, goal_level, 1), ALL_AGENT), # True - (bp.AttainSkill(Group([1,2,3]), Skill.Carving, goal_level, 3), ALL_AGENT), # False - (bp.AttainSkill(Group([2,4]), Skill.Carving, goal_level, 2), ALL_AGENT)] # True + test_preds = [ # (Predicate, Team), the reward is 1 by default + (attain_skill_pred_cls(Group([1]), Skill.Melee, goal_level, 1), ALL_AGENT), # False 
+ (attain_skill_pred_cls(Group([2]), Skill.Melee, goal_level, 1), ALL_AGENT), # False + (attain_skill_pred_cls(Group([1]), Skill.Range, goal_level, 1), ALL_AGENT), # True + (attain_skill_pred_cls(Group([1,3]), Skill.Fishing, goal_level, 1), ALL_AGENT), # True + (attain_skill_pred_cls(Group([1,2,3]), Skill.Carving, goal_level, 3), ALL_AGENT), # False + (attain_skill_pred_cls(Group([2,4]), Skill.Carving, goal_level, 2), ALL_AGENT)] # True - env = self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) # AttainSkill(Group([1]), Skill.Melee, goal_level, 1) is false # AttainSkill(Group([2]), Skill.Melee, goal_level, 1) is false @@ -370,23 +384,25 @@ def test_attain_skill(self): _, _, _, infos = env.step({}) true_task = [2, 3, 5] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_inventory_space_ge_not(self): + inv_space_ge_pred_cls = make_predicate(bp.InventorySpaceGE) + # also test NOT InventorySpaceGE target_space = 3 - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.InventorySpaceGE(Group([1]), target_space), ALL_AGENT), # True -> False - (bp.InventorySpaceGE(Group([2,3]), target_space), ALL_AGENT), # True - (bp.InventorySpaceGE(Group([1,2,3]), target_space), ALL_AGENT), # True -> False - (bp.InventorySpaceGE(Group([1,2,3,4]), target_space+1), ALL_AGENT), # False - (~bp.InventorySpaceGE(Group([1]), target_space+1), ALL_AGENT), # True - (~bp.InventorySpaceGE(Group([1,2,3]), target_space), ALL_AGENT), # False -> True - (~bp.InventorySpaceGE(Group([1,2,3,4]), target_space+1), ALL_AGENT)] # True + test_preds = [ # (Predicate, Team), the reward is 1 by default + (inv_space_ge_pred_cls(Group([1]), target_space), ALL_AGENT), # True -> False + (inv_space_ge_pred_cls(Group([2,3]), target_space), ALL_AGENT), # True + (inv_space_ge_pred_cls(Group([1,2,3]), target_space), ALL_AGENT), # True -> False + (inv_space_ge_pred_cls(Group([1,2,3,4]), target_space+1), ALL_AGENT), 
# False + (~inv_space_ge_pred_cls(Group([1]), target_space+1), ALL_AGENT), # True + (~inv_space_ge_pred_cls(Group([1,2,3]), target_space), ALL_AGENT), # False -> True + (~inv_space_ge_pred_cls(Group([1,2,3,4]), target_space+1), ALL_AGENT)] # True - env = self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) # add one items to agent 1 within the limit capacity = env.realm.players[1].inventory.capacity @@ -397,7 +413,7 @@ def test_inventory_space_ge_not(self): self.assertTrue(env.realm.players[1].inventory.space >= target_space) true_task = [0, 1, 2, 4, 6] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # add one more item to agent 1 provide_item(env.realm, 1, Item.Ration, level=1, quantity=1) @@ -407,26 +423,29 @@ def test_inventory_space_ge_not(self): self.assertTrue(env.realm.players[1].inventory.space < target_space) true_task = [1, 4, 5, 6] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_own_equip_item(self): + own_item_pred_cls = make_predicate(bp.OwnItem) + equip_item_pred_cls = make_predicate(bp.EquipItem) + # ration, level 2, quantity 3 (non-stackable) # ammo level 2, quantity 3 (stackable, equipable) goal_level = 2 goal_quantity = 3 - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.OwnItem(Group([1]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # False - (bp.OwnItem(Group([2]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # False - (bp.OwnItem(Group([1,2]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # True - (bp.OwnItem(Group([3]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # True - (bp.OwnItem(Group([4,5,6]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # False - (bp.EquipItem(Group([4]), Item.Whetstone, goal_level, 1), ALL_AGENT), # False - (bp.EquipItem(Group([4,5]), Item.Whetstone, goal_level, 1), ALL_AGENT), # True - 
(bp.EquipItem(Group([4,5,6]), Item.Whetstone, goal_level, 2), ALL_AGENT)] # True - - env = self._get_taskenv(test_tasks) + test_preds = [ # (Predicate, Team), the reward is 1 by default + (own_item_pred_cls(Group([1]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # False + (own_item_pred_cls(Group([2]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # False + (own_item_pred_cls(Group([1,2]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # True + (own_item_pred_cls(Group([3]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # True + (own_item_pred_cls(Group([4,5,6]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # F + (equip_item_pred_cls(Group([4]), Item.Whetstone, goal_level, 1), ALL_AGENT), # False + (equip_item_pred_cls(Group([4,5]), Item.Whetstone, goal_level, 1), ALL_AGENT), # True + (equip_item_pred_cls(Group([4,5,6]), Item.Whetstone, goal_level, 2), ALL_AGENT)] # True + + env = self._get_taskenv(test_preds) # set the level, so that agents 4-6 can equip the Whetstone equip_stone = [4, 5, 6] @@ -459,21 +478,23 @@ def test_own_equip_item(self): _, _, _, infos = env.step({}) true_task = [2, 3, 6, 7] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_fully_armed(self): + fully_armed_pred_cls = make_predicate(bp.FullyArmed) + goal_level = 5 - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.FullyArmed(Group([1,2,3]), Skill.Range, goal_level, 1), ALL_AGENT), # False - (bp.FullyArmed(Group([3,4]), Skill.Range, goal_level, 1), ALL_AGENT), # True - (bp.FullyArmed(Group([4]), Skill.Melee, goal_level, 1), ALL_AGENT), # False - (bp.FullyArmed(Group([4,5,6]), Skill.Range, goal_level, 3), ALL_AGENT), # True - (bp.FullyArmed(Group([4,5,6]), Skill.Range, goal_level+3, 1), ALL_AGENT), # False - (bp.FullyArmed(Group([4,5,6]), Skill.Range, goal_level, 4), ALL_AGENT)] # False + test_preds = [ # (Predicate, Team), the reward is 1 by default + 
(fully_armed_pred_cls(Group([1,2,3]), Skill.Range, goal_level, 1), ALL_AGENT), # False + (fully_armed_pred_cls(Group([3,4]), Skill.Range, goal_level, 1), ALL_AGENT), # True + (fully_armed_pred_cls(Group([4]), Skill.Melee, goal_level, 1), ALL_AGENT), # False + (fully_armed_pred_cls(Group([4,5,6]), Skill.Range, goal_level, 3), ALL_AGENT), # True + (fully_armed_pred_cls(Group([4,5,6]), Skill.Range, goal_level+3, 1), ALL_AGENT), # False + (fully_armed_pred_cls(Group([4,5,6]), Skill.Range, goal_level, 4), ALL_AGENT)] # False - env = self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) # fully equip agents 4-6 fully_equip = [4, 5, 6] @@ -490,20 +511,22 @@ def test_fully_armed(self): _, _, _, infos = env.step({}) true_task = [1, 3] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_hoard_gold_and_team(self): # HoardGold, TeamHoardGold + hoard_gold_pred_cls = make_predicate(bp.HoardGold) + agent_gold_goal = 10 team_gold_goal = 30 - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.HoardGold(Group([1]), agent_gold_goal), ALL_AGENT), # True - (bp.HoardGold(Group([4,5,6]), agent_gold_goal), ALL_AGENT), # False - (bp.HoardGold(Group([1,3,5]), team_gold_goal), ALL_AGENT), # True - (bp.HoardGold(Group([2,4,6]), team_gold_goal), ALL_AGENT)] # False + test_preds = [ # (Predicate, Team), the reward is 1 by default + (hoard_gold_pred_cls(Group([1]), agent_gold_goal), ALL_AGENT), # True + (hoard_gold_pred_cls(Group([4,5,6]), agent_gold_goal), ALL_AGENT), # False + (hoard_gold_pred_cls(Group([1,3,5]), team_gold_goal), ALL_AGENT), # True + (hoard_gold_pred_cls(Group([2,4,6]), team_gold_goal), ALL_AGENT)] # False - env = self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) # give gold to agents 1-3 gold_struck = [1, 2, 3] @@ -514,24 +537,28 @@ def test_hoard_gold_and_team(self): # HoardGold, TeamHoardGold _, _, _, infos = env.step({}) true_task = [0, 2] - 
self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) g = sum(env.realm.players[eid].gold.val for eid in Group([2,4,6]).agents) - self._check_progress(test_tasks[3], infos, g / team_gold_goal) + self._check_progress(env.tasks[3], infos, g / team_gold_goal) # DONE def test_exchange_gold_predicates(self): # Earn Gold, Spend Gold, Make Profit + earn_gold_pred_cls = make_predicate(bp.EarnGold) + spend_gold_pred_cls = make_predicate(bp.SpendGold) + make_profit_pred_cls = make_predicate(bp.MakeProfit) + gold_goal = 10 - test_tasks = [ - (bp.EarnGold(Group([1,2]), gold_goal), ALL_AGENT), # True - (bp.EarnGold(Group([2,4]), gold_goal), ALL_AGENT), # False - (bp.SpendGold(Group([1]), 5), ALL_AGENT), # False -> True - (bp.SpendGold(Group([1]), 6), ALL_AGENT), # False, - (bp.MakeProfit(Group([1,2]), 5), ALL_AGENT), # True, - (bp.MakeProfit(Group([1]), 5), ALL_AGENT) # True -> False + test_preds = [ + (earn_gold_pred_cls(Group([1,2]), gold_goal), ALL_AGENT), # True + (earn_gold_pred_cls(Group([2,4]), gold_goal), ALL_AGENT), # False + (spend_gold_pred_cls(Group([1]), 5), ALL_AGENT), # False -> True + (spend_gold_pred_cls(Group([1]), 6), ALL_AGENT), # False, + (make_profit_pred_cls(Group([1,2]), 5), ALL_AGENT), # True, + (make_profit_pred_cls(Group([1]), 5), ALL_AGENT) # True -> False ] - env = self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) players = env.realm.players # 8 gold earned for agent 1 @@ -544,8 +571,8 @@ def test_exchange_gold_predicates(self): # Earn Gold, Spend Gold, Make Profit _, _, _, infos = env.step({}) true_task = [0,4,5] - self._check_result(env, test_tasks, infos, true_task) - self._check_progress(test_tasks[1], infos, 2 / gold_goal) + self._check_result(env, test_preds, infos, true_task) + self._check_progress(env.tasks[1], infos, 2 / gold_goal) env.realm.event_log.record(EventCode.BUY_ITEM, players[1], item=Item.Ration(env.realm,1), @@ -554,21 +581,23 @@ def 
test_exchange_gold_predicates(self): # Earn Gold, Spend Gold, Make Profit _, _, _, infos = env.step({}) true_task = [0,2,4] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_count_event(self): # CountEvent - test_tasks = [ - (bp.CountEvent(Group([1]),"EAT_FOOD",1), ALL_AGENT), # True - (bp.CountEvent(Group([1]),"EAT_FOOD",2), ALL_AGENT), # False - (bp.CountEvent(Group([1]),"DRINK_WATER",1), ALL_AGENT), # False - (bp.CountEvent(Group([1,2]),"GIVE_GOLD",1), ALL_AGENT) # True + count_event_pred_cls = make_predicate(bp.CountEvent) + + test_preds = [ + (count_event_pred_cls(Group([1]),"EAT_FOOD",1), ALL_AGENT), # True + (count_event_pred_cls(Group([1]),"EAT_FOOD",2), ALL_AGENT), # False + (count_event_pred_cls(Group([1]),"DRINK_WATER",1), ALL_AGENT), # False + (count_event_pred_cls(Group([1,2]),"GIVE_GOLD",1), ALL_AGENT) # True ] # 1 Drinks water once # 2 Gives gold once - env = self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) players = env.realm.players env.realm.event_log.record(EventCode.EAT_FOOD, players[1]) env.realm.event_log.record(EventCode.GIVE_GOLD, players[2]) @@ -576,16 +605,18 @@ def test_count_event(self): # CountEvent _, _, _, infos = env.step({}) true_task = [0,3] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_score_hit(self): # ScoreHit - test_tasks = [ - (bp.ScoreHit(Group([1]), Skill.Mage, 2), ALL_AGENT), # False -> True - (bp.ScoreHit(Group([1]), Skill.Melee, 1), ALL_AGENT) # True + score_hit_pred_cls = make_predicate(bp.ScoreHit) + + test_preds = [ + (score_hit_pred_cls(Group([1]), Skill.Mage, 2), ALL_AGENT), # False -> True + (score_hit_pred_cls(Group([1]), Skill.Melee, 1), ALL_AGENT) # True ] - env = self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) players = env.realm.players env.realm.event_log.record(EventCode.SCORE_HIT, @@ -601,8 +632,8 @@ def 
test_score_hit(self): # ScoreHit _, _, _, infos = env.step({}) true_task = [1] - self._check_result(env, test_tasks, infos, true_task) - self._check_progress(test_tasks[0], infos, 0.5) + self._check_result(env, test_preds, infos, true_task) + self._check_progress(env.tasks[0], infos, 0.5) env.realm.event_log.record(EventCode.SCORE_HIT, players[1], @@ -617,28 +648,84 @@ def test_score_hit(self): # ScoreHit _, _, _, infos = env.step({}) true_task = [0,1] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) + + # DONE + + def test_defeat_entity(self): # PlayerKill + defeat_pred_cls = make_predicate(bp.DefeatEntity) + + test_preds = [ + (defeat_pred_cls(Group([1]), 'npc', level=1, num_agent=1), ALL_AGENT), + (defeat_pred_cls(Group([1]), 'player', level=2, num_agent=2), ALL_AGENT)] + env = self._get_taskenv(test_preds) + players = env.realm.players + npcs = env.realm.npcs + + # set levels + npcs[-1].skills.melee.level.update(1) + npcs[-1].skills.range.level.update(1) + npcs[-1].skills.mage.level.update(1) + self.assertEqual(npcs[-1].attack_level, 1) + self.assertEqual(players[2].attack_level, 1) + players[3].skills.melee.level.update(3) + players[4].skills.melee.level.update(2) + + # killing player 2 does not progress the both tasks + env.realm.event_log.record(EventCode.PLAYER_KILL, players[1], + target=players[2]) # level 1 player + _, _, _, infos = env.step({}) + + true_task = [] # all false + self._check_result(env, test_preds, infos, true_task) + for task in env.tasks: + self._check_progress(task, infos, 0) + + # killing npc -1 completes the first task + env.realm.event_log.record(EventCode.PLAYER_KILL, players[1], + target=npcs[-1]) # level 1 npc + _, _, _, infos = env.step({}) + + true_task = [0] + self._check_result(env, test_preds, infos, true_task) + self._check_progress(env.tasks[0], infos, 1) + + # killing player 3 makes half progress on the second task + 
env.realm.event_log.record(EventCode.PLAYER_KILL, players[1], + target=players[3]) # level 3 player + _, _, _, infos = env.step({}) + self._check_progress(env.tasks[1], infos, .5) + + # killing player 4 completes the second task + env.realm.event_log.record(EventCode.PLAYER_KILL, players[1], + target=players[4]) # level 2 player + _, _, _, infos = env.step({}) + + true_task = [0,1] + self._check_result(env, test_preds, infos, true_task) + self._check_progress(env.tasks[1], infos, 1) # DONE def test_item_event_predicates(self): # Consume, Harvest, List, Buy - for predicate, event_type in [(bp.ConsumeItem, 'CONSUME_ITEM'), + for pred_fn, event_type in [(bp.ConsumeItem, 'CONSUME_ITEM'), (bp.HarvestItem, 'HARVEST_ITEM'), (bp.ListItem, 'LIST_ITEM'), (bp.BuyItem, 'BUY_ITEM')]: + predicate = make_predicate(pred_fn) id_ = getattr(EventCode, event_type) lvl = random.randint(5,10) quantity = random.randint(5,10) true_item = Item.Ration false_item = Item.Potion - test_tasks = [ + test_preds = [ (predicate(Group([1,3,5]), true_item, lvl, quantity), ALL_AGENT), # True (predicate(Group([2]), true_item, lvl, quantity), ALL_AGENT), # False (predicate(Group([4]), true_item, lvl, quantity), ALL_AGENT), # False (predicate(Group([6]), true_item, lvl, quantity), ALL_AGENT) # False ] - env = self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) players = env.realm.players # True case: split the required items between 3 and 5 for player in (1,3): @@ -673,8 +760,9 @@ def test_item_event_predicates(self): # Consume, Harvest, List, Buy env.obs = env._compute_observations() _, _, _, infos = env.step({}) true_task = [0] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE + if __name__ == '__main__': unittest.main() diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 29842d26..76356b2f 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -1,30 +1,29 @@ -# pylint: 
disable=import-error,unused-argument,invalid-name -# pylint: disable=no-member,no-value-for-parameter,not-callable,expression-not-assigned +# pylint: disable=unused-argument,invalid-name import unittest -import numpy as np +from types import FunctionType import nmmo from nmmo.core.env import Env -from nmmo.task.task_api import define_predicate, define_task +from nmmo.task.predicate_api import make_predicate, Predicate +from nmmo.task.task_api import Task, make_team_tasks from nmmo.task.group import Group -from nmmo.task.team_helper import TeamHelper from nmmo.task.constraint import InvalidConstraint, ScalarConstraint -from nmmo.task.base_predicates import TickGE, CanSeeGroup +from nmmo.task.base_predicates import TickGE, CanSeeGroup, AllMembersWithinRange from nmmo.systems import item as Item from nmmo.core import action as Action -from tests.testhelpers import ScriptedAgentTestConfig +from scripted.baselines import Sleeper +from tests.testhelpers import ScriptedAgentTestConfig, change_spawn_pos -@define_predicate +# define predicates in the function form +# with the required signatures: gs, subject def Success(gs, subject: Group): return True -@define_predicate def Failure(gs, subject: Group): return False -@define_task def Fake(gs, subject, a,b,c): return False @@ -34,40 +33,51 @@ def __init__(self): self.config = nmmo.config.Default() self.cache_result = {} self.get_subject_view = lambda _: None -class TestTaskAPI(unittest.TestCase): - def test_operators(self): +class TestTaskAPI(unittest.TestCase): + def test_predicate_operators(self): # pylint: disable=unsupported-binary-operation,invalid-unary-operand-type + # pylint: disable=no-value-for-parameter,not-callable,no-member - mock_gs = MockGameState() - SUCCESS = Success(Group([0])) - FAILURE = Failure(Group([0])) - # AND (&), OR (|), NOT (~), IMPLY (>>) - task1 = SUCCESS & FAILURE - self.assertFalse(task1(mock_gs)) + self.assertTrue(isinstance(Success, FunctionType)) + self.assertTrue(isinstance(Failure, 
FunctionType)) + + # make predicate class from function + success_pred_cls = make_predicate(Success) + failure_pred_cls = make_predicate(Failure) + self.assertTrue(isinstance(success_pred_cls, type)) # class + self.assertTrue(isinstance(failure_pred_cls, type)) - task2 = SUCCESS | FAILURE | SUCCESS - self.assertTrue(task2(mock_gs)) + # then instantiate predicates + SUCCESS = success_pred_cls(Group(0)) + FAILURE = failure_pred_cls(Group(0)) + self.assertTrue(isinstance(SUCCESS, Predicate)) + self.assertTrue(isinstance(FAILURE, Predicate)) - task3 = SUCCESS & ~ FAILURE & SUCCESS - self.assertTrue(task3(mock_gs)) + # NOTE: only the instantiated predicate can be used with operators like below + mock_gs = MockGameState() - task4 = SUCCESS >> SUCCESS - self.assertTrue(task4(mock_gs)) + # AND (&), OR (|), NOT (~) + pred1 = SUCCESS & FAILURE + self.assertFalse(pred1(mock_gs)) - task5 = SUCCESS >> ~ SUCCESS - self.assertFalse(task5(mock_gs)) + pred2 = SUCCESS | FAILURE | SUCCESS + self.assertTrue(pred2(mock_gs)) - task6 = (FAILURE >> FAILURE) & SUCCESS - self.assertTrue(task6(mock_gs)) + pred3 = SUCCESS & ~ FAILURE & SUCCESS + self.assertTrue(pred3(mock_gs)) - task7 = SUCCESS + SUCCESS - self.assertEqual(task7(mock_gs),2) + # predicate math + pred4 = 0.1 * SUCCESS + 0.3 + self.assertEqual(pred4(mock_gs), 0.4) + self.assertEqual(pred4.name, + "(ADD_(MUL_(Success_(0,))_0.1)_0.3)") - task8 = SUCCESS * 3 - self.assertEqual(task8(mock_gs),3) + pred5 = 0.3 * SUCCESS - 1 + self.assertEqual(pred5(mock_gs), 0.0) # cannot go below 0 - self.assertEqual(task6.name, "(PAND_(IMPLY_(Failure_(0,))_(Failure_(0,)))_(Success_(0,)))") + pred6 = 0.3 * SUCCESS + 1 + self.assertEqual(pred6(mock_gs), 1.0) # cannot go over 1 def test_team_assignment(self): team = Group([1, 2, 8, 9], "TeamFoo") @@ -79,19 +89,34 @@ def test_team_assignment(self): # don't allow member of one-member team self.assertEqual(team[2][0].name, team[2].name) - def test_task_name(self): - SUCCESS = Success(Group([0])) - 
FAILURE = Failure(Group([0])) - fake_task = Fake(Group([2]), 1, Item.Hat, Action.Melee) - combination = (SUCCESS & ~ (FAILURE | fake_task)) | (FAILURE >> fake_task) + def test_predicate_name(self): + # pylint: disable=no-value-for-parameter,no-member + # make predicate class from function + success_pred_cls = make_predicate(Success) + failure_pred_cls = make_predicate(Failure) + fake_pred_cls = make_predicate(Fake) + + # instantiate the predicates + SUCCESS = success_pred_cls(Group([0,2])) + FAILURE = failure_pred_cls(Group(0)) + fake_pred = fake_pred_cls(Group(2), 1, Item.Hat, Action.Melee) + combination = (SUCCESS & ~ (FAILURE | fake_pred)) | (FAILURE * fake_pred + .3) - .4 self.assertEqual(combination.name, - "(POR_(PAND_(Success_(0,))_(PNOT_(POR_(Failure_(0,))_(Fake_(2,)_1_Hat_Melee))))_\ -(IMPLY_(Failure_(0,))_(Fake_(2,)_1_Hat_Melee)))") + "(OR_(AND_(Success_(0,2))_(NOT_(OR_(Failure_(0,))_(Fake_(2,)_1_Hat_Melee))))_"+\ + "(SUB_(ADD_(MUL_(Failure_(0,))_(Fake_(2,)_1_Hat_Melee))_0.3)_0.4))") def test_constraint(self): + # pylint: disable=not-callable,no-value-for-parameter + # define predicate classes from functions + + # make predicate class from function + success_pred_cls = make_predicate(Success) + tickge_pred_cls = make_predicate(TickGE) + self.assertTrue(isinstance(TickGE, FunctionType)) + mock_gs = MockGameState() - good = Success(Group([0])) - bad = Success(Group([99999])) + good = success_pred_cls(Group(0)) + bad = success_pred_cls(Group(99999)) good(mock_gs) self.assertRaises(InvalidConstraint,lambda: bad(mock_gs)) @@ -100,71 +125,149 @@ def test_constraint(self): self.assertTrue(scalar.sample(mock_gs.config)<10) self.assertTrue(scalar.sample(mock_gs.config)>=-10) - bad = TickGE(Group([0]), -1) + bad = tickge_pred_cls(Group(0), -1) self.assertRaises(InvalidConstraint, lambda: bad(mock_gs)) - def test_sample_task(self): - task = CanSeeGroup() & TickGE() - self.assertEqual(task.name, - "(PAND_(CanSeeGroup_subject:GroupConstraint_target:GroupConstraint)_\ 
-(TickGE_subject:GroupConstraint_num_tick:ScalarConstraint))") + def test_sample_predicate(self): + # pylint: disable=no-value-for-parameter,expression-not-assigned + # make predicate class from function + canseegrp_pred_cls = make_predicate(CanSeeGroup) + tickge_pred_cls = make_predicate(TickGE) + + # if the predicate class is instantiated without the subject, + mock_gs = MockGameState() + predicate = canseegrp_pred_cls() & tickge_pred_cls() + self.assertEqual(predicate.name, + "(AND_(CanSeeGroup_subject:GroupConstraint_target:AgentListConstraint)_"+\ + "(TickGE_subject:GroupConstraint_num_tick:ScalarConstraint))") + + # this predicate cannot calculate progress becuase it has no subject + with self.assertRaises(AttributeError): + predicate(mock_gs) + + # this predicate supports sampling with valid arguments config = nmmo.config.Default() - TickGE().sample(config) - task.sample(config).name + tickge_pred_cls().sample(config) + predicate.sample(config).name # DONE + def test_task_api_with_predicate(self): + # pylint: disable=no-value-for-parameter,no-member + fake_pred_cls = make_predicate(Fake) + + mock_gs = MockGameState() + predicate = fake_pred_cls(Group(2), 1, Item.Hat, Action.Melee) + assignee = [1,2,3] # list of agent ids + task = predicate.create_task(assignee=assignee) + rewards, infos = task.compute_rewards(mock_gs) + + self.assertEqual(task.name, # contains predicate name and assignee list + "(Task_eval_fn:(Fake_(2,)_1_Hat_Melee)_assignee:(1,2,3))") + for agent_id in assignee: + self.assertEqual(rewards[agent_id], 0) + self.assertEqual(infos[agent_id]['progress'], 0) # progress (False -> 0) + self.assertFalse(task.completed) + + def test_task_api_with_function(self): + mock_gs = MockGameState() + def eval_with_subject_fn(subject: Group): + def is_agent_1(gs): + return any(agent_id == 1 for agent_id in subject.agents) + return is_agent_1 + + assignee = [1,2,3] # list of agent ids + task = Task(eval_with_subject_fn(Group(assignee)), assignee) + rewards, 
infos = task.compute_rewards(mock_gs) + + self.assertEqual(task.name, # contains predicate name and assignee list + "(Task_eval_fn:is_agent_1_assignee:(1,2,3))") + for agent_id in assignee: + self.assertEqual(rewards[agent_id], 1) + self.assertEqual(infos[agent_id]['progress'], 1) # progress (True -> 1) + self.assertTrue(task.completed) + + def test_predicate_fn_using_other_predicate_fn(self): + # define a predicate: to form a tight formation, for a certain number of ticks + def PracticeFormation(gs, subject, dist, num_tick): + return AllMembersWithinRange(gs, subject, dist) * TickGE(gs, subject, num_tick) + + # team should stay together within 1 tile for 10 ticks + goal_tick = 10 + task_spec = ('team', PracticeFormation, {'dist': 1, 'num_tick': goal_tick}) + + # create the test task from the task spec + teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]} + + config = ScriptedAgentTestConfig() + config.PLAYERS =[Sleeper] + config.IMMORTAL = True + + env = Env(config) + env.reset(make_task_fn=lambda: make_team_tasks(teams, [task_spec])) + + # move agent 2, 3 to agent 1's pos + for agent_id in [2,3]: + change_spawn_pos(env.realm, agent_id, + env.realm.players[1].pos) + + for tick in range(goal_tick+2): + _, rewards, _, infos = env.step({}) + + if tick < 10: + self.assertAlmostEqual(rewards[1], 1/goal_tick) + self.assertAlmostEqual((1+tick)/goal_tick, + infos[1]['task'][env.tasks[0].name]['progress']) + else: + # tick 11, task should be completed + self.assertEqual(rewards[1], 0) + self.assertEqual(infos[1]['task'][env.tasks[0].name]['progress'], 1) + self.assertEqual(infos[1]['task'][env.tasks[0].name]['completed'], True) + def test_completed_tasks_in_info(self): + # pylint: disable=no-value-for-parameter,no-member config = ScriptedAgentTestConfig() env = Env(config) - team_helper = TeamHelper.generate_from_config(config) - fake_task = Fake(Group([3]), 1, Item.Hat, Action.Melee) - task_assignment = \ - [(Success(Group([1])),2), - Failure(Group([1])), - 
Success(Group([1])) * -1, - 3 * Success(Group([1])), - Success(team_helper.own_team(2)), - fake_task - ] - env.change_task(task_assignment) + + # make predicate class from function + success_pred_cls = make_predicate(Success) + failure_pred_cls = make_predicate(Failure) + fake_pred_cls = make_predicate(Fake) + + # instantiate the predicates + same_team = [1, 2, 3, 4] + predicates = [ + success_pred_cls(Group(1)), # task 1 + failure_pred_cls(Group(2)), # task 2 + fake_pred_cls(Group(3), 1, Item.Hat, Action.Melee), # task 3 + success_pred_cls(Group(same_team))] # task 4 + + # tasks can be created directly from predicate instances + test_tasks = [pred.create_task() for pred in predicates] + + # tasks are all instantiated with the agent ids + env.reset(make_task_fn=lambda: test_tasks) _, _, _, infos = env.step({}) - # agent 1: task1 is always True - self.assertEqual(infos[1]['task'][Success(Group([1])).name], 1.0) - self.assertEqual(infos[1]['task'][(Success(Group([1])) * -1).name], -1.0) - self.assertEqual(infos[1]['task'][(3*Success(Group([1]))).name], 3.0) + # agent 1: assigned only task 1, which is always True + self.assertEqual(infos[1]['task'][env.tasks[0].name]['reward'], 1.0) + for i in [1, 2]: # task 2 and 3 + self.assertTrue(env.tasks[i].name not in infos[1]['task']) - # agent 2 should have been assigned Success but not Fake() - self.assertEqual(infos[2]['task'][Success(team_helper.own_team(2)).name], 1) - self.assertTrue(fake_task.name not in infos[2]['task']) + # agent 2: assigned task 2 (Failure) and task 4 (Success) + self.assertEqual(infos[2]['task'][env.tasks[1].name]['reward'], 0.0) # task 2 + self.assertEqual(infos[2]['task'][env.tasks[3].name]['reward'], 1.0) # task 4 - # agent 3 should have been assigned Fake(), which is always False (0) - self.assertEqual(infos[3]['task'][fake_task.name], 0) + # agent 3 assigned task 3, Fake(), which is always False (0) + self.assertEqual(infos[3]['task'][env.tasks[2].name]['reward'], 0.0) # task 3 # all agents in 
class TestTaskSystemPerf(unittest.TestCase):
  """Checks the default task set under several evaluation modes.

  When PROFILE_PERF is set, each mode is timed with profile_env_step();
  otherwise the env is reset with the tasks and stepped a few times.
  """

  def test_nmmo_default_task(self):
    """Create tasks for every test mode, verify their names, then run the env.

    Modes:
      None             -- nmmo_default_task() with its default mode
      'no_task'        -- passed through to nmmo_default_task()
      'dummy_eval_fn'  -- passed through to nmmo_default_task()
      'pure_func_eval' -- Task built directly from a plain eval function
    """
    config = nmmo.config.Default()
    env = Env(config)
    agent_list = env.possible_agents

    for test_mode in [None, 'no_task', 'dummy_eval_fn', 'pure_func_eval']:
      # subTest tags a failure with the offending mode and lets the
      # remaining modes still run instead of aborting the whole test.
      with self.subTest(test_mode=test_mode):

        # create tasks
        if test_mode == 'pure_func_eval':
          def create_stay_alive_eval_wo_group(agent_id: int):
            # the factory pins agent_id for the returned eval function
            return lambda gs: agent_id in gs.alive_agents
          tasks = [Task(create_stay_alive_eval_wo_group(agent_id), assignee=agent_id)
                   for agent_id in agent_list]
        else:
          tasks = nmmo_default_task(agent_list, test_mode)

        # check tasks
        # NOTE(review): indexing with agent_id-1 assumes one task per agent,
        # ordered by agent id starting at 1 -- confirm against nmmo_default_task
        for agent_id in agent_list:
          if test_mode is None:
            self.assertIn('StayAlive', tasks[agent_id-1].name) # default task
          if test_mode != 'no_task':
            self.assertIn(f'assignee:({agent_id},)', tasks[agent_id-1].name)

        # The lambda below is invoked within this same iteration, so capturing
        # the loop variable is safe here.
        # pylint: disable=cell-var-from-loop
        if PROFILE_PERF:
          test_cond = 'default' if test_mode is None else test_mode
          profile_env_step(tasks=tasks, condition=test_cond)
        else:
          env.reset(make_task_fn=lambda: tasks)
          for _ in range(3):
            env.step({})

    # DONE
def profile_env_step(action_target=True, tasks=None, condition=None):
  """Time env.step() and its main sub-steps, printing one line per measurement.

  Args:
    action_target: value assigned to config.PROVIDE_ACTION_TARGETS
    tasks: tasks handed to env.reset() through make_task_fn; None means []
    condition: optional label printed as a header before the timings
  """
  config = nmmo.config.Default()
  config.PLAYERS = [baselines.Sleeper] # the scripted agents doing nothing
  config.IMMORTAL = True # otherwise the agents will die
  config.PROVIDE_ACTION_TARGETS = action_target
  env = nmmo.Env(config)

  task_list = [] if tasks is None else tasks
  env.reset(seed=0, make_task_fn=lambda: task_list)

  # step a few times before taking any measurement
  for _ in range(3):
    env.step({})

  obs = env._compute_observations()

  # label -> callable to be timed; dict insertion order is the print order
  timed_calls = {
    'env.step({}):': lambda: env.step({}),
    'env.realm.step():': lambda: env.realm.step({}),
    'env._compute_observations():': lambda: env._compute_observations(),
    'obs.to_gym(), ActionTarget:': lambda: {a: o.to_gym() for a,o in obs.items()},
    'env._compute_rewards():': lambda: env._compute_rewards(obs.keys(), {}),
  }

  if condition:
    print('=== Test condition:', condition, '===')
  for label, call in timed_calls.items():
    elapsed = timeit(call, number=100, globals=globals())
    print(' -', label, elapsed)