From 1a578a64f9865cbdeb193f6bfb67bfee7ed07b31 Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 2 Jun 2023 04:49:46 +0000 Subject: [PATCH 01/18] refining task api --- nmmo/core/env.py | 106 ++---- nmmo/task/__init__.py | 2 +- nmmo/task/base_predicates.py | 4 +- nmmo/task/group.py | 6 +- nmmo/task/predicate_api.py | 294 +++++++++++++++++ nmmo/task/scenario.py | 132 ++++---- nmmo/task/task_api.py | 445 ++++++-------------------- tests/task/test_demo_task_creation.py | 230 +++++++------ tests/task/test_predicates.py | 82 +++-- tests/task/test_task_api.py | 205 +++++++----- 10 files changed, 791 insertions(+), 715 deletions(-) create mode 100644 nmmo/task/predicate_api.py diff --git a/nmmo/core/env.py b/nmmo/core/env.py index f8367610..854bf725 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -1,7 +1,6 @@ import functools import random -import copy -from typing import Any, Dict, List, Optional, Union, Tuple +from typing import Any, Dict, List from ordered_set import OrderedSet import gym @@ -16,8 +15,7 @@ from nmmo.entity.entity import Entity from nmmo.systems.item import Item from nmmo.task.game_state import GameStateGenerator -from nmmo.task.task_api import Task -from nmmo.task.scenario import default_task +from nmmo.task.task_api import Task, nmmo_default_task from scripted.baselines import Scripted class Env(ParallelEnv): @@ -41,15 +39,7 @@ def __init__(self, self._gamestate_generator = GameStateGenerator(self.realm, self.config) self.game_state = None - # Default task: rewards 1 each turn agent is alive - self.tasks: List[Tuple[Task,float]] = None - self._task_encoding = None - self._task_embedding_size = -1 - t = default_task(self.possible_agents) - self.change_task(t, - embedding_size=self._task_embedding_size, - task_encoding=self._task_encoding, - reset=False) + self.tasks = None # pylint: disable=method-cache-max-size-none @functools.lru_cache(maxsize=None) @@ -88,12 +78,6 @@ def box(rows, cols): if self.config.PROVIDE_ACTION_TARGETS: 
obs_space['ActionTargets'] = self.action_space(None) - if self._task_encoding: - obs_space['Task'] = gym.spaces.Box( - low=-2**20, high=2**20, - shape=(self._task_embedding_size,), - dtype=np.float32) - return gym.spaces.Dict(obs_space) def _init_random(self, seed): @@ -131,31 +115,10 @@ def action_space(self, agent): ############################################################################ # Core API - def change_task(self, - new_tasks: List[Union[Tuple[Task, float], Task]], - task_encoding: Optional[Dict[int, np.ndarray]] = None, - embedding_size: int=16, - reset: bool=True, - map_id=None, - seed=None, - options=None): - """ Changes the task given to each agent - - Args: - new_task: The task to complete and calculate rewards - task_encoding: A mapping from eid to encoded task - embedding_size: The size of each embedding - reset: Resets the environment - """ - self._tasks = [t if isinstance(t, Tuple) else (t,1) for t in new_tasks] - self._task_encoding = task_encoding - self._task_embedding_size = embedding_size - if reset: - self.reset(map_id=map_id, seed=seed, options=options) - # TODO: This doesn't conform to the PettingZoo API # pylint: disable=arguments-renamed - def reset(self, map_id=None, seed=None, options=None): + def reset(self, map_id=None, seed=None, options=None, + new_tasks: List[Task]=None): '''OpenAI Gym API reset function Loads a new game map and returns initial observations @@ -186,16 +149,19 @@ def reset(self, map_id=None, seed=None, options=None): if isinstance(ent.agent, Scripted): self.scripted_agents.add(eid) - self.tasks = copy.deepcopy(self._tasks) self.obs = self._compute_observations() self._gamestate_generator = GameStateGenerator(self.realm, self.config) - gym_obs = {} - for a, o in self.obs.items(): - gym_obs[a] = o.to_gym() - if self._task_encoding: - gym_obs[a]['Task'] = self._encode_goal().get(a,np.zeros(self._task_embedding_size)) - return gym_obs + # CHECK ME: How the tasks are provided to the env? 
+ # If the provided task instances are mapped to the individual agents, this is enough + # If not, we need to map the tasks to the agents using TeamHelper, in change_task perhaps + if new_tasks is None: + self.tasks = nmmo_default_task(self.possible_agents) + else: + # providing an empty new_tasks [] is also possible + self.tasks = new_tasks + + return {a: o.to_gym() for a,o in self.obs.items()} def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): '''Simulates one game tick or timestep @@ -308,11 +274,7 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): # Store the observations, since actions reference them self.obs = self._compute_observations() - gym_obs = {} - for a, o in self.obs.items(): - gym_obs[a] = o.to_gym() - if self._task_encoding: - gym_obs[a]['Task'] = self._encode_goal()[a] + gym_obs = {a: o.to_gym() for a,o in self.obs.items()} rewards, infos = self._compute_rewards(self.obs.keys(), dones) @@ -321,8 +283,6 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): def _validate_actions(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): '''Deserialize action arg values and validate actions For now, it does a basic validation (e.g., value is not none). - - TODO(kywch): add sophisticated validation like use/sell/give on the same item ''' validated_actions = {} @@ -423,9 +383,6 @@ def _compute_observations(self): inventory, market) return obs - def _encode_goal(self): - return self._task_encoding - def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]): '''Computes the reward for the specified agent @@ -442,27 +399,24 @@ def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]): entity identified by ent_id. 
''' # Initialization - self.game_state = self._gamestate_generator.generate(self.realm, self.obs) infos = {} - for eid in agents: - infos[eid] = {} - infos[eid]['task'] = {} - rewards = {eid: 0 for eid in agents} + for agent_id in agents: + infos[agent_id] = {} + infos[agent_id]['task'] = {} + rewards = {agent_id: 0 for agent_id in agents} # Compute Rewards and infos - for task, weight in self.tasks: + self.game_state = self._gamestate_generator.generate(self.realm, self.obs) + for task in self.tasks: task_rewards, task_infos = task.compute_rewards(self.game_state) - for eid, reward in task_rewards.items(): - # Rewards, weighted - rewards[eid] = rewards.get(eid,0) + reward * weight - # Infos - for eid, info in task_infos.items(): - if eid in infos: - infos[eid]['task'] = {**infos[eid]['task'], **info} - - # Remove rewards for dead agents (?) - for eid in dones: - rewards[eid] = 0 + for agent_id, reward in task_rewards.items(): + if agent_id in agents and agent_id not in dones: + rewards[agent_id] = rewards.get(agent_id,0) + reward + infos[agent_id]['task'][task.name] = task_infos[agent_id] # progress + + # Remove rewards for dead agents + for agent_id in dones: + rewards[agent_id] = -1 return rewards, infos diff --git a/nmmo/task/__init__.py b/nmmo/task/__init__.py index 4bd5626b..fde78a93 100644 --- a/nmmo/task/__init__.py +++ b/nmmo/task/__init__.py @@ -1,4 +1,4 @@ from .game_state import * +from .predicate_api import * from .task_api import * -from .scenario import * from .team_helper import * diff --git a/nmmo/task/base_predicates.py b/nmmo/task/base_predicates.py index 60e99ded..6c6fbc68 100644 --- a/nmmo/task/base_predicates.py +++ b/nmmo/task/base_predicates.py @@ -3,7 +3,7 @@ import numpy as np from numpy import count_nonzero as count -from nmmo.task.task_api import OR, define_predicate +from nmmo.task.predicate_api import POR, define_predicate from nmmo.task.group import Group from nmmo.task.game_state import GameState from nmmo.task import constraint @@ 
-87,7 +87,7 @@ def CanSeeGroup(gs: GameState, target: Group = constraint.TEAM_GROUPS): """ Returns True if subject can see any of target """ - return OR(*(CanSeeAgent(subject, agent) for agent in target.agents)) + return POR(*(CanSeeAgent(subject, agent) for agent in target.agents)) @define_predicate def DistanceTraveled(gs: GameState, diff --git a/nmmo/task/group.py b/nmmo/task/group.py index 29aa1b0b..442778c1 100644 --- a/nmmo/task/group.py +++ b/nmmo/task/group.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import Dict, Iterable, TYPE_CHECKING +from typing import Dict, Union, Iterable, TYPE_CHECKING from collections import OrderedDict from collections.abc import Set, Sequence @@ -10,9 +10,11 @@ class Group(Sequence, Set): ''' An immutable, ordered, unique group of agents involved in a task ''' def __init__(self, - agents: Iterable[int], + agents: Union(Iterable[int], int), name: str=None): + if isinstance(agents, int): + agents = (agents,) assert len(agents) > 0, "Team must have at least one agent" self.name = name if name else f"Agent({','.join([str(e) for e in agents])})" # Remove duplicates diff --git a/nmmo/task/predicate_api.py b/nmmo/task/predicate_api.py new file mode 100644 index 00000000..d945ed89 --- /dev/null +++ b/nmmo/task/predicate_api.py @@ -0,0 +1,294 @@ +from __future__ import annotations +from typing import Callable, List, Optional, Tuple, Union +from abc import ABC, abstractmethod +import inspect +from numbers import Real + +from nmmo.core.config import Config +from nmmo.task.group import Group, union +from nmmo.task.game_state import GameState +from nmmo.task.constraint import Constraint, InvalidConstraint, GroupConstraint + +class InvalidPredicateDefinition(Exception): + pass + +class Predicate(ABC): + """ A mapping from a game state to bounded [0, 1] float + """ + def __init__(self, + subject: Group, + *args, + constraints: Optional[List[Tuple[str,Optional[Constraint]]]] = None, + **kwargs): + self.name = 
self._make_name(self.__class__.__name__, args, kwargs) + + def is_group(x): + return isinstance(x, Group) + self._groups: List[Group] = list(filter(is_group, args)) + self._groups = self._groups + list(filter(is_group, kwargs.values())) + self._groups.append(subject) + + self._args = args + self._kwargs = kwargs + self._constraints = constraints + self._config = None + self._subject = subject + + def __call__(self, gs: GameState) -> float: + """ Calculates score + + Params: + gs: GameState + + Returns: + score: float bounded between [0, 1], 1 is considered to be true + """ + if not self._config == gs.config: + # TODO(mark) should we make this explicitly called by environment + self._reset(gs.config) + # Update views + for group in self._groups: + group.update(gs) + # Calculate score + cache = gs.cache_result + if self.name in cache: + score = cache[self.name] + else: + score = max(min(self._evaluate(gs)*1.0,1.0),0.0) + cache[self.name] = score + # Calculate score + return score + + def _reset(self, config: Config): + self._config = config + if not self.check(self._config): + raise InvalidConstraint() + + def check(self, config: Config): + """ Checks whether the predicate is valid + + A satisfiable predicate "makes sense" given a config + ie. Not trying to reach target off the map + """ + if not GroupConstraint().check(config, self._subject): + return False + for i, (name, constraint) in enumerate(self._constraints): + if constraint is None: + continue + if i < len(self._args): + if not constraint.check(config, self._args[i]): + return False + elif not constraint.check(config, self._kwargs[name]): + return False + return True + + def sample(self, config: Config, **overload): + """ Samples a concrete instance of a given task. + + Allows overloading of previous parameters. 
+ """ + # Sample Constraint + nargs = [arg.sample(config) if isinstance(arg, Constraint) else arg + for arg in self._args] + nkwargs = {k : v.sample(config) if isinstance(v, Constraint) else v + for k,v in self._kwargs.items()} + for i, (name, _) in enumerate(self._constraints): + if i < len(nargs): + if name in nkwargs: + raise InvalidPredicateDefinition("Constraints should match arguments.") + nkwargs[name] = nargs[i] + else: + break + + for k, v in overload.items(): + nkwargs[k] = v + # Result + return self.__class__(**nkwargs) + + @abstractmethod + def _evaluate(self, gs: GameState) -> float: + """ A mapping from a game state to the desirability of that state. + __call__() will cap its value to [0, 1] + """ + raise NotImplementedError + + def _make_name(self, class_name, args, kwargs) -> str: + def arg_to_string(arg): + if isinstance(arg, type): # class + return arg.__name__ + if arg is None: + return 'Any' + return str(arg) + + name = [class_name] + \ + list(map(arg_to_string, args)) + \ + [f"{arg_to_string(key)}:{arg_to_string(arg)}" for key, arg in kwargs.items()] + name = "("+'_'.join(name).replace(' ', '')+")" + return name + + def __str__(self): + return self.name + + @property + def subject(self): + return self._subject + + def __and__(self, other): + return PAND(self, other) + def __or__(self, other): + return POR(self, other) + def __invert__(self): + return PNOT(self) + def __rshift__(self, other): + return IMPLY(self, other) + def __add__(self, other): + return PADD(self, other) + def __radd__(self, other): + return PADD(self, other) + def __sub__(self, other): + return PSUB(self, other) + def __rsub__(self, other): + return PSUB(self, other) + def __mul__(self, other): + return PMUL(self, other) + def __rmul__(self, other): + return PMUL(self, other) + +################################################ + +def define_predicate(fn: Callable) -> type[Predicate]: + """ Syntactic sugar API for defining predicates + + See examples at base_predicates.py + 
""" + signature = inspect.signature(fn) + for i, param in enumerate(signature.parameters.values()): + if i == 0 and param.name != 'gs': + raise InvalidPredicateDefinition('First parameter must be gs: GameState') + if i == 1 and (param.name != 'subject'): + raise InvalidPredicateDefinition("Second parameter must be subject: Group") + + class FunctionPredicate(Predicate): + def __init__(self, *args, **kwargs) -> None: + constraints = [] + self._signature = signature + args = list(args) + for i, param in enumerate(self._signature.parameters.values()): + if i == 0: + continue + # Calculate list of constraints + if isinstance(param.default, Constraint): + constraints.append((param.name,param.default)) + else: + constraints.append((param.name,None)) + # Insert default values from function definition + if not param.name in kwargs and i-1 >= len(args): + if param.default == inspect.Parameter.empty: + args.append(param.default) + else: + kwargs[param.name] = param.default + super().__init__(*args, **kwargs, constraints=constraints) + self._args = args + self._kwargs = kwargs + self.name = self._make_name(fn.__name__, args, kwargs) + def _evaluate(self, gs: GameState) -> float: + # pylint: disable=redefined-builtin, unused-variable + __doc = fn.__doc__ + result = fn(gs, *self._args, **self._kwargs) + if isinstance(result, Predicate): + return result(gs) + return result + + return FunctionPredicate + + +################################################ +class PredicateOperator(Predicate): + def __init__(self, n, *predicates: Union[Predicate, Real], subject: Group=None): + if not n(len(predicates)): + raise InvalidPredicateDefinition(f"Need {n} arguments") + predicates = list(predicates) + self._subject_argument = subject + if subject is None: + try: + subject = union(*[p.subject + for p in filter(lambda p: isinstance(p, Predicate), predicates)]) + except AttributeError: + subject = GroupConstraint() + super().__init__(subject, *predicates) + + for i, p in 
enumerate(predicates): + if isinstance(p, Real): + predicates[i] = lambda _,v=predicates[i] : v + self._predicates = predicates + + def check(self, config: Config) -> bool: + return all((p.check(config) if isinstance(p, Predicate) + else True for p in self._predicates)) + + def sample(self, config: Config, cls: type[PredicateOperator], **kwargs): + subject = self._subject_argument if 'subject' not in kwargs else kwargs['subject'] + predicates = [p.sample(config, **kwargs) if isinstance(p, Predicate) + else p(None) for p in self._predicates] + return cls(*predicates, subject=subject) + +class POR(PredicateOperator, Predicate): + def __init__(self, *predicates: Predicate, subject: Group=None): + super().__init__(lambda n: n>0, *predicates, subject=subject) + def _evaluate(self, gs: GameState) -> float: + return any(p(gs) == 1 for p in self._predicates)*1.0 + def sample(self, config: Config, **kwargs): + return super().sample(config, POR, **kwargs) + +class PAND(PredicateOperator, Predicate): + def __init__(self, *predicates: Predicate, subject: Group=None): + super().__init__(lambda n: n>0, *predicates, subject=subject) + def _evaluate(self, gs: GameState) -> float: + return all(p(gs) == 1 for p in self._predicates)*1.0 + def sample(self, config: Config, **kwargs): + return super().sample(config, PAND, **kwargs) + +class PNOT(PredicateOperator, Predicate): + def __init__(self, predicate: Predicate, subject: Group=None): + super().__init__(lambda n: n==1, predicate, subject=subject) + def _evaluate(self, gs: GameState) -> float: + return 1.0 - self._predicates[0](gs) + def sample(self, config: Config, **kwargs): + return super().sample(config, PNOT, **kwargs) + +class IMPLY(PredicateOperator, Predicate): + def __init__(self, p: Predicate, q: Predicate, subject: Group=None): + super().__init__(lambda n: n==2, p,q, subject=subject) + def _evaluate(self, gs: GameState) -> float: + if self._predicates[0](gs) == 1: + return self._predicates[1](gs)*1.0 + return True + def 
sample(self, config: Config, **kwargs): + return super().sample(config, IMPLY, **kwargs) + +class PADD(PredicateOperator, Predicate): + def __init__(self, *predicate: Union[Predicate, Real], subject: Group=None): + super().__init__(lambda n: n>0, *predicate, subject=subject) + def _evaluate(self, gs: GameState) -> float: + return max(min(sum(p(gs) for p in self._predicates),1.0),0.0) + def sample(self, config: Config, **kwargs): + return super().sample(config, PADD, **kwargs) + +class PSUB(PredicateOperator, Predicate): + def __init__(self, p: Predicate, q: Union[Predicate, Real], subject: Group=None): + super().__init__(lambda n: n==2, p,q, subject=subject) + def _evaluate(self, gs: GameState) -> float: + return max(min(self._predicates[0](gs)-self._predicates[1](gs),1.0),0.0) + def sample(self, config: Config, **kwargs): + return super().sample(config, PSUB, **kwargs) + +class PMUL(PredicateOperator, Predicate): + def __init__(self, *predicate: Union[Predicate, Real], subject: Group=None): + super().__init__(lambda n: n>0, *predicate, subject=subject) + def _evaluate(self, gs: GameState) -> float: + result = 1.0 + for p in self._predicates: + result = result * p(gs) + return max(min(result,1.0),0.0) + def sample(self, config: Config, **kwargs): + return super().sample(config, PMUL, **kwargs) diff --git a/nmmo/task/scenario.py b/nmmo/task/scenario.py index 7efeb210..57be1b4e 100644 --- a/nmmo/task/scenario.py +++ b/nmmo/task/scenario.py @@ -1,76 +1,76 @@ -from __future__ import annotations +# from __future__ import annotations -import copy -from typing import Callable, Union, Iterable, \ - Optional, List, Tuple -from nmmo.core.config import Config -from nmmo.task.group import Group -from nmmo.task.team_helper import TeamHelper -from nmmo.task.task_api import Task, Repeat -from nmmo.task.base_predicates import StayAlive +# import copy +# from typing import Callable, Union, Iterable, \ +# Optional, List, Tuple +# from nmmo.core.config import Config +# from 
nmmo.task.group import Group +# from nmmo.task.team_helper import TeamHelper +# from nmmo.task.task_api import Task +# from nmmo.task.base_predicates import StayAlive -class Scenario: - ''' Utility class to aid in defining common tasks - ''' - def __init__(self, config: Config): - config = copy.deepcopy(config) - self.team_helper = TeamHelper.generate_from_config(config) - self.config = config - self._tasks: List[Task] = [] +# class Scenario: +# ''' Utility class to aid in defining common tasks +# ''' +# def __init__(self, config: Config): +# config = copy.deepcopy(config) +# self.team_helper = TeamHelper.generate_from_config(config) +# self.config = config +# self._tasks: List[Task] = [] - def add_task(self, task: Task): - self._tasks.append(task) +# def add_task(self, task: Task): +# self._tasks.append(task) - def add_tasks(self, - tasks: Union[Task, - Iterable[Task], - Callable[[Group], Task]], - groups: Optional[Union[str,Iterable[Group]]] = 'teams') -> None: - # pylint: disable=unnecessary-lambda-assignment - """ Utility function to define symmetric tasks +# def add_tasks(self, +# tasks: Union[Task, +# Iterable[Task], +# Callable[[Group], Task]], +# groups: Optional[Union[str,Iterable[Group]]] = 'teams') -> None: +# # pylint: disable=unnecessary-lambda-assignment +# """ Utility function to define symmetric tasks - Params: +# Params: - tasks: - Iterable[Task]: - For each Task in the iterable, add to scenario. - Callable[[Group], Task]: - A function taking in a group and return a task. - The result from applying this function to "groups" is added to - the scenario. - Task: - Mapped to Callable by overriding subject +# tasks: +# Iterable[Task]: +# For each Task in the iterable, add to scenario. +# Callable[[Group], Task]: +# A function taking in a group and return a task. +# The result from applying this function to "groups" is added to +# the scenario. +# Task: +# Mapped to Callable by overriding subject - groups: - Foreach group in groups, add a task. 
- """ - # Tasks - if isinstance(tasks, Iterable): - for task in tasks: - self.add_task(task) - return +# groups: +# Foreach group in groups, add a task. +# """ +# # Tasks +# if isinstance(tasks, Iterable): +# for task in tasks: +# self.add_task(task) +# return - # Functional Syntax - # Tasks - if isinstance(tasks, Task): - task_generator = lambda group: tasks.sample(config=self.config, subject=group) - else: - task_generator = tasks - # Groups - if isinstance(groups, str): - assert(groups in ['agents','teams']) - if groups == 'agents': - groups = self.team_helper.all_agents - elif groups == 'teams': - groups = self.team_helper.all_teams - # Create - self.add_tasks([task_generator(group) for group in groups]) +# # Functional Syntax +# # Tasks +# if isinstance(tasks, Task): +# task_generator = lambda group: tasks.sample(config=self.config, subject=group) +# else: +# task_generator = tasks +# # Groups +# if isinstance(groups, str): +# assert(groups in ['agents','teams']) +# if groups == 'agents': +# groups = self.team_helper.all_agents +# elif groups == 'teams': +# groups = self.team_helper.all_teams +# # Create +# self.add_tasks([task_generator(group) for group in groups]) - @property - def tasks(self) -> List[Task]: - return self._tasks +# @property +# def tasks(self) -> List[Task]: +# return self._tasks -def default_task(agents) -> List[Tuple[Task, float]]: - '''Generates the default reward on env.init - ''' - return [Repeat(StayAlive(Group([agent]))) for agent in agents] +# def default_task(agents) -> List[Tuple[Task, float]]: +# '''Generates the default reward on env.init +# ''' +# return [Repeat(StayAlive(Group([agent]))) for agent in agents] diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index df18db83..c59aa945 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -1,141 +1,89 @@ -from __future__ import annotations -from typing import Callable, Dict, List, Optional, Tuple, Union -from abc import ABC, abstractmethod -import inspect -from 
numbers import Real -import math +# pylint: disable=unused-import +from typing import Callable, Iterable, Dict, List, Union, Tuple +from types import FunctionType +from abc import ABC -from nmmo.core.config import Config -from nmmo.task.group import Group, union -from nmmo.task.game_state import GameState -from nmmo.task.constraint import Constraint, InvalidConstraint, GroupConstraint - -class InvalidTaskDefinition(Exception): - pass +from nmmo.task.group import Group +from nmmo.task.predicate_api import Predicate +from nmmo.task.base_predicates import StayAlive class Task(ABC): - """ A task is used to calculate rewards for agents in "assignee" + """ A task is used to calculate rewards for agents in assignee + based on the predicate and game state """ def __init__(self, - subject: Group, - *args, - constraints: Optional[List[Tuple[str,Optional[Constraint]]]] = None, - **kwargs): - self.name = self._make_name(self.__class__.__name__, args, kwargs) + eval_fn: Callable, + assignee: Union[Iterable[int], int], + reward_multiplier = 1.0): + if isinstance(assignee, int): + self._assignee = (assignee,) + else: + assert len(assignee) > 0, "Assignee cannot be empty" + self._assignee = tuple(set(assignee)) # dedup + self._eval_fn = eval_fn + self._progress = 0.0 + self._completed = False + self._reward_multiplier = reward_multiplier - def is_group(x): - return isinstance(x, Group) - self._groups: List[Group] = list(filter(is_group, args)) - self._groups = self._groups + list(filter(is_group, kwargs.values())) - self._groups.append(subject) + self.name = self._make_name(self.__class__.__name__, + eval_fn=eval_fn, assignee=self._assignee) - self._args = args - self._kwargs = kwargs - self._constraints = constraints - self._config = None - self._score = 0.0 - self._subject = subject + def reset(self): + self._progress = 0.0 + self._completed = False - def compute_rewards(self, gs) -> Tuple[Dict[int, float], Dict[int, Dict]]: - """ Environment facing API + @property + def 
assignee(self) -> Tuple[int]: + return self._assignee - Returns rewards and infos for all agents in subject - """ - reward = self(gs) - self._score - self._score += reward - rewards = {int(ent_id): reward for ent_id in self._subject} - infos = {int(ent_id): {self.name: self._score} - for ent_id in self._subject} - return rewards, infos + @property + def completed(self) -> bool: + return self._completed - def __call__(self, gs: GameState) -> float: - """ Calculates score + @property + def reward_multiplier(self) -> float: + return self._reward_multiplier - Params: - gs: GameState + def _map_eval_to_reward(self, gs) -> float: + """ The default reward is the diff between the old and new progress. + Once the task is completed, no more reward is provided. - Returns: - score + Override this function to create a custom reward function """ - if not self._config == gs.config: - # TODO(mark) should we make this explicitly called by environment - self._reset(gs.config) - # Update views - for group in self._groups: - group.update(gs) - # Calculate score - cache = gs.cache_result - if self.name in cache: - score = cache[self.name] - else: - score = self._evaluate(gs) - cache[self.name] = score - # Calculate score - return score + if self._completed: + return 0.0 - def _reset(self, config: Config): - self._score = 0.0 - self._config = config - if not self.check(self._config): - raise InvalidConstraint() + new_progress = max(min(self._eval_fn(gs)*1.0,1.0),0.0) + diff = new_progress - self._progress + self._progress = new_progress + if self._progress >= 1: + self._completed = True - def check(self, config: Config): - """ Checks whether the task is valid + return diff - A satisfiable task "makes sense" given a config - ie. 
Not trying to reach target off the map - """ - if not GroupConstraint().check(config, self._subject): - return False - for i, (name, constraint) in enumerate(self._constraints): - if constraint is None: - continue - if i < len(self._args): - if not constraint.check(config, self._args[i]): - return False - elif not constraint.check(config, self._kwargs[name]): - return False - return True + def compute_rewards(self, gs) -> Tuple[Dict[int, float], Dict[int, Dict]]: + """ Environment facing API - def sample(self, config: Config, **overload): - """ Samples a concrete instance of a given task. - - Allows overloading of previous parameters. + Returns rewards and infos for all agents in subject """ - # Sample Constraint - nargs = [arg.sample(config) if isinstance(arg, Constraint) else arg - for arg in self._args] - nkwargs = {k : v.sample(config) if isinstance(v, Constraint) else v - for k,v in self._kwargs.items()} - for i, (name, _) in enumerate(self._constraints): - if i < len(nargs): - if name in nkwargs: - raise InvalidTaskDefinition("Constraints should match arguments.") - nkwargs[name] = nargs[i] - else: - break + reward = self._map_eval_to_reward(gs) * self._reward_multiplier + rewards = {int(ent_id): reward for ent_id in self._assignee} + infos = {int(ent_id): {'reward': reward, 'progress': self._progress} + for ent_id in self._assignee} - for k, v in overload.items(): - nkwargs[k] = v - # Result - return self.__class__(**nkwargs) - - @abstractmethod - def _evaluate(self, gs: GameState) -> float: - """ A mapping from a game state to the desirability of that state. 
- """ - raise NotImplementedError + # NOTE: tasks do not know whether assignee agents are alive or dead + # so the Env must check it before filling in rewards and infos + return rewards, infos - def _make_name(self, class_name, args, kwargs) -> str: + def _make_name(self, class_name, **kwargs) -> str: def arg_to_string(arg): - if isinstance(arg, type): # class + if isinstance(arg, (type, FunctionType)): # class or function return arg.__name__ if arg is None: return 'Any' return str(arg) name = [class_name] + \ - list(map(arg_to_string, args)) + \ [f"{arg_to_string(key)}:{arg_to_string(arg)}" for key, arg in kwargs.items()] name = "("+'_'.join(name).replace(' ', '')+")" return name @@ -143,233 +91,48 @@ def arg_to_string(arg): def __str__(self): return self.name - @property - def subject(self): - return self._subject - - def __add__(self, other): - return ADD(self, other) - def __radd__(self, other): - return ADD(self, other) - def __mul__(self, other): - return MUL(self, other) - def __rmul__(self, other): - return MUL(self, other) - def __and__(self, other): - return AND(self, other) - def __or__(self, other): - return OR(self, other) - def __invert__(self): - return NOT(self) - -class Predicate(Task): - """ A task with evaluate restricted to boolean values. 
- - True = 1.0 - False = 0.0 - """ - def __call__(self, gs: GameState) -> float: - if not self._config == gs.config: - self._reset(gs.config) - # Update views - for group in self._groups: - group.update(gs) - # Calculate score - cache = gs.cache_result - if self.name in cache: - score = cache[self.name] - else: - score = max(min(self._evaluate(gs)*1,1.0),0.0) - cache[self.name] = score - # Calculate score - return score - - def __and__(self, other): - return PAND(self, other) - def __or__(self, other): - return POR(self, other) - def __invert__(self): - return PNOT(self) - def __rshift__(self, other): - return IMPLY(self, other) - -################################################ - -def define_task(fn: Callable) -> type[Task]: - """ Syntactic sugar API for defining tasks - - See examples at base_predicates.py - """ - signature = inspect.signature(fn) - for i, param in enumerate(signature.parameters.values()): - if i == 0 and param.name != 'gs': - raise InvalidTaskDefinition('First parameter must be gs: GameState') - if i == 1 and (param.name != 'subject'): - raise InvalidTaskDefinition("Second parameter must be subject: Group") - - class FunctionTask(Task): - def __init__(self, *args, **kwargs) -> None: - constraints = [] - self._signature = signature - args = list(args) - for i, param in enumerate(self._signature.parameters.values()): - if i == 0: - continue - # Calculate list of constraints - if isinstance(param.default, Constraint): - constraints.append((param.name,param.default)) - else: - constraints.append((param.name,None)) - # Insert default values from function definition - if not param.name in kwargs and i-1 >= len(args): - if param.default == inspect.Parameter.empty: - args.append(param.default) - else: - kwargs[param.name] = param.default - super().__init__(*args, **kwargs, constraints=constraints) - self._args = args - self._kwargs = kwargs - self.name = self._make_name(fn.__name__, args, kwargs) - def _evaluate(self, gs: GameState) -> float: - # 
pylint: disable=redefined-builtin, unused-variable - __doc = fn.__doc__ - result = fn(gs, *self._args, **self._kwargs) - if isinstance(result, Task): - return result(gs) - return result - - return FunctionTask - -def define_predicate(fn: Callable) -> type[Predicate]: - T = define_task(fn) - class FunctionPredicate(Predicate, T): - # pylint: disable=super-init-not-called - def __init__(self, *args, **kwargs) -> None: - T.__init__(self, *args, **kwargs) - return FunctionPredicate - -################################################ -class TaskOperator(Task): - def __init__(self, n, *tasks: Union[Task, Real] ,subject: Group=None): - if not n(len(tasks)): - raise InvalidTaskDefinition(f"Need {n} arguments") - tasks = list(tasks) - self._subject_argument = subject - if subject is None: - try: - subject = union(*[t.subject for t in filter(lambda t: isinstance(t, Task), tasks)]) - except AttributeError: - subject = GroupConstraint() - super().__init__(subject, *tasks) - - for i, t in enumerate(tasks): - if isinstance(t, Real): - tasks[i] = lambda _,v=tasks[i] : v - self._tasks = tasks - - def check(self, config: Config) -> bool: - return all((t.check(config) if isinstance(t, Task) else True for t in self._tasks)) - - def sample(self, config: Config, cls: type[TaskOperator], **kwargs): - subject = self._subject_argument if 'subject' not in kwargs else kwargs['subject'] - tasks = [t.sample(config, **kwargs) if isinstance(t, Task) else t(None) for t in self._tasks] - return cls(*tasks, subject=subject) -class OR(TaskOperator): - def __init__(self, *tasks: Union[Task, Real], subject: Group=None): - super().__init__(lambda n: n>0, *tasks, subject=subject) - def _evaluate(self, gs: GameState) -> float: - return max(t(gs) for t in self._tasks) - def sample(self, config: Config, **kwargs): - return super().sample(config, OR, **kwargs) - -class AND(TaskOperator): - def __init__(self, *tasks: Union[Task, Real], subject: Group=None): - super().__init__(lambda n: n>0, *tasks, 
subject=subject) - def _evaluate(self, gs: GameState) -> float: - return min(t(gs) for t in self._tasks) - def sample(self, config: Config, **kwargs): - return super().sample(config, AND, **kwargs) - -class NOT(TaskOperator): - def __init__(self, *tasks: Union[Task, Real], subject: Group=None): - super().__init__(lambda n: n>0, *tasks, subject=subject) - def _evaluate(self, gs: GameState) -> float: - return -sum(t(gs) for t in self._tasks) - def sample(self, config: Config, **kwargs): - return super().sample(config, NOT, **kwargs) - -class ADD(TaskOperator): - def __init__(self, *tasks: Union[Task, Real], subject: Group=None): - super().__init__(lambda n: n>0, *tasks, subject=subject) - def _evaluate(self, gs: GameState) -> float: - return sum(t(gs) for t in self._tasks) - def sample(self, config: Config, **kwargs): - return super().sample(config, ADD, **kwargs) - -class MUL(TaskOperator): - def __init__(self, *tasks: Union[Task, Real], subject: Group=None): - super().__init__(lambda n: n>0, *tasks, subject=subject) - def _evaluate(self, gs: GameState) -> float: - result = 1.0 - for t in self._tasks: - result = result * t(gs) - return result - def sample(self, config: Config, **kwargs): - return super().sample(config, MUL, **kwargs) - -class POR(TaskOperator, Predicate): - def __init__(self, *tasks: Predicate, subject: Group=None): - super().__init__(lambda n: n>0, *tasks, subject=subject) - def _evaluate(self, gs: GameState) -> float: - return any(t(gs) for t in self._tasks) - def sample(self, config: Config, **kwargs): - return super().sample(config, POR, **kwargs) - -class PAND(TaskOperator, Predicate): - def __init__(self, *tasks: Predicate, subject: Group=None): - super().__init__(lambda n: n>0, *tasks, subject=subject) - def _evaluate(self, gs: GameState) -> float: - return all(t(gs) for t in self._tasks) - def sample(self, config: Config, **kwargs): - return super().sample(config, PAND, **kwargs) - -class PNOT(TaskOperator, Predicate): - def __init__(self, 
task: Predicate, subject: Group=None): - super().__init__(lambda n: n==1, task, subject=subject) - def _evaluate(self, gs: GameState) -> float: - return not self._tasks[0](gs) - def sample(self, config: Config, **kwargs): - return super().sample(config, PNOT, **kwargs) - -class IMPLY(TaskOperator, Predicate): - def __init__(self, p: Predicate, q: Predicate, subject: Group=None): - super().__init__(lambda n: n==2, p,q, subject=subject) - def _evaluate(self, gs: GameState) -> float: - if self._tasks[0](gs): - return self._tasks[1](gs) - return True - def sample(self, config: Config, **kwargs): - return super().sample(config, IMPLY, **kwargs) - -class Once(TaskOperator): - def __init__(self, task: Task, subject: Group=None): - super().__init__(lambda n: n==1, task, subject=subject) - self._maximum_score = -math.inf - def _evaluate(self, gs: GameState) -> float: - self._maximum_score = max(self._maximum_score, self._tasks[0](gs)) - return self._maximum_score - def sample(self, config: Config, **kwargs): - return super().sample(config, Once, **kwargs) - -class Repeat(TaskOperator): - def __init__(self, task: Task, subject: Group=None): - super().__init__(lambda n: n==1, task, subject=subject) - self._current_score = 0 - def _evaluate(self, gs: GameState) -> float: - self._current_score += self._tasks[0](gs) - return self._current_score - def sample(self, config: Config, **kwargs): - return super().sample(config, Repeat, **kwargs) - -# TODO(mark) should we define the remaining available operators -# such as multiply, modulo... +class OngoingTask(Task): + def _map_eval_to_reward(self, gs) -> float: + """Keep returning the progress reward after the task is completed. + However, this task tracks the completion status in the same manner. 
+ """ + self._progress = max(min(self._eval_fn(gs)*1.0,1.0),0.0) + if self._progress >= 1: + self._completed = True + return self._progress + + +###################################################################### +# Task generator helpers + +def make_same_tasks(pred: Predicate, + assignee: Union[Iterable[int], int], + task_cls=Task, + reward_multiplier=1.0, + **kwargs): + if isinstance(assignee, int): + assignee = [assignee] + + # when a list of agent is provided, return a list of identical tasks + return [task_cls(eval_fn=pred(Group(agent_id),**kwargs), + assignee=agent_id, reward_multiplier=reward_multiplier) + for agent_id in set(assignee)] + +# The performance of function based eval_fn vs. predicate +# NOTE: there is ~30% perf overhead for the class based predicate +def make_stay_alive_eval(subject: Group): + def stay_alive_eval(gs): + # return True # for speed testing + return all(agent_id in gs.alive_agents for agent_id in subject.agents) + + # change function name for each agent + return FunctionType( + stay_alive_eval.__code__, globals(), f"stay_alive_{str(subject.agents)}", + closure=stay_alive_eval.__closure__ + ) + +def nmmo_default_task(agent_list: Iterable[int]) -> List[Task]: + #return make_same_tasks(StayAlive, agent_list, task_cls=RepeatTask) + return [OngoingTask(eval_fn=make_stay_alive_eval(Group(agent_id)), + assignee=agent_id) + for agent_id in agent_list] diff --git a/tests/task/test_demo_task_creation.py b/tests/task/test_demo_task_creation.py index 4aaa1894..d6d4e16d 100644 --- a/tests/task/test_demo_task_creation.py +++ b/tests/task/test_demo_task_creation.py @@ -1,14 +1,22 @@ +# pylint: disable=invalid-name,unused-argument,unused-variable import unittest from tests.testhelpers import ScriptedAgentTestConfig from nmmo.core.env import Env from nmmo.lib.log import EventCode from nmmo.systems import skill -from nmmo.task import base_predicates as p +from nmmo.task import predicate_api as p +from nmmo.task import base_predicates as bp from 
nmmo.task import task_api as t from nmmo.task.game_state import GameState from nmmo.task.group import Group -from nmmo.task.scenario import Scenario + + +def rollout(env, tasks, steps=5): + env.reset(new_tasks=tasks) + for _ in range(steps): + env.step({}) + return env.step({}) class TestDemoTask(unittest.TestCase): @@ -22,163 +30,181 @@ class Tier: NORMAL = 6 / REWARD_SCALE HARD = 11 / REWARD_SCALE - # Usage of inbuilt predicate - def player_kills(scenario: Scenario): - scenario.add_tasks(p.CountEvent(event='PLAYER_KILL',N=1)*Tier.EASY) - scenario.add_tasks(p.CountEvent(event='PLAYER_KILL',N=2)*Tier.NORMAL) - scenario.add_tasks(p.CountEvent(event='PLAYER_KILL',N=3)*Tier.HARD) - return scenario.tasks + # Predicates defined below can be evaluated over one agent or several agents, + # which are sepcified separately + # Reward multiplier is indendent from predicates and used by tasks. + # The multipliers are just shown to indicate the difficulty level of predicates - def exploration(scenario: Scenario): - scenario.add_tasks(p.DistanceTraveled(dist=16)*Tier.EASY) - scenario.add_tasks(p.DistanceTraveled(dist=32)*Tier.NORMAL) - scenario.add_tasks(p.DistanceTraveled(dist=64)*Tier.HARD) - return scenario.tasks + # Usage of base predicates (see nmmo/task/base_predicates.py) + player_kills = [ # (predicate, kwargs, reward_multiplier) + (bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 1}, Tier.EASY), + (bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 2}, Tier.NORMAL), + (bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 3}, Tier.HARD)] + + exploration = [ # (predicate, reward_multiplier) + (bp.DistanceTraveled, {'dist': 16}, Tier.EASY), + (bp.DistanceTraveled, {'dist': 32}, Tier.NORMAL), + (bp.DistanceTraveled, {'dist': 64}, Tier.HARD)] # Demonstrates custom predicate - return float/boolean - @t.define_predicate + @p.define_predicate def EquipmentLevel(gs: GameState, subject: Group, number: int): - equipped = (subject.item.equipped>0) + equipped = subject.item.equipped > 0 levels = 
subject.item.level[equipped] return levels.sum() >= number - def equipment(scenario: Scenario): - scenario.add_tasks(EquipmentLevel(number=1 )*Tier.EASY, groups='agents') - scenario.add_tasks(EquipmentLevel(number=5 )*Tier.NORMAL, groups='agents') - scenario.add_tasks(EquipmentLevel(number=10)*Tier.HARD, groups='agents') - return scenario.tasks + equipment = [ # (predicate, reward_multiplier) + (EquipmentLevel, {'number': 1}, Tier.EASY), + (EquipmentLevel, {'number': 5}, Tier.NORMAL), + (EquipmentLevel, {'number': 10}, Tier.HARD)] - @t.define_predicate + @p.define_predicate def CombatSkill(gs, subject, lvl): - return t.OR(p.AttainSkill(subject, skill.Melee, lvl, 1), - p.AttainSkill(subject, skill.Range, lvl, 1), - p.AttainSkill(subject, skill.Mage, lvl, 1)) + # using predicate OR + return p.POR(bp.AttainSkill(subject, skill.Melee, lvl, 1), + bp.AttainSkill(subject, skill.Range, lvl, 1), + bp.AttainSkill(subject, skill.Mage, lvl, 1)) - def combat(scenario: Scenario): - scenario.add_tasks(CombatSkill(lvl=2)*Tier.EASY, groups='agents') - scenario.add_tasks(CombatSkill(lvl=3)*Tier.NORMAL, groups='agents') - scenario.add_tasks(CombatSkill(lvl=4)*Tier.HARD, groups='agents') - return scenario.tasks + combat = [ # (predicate, reward_multiplier) + (CombatSkill, {'lvl': 2}, Tier.EASY), + (CombatSkill, {'lvl': 3}, Tier.NORMAL), + (CombatSkill, {'lvl': 4}, Tier.HARD)] - @t.define_predicate + @p.define_predicate def ForageSkill(gs, subject, lvl): - return t.OR(p.AttainSkill(subject, skill.Fishing, lvl, 1), - p.AttainSkill(subject, skill.Herbalism, lvl, 1), - p.AttainSkill(subject, skill.Prospecting, lvl, 1), - p.AttainSkill(subject, skill.Carving, lvl, 1), - p.AttainSkill(subject, skill.Alchemy, lvl, 1)) - - def foraging(scenario: Scenario): - scenario.add_tasks(ForageSkill(lvl=2)*Tier.EASY) - scenario.add_tasks(ForageSkill(lvl=3)*Tier.NORMAL) - scenario.add_tasks(ForageSkill(lvl=4)*Tier.HARD) - return scenario.tasks - - # Demonstrate task scenario definition API - def 
all_tasks(scenario: Scenario): - player_kills(scenario) - exploration(scenario) - equipment(scenario) - combat(scenario) - foraging(scenario) - return scenario.tasks + return p.POR(bp.AttainSkill(subject, skill.Fishing, lvl, 1), + bp.AttainSkill(subject, skill.Herbalism, lvl, 1), + bp.AttainSkill(subject, skill.Prospecting, lvl, 1), + bp.AttainSkill(subject, skill.Carving, lvl, 1), + bp.AttainSkill(subject, skill.Alchemy, lvl, 1)) + + foraging = [ # (predicate, reward_multiplier) + (ForageSkill, {'lvl': 2}, Tier.EASY), + (ForageSkill, {'lvl': 3}, Tier.NORMAL), + (ForageSkill, {'lvl': 4}, Tier.HARD)] # Test rollout - task_generators = [player_kills, exploration, equipment, combat, foraging, all_tasks] - for tg in task_generators: - config = ScriptedAgentTestConfig() - env = Env(config) - scenario = Scenario(config) - tasks = tg(scenario) - env.change_task(tasks) - for _ in range(10): - env.step({}) + config = ScriptedAgentTestConfig() + env = Env(config) + + # Creating and testing "team" tasks + # i.e., predicates are evalauated over all team members, + # and all team members get the same reward from each task + + # The team mapping can come from anywhere. 
+ # The below is an arbitrary example and even doesn't include all agents + teams = {0: [1, 2, 3, 4], 1: [5, 6, 7, 8]} + + # Making player_kills and exploration team tasks, + team_tasks = [] + for pred, kwargs, weight in player_kills + exploration: + for team in teams.values(): + team_tasks.append(t.Task(pred(Group(team), **kwargs), + assignee=team, + reward_multiplier=weight)) + + # Run the environment with these tasks + # check rewards and infos for the task info + obs, rewards, dones, infos = rollout(env, team_tasks) + + # Creating and testing the same task for all agents + # i.e, each agent gets evaluated and rewarded individually + same_tasks = [] + for pred, kwargs, weight in exploration + equipment + combat + foraging: + # a helper function can do this + same_tasks += t.make_same_tasks(pred, env.possible_agents, + reward_multiplier=weight, + **kwargs) + + # Run the environment with these tasks + # check rewards and infos for the task info + obs, rewards, dones, infos = rollout(env, same_tasks) # DONE def test_player_kill_reward(self): - """ Reward 0.1 per player defeated, 1 for first and 3rd kills + """ Design a predicate with a complex progress scheme """ config = ScriptedAgentTestConfig() env = Env(config) - scenario = Scenario(config) # PARTICIPANT WRITES # ==================================== - @t.define_task - def KillTask(gs: GameState, - subject: Group): - """ Reward 0.1 per player defeated, with a bonus for the 1st and 3rd kills. 
+ @p.define_predicate + def KillPredicate(gs: GameState, + subject: Group): + """The progress, the max of which is 1, should + * increase small for each player kill + * increase big for the 1st and 3rd kills + * reach 1 with 10 kills """ num_kills = len(subject.event.PLAYER_KILL) - score = num_kills * 0.1 + progress = num_kills * 0.06 if num_kills >= 1: - score += 1 + progress += .1 if num_kills >= 3: - score += 1 - return score + progress += .3 + return min(progress, 1.0) - scenario.add_tasks(lambda agent: KillTask(agent), groups='agents') - # ==================================== + kill_tasks = t.make_same_tasks(KillPredicate, env.possible_agents) # Test Reward - env.change_task(scenario.tasks) + env.reset(new_tasks=kill_tasks) players = env.realm.players code = EventCode.PLAYER_KILL env.realm.event_log.record(code, players[1], target=players[3]) env.realm.event_log.record(code, players[2], target=players[4]) env.realm.event_log.record(code, players[2], target=players[5]) env.realm.event_log.record(EventCode.EAT_FOOD, players[2]) - # Award given as designed - # Agent 1 kills 1 - reward 1 + 0.1 - # Agent 2 kills 2 - reward 1 + 0.2 - # Agent 3 kills 0 - reward 0 - _, rewards, _, _ = env.step({}) - self.assertEqual(rewards[1],1.1) - self.assertEqual(rewards[2],1.2) - self.assertEqual(rewards[3],0) - # No reward when no changes - _, rewards, _, _ = env.step({}) - self.assertEqual(rewards[1],0) - self.assertEqual(rewards[2],0) - self.assertEqual(rewards[3],0) - # Test task reset on env reset - env.reset() + + # Award given as designed + # Agent 1 kills 1 - reward .06 + .1 + # Agent 2 kills 2 - reward .12 + .1 + # Agent 3 kills 0 - reward 0 _, rewards, _, _ = env.step({}) - self.assertEqual(env.tasks[0][0]._score,0) + self.assertEqual(rewards[1], 0.16) + self.assertEqual(rewards[2], 0.22) + self.assertEqual(rewards[3], 0) - # Test Rollout - env.change_task(scenario.tasks) - for _ in range(10): - env.step({}) + # No reward when no changes + _, rewards, _, _ = 
env.step({}) + self.assertEqual(rewards[1], 0) + self.assertEqual(rewards[2], 0) + self.assertEqual(rewards[3], 0) # DONE - def test_combination_task_reward(self): + def test_predicate_math(self): config = ScriptedAgentTestConfig() env = Env(config) - scenario = Scenario(config) - task = t.OR(p.CountEvent(event='PLAYER_KILL',N=5),p.TickGE(num_tick=5)) - task = task * 5 - scenario.add_tasks(task) + @p.define_predicate + def PredicateMath(gs, subject): + progress = 0.8 * bp.CountEvent(subject, event='PLAYER_KILL', N=7) + \ + 1.1 * bp.TickGE(subject, num_tick=3) + # NOTE: the resulting progress will be bounded from [0, 1] afterwards + return progress + + task_for_agent_1 = t.make_same_tasks(PredicateMath, assignee=1) # Test Reward - env.change_task(scenario.tasks) + env.reset(new_tasks=task_for_agent_1) code = EventCode.PLAYER_KILL players = env.realm.players env.realm.event_log.record(code, players[1], target=players[2]) env.realm.event_log.record(code, players[1], target=players[3]) _, rewards, _, _ = env.step({}) - self.assertEqual(rewards[1],2) + self.assertAlmostEqual(rewards[1], 0.8*2/7 + 1.1*1/3) - for _ in range(4): + for _ in range(2): _, _, _, infos = env.step({}) - - self.assertEqual(list(infos[1]['task'].values())[0],5.0) + + # 0.8*2/7 + 1.1 > 1, but the progress is maxed at 1 + self.assertEqual(infos[1]['task'][env.tasks[0].name]['progress'], 1.0) + self.assertTrue(env.tasks[0].completed) # because progress >= 1 # DONE diff --git a/tests/task/test_predicates.py b/tests/task/test_predicates.py index 581d8864..0cd59882 100644 --- a/tests/task/test_predicates.py +++ b/tests/task/test_predicates.py @@ -1,5 +1,5 @@ import unittest -from typing import List, Tuple +from typing import List, Tuple, Union, Iterable import random from tests.testhelpers import ScriptedAgentTestConfig, provide_item @@ -15,27 +15,21 @@ # pylint: disable=import-error from nmmo.core.env import Env -from nmmo.task.task_api import Task, TaskOperator +from nmmo.task.predicate_api import 
Predicate +from nmmo.task.task_api import OngoingTask from nmmo.task.group import Group import nmmo.task.base_predicates as bp # use the constant reward of 1 for testing predicates NUM_AGENT = 6 -ALL_AGENT = Group(list(range(1, NUM_AGENT+1)), 'All') +ALL_AGENT = list(range(1, NUM_AGENT+1)) -class Change(TaskOperator): - def __init__(self, task: Task, subject: Group=None): - super().__init__(lambda n: n==1, task, subject=subject) - def _evaluate(self, gs) -> float: - return self._tasks[0](gs) - def sample(self, config, **kwargs): - return super().sample(config, Change, **kwargs) class TestBasePredicate(unittest.TestCase): # pylint: disable=protected-access,invalid-name,no-member def _get_taskenv(self, - test_tasks: List[Tuple[Task, Group]], + test_preds: List[Tuple[Predicate, Union[Iterable[int], int]]], grass_map=False): config = ScriptedAgentTestConfig() @@ -43,10 +37,12 @@ def _get_taskenv(self, config.PLAYER_N = NUM_AGENT config.IMMORTAL = True - tasks = [Change(tsk, subject=team) for tsk, team in test_tasks] + # OngoingTask keeps evaluating and returns progress as the reward + # vs. 
Task stops evaluating once the task is completed + tasks = [OngoingTask(pred, assignee) for pred, assignee in test_preds] env = Env(config) - env.change_task(tasks) + env.reset(new_tasks=tasks) if grass_map: MS = env.config.MAP_SIZE @@ -61,38 +57,39 @@ def _get_taskenv(self, return env def _check_result(self, env, test_tasks, infos, true_task): - for tid, (task, assignee) in enumerate(test_tasks): + for tid, (predicate, assignee) in enumerate(test_tasks): # result is cached when at least one assignee is alive so that the task is evaled - if set(assignee).intersection(infos): - self.assertEqual(int(env.game_state.cache_result[task.name]), tid in true_task) + if len(set(assignee) & set(infos)) > 0: + self.assertEqual(int(env.game_state.cache_result[predicate.name]), + int(tid in true_task)) + for ent_id in infos: if ent_id in assignee: # the agents that are assigned the task get evaluated for reward - self.assertEqual(int(infos[ent_id]['task'][Change(task,assignee).name]), + self.assertEqual(int(infos[ent_id]['task'][env.tasks[tid].name]['reward']), int(tid in true_task)) else: # the agents that are not assigned the task are not evaluated - self.assertTrue(task.name not in infos[ent_id]['task']) + self.assertTrue(env.tasks[tid].name not in infos[ent_id]['task']) def _check_progress(self, task, infos, value): - """ Some predicates return a float in the range 0-1 indicating completion progress. + """ Tasks return a float in the range 0-1 indicating completion progress. 
""" - predicate, assignee = task[0], task[1] for ent_id in infos: - if ent_id in assignee: - self.assertAlmostEqual(infos[ent_id]['task'][Change(predicate,assignee).name],value) + if ent_id in task.assignee: + self.assertAlmostEqual(infos[ent_id]['task'][task.name]['progress'],value) def test_tickge_stay_alive_rip(self): tick_true = 5 death_note = [1, 2, 3] - test_tasks = [ # (Predicate, Team) + test_tasks = [ # (instantiated predicate, task assignee) (bp.TickGE(Group([1]), tick_true), ALL_AGENT), - (bp.StayAlive(Group([1, 3])), ALL_AGENT), - (bp.StayAlive(Group([3, 4])), Group([1, 2])), - (bp.StayAlive(Group([4])), Group([5, 6])), - (bp.AllDead(Group([1, 3])), ALL_AGENT), - (bp.AllDead(Group([3, 4])), Group([1, 2])), - (bp.AllDead(Group([4])), Group([5, 6]))] + (bp.StayAlive(Group([1,3])), ALL_AGENT), + (bp.StayAlive(Group([3,4])), [1,2]), + (bp.StayAlive(Group([4])), [5,6]), + (bp.AllDead(Group([1,3])), ALL_AGENT), + (bp.AllDead(Group([3,4])), [1,2]), + (bp.AllDead(Group([4])), [5,6])] env = self._get_taskenv(test_tasks) @@ -105,7 +102,7 @@ def test_tickge_stay_alive_rip(self): true_task = [1, 2, 3] self._check_result(env, test_tasks, infos, true_task) - self._check_progress(test_tasks[0], infos, (tick_true-1) / tick_true) + self._check_progress(env.tasks[0], infos, (tick_true-1) / tick_true) # kill agents 1-3 for ent_id in death_note: @@ -135,19 +132,19 @@ def test_tickge_stay_alive_rip(self): self._check_result(env, test_tasks, infos, true_task) # 3 is dead but 4 is alive. Half of agents killed, 50% completion. 
- self._check_progress(test_tasks[5], infos, 0.5) + self._check_progress(env.tasks[5], infos, 0.5) # DONE def test_can_see_tile(self): a1_target = Material.Foilage a2_target = Material.Water - test_tasks = [ # (Predicate, Team), the reward is 1 by default + test_tasks = [ # (instantiated predicate, task assignee) (bp.CanSeeTile(Group([1]), a1_target), ALL_AGENT), # True (bp.CanSeeTile(Group([1,3,5]), a2_target), ALL_AGENT), # False - (bp.CanSeeTile(Group([2]), a2_target), Group([1,2,3])), # True + (bp.CanSeeTile(Group([2]), a2_target), [1,2,3]), # True (bp.CanSeeTile(Group([2,5,6]), a1_target), ALL_AGENT), # False - (bp.CanSeeTile(ALL_AGENT, a2_target), Group([2,3,4]))] # True + (bp.CanSeeTile(Group(ALL_AGENT), a2_target), [2,3,4])] # True # setup env with all grass map env = self._get_taskenv(test_tasks, grass_map=True) @@ -190,8 +187,8 @@ def test_can_see_agent(self): search_target = 1 test_tasks = [ # (Predicate, Team), the reward is 1 by default (bp.CanSeeAgent(Group([1]), search_target), ALL_AGENT), # Always True - (bp.CanSeeAgent(Group([2]), search_target), Group([2,3,4])), # False -> True -> True - (bp.CanSeeAgent(Group([3,4,5]), search_target), Group([1,2,3])), # False -> False -> True + (bp.CanSeeAgent(Group([2]), search_target), [2,3,4]), # False -> True -> True + (bp.CanSeeAgent(Group([3,4,5]), search_target), [1,2,3]), # False -> False -> True (bp.CanSeeGroup(Group([1]), Group([3,4])), ALL_AGENT)] # False -> False -> True env = self._get_taskenv(test_tasks, grass_map=True) @@ -235,9 +232,9 @@ def test_occupy_tile(self): target_tile = (30, 30) test_tasks = [ # (Predicate, Team), the reward is 1 by default (bp.OccupyTile(Group([1]), *target_tile), ALL_AGENT), # False -> True - (bp.OccupyTile(Group([1,2,3]), *target_tile), Group([4,5,6])), # False -> True - (bp.OccupyTile(Group([2]), *target_tile), Group([2,3,4])), # False - (bp.OccupyTile(Group([3,4,5]), *target_tile), Group([1,2,3]))] # False + (bp.OccupyTile(Group([1,2,3]), *target_tile), [4,5,6]), # 
False -> True + (bp.OccupyTile(Group([2]), *target_tile), [2,3,4]), # False + (bp.OccupyTile(Group([3,4,5]), *target_tile), [1,2,3])] # False # make all tiles habitable env = self._get_taskenv(test_tasks, grass_map=True) @@ -514,7 +511,7 @@ def test_hoard_gold_and_team(self): # HoardGold, TeamHoardGold true_task = [0, 2] self._check_result(env, test_tasks, infos, true_task) g = sum(env.realm.players[eid].gold.val for eid in Group([2,4,6]).agents) - self._check_progress(test_tasks[3], infos, g / team_gold_goal) + self._check_progress(env.tasks[3], infos, g / team_gold_goal) # DONE @@ -543,7 +540,7 @@ def test_exchange_gold_predicates(self): # Earn Gold, Spend Gold, Make Profit true_task = [0,4,5] self._check_result(env, test_tasks, infos, true_task) - self._check_progress(test_tasks[1], infos, 2 / gold_goal) + self._check_progress(env.tasks[1], infos, 2 / gold_goal) env.realm.event_log.record(EventCode.BUY_ITEM, players[1], item=Item.Ration(env.realm,1), @@ -600,7 +597,7 @@ def test_score_hit(self): # ScoreHit true_task = [1] self._check_result(env, test_tasks, infos, true_task) - self._check_progress(test_tasks[0], infos, 0.5) + self._check_progress(env.tasks[0], infos, 0.5) env.realm.event_log.record(EventCode.SCORE_HIT, players[1], @@ -674,5 +671,6 @@ def test_item_event_predicates(self): # Consume, Harvest, List, Buy self._check_result(env, test_tasks, infos, true_task) # DONE + if __name__ == '__main__': unittest.main() diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 29842d26..22564ed2 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -1,13 +1,11 @@ -# pylint: disable=import-error,unused-argument,invalid-name -# pylint: disable=no-member,no-value-for-parameter,not-callable,expression-not-assigned +# pylint: disable=unused-argument,invalid-name import unittest -import numpy as np import nmmo from nmmo.core.env import Env -from nmmo.task.task_api import define_predicate, define_task +from 
nmmo.task.predicate_api import define_predicate +from nmmo.task.task_api import Task, nmmo_default_task, make_same_tasks from nmmo.task.group import Group -from nmmo.task.team_helper import TeamHelper from nmmo.task.constraint import InvalidConstraint, ScalarConstraint from nmmo.task.base_predicates import TickGE, CanSeeGroup @@ -24,7 +22,7 @@ def Success(gs, subject: Group): def Failure(gs, subject: Group): return False -@define_task +@define_predicate def Fake(gs, subject, a,b,c): return False @@ -34,40 +32,48 @@ def __init__(self): self.config = nmmo.config.Default() self.cache_result = {} self.get_subject_view = lambda _: None -class TestTaskAPI(unittest.TestCase): - def test_operators(self): +class TestTaskAPI(unittest.TestCase): + def test_predicate_operators(self): # pylint: disable=unsupported-binary-operation,invalid-unary-operand-type + # pylint: disable=no-value-for-parameter,not-callable,no-member mock_gs = MockGameState() - SUCCESS = Success(Group([0])) - FAILURE = Failure(Group([0])) + SUCCESS = Success(Group(0)) + FAILURE = Failure(Group(0)) + # AND (&), OR (|), NOT (~), IMPLY (>>) - task1 = SUCCESS & FAILURE - self.assertFalse(task1(mock_gs)) + pred1 = SUCCESS & FAILURE + self.assertFalse(pred1(mock_gs)) - task2 = SUCCESS | FAILURE | SUCCESS - self.assertTrue(task2(mock_gs)) + pred2 = SUCCESS | FAILURE | SUCCESS + self.assertTrue(pred2(mock_gs)) - task3 = SUCCESS & ~ FAILURE & SUCCESS - self.assertTrue(task3(mock_gs)) + pred3 = SUCCESS & ~ FAILURE & SUCCESS + self.assertTrue(pred3(mock_gs)) - task4 = SUCCESS >> SUCCESS - self.assertTrue(task4(mock_gs)) + pred4 = SUCCESS >> SUCCESS + self.assertTrue(pred4(mock_gs)) - task5 = SUCCESS >> ~ SUCCESS - self.assertFalse(task5(mock_gs)) + pred5 = SUCCESS >> ~ SUCCESS + self.assertFalse(pred5(mock_gs)) - task6 = (FAILURE >> FAILURE) & SUCCESS - self.assertTrue(task6(mock_gs)) + pred6 = (FAILURE >> FAILURE) & SUCCESS + self.assertTrue(pred6(mock_gs)) + self.assertEqual(pred6.name, + 
"(PAND_(IMPLY_(Failure_(0,))_(Failure_(0,)))_(Success_(0,)))") - task7 = SUCCESS + SUCCESS - self.assertEqual(task7(mock_gs),2) + # predicate math + pred7 = 0.1 * SUCCESS + 0.3 + self.assertEqual(pred7(mock_gs), 0.4) + self.assertEqual(pred7.name, + "(PADD_(PMUL_(Success_(0,))_0.1)_0.3)") - task8 = SUCCESS * 3 - self.assertEqual(task8(mock_gs),3) + pred8 = 0.3 * SUCCESS - 1 + self.assertEqual(pred8(mock_gs), 0.0) # cannot go below 0 - self.assertEqual(task6.name, "(PAND_(IMPLY_(Failure_(0,))_(Failure_(0,)))_(Success_(0,)))") + pred9 = 0.3 * SUCCESS + 1 + self.assertEqual(pred9(mock_gs), 1.0) # cannot go over 1 def test_team_assignment(self): team = Group([1, 2, 8, 9], "TeamFoo") @@ -79,19 +85,21 @@ def test_team_assignment(self): # don't allow member of one-member team self.assertEqual(team[2][0].name, team[2].name) - def test_task_name(self): - SUCCESS = Success(Group([0])) - FAILURE = Failure(Group([0])) - fake_task = Fake(Group([2]), 1, Item.Hat, Action.Melee) - combination = (SUCCESS & ~ (FAILURE | fake_task)) | (FAILURE >> fake_task) + def test_predicate_name(self): + # pylint: disable=no-value-for-parameter,no-member + SUCCESS = Success(Group([0,2])) + FAILURE = Failure(Group(0)) + fake_pred = Fake(Group(2), 1, Item.Hat, Action.Melee) + combination = (SUCCESS & ~ (FAILURE | fake_pred)) | (FAILURE >> fake_pred) self.assertEqual(combination.name, - "(POR_(PAND_(Success_(0,))_(PNOT_(POR_(Failure_(0,))_(Fake_(2,)_1_Hat_Melee))))_\ -(IMPLY_(Failure_(0,))_(Fake_(2,)_1_Hat_Melee)))") + "(POR_(PAND_(Success_(0,2))_(PNOT_(POR_(Failure_(0,))_(Fake_(2,)_1_Hat_Melee))))_"+\ + "(IMPLY_(Failure_(0,))_(Fake_(2,)_1_Hat_Melee)))") def test_constraint(self): + # pylint: disable=not-callable,no-value-for-parameter mock_gs = MockGameState() - good = Success(Group([0])) - bad = Success(Group([99999])) + good = Success(Group(0)) + bad = Success(Group(99999)) good(mock_gs) self.assertRaises(InvalidConstraint,lambda: bad(mock_gs)) @@ -100,71 +108,102 @@ def test_constraint(self): 
self.assertTrue(scalar.sample(mock_gs.config)<10) self.assertTrue(scalar.sample(mock_gs.config)>=-10) - bad = TickGE(Group([0]), -1) + bad = TickGE(Group(0), -1) self.assertRaises(InvalidConstraint, lambda: bad(mock_gs)) - def test_sample_task(self): - task = CanSeeGroup() & TickGE() - self.assertEqual(task.name, - "(PAND_(CanSeeGroup_subject:GroupConstraint_target:GroupConstraint)_\ -(TickGE_subject:GroupConstraint_num_tick:ScalarConstraint))") + def test_sample_predicate(self): + # pylint: disable=no-value-for-parameter,expression-not-assigned + predicate = CanSeeGroup() & TickGE() + self.assertEqual(predicate.name, + "(PAND_(CanSeeGroup_subject:GroupConstraint_target:GroupConstraint)_"+\ + "(TickGE_subject:GroupConstraint_num_tick:ScalarConstraint))") config = nmmo.config.Default() TickGE().sample(config) - task.sample(config).name + predicate.sample(config).name + + # DONE + + def test_task_api_with_predicate(self): + # pylint: disable=no-value-for-parameter + mock_gs = MockGameState() + pred = Fake(Group(2), 1, Item.Hat, Action.Melee) + assignee = [1,2,3] # list of agent ids + task = Task(pred, assignee) + rewards, infos = task.compute_rewards(mock_gs) + + self.assertEqual(task.name, # contains predicate name and assignee list + "(Task_eval_fn:(Fake_(2,)_1_Hat_Melee)_assignee:(1,2,3))") + for agent_id in assignee: + self.assertEqual(rewards[agent_id], 0) + self.assertEqual(infos[agent_id]['progress'], 0) # progress (False -> 0) + self.assertFalse(task.completed) + + def test_task_api_with_function(self): + mock_gs = MockGameState() + def eval_with_subject_fn(subject: Group): + def is_agent_1(gs): + return any(agent_id == 1 for agent_id in subject.agents) + return is_agent_1 + + assignee = [1,2,3] # list of agent ids + task = Task(eval_with_subject_fn(Group(assignee)), assignee) + rewards, infos = task.compute_rewards(mock_gs) + + self.assertEqual(task.name, # contains predicate name and assignee list + "(Task_eval_fn:is_agent_1_assignee:(1,2,3))") + for 
agent_id in assignee: + self.assertEqual(rewards[agent_id], 1) + self.assertEqual(infos[agent_id]['progress'], 1) # progress (True -> 1) + self.assertTrue(task.completed) + + def test_nmmo_default_task(self): + config = ScriptedAgentTestConfig() + env = Env(config) + + dafault_tasks = nmmo_default_task(env.possible_agents) + env.reset(new_tasks=dafault_tasks) + for _ in range(3): + env.step({}) # DONE def test_completed_tasks_in_info(self): + # pylint: disable=no-value-for-parameter config = ScriptedAgentTestConfig() env = Env(config) - team_helper = TeamHelper.generate_from_config(config) - fake_task = Fake(Group([3]), 1, Item.Hat, Action.Melee) - task_assignment = \ - [(Success(Group([1])),2), - Failure(Group([1])), - Success(Group([1])) * -1, - 3 * Success(Group([1])), - Success(team_helper.own_team(2)), - fake_task - ] - env.change_task(task_assignment) + + # Use make_atomic_task(predicate, agent_list) when + # the predicate's subject is the same as the task assignee + same_team = [1, 2, 3, 4] + fake_pred = Fake(Group(3), 1, Item.Hat, Action.Melee) + tasks = make_same_tasks(pred=Success, assignee=1) # task 1 + tasks += make_same_tasks(pred=Failure, assignee=2) # task 2 + tasks += [Task(fake_pred, assignee=3), # task 3: fake_pred is already instantiated + Task(Success(Group(same_team)), assignee=same_team)] # task 4: team task + + # tasks are all instantiated with the agent ids + env.reset(new_tasks=tasks) _, _, _, infos = env.step({}) - # agent 1: task1 is always True - self.assertEqual(infos[1]['task'][Success(Group([1])).name], 1.0) - self.assertEqual(infos[1]['task'][(Success(Group([1])) * -1).name], -1.0) - self.assertEqual(infos[1]['task'][(3*Success(Group([1]))).name], 3.0) + # agent 1: assigned only task 1, which is always True + self.assertEqual(infos[1]['task'][tasks[0].name]['reward'], 1.0) + for i in [1, 2]: # task 2 and 3 + self.assertTrue(tasks[i].name not in infos[1]['task']) - # agent 2 should have been assigned Success but not Fake() - 
self.assertEqual(infos[2]['task'][Success(team_helper.own_team(2)).name], 1) - self.assertTrue(fake_task.name not in infos[2]['task']) + # agent 2: assigned task 2 (Failure) and task 4 (Success) + self.assertEqual(infos[2]['task'][tasks[1].name]['reward'], 0.0) # task 2 + self.assertEqual(infos[2]['task'][tasks[3].name]['reward'], 1.0) # task 4 - # agent 3 should have been assigned Fake(), which is always False (0) - self.assertEqual(infos[3]['task'][fake_task.name], 0) + # agent 3 assigned task 3, Fake(), which is always False (0) + self.assertEqual(infos[3]['task'][tasks[2].name]['reward'], 0.0) # task 3 # all agents in the same team with agent 2 have SUCCESS # other agents don't have any tasks assigned - group_name = Success(team_helper.own_team(2)).name - for ent_id in range(4, config.PLAYER_N+1): - if Group([ent_id]) in team_helper.own_team(2): - self.assertEqual(infos[ent_id]['task'][group_name], 1) + for ent_id in env.possible_agents: + if ent_id in same_team: + self.assertEqual(infos[ent_id]['task'][tasks[3].name]['reward'], 1.0) else: - self.assertEqual(infos[ent_id]['task'], {}) - - # DONE - - def test_task_embedding(self): - env = Env() - obs = env.reset() - self.assertRaises(KeyError, lambda: obs[1]['Task']) - - task = [Success([1,2])] - env.change_task(task, - task_encoding={1:np.array([1,2,3,4])}, - embedding_size=4) - obs = env.reset() - self.assertTrue(all(obs[1]['Task']==np.array([1,2,3,4]))) - self.assertTrue(all(obs[2]['Task']==np.array([0,0,0,0]))) + self.assertTrue(tasks[3].name not in infos[ent_id]['task']) # DONE From e16995ab95fc852b759a3cea8d140a1aa874353c Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 2 Jun 2023 17:44:46 +0000 Subject: [PATCH 02/18] added create_task() to predicate --- nmmo/task/predicate_api.py | 35 ++++++++++--- nmmo/task/task_api.py | 72 ++++++++++++--------------- tests/task/test_demo_task_creation.py | 22 ++++---- tests/task/test_task_api.py | 29 ++++++----- 4 files changed, 88 insertions(+), 70 deletions(-) diff 
--git a/nmmo/task/predicate_api.py b/nmmo/task/predicate_api.py index d945ed89..601d8f91 100644 --- a/nmmo/task/predicate_api.py +++ b/nmmo/task/predicate_api.py @@ -1,5 +1,6 @@ from __future__ import annotations -from typing import Callable, List, Optional, Tuple, Union +from typing import Callable, List, Optional, Tuple, Union, Iterable, TYPE_CHECKING +from types import FunctionType from abc import ABC, abstractmethod import inspect from numbers import Real @@ -9,6 +10,9 @@ from nmmo.task.game_state import GameState from nmmo.task.constraint import Constraint, InvalidConstraint, GroupConstraint +if TYPE_CHECKING: + from nmmo.task.task_api import Task + class InvalidPredicateDefinition(Exception): pass @@ -113,13 +117,6 @@ def _evaluate(self, gs: GameState) -> float: raise NotImplementedError def _make_name(self, class_name, args, kwargs) -> str: - def arg_to_string(arg): - if isinstance(arg, type): # class - return arg.__name__ - if arg is None: - return 'Any' - return str(arg) - name = [class_name] + \ list(map(arg_to_string, args)) + \ [f"{arg_to_string(key)}:{arg_to_string(arg)}" for key, arg in kwargs.items()] @@ -133,6 +130,20 @@ def __str__(self): def subject(self): return self._subject + def create_task(self, task_cls: Task=None, + assignee: Union[Iterable[int], int]=None, + reward_multiplier=1.0) -> Task: + """ Creates a task from this predicate""" + if task_cls is None: + from nmmo.task.task_api import Task + task_cls = Task + + if assignee is None: + # the new task is assigned to this predicate's subject + assignee = self._subject.agents + + return task_cls(eval_fn=self, assignee=assignee, reward_multiplier=reward_multiplier) + def __and__(self, other): return PAND(self, other) def __or__(self, other): @@ -154,6 +165,14 @@ def __mul__(self, other): def __rmul__(self, other): return PMUL(self, other) +# _make_name helper functions +def arg_to_string(arg): + if isinstance(arg, (type, FunctionType)): # class or function + return arg.__name__ + if arg is 
None: + return 'Any' + return str(arg) + ################################################ def define_predicate(fn: Callable) -> type[Predicate]: diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index c59aa945..b93e5364 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -4,7 +4,7 @@ from abc import ABC from nmmo.task.group import Group -from nmmo.task.predicate_api import Predicate +from nmmo.task.predicate_api import Predicate, arg_to_string from nmmo.task.base_predicates import StayAlive class Task(ABC): @@ -44,7 +44,7 @@ def completed(self) -> bool: def reward_multiplier(self) -> float: return self._reward_multiplier - def _map_eval_to_reward(self, gs) -> float: + def _map_progress_to_reward(self, gs) -> float: """ The default reward is the diff between the old and new progress. Once the task is completed, no more reward is provided. @@ -66,7 +66,7 @@ def compute_rewards(self, gs) -> Tuple[Dict[int, float], Dict[int, Dict]]: Returns rewards and infos for all agents in subject """ - reward = self._map_eval_to_reward(gs) * self._reward_multiplier + reward = self._map_progress_to_reward(gs) * self._reward_multiplier rewards = {int(ent_id): reward for ent_id in self._assignee} infos = {int(ent_id): {'reward': reward, 'progress': self._progress} for ent_id in self._assignee} @@ -76,13 +76,6 @@ def compute_rewards(self, gs) -> Tuple[Dict[int, float], Dict[int, Dict]]: return rewards, infos def _make_name(self, class_name, **kwargs) -> str: - def arg_to_string(arg): - if isinstance(arg, (type, FunctionType)): # class or function - return arg.__name__ - if arg is None: - return 'Any' - return str(arg) - name = [class_name] + \ [f"{arg_to_string(key)}:{arg_to_string(arg)}" for key, arg in kwargs.items()] name = "("+'_'.join(name).replace(' ', '')+")" @@ -92,7 +85,7 @@ def __str__(self): return self.name class OngoingTask(Task): - def _map_eval_to_reward(self, gs) -> float: + def _map_progress_to_reward(self, gs) -> float: """Keep returning the 
progress reward after the task is completed. However, this task tracks the completion status in the same manner. """ @@ -103,36 +96,37 @@ def _map_eval_to_reward(self, gs) -> float: ###################################################################### -# Task generator helpers - -def make_same_tasks(pred: Predicate, - assignee: Union[Iterable[int], int], - task_cls=Task, - reward_multiplier=1.0, - **kwargs): - if isinstance(assignee, int): - assignee = [assignee] - - # when a list of agent is provided, return a list of identical tasks - return [task_cls(eval_fn=pred(Group(agent_id),**kwargs), - assignee=agent_id, reward_multiplier=reward_multiplier) - for agent_id in set(assignee)] - -# The performance of function based eval_fn vs. predicate -# NOTE: there is ~30% perf overhead for the class based predicate -def make_stay_alive_eval(subject: Group): - def stay_alive_eval(gs): - # return True # for speed testing - return all(agent_id in gs.alive_agents for agent_id in subject.agents) + +def nmmo_default_task(agent_list: Iterable[int], test_mode=None) -> List[Task]: + if test_mode is None: + # use the full predicate system + return [StayAlive(Group(agent_id)).create_task(task_cls=OngoingTask) + for agent_id in agent_list] + + if test_mode == 'no_task': + return [] + + if test_mode == 'dummy_eval_fn': + return [OngoingTask(eval_fn=make_stay_alive_eval(Group(agent_id), test_mode), + assignee=agent_id) for agent_id in agent_list] + + # use the function-based eval + return [OngoingTask(eval_fn=make_stay_alive_eval(Group(agent_id)), + assignee=agent_id) for agent_id in agent_list] + +# for speed testing, function-based eval +def make_stay_alive_eval(subject: Group, test_mode=None): + if test_mode is None: + def stay_alive_eval(gs): + return all(agent_id in gs.alive_agents for agent_id in subject.agents) + else: + # use dummy eval function for speed testing + def stay_alive_eval(gs): + # pylint: disable=unused-argument + return True # change function name for each agent 
return FunctionType( - stay_alive_eval.__code__, globals(), f"stay_alive_{str(subject.agents)}", + stay_alive_eval.__code__, globals(), f"StayAlive_fn_{str(subject.agents)}", closure=stay_alive_eval.__closure__ ) - -def nmmo_default_task(agent_list: Iterable[int]) -> List[Task]: - #return make_same_tasks(StayAlive, agent_list, task_cls=RepeatTask) - return [OngoingTask(eval_fn=make_stay_alive_eval(Group(agent_id)), - assignee=agent_id) - for agent_id in agent_list] diff --git a/tests/task/test_demo_task_creation.py b/tests/task/test_demo_task_creation.py index d6d4e16d..fd36d035 100644 --- a/tests/task/test_demo_task_creation.py +++ b/tests/task/test_demo_task_creation.py @@ -7,7 +7,6 @@ from nmmo.systems import skill from nmmo.task import predicate_api as p from nmmo.task import base_predicates as bp -from nmmo.task import task_api as t from nmmo.task.game_state import GameState from nmmo.task.group import Group @@ -101,9 +100,8 @@ def ForageSkill(gs, subject, lvl): team_tasks = [] for pred, kwargs, weight in player_kills + exploration: for team in teams.values(): - team_tasks.append(t.Task(pred(Group(team), **kwargs), - assignee=team, - reward_multiplier=weight)) + team_tasks.append( + pred(Group(team), **kwargs).create_task(reward_multiplier=weight)) # Run the environment with these tasks # check rewards and infos for the task info @@ -113,10 +111,9 @@ def ForageSkill(gs, subject, lvl): # i.e, each agent gets evaluated and rewarded individually same_tasks = [] for pred, kwargs, weight in exploration + equipment + combat + foraging: - # a helper function can do this - same_tasks += t.make_same_tasks(pred, env.possible_agents, - reward_multiplier=weight, - **kwargs) + for agent_id in env.possible_agents: + same_tasks.append( + pred(Group([agent_id]), **kwargs).create_task(reward_multiplier=weight)) # Run the environment with these tasks # check rewards and infos for the task info @@ -125,6 +122,7 @@ def ForageSkill(gs, subject, lvl): # DONE def 
test_player_kill_reward(self): + # pylint: disable=no-value-for-parameter """ Design a predicate with a complex progress scheme """ config = ScriptedAgentTestConfig() @@ -148,7 +146,8 @@ def KillPredicate(gs: GameState, progress += .3 return min(progress, 1.0) - kill_tasks = t.make_same_tasks(KillPredicate, env.possible_agents) + kill_tasks = [KillPredicate(Group(agent_id)).create_task() + for agent_id in env.possible_agents] # Test Reward env.reset(new_tasks=kill_tasks) @@ -177,6 +176,7 @@ def KillPredicate(gs: GameState, # DONE def test_predicate_math(self): + # pylint: disable=no-value-for-parameter config = ScriptedAgentTestConfig() env = Env(config) @@ -187,10 +187,10 @@ def PredicateMath(gs, subject): # NOTE: the resulting progress will be bounded from [0, 1] afterwards return progress - task_for_agent_1 = t.make_same_tasks(PredicateMath, assignee=1) + task_for_agent_1 = PredicateMath(Group(1)).create_task() # Test Reward - env.reset(new_tasks=task_for_agent_1) + env.reset(new_tasks=[task_for_agent_1]) code = EventCode.PLAYER_KILL players = env.realm.players env.realm.event_log.record(code, players[1], target=players[2]) diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 22564ed2..9b3c3f75 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -4,7 +4,7 @@ import nmmo from nmmo.core.env import Env from nmmo.task.predicate_api import define_predicate -from nmmo.task.task_api import Task, nmmo_default_task, make_same_tasks +from nmmo.task.task_api import Task, nmmo_default_task from nmmo.task.group import Group from nmmo.task.constraint import InvalidConstraint, ScalarConstraint from nmmo.task.base_predicates import TickGE, CanSeeGroup @@ -124,11 +124,11 @@ def test_sample_predicate(self): # DONE def test_task_api_with_predicate(self): - # pylint: disable=no-value-for-parameter + # pylint: disable=no-value-for-parameter,no-member mock_gs = MockGameState() - pred = Fake(Group(2), 1, Item.Hat, Action.Melee) + 
predicate = Fake(Group(2), 1, Item.Hat, Action.Melee) assignee = [1,2,3] # list of agent ids - task = Task(pred, assignee) + task = predicate.create_task(assignee=assignee) rewards, infos = task.compute_rewards(mock_gs) self.assertEqual(task.name, # contains predicate name and assignee list @@ -165,21 +165,26 @@ def test_nmmo_default_task(self): for _ in range(3): env.step({}) + for agent_id in env.possible_agents: + self.assertTrue('StayAlive' in env.tasks[agent_id-1].name) # default task + self.assertTrue(f'assignee:({agent_id},)' in env.tasks[agent_id-1].name) + # DONE def test_completed_tasks_in_info(self): - # pylint: disable=no-value-for-parameter + # pylint: disable=no-value-for-parameter,no-member config = ScriptedAgentTestConfig() env = Env(config) - # Use make_atomic_task(predicate, agent_list) when - # the predicate's subject is the same as the task assignee same_team = [1, 2, 3, 4] - fake_pred = Fake(Group(3), 1, Item.Hat, Action.Melee) - tasks = make_same_tasks(pred=Success, assignee=1) # task 1 - tasks += make_same_tasks(pred=Failure, assignee=2) # task 2 - tasks += [Task(fake_pred, assignee=3), # task 3: fake_pred is already instantiated - Task(Success(Group(same_team)), assignee=same_team)] # task 4: team task + predicates = [ + Success(Group(1)), # task 1 + Failure(Group(2)), # task 2 + Fake(Group(3), 1, Item.Hat, Action.Melee), # task 3 + Success(Group(same_team))] # task 4 + + # in this case the task assignees are the same as the predicate subjects + tasks = [pred.create_task() for pred in predicates] # tasks are all instantiated with the agent ids env.reset(new_tasks=tasks) From 977f6b0a6395ee16d019c5f27db4a6d7b22a7ee5 Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 3 Jun 2023 06:53:18 +0000 Subject: [PATCH 03/18] added make_team_tasks(), init tasks in reset, etc --- nmmo/core/env.py | 31 ++++++---- nmmo/lib/team_helper.py | 11 ++++ nmmo/systems/skill.py | 9 ++- nmmo/task/base_predicates.py | 5 ++ nmmo/task/constraint.py | 5 +- 
nmmo/task/game_state.py | 2 +- nmmo/task/predicate_api.py | 13 ++--- nmmo/task/task_api.py | 63 +++++++++++++++++++- tests/core/test_env.py | 5 +- tests/render/test_render_save.py | 82 ++++++++++++++++++++++----- tests/task/test_demo_task_creation.py | 46 +++++++++++++++ utils/run-perf-tests.sh | 2 + 12 files changed, 229 insertions(+), 45 deletions(-) create mode 100755 utils/run-perf-tests.sh diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 854bf725..cee938fa 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -1,6 +1,6 @@ import functools import random -from typing import Any, Dict, List +from typing import Any, Dict, List, Tuple from ordered_set import OrderedSet import gym @@ -15,7 +15,7 @@ from nmmo.entity.entity import Entity from nmmo.systems.item import Item from nmmo.task.game_state import GameStateGenerator -from nmmo.task.task_api import Task, nmmo_default_task +from nmmo.task.task_api import Task, nmmo_default_task, make_team_tasks from scripted.baselines import Scripted class Env(ParallelEnv): @@ -118,14 +118,19 @@ def action_space(self, agent): # TODO: This doesn't conform to the PettingZoo API # pylint: disable=arguments-renamed def reset(self, map_id=None, seed=None, options=None, - new_tasks: List[Task]=None): + new_tasks: List[Task]=None, + task_spec: List[Tuple]=None, + teams: Dict[int,List[int]]=None): '''OpenAI Gym API reset function Loads a new game map and returns initial observations Args: - idx: Map index to load. Selects a random map by default - + map_id: Map index to load. 
Selects a random map by default + seed: random seed to use + new_tasks: A list of instantiated tasks + task_spec: A list of task spec to instantiate inside reset() + teams: team info to map agent references in the task_spec Returns: observations, as documented by _compute_observations() @@ -152,14 +157,18 @@ def reset(self, map_id=None, seed=None, options=None, self.obs = self._compute_observations() self._gamestate_generator = GameStateGenerator(self.realm, self.config) - # CHECK ME: How the tasks are provided to the env? - # If the provided task instances are mapped to the individual agents, this is enough - # If not, we need to map the tasks to the agents using TeamHelper, in change_task perhaps - if new_tasks is None: - self.tasks = nmmo_default_task(self.possible_agents) - else: + """Two methods to define tasks. + * new_tasks: a list of instantiated tasks. This method has precedence + * task_spec and teams: these are used to instantiate tasks here + If these are all None, then use the default task + """ + if new_tasks is not None: # providing an empty new_tasks [] is also possible self.tasks = new_tasks + elif task_spec is not None and teams is not None: + self.tasks = make_team_tasks(teams, task_spec) + else: + self.tasks = nmmo_default_task(self.possible_agents) return {a: o.to_gym() for a,o in self.obs.items()} diff --git a/nmmo/lib/team_helper.py b/nmmo/lib/team_helper.py index caa81695..f97fab25 100644 --- a/nmmo/lib/team_helper.py +++ b/nmmo/lib/team_helper.py @@ -23,3 +23,14 @@ def agent_id(self, team_id: int, position: int) -> int: def is_agent_in_team(self, agent_id:int , team_id: int) -> bool: return agent_id in self.teams[team_id] + + def get_target_agent(self, team_id: int, target: str): + if target == 'left_team': + return self.teams[(team_id-1) % self.num_teams] + if target == 'left_team_leader': + return self.teams[(team_id-1) % self.num_teams][0] + if target == 'right_team': + return self.teams[(team_id+1) % self.num_teams] + if target == 
'right_team_leader': + return self.teams[(team_id-1) % self.num_teams][0] + return None diff --git a/nmmo/systems/skill.py b/nmmo/systems/skill.py index e8f43a0b..59e93c5c 100644 --- a/nmmo/systems/skill.py +++ b/nmmo/systems/skill.py @@ -265,13 +265,13 @@ def update(self): if not config.RESOURCE_SYSTEM_ENABLED: return + if config.IMMORTAL: + return + depletion = config.RESOURCE_DEPLETION_RATE water = self.entity.resources.water water.decrement(depletion) - if self.config.IMMORTAL: - return - if not self.harvest_adjacent(material.Water, deplete=False): return @@ -288,6 +288,9 @@ def update(self): if not config.RESOURCE_SYSTEM_ENABLED: return + if config.IMMORTAL: + return + depletion = config.RESOURCE_DEPLETION_RATE food = self.entity.resources.food food.decrement(depletion) diff --git a/nmmo/task/base_predicates.py b/nmmo/task/base_predicates.py index 6c6fbc68..1beb1dfc 100644 --- a/nmmo/task/base_predicates.py +++ b/nmmo/task/base_predicates.py @@ -43,6 +43,11 @@ def StayAlive(gs: GameState, """True if all subjects are alive. 
""" return count(subject.health > 0) == len(subject) + # The below is for speed testing (bypass GroupView) + # agent = gs.entity_or_none(subject.agents[0]) + # if agent is None: + # return False + # return agent.health > 0 @define_predicate def AllDead(gs: GameState, diff --git a/nmmo/task/constraint.py b/nmmo/task/constraint.py index 5f2b48fc..71ba829e 100644 --- a/nmmo/task/constraint.py +++ b/nmmo/task/constraint.py @@ -8,6 +8,8 @@ from nmmo.systems import skill, item from nmmo.lib import material from nmmo.core.config import Config + +# TODO: remove this TeamHelper from nmmo.task.team_helper import TeamHelper class InvalidConstraint(Exception): @@ -118,9 +120,8 @@ def sample(self, config: Config): HABITABLE_CONSTRAINT = DiscreteConstraint(space=list(material.Habitable.materials), systems=['TERRAIN_SYSTEM_ENABLED']) combat_skills = [skill.Melee, skill.Mage, skill.Range] -basic_skills = [skill.Water, skill.Food] harvest_skills = [skill.Fishing, skill.Herbalism, skill.Prospecting, skill.Alchemy, skill.Carving] -SKILL_CONSTRAINT = DiscreteConstraint(space=combat_skills+basic_skills+harvest_skills, +SKILL_CONSTRAINT = DiscreteConstraint(space=combat_skills+harvest_skills, systems=['PROFESSION_SYSTEM_ENABLED']) COMBAT_SKILL_CONSTRAINT = DiscreteConstraint(space=combat_skills, systems=['PROFESSION_SYSTEM_ENABLED']) diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index 583d0797..fb57ba4c 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -43,7 +43,7 @@ class GameState: def entity_or_none(self, ent_id): flt_ent = self.entity_data[:, EntityAttr['id']] == ent_id if np.any(flt_ent): - return EntityAttr.parse_array(self.entity_data[flt_ent][0]) + return EntityState.parse_array(self.entity_data[flt_ent][0]) return None diff --git a/nmmo/task/predicate_api.py b/nmmo/task/predicate_api.py index 601d8f91..cbcd4853 100644 --- a/nmmo/task/predicate_api.py +++ b/nmmo/task/predicate_api.py @@ -45,7 +45,7 @@ def __call__(self, gs: GameState) -> 
float: gs: GameState Returns: - score: float bounded between [0, 1], 1 is considered to be true + progress: float bounded between [0, 1], 1 is considered to be true """ if not self._config == gs.config: # TODO(mark) should we make this explicitly called by environment @@ -56,12 +56,11 @@ def __call__(self, gs: GameState) -> float: # Calculate score cache = gs.cache_result if self.name in cache: - score = cache[self.name] + progress = cache[self.name] else: - score = max(min(self._evaluate(gs)*1.0,1.0),0.0) - cache[self.name] = score - # Calculate score - return score + progress = max(min(self._evaluate(gs)*1.0,1.0),0.0) + cache[self.name] = progress + return progress def _reset(self, config: Config): self._config = config @@ -111,7 +110,7 @@ def sample(self, config: Config, **overload): @abstractmethod def _evaluate(self, gs: GameState) -> float: - """ A mapping from a game state to the desirability of that state. + """ A mapping from a game state to the desirability/progress of that state. 
__call__() will cap its value to [0, 1] """ raise NotImplementedError diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index b93e5364..2df64e84 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -5,7 +5,8 @@ from nmmo.task.group import Group from nmmo.task.predicate_api import Predicate, arg_to_string -from nmmo.task.base_predicates import StayAlive +from nmmo.task import base_predicates as bp +from nmmo.lib.team_helper import TeamHelper class Task(ABC): """ A task is used to calculate rewards for agents in assignee @@ -100,7 +101,7 @@ def _map_progress_to_reward(self, gs) -> float: def nmmo_default_task(agent_list: Iterable[int], test_mode=None) -> List[Task]: if test_mode is None: # use the full predicate system - return [StayAlive(Group(agent_id)).create_task(task_cls=OngoingTask) + return [bp.StayAlive(Group(agent_id)).create_task(task_cls=OngoingTask) for agent_id in agent_list] if test_mode == 'no_task': @@ -130,3 +131,61 @@ def stay_alive_eval(gs): stay_alive_eval.__code__, globals(), f"StayAlive_fn_{str(subject.agents)}", closure=stay_alive_eval.__closure__ ) + +# TODO: a lot to improve here. 
+ +REWARD_TO = ['agent', 'team'] +VALID_TARGET = ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] + +def make_team_tasks(teams, task_spec) -> List[Task]: + """ + task_spec: a list of tuples (reward_to, eval_fn, **kwargs) + + each tuple is assigned to the teams + """ + tasks = [] + team_list = list(teams.keys()) + team_helper = TeamHelper(teams) + for idx in range(min(len(team_list), len(task_spec))): + team_id = team_list[idx] + reward_to, pred_cls, kwargs = task_spec[team_id] + + assert reward_to in REWARD_TO, 'Wrong reward target' + + if 'task_cls' in kwargs: + task_cls = kwargs.pop('task_cls') + else: + task_cls = Task + + # reserve 'target' for relative agent mapping + if 'target' in kwargs: + target = kwargs.pop('target') + assert target in VALID_TARGET, 'Invalid target' + # translate target to specific agent ids using team_helper + target = team_helper.get_target_agent(team_id, target) + kwargs['target'] = target #tuple(target,) if isinstance(target, int) else tuple(target) + + # handle some special cases and instantiate the predicate first + predicate = None + if pred_cls in [bp.AllDead]: + kwargs.pop('target') # remove target + predicate = pred_cls(Group(target), **kwargs) + + # create the task + if reward_to == 'team': + assignee = team_helper.teams[team_id] + if predicate is None: + tasks.append(pred_cls(Group(assignee), **kwargs).create_task(task_cls=task_cls)) + else: + tasks.append(predicate.create_task(assignee=assignee, task_cls=task_cls)) + + elif reward_to == 'agent': + agent_list = team_helper.teams[team_id] + if predicate is None: + tasks += [pred_cls(Group(agent_id), **kwargs).create_task(task_cls=task_cls) + for agent_id in agent_list] + else: + tasks += [predicate.create_task(assignee=agent_id, task_cls=task_cls) + for agent_id in agent_list] + + return tasks diff --git a/tests/core/test_env.py b/tests/core/test_env.py index fb1bd7a4..1ddeb677 100644 --- a/tests/core/test_env.py +++ b/tests/core/test_env.py @@ -1,4 +1,3 @@ 
- import unittest from typing import List @@ -18,10 +17,8 @@ # 30 seems to be enough to test variety of agent actions TEST_HORIZON = 30 RANDOM_SEED = random.randint(0, 10000) -# TODO: We should check that milestones have been reached, to make -# sure that the agents aren't just dying + class Config(nmmo.config.Small, nmmo.config.AllGameSystems): - RENDER = False SPECIALIZE = True PLAYERS = [ baselines.Fisher, baselines.Herbalist, baselines.Prospector, diff --git a/tests/render/test_render_save.py b/tests/render/test_render_save.py index 21b463dd..f1f3801e 100644 --- a/tests/render/test_render_save.py +++ b/tests/render/test_render_save.py @@ -1,30 +1,82 @@ -'''Manual test for render client connectivity''' +'''Manual test for render client connectivity and save replay''' +import nmmo +from nmmo.core.config import (AllGameSystems, Combat, Communication, + Equipment, Exchange, Item, Medium, Profession, + Progression, Resource, Small, Terrain) +from nmmo.task.task_api import nmmo_default_task +from nmmo.render.render_client import WebsocketRenderer +from nmmo.render.replay_helper import FileReplayHelper +from scripted import baselines + +def create_config(base, nent, *systems): + # pylint: disable=redefined-outer-name + systems = (base, *systems) + name = '_'.join(cls.__name__ for cls in systems) + + conf = type(name, systems, {})() + + conf.TERRAIN_TRAIN_MAPS = 1 + conf.TERRAIN_EVAL_MAPS = 1 + conf.IMMORTAL = True + conf.PLAYER_N = nent + conf.PLAYERS = [baselines.Random] + + return conf + +no_npc_small_1_pop_conf = create_config(Small, 1, Terrain, Resource, + Combat, Progression, Item, Equipment, Profession, Exchange, Communication) + +no_npc_med_1_pop_conf = create_config(Medium, 1, Terrain, Resource, + Combat, Progression, Item, Equipment, Profession, Exchange, Communication) + +no_npc_med_100_pop_conf = create_config(Medium, 100, Terrain, Resource, + Combat, Progression, Item, Equipment, Profession, Exchange, Communication) + +all_small_1_pop_conf = 
create_config(Small, 1, AllGameSystems) + +all_med_1_pop_conf = create_config(Medium, 1, AllGameSystems) + +all_med_100_pop_conf = create_config(Medium, 100, AllGameSystems) + +conf_dict = { + 'no_npc_small_1_pop': no_npc_small_1_pop_conf, + 'no_npc_med_1_pop': no_npc_med_1_pop_conf, + 'no_npc_med_100_pop': no_npc_med_100_pop_conf, + 'all_small_1_pop': all_small_1_pop_conf, + 'all_med_1_pop': all_med_1_pop_conf, + 'all_med_100_pop': all_med_100_pop_conf +} if __name__ == '__main__': import random - import nmmo + from tqdm import tqdm - # pylint: disable=import-error - from nmmo.render.render_client import WebsocketRenderer from tests.testhelpers import ScriptedAgentTestConfig TEST_HORIZON = 100 RANDOM_SEED = random.randint(0, 9999) - # config.RENDER option is gone, - # RENDER can be done without setting any config config = ScriptedAgentTestConfig() config.NPC_SPAWN_ATTEMPTS = 8 - env = nmmo.Env(config) - env.reset(seed=RANDOM_SEED) + replay_helper = FileReplayHelper() + + for name, config in conf_dict.items(): + env = nmmo.Env(config) + + # to make replay, one should create replay_helper + # and run the below line + env.realm.record_replay(replay_helper) + + tasks = nmmo_default_task(env.possible_agents, 'no_task') + env.reset(seed=RANDOM_SEED, new_tasks=tasks) - # the renderer is external to the env, so need to manually initiate it - renderer = WebsocketRenderer(env.realm) + # the renderer is external to the env, so need to manually initiate it + renderer = WebsocketRenderer(env.realm) - for tick in range(TEST_HORIZON): - env.step({}) - renderer.render_realm() + for tick in tqdm(range(TEST_HORIZON)): + env.step({}) + renderer.render_realm() - # save the packet: this is possible because config.SAVE_REPLAY = True - env.realm.save_replay(f'replay_seed_{RANDOM_SEED:04d}.json', compress=False) + # NOTE: the web client has trouble loading the compressed replay file + replay_helper.save(f'replay_{name}_seed_{RANDOM_SEED:04d}.json', compress=False) diff --git 
a/tests/task/test_demo_task_creation.py b/tests/task/test_demo_task_creation.py index fd36d035..0f7d7288 100644 --- a/tests/task/test_demo_task_creation.py +++ b/tests/task/test_demo_task_creation.py @@ -6,6 +6,7 @@ from nmmo.lib.log import EventCode from nmmo.systems import skill from nmmo.task import predicate_api as p +from nmmo.task import task_api as t from nmmo.task import base_predicates as bp from nmmo.task.game_state import GameState from nmmo.task.group import Group @@ -208,5 +209,50 @@ def PredicateMath(gs, subject): # DONE + def test_make_team_tasks_inside_reset(self): + # NOTE: len(teams) and len(task_spec) don't need to match + teams = {0:[1, 2, 3], 1:[4, 5], 2:[6, 7], 3:[8, 9], 4:[10, 11, 12]} + + """ task_spec is a list of tuples (reward_to, predicate class, kwargs) + + each tuple in the task_spec will create tasks for a team in teams + + reward_to: must be in ['team', 'agent'] + * 'team' creates a single team task, in which all team members get rewarded + * 'agent' creates a task for each agent, in which only the agent gets rewarded + + predicate class from the base predicates or custom predicates like above + + kwargs are the additional args that go into predicate. There are also special keys + * 'target' must be in ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] + these strings will be translated into the actual agent ids + * 'task_cls' is optional. If not provided, the standard Task is used.
""" + task_spec = [ # (reward_to, predicate class, kwargs) + ('team', bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 1}), # one task + ('agent', bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 2}), + ('agent', bp.AllDead, {'target': 'left_team'}), + ('team', bp.CanSeeAgent, {'target': 'right_team_leader', 'task_cls': t.OngoingTask})] + + config = ScriptedAgentTestConfig() + env = Env(config) + + env.reset(task_spec=task_spec, teams=teams) + + self.assertEqual(len(env.tasks), 6) # 6 tasks were created + self.assertEqual(env.tasks[0].name, # team 0 task assigned to agents 1,2,3 + '(Task_eval_fn:(CountEvent_(1,2,3)_event:PLAYER_KILL_N:1)_assignee:(1,2,3))') + self.assertEqual(env.tasks[1].name, # agent task assigned to agent 4 + '(Task_eval_fn:(CountEvent_(4,)_event:PLAYER_KILL_N:2)_assignee:(4,))') + self.assertEqual(env.tasks[2].name, # agent task assigned to agent 5 + '(Task_eval_fn:(CountEvent_(5,)_event:PLAYER_KILL_N:2)_assignee:(5,))') + self.assertEqual(env.tasks[3].name, # agent 6 task, left_team became agents 4,5 (team 1) + '(Task_eval_fn:(AllDead_(4,5))_assignee:(6,))') + self.assertEqual(env.tasks[5].name, # team 3 task; NOTE: target 6 is the LEFT team's leader + '(OngoingTask_eval_fn:(CanSeeAgent_(8,9)_target:6)_assignee:(8,9))') + # no task for team 4 + + for _ in range(2): + env.step({}) + if __name__ == '__main__': unittest.main() diff --git a/utils/run-perf-tests.sh b/utils/run-perf-tests.sh new file mode 100755 index 00000000..5e9529bd --- /dev/null +++ b/utils/run-perf-tests.sh @@ -0,0 +1,2 @@ +pytest --benchmark-columns=ops,rounds,median,mean,stddev,min,max,iterations --benchmark-max-time=5 --benchmark-min-rounds=500 \ + --benchmark-warmup=on --benchmark-warmup-iterations=300 tests/test_performance.py From a7975e59697e787c5238877b28d73f36b4009e48 Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 3 Jun 2023 07:22:52 +0000 Subject: [PATCH 04/18] can pass function into reset to create task --- nmmo/task/task_api.py | 5 ++++- tests/task/test_demo_task_creation.py
| 14 ++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index 2df64e84..8fe51264 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -4,7 +4,7 @@ from abc import ABC from nmmo.task.group import Group -from nmmo.task.predicate_api import Predicate, arg_to_string +from nmmo.task.predicate_api import define_predicate, arg_to_string from nmmo.task import base_predicates as bp from nmmo.lib.team_helper import TeamHelper @@ -167,6 +167,9 @@ def make_team_tasks(teams, task_spec) -> List[Task]: # handle some special cases and instantiate the predicate first predicate = None + if not isinstance(pred_cls, type): + # if a function is provided as a predicate + pred_cls = define_predicate(pred_cls) if pred_cls in [bp.AllDead]: kwargs.pop('target') # remove target predicate = pred_cls(Group(target), **kwargs) diff --git a/tests/task/test_demo_task_creation.py b/tests/task/test_demo_task_creation.py index 0f7d7288..e27c7668 100644 --- a/tests/task/test_demo_task_creation.py +++ b/tests/task/test_demo_task_creation.py @@ -211,7 +211,11 @@ def PredicateMath(gs, subject): def test_make_team_tasks_inside_reset(self): # NOTE: len(teams) and len(task_spec) don't need to match - teams = {0:[1, 2, 3], 1:[4, 5], 2:[6, 7], 3:[8, 9], 4:[10, 11, 12]} + teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]} + + # custom function to turn into predicate inside reset + def custom_predicate_func(gs, subject, test): + return True """ task_spec is a list of tuple (reward_to, predicate class, kwargs) @@ -231,14 +235,15 @@ def test_make_team_tasks_inside_reset(self): ('team', bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 1}), # one task ('agent', bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 2}), ('agent', bp.AllDead, {'target': 'left_team'}), - ('team', bp.CanSeeAgent, {'target': 'right_team_leader', 'task_cls': t.OngoingTask})] + ('team', bp.CanSeeAgent, {'target': 'right_team_leader', 'task_cls': 
t.OngoingTask}), + ('team', custom_predicate_func, {'test': 1})] config = ScriptedAgentTestConfig() env = Env(config) env.reset(task_spec=task_spec, teams=teams) - self.assertEqual(len(env.tasks), 6) # 6 tasks were created + self.assertEqual(len(env.tasks), 7) # 7 tasks were created self.assertEqual(env.tasks[0].name, # team 0 task assigned to agents 1,2,3 '(Task_eval_fn:(CountEvent_(1,2,3)_event:PLAYER_KILL_N:1)_assignee:(1,2,3))') self.assertEqual(env.tasks[1].name, # agent task assigned to agent 4 @@ -249,7 +254,8 @@ def test_make_team_tasks_inside_reset(self): '(Task_eval_fn:(AllDead_(4,5))_assignee:(6,))') self.assertEqual(env.tasks[5].name, # team 3 task, right_team became agent 8,9 (team 4) '(OngoingTask_eval_fn:(CanSeeAgent_(8,9)_target:6)_assignee:(8,9))') - # no task for team 4 + self.assertEqual(env.tasks[6].name, # team 4 task, based on a predicate function + '(Task_eval_fn:(custom_predicate_func_(10,11)_test:1)_assignee:(10,11))') for _ in range(2): env.step({}) From 62f3e0bf09b5e750c3c01a6ecf5f7570ee0eb796 Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 3 Jun 2023 07:30:15 +0000 Subject: [PATCH 05/18] fixed typo --- tests/task/test_demo_task_creation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/task/test_demo_task_creation.py b/tests/task/test_demo_task_creation.py index e27c7668..b1fb1a94 100644 --- a/tests/task/test_demo_task_creation.py +++ b/tests/task/test_demo_task_creation.py @@ -246,13 +246,13 @@ def custom_predicate_func(gs, subject, test): self.assertEqual(len(env.tasks), 7) # 7 tasks were created self.assertEqual(env.tasks[0].name, # team 0 task assigned to agents 1,2,3 '(Task_eval_fn:(CountEvent_(1,2,3)_event:PLAYER_KILL_N:1)_assignee:(1,2,3))') - self.assertEqual(env.tasks[1].name, # agent task assigned to agent 4 + self.assertEqual(env.tasks[1].name, # team 1, agent task assigned to agent 4 '(Task_eval_fn:(CountEvent_(4,)_event:PLAYER_KILL_N:2)_assignee:(4,))') - self.assertEqual(env.tasks[2].name, # 
agent task assigned to agent 4 + self.assertEqual(env.tasks[2].name, # team 1, agent task assigned to agent 5 '(Task_eval_fn:(CountEvent_(5,)_event:PLAYER_KILL_N:2)_assignee:(5,))') - self.assertEqual(env.tasks[3].name, # agent 6 task, left_team became agents 4,5 (team 1) + self.assertEqual(env.tasks[3].name, # team 2, agent 6 task, left_team is team 1 (agents 4,5) '(Task_eval_fn:(AllDead_(4,5))_assignee:(6,))') - self.assertEqual(env.tasks[5].name, # team 3 task, right_team became agent 8,9 (team 4) + self.assertEqual(env.tasks[5].name, # team 3 task, right_team is team 4 (agents 8,9) '(OngoingTask_eval_fn:(CanSeeAgent_(8,9)_target:6)_assignee:(8,9))') self.assertEqual(env.tasks[6].name, # team 4 task, based on a predicate function '(Task_eval_fn:(custom_predicate_func_(10,11)_test:1)_assignee:(10,11))') From ad1bc2b17890672e7d4f9dac7a66878dd9c3c368 Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 3 Jun 2023 07:57:02 +0000 Subject: [PATCH 06/18] corrected team spawn pos, left/right team --- nmmo/lib/spawn.py | 1 - nmmo/lib/team_helper.py | 6 +++--- tests/task/test_demo_task_creation.py | 6 +++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/nmmo/lib/spawn.py b/nmmo/lib/spawn.py index 97508266..8dffc750 100644 --- a/nmmo/lib/spawn.py +++ b/nmmo/lib/spawn.py @@ -135,5 +135,4 @@ def get_team_spawn_positions(config, num_teams): idx = int(len(side)*(i+1)/(teams_per_sides + 1)) team_spawn_positions.append(side[idx]) - np.random.shuffle(team_spawn_positions) return team_spawn_positions diff --git a/nmmo/lib/team_helper.py b/nmmo/lib/team_helper.py index f97fab25..5474f7a5 100644 --- a/nmmo/lib/team_helper.py +++ b/nmmo/lib/team_helper.py @@ -26,11 +26,11 @@ def is_agent_in_team(self, agent_id:int , team_id: int) -> bool: def get_target_agent(self, team_id: int, target: str): if target == 'left_team': - return self.teams[(team_id-1) % self.num_teams] + return self.teams[(team_id+1) % self.num_teams] if target == 'left_team_leader': - return 
self.teams[(team_id-1) % self.num_teams][0] + return self.teams[(team_id+1) % self.num_teams][0] if target == 'right_team': - return self.teams[(team_id+1) % self.num_teams] + return self.teams[(team_id-1) % self.num_teams] if target == 'right_team_leader': return self.teams[(team_id-1) % self.num_teams][0] return None diff --git a/tests/task/test_demo_task_creation.py b/tests/task/test_demo_task_creation.py index b1fb1a94..148a1e13 100644 --- a/tests/task/test_demo_task_creation.py +++ b/tests/task/test_demo_task_creation.py @@ -250,9 +250,9 @@ def custom_predicate_func(gs, subject, test): '(Task_eval_fn:(CountEvent_(4,)_event:PLAYER_KILL_N:2)_assignee:(4,))') self.assertEqual(env.tasks[2].name, # team 1, agent task assigned to agent 5 '(Task_eval_fn:(CountEvent_(5,)_event:PLAYER_KILL_N:2)_assignee:(5,))') - self.assertEqual(env.tasks[3].name, # team 2, agent 6 task, left_team is team 1 (agents 4,5) - '(Task_eval_fn:(AllDead_(4,5))_assignee:(6,))') - self.assertEqual(env.tasks[5].name, # team 3 task, right_team is team 4 (agents 8,9) + self.assertEqual(env.tasks[3].name, # team 2, agent 6 task, left_team is team 3 (agents 8,9) + '(Task_eval_fn:(AllDead_(8,9))_assignee:(6,))') + self.assertEqual(env.tasks[5].name, # team 3 task, right_team is team 2 (6,7), leader 6 '(OngoingTask_eval_fn:(CanSeeAgent_(8,9)_target:6)_assignee:(8,9))') self.assertEqual(env.tasks[6].name, # team 4 task, based on a predicate function '(Task_eval_fn:(custom_predicate_func_(10,11)_test:1)_assignee:(10,11))') From 58dd1d72e65b78167c5d37dc19d635efca1f8e7b Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 3 Jun 2023 17:43:27 +0000 Subject: [PATCH 07/18] refactored nmmo_default_task() --- nmmo/task/task_api.py | 52 ++++++++++++++++++------------------- tests/task/test_task_api.py | 19 ++++++++------ 2 files changed, 36 insertions(+), 35 deletions(-) diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index 8fe51264..3c9142d4 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py 
@@ -4,7 +4,7 @@ from abc import ABC from nmmo.task.group import Group -from nmmo.task.predicate_api import define_predicate, arg_to_string +from nmmo.task.predicate_api import Predicate, define_predicate, arg_to_string from nmmo.task import base_predicates as bp from nmmo.lib.team_helper import TeamHelper @@ -98,41 +98,40 @@ def _map_progress_to_reward(self, gs) -> float: ###################################################################### +# The same task is assigned each agent in agent_list individually +# with the agent as the predicate subject and task assignee +def make_same_task(predicate: Union[Predicate, Callable], + agent_list: Iterable[int], + task_cls = Task, **kwargs) -> List[Task]: + if isinstance(predicate, type): # predicate is class, assuming Predicate + return [predicate(Group(agent_id),**kwargs).create_task(task_cls=task_cls) + for agent_id in agent_list] + + # eval_fn is a function to turn into predicate + pred_cls = define_predicate(predicate) + return [pred_cls(Group(agent_id),**kwargs).create_task(task_cls=task_cls) + for agent_id in agent_list] + def nmmo_default_task(agent_list: Iterable[int], test_mode=None) -> List[Task]: if test_mode is None: # use the full predicate system - return [bp.StayAlive(Group(agent_id)).create_task(task_cls=OngoingTask) - for agent_id in agent_list] + return make_same_task(bp.StayAlive, agent_list, task_cls=OngoingTask) if test_mode == 'no_task': return [] if test_mode == 'dummy_eval_fn': - return [OngoingTask(eval_fn=make_stay_alive_eval(Group(agent_id), test_mode), - assignee=agent_id) for agent_id in agent_list] + # pylint: disable=unused-argument + return make_same_task(lambda gs, subject: True, agent_list, task_cls=OngoingTask) # use the function-based eval - return [OngoingTask(eval_fn=make_stay_alive_eval(Group(agent_id)), - assignee=agent_id) for agent_id in agent_list] + def stay_alive_eval(gs, subject): + return all(agent_id in gs.alive_agents for agent_id in subject.agents) -# for speed testing, 
function-based eval -def make_stay_alive_eval(subject: Group, test_mode=None): - if test_mode is None: - def stay_alive_eval(gs): - return all(agent_id in gs.alive_agents for agent_id in subject.agents) - else: - # use dummy eval function for speed testing - def stay_alive_eval(gs): - # pylint: disable=unused-argument - return True - - # change function name for each agent - return FunctionType( - stay_alive_eval.__code__, globals(), f"StayAlive_fn_{str(subject.agents)}", - closure=stay_alive_eval.__closure__ - ) - -# TODO: a lot to improve here. + return make_same_task(stay_alive_eval, agent_list, task_cls=OngoingTask) + +###################################################################### +# TODO: a lot to improve below REWARD_TO = ['agent', 'team'] VALID_TARGET = ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] @@ -185,8 +184,7 @@ def make_team_tasks(teams, task_spec) -> List[Task]: elif reward_to == 'agent': agent_list = team_helper.teams[team_id] if predicate is None: - tasks += [pred_cls(Group(agent_id), **kwargs).create_task(task_cls=task_cls) - for agent_id in agent_list] + tasks += make_same_task(pred_cls, agent_list, task_cls=task_cls, **kwargs) else: tasks += [predicate.create_task(assignee=agent_id, task_cls=task_cls) for agent_id in agent_list] diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 9b3c3f75..8db4f086 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -160,14 +160,17 @@ def test_nmmo_default_task(self): config = ScriptedAgentTestConfig() env = Env(config) - dafault_tasks = nmmo_default_task(env.possible_agents) - env.reset(new_tasks=dafault_tasks) - for _ in range(3): - env.step({}) - - for agent_id in env.possible_agents: - self.assertTrue('StayAlive' in env.tasks[agent_id-1].name) # default task - self.assertTrue(f'assignee:({agent_id},)' in env.tasks[agent_id-1].name) + for test_mode in [None, 'no_task', 'func_eval', 'dummy_eval_fn']: + dafault_tasks = 
nmmo_default_task(env.possible_agents, test_mode) + env.reset(new_tasks=dafault_tasks) + for _ in range(3): + env.step({}) + + for agent_id in env.possible_agents: + if test_mode is None: + self.assertTrue('StayAlive' in env.tasks[agent_id-1].name) # default task + if test_mode != 'no_task': + self.assertTrue(f'assignee:({agent_id},)' in env.tasks[agent_id-1].name) # DONE From 153a1ed9c08eb4a93dd8ac98607ce3acc1918da9 Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 3 Jun 2023 18:32:40 +0000 Subject: [PATCH 08/18] refactored team_helper, checked constraints --- nmmo/task/__init__.py | 1 - nmmo/task/base_predicates.py | 22 +++++------ nmmo/task/constraint.py | 23 ++++++++--- nmmo/task/scenario.py | 76 ------------------------------------ nmmo/task/task_api.py | 2 +- nmmo/task/team_helper.py | 57 --------------------------- 6 files changed, 29 insertions(+), 152 deletions(-) delete mode 100644 nmmo/task/scenario.py delete mode 100644 nmmo/task/team_helper.py diff --git a/nmmo/task/__init__.py b/nmmo/task/__init__.py index fde78a93..034e22ec 100644 --- a/nmmo/task/__init__.py +++ b/nmmo/task/__init__.py @@ -1,4 +1,3 @@ from .game_state import * from .predicate_api import * from .task_api import * -from .team_helper import * diff --git a/nmmo/task/base_predicates.py b/nmmo/task/base_predicates.py index 1beb1dfc..daef8daa 100644 --- a/nmmo/task/base_predicates.py +++ b/nmmo/task/base_predicates.py @@ -124,7 +124,7 @@ def AttainSkill(gs: GameState, def CountEvent(gs: GameState, subject: Group = constraint.TEAM_GROUPS, event: str = constraint.EVENTCODE_CONSTRAINT, - N: int = constraint.ScalarConstraint()): + N: int = constraint.EVENT_NUMBER_CONSTRAINT): """True if the number of events occured in subject corresponding to event >= N """ @@ -134,7 +134,7 @@ def CountEvent(gs: GameState, def ScoreHit(gs: GameState, subject: Group = constraint.TEAM_GROUPS, combat_style: type[Skill] = constraint.COMBAT_SKILL_CONSTRAINT, - N: int = constraint.ScalarConstraint()): + N: int = 
constraint.EVENT_NUMBER_CONSTRAINT): """True if the number of hits scored in style combat_style >= count """ @@ -144,7 +144,7 @@ def ScoreHit(gs: GameState, @define_predicate def HoardGold(gs: GameState, subject: Group = constraint.TEAM_GROUPS, - amount: int = constraint.ScalarConstraint()): + amount: int = constraint.GOLD_CONSTRAINT): """True iff the summed gold of all teammate is greater than or equal to amount. """ return subject.gold.sum() / amount @@ -152,7 +152,7 @@ def HoardGold(gs: GameState, @define_predicate def EarnGold(gs: GameState, subject: Group = constraint.TEAM_GROUPS, - amount: int = constraint.ScalarConstraint()): + amount: int = constraint.GOLD_CONSTRAINT): """ True if the total amount of gold earned is greater than or equal to amount. """ return subject.event.EARN_GOLD.gold.sum() / amount @@ -160,7 +160,7 @@ def EarnGold(gs: GameState, @define_predicate def SpendGold(gs: GameState, subject: Group = constraint.TEAM_GROUPS, - amount: int = constraint.ScalarConstraint()): + amount: int = constraint.GOLD_CONSTRAINT): """ True if the total amount of gold spent is greater than or equal to amount. """ return subject.event.BUY_ITEM.gold.sum() / amount @@ -168,7 +168,7 @@ def SpendGold(gs: GameState, @define_predicate def MakeProfit(gs: GameState, subject: Group = constraint.TEAM_GROUPS, - amount: int = constraint.ScalarConstraint()): + amount: int = constraint.GOLD_CONSTRAINT): """ True if the total amount of gold earned-spent is greater than or equal to amount. """ profits = subject.event.EARN_GOLD.gold.sum() @@ -178,7 +178,7 @@ def MakeProfit(gs: GameState, @define_predicate def InventorySpaceGE(gs: GameState, subject: Group = constraint.TEAM_GROUPS, - space: int = constraint.ScalarConstraint()): + space: int = constraint.INVENTORY_CONSTRAINT): """True if the inventory space of every subjects is greater than or equal to the space. Otherwise false. 
""" @@ -247,7 +247,7 @@ def ConsumeItem(gs: GameState, subject: Group = constraint.TEAM_GROUPS, item: type[Item] = constraint.CONSUMABLE_CONSTRAINT, level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.ScalarConstraint()): + quantity: int = constraint.EVENT_NUMBER_CONSTRAINT): """True if total quantity consumed of item type above level is >= quantity """ type_flt = subject.event.CONSUME_ITEM.type == item.ITEM_TYPE_ID @@ -259,7 +259,7 @@ def HarvestItem(gs: GameState, subject: Group = constraint.TEAM_GROUPS, item: type[Item] = constraint.ITEM_CONSTRAINT, level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.ScalarConstraint()): + quantity: int = constraint.EVENT_NUMBER_CONSTRAINT): """True if total quantity harvested of item type above level is >= quantity """ type_flt = subject.event.HARVEST_ITEM.type == item.ITEM_TYPE_ID @@ -271,7 +271,7 @@ def ListItem(gs: GameState, subject: Group = constraint.TEAM_GROUPS, item: type[Item] = constraint.ITEM_CONSTRAINT, level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.ScalarConstraint()): + quantity: int = constraint.EVENT_NUMBER_CONSTRAINT): """True if total quantity listed of item type above level is >= quantity """ type_flt = subject.event.LIST_ITEM.type == item.ITEM_TYPE_ID @@ -283,7 +283,7 @@ def BuyItem(gs: GameState, subject: Group = constraint.TEAM_GROUPS, item: type[Item] = constraint.ITEM_CONSTRAINT, level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.ScalarConstraint()): + quantity: int = constraint.EVENT_NUMBER_CONSTRAINT): """True if total quantity purchased of item type above level is >= quantity """ type_flt = subject.event.BUY_ITEM.type == item.ITEM_TYPE_ID diff --git a/nmmo/task/constraint.py b/nmmo/task/constraint.py index 71ba829e..c62989d5 100644 --- a/nmmo/task/constraint.py +++ b/nmmo/task/constraint.py @@ -2,16 +2,13 @@ import random from numbers import Number -from typing import Union, Callable +from typing 
import Union, Callable, Dict from abc import ABC, abstractmethod from nmmo.systems import skill, item from nmmo.lib import material from nmmo.core.config import Config -# TODO: remove this TeamHelper -from nmmo.task.team_helper import TeamHelper - class InvalidConstraint(Exception): pass @@ -45,11 +42,18 @@ def sample(self, config: Config): def __str__(self): return self.__class__.__name__ +# This is a dummy function for GroupConstraint +# NOTE: config does not have team info +def sample_one_big_team(config): + from nmmo.task.group import Group + team = list(range(1, config.PLAYER_N+1)) + return [Group(team, 'All')] + class GroupConstraint(Constraint): """ Ensures that all agents of a group exist in a config """ def __init__(self, - sample_fn = lambda c: TeamHelper.generate_from_config(c).all_teams, + sample_fn = sample_one_big_team, systems = None): """ Params @@ -70,6 +74,11 @@ def check(self, config, value): def sample(self, config): return random.choice(self._sample_fn(config)) + def sample_from_teams(self, teams: Dict[int, Dict]): + from nmmo.task.group import Group + team_id = random.choice(list(teams.keys())) + return Group(teams[team_id], str(team_id)) + class ScalarConstraint(Constraint): def __init__(self, low: Union[Callable, Number] = 0, @@ -111,7 +120,7 @@ def sample(self, config: Config): # Group Constraints TEAM_GROUPS = GroupConstraint() -INDIVIDUAL_GROUPS=GroupConstraint(sample_fn=lambda c:TeamHelper.generate_from_config(c).all_agents) +INDIVIDUAL_GROUPS=GroupConstraint() # System Constraints MATERIAL_CONSTRAINT = DiscreteConstraint(space=list(material.All.materials), @@ -151,3 +160,5 @@ def sample(self, config: Config): PROGRESSION_CONSTRAINT = ScalarConstraint(high = lambda c: c.PROGRESSION_LEVEL_MAX+1) INVENTORY_CONSTRAINT = ScalarConstraint(high=lambda c: c.ITEM_INVENTORY_CAPACITY+1) AGENT_NUMBER_CONSTRAINT = ScalarConstraint(low = 1, high = lambda c: c.PLAYER_N+1) +EVENT_NUMBER_CONSTRAINT = ScalarConstraint(low = 1, high = 50) # arbitrary 
+GOLD_CONSTRAINT = ScalarConstraint(low = 1, high = 100) # arbitrary diff --git a/nmmo/task/scenario.py b/nmmo/task/scenario.py deleted file mode 100644 index 57be1b4e..00000000 --- a/nmmo/task/scenario.py +++ /dev/null @@ -1,76 +0,0 @@ -# from __future__ import annotations - -# import copy -# from typing import Callable, Union, Iterable, \ -# Optional, List, Tuple -# from nmmo.core.config import Config -# from nmmo.task.group import Group -# from nmmo.task.team_helper import TeamHelper -# from nmmo.task.task_api import Task -# from nmmo.task.base_predicates import StayAlive - -# class Scenario: -# ''' Utility class to aid in defining common tasks -# ''' -# def __init__(self, config: Config): -# config = copy.deepcopy(config) -# self.team_helper = TeamHelper.generate_from_config(config) -# self.config = config -# self._tasks: List[Task] = [] - -# def add_task(self, task: Task): -# self._tasks.append(task) - -# def add_tasks(self, -# tasks: Union[Task, -# Iterable[Task], -# Callable[[Group], Task]], -# groups: Optional[Union[str,Iterable[Group]]] = 'teams') -> None: -# # pylint: disable=unnecessary-lambda-assignment -# """ Utility function to define symmetric tasks - -# Params: - -# tasks: -# Iterable[Task]: -# For each Task in the iterable, add to scenario. -# Callable[[Group], Task]: -# A function taking in a group and return a task. -# The result from applying this function to "groups" is added to -# the scenario. -# Task: -# Mapped to Callable by overriding subject - -# groups: -# Foreach group in groups, add a task. 
-# """ -# # Tasks -# if isinstance(tasks, Iterable): -# for task in tasks: -# self.add_task(task) -# return - -# # Functional Syntax -# # Tasks -# if isinstance(tasks, Task): -# task_generator = lambda group: tasks.sample(config=self.config, subject=group) -# else: -# task_generator = tasks -# # Groups -# if isinstance(groups, str): -# assert(groups in ['agents','teams']) -# if groups == 'agents': -# groups = self.team_helper.all_agents -# elif groups == 'teams': -# groups = self.team_helper.all_teams -# # Create -# self.add_tasks([task_generator(group) for group in groups]) - -# @property -# def tasks(self) -> List[Task]: -# return self._tasks - -# def default_task(agents) -> List[Tuple[Task, float]]: -# '''Generates the default reward on env.init -# ''' -# return [Repeat(StayAlive(Group([agent]))) for agent in agents] diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index 3c9142d4..6f57d580 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -106,7 +106,7 @@ def make_same_task(predicate: Union[Predicate, Callable], if isinstance(predicate, type): # predicate is class, assuming Predicate return [predicate(Group(agent_id),**kwargs).create_task(task_cls=task_cls) for agent_id in agent_list] - + # eval_fn is a function to turn into predicate pred_cls = define_predicate(predicate) return [pred_cls(Group(agent_id),**kwargs).create_task(task_cls=task_cls) diff --git a/nmmo/task/team_helper.py b/nmmo/task/team_helper.py deleted file mode 100644 index a8b0aa67..00000000 --- a/nmmo/task/team_helper.py +++ /dev/null @@ -1,57 +0,0 @@ -from typing import List -from nmmo.task.group import Group - -class TeamHelper: - ''' Provides a mapping from ent_id to group as equivalent to the grouping - expected by the policy - ''' - - def __init__(self, agents: List[int], num_teams: int): - assert len(agents) % num_teams == 0 - self.team_size = len(agents) // num_teams - self._team_to_ent, self._ent_to_team = self._map_ent_team(agents, num_teams) - - def 
_map_ent_team(self, agents, num_teams): - _team_to_ent = {} - _ent_to_team = {} - for ent_id in agents: - # to assigne agent 1 to team 0, and so forth - pop_id = (ent_id - 1) % num_teams - _ent_to_team[ent_id] = pop_id - if pop_id in _team_to_ent: - _team_to_ent[pop_id].append(ent_id) - else: - _team_to_ent[pop_id] = [ent_id] - - return _team_to_ent, _ent_to_team - - def team(self, pop_id: int) -> Group: - assert pop_id in self._team_to_ent, "Wrong pop_id" - return Group(self._team_to_ent[pop_id], f"Team.{pop_id}") - - def own_team(self, ent_id: int) -> Group: - assert ent_id in self._ent_to_team, "Wrong ent_id" - pop_id = self._ent_to_team[ent_id] - return Group(self._team_to_ent[pop_id], f"Team.{pop_id}") - - def left_team(self, ent_id: int) -> Group: - assert ent_id in self._ent_to_team, "Wrong ent_id" - pop_id = (self._ent_to_team[ent_id] - 1) % len(self._team_to_ent) - return Group(self._team_to_ent[pop_id], f"Team.{pop_id}") - - def right_team(self, ent_id: int) -> Group: - assert ent_id in self._ent_to_team, "Wrong ent_id" - pop_id = (self._ent_to_team[ent_id] + 1) % len(self._team_to_ent) - return Group(self._team_to_ent[pop_id], f"Team.{pop_id}") - - @property - def all_agents(self) -> Group: - return Group(list(self._ent_to_team.keys()), "All") - - @property - def all_teams(self) -> List[Group]: - return list((Group(v,str(k)) for k,v in self._team_to_ent.items())) - - @staticmethod - def generate_from_config(config): - return TeamHelper(list(range(1, config.PLAYER_N+1)), len(config.PLAYERS)) From fc3ae271b5283b7ad0e1a609e75d4c0ca543b72e Mon Sep 17 00:00:00 2001 From: Nikhil Pinnaparaju Date: Sat, 3 Jun 2023 21:43:42 -0500 Subject: [PATCH 09/18] Simple Optimizations for Speed V2 --- nmmo/core/env.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index cee938fa..cc456a84 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -2,6 +2,7 @@ import random from typing import Any, Dict, 
List, Tuple from ordered_set import OrderedSet +from collections import defaultdict import gym import numpy as np @@ -408,16 +409,20 @@ def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]): entity identified by ent_id. ''' # Initialization - infos = {} - for agent_id in agents: - infos[agent_id] = {} - infos[agent_id]['task'] = {} - rewards = {agent_id: 0 for agent_id in agents} + + infos = {agent_id: {'task': {}} for agent_id in agents} + rewards = defaultdict(int) + agents = set(agents) + reward_cache = {} # Compute Rewards and infos self.game_state = self._gamestate_generator.generate(self.realm, self.obs) for task in self.tasks: - task_rewards, task_infos = task.compute_rewards(self.game_state) + if task in reward_cache: + task_rewards, task_infos = reward_cache[task] + else: + task_rewards, task_infos = task.compute_rewards(self.game_state) + reward_cache[task] = (task_rewards, task_infos) for agent_id, reward in task_rewards.items(): if agent_id in agents and agent_id not in dones: rewards[agent_id] = rewards.get(agent_id,0) + reward From b40bf912b4bb97391b5790490e2705ba9bbeeb48 Mon Sep 17 00:00:00 2001 From: Nikhil Pinnaparaju Date: Sat, 3 Jun 2023 21:51:29 -0500 Subject: [PATCH 10/18] fixing import order --- nmmo/core/env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index cc456a84..ca386e4b 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -1,8 +1,8 @@ import functools import random from typing import Any, Dict, List, Tuple -from ordered_set import OrderedSet from collections import defaultdict +from ordered_set import OrderedSet import gym import numpy as np From 1d2d46e18c05a6f11592074eb25a25aaa2d75536 Mon Sep 17 00:00:00 2001 From: kywch Date: Sun, 4 Jun 2023 07:31:11 +0000 Subject: [PATCH 11/18] tweaking how to pass tasks to env, removed P prefix --- nmmo/core/env.py | 24 ++++++------ nmmo/task/base_predicates.py | 9 +---- nmmo/task/predicate_api.py | 54 
+++++++++++---------------- nmmo/task/task_api.py | 4 +- tests/task/test_demo_task_creation.py | 20 +++++----- tests/task/test_task_api.py | 41 ++++++++------------ 6 files changed, 65 insertions(+), 87 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index ca386e4b..f8405231 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -1,6 +1,6 @@ import functools import random -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, List, Callable from collections import defaultdict from ordered_set import OrderedSet @@ -16,7 +16,7 @@ from nmmo.entity.entity import Entity from nmmo.systems.item import Item from nmmo.task.game_state import GameStateGenerator -from nmmo.task.task_api import Task, nmmo_default_task, make_team_tasks +from nmmo.task.task_api import Task, nmmo_default_task from scripted.baselines import Scripted class Env(ParallelEnv): @@ -40,7 +40,7 @@ def __init__(self, self._gamestate_generator = GameStateGenerator(self.realm, self.config) self.game_state = None - self.tasks = None + self.tasks = nmmo_default_task(self.possible_agents) # pylint: disable=method-cache-max-size-none @functools.lru_cache(maxsize=None) @@ -120,8 +120,8 @@ def action_space(self, agent): # pylint: disable=arguments-renamed def reset(self, map_id=None, seed=None, options=None, new_tasks: List[Task]=None, - task_spec: List[Tuple]=None, - teams: Dict[int,List[int]]=None): + make_task_fn: Callable=None, + make_task_fn_kwargs: Dict[str, Any]=None): '''OpenAI Gym API reset function Loads a new game map and returns initial observations @@ -130,8 +130,8 @@ def reset(self, map_id=None, seed=None, options=None, map_id: Map index to load. 
Selects a random map by default seed: random seed to use new_tasks: A list of instantiated tasks - task_spec: A list of task spec to instantiate inside reset() - teams: team info to map agent references in the task_spec + make_task_fn: A function to instantiate tasks + make_task_fn_kwargs: Keyword arguments to pass to make_task_fn Returns: observations, as documented by _compute_observations() @@ -160,16 +160,17 @@ def reset(self, map_id=None, seed=None, options=None, """Two methods to define tasks. * new_tasks: a list of instantiated tasks. This method has precedence - * task_spec and teams: these are used to instantiate tasks here + * make_task_fn & kwargs: a task maker fn and its kwargs to instantiate tasks If these are all None, then use the default task """ if new_tasks is not None: # providing an empty new_tasks [] is also possible self.tasks = new_tasks - elif task_spec is not None and teams is not None: - self.tasks = make_team_tasks(teams, task_spec) + elif make_task_fn is not None: + self.tasks = make_task_fn(**make_task_fn_kwargs) else: - self.tasks = nmmo_default_task(self.possible_agents) + for task in self.tasks: + task.reset() return {a: o.to_gym() for a,o in self.obs.items()} @@ -409,7 +410,6 @@ def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]): entity identified by ent_id. 
''' # Initialization - infos = {agent_id: {'task': {}} for agent_id in agents} rewards = defaultdict(int) agents = set(agents) diff --git a/nmmo/task/base_predicates.py b/nmmo/task/base_predicates.py index daef8daa..c84f084b 100644 --- a/nmmo/task/base_predicates.py +++ b/nmmo/task/base_predicates.py @@ -3,7 +3,7 @@ import numpy as np from numpy import count_nonzero as count -from nmmo.task.predicate_api import POR, define_predicate +from nmmo.task.predicate_api import OR, define_predicate from nmmo.task.group import Group from nmmo.task.game_state import GameState from nmmo.task import constraint @@ -43,11 +43,6 @@ def StayAlive(gs: GameState, """True if all subjects are alive. """ return count(subject.health > 0) == len(subject) - # The below is for speed testing (bypass GroupView) - # agent = gs.entity_or_none(subject.agents[0]) - # if agent is None: - # return False - # return agent.health > 0 @define_predicate def AllDead(gs: GameState, @@ -92,7 +87,7 @@ def CanSeeGroup(gs: GameState, target: Group = constraint.TEAM_GROUPS): """ Returns True if subject can see any of target """ - return POR(*(CanSeeAgent(subject, agent) for agent in target.agents)) + return OR(*(CanSeeAgent(subject, agent) for agent in target.agents)) @define_predicate def DistanceTraveled(gs: GameState, diff --git a/nmmo/task/predicate_api.py b/nmmo/task/predicate_api.py index cbcd4853..1e21da3b 100644 --- a/nmmo/task/predicate_api.py +++ b/nmmo/task/predicate_api.py @@ -144,25 +144,23 @@ def create_task(self, task_cls: Task=None, return task_cls(eval_fn=self, assignee=assignee, reward_multiplier=reward_multiplier) def __and__(self, other): - return PAND(self, other) + return AND(self, other) def __or__(self, other): - return POR(self, other) + return OR(self, other) def __invert__(self): - return PNOT(self) - def __rshift__(self, other): - return IMPLY(self, other) + return NOT(self) def __add__(self, other): - return PADD(self, other) + return ADD(self, other) def __radd__(self, other): - 
return PADD(self, other) + return ADD(self, other) def __sub__(self, other): - return PSUB(self, other) + return SUB(self, other) def __rsub__(self, other): - return PSUB(self, other) + return SUB(self, other) def __mul__(self, other): - return PMUL(self, other) + return MUL(self, other) def __rmul__(self, other): - return PMUL(self, other) + return MUL(self, other) # _make_name helper functions def arg_to_string(arg): @@ -250,57 +248,47 @@ def sample(self, config: Config, cls: type[PredicateOperator], **kwargs): else p(None) for p in self._predicates] return cls(*predicates, subject=subject) -class POR(PredicateOperator, Predicate): +class OR(PredicateOperator, Predicate): def __init__(self, *predicates: Predicate, subject: Group=None): super().__init__(lambda n: n>0, *predicates, subject=subject) def _evaluate(self, gs: GameState) -> float: return any(p(gs) == 1 for p in self._predicates)*1.0 def sample(self, config: Config, **kwargs): - return super().sample(config, POR, **kwargs) + return super().sample(config, OR, **kwargs) -class PAND(PredicateOperator, Predicate): +class AND(PredicateOperator, Predicate): def __init__(self, *predicates: Predicate, subject: Group=None): super().__init__(lambda n: n>0, *predicates, subject=subject) def _evaluate(self, gs: GameState) -> float: return all(p(gs) == 1 for p in self._predicates)*1.0 def sample(self, config: Config, **kwargs): - return super().sample(config, PAND, **kwargs) + return super().sample(config, AND, **kwargs) -class PNOT(PredicateOperator, Predicate): +class NOT(PredicateOperator, Predicate): def __init__(self, predicate: Predicate, subject: Group=None): super().__init__(lambda n: n==1, predicate, subject=subject) def _evaluate(self, gs: GameState) -> float: return 1.0 - self._predicates[0](gs) def sample(self, config: Config, **kwargs): - return super().sample(config, PNOT, **kwargs) + return super().sample(config, NOT, **kwargs) -class IMPLY(PredicateOperator, Predicate): - def __init__(self, p: 
Predicate, q: Predicate, subject: Group=None): - super().__init__(lambda n: n==2, p,q, subject=subject) - def _evaluate(self, gs: GameState) -> float: - if self._predicates[0](gs) == 1: - return self._predicates[1](gs)*1.0 - return True - def sample(self, config: Config, **kwargs): - return super().sample(config, IMPLY, **kwargs) - -class PADD(PredicateOperator, Predicate): +class ADD(PredicateOperator, Predicate): def __init__(self, *predicate: Union[Predicate, Real], subject: Group=None): super().__init__(lambda n: n>0, *predicate, subject=subject) def _evaluate(self, gs: GameState) -> float: return max(min(sum(p(gs) for p in self._predicates),1.0),0.0) def sample(self, config: Config, **kwargs): - return super().sample(config, PADD, **kwargs) + return super().sample(config, ADD, **kwargs) -class PSUB(PredicateOperator, Predicate): +class SUB(PredicateOperator, Predicate): def __init__(self, p: Predicate, q: Union[Predicate, Real], subject: Group=None): super().__init__(lambda n: n==2, p,q, subject=subject) def _evaluate(self, gs: GameState) -> float: return max(min(self._predicates[0](gs)-self._predicates[1](gs),1.0),0.0) def sample(self, config: Config, **kwargs): - return super().sample(config, PSUB, **kwargs) + return super().sample(config, SUB, **kwargs) -class PMUL(PredicateOperator, Predicate): +class MUL(PredicateOperator, Predicate): def __init__(self, *predicate: Union[Predicate, Real], subject: Group=None): super().__init__(lambda n: n>0, *predicate, subject=subject) def _evaluate(self, gs: GameState) -> float: @@ -309,4 +297,4 @@ def _evaluate(self, gs: GameState) -> float: result = result * p(gs) return max(min(result,1.0),0.0) def sample(self, config: Config, **kwargs): - return super().sample(config, PMUL, **kwargs) + return super().sample(config, MUL, **kwargs) diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index 6f57d580..b45a68da 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -69,7 +69,9 @@ def 
compute_rewards(self, gs) -> Tuple[Dict[int, float], Dict[int, Dict]]: """ reward = self._map_progress_to_reward(gs) * self._reward_multiplier rewards = {int(ent_id): reward for ent_id in self._assignee} - infos = {int(ent_id): {'reward': reward, 'progress': self._progress} + infos = {int(ent_id): {'reward': reward, + 'progress': self._progress, + 'completed': self._completed} for ent_id in self._assignee} # NOTE: tasks do not know whether assignee agents are alive or dead diff --git a/tests/task/test_demo_task_creation.py b/tests/task/test_demo_task_creation.py index 148a1e13..ba03c5f9 100644 --- a/tests/task/test_demo_task_creation.py +++ b/tests/task/test_demo_task_creation.py @@ -10,6 +10,7 @@ from nmmo.task import base_predicates as bp from nmmo.task.game_state import GameState from nmmo.task.group import Group +from nmmo.task.task_api import make_team_tasks def rollout(env, tasks, steps=5): @@ -63,9 +64,9 @@ def EquipmentLevel(gs: GameState, @p.define_predicate def CombatSkill(gs, subject, lvl): # using predicate OR - return p.POR(bp.AttainSkill(subject, skill.Melee, lvl, 1), - bp.AttainSkill(subject, skill.Range, lvl, 1), - bp.AttainSkill(subject, skill.Mage, lvl, 1)) + return p.OR(bp.AttainSkill(subject, skill.Melee, lvl, 1), + bp.AttainSkill(subject, skill.Range, lvl, 1), + bp.AttainSkill(subject, skill.Mage, lvl, 1)) combat = [ # (predicate, reward_multiplier) (CombatSkill, {'lvl': 2}, Tier.EASY), @@ -74,11 +75,11 @@ def CombatSkill(gs, subject, lvl): @p.define_predicate def ForageSkill(gs, subject, lvl): - return p.POR(bp.AttainSkill(subject, skill.Fishing, lvl, 1), - bp.AttainSkill(subject, skill.Herbalism, lvl, 1), - bp.AttainSkill(subject, skill.Prospecting, lvl, 1), - bp.AttainSkill(subject, skill.Carving, lvl, 1), - bp.AttainSkill(subject, skill.Alchemy, lvl, 1)) + return p.OR(bp.AttainSkill(subject, skill.Fishing, lvl, 1), + bp.AttainSkill(subject, skill.Herbalism, lvl, 1), + bp.AttainSkill(subject, skill.Prospecting, lvl, 1), + 
bp.AttainSkill(subject, skill.Carving, lvl, 1), + bp.AttainSkill(subject, skill.Alchemy, lvl, 1)) foraging = [ # (predicate, reward_multiplier) (ForageSkill, {'lvl': 2}, Tier.EASY), @@ -241,7 +242,8 @@ def custom_predicate_func(gs, subject, test): config = ScriptedAgentTestConfig() env = Env(config) - env.reset(task_spec=task_spec, teams=teams) + env.reset(make_task_fn=make_team_tasks, + make_task_fn_kwargs={'task_spec':task_spec, 'teams':teams}) self.assertEqual(len(env.tasks), 7) # 7 tasks were created self.assertEqual(env.tasks[0].name, # team 0 task assigned to agents 1,2,3 diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 8db4f086..58cb3a8a 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -52,28 +52,17 @@ def test_predicate_operators(self): pred3 = SUCCESS & ~ FAILURE & SUCCESS self.assertTrue(pred3(mock_gs)) - pred4 = SUCCESS >> SUCCESS - self.assertTrue(pred4(mock_gs)) - - pred5 = SUCCESS >> ~ SUCCESS - self.assertFalse(pred5(mock_gs)) - - pred6 = (FAILURE >> FAILURE) & SUCCESS - self.assertTrue(pred6(mock_gs)) - self.assertEqual(pred6.name, - "(PAND_(IMPLY_(Failure_(0,))_(Failure_(0,)))_(Success_(0,)))") - # predicate math - pred7 = 0.1 * SUCCESS + 0.3 - self.assertEqual(pred7(mock_gs), 0.4) - self.assertEqual(pred7.name, - "(PADD_(PMUL_(Success_(0,))_0.1)_0.3)") + pred4 = 0.1 * SUCCESS + 0.3 + self.assertEqual(pred4(mock_gs), 0.4) + self.assertEqual(pred4.name, + "(ADD_(MUL_(Success_(0,))_0.1)_0.3)") - pred8 = 0.3 * SUCCESS - 1 - self.assertEqual(pred8(mock_gs), 0.0) # cannot go below 0 + pred5 = 0.3 * SUCCESS - 1 + self.assertEqual(pred5(mock_gs), 0.0) # cannot go below 0 - pred9 = 0.3 * SUCCESS + 1 - self.assertEqual(pred9(mock_gs), 1.0) # cannot go over 1 + pred6 = 0.3 * SUCCESS + 1 + self.assertEqual(pred6(mock_gs), 1.0) # cannot go over 1 def test_team_assignment(self): team = Group([1, 2, 8, 9], "TeamFoo") @@ -90,10 +79,10 @@ def test_predicate_name(self): SUCCESS = Success(Group([0,2])) FAILURE 
= Failure(Group(0)) fake_pred = Fake(Group(2), 1, Item.Hat, Action.Melee) - combination = (SUCCESS & ~ (FAILURE | fake_pred)) | (FAILURE >> fake_pred) + combination = (SUCCESS & ~ (FAILURE | fake_pred)) | (FAILURE * fake_pred + .3) - .4 self.assertEqual(combination.name, - "(POR_(PAND_(Success_(0,2))_(PNOT_(POR_(Failure_(0,))_(Fake_(2,)_1_Hat_Melee))))_"+\ - "(IMPLY_(Failure_(0,))_(Fake_(2,)_1_Hat_Melee)))") + "(OR_(AND_(Success_(0,2))_(NOT_(OR_(Failure_(0,))_(Fake_(2,)_1_Hat_Melee))))_"+\ + "(SUB_(ADD_(MUL_(Failure_(0,))_(Fake_(2,)_1_Hat_Melee))_0.3)_0.4))") def test_constraint(self): # pylint: disable=not-callable,no-value-for-parameter @@ -115,7 +104,7 @@ def test_sample_predicate(self): # pylint: disable=no-value-for-parameter,expression-not-assigned predicate = CanSeeGroup() & TickGE() self.assertEqual(predicate.name, - "(PAND_(CanSeeGroup_subject:GroupConstraint_target:GroupConstraint)_"+\ + "(AND_(CanSeeGroup_subject:GroupConstraint_target:GroupConstraint)_"+\ "(TickGE_subject:GroupConstraint_num_tick:ScalarConstraint))") config = nmmo.config.Default() TickGE().sample(config) @@ -161,8 +150,10 @@ def test_nmmo_default_task(self): env = Env(config) for test_mode in [None, 'no_task', 'func_eval', 'dummy_eval_fn']: - dafault_tasks = nmmo_default_task(env.possible_agents, test_mode) - env.reset(new_tasks=dafault_tasks) + #dafault_tasks = nmmo_default_task(env.possible_agents, test_mode) + env.reset(make_task_fn=nmmo_default_task, + make_task_fn_kwargs={'agent_list': env.possible_agents, + 'test_mode': test_mode}) for _ in range(3): env.step({}) From 25bab852463e45d89ba73881eb19e0511493cf46 Mon Sep 17 00:00:00 2001 From: Nikhil Pinnaparaju Date: Sun, 4 Jun 2023 11:38:01 -0500 Subject: [PATCH 12/18] minor changes to predicate api and caching --- nmmo/task/predicate_api.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/nmmo/task/predicate_api.py b/nmmo/task/predicate_api.py index cbcd4853..985fddfb 100644 --- 
a/nmmo/task/predicate_api.py +++ b/nmmo/task/predicate_api.py @@ -26,10 +26,9 @@ def __init__(self, **kwargs): self.name = self._make_name(self.__class__.__name__, args, kwargs) - def is_group(x): - return isinstance(x, Group) - self._groups: List[Group] = list(filter(is_group, args)) - self._groups = self._groups + list(filter(is_group, kwargs.values())) + self._groups: List[Group] = [x for x in list(args) + list(kwargs.values()) + if isinstance(x, Group)] + self._groups.append(subject) self._args = args @@ -54,12 +53,12 @@ def __call__(self, gs: GameState) -> float: for group in self._groups: group.update(gs) # Calculate score - cache = gs.cache_result - if self.name in cache: - progress = cache[self.name] + # cache = gs.cache_result + if self.name in gs.cache_result: + progress = gs.cache_result[self.name] else: progress = max(min(self._evaluate(gs)*1.0,1.0),0.0) - cache[self.name] = progress + gs.cache_result[self.name] = progress return progress def _reset(self, config: Config): From 18b04defcb288963f78e824d4a9ed299a8859331 Mon Sep 17 00:00:00 2001 From: kywch Date: Mon, 5 Jun 2023 07:54:16 +0000 Subject: [PATCH 13/18] created manual curriculum, tweaked task api --- nmmo/lib/team_helper.py | 3 +- nmmo/task/base_predicates.py | 91 +++---- nmmo/task/constraint.py | 51 ++-- nmmo/task/predicate_api.py | 12 +- nmmo/task/task_api.py | 21 +- tests/task/test_demo_task_creation.py | 63 ++--- tests/task/test_manual_curriculum.py | 303 ++++++++++++++++++++++ tests/task/test_predicates.py | 359 ++++++++++++++++---------- tests/task/test_task_api.py | 136 ++++++++-- 9 files changed, 767 insertions(+), 272 deletions(-) create mode 100644 tests/task/test_manual_curriculum.py diff --git a/nmmo/lib/team_helper.py b/nmmo/lib/team_helper.py index 5474f7a5..e624dca5 100644 --- a/nmmo/lib/team_helper.py +++ b/nmmo/lib/team_helper.py @@ -1,6 +1,5 @@ from typing import Dict, List - class TeamHelper(): def __init__(self, teams: Dict[int, List[int]]): self.teams = teams @@ -33,4 +32,6 
@@ def get_target_agent(self, team_id: int, target: str): return self.teams[(team_id-1) % self.num_teams] if target == 'right_team_leader': return self.teams[(team_id-1) % self.num_teams][0] + if target == 'my_team_leader': + return self.teams[team_id][0] return None diff --git a/nmmo/task/base_predicates.py b/nmmo/task/base_predicates.py index c84f084b..4f8dbaf1 100644 --- a/nmmo/task/base_predicates.py +++ b/nmmo/task/base_predicates.py @@ -1,9 +1,9 @@ #pylint: disable=invalid-name, unused-argument, no-value-for-parameter from __future__ import annotations +from typing import Iterable import numpy as np from numpy import count_nonzero as count -from nmmo.task.predicate_api import OR, define_predicate from nmmo.task.group import Group from nmmo.task.game_state import GameState from nmmo.task import constraint @@ -13,23 +13,23 @@ from nmmo.lib.material import Material from nmmo.lib import utils -@define_predicate +def norm(progress): + return max(min(progress, 1.0), 0.0) + def Success(gs: GameState, subject: Group): ''' Returns True. For debugging. ''' return True -@define_predicate def TickGE(gs: GameState, subject: Group = constraint.TEAM_GROUPS, num_tick: int = constraint.ScalarConstraint()): """True if the current tick is greater than or equal to the specified num_tick. Is progress counter. """ - return gs.current_tick / num_tick + return norm(gs.current_tick / num_tick) -@define_predicate def CanSeeTile(gs: GameState, subject: Group = constraint.TEAM_GROUPS, tile_type: type[Material]= constraint.MATERIAL_CONSTRAINT): @@ -37,21 +37,18 @@ def CanSeeTile(gs: GameState, """ return any(tile_type.index in t for t in subject.obs.tile.material_id) -@define_predicate def StayAlive(gs: GameState, subject: Group = constraint.TEAM_GROUPS): """True if all subjects are alive. """ return count(subject.health > 0) == len(subject) -@define_predicate def AllDead(gs: GameState, subject: Group = constraint.TEAM_GROUPS): """True if all subjects are dead. 
""" - return 1.0 - count(subject.health) / len(subject) + return norm(1.0 - count(subject.health) / len(subject)) -@define_predicate def OccupyTile(gs: GameState, subject: Group, row: int = constraint.COORDINATE_CONSTRAINT, @@ -60,7 +57,6 @@ def OccupyTile(gs: GameState, """ return np.any((subject.row == row) & (subject.col == col)) -@define_predicate def AllMembersWithinRange(gs: GameState, subject: Group = constraint.TEAM_GROUPS, dist: int = constraint.COORDINATE_CONSTRAINT): @@ -71,9 +67,8 @@ def AllMembersWithinRange(gs: GameState, subject.col.max()-subject.col.min()) if current_dist <= 0: return 1.0 - return dist / current_dist + return norm(dist / current_dist) -@define_predicate def CanSeeAgent(gs: GameState, subject: Group = constraint.TEAM_GROUPS, target: int = constraint.AGENT_NUMBER_CONSTRAINT): @@ -81,15 +76,13 @@ def CanSeeAgent(gs: GameState, """ return any(target in e.ids for e in subject.obs.entities) -@define_predicate def CanSeeGroup(gs: GameState, subject: Group = constraint.TEAM_GROUPS, - target: Group = constraint.TEAM_GROUPS): + target: Iterable[int] = constraint.AgentListConstraint): """ Returns True if subject can see any of target """ - return OR(*(CanSeeAgent(subject, agent) for agent in target.agents)) + return any(CanSeeAgent(gs, subject, agent) for agent in target) -@define_predicate def DistanceTraveled(gs: GameState, subject: Group = constraint.TEAM_GROUPS, dist: int = constraint.ScalarConstraint()): @@ -101,9 +94,8 @@ def DistanceTraveled(gs: GameState, r = subject.row c = subject.col dists = utils.linf(list(zip(r,c)),[gs.spawn_pos[id_] for id_ in subject.entity.id]) - return dists.sum() / dist + return norm(dists.sum() / dist) -@define_predicate def AttainSkill(gs: GameState, subject: Group = constraint.TEAM_GROUPS, skill: Skill = constraint.SKILL_CONSTRAINT, @@ -113,9 +105,8 @@ def AttainSkill(gs: GameState, is greather than or equal to num_agent """ skill_level = getattr(subject,skill.__name__.lower() + '_level') - return 
sum(skill_level >= level) / num_agent + return norm(sum(skill_level >= level) / num_agent) -@define_predicate def CountEvent(gs: GameState, subject: Group = constraint.TEAM_GROUPS, event: str = constraint.EVENTCODE_CONSTRAINT, @@ -123,9 +114,8 @@ def CountEvent(gs: GameState, """True if the number of events occured in subject corresponding to event >= N """ - return len(getattr(subject.event, event)) / N + return norm(len(getattr(subject.event, event)) / N) -@define_predicate def ScoreHit(gs: GameState, subject: Group = constraint.TEAM_GROUPS, combat_style: type[Skill] = constraint.COMBAT_SKILL_CONSTRAINT, @@ -134,33 +124,46 @@ def ScoreHit(gs: GameState, combat_style >= count """ hits = subject.event.SCORE_HIT.combat_style == combat_style.SKILL_ID - return count(hits) / N + return norm(count(hits) / N) + +def DefeatEntity(gs: GameState, + subject: Group = constraint.TEAM_GROUPS, + agent_type: str = constraint.AGENT_TYPE_CONSTRAINT, + level: int = constraint.PROGRESSION_CONSTRAINT, + num_agent: int = constraint.AGENT_NUMBER_CONSTRAINT): + """True if the number of agents (agent_type, >= level) defeated + is greater than or equal to num_agent + """ + # NOTE: there is no way to tell if an agent is a teammate or an enemy + # so agents can get rewarded for killing their own teammates + defeated_type = subject.event.PLAYER_KILL.target_ent > 0 if agent_type == 'player' \ + else subject.event.PLAYER_KILL.target_ent < 0 + defeated = defeated_type & (subject.event.PLAYER_KILL.level >= level) + if num_agent > 0: + return norm(count(defeated) / num_agent) + return 1.0 -@define_predicate def HoardGold(gs: GameState, subject: Group = constraint.TEAM_GROUPS, amount: int = constraint.GOLD_CONSTRAINT): """True iff the summed gold of all teammate is greater than or equal to amount. 
""" - return subject.gold.sum() / amount + return norm(subject.gold.sum() / amount) -@define_predicate def EarnGold(gs: GameState, subject: Group = constraint.TEAM_GROUPS, amount: int = constraint.GOLD_CONSTRAINT): """ True if the total amount of gold earned is greater than or equal to amount. """ - return subject.event.EARN_GOLD.gold.sum() / amount + return norm(subject.event.EARN_GOLD.gold.sum() / amount) -@define_predicate def SpendGold(gs: GameState, subject: Group = constraint.TEAM_GROUPS, amount: int = constraint.GOLD_CONSTRAINT): """ True if the total amount of gold spent is greater than or equal to amount. """ - return subject.event.BUY_ITEM.gold.sum() / amount + return norm(subject.event.BUY_ITEM.gold.sum() / amount) -@define_predicate def MakeProfit(gs: GameState, subject: Group = constraint.TEAM_GROUPS, amount: int = constraint.GOLD_CONSTRAINT): @@ -168,9 +171,8 @@ def MakeProfit(gs: GameState, """ profits = subject.event.EARN_GOLD.gold.sum() costs = subject.event.BUY_ITEM.gold.sum() - return (profits-costs) / amount + return norm((profits-costs) / amount) -@define_predicate def InventorySpaceGE(gs: GameState, subject: Group = constraint.TEAM_GROUPS, space: int = constraint.INVENTORY_CONSTRAINT): @@ -180,7 +182,6 @@ def InventorySpaceGE(gs: GameState, max_space = gs.config.ITEM_INVENTORY_CAPACITY return all(max_space - inv.len >= space for inv in subject.obs.inventory) -@define_predicate def OwnItem(gs: GameState, subject: Group = constraint.TEAM_GROUPS, item: type[Item] = constraint.ITEM_CONSTRAINT, @@ -191,12 +192,11 @@ def OwnItem(gs: GameState, """ owned = (subject.item.type_id == item.ITEM_TYPE_ID) & \ (subject.item.level >= level) - return sum(subject.item.quantity[owned]) / quantity + return norm(sum(subject.item.quantity[owned]) / quantity) -@define_predicate def EquipItem(gs: GameState, subject: Group = constraint.TEAM_GROUPS, - item: type[Item] = constraint.ITEM_CONSTRAINT, + item: type[Item] = constraint.EQUIPABLE_CONSTRAINT, level: int = 
constraint.PROGRESSION_CONSTRAINT, num_agent: int = constraint.AGENT_NUMBER_CONSTRAINT): """True if the number of agents that equip the item (_item_type, >=_level) @@ -206,10 +206,9 @@ def EquipItem(gs: GameState, (subject.item.level >= level) & \ (subject.item.equipped > 0) if num_agent > 0: - return count(equipped) / num_agent + return norm(count(equipped) / num_agent) return 1.0 -@define_predicate def FullyArmed(gs: GameState, subject: Group = constraint.TEAM_GROUPS, combat_style: type[Skill] = constraint.COMBAT_SKILL_CONSTRAINT, @@ -234,10 +233,9 @@ def FullyArmed(gs: GameState, _, equipment_numbers = np.unique(subject.item.owner_id[lvl_flt & type_flt], return_counts=True) if num_agent > 0: - return (equipment_numbers >= len(item_ids.items())).sum() / num_agent + return norm((equipment_numbers >= len(item_ids.items())).sum() / num_agent) return 1.0 -@define_predicate def ConsumeItem(gs: GameState, subject: Group = constraint.TEAM_GROUPS, item: type[Item] = constraint.CONSUMABLE_CONSTRAINT, @@ -247,21 +245,19 @@ def ConsumeItem(gs: GameState, """ type_flt = subject.event.CONSUME_ITEM.type == item.ITEM_TYPE_ID lvl_flt = subject.event.CONSUME_ITEM.level >= level - return subject.event.CONSUME_ITEM.number[type_flt & lvl_flt].sum() / quantity + return norm(subject.event.CONSUME_ITEM.number[type_flt & lvl_flt].sum() / quantity) -@define_predicate def HarvestItem(gs: GameState, subject: Group = constraint.TEAM_GROUPS, - item: type[Item] = constraint.ITEM_CONSTRAINT, + item: type[Item] = constraint.HARVEST_CONSTRAINT, level: int = constraint.PROGRESSION_CONSTRAINT, quantity: int = constraint.EVENT_NUMBER_CONSTRAINT): """True if total quantity harvested of item type above level is >= quantity """ type_flt = subject.event.HARVEST_ITEM.type == item.ITEM_TYPE_ID lvl_flt = subject.event.HARVEST_ITEM.level >= level - return subject.event.HARVEST_ITEM.number[type_flt & lvl_flt].sum() / quantity + return norm(subject.event.HARVEST_ITEM.number[type_flt & lvl_flt].sum() / 
quantity) -@define_predicate def ListItem(gs: GameState, subject: Group = constraint.TEAM_GROUPS, item: type[Item] = constraint.ITEM_CONSTRAINT, @@ -271,9 +267,8 @@ def ListItem(gs: GameState, """ type_flt = subject.event.LIST_ITEM.type == item.ITEM_TYPE_ID lvl_flt = subject.event.LIST_ITEM.level >= level - return subject.event.LIST_ITEM.number[type_flt & lvl_flt].sum() / quantity + return norm(subject.event.LIST_ITEM.number[type_flt & lvl_flt].sum() / quantity) -@define_predicate def BuyItem(gs: GameState, subject: Group = constraint.TEAM_GROUPS, item: type[Item] = constraint.ITEM_CONSTRAINT, @@ -283,4 +278,4 @@ def BuyItem(gs: GameState, """ type_flt = subject.event.BUY_ITEM.type == item.ITEM_TYPE_ID lvl_flt = subject.event.BUY_ITEM.level >= level - return subject.event.BUY_ITEM.number[type_flt & lvl_flt].sum() / quantity + return norm(subject.event.BUY_ITEM.number[type_flt & lvl_flt].sum() / quantity) diff --git a/nmmo/task/constraint.py b/nmmo/task/constraint.py index c62989d5..d19686bc 100644 --- a/nmmo/task/constraint.py +++ b/nmmo/task/constraint.py @@ -7,6 +7,7 @@ from nmmo.systems import skill, item from nmmo.lib import material +from nmmo.lib.log import EventCode from nmmo.core.config import Config class InvalidConstraint(Exception): @@ -79,6 +80,18 @@ def sample_from_teams(self, teams: Dict[int, Dict]): team_id = random.choice(list(teams.keys())) return Group(teams[team_id], str(team_id)) +class AgentListConstraint(Constraint): + """ Ensures that all agents of the list exist in a config + """ + def check(self, config, value): + for agent in value: + if agent > config.PLAYER_N: + return False + return True + + def sample(self, config): + return None + class ScalarConstraint(Constraint): def __init__(self, low: Union[Callable, Number] = 0, @@ -108,7 +121,7 @@ def sample(self, config): class DiscreteConstraint(Constraint): def __init__(self, space, systems=None): super().__init__(systems) - self._space = space + self._space = set(space) def check(self, 
config: Config, value): if not super().check(config,value): @@ -116,36 +129,34 @@ def check(self, config: Config, value): return value in self._space def sample(self, config: Config): + # NOTE: this does NOT need to be deterministic return random.choice(self._space) # Group Constraints TEAM_GROUPS = GroupConstraint() INDIVIDUAL_GROUPS=GroupConstraint() +AGENT_LIST_CONSTRAINT = AgentListConstraint() -# System Constraints +# Tile Constraints MATERIAL_CONSTRAINT = DiscreteConstraint(space=list(material.All.materials), systems=['TERRAIN_SYSTEM_ENABLED', 'RESOURCE_SYSTEM_ENABLED']) HABITABLE_CONSTRAINT = DiscreteConstraint(space=list(material.Habitable.materials), systems=['TERRAIN_SYSTEM_ENABLED']) + +# Event Constraints +event_names = [k for k, v in EventCode.__dict__.items() if isinstance(v,int)] +EVENTCODE_CONSTRAINT = DiscreteConstraint(space=event_names) + +# Skill Constraints combat_skills = [skill.Melee, skill.Mage, skill.Range] harvest_skills = [skill.Fishing, skill.Herbalism, skill.Prospecting, skill.Alchemy, skill.Carving] SKILL_CONSTRAINT = DiscreteConstraint(space=combat_skills+harvest_skills, systems=['PROFESSION_SYSTEM_ENABLED']) COMBAT_SKILL_CONSTRAINT = DiscreteConstraint(space=combat_skills, systems=['PROFESSION_SYSTEM_ENABLED']) -EVENTCODE_CONSTRAINT = DiscreteConstraint(space=['EAT_FOOD', - 'DRINK_WATER', - 'SCORE_HIT', - 'PLAYER_KILL', - 'CONSUME_ITEM', - 'GIVE_ITEM', - 'DESTROY_ITEM', - 'HARVEST_ITEM', - 'GIVE_GOLD', - 'LIST_ITEM', - 'EARN_GOLD', - 'BUY_ITEM']) + +# Item Constraints armour = [item.Hat, item.Top, item.Bottom] weapons = [item.Spear, item.Bow, item.Wand] tools = [item.Axe, item.Gloves, item.Rod, item.Pickaxe, item.Chisel] @@ -153,12 +164,20 @@ def sample(self, config: Config): consumables = [item.Potion, item.Ration] ITEM_CONSTRAINT = DiscreteConstraint(space=armour+weapons+tools+ammunition+consumables, systems=['ITEM_SYSTEM_ENABLED']) +EQUIPABLE_CONSTRAINT = DiscreteConstraint(space=armour+weapons+tools+ammunition, + 
systems=['ITEM_SYSTEM_ENABLED']) CONSUMABLE_CONSTRAINT = DiscreteConstraint(space=consumables, systems=['ITEM_SYSTEM_ENABLED']) +HARVEST_CONSTRAINT = DiscreteConstraint(space=weapons+ammunition+consumables, + systems=['ITEM_SYSTEM_ENABLED']) + # Config Constraints COORDINATE_CONSTRAINT = ScalarConstraint(high = lambda c: c.MAP_CENTER) PROGRESSION_CONSTRAINT = ScalarConstraint(high = lambda c: c.PROGRESSION_LEVEL_MAX+1) INVENTORY_CONSTRAINT = ScalarConstraint(high=lambda c: c.ITEM_INVENTORY_CAPACITY+1) AGENT_NUMBER_CONSTRAINT = ScalarConstraint(low = 1, high = lambda c: c.PLAYER_N+1) -EVENT_NUMBER_CONSTRAINT = ScalarConstraint(low = 1, high = 50) # arbitrary -GOLD_CONSTRAINT = ScalarConstraint(low = 1, high = 100) # arbitrary + +# Arbitrary Constraints +EVENT_NUMBER_CONSTRAINT = ScalarConstraint(low = 1, high = 110) +GOLD_CONSTRAINT = ScalarConstraint(low = 1, high = 1000) +AGENT_TYPE_CONSTRAINT = DiscreteConstraint(space=['npc','player']) diff --git a/nmmo/task/predicate_api.py b/nmmo/task/predicate_api.py index 80bf1191..e71f2cc1 100644 --- a/nmmo/task/predicate_api.py +++ b/nmmo/task/predicate_api.py @@ -171,10 +171,8 @@ def arg_to_string(arg): ################################################ -def define_predicate(fn: Callable) -> type[Predicate]: - """ Syntactic sugar API for defining predicates - - See examples at base_predicates.py +def make_predicate(fn: Callable) -> type[Predicate]: + """ Syntactic sugar API for defining predicates from function """ signature = inspect.signature(fn) for i, param in enumerate(signature.parameters.values()): @@ -251,7 +249,8 @@ class OR(PredicateOperator, Predicate): def __init__(self, *predicates: Predicate, subject: Group=None): super().__init__(lambda n: n>0, *predicates, subject=subject) def _evaluate(self, gs: GameState) -> float: - return any(p(gs) == 1 for p in self._predicates)*1.0 + # using max as OR for the [0,1] float + return max(p(gs) for p in self._predicates) def sample(self, config: Config, **kwargs): return 
super().sample(config, OR, **kwargs) @@ -259,7 +258,8 @@ class AND(PredicateOperator, Predicate): def __init__(self, *predicates: Predicate, subject: Group=None): super().__init__(lambda n: n>0, *predicates, subject=subject) def _evaluate(self, gs: GameState) -> float: - return all(p(gs) == 1 for p in self._predicates)*1.0 + # using min as AND for the [0,1] float + return min(p(gs) for p in self._predicates) def sample(self, config: Config, **kwargs): return super().sample(config, AND, **kwargs) diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index b45a68da..902d880b 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -4,7 +4,7 @@ from abc import ABC from nmmo.task.group import Group -from nmmo.task.predicate_api import Predicate, define_predicate, arg_to_string +from nmmo.task.predicate_api import Predicate, make_predicate, arg_to_string from nmmo.task import base_predicates as bp from nmmo.lib.team_helper import TeamHelper @@ -110,7 +110,7 @@ def make_same_task(predicate: Union[Predicate, Callable], for agent_id in agent_list] # eval_fn is a function to turn into predicate - pred_cls = define_predicate(predicate) + pred_cls = make_predicate(predicate) return [pred_cls(Group(agent_id),**kwargs).create_task(task_cls=task_cls) for agent_id in agent_list] @@ -136,7 +136,9 @@ def stay_alive_eval(gs, subject): # TODO: a lot to improve below REWARD_TO = ['agent', 'team'] -VALID_TARGET = ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] +VALID_TARGET = ['left_team', 'left_team_leader', + 'right_team', 'right_team_leader', + 'my_team_leader'] def make_team_tasks(teams, task_spec) -> List[Task]: """ @@ -149,7 +151,7 @@ def make_team_tasks(teams, task_spec) -> List[Task]: team_helper = TeamHelper(teams) for idx in range(min(len(team_list), len(task_spec))): team_id = team_list[idx] - reward_to, pred_cls, kwargs = task_spec[team_id] + reward_to, pred_fn, kwargs = task_spec[team_id] assert reward_to in REWARD_TO, 'Wrong reward 
target' @@ -168,10 +170,15 @@ def make_team_tasks(teams, task_spec) -> List[Task]: # handle some special cases and instantiate the predicate first predicate = None - if not isinstance(pred_cls, type): + if isinstance(pred_fn, FunctionType): # if a function is provided as a predicate - pred_cls = define_predicate(pred_cls) - if pred_cls in [bp.AllDead]: + pred_cls = make_predicate(pred_fn) + + # TODO: should create a test for these + if pred_fn in [bp.AllDead]: + kwargs.pop('target') # remove target + predicate = pred_cls(Group(target), **kwargs) + if pred_fn in [bp.StayAlive] and 'target' in kwargs: kwargs.pop('target') # remove target predicate = pred_cls(Group(target), **kwargs) diff --git a/tests/task/test_demo_task_creation.py b/tests/task/test_demo_task_creation.py index ba03c5f9..884c7c27 100644 --- a/tests/task/test_demo_task_creation.py +++ b/tests/task/test_demo_task_creation.py @@ -10,8 +10,6 @@ from nmmo.task import base_predicates as bp from nmmo.task.game_state import GameState from nmmo.task.group import Group -from nmmo.task.task_api import make_team_tasks - def rollout(env, tasks, steps=5): env.reset(new_tasks=tasks) @@ -48,7 +46,6 @@ class Tier: (bp.DistanceTraveled, {'dist': 64}, Tier.HARD)] # Demonstrates custom predicate - return float/boolean - @p.define_predicate def EquipmentLevel(gs: GameState, subject: Group, number: int): @@ -61,25 +58,23 @@ def EquipmentLevel(gs: GameState, (EquipmentLevel, {'number': 5}, Tier.NORMAL), (EquipmentLevel, {'number': 10}, Tier.HARD)] - @p.define_predicate def CombatSkill(gs, subject, lvl): - # using predicate OR - return p.OR(bp.AttainSkill(subject, skill.Melee, lvl, 1), - bp.AttainSkill(subject, skill.Range, lvl, 1), - bp.AttainSkill(subject, skill.Mage, lvl, 1)) + # OR on predicate functions: max over all progress + return max(bp.AttainSkill(gs, subject, skill.Melee, lvl, 1), + bp.AttainSkill(gs, subject, skill.Range, lvl, 1), + bp.AttainSkill(gs, subject, skill.Mage, lvl, 1)) combat = [ # (predicate, 
reward_multiplier) (CombatSkill, {'lvl': 2}, Tier.EASY), (CombatSkill, {'lvl': 3}, Tier.NORMAL), (CombatSkill, {'lvl': 4}, Tier.HARD)] - @p.define_predicate def ForageSkill(gs, subject, lvl): - return p.OR(bp.AttainSkill(subject, skill.Fishing, lvl, 1), - bp.AttainSkill(subject, skill.Herbalism, lvl, 1), - bp.AttainSkill(subject, skill.Prospecting, lvl, 1), - bp.AttainSkill(subject, skill.Carving, lvl, 1), - bp.AttainSkill(subject, skill.Alchemy, lvl, 1)) + return max(bp.AttainSkill(gs, subject, skill.Fishing, lvl, 1), + bp.AttainSkill(gs, subject, skill.Herbalism, lvl, 1), + bp.AttainSkill(gs, subject, skill.Prospecting, lvl, 1), + bp.AttainSkill(gs, subject, skill.Carving, lvl, 1), + bp.AttainSkill(gs, subject, skill.Alchemy, lvl, 1)) foraging = [ # (predicate, reward_multiplier) (ForageSkill, {'lvl': 2}, Tier.EASY), @@ -100,10 +95,11 @@ def ForageSkill(gs, subject, lvl): # Making player_kills and exploration team tasks, team_tasks = [] - for pred, kwargs, weight in player_kills + exploration: + for pred_fn, kwargs, weight in player_kills + exploration: + pred_cls = p.make_predicate(pred_fn) for team in teams.values(): team_tasks.append( - pred(Group(team), **kwargs).create_task(reward_multiplier=weight)) + pred_cls(Group(team), **kwargs).create_task(reward_multiplier=weight)) # Run the environment with these tasks # check rewards and infos for the task info @@ -112,10 +108,11 @@ def ForageSkill(gs, subject, lvl): # Creating and testing the same task for all agents # i.e, each agent gets evaluated and rewarded individually same_tasks = [] - for pred, kwargs, weight in exploration + equipment + combat + foraging: + for pred_fn, kwargs, weight in exploration + equipment + combat + foraging: + pred_cls = p.make_predicate(pred_fn) for agent_id in env.possible_agents: same_tasks.append( - pred(Group([agent_id]), **kwargs).create_task(reward_multiplier=weight)) + pred_cls(Group([agent_id]), **kwargs).create_task(reward_multiplier=weight)) # Run the environment with 
these tasks # check rewards and infos for the task info @@ -132,7 +129,6 @@ def test_player_kill_reward(self): # PARTICIPANT WRITES # ==================================== - @p.define_predicate def KillPredicate(gs: GameState, subject: Group): """The progress, the max of which is 1, should @@ -148,7 +144,9 @@ def KillPredicate(gs: GameState, progress += .3 return min(progress, 1.0) - kill_tasks = [KillPredicate(Group(agent_id)).create_task() + # participants don't need to know about Predicate classes + kill_pred_cls = p.make_predicate(KillPredicate) + kill_tasks = [kill_pred_cls(Group(agent_id)).create_task() for agent_id in env.possible_agents] # Test Reward @@ -182,14 +180,16 @@ def test_predicate_math(self): config = ScriptedAgentTestConfig() env = Env(config) - @p.define_predicate + # each predicate function returns float, so one can do math on them def PredicateMath(gs, subject): - progress = 0.8 * bp.CountEvent(subject, event='PLAYER_KILL', N=7) + \ - 1.1 * bp.TickGE(subject, num_tick=3) + progress = 0.8 * bp.CountEvent(gs, subject, event='PLAYER_KILL', N=7) + \ + 1.1 * bp.TickGE(gs, subject, num_tick=3) # NOTE: the resulting progress will be bounded from [0, 1] afterwards return progress - task_for_agent_1 = PredicateMath(Group(1)).create_task() + # participants don't need to know about Predicate classes + pred_math_cls = p.make_predicate(PredicateMath) + task_for_agent_1 = pred_math_cls(Group(1)).create_task() # Test Reward env.reset(new_tasks=[task_for_agent_1]) @@ -214,10 +214,6 @@ def test_make_team_tasks_inside_reset(self): # NOTE: len(teams) and len(task_spec) don't need to match teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]} - # custom function to turn into predicate inside reset - def custom_predicate_func(gs, subject, test): - return True - """ task_spec is a list of tuple (reward_to, predicate class, kwargs) each tuple in the task_spec will create tasks for a team in teams @@ -232,20 +228,19 @@ def custom_predicate_func(gs, subject, 
test): * 'target' must be ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] these str will be translated into the actual agent ids * 'task_cls' is optional. If not provided, the standard Task is used. """ - task_spec = [ # (reward_to, predicate class, kwargs) + task_spec = [ # (reward_to, predicate function, kwargs) ('team', bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 1}), # one task ('agent', bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 2}), ('agent', bp.AllDead, {'target': 'left_team'}), - ('team', bp.CanSeeAgent, {'target': 'right_team_leader', 'task_cls': t.OngoingTask}), - ('team', custom_predicate_func, {'test': 1})] + ('team', bp.CanSeeAgent, {'target': 'right_team_leader', 'task_cls': t.OngoingTask})] config = ScriptedAgentTestConfig() env = Env(config) - env.reset(make_task_fn=make_team_tasks, + env.reset(make_task_fn=t.make_team_tasks, make_task_fn_kwargs={'task_spec':task_spec, 'teams':teams}) - self.assertEqual(len(env.tasks), 7) # 7 tasks were created + self.assertEqual(len(env.tasks), 6) # 6 tasks were created self.assertEqual(env.tasks[0].name, # team 0 task assigned to agents 1,2,3 '(Task_eval_fn:(CountEvent_(1,2,3)_event:PLAYER_KILL_N:1)_assignee:(1,2,3))') self.assertEqual(env.tasks[1].name, # team 1, agent task assigned to agent 4 @@ -256,8 +251,6 @@ def custom_predicate_func(gs, subject, test): '(Task_eval_fn:(AllDead_(8,9))_assignee:(6,))') self.assertEqual(env.tasks[5].name, # team 3 task, right_team is team 2 (6,7), leader 6 '(OngoingTask_eval_fn:(CanSeeAgent_(8,9)_target:6)_assignee:(8,9))') - self.assertEqual(env.tasks[6].name, # team 4 task, based on a predicate function - '(Task_eval_fn:(custom_predicate_func_(10,11)_test:1)_assignee:(10,11))') for _ in range(2): env.step({}) diff --git a/tests/task/test_manual_curriculum.py b/tests/task/test_manual_curriculum.py new file mode 100644 index 00000000..ef6cd1b3 --- /dev/null +++ b/tests/task/test_manual_curriculum.py @@ -0,0 +1,303 @@ +'''Manual test for creating learning 
curriculum manually''' +# pylint: disable=invalid-name,redefined-outer-name,bad-builtin + +import nmmo +import nmmo.lib.material as Material +from nmmo.task import base_predicates as bp +from nmmo.task.task_api import OngoingTask, make_team_tasks +from nmmo.task import constraint as c + + +EVENT_NUMBER_GOAL = [1, 2, 3, 4, 5, 7, 9, 12, 15, 20, 30, 50] +INFREQUENT_GOAL = list(range(1, 10)) +STAY_ALIVE_GOAL = [50, 100, 150, 200, 300, 500] +TEAM_NUMBER_GOAL = [10, 20, 30, 50, 70, 100] +LEVEL_GOAL = list(range(1, 10)) # TODO: get config +AGENT_NUM_GOAL = [1, 2, 3, 4, 5] # competition team size: 8 +ITEM_NUM_GOAL = AGENT_NUM_GOAL +TEAM_ITEM_GOAL = [1, 3, 5, 7, 10, 15, 20] +SKILLS = c.combat_skills + c.harvest_skills +COMBAT_STYLE = c.combat_skills +ALL_ITEM = c.armour + c.weapons + c.tools + c.ammunition + c.consumables +EQUIP_ITEM = c.armour + c.weapons + c.tools + c.ammunition +HARVEST_ITEM = c.weapons + c.ammunition + c.consumables + +""" task_spec is a list of tuples (reward_to, predicate function, kwargs) + + each tuple in the task_spec will create tasks for a team in teams + + reward_to: must be in ['team', 'agent'] + * 'team' creates a single team task, in which all team members get rewarded + * 'agent' creates a task for each agent, in which only the agent gets rewarded + + predicate function from the base predicates or a custom function like PracticeFormation below + + kwargs are the additional args that go into predicate. There are also special keys + * 'target' must be '{left,right}_team', '{left,right}_team_leader', or 'my_team_leader' + these strings will be translated into the actual agent ids + * 'task_cls' is optional. If not provided, the standard Task is used. 
""" +task_spec = [] + +# explore, eat, drink, attack any agent, harvest any item, level up any skill +# which can happen frequently +essential_skills = ['GO_FARTHEST', 'EAT_FOOD', 'DRINK_WATER', + 'SCORE_HIT', 'HARVEST_ITEM', 'LEVEL_UP'] +for event_code in essential_skills: + task_spec += [('agent', bp.CountEvent, {'event': event_code, 'N': cnt}) + for cnt in EVENT_NUMBER_GOAL] + +# item/market skills, which happen less frequently or should not do too much +item_skills = ['CONSUME_ITEM', 'GIVE_ITEM', 'DESTROY_ITEM', 'EQUIP_ITEM', + 'GIVE_GOLD', 'LIST_ITEM', 'EARN_GOLD', 'BUY_ITEM'] +for event_code in item_skills: + task_spec += [('agent', bp.CountEvent, {'event': event_code, 'N': cnt}) + for cnt in INFREQUENT_GOAL] # less than 10 + +# find resource tiles +for resource in Material.Harvestable: + for reward_to in ['agent', 'team']: + task_spec.append((reward_to, bp.CanSeeTile, {'tile_type': resource})) + +# stay alive ... like ... for 300 ticks +# i.e., getting incremental reward for each tick alive as an individual or a team +for reward_to in ['agent', 'team']: + for num_tick in STAY_ALIVE_GOAL: + task_spec.append((reward_to, bp.TickGE, {'num_tick': num_tick})) + +# protect the leader: get reward for each tick the leader is alive +task_spec.append(('team', bp.StayAlive, {'target': 'my_team_leader', 'task_cls': OngoingTask})) + +# want the other team or team leader to die +for target in ['left_team', 'left_team_leader', 'right_team', 'right_team_leader']: + task_spec.append(('team', bp.AllDead, {'target': target})) + +# occupy the center tile, assuming the Medium map size +# TODO: it'd be better to have some intermediate targets toward the center +for reward_to in ['agent', 'team']: + task_spec.append((reward_to, bp.OccupyTile, {'row': 80, 'col': 80})) # TODO: get config + +# form a tight formation, for a certain number of ticks +def PracticeFormation(gs, subject, dist, num_tick): + return bp.AllMembersWithinRange(gs, subject, dist) * bp.TickGE(gs, subject, num_tick) 
+for dist in [1, 3, 5, 10]: + task_spec += [('team', PracticeFormation, {'dist': dist, 'num_tick': num_tick}) + for num_tick in STAY_ALIVE_GOAL] + +# find the other team leader +for reward_to in ['agent', 'team']: + for target in ['left_team_leader', 'right_team_leader']: + task_spec.append((reward_to, bp.CanSeeAgent, {'target': target})) + +# find the other team (any agent) +for reward_to in ['agent']: #, 'team']: + for target in ['left_team', 'right_team']: + task_spec.append((reward_to, bp.CanSeeGroup, {'target': target})) + +# explore the map -- sum the l-inf distance traveled by all subjects +for dist in [10, 20, 30, 50, 100]: # each agent + task_spec.append(('agent', bp.DistanceTraveled, {'dist': dist})) +for dist in [30, 50, 70, 100, 150, 200, 300, 500]: # summed over all team members + task_spec.append(('team', bp.DistanceTraveled, {'dist': dist})) + +# level up a skill +for skill in SKILLS: + for level in LEVEL_GOAL: + # since this is an agent task, num_agent must be 1 + task_spec.append(('agent', bp.AttainSkill, {'skill': skill, 'level': level, 'num_agent': 1})) + +# make attain skill a team task by varying the number of agents +for skill in SKILLS: + for level in LEVEL_GOAL: + for num_agent in AGENT_NUM_GOAL: + if level + num_agent <= 6 or num_agent == 1: # heuristic prune + task_spec.append(('team', bp.AttainSkill, + {'skill': skill, 'level': level,'num_agent': num_agent})) + +# practice specific combat style +for style in COMBAT_STYLE: + for cnt in EVENT_NUMBER_GOAL: + task_spec.append(('agent', bp.ScoreHit, {'combat_style': style, 'N': cnt})) + for cnt in TEAM_NUMBER_GOAL: + task_spec.append(('team', bp.ScoreHit, {'combat_style': style, 'N': cnt})) + +# defeat agents of a certain level as a team +for agent_type in ['player', 'npc']: # c.AGENT_TYPE_CONSTRAINT + for level in LEVEL_GOAL: + for num_agent in AGENT_NUM_GOAL: + if level + num_agent <= 6 or num_agent == 1: # heuristic prune + task_spec.append(('team', bp.DefeatEntity, + {'agent_type': 
agent_type, 'level': level, 'num_agent': num_agent})) + +# hoarding gold -- evaluated on the current gold +for amount in EVENT_NUMBER_GOAL: + task_spec.append(('agent', bp.HoardGold, {'amount': amount})) +for amount in TEAM_NUMBER_GOAL: + task_spec.append(('team', bp.HoardGold, {'amount': amount})) + +# earning gold -- evaluated on the total gold earned by selling items +# does NOT include looted gold +for amount in EVENT_NUMBER_GOAL: + task_spec.append(('agent', bp.EarnGold, {'amount': amount})) +for amount in TEAM_NUMBER_GOAL: + task_spec.append(('team', bp.EarnGold, {'amount': amount})) + +# spending gold, by buying items +for amount in EVENT_NUMBER_GOAL: + task_spec.append(('agent', bp.SpendGold, {'amount': amount})) +for amount in TEAM_NUMBER_GOAL: + task_spec.append(('team', bp.SpendGold, {'amount': amount})) + +# making profits by trading -- only buying and selling are counted +for amount in EVENT_NUMBER_GOAL: + task_spec.append(('agent', bp.MakeProfit, {'amount': amount})) +for amount in TEAM_NUMBER_GOAL: + task_spec.append(('team', bp.MakeProfit, {'amount': amount})) + +# managing inventory space: InventorySpaceGE(space) scaled by the TickGE(num_tick) progress +def PracticeInventoryManagement(gs, subject, space, num_tick): + return bp.InventorySpaceGE(gs, subject, space) * bp.TickGE(gs, subject, num_tick) +for space in [2, 4, 8]: + task_spec += [('agent', PracticeInventoryManagement, {'space': space, 'num_tick': num_tick}) + for num_tick in STAY_ALIVE_GOAL] + +# own item, evaluated on the current inventory +for item in ALL_ITEM: + for level in LEVEL_GOAL: + # agent task + for quantity in ITEM_NUM_GOAL: + if level + quantity <= 6 or quantity == 1: # heuristic prune + task_spec.append(('agent', bp.OwnItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # team task + for quantity in TEAM_ITEM_GOAL: + if level + quantity <= 10 or quantity == 1: # heuristic prune + task_spec.append(('team', bp.OwnItem, + {'item': item, 'level': level, 'quantity': quantity})) + +# equip item, evaluated on the current 
inventory and equipment status +for item in EQUIP_ITEM: + for level in LEVEL_GOAL: + # agent task + task_spec.append(('agent', bp.EquipItem, + {'item': item, 'level': level, 'num_agent': 1})) + + # team task + for num_agent in AGENT_NUM_GOAL: + if level + num_agent <= 6 or num_agent == 1: # heuristic prune + task_spec.append(('team', bp.EquipItem, + {'item': item, 'level': level, 'num_agent': num_agent})) + +# consume items (ration, potion), evaluated based on the event log +for item in c.consumables: + for level in LEVEL_GOAL: + # agent task + for quantity in ITEM_NUM_GOAL: + if level + quantity <= 6 or quantity == 1: # heuristic prune + task_spec.append(('agent', bp.ConsumeItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # team task + for quantity in TEAM_ITEM_GOAL: + if level + quantity <= 10 or quantity == 1: # heuristic prune + task_spec.append(('team', bp.ConsumeItem, + {'item': item, 'level': level, 'quantity': quantity})) + +# harvest items, evaluated based on the event log +for item in HARVEST_ITEM: + for level in LEVEL_GOAL: + # agent task + for quantity in ITEM_NUM_GOAL: + if level + quantity <= 6 or quantity == 1: # heuristic prune + task_spec.append(('agent', bp.HarvestItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # team task + for quantity in TEAM_ITEM_GOAL: + if level + quantity <= 10 or quantity == 1: # heuristic prune + task_spec.append(('team', bp.HarvestItem, + {'item': item, 'level': level, 'quantity': quantity})) + +# list items, evaluated based on the event log +for item in ALL_ITEM: + for level in LEVEL_GOAL: + # agent task + for quantity in ITEM_NUM_GOAL: + if level + quantity <= 6 or quantity == 1: # heuristic prune + task_spec.append(('agent', bp.ListItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # team task + for quantity in TEAM_ITEM_GOAL: + if level + quantity <= 10 or quantity == 1: # heuristic prune + task_spec.append(('team', bp.ListItem, + {'item': item, 'level': level, 
'quantity': quantity})) + +# buy items, evaluated based on the event log +for item in ALL_ITEM: + for level in LEVEL_GOAL: + # agent task + for quantity in ITEM_NUM_GOAL: + if level + quantity <= 6 or quantity == 1: # heuristic prune + task_spec.append(('agent', bp.BuyItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # team task + for quantity in TEAM_ITEM_GOAL: + if level + quantity <= 10 or quantity == 1: # heuristic prune + task_spec.append(('team', bp.BuyItem, + {'item': item, 'level': level, 'quantity': quantity})) + +# fully armed, evaluated based on the current player/inventory status +for style in COMBAT_STYLE: + for level in LEVEL_GOAL: + for num_agent in AGENT_NUM_GOAL: + if level + num_agent <= 6 or num_agent == 1: # heuristic prune + task_spec.append(('team', bp.FullyArmed, + {'combat_style': style, 'level': level, 'num_agent': num_agent})) + + +if __name__ == '__main__': + # pylint: disable=broad-except + import psutil + from contextlib import contextmanager + import multiprocessing as mp + import numpy as np + import pickle + + @contextmanager + def create_pool(num_proc): + pool = mp.Pool(processes=num_proc) + yield pool + pool.close() + pool.join() + + def check_task_spec(spec_list): + teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]} + config = nmmo.config.Default() + env = nmmo.Env(config) + for idx, single_spec in enumerate(spec_list): + try: + sample_task = make_team_tasks(teams, [single_spec]) + env.reset(new_tasks=sample_task) + for _ in range(3): + env.step({}) + except Exception: + print('invalid task spec:', single_spec) + + if idx > 0 and idx % 50 == 0: + print(idx, 'task specs checked.') + + # 3590 task specs: divide the specs into chunks + num_cores = psutil.cpu_count(logical=False) or 1 + spec_chunks = np.array_split(task_spec, num_cores) + with create_pool(num_cores) as pool: + pool.map(check_task_spec, spec_chunks) + + # print(sample_task[0].name) + # if len(sample_task) > 1: + # print(sample_task[-1].name) + + # test if the 
task spec is picklable + with open('manual_curriculum.pkl', 'wb') as f: + pickle.dump(task_spec, f) diff --git a/tests/task/test_predicates.py b/tests/task/test_predicates.py index 0cd59882..555d1e71 100644 --- a/tests/task/test_predicates.py +++ b/tests/task/test_predicates.py @@ -15,7 +15,7 @@ # pylint: disable=import-error from nmmo.core.env import Env -from nmmo.task.predicate_api import Predicate +from nmmo.task.predicate_api import Predicate, make_predicate from nmmo.task.task_api import OngoingTask from nmmo.task.group import Group import nmmo.task.base_predicates as bp @@ -56,8 +56,8 @@ def _get_taskenv(self, return env - def _check_result(self, env, test_tasks, infos, true_task): - for tid, (predicate, assignee) in enumerate(test_tasks): + def _check_result(self, env, test_preds, infos, true_task): + for tid, (predicate, assignee) in enumerate(test_preds): # result is cached when at least one assignee is alive so that the task is evaled if len(set(assignee) & set(infos)) > 0: self.assertEqual(int(env.game_state.cache_result[predicate.name]), @@ -80,18 +80,23 @@ def _check_progress(self, task, infos, value): self.assertAlmostEqual(infos[ent_id]['task'][task.name]['progress'],value) def test_tickge_stay_alive_rip(self): + tickge_pred_cls = make_predicate(bp.TickGE) + stay_alive_pred_cls = make_predicate(bp.StayAlive) + all_dead_pred_cls = make_predicate(bp.AllDead) + + tick_true = 5 death_note = [1, 2, 3] - test_tasks = [ # (instantiated predicate, task assignee) - (bp.TickGE(Group([1]), tick_true), ALL_AGENT), - (bp.StayAlive(Group([1,3])), ALL_AGENT), - (bp.StayAlive(Group([3,4])), [1,2]), - (bp.StayAlive(Group([4])), [5,6]), - (bp.AllDead(Group([1,3])), ALL_AGENT), - (bp.AllDead(Group([3,4])), [1,2]), - (bp.AllDead(Group([4])), [5,6])] + test_preds = [ # (instantiated predicate, task assignee) + (tickge_pred_cls(Group([1]), tick_true), ALL_AGENT), + (stay_alive_pred_cls(Group([1,3])), ALL_AGENT), + (stay_alive_pred_cls(Group([3,4])), [1,2]), + 
(stay_alive_pred_cls(Group([4])), [5,6]), + (all_dead_pred_cls(Group([1,3])), ALL_AGENT), + (all_dead_pred_cls(Group([3,4])), [1,2]), + (all_dead_pred_cls(Group([4])), [5,6])] - env = self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) for _ in range(tick_true-1): _, _, _, infos = env.step({}) @@ -101,7 +106,7 @@ def test_tickge_stay_alive_rip(self): # and all AllDead tasks (ti in [4, 5, 6]) are false true_task = [1, 2, 3] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) self._check_progress(env.tasks[0], infos, (tick_true-1) / tick_true) # kill agents 1-3 @@ -129,7 +134,7 @@ def test_tickge_stay_alive_rip(self): # StayAlive(1,3) and StayAlive(3,4) are false, StayAlive(4) is true # AllDead(1,3) is true, AllDead(3,4) and AllDead(4) are false true_task = [0, 3, 4] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # 3 is dead but 4 is alive. Half of agents killed, 50% completion. 
self._check_progress(env.tasks[5], infos, 0.5) @@ -137,17 +142,19 @@ def test_tickge_stay_alive_rip(self): # DONE def test_can_see_tile(self): + canseetile_pred_cls = make_predicate(bp.CanSeeTile) + a1_target = Material.Foilage a2_target = Material.Water - test_tasks = [ # (instantiated predicate, task assignee) - (bp.CanSeeTile(Group([1]), a1_target), ALL_AGENT), # True - (bp.CanSeeTile(Group([1,3,5]), a2_target), ALL_AGENT), # False - (bp.CanSeeTile(Group([2]), a2_target), [1,2,3]), # True - (bp.CanSeeTile(Group([2,5,6]), a1_target), ALL_AGENT), # False - (bp.CanSeeTile(Group(ALL_AGENT), a2_target), [2,3,4])] # True + test_preds = [ # (instantiated predicate, task assignee) + (canseetile_pred_cls(Group([1]), a1_target), ALL_AGENT), # True + (canseetile_pred_cls(Group([1,3,5]), a2_target), ALL_AGENT), # False + (canseetile_pred_cls(Group([2]), a2_target), [1,2,3]), # True + (canseetile_pred_cls(Group([2,5,6]), a1_target), ALL_AGENT), # False + (canseetile_pred_cls(Group(ALL_AGENT), a2_target), [2,3,4])] # True # setup env with all grass map - env = self._get_taskenv(test_tasks, grass_map=True) + env = self._get_taskenv(test_preds, grass_map=True) # Two corners to the target materials MS = env.config.MAP_SIZE @@ -168,7 +175,7 @@ def test_can_see_tile(self): # no target tiles are found, so all are false true_task = [] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # Team one to foilage, team two to water change_agent_pos(env.realm,1,(0,MS-2)) # agent 1, team 0, foilage @@ -179,19 +186,22 @@ def test_can_see_tile(self): # t0, t2, t4 are true true_task = [0, 2, 4] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_can_see_agent(self): + cansee_agent_pred_cls = make_predicate(bp.CanSeeAgent) + cansee_group_pred_cls = make_predicate(bp.CanSeeGroup) + search_target = 1 - test_tasks = [ # (Predicate, Team), the reward is 1 by 
default - (bp.CanSeeAgent(Group([1]), search_target), ALL_AGENT), # Always True - (bp.CanSeeAgent(Group([2]), search_target), [2,3,4]), # False -> True -> True - (bp.CanSeeAgent(Group([3,4,5]), search_target), [1,2,3]), # False -> False -> True - (bp.CanSeeGroup(Group([1]), Group([3,4])), ALL_AGENT)] # False -> False -> True + test_preds = [ # (Predicate, Team), the reward is 1 by default + (cansee_agent_pred_cls(Group([1]), search_target), ALL_AGENT), # Always True + (cansee_agent_pred_cls(Group([2]), search_target), [2,3,4]), # False -> True -> True + (cansee_agent_pred_cls(Group([3,4,5]), search_target), [1,2,3]), # False -> False -> True + (cansee_group_pred_cls(Group([1]), [3,4]), ALL_AGENT)] # False -> False -> True - env = self._get_taskenv(test_tasks, grass_map=True) + env = self._get_taskenv(test_preds, grass_map=True) # All agents to one corner for ent_id in env.realm.players: @@ -206,7 +216,7 @@ def test_can_see_agent(self): # Only CanSeeAgent(Group([1]), search_target) is true, others are false true_task = [0] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # Teleport agent 2 to agent 1's pos change_agent_pos(env.realm,2,(MS-2,MS-2)) @@ -216,7 +226,7 @@ def test_can_see_agent(self): # SearchAgent(Team([2]), search_target) is also true true_task = [0,1] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # Teleport agent 3 to agent 1s position change_agent_pos(env.realm,3,(MS-2,MS-2)) @@ -224,20 +234,22 @@ def test_can_see_agent(self): _, _, _, infos = env.step({}) true_task = [0,1,2,3] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_occupy_tile(self): + occupy_tile_pred_cls = make_predicate(bp.OccupyTile) + target_tile = (30, 30) - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.OccupyTile(Group([1]), *target_tile), ALL_AGENT), # 
False -> True - (bp.OccupyTile(Group([1,2,3]), *target_tile), [4,5,6]), # False -> True - (bp.OccupyTile(Group([2]), *target_tile), [2,3,4]), # False - (bp.OccupyTile(Group([3,4,5]), *target_tile), [1,2,3])] # False + test_preds = [ # (Predicate, Team), the reward is 1 by default + (occupy_tile_pred_cls(Group([1]), *target_tile), ALL_AGENT), # False -> True + (occupy_tile_pred_cls(Group([1,2,3]), *target_tile), [4,5,6]), # False -> True + (occupy_tile_pred_cls(Group([2]), *target_tile), [2,3,4]), # False + (occupy_tile_pred_cls(Group([3,4,5]), *target_tile), [1,2,3])] # False # make all tiles habitable - env = self._get_taskenv(test_tasks, grass_map=True) + env = self._get_taskenv(test_preds, grass_map=True) # All agents to one corner for ent_id in env.realm.players: @@ -248,7 +260,7 @@ def test_occupy_tile(self): # all tasks must be false true_task = [] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # teleport agent 1 to the target tile, agent 2 to the adjacent tile change_agent_pos(env.realm,1,target_tile) @@ -260,29 +272,31 @@ def test_occupy_tile(self): # tid 0 and 1 should be true: OccupyTile(Group([1]), *target_tile) # & OccupyTile(Group([1,2,3]), *target_tile) true_task = [0, 1] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_distance_traveled(self): + distance_traveled_pred_cls = make_predicate(bp.DistanceTraveled) + agent_dist = 6 team_dist = 10 # NOTE: when evaluating predicates, to whom tasks are assigned are irrelevant - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.DistanceTraveled(Group([1]), agent_dist), ALL_AGENT), # False -> True - (bp.DistanceTraveled(Group([2, 5]), agent_dist), ALL_AGENT), # False - (bp.DistanceTraveled(Group([3, 4]), agent_dist), ALL_AGENT), # False - (bp.DistanceTraveled(Group([1, 2, 3]), team_dist), ALL_AGENT), # False -> True - (bp.DistanceTraveled(Group([6]), 
agent_dist), ALL_AGENT)] # False + test_preds = [ # (Predicate, Team), the reward is 1 by default + (distance_traveled_pred_cls(Group([1]), agent_dist), ALL_AGENT), # False -> True + (distance_traveled_pred_cls(Group([2, 5]), agent_dist), ALL_AGENT), # False + (distance_traveled_pred_cls(Group([3, 4]), agent_dist), ALL_AGENT), # False + (distance_traveled_pred_cls(Group([1, 2, 3]), team_dist), ALL_AGENT), # False -> True + (distance_traveled_pred_cls(Group([6]), agent_dist), ALL_AGENT)] # False # make all tiles habitable - env = self._get_taskenv(test_tasks, grass_map=True) + env = self._get_taskenv(test_preds, grass_map=True) _, _, _, infos = env.step({}) # one cannot accomplish these goals in the first tick, so all false true_task = [] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # all are sleeper, so they all stay in the spawn pos spawn_pos = { ent_id: ent.pos for ent_id, ent in env.realm.players.items() } @@ -297,24 +311,26 @@ def test_distance_traveled(self): _,_,_, infos = env.step({}) true_task = [0, 3] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_all_members_within_range(self): + within_range_pred_cls = make_predicate(bp.AllMembersWithinRange) + dist_123 = 1 dist_135 = 5 - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.AllMembersWithinRange(Group([1]), dist_123), ALL_AGENT), # Always true for group of 1 - (bp.AllMembersWithinRange(Group([1,2]), dist_123), ALL_AGENT), # True - (bp.AllMembersWithinRange(Group([1,3]), dist_123), ALL_AGENT), # True - (bp.AllMembersWithinRange(Group([2,3]), dist_123), ALL_AGENT), # False - (bp.AllMembersWithinRange(Group([1,3,5]), dist_123), ALL_AGENT), # False - (bp.AllMembersWithinRange(Group([1,3,5]), dist_135), ALL_AGENT), # True - (bp.AllMembersWithinRange(Group([2,4,6]), dist_135), ALL_AGENT)] # False + test_preds = [ # (Predicate, Team), the reward is 1 
by default + (within_range_pred_cls(Group([1]), dist_123), ALL_AGENT), # Always true for group of 1 + (within_range_pred_cls(Group([1,2]), dist_123), ALL_AGENT), # True + (within_range_pred_cls(Group([1,3]), dist_123), ALL_AGENT), # True + (within_range_pred_cls(Group([2,3]), dist_123), ALL_AGENT), # False + (within_range_pred_cls(Group([1,3,5]), dist_123), ALL_AGENT), # False + (within_range_pred_cls(Group([1,3,5]), dist_135), ALL_AGENT), # True + (within_range_pred_cls(Group([2,4,6]), dist_135), ALL_AGENT)] # False # make all tiles habitable - env = self._get_taskenv(test_tasks, grass_map=True) + env = self._get_taskenv(test_preds, grass_map=True) MS = env.config.MAP_SIZE @@ -332,21 +348,23 @@ def test_all_members_within_range(self): _, _, _, infos = env.step({}) true_task = [0, 1, 2, 5] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_attain_skill(self): + attain_skill_pred_cls = make_predicate(bp.AttainSkill) + goal_level = 5 - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.AttainSkill(Group([1]), Skill.Melee, goal_level, 1), ALL_AGENT), # False - (bp.AttainSkill(Group([2]), Skill.Melee, goal_level, 1), ALL_AGENT), # False - (bp.AttainSkill(Group([1]), Skill.Range, goal_level, 1), ALL_AGENT), # True - (bp.AttainSkill(Group([1,3]), Skill.Fishing, goal_level, 1), ALL_AGENT), # True - (bp.AttainSkill(Group([1,2,3]), Skill.Carving, goal_level, 3), ALL_AGENT), # False - (bp.AttainSkill(Group([2,4]), Skill.Carving, goal_level, 2), ALL_AGENT)] # True + test_preds = [ # (Predicate, Team), the reward is 1 by default + (attain_skill_pred_cls(Group([1]), Skill.Melee, goal_level, 1), ALL_AGENT), # False + (attain_skill_pred_cls(Group([2]), Skill.Melee, goal_level, 1), ALL_AGENT), # False + (attain_skill_pred_cls(Group([1]), Skill.Range, goal_level, 1), ALL_AGENT), # True + (attain_skill_pred_cls(Group([1,3]), Skill.Fishing, goal_level, 1), ALL_AGENT), # True + 
(attain_skill_pred_cls(Group([1,2,3]), Skill.Carving, goal_level, 3), ALL_AGENT), # False + (attain_skill_pred_cls(Group([2,4]), Skill.Carving, goal_level, 2), ALL_AGENT)] # True - env = self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) # AttainSkill(Group([1]), Skill.Melee, goal_level, 1) is false # AttainSkill(Group([2]), Skill.Melee, goal_level, 1) is false @@ -365,23 +383,25 @@ def test_attain_skill(self): _, _, _, infos = env.step({}) true_task = [2, 3, 5] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_inventory_space_ge_not(self): + inv_space_ge_pred_cls = make_predicate(bp.InventorySpaceGE) + # also test NOT InventorySpaceGE target_space = 3 - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.InventorySpaceGE(Group([1]), target_space), ALL_AGENT), # True -> False - (bp.InventorySpaceGE(Group([2,3]), target_space), ALL_AGENT), # True - (bp.InventorySpaceGE(Group([1,2,3]), target_space), ALL_AGENT), # True -> False - (bp.InventorySpaceGE(Group([1,2,3,4]), target_space+1), ALL_AGENT), # False - (~bp.InventorySpaceGE(Group([1]), target_space+1), ALL_AGENT), # True - (~bp.InventorySpaceGE(Group([1,2,3]), target_space), ALL_AGENT), # False -> True - (~bp.InventorySpaceGE(Group([1,2,3,4]), target_space+1), ALL_AGENT)] # True + test_preds = [ # (Predicate, Team), the reward is 1 by default + (inv_space_ge_pred_cls(Group([1]), target_space), ALL_AGENT), # True -> False + (inv_space_ge_pred_cls(Group([2,3]), target_space), ALL_AGENT), # True + (inv_space_ge_pred_cls(Group([1,2,3]), target_space), ALL_AGENT), # True -> False + (inv_space_ge_pred_cls(Group([1,2,3,4]), target_space+1), ALL_AGENT), # False + (~inv_space_ge_pred_cls(Group([1]), target_space+1), ALL_AGENT), # True + (~inv_space_ge_pred_cls(Group([1,2,3]), target_space), ALL_AGENT), # False -> True + (~inv_space_ge_pred_cls(Group([1,2,3,4]), target_space+1), ALL_AGENT)] # True - env = 
self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) # add one items to agent 1 within the limit capacity = env.realm.players[1].inventory.capacity @@ -392,7 +412,7 @@ def test_inventory_space_ge_not(self): self.assertTrue(env.realm.players[1].inventory.space >= target_space) true_task = [0, 1, 2, 4, 6] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # add one more item to agent 1 provide_item(env.realm, 1, Item.Ration, level=1, quantity=1) @@ -402,26 +422,29 @@ def test_inventory_space_ge_not(self): self.assertTrue(env.realm.players[1].inventory.space < target_space) true_task = [1, 4, 5, 6] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_own_equip_item(self): + own_item_pred_cls = make_predicate(bp.OwnItem) + equip_item_pred_cls = make_predicate(bp.EquipItem) + # ration, level 2, quantity 3 (non-stackable) # ammo level 2, quantity 3 (stackable, equipable) goal_level = 2 goal_quantity = 3 - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.OwnItem(Group([1]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # False - (bp.OwnItem(Group([2]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # False - (bp.OwnItem(Group([1,2]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # True - (bp.OwnItem(Group([3]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # True - (bp.OwnItem(Group([4,5,6]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # False - (bp.EquipItem(Group([4]), Item.Whetstone, goal_level, 1), ALL_AGENT), # False - (bp.EquipItem(Group([4,5]), Item.Whetstone, goal_level, 1), ALL_AGENT), # True - (bp.EquipItem(Group([4,5,6]), Item.Whetstone, goal_level, 2), ALL_AGENT)] # True - - env = self._get_taskenv(test_tasks) + test_preds = [ # (Predicate, Team), the reward is 1 by default + (own_item_pred_cls(Group([1]), Item.Ration, goal_level, goal_quantity), 
ALL_AGENT), # False + (own_item_pred_cls(Group([2]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # False + (own_item_pred_cls(Group([1,2]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # True + (own_item_pred_cls(Group([3]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # True + (own_item_pred_cls(Group([4,5,6]), Item.Ration, goal_level, goal_quantity), ALL_AGENT), # F + (equip_item_pred_cls(Group([4]), Item.Whetstone, goal_level, 1), ALL_AGENT), # False + (equip_item_pred_cls(Group([4,5]), Item.Whetstone, goal_level, 1), ALL_AGENT), # True + (equip_item_pred_cls(Group([4,5,6]), Item.Whetstone, goal_level, 2), ALL_AGENT)] # True + + env = self._get_taskenv(test_preds) # set the level, so that agents 4-6 can equip the Whetstone equip_stone = [4, 5, 6] @@ -454,21 +477,23 @@ def test_own_equip_item(self): _, _, _, infos = env.step({}) true_task = [2, 3, 6, 7] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_fully_armed(self): + fully_armed_pred_cls = make_predicate(bp.FullyArmed) + goal_level = 5 - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.FullyArmed(Group([1,2,3]), Skill.Range, goal_level, 1), ALL_AGENT), # False - (bp.FullyArmed(Group([3,4]), Skill.Range, goal_level, 1), ALL_AGENT), # True - (bp.FullyArmed(Group([4]), Skill.Melee, goal_level, 1), ALL_AGENT), # False - (bp.FullyArmed(Group([4,5,6]), Skill.Range, goal_level, 3), ALL_AGENT), # True - (bp.FullyArmed(Group([4,5,6]), Skill.Range, goal_level+3, 1), ALL_AGENT), # False - (bp.FullyArmed(Group([4,5,6]), Skill.Range, goal_level, 4), ALL_AGENT)] # False + test_preds = [ # (Predicate, Team), the reward is 1 by default + (fully_armed_pred_cls(Group([1,2,3]), Skill.Range, goal_level, 1), ALL_AGENT), # False + (fully_armed_pred_cls(Group([3,4]), Skill.Range, goal_level, 1), ALL_AGENT), # True + (fully_armed_pred_cls(Group([4]), Skill.Melee, goal_level, 1), ALL_AGENT), # False + 
(fully_armed_pred_cls(Group([4,5,6]), Skill.Range, goal_level, 3), ALL_AGENT), # True + (fully_armed_pred_cls(Group([4,5,6]), Skill.Range, goal_level+3, 1), ALL_AGENT), # False + (fully_armed_pred_cls(Group([4,5,6]), Skill.Range, goal_level, 4), ALL_AGENT)] # False - env = self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) # fully equip agents 4-6 fully_equip = [4, 5, 6] @@ -485,20 +510,22 @@ def test_fully_armed(self): _, _, _, infos = env.step({}) true_task = [1, 3] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_hoard_gold_and_team(self): # HoardGold, TeamHoardGold + hoard_gold_pred_cls = make_predicate(bp.HoardGold) + agent_gold_goal = 10 team_gold_goal = 30 - test_tasks = [ # (Predicate, Team), the reward is 1 by default - (bp.HoardGold(Group([1]), agent_gold_goal), ALL_AGENT), # True - (bp.HoardGold(Group([4,5,6]), agent_gold_goal), ALL_AGENT), # False - (bp.HoardGold(Group([1,3,5]), team_gold_goal), ALL_AGENT), # True - (bp.HoardGold(Group([2,4,6]), team_gold_goal), ALL_AGENT)] # False + test_preds = [ # (Predicate, Team), the reward is 1 by default + (hoard_gold_pred_cls(Group([1]), agent_gold_goal), ALL_AGENT), # True + (hoard_gold_pred_cls(Group([4,5,6]), agent_gold_goal), ALL_AGENT), # False + (hoard_gold_pred_cls(Group([1,3,5]), team_gold_goal), ALL_AGENT), # True + (hoard_gold_pred_cls(Group([2,4,6]), team_gold_goal), ALL_AGENT)] # False - env = self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) # give gold to agents 1-3 gold_struck = [1, 2, 3] @@ -509,24 +536,28 @@ def test_hoard_gold_and_team(self): # HoardGold, TeamHoardGold _, _, _, infos = env.step({}) true_task = [0, 2] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) g = sum(env.realm.players[eid].gold.val for eid in Group([2,4,6]).agents) self._check_progress(env.tasks[3], infos, g / team_gold_goal) # DONE def 
test_exchange_gold_predicates(self): # Earn Gold, Spend Gold, Make Profit + earn_gold_pred_cls = make_predicate(bp.EarnGold) + spend_gold_pred_cls = make_predicate(bp.SpendGold) + make_profit_pred_cls = make_predicate(bp.MakeProfit) + gold_goal = 10 - test_tasks = [ - (bp.EarnGold(Group([1,2]), gold_goal), ALL_AGENT), # True - (bp.EarnGold(Group([2,4]), gold_goal), ALL_AGENT), # False - (bp.SpendGold(Group([1]), 5), ALL_AGENT), # False -> True - (bp.SpendGold(Group([1]), 6), ALL_AGENT), # False, - (bp.MakeProfit(Group([1,2]), 5), ALL_AGENT), # True, - (bp.MakeProfit(Group([1]), 5), ALL_AGENT) # True -> False + test_preds = [ + (earn_gold_pred_cls(Group([1,2]), gold_goal), ALL_AGENT), # True + (earn_gold_pred_cls(Group([2,4]), gold_goal), ALL_AGENT), # False + (spend_gold_pred_cls(Group([1]), 5), ALL_AGENT), # False -> True + (spend_gold_pred_cls(Group([1]), 6), ALL_AGENT), # False, + (make_profit_pred_cls(Group([1,2]), 5), ALL_AGENT), # True, + (make_profit_pred_cls(Group([1]), 5), ALL_AGENT) # True -> False ] - env = self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) players = env.realm.players # 8 gold earned for agent 1 @@ -539,7 +570,7 @@ def test_exchange_gold_predicates(self): # Earn Gold, Spend Gold, Make Profit _, _, _, infos = env.step({}) true_task = [0,4,5] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) self._check_progress(env.tasks[1], infos, 2 / gold_goal) env.realm.event_log.record(EventCode.BUY_ITEM, players[1], @@ -549,21 +580,23 @@ def test_exchange_gold_predicates(self): # Earn Gold, Spend Gold, Make Profit _, _, _, infos = env.step({}) true_task = [0,2,4] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_count_event(self): # CountEvent - test_tasks = [ - (bp.CountEvent(Group([1]),"EAT_FOOD",1), ALL_AGENT), # True - (bp.CountEvent(Group([1]),"EAT_FOOD",2), ALL_AGENT), # False - 
(bp.CountEvent(Group([1]),"DRINK_WATER",1), ALL_AGENT), # False - (bp.CountEvent(Group([1,2]),"GIVE_GOLD",1), ALL_AGENT) # True + count_event_pred_cls = make_predicate(bp.CountEvent) + + test_preds = [ + (count_event_pred_cls(Group([1]),"EAT_FOOD",1), ALL_AGENT), # True + (count_event_pred_cls(Group([1]),"EAT_FOOD",2), ALL_AGENT), # False + (count_event_pred_cls(Group([1]),"DRINK_WATER",1), ALL_AGENT), # False + (count_event_pred_cls(Group([1,2]),"GIVE_GOLD",1), ALL_AGENT) # True ] # 1 Drinks water once # 2 Gives gold once - env = self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) players = env.realm.players env.realm.event_log.record(EventCode.EAT_FOOD, players[1]) env.realm.event_log.record(EventCode.GIVE_GOLD, players[2]) @@ -571,16 +604,18 @@ def test_count_event(self): # CountEvent _, _, _, infos = env.step({}) true_task = [0,3] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE def test_score_hit(self): # ScoreHit - test_tasks = [ - (bp.ScoreHit(Group([1]), Skill.Mage, 2), ALL_AGENT), # False -> True - (bp.ScoreHit(Group([1]), Skill.Melee, 1), ALL_AGENT) # True + score_hit_pred_cls = make_predicate(bp.ScoreHit) + + test_preds = [ + (score_hit_pred_cls(Group([1]), Skill.Mage, 2), ALL_AGENT), # False -> True + (score_hit_pred_cls(Group([1]), Skill.Melee, 1), ALL_AGENT) # True ] - env = self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) players = env.realm.players env.realm.event_log.record(EventCode.SCORE_HIT, @@ -596,7 +631,7 @@ def test_score_hit(self): # ScoreHit _, _, _, infos = env.step({}) true_task = [1] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) self._check_progress(env.tasks[0], infos, 0.5) env.realm.event_log.record(EventCode.SCORE_HIT, @@ -612,28 +647,84 @@ def test_score_hit(self): # ScoreHit _, _, _, infos = env.step({}) true_task = [0,1] - self._check_result(env, test_tasks, 
infos, true_task) + self._check_result(env, test_preds, infos, true_task) + + # DONE + + def test_defeat_entity(self): # PlayerKill + defeat_pred_cls = make_predicate(bp.DefeatEntity) + + test_preds = [ + (defeat_pred_cls(Group([1]), 'npc', level=1, num_agent=1), ALL_AGENT), + (defeat_pred_cls(Group([1]), 'player', level=2, num_agent=2), ALL_AGENT)] + env = self._get_taskenv(test_preds) + players = env.realm.players + npcs = env.realm.npcs + + # set levels + npcs[-1].skills.melee.level.update(1) + npcs[-1].skills.range.level.update(1) + npcs[-1].skills.mage.level.update(1) + self.assertEqual(npcs[-1].attack_level, 1) + self.assertEqual(players[2].attack_level, 1) + players[3].skills.melee.level.update(3) + players[4].skills.melee.level.update(2) + + # killing player 2 does not progress the both tasks + env.realm.event_log.record(EventCode.PLAYER_KILL, players[1], + target=players[2]) # level 1 player + _, _, _, infos = env.step({}) + + true_task = [] # all false + self._check_result(env, test_preds, infos, true_task) + for task in env.tasks: + self._check_progress(task, infos, 0) + + # killing npc -1 completes the first task + env.realm.event_log.record(EventCode.PLAYER_KILL, players[1], + target=npcs[-1]) # level 1 npc + _, _, _, infos = env.step({}) + + true_task = [0] + self._check_result(env, test_preds, infos, true_task) + self._check_progress(env.tasks[0], infos, 1) + + # killing player 3 makes half progress on the second task + env.realm.event_log.record(EventCode.PLAYER_KILL, players[1], + target=players[3]) # level 3 player + _, _, _, infos = env.step({}) + self._check_progress(env.tasks[1], infos, .5) + + # killing player 4 completes the second task + env.realm.event_log.record(EventCode.PLAYER_KILL, players[1], + target=players[4]) # level 2 player + _, _, _, infos = env.step({}) + + true_task = [0,1] + self._check_result(env, test_preds, infos, true_task) + self._check_progress(env.tasks[1], infos, 1) # DONE def test_item_event_predicates(self): # 
Consume, Harvest, List, Buy - for predicate, event_type in [(bp.ConsumeItem, 'CONSUME_ITEM'), + for pred_fn, event_type in [(bp.ConsumeItem, 'CONSUME_ITEM'), (bp.HarvestItem, 'HARVEST_ITEM'), (bp.ListItem, 'LIST_ITEM'), (bp.BuyItem, 'BUY_ITEM')]: + predicate = make_predicate(pred_fn) id_ = getattr(EventCode, event_type) lvl = random.randint(5,10) quantity = random.randint(5,10) true_item = Item.Ration false_item = Item.Potion - test_tasks = [ + test_preds = [ (predicate(Group([1,3,5]), true_item, lvl, quantity), ALL_AGENT), # True (predicate(Group([2]), true_item, lvl, quantity), ALL_AGENT), # False (predicate(Group([4]), true_item, lvl, quantity), ALL_AGENT), # False (predicate(Group([6]), true_item, lvl, quantity), ALL_AGENT) # False ] - env = self._get_taskenv(test_tasks) + env = self._get_taskenv(test_preds) players = env.realm.players # True case: split the required items between 3 and 5 for player in (1,3): @@ -668,7 +759,7 @@ def test_item_event_predicates(self): # Consume, Harvest, List, Buy env.obs = env._compute_observations() _, _, _, infos = env.step({}) true_task = [0] - self._check_result(env, test_tasks, infos, true_task) + self._check_result(env, test_preds, infos, true_task) # DONE diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 58cb3a8a..6b6f3418 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -1,28 +1,29 @@ # pylint: disable=unused-argument,invalid-name import unittest +from types import FunctionType import nmmo from nmmo.core.env import Env -from nmmo.task.predicate_api import define_predicate -from nmmo.task.task_api import Task, nmmo_default_task +from nmmo.task.predicate_api import make_predicate, Predicate +from nmmo.task.task_api import Task, nmmo_default_task, make_team_tasks from nmmo.task.group import Group from nmmo.task.constraint import InvalidConstraint, ScalarConstraint -from nmmo.task.base_predicates import TickGE, CanSeeGroup +from nmmo.task.base_predicates import TickGE, 
CanSeeGroup, AllMembersWithinRange from nmmo.systems import item as Item from nmmo.core import action as Action -from tests.testhelpers import ScriptedAgentTestConfig +from scripted.baselines import Sleeper +from tests.testhelpers import ScriptedAgentTestConfig, change_spawn_pos -@define_predicate +# define predicates in the function form +# with the required signatures: gs, subject def Success(gs, subject: Group): return True -@define_predicate def Failure(gs, subject: Group): return False -@define_predicate def Fake(gs, subject, a,b,c): return False @@ -38,11 +39,25 @@ def test_predicate_operators(self): # pylint: disable=unsupported-binary-operation,invalid-unary-operand-type # pylint: disable=no-value-for-parameter,not-callable,no-member + self.assertTrue(isinstance(Success, FunctionType)) + self.assertTrue(isinstance(Failure, FunctionType)) + + # make predicate class from function + success_pred_cls = make_predicate(Success) + failure_pred_cls = make_predicate(Failure) + self.assertTrue(isinstance(success_pred_cls, type)) # class + self.assertTrue(isinstance(failure_pred_cls, type)) + + # then instantiate predicates + SUCCESS = success_pred_cls(Group(0)) + FAILURE = failure_pred_cls(Group(0)) + self.assertTrue(isinstance(SUCCESS, Predicate)) + self.assertTrue(isinstance(FAILURE, Predicate)) + + # NOTE: only the instantiated predicate can be used with operators like below mock_gs = MockGameState() - SUCCESS = Success(Group(0)) - FAILURE = Failure(Group(0)) - # AND (&), OR (|), NOT (~), IMPLY (>>) + # AND (&), OR (|), NOT (~) pred1 = SUCCESS & FAILURE self.assertFalse(pred1(mock_gs)) @@ -76,9 +91,15 @@ def test_team_assignment(self): def test_predicate_name(self): # pylint: disable=no-value-for-parameter,no-member - SUCCESS = Success(Group([0,2])) - FAILURE = Failure(Group(0)) - fake_pred = Fake(Group(2), 1, Item.Hat, Action.Melee) + # make predicate class from function + success_pred_cls = make_predicate(Success) + failure_pred_cls = make_predicate(Failure) + 
fake_pred_cls = make_predicate(Fake) + + # instantiate the predicates + SUCCESS = success_pred_cls(Group([0,2])) + FAILURE = failure_pred_cls(Group(0)) + fake_pred = fake_pred_cls(Group(2), 1, Item.Hat, Action.Melee) combination = (SUCCESS & ~ (FAILURE | fake_pred)) | (FAILURE * fake_pred + .3) - .4 self.assertEqual(combination.name, "(OR_(AND_(Success_(0,2))_(NOT_(OR_(Failure_(0,))_(Fake_(2,)_1_Hat_Melee))))_"+\ @@ -86,9 +107,16 @@ def test_predicate_name(self): def test_constraint(self): # pylint: disable=not-callable,no-value-for-parameter + # define predicate classes from functions + + # make predicate class from function + success_pred_cls = make_predicate(Success) + tickge_pred_cls = make_predicate(TickGE) + self.assertTrue(isinstance(TickGE, FunctionType)) + mock_gs = MockGameState() - good = Success(Group(0)) - bad = Success(Group(99999)) + good = success_pred_cls(Group(0)) + bad = success_pred_cls(Group(99999)) good(mock_gs) self.assertRaises(InvalidConstraint,lambda: bad(mock_gs)) @@ -97,25 +125,39 @@ def test_constraint(self): self.assertTrue(scalar.sample(mock_gs.config)<10) self.assertTrue(scalar.sample(mock_gs.config)>=-10) - bad = TickGE(Group(0), -1) + bad = tickge_pred_cls(Group(0), -1) self.assertRaises(InvalidConstraint, lambda: bad(mock_gs)) def test_sample_predicate(self): # pylint: disable=no-value-for-parameter,expression-not-assigned - predicate = CanSeeGroup() & TickGE() + # make predicate class from function + canseegrp_pred_cls = make_predicate(CanSeeGroup) + tickge_pred_cls = make_predicate(TickGE) + + # if the predicate class is instantiated without the subject, + mock_gs = MockGameState() + predicate = canseegrp_pred_cls() & tickge_pred_cls() self.assertEqual(predicate.name, - "(AND_(CanSeeGroup_subject:GroupConstraint_target:GroupConstraint)_"+\ + "(AND_(CanSeeGroup_subject:GroupConstraint_target:AgentListConstraint)_"+\ "(TickGE_subject:GroupConstraint_num_tick:ScalarConstraint))") + + # this predicate cannot calculate progress 
because it has no subject + with self.assertRaises(AttributeError): + predicate(mock_gs) + + # this predicate supports sampling with valid arguments config = nmmo.config.Default() - TickGE().sample(config) + tickge_pred_cls().sample(config) predicate.sample(config).name # DONE def test_task_api_with_predicate(self): # pylint: disable=no-value-for-parameter,no-member + fake_pred_cls = make_predicate(Fake) + mock_gs = MockGameState() - predicate = Fake(Group(2), 1, Item.Hat, Action.Melee) + predicate = fake_pred_cls(Group(2), 1, Item.Hat, Action.Melee) assignee = [1,2,3] # list of agent ids task = predicate.create_task(assignee=assignee) rewards, infos = task.compute_rewards(mock_gs) @@ -145,6 +187,44 @@ def is_agent_1(gs): self.assertEqual(infos[agent_id]['progress'], 1) # progress (True -> 1) self.assertTrue(task.completed) + def test_predicate_fn_using_other_predicate_fn(self): + # define a predicate: to form a tight formation, for a certain number of ticks + def PracticeFormation(gs, subject, dist, num_tick): + return AllMembersWithinRange(gs, subject, dist) * TickGE(gs, subject, num_tick) + + # team should stay together within 1 tile for 10 ticks + goal_tick = 10 + task_spec = ('team', PracticeFormation, {'dist': 1, 'num_tick': goal_tick}) + + # create the test task from the task spec + teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]} + test_task = make_team_tasks(teams, [task_spec]) + + config = ScriptedAgentTestConfig() + config.PLAYERS =[Sleeper] + config.IMMORTAL = True + + env = Env(config) + env.reset(new_tasks=test_task) + + # move agent 2, 3 to agent 1's pos + for agent_id in [2,3]: + change_spawn_pos(env.realm, agent_id, + env.realm.players[1].pos) + + for tick in range(goal_tick+2): + _, rewards, _, infos = env.step({}) + + if tick < 10: + self.assertAlmostEqual(rewards[1], 1/goal_tick) + self.assertAlmostEqual((1+tick)/goal_tick, + infos[1]['task'][test_task[0].name]['progress']) + else: + # tick 11, task should be completed +
self.assertEqual(rewards[1], 0) + self.assertEqual(infos[1]['task'][test_task[0].name]['progress'], 1) + self.assertEqual(infos[1]['task'][test_task[0].name]['completed'], True) + def test_nmmo_default_task(self): config = ScriptedAgentTestConfig() env = Env(config) @@ -170,14 +250,20 @@ def test_completed_tasks_in_info(self): config = ScriptedAgentTestConfig() env = Env(config) + # make predicate class from function + success_pred_cls = make_predicate(Success) + failure_pred_cls = make_predicate(Failure) + fake_pred_cls = make_predicate(Fake) + + # instantiate the predicates same_team = [1, 2, 3, 4] predicates = [ - Success(Group(1)), # task 1 - Failure(Group(2)), # task 2 - Fake(Group(3), 1, Item.Hat, Action.Melee), # task 3 - Success(Group(same_team))] # task 4 + success_pred_cls(Group(1)), # task 1 + failure_pred_cls(Group(2)), # task 2 + fake_pred_cls(Group(3), 1, Item.Hat, Action.Melee), # task 3 + success_pred_cls(Group(same_team))] # task 4 - # in this case the task assignees are the same as the predicate subjects + # tasks can be created directly from predicate instances tasks = [pred.create_task() for pred in predicates] # tasks are all instantiated with the agent ids From 4f0c94ca2e3ec7a13710751fdcf2a056f755be3e Mon Sep 17 00:00:00 2001 From: kywch Date: Mon, 5 Jun 2023 18:59:56 +0000 Subject: [PATCH 14/18] curriculum bug fix, add reset packet to replay --- nmmo/core/realm.py | 7 ++++--- tests/task/test_manual_curriculum.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/nmmo/core/realm.py b/nmmo/core/realm.py index 52bcb557..958a4c07 100644 --- a/nmmo/core/realm.py +++ b/nmmo/core/realm.py @@ -76,9 +76,6 @@ def reset(self, map_id: int = None): self.log_helper.reset() self.event_log.reset() - if self._replay_helper is not None: - self._replay_helper.reset() - self.map.reset(map_id or np.random.randint(self.config.MAP_N) + 1) # EntityState and ItemState tables must be empty after players/npcs.reset() @@ -104,6 +101,10 @@ def 
reset(self, map_id: int = None): Item.INSTANCE_ID = 0 self.items = {} + if self._replay_helper is not None: + self._replay_helper.reset() + self._replay_helper.update() # capture the initial packet + def packet(self): """Client packet""" return { diff --git a/tests/task/test_manual_curriculum.py b/tests/task/test_manual_curriculum.py index ef6cd1b3..ab3189ea 100644 --- a/tests/task/test_manual_curriculum.py +++ b/tests/task/test_manual_curriculum.py @@ -155,7 +155,7 @@ def PracticeFormation(gs, subject, dist, num_tick): # managing inventory space def PracticeInventoryManagement(gs, subject, space, num_tick): - return bp.AllMembersWithinRange(gs, subject, space) * bp.TickGE(gs, subject, num_tick) + return bp.InventorySpaceGE(gs, subject, space) * bp.TickGE(gs, subject, num_tick) for space in [2, 4, 8]: task_spec += [('agent', PracticeInventoryManagement, {'space': space, 'num_tick': num_tick}) for num_tick in STAY_ALIVE_GOAL] From d9aa89b7e5c90467e59ba90e285c1916dd772e10 Mon Sep 17 00:00:00 2001 From: kywch Date: Mon, 5 Jun 2023 22:15:01 +0000 Subject: [PATCH 15/18] env.reset() only takes make_task_fn --- nmmo/core/env.py | 28 +++++------------------ nmmo/core/realm.py | 1 - nmmo/render/replay_helper.py | 1 + tests/task/test_demo_task_creation.py | 9 ++++---- tests/task/test_manual_curriculum.py | 5 ++-- tests/task/test_predicates.py | 7 +++--- tests/task/test_task_api.py | 33 ++++++++++++--------------- 7 files changed, 32 insertions(+), 52 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index f8405231..3cc41570 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -16,7 +16,7 @@ from nmmo.entity.entity import Entity from nmmo.systems.item import Item from nmmo.task.game_state import GameStateGenerator -from nmmo.task.task_api import Task, nmmo_default_task +from nmmo.task import task_api from scripted.baselines import Scripted class Env(ParallelEnv): @@ -40,7 +40,7 @@ def __init__(self, self._gamestate_generator = GameStateGenerator(self.realm, 
self.config) self.game_state = None - self.tasks = nmmo_default_task(self.possible_agents) + self.tasks = task_api.nmmo_default_task(self.possible_agents) # pylint: disable=method-cache-max-size-none @functools.lru_cache(maxsize=None) @@ -119,9 +119,7 @@ def action_space(self, agent): # TODO: This doesn't conform to the PettingZoo API # pylint: disable=arguments-renamed def reset(self, map_id=None, seed=None, options=None, - new_tasks: List[Task]=None, - make_task_fn: Callable=None, - make_task_fn_kwargs: Dict[str, Any]=None): + make_task_fn: Callable=None): '''OpenAI Gym API reset function Loads a new game map and returns initial observations @@ -129,9 +127,7 @@ def reset(self, map_id=None, seed=None, options=None, Args: map_id: Map index to load. Selects a random map by default seed: random seed to use - new_tasks: A list of instantiated tasks - make_task_fn: A function to instantiate tasks - make_task_fn_kwargs: Keyword arguments to pass to make_task_fn + make_task_fn: A function to make tasks Returns: observations, as documented by _compute_observations() @@ -158,16 +154,8 @@ def reset(self, map_id=None, seed=None, options=None, self.obs = self._compute_observations() self._gamestate_generator = GameStateGenerator(self.realm, self.config) - """Two methods to define tasks. - * new_tasks: a list of instantiated tasks. 
This method has precedence - * make_task_fn & kwargs: a task maker fn and its kwargs to instantiate tasks - If these are all None, then use the default task - """ - if new_tasks is not None: - # providing an empty new_tasks [] is also possible - self.tasks = new_tasks - elif make_task_fn is not None: - self.tasks = make_task_fn(**make_task_fn_kwargs) + if make_task_fn is not None: + self.tasks = make_task_fn() else: for task in self.tasks: task.reset() @@ -428,10 +416,6 @@ def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]): rewards[agent_id] = rewards.get(agent_id,0) + reward infos[agent_id]['task'][task.name] = task_infos[agent_id] # progress - # Remove rewards for dead agents - for agent_id in dones: - rewards[agent_id] = -1 - return rewards, infos ############################################################################ diff --git a/nmmo/core/realm.py b/nmmo/core/realm.py index 958a4c07..8b3da598 100644 --- a/nmmo/core/realm.py +++ b/nmmo/core/realm.py @@ -103,7 +103,6 @@ def reset(self, map_id: int = None): if self._replay_helper is not None: self._replay_helper.reset() - self._replay_helper.update() # capture the initial packet def packet(self): """Client packet""" diff --git a/nmmo/render/replay_helper.py b/nmmo/render/replay_helper.py index bb58e6f0..a16564e7 100644 --- a/nmmo/render/replay_helper.py +++ b/nmmo/render/replay_helper.py @@ -37,6 +37,7 @@ def reset(self): self.packets = [] self.map = None self._i = 0 + self.update() # to capture the initial packet def __len__(self): return len(self.packets) diff --git a/tests/task/test_demo_task_creation.py b/tests/task/test_demo_task_creation.py index 884c7c27..462284b3 100644 --- a/tests/task/test_demo_task_creation.py +++ b/tests/task/test_demo_task_creation.py @@ -12,7 +12,7 @@ from nmmo.task.group import Group def rollout(env, tasks, steps=5): - env.reset(new_tasks=tasks) + env.reset(make_task_fn=lambda: tasks) for _ in range(steps): env.step({}) return env.step({}) @@ -150,7 
+150,7 @@ def KillPredicate(gs: GameState, for agent_id in env.possible_agents] # Test Reward - env.reset(new_tasks=kill_tasks) + env.reset(make_task_fn=lambda: kill_tasks) players = env.realm.players code = EventCode.PLAYER_KILL env.realm.event_log.record(code, players[1], target=players[3]) @@ -192,7 +192,7 @@ def PredicateMath(gs, subject): task_for_agent_1 = pred_math_cls(Group(1)).create_task() # Test Reward - env.reset(new_tasks=[task_for_agent_1]) + env.reset(make_task_fn=lambda: [task_for_agent_1]) code = EventCode.PLAYER_KILL players = env.realm.players env.realm.event_log.record(code, players[1], target=players[2]) @@ -237,8 +237,7 @@ def test_make_team_tasks_inside_reset(self): config = ScriptedAgentTestConfig() env = Env(config) - env.reset(make_task_fn=t.make_team_tasks, - make_task_fn_kwargs={'task_spec':task_spec, 'teams':teams}) + env.reset(make_task_fn=lambda: t.make_team_tasks(teams, task_spec)) self.assertEqual(len(env.tasks), 6) # 6 tasks were created self.assertEqual(env.tasks[0].name, # team 0 task assigned to agents 1,2,3 diff --git a/tests/task/test_manual_curriculum.py b/tests/task/test_manual_curriculum.py index ab3189ea..5fcc6bbc 100644 --- a/tests/task/test_manual_curriculum.py +++ b/tests/task/test_manual_curriculum.py @@ -277,9 +277,10 @@ def check_task_spec(spec_list): config = nmmo.config.Default() env = nmmo.Env(config) for idx, single_spec in enumerate(spec_list): - sample_task = make_team_tasks(teams, [single_spec]) + # pylint: disable=cell-var-from-loop + test_task = make_team_tasks(teams, [single_spec]) try: - env.reset(new_tasks=sample_task) + env.reset(make_task_fn=lambda: test_task) for _ in range(3): env.step({}) except: diff --git a/tests/task/test_predicates.py b/tests/task/test_predicates.py index 555d1e71..a9b76f57 100644 --- a/tests/task/test_predicates.py +++ b/tests/task/test_predicates.py @@ -38,11 +38,11 @@ def _get_taskenv(self, config.IMMORTAL = True # OngoingTask keeps evaluating and returns progress as the 
reward - # vs. Task stops evaluating once the task is completed - tasks = [OngoingTask(pred, assignee) for pred, assignee in test_preds] + # vs. Task stops evaluating once the task is completed, returns reward = delta(progress) + test_tasks = [OngoingTask(pred, assignee) for pred, assignee in test_preds] env = Env(config) - env.reset(new_tasks=tasks) + env.reset(make_task_fn=lambda: test_tasks) if grass_map: MS = env.config.MAP_SIZE @@ -84,7 +84,6 @@ def test_tickge_stay_alive_rip(self): stay_alive_pred_cls = make_predicate(bp.StayAlive) all_dead_pred_cls = make_predicate(bp.AllDead) - tick_true = 5 death_note = [1, 2, 3] test_preds = [ # (instantiated predicate, task assignee) diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 6b6f3418..98794f55 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -198,14 +198,13 @@ def PracticeFormation(gs, subject, dist, num_tick): # create the test task from the task spec teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]} - test_task = make_team_tasks(teams, [task_spec]) config = ScriptedAgentTestConfig() config.PLAYERS =[Sleeper] config.IMMORTAL = True env = Env(config) - env.reset(new_tasks=test_task) + env.reset(make_task_fn=lambda: make_team_tasks(teams, [task_spec])) # move agent 2, 3 to agent 1's pos for agent_id in [2,3]: @@ -218,22 +217,20 @@ def PracticeFormation(gs, subject, dist, num_tick): if tick < 10: self.assertAlmostEqual(rewards[1], 1/goal_tick) self.assertAlmostEqual((1+tick)/goal_tick, - infos[1]['task'][test_task[0].name]['progress']) + infos[1]['task'][env.tasks[0].name]['progress']) else: # tick 11, task should be completed self.assertEqual(rewards[1], 0) - self.assertEqual(infos[1]['task'][test_task[0].name]['progress'], 1) - self.assertEqual(infos[1]['task'][test_task[0].name]['completed'], True) + self.assertEqual(infos[1]['task'][env.tasks[0].name]['progress'], 1) + self.assertEqual(infos[1]['task'][env.tasks[0].name]['completed'], True) def 
test_nmmo_default_task(self): config = ScriptedAgentTestConfig() env = Env(config) for test_mode in [None, 'no_task', 'func_eval', 'dummy_eval_fn']: - #dafault_tasks = nmmo_default_task(env.possible_agents, test_mode) - env.reset(make_task_fn=nmmo_default_task, - make_task_fn_kwargs={'agent_list': env.possible_agents, - 'test_mode': test_mode}) + # pylint: disable=cell-var-from-loop + env.reset(make_task_fn=lambda: nmmo_default_task(env.possible_agents, test_mode)) for _ in range(3): env.step({}) @@ -264,31 +261,31 @@ def test_completed_tasks_in_info(self): success_pred_cls(Group(same_team))] # task 4 # tasks can be created directly from predicate instances - tasks = [pred.create_task() for pred in predicates] + test_tasks = [pred.create_task() for pred in predicates] # tasks are all instantiated with the agent ids - env.reset(new_tasks=tasks) + env.reset(make_task_fn=lambda: test_tasks) _, _, _, infos = env.step({}) # agent 1: assigned only task 1, which is always True - self.assertEqual(infos[1]['task'][tasks[0].name]['reward'], 1.0) + self.assertEqual(infos[1]['task'][env.tasks[0].name]['reward'], 1.0) for i in [1, 2]: # task 2 and 3 - self.assertTrue(tasks[i].name not in infos[1]['task']) + self.assertTrue(env.tasks[i].name not in infos[1]['task']) # agent 2: assigned task 2 (Failure) and task 4 (Success) - self.assertEqual(infos[2]['task'][tasks[1].name]['reward'], 0.0) # task 2 - self.assertEqual(infos[2]['task'][tasks[3].name]['reward'], 1.0) # task 4 + self.assertEqual(infos[2]['task'][env.tasks[1].name]['reward'], 0.0) # task 2 + self.assertEqual(infos[2]['task'][env.tasks[3].name]['reward'], 1.0) # task 4 # agent 3 assigned task 3, Fake(), which is always False (0) - self.assertEqual(infos[3]['task'][tasks[2].name]['reward'], 0.0) # task 3 + self.assertEqual(infos[3]['task'][env.tasks[2].name]['reward'], 0.0) # task 3 # all agents in the same team with agent 2 have SUCCESS # other agents don't have any tasks assigned for ent_id in env.possible_agents: if 
ent_id in same_team: - self.assertEqual(infos[ent_id]['task'][tasks[3].name]['reward'], 1.0) + self.assertEqual(infos[ent_id]['task'][env.tasks[3].name]['reward'], 1.0) else: - self.assertTrue(tasks[3].name not in infos[ent_id]['task']) + self.assertTrue(env.tasks[3].name not in infos[ent_id]['task']) # DONE From 332f843691bb569533a01914bfb0063e150a24a8 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 7 Jun 2023 02:17:49 +0000 Subject: [PATCH 16/18] updated nmmo_default_task(), profiled task system --- nmmo/task/task_api.py | 14 ++---- tests/task/test_task_api.py | 20 +------- tests/task/test_task_system_perf.py | 76 +++++++++++++++++++++++++++++ tests/testhelpers.py | 20 +++++--- 4 files changed, 94 insertions(+), 36 deletions(-) create mode 100644 tests/task/test_task_system_perf.py diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index 902d880b..266cfba9 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -115,22 +115,18 @@ def make_same_task(predicate: Union[Predicate, Callable], for agent_id in agent_list] def nmmo_default_task(agent_list: Iterable[int], test_mode=None) -> List[Task]: - if test_mode is None: - # use the full predicate system - return make_same_task(bp.StayAlive, agent_list, task_cls=OngoingTask) - + # (almost) no overhead in env._compute_rewards() if test_mode == 'no_task': return [] + # eval function on Predicate class, but does not use Group during eval if test_mode == 'dummy_eval_fn': # pylint: disable=unused-argument return make_same_task(lambda gs, subject: True, agent_list, task_cls=OngoingTask) - # use the function-based eval - def stay_alive_eval(gs, subject): - return all(agent_id in gs.alive_agents for agent_id in subject.agents) - - return make_same_task(stay_alive_eval, agent_list, task_cls=OngoingTask) + # the default is to use the predicate class + pred_cls = make_predicate(bp.StayAlive) + return make_same_task(pred_cls, agent_list, task_cls=OngoingTask) 
###################################################################### # TODO: a lot to improve below diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 98794f55..76356b2f 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -5,7 +5,7 @@ import nmmo from nmmo.core.env import Env from nmmo.task.predicate_api import make_predicate, Predicate -from nmmo.task.task_api import Task, nmmo_default_task, make_team_tasks +from nmmo.task.task_api import Task, make_team_tasks from nmmo.task.group import Group from nmmo.task.constraint import InvalidConstraint, ScalarConstraint from nmmo.task.base_predicates import TickGE, CanSeeGroup, AllMembersWithinRange @@ -224,24 +224,6 @@ def PracticeFormation(gs, subject, dist, num_tick): self.assertEqual(infos[1]['task'][env.tasks[0].name]['progress'], 1) self.assertEqual(infos[1]['task'][env.tasks[0].name]['completed'], True) - def test_nmmo_default_task(self): - config = ScriptedAgentTestConfig() - env = Env(config) - - for test_mode in [None, 'no_task', 'func_eval', 'dummy_eval_fn']: - # pylint: disable=cell-var-from-loop - env.reset(make_task_fn=lambda: nmmo_default_task(env.possible_agents, test_mode)) - for _ in range(3): - env.step({}) - - for agent_id in env.possible_agents: - if test_mode is None: - self.assertTrue('StayAlive' in env.tasks[agent_id-1].name) # default task - if test_mode != 'no_task': - self.assertTrue(f'assignee:({agent_id},)' in env.tasks[agent_id-1].name) - - # DONE - def test_completed_tasks_in_info(self): # pylint: disable=no-value-for-parameter,no-member config = ScriptedAgentTestConfig() diff --git a/tests/task/test_task_system_perf.py b/tests/task/test_task_system_perf.py new file mode 100644 index 00000000..c89eea8c --- /dev/null +++ b/tests/task/test_task_system_perf.py @@ -0,0 +1,76 @@ +import unittest + +from nmmo.core.env import Env +from nmmo.task.task_api import Task, nmmo_default_task +from tests.testhelpers import profile_env_step, 
ScriptedAgentTestConfig + +PROFILE_PERF = False + +class TestTaskSystemPerf(unittest.TestCase): + def test_nmmo_default_task(self): + config = ScriptedAgentTestConfig() + env = Env(config) + agent_list = env.possible_agents + + for test_mode in [None, 'no_task', 'dummy_eval_fn', 'pure_func_eval']: + + # create tasks + if test_mode == 'pure_func_eval': + def create_stay_alive_eval_wo_group(agent_id: int): + return lambda gs: agent_id in gs.alive_agents + tasks = [Task(create_stay_alive_eval_wo_group(agent_id), assignee=agent_id) + for agent_id in agent_list] + else: + tasks = nmmo_default_task(agent_list, test_mode) + + # check tasks + for agent_id in agent_list: + if test_mode is None: + self.assertTrue('StayAlive' in tasks[agent_id-1].name) # default task + if test_mode != 'no_task': + self.assertTrue(f'assignee:({agent_id},)' in tasks[agent_id-1].name) + + # pylint: disable=cell-var-from-loop + if PROFILE_PERF: + test_cond = 'default' if test_mode is None else test_mode + profile_env_step(tasks=tasks, condition=test_cond) + else: + env.reset(make_task_fn=lambda: tasks) + for _ in range(3): + env.step({}) + + # DONE + + +if __name__ == '__main__': + unittest.main() + + # """ Tested on Win 11, docker + # === Test condition: default === + # - env.step({}): 12.302560470998287 + # - env.realm.step(): 3.8562550359929446 + # - env._compute_observations(): 3.3712658310032566 + # - obs.to_gym(), ActionTarget: 2.477421684998262 + # - env._compute_rewards(): 1.4060252049966948 + + # === Test condition: no_task === + # - env.step({}): 10.818232985999202 + # - env.realm.step(): 3.79689467499702 + # - env._compute_observations(): 3.3100888289991417 + # - obs.to_gym(), ActionTarget: 2.409053840994602 + # - env._compute_rewards(): 0.00781778599775862 + + # === Test condition: dummy_eval_fn, using Predicate class === + # - env.step({}): 11.989140973004396 + # - env.realm.step(): 3.8649445789997117 + # - env._compute_observations(): 3.344463708999683 + # - obs.to_gym(), 
ActionTarget: 2.431279453005118 + # - env._compute_rewards(): 1.119989460996294 + + # === Test condition: pure_func_eval, WITHOUT Predicate class === + # - env.step({}): 11.032341518002795 + # - env.realm.step(): 3.8636899659977644 + # - env._compute_observations(): 3.3460479429995758 + # - obs.to_gym(), ActionTarget: 2.498140270996373 + # - env._compute_rewards(): 0.055145307997008786 + # """ diff --git a/tests/testhelpers.py b/tests/testhelpers.py index 3783c721..03ea2097 100644 --- a/tests/testhelpers.py +++ b/tests/testhelpers.py @@ -369,25 +369,29 @@ def _check_assert_make_action(self, env, atn, test_cond): return actions # pylint: disable=unnecessary-lambda,bad-builtin -def profile_env_step(action_target=True): +def profile_env_step(action_target=True, tasks=None, condition=None): config = nmmo.config.Default() config.PLAYERS = [baselines.Sleeper] # the scripted agents doing nothing config.IMMORTAL = True # otherwise the agents will die config.PROVIDE_ACTION_TARGETS = action_target env = nmmo.Env(config) - env.reset(seed=0) + if tasks is None: + tasks = [] + env.reset(seed=0, make_task_fn=lambda: tasks) for _ in range(3): env.step({}) obs = env._compute_observations() test_func = [ - ('env.step({})', lambda: env.step({})), - ('env.realm.step()', lambda: env.realm.step({})), - ('env._compute_observations()', lambda: env._compute_observations()), - ('obs.to_gym()', lambda: {a: o.to_gym() for a,o in obs.items()}), - ('env._compute_rewards()', lambda: env._compute_rewards(obs.keys(), {})) + ('env.step({}):', lambda: env.step({})), + ('env.realm.step():', lambda: env.realm.step({})), + ('env._compute_observations():', lambda: env._compute_observations()), + ('obs.to_gym(), ActionTarget:', lambda: {a: o.to_gym() for a,o in obs.items()}), + ('env._compute_rewards():', lambda: env._compute_rewards(obs.keys(), {})) ] + if condition: + print('=== Test condition:', condition, '===') for name, func in test_func: - print(name, timeit(func, number=100, globals=globals())) 
+ print(' -', name, timeit(func, number=100, globals=globals())) From 44eab798087366960bbe6caf06b03b8c86547061 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 7 Jun 2023 03:10:50 +0000 Subject: [PATCH 17/18] clean up make-task helpers --- nmmo/task/task_api.py | 17 +++++----- tests/task/test_task_system_perf.py | 51 +++++++++++++++-------------- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index 266cfba9..8bc5d587 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -105,13 +105,11 @@ def _map_progress_to_reward(self, gs) -> float: def make_same_task(predicate: Union[Predicate, Callable], agent_list: Iterable[int], task_cls = Task, **kwargs) -> List[Task]: - if isinstance(predicate, type): # predicate is class, assuming Predicate - return [predicate(Group(agent_id),**kwargs).create_task(task_cls=task_cls) - for agent_id in agent_list] + # if a function is provided, make it a predicate class + if isinstance(predicate, FunctionType): + predicate = make_predicate(predicate) - # eval_fn is a function to turn into predicate - pred_cls = make_predicate(predicate) - return [pred_cls(Group(agent_id),**kwargs).create_task(task_cls=task_cls) + return [predicate(Group(agent_id),**kwargs).create_task(task_cls=task_cls) for agent_id in agent_list] def nmmo_default_task(agent_list: Iterable[int], test_mode=None) -> List[Task]: @@ -125,8 +123,7 @@ def nmmo_default_task(agent_list: Iterable[int], test_mode=None) -> List[Task]: return make_same_task(lambda gs, subject: True, agent_list, task_cls=OngoingTask) # the default is to use the predicate class - pred_cls = make_predicate(bp.StayAlive) - return make_same_task(pred_cls, agent_list, task_cls=OngoingTask) + return make_same_task(bp.StayAlive, agent_list, task_cls=OngoingTask) ###################################################################### # TODO: a lot to improve below @@ -162,7 +159,7 @@ def make_team_tasks(teams, task_spec) -> List[Task]: 
assert target in VALID_TARGET, 'Invalid target' # translate target to specific agent ids using team_helper target = team_helper.get_target_agent(team_id, target) - kwargs['target'] = target #tuple(target,) if isinstance(target, int) else tuple(target) + kwargs['target'] = target # handle some special cases and instantiate the predicate first predicate = None @@ -184,6 +181,7 @@ def make_team_tasks(teams, task_spec) -> List[Task]: if predicate is None: tasks.append(pred_cls(Group(assignee), **kwargs).create_task(task_cls=task_cls)) else: + # this branch is for the cases like AllDead, StayAlive tasks.append(predicate.create_task(assignee=assignee, task_cls=task_cls)) elif reward_to == 'agent': @@ -191,6 +189,7 @@ def make_team_tasks(teams, task_spec) -> List[Task]: if predicate is None: tasks += make_same_task(pred_cls, agent_list, task_cls=task_cls, **kwargs) else: + # this branch is for the cases like AllDead, StayAlive tasks += [predicate.create_task(assignee=agent_id, task_cls=task_cls) for agent_id in agent_list] diff --git a/tests/task/test_task_system_perf.py b/tests/task/test_task_system_perf.py index c89eea8c..97835696 100644 --- a/tests/task/test_task_system_perf.py +++ b/tests/task/test_task_system_perf.py @@ -1,14 +1,15 @@ import unittest +import nmmo from nmmo.core.env import Env from nmmo.task.task_api import Task, nmmo_default_task -from tests.testhelpers import profile_env_step, ScriptedAgentTestConfig +from tests.testhelpers import profile_env_step PROFILE_PERF = False class TestTaskSystemPerf(unittest.TestCase): def test_nmmo_default_task(self): - config = ScriptedAgentTestConfig() + config = nmmo.config.Default() env = Env(config) agent_list = env.possible_agents @@ -46,31 +47,31 @@ def create_stay_alive_eval_wo_group(agent_id: int): unittest.main() # """ Tested on Win 11, docker - # === Test condition: default === - # - env.step({}): 12.302560470998287 - # - env.realm.step(): 3.8562550359929446 - # - env._compute_observations(): 3.3712658310032566 
- # - obs.to_gym(), ActionTarget: 2.477421684998262 - # - env._compute_rewards(): 1.4060252049966948 + # === Test condition: default (StayAlive-based Predicate) === + # - env.step({}): 13.398321460997977 + # - env.realm.step(): 3.6524868449996575 + # - env._compute_observations(): 3.2038183499971638 + # - obs.to_gym(), ActionTarget: 2.30746804500086 + # - env._compute_rewards(): 2.7206644940015394 # === Test condition: no_task === - # - env.step({}): 10.818232985999202 - # - env.realm.step(): 3.79689467499702 - # - env._compute_observations(): 3.3100888289991417 - # - obs.to_gym(), ActionTarget: 2.409053840994602 - # - env._compute_rewards(): 0.00781778599775862 + # - env.step({}): 10.576253965999058 + # - env.realm.step(): 3.674701832998835 + # - env._compute_observations(): 3.260661373002222 + # - obs.to_gym(), ActionTarget: 2.313872797996737 + # - env._compute_rewards(): 0.009020475001307204 - # === Test condition: dummy_eval_fn, using Predicate class === - # - env.step({}): 11.989140973004396 - # - env.realm.step(): 3.8649445789997117 - # - env._compute_observations(): 3.344463708999683 - # - obs.to_gym(), ActionTarget: 2.431279453005118 - # - env._compute_rewards(): 1.119989460996294 + # === Test condition: dummy_eval_fn -based Predicate === + # - env.step({}): 12.797982947995479 + # - env.realm.step(): 3.604593793003005 + # - env._compute_observations(): 3.2095355240016943 + # - obs.to_gym(), ActionTarget: 2.313207338003849 + # - env._compute_rewards(): 2.266267291997792 - # === Test condition: pure_func_eval, WITHOUT Predicate class === - # - env.step({}): 11.032341518002795 - # - env.realm.step(): 3.8636899659977644 - # - env._compute_observations(): 3.3460479429995758 - # - obs.to_gym(), ActionTarget: 2.498140270996373 - # - env._compute_rewards(): 0.055145307997008786 + # === Test condition: pure_func_eval WITHOUT Predicate === + # - env.step({}): 10.637560240997118 + # - env.realm.step(): 3.633970066999609 + # - env._compute_observations(): 
3.2308093659958104 + # - obs.to_gym(), ActionTarget: 2.331246039000689 + # - env._compute_rewards(): 0.0988905300037004 # """ From 79a66bd1a7f7daf472508fb6c94928d28fb723ec Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 7 Jun 2023 18:36:40 +0000 Subject: [PATCH 18/18] renamed the make task from task spec example --- tests/task/test_demo_task_creation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/task/test_demo_task_creation.py b/tests/task/test_demo_task_creation.py index 462284b3..5f5e532c 100644 --- a/tests/task/test_demo_task_creation.py +++ b/tests/task/test_demo_task_creation.py @@ -210,7 +210,7 @@ def PredicateMath(gs, subject): # DONE - def test_make_team_tasks_inside_reset(self): + def test_make_team_tasks_using_task_spec(self): # NOTE: len(teams) and len(task_spec) don't need to match teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]}