# Modified by Microsoft Corporation.
# Licensed under the MIT license.

from copy import deepcopy

# The agent module
import numpy as np
import pandas as pd
import pydash as ps
import torch

from convlab.agent import algorithm, memory
from convlab.agent.algorithm import policy_util
from convlab.agent.net import net_util
from convlab.lib import logger, util
from convlab.lib.decorator import lab_api
from convlab.modules import nlu, dst, word_dst, nlg, state_encoder, action_decoder

logger = logger.get_logger(__name__)

class Agent:
    '''
    Agent abstraction; implements the API to interface with Env in SLM Lab
    Contains algorithm, memory, body
    '''

    def __init__(self, spec, body, a=None, global_nets=None):
        self.spec = spec
        self.a = a or 0  # for multi-agent
        self.agent_spec = spec['agent'][self.a]
        self.name = self.agent_spec['name']
        assert not ps.is_list(global_nets), f'single agent global_nets must be a dict, got {global_nets}'
        # set components
        self.body = body
        body.agent = self
        MemoryClass = getattr(memory, ps.get(self.agent_spec, 'memory.name'))
        self.body.memory = MemoryClass(self.agent_spec['memory'], self.body)
        AlgorithmClass = getattr(algorithm, ps.get(self.agent_spec, 'algorithm.name'))
        self.algorithm = AlgorithmClass(self, global_nets)

        logger.info(util.self_desc(self))

    @lab_api
    def act(self, state):
        '''Standard act method from algorithm.'''
        with torch.no_grad():  # for efficiency, only calc grad in algorithm.train
            action = self.algorithm.act(state)
        return action

    @lab_api
    def update(self, state, action, reward, next_state, done):
        '''Update per timestep after env transitions, e.g. memory, algorithm, update agent params, train net'''
        self.body.update(state, action, reward, next_state, done)
        if util.in_eval_lab_modes():  # eval does not update agent for training
            return
        self.body.memory.update(state, action, reward, next_state, done)
        loss = self.algorithm.train()
        if not np.isnan(loss):  # set for log_summary()
            self.body.loss = loss
        explore_var = self.algorithm.update()
        return loss, explore_var

    @lab_api
    def save(self, ckpt=None):
        '''Save agent'''
        if util.in_eval_lab_modes():  # eval does not save new models
            return
        self.algorithm.save(ckpt=ckpt)

    @lab_api
    def close(self):
        '''Close and cleanup agent at the end of a session, e.g. save model'''
        self.save()

    @lab_api
    def reset(self, obs):
        '''Do agent reset per session, such as memory pointer'''
        logger.debug(f'Agent {self.a} reset')
        if self.dst:
            self.dst.init_session()
        if hasattr(self.algorithm, "reset"):  # This is mainly for external policies that may need to reset its state.
            self.algorithm.reset()

        input_act, state, encoded_state = self.state_update(obs, "null")  # "null" action to be compatible with MDBT

        self.body.state, self.body.encoded_state = state, encoded_state

    @lab_api
    def update(self, obs, action, reward, next_obs, done):
        '''Update per timestep after env transitions, e.g. memory, algorithm, update agent params, train net'''
        # update state
        input_act, next_state, encoded_state = self.state_update(next_obs, action)

        # update body
        self.body.update(self.body.state, action, reward, next_state, done)

        # update memory
        if util.in_eval_lab_modes() or self.algorithm.__class__.__name__ == 'ExternalPolicy':  # eval does not update agent for training
            self.body.state, self.body.encoded_state = next_state, encoded_state
            return

        if not hasattr(self.body, 'warmup_memory') or self.body.env.clock.epi > self.warmup_epi:
            self.body.memory.update(self.body.encoded_state, self.body.action, reward, encoded_state, done)
        else:
            self.body.warmup_memory.update(self.body.encoded_state, self.body.action, reward, encoded_state, done)

        # update body
        self.body.state, self.body.encoded_state = next_state, encoded_state

        # train algorithm
        loss = self.algorithm.train()
        if not np.isnan(loss):  # set for log_summary()
            self.body.loss = loss
        explore_var = self.algorithm.update()

        return loss, explore_var

    @lab_api
    def save(self, ckpt=None):
        '''Save agent'''
        if self.algorithm.__class__.__name__ == 'ExternalPolicy':
            return
        if util.in_eval_lab_modes():
            # eval does not save new models
            return
        self.algorithm.save(ckpt=ckpt)

    @lab_api
    def close(self):
        '''Close and cleanup agent at the end of a session, e.g. save model'''
        self.save()

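# --- Illustrative sketch (not part of the original source) -------------------
# The act/update contract above boils down to the standard agent-env loop:
# act() picks an action (under torch.no_grad in the real code), update() records
# the transition and, outside eval mode, trains and returns (loss, explore_var).
# `ToyEnv` and `ToyAgent` below are hypothetical stand-ins, not ConvLab classes.
import random


class ToyEnv:
    def reset(self):
        return 0.0

    def step(self, action):
        # random next state and reward, ~10% chance of episode end
        return random.random(), 1.0, random.random() < 0.1


class ToyAgent:
    def act(self, state):
        return random.choice([0, 1])  # stands in for algorithm.act(state)

    def update(self, state, action, reward, next_state, done):
        return float('nan'), 0.0  # stands in for (loss, explore_var)


env, agent = ToyEnv(), ToyAgent()
state, done = env.reset(), False
while not done:
    action = agent.act(state)
    next_state, reward, done = env.step(action)
    agent.update(state, action, reward, next_state, done)
    state = next_state
# ------------------------------------------------------------------------------
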
class Body:
    '''
    Body of an agent inside an environment, it:
    - enables the automatic dimension inference for constructing network input/output
    - acts as reference bridge between agent and environment (useful for multi-agent, multi-env)
    - acts as non-gradient variable storage for monitoring and analysis
    '''

    def __init__(self, env, agent_spec, aeb=(0, 0, 0)):
        # essential reference variables
        self.agent = None  # set later
        self.env = env
        self.aeb = aeb
        self.a, self.e, self.b = aeb

        # variables set during init_algorithm_params
        self.explore_var = np.nan  # action exploration: epsilon or tau
        self.entropy_coef = np.nan  # entropy for exploration

        # debugging/logging variables, set in train or loss function
        self.loss = np.nan
        self.mean_entropy = np.nan
        self.mean_grad_norm = np.nan

        self.ckpt_total_reward = np.nan
        self.total_reward = 0  # init to 0, but dont ckpt before end of an epi
        self.total_reward_ma = np.nan
        self.ma_window = 100
        # store current and best reward_ma for model checkpointing and early termination if all the environments are solved
        self.best_reward_ma = -np.inf
        self.eval_reward_ma = np.nan

        # dataframes to track data for analysis.analyze_session
        # track training data per episode
        self.train_df = pd.DataFrame(columns=[
            'epi', 't', 'wall_t', 'opt_step', 'frame', 'fps', 'total_reward', 'avg_return', 'avg_len', 'avg_success', 'loss', 'lr',
            'explore_var', 'entropy_coef', 'entropy', 'grad_norm'])
        # track eval data within run_eval; the same as train_df except for reward
        self.eval_df = self.train_df.copy()

        # the specific agent-env interface variables for a body
        self.observation_space = self.env.observation_space
        self.action_space = self.env.action_space
        self.observable_dim = self.env.observable_dim
        self.state_dim = self.observable_dim['state']
        self.action_dim = self.env.action_dim
        self.is_discrete = self.env.is_discrete
        # set the ActionPD class for sampling action
        self.action_type = policy_util.get_action_type(self.action_space)
        self.action_pdtype = agent_spec[self.a]['algorithm'].get('action_pdtype')
        if self.action_pdtype in (None, 'default'):
            self.action_pdtype = policy_util.ACTION_PDS[self.action_type][0]
        self.ActionPD = policy_util.get_action_pd_cls(self.action_pdtype, self.action_type)

    def update(self, state, action, reward, next_state, done):
        '''Interface update method for body at agent.update()'''
        if hasattr(self.env.u_env, 'raw_reward'):  # use raw_reward if reward is preprocessed
            reward = self.env.u_env.raw_reward
        if self.ckpt_total_reward is np.nan:  # init
            self.ckpt_total_reward = reward
        else:  # reset on epi_start, else keep adding; generalized for vec env
            self.ckpt_total_reward = self.ckpt_total_reward * (1 - self.epi_start) + reward
        self.total_reward = done * self.ckpt_total_reward + (1 - done) * self.total_reward
        self.epi_start = done

    def log_summary(self, df_mode):
        '''
        Log the summary for this body when its environment is done
        @param str:df_mode 'train' or 'eval'
        '''
        prefix = self.get_log_prefix()
        df = getattr(self, f'{df_mode}_df')
        last_row = df.iloc[-1]
        row_str = ' '.join([f'{k}: {v:g}' for k, v in last_row.items()])
        msg = f'{prefix} [{df_mode}_df] {row_str}'
        logger.info(msg)
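
# --- Illustrative sketch (not part of the original source) -------------------
# A minimal numeric trace of the reward bookkeeping in Body.update() above,
# assuming scalar rewards and 0/1 done flags: ckpt_total_reward accumulates
# within an episode (resetting after a done) and total_reward only latches the
# episode sum when done = 1.
import numpy as np

ckpt_total_reward, total_reward, epi_start = np.nan, 0.0, 0.0
for reward, done in [(1.0, 0), (2.0, 0), (3.0, 1), (5.0, 0)]:
    if ckpt_total_reward is np.nan:  # init
        ckpt_total_reward = reward
    else:
        ckpt_total_reward = ckpt_total_reward * (1 - epi_start) + reward
    total_reward = done * ckpt_total_reward + (1 - done) * total_reward
    epi_start = done
print(total_reward)  # 6.0: the finished episode's return (1 + 2 + 3)
# ------------------------------------------------------------------------------
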

# Source code for convlab.agent.algorithm.actor_critic
# Modified by Microsoft Corporation.
# Licensed under the MIT license.

import numpy as np
import pydash as ps
import torch

from convlab.agent import net
from convlab.agent.algorithm import policy_util
from convlab.agent.algorithm.reinforce import Reinforce
from convlab.agent.net import net_util
from convlab.lib import logger, math_util, util
from convlab.lib.decorator import lab_api

logger = logger.get_logger(__name__)


class ActorCritic(Reinforce):
    '''
    Implementation of single threaded Advantage Actor Critic
    Original paper: "Asynchronous Methods for Deep Reinforcement Learning"
    https://arxiv.org/abs/1602.01783
    Algorithm specific spec param:
    memory.name: batch (through OnPolicyBatchReplay memory class) or episodic (through OnPolicyReplay memory class)
    lam: if not null, used as the lambda value of generalized advantage estimation (GAE) introduced in "High-Dimensional Continuous Control Using Generalized Advantage Estimation" https://arxiv.org/abs/1506.02438. This lambda controls the bias-variance tradeoff for GAE. Floating point value between 0 and 1. Lower values correspond to more bias, less variance. Higher values to more variance, less bias. The algorithm becomes A2C(GAE).
    num_step_returns: if lam is null and this is not null, specifies the number of steps for N-step returns from "Asynchronous Methods for Deep Reinforcement Learning". The algorithm becomes A2C(Nstep).
    If both lam and num_step_returns are null, use the default TD error. Then the algorithm stays as AC.
    net.type: whether the actor and critic should share params (e.g. through 'MLPNetShared') or have separate params (e.g. through 'MLPNetSeparate'). If param sharing is used then there is also the option to control the weight given to the policy and value components of the loss function through 'policy_loss_coef' and 'val_loss_coef'
    Algorithm - separate actor and critic:
        Repeat:
            1. Collect k examples
            2. Train the critic network using these examples
            3. Calculate the advantage of each example using the critic
            4. Multiply the advantage by the negative of the log probability of the action taken, and sum all the values. This is the policy loss.
            5. Calculate the gradient of the policy loss with respect to the parameters of the actor network
            6. Update the actor network parameters using the gradient
    Algorithm - shared parameters:
        Repeat:
            1. Collect k examples
            2. Calculate the target for each example for the critic
            3. Compute the current estimate of the state-value for each example using the critic
            4. Calculate the critic loss using a regression loss (e.g. square loss) between the target and estimate of the state-value for each example
            5. Calculate the advantage of each example using the rewards and critic
            6. Multiply the advantage by the negative of the log probability of the action taken, and sum all the values. This is the policy loss.
            7. Compute the total loss by summing the value and policy losses
            8. Calculate the gradient of the total loss with respect to the parameters of the shared network
            9. Update the shared network parameters using the gradient

    e.g. algorithm_spec
    "algorithm": {
        "name": "ActorCritic",
        "action_pdtype": "default",
        "action_policy": "default",
        "explore_var_spec": null,
        "gamma": 0.99,
        "lam": 1.0,
        "num_step_returns": 100,
        "entropy_coef_spec": {
            "name": "linear_decay",
            "start_val": 0.01,
            "end_val": 0.001,
            "start_step": 100,
            "end_step": 5000,
        },
        "policy_loss_coef": 1.0,
        "val_loss_coef": 0.01,
        "training_frequency": 1,
    }

    e.g. special net_spec param "shared" to share/separate Actor/Critic
    "net": {
        "type": "MLPNet",
        "shared": true,
        ...
    '''

    @lab_api
    def init_algorithm_params(self):
        '''Initialize other algorithm parameters'''
        # set default
        util.set_attr(self, dict(
            action_pdtype='default',
            action_policy='default',
            explore_var_spec=None,
            entropy_coef_spec=None,
            policy_loss_coef=1.0,
            val_loss_coef=1.0,
        ))
        util.set_attr(self, self.algorithm_spec, [
            'action_pdtype',
            'action_policy',
            # theoretically, AC does not have policy update; but in this implementation we have such option
            'explore_var_spec',
            'gamma',  # the discount factor
            'lam',
            'num_step_returns',
            'entropy_coef_spec',
            'policy_loss_coef',
            'val_loss_coef',
            'training_frequency',
        ])
        self.to_train = 0
        self.action_policy = getattr(policy_util, self.action_policy)
        self.explore_var_scheduler = policy_util.VarScheduler(self.explore_var_spec)
        self.body.explore_var = self.explore_var_scheduler.start_val
        if self.entropy_coef_spec is not None:
            self.entropy_coef_scheduler = policy_util.VarScheduler(self.entropy_coef_spec)
            self.body.entropy_coef = self.entropy_coef_scheduler.start_val
        # Select appropriate methods to calculate advs and v_targets for training
        if self.lam is not None:
            self.calc_advs_v_targets = self.calc_gae_advs_v_targets
        elif self.num_step_returns is not None:
            self.calc_advs_v_targets = self.calc_nstep_advs_v_targets
        else:
            self.calc_advs_v_targets = self.calc_ret_advs_v_targets

    @lab_api
    def init_nets(self, global_nets=None):
        '''
        Initialize the neural networks used to learn the actor and critic from the spec
        Below we automatically select an appropriate net based on two different conditions
        1. If the action space is discrete or continuous action
            - Networks for continuous action spaces have two heads and return two values, the first is a tensor containing the mean of the action policy, the second is a tensor containing the std deviation of the action policy. The distribution is assumed to be a Gaussian (Normal) distribution.
            - Networks for discrete action spaces have a single head and return the logits for a categorical probability distribution over the discrete actions
        2. If the actor and critic are separate or share weights
            - If the networks share weights then the single network returns a list.
            - Continuous action spaces: The returned list contains 3 elements: the first element contains the mean output for the actor (policy), the second element the std dev of the policy, and the third element is the state-value estimated by the network.
            - Discrete action spaces: The returned list contains 2 elements. The first element is a tensor containing the logits for a categorical probability distribution over the actions. The second element contains the state-value estimated by the network.
        3. If the network type is feedforward, convolutional, or recurrent
            - Feedforward and convolutional networks take a single state as input and require an OnPolicyReplay or OnPolicyBatchReplay memory
            - Recurrent networks take n states as input and require env spec "frame_op": "concat", "frame_op_len": seq_len
        '''
        assert 'shared' in self.net_spec, 'Specify "shared" for ActorCritic network in net_spec'
        self.shared = self.net_spec['shared']

        # create actor/critic specific specs
        actor_net_spec = self.net_spec.copy()
        critic_net_spec = self.net_spec.copy()
        for k in self.net_spec:
            if 'actor_' in k:
                actor_net_spec[k.replace('actor_', '')] = actor_net_spec.pop(k)
                critic_net_spec.pop(k)
            if 'critic_' in k:
                critic_net_spec[k.replace('critic_', '')] = critic_net_spec.pop(k)
                actor_net_spec.pop(k)
        if critic_net_spec['use_same_optim']:
            critic_net_spec = actor_net_spec

        in_dim = self.body.state_dim
        out_dim = net_util.get_out_dim(self.body, add_critic=self.shared)
        # main actor network; also contains the critic output if self.shared == True
        NetClass = getattr(net, actor_net_spec['type'])
        self.net = NetClass(actor_net_spec, in_dim, out_dim)
        self.net_names = ['net']
        if not self.shared:  # add a separate network for the critic
            critic_out_dim = 1
            CriticNetClass = getattr(net, critic_net_spec['type'])
            self.critic_net = CriticNetClass(critic_net_spec, in_dim, critic_out_dim)
            self.net_names.append('critic_net')
        # init net optimizer and its lr scheduler
        self.optim = net_util.get_optim(self.net, self.net.optim_spec)
        self.lr_scheduler = net_util.get_lr_scheduler(self.optim, self.net.lr_scheduler_spec)
        if not self.shared:
            self.critic_optim = net_util.get_optim(self.critic_net, self.critic_net.optim_spec)
            self.critic_lr_scheduler = net_util.get_lr_scheduler(self.critic_optim, self.critic_net.lr_scheduler_spec)
        net_util.set_global_nets(self, global_nets)
        self.post_init_nets()

    @lab_api
    def calc_pdparam(self, x, net=None):
        '''
        The pdparam will be the logits for discrete prob. dist., or the mean and std for continuous prob. dist.
        '''
        out = super().calc_pdparam(x, net=net)
        if self.shared:
            assert ps.is_list(out), f'Shared output should be a list [pdparam, v]'
            if len(out) == 2:  # single policy
                pdparam = out[0]
            else:  # multiple-task policies, still assumes 1 value
                pdparam = out[:-1]
            self.v_pred = out[-1].view(-1)  # cache for loss calc to prevent double-pass
        else:  # out is pdparam
            pdparam = out
        return pdparam

    def calc_v(self, x, net=None, use_cache=True):
        '''
        Forward-pass to calculate the predicted state-value from critic_net.
        '''
        if self.shared:  # output: policy, value
            if use_cache:  # uses cache from calc_pdparam to prevent double-pass
                v_pred = self.v_pred
            else:
                net = self.net if net is None else net
                v_pred = net(x)[-1].view(-1)
        else:
            net = self.critic_net if net is None else net
            v_pred = net(x).view(-1)
        return v_pred

    def calc_pdparam_v(self, batch):
        '''Efficiently forward to get pdparam and v by batch for loss computation'''
        states = batch['states']
        if self.body.env.is_venv:
            states = math_util.venv_unpack(states)
        pdparam = self.calc_pdparam(states)
        v_pred = self.calc_v(states)  # uses self.v_pred from calc_pdparam if self.shared
        return pdparam, v_pred
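
# --- Illustrative sketch (not part of the original source) -------------------
# A standalone sketch of generalized advantage estimation (GAE), the estimator
# the ActorCritic class above selects when `lam` is not null. With TD errors
# delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_t) - V(s_t), the advantage is
# A_t = sum_k (gamma * lam)^k * delta_{t+k}. Dummy numbers only; the actual
# implementation lives in convlab.lib.math_util.
import torch

def gae_advs(rewards, dones, v_preds, next_v_pred, gamma=0.99, lam=0.95):
    v_all = torch.cat([v_preds, next_v_pred.view(1)])
    advs = torch.zeros_like(rewards)
    gae = 0.0
    for t in reversed(range(len(rewards))):
        not_done = 1.0 - dones[t]
        delta = rewards[t] + gamma * v_all[t + 1] * not_done - v_all[t]
        gae = delta + gamma * lam * not_done * gae
        advs[t] = gae
    return advs

rewards = torch.tensor([1.0, 0.0, 1.0])
dones = torch.tensor([0.0, 0.0, 1.0])
v_preds = torch.tensor([0.5, 0.4, 0.3])
print(gae_advs(rewards, dones, v_preds, next_v_pred=torch.tensor(0.2)))
# ------------------------------------------------------------------------------
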
# Modified by Microsoft Corporation.
# Licensed under the MIT license.

from abc import ABC, abstractmethod

import numpy as np
import pydash as ps

from convlab.agent.net import net_util
from convlab.lib import logger, util
from convlab.lib.decorator import lab_api

logger = logger.get_logger(__name__)

class Algorithm(ABC):
    '''
    Abstract class ancestor to all Algorithms,
    specifies the necessary design blueprint for agent to work in Lab.
    Mostly, implement just the abstract methods and properties.
    '''

    def __init__(self, agent, global_nets=None):
        '''
        @param {*} agent is the container for algorithm and related components, and interfaces with env.
        '''
        self.agent = agent
        self.algorithm_spec = agent.agent_spec['algorithm']
        self.name = self.algorithm_spec['name']
        self.net_spec = agent.agent_spec.get('net', None)
        if ps.get(agent.agent_spec, 'memory'):
            self.memory_spec = agent.agent_spec['memory']
        self.body = self.agent.body
        self.init_algorithm_params()
        self.init_nets(global_nets)
        logger.info(util.self_desc(self))

    @abstractmethod
    @lab_api
    def init_nets(self, global_nets=None):
        '''Initialize the neural network from the spec'''
        raise NotImplementedError

    @lab_api
    def post_init_nets(self):
        '''
        Method to conditionally load models.
        Call at the end of init_nets() after setting self.net_names
        '''
        assert hasattr(self, 'net_names')
        if util.in_eval_lab_modes():
            logger.info(f'Loaded algorithm models for lab_mode: {util.get_lab_mode()}')
            self.load()
        else:
            logger.info(f'Initialized algorithm models for lab_mode: {util.get_lab_mode()}')

    @lab_api
    def calc_pdparam(self, x, evaluate=True, net=None):
        '''
        To get the pdparam for action policy sampling, do a forward pass of the appropriate net, and pick the correct outputs.
        The pdparam will be the logits for discrete prob. dist., or the mean and std for continuous prob. dist.
        '''
        raise NotImplementedError

    def nanflat_to_data_a(self, data_name, nanflat_data_a):
        '''Reshape nanflat_data_a, e.g. action_a, from a single pass back into the API-conforming data_a'''
        data_names = (data_name,)
        data_a, = self.agent.agent_space.aeb_space.init_data_s(data_names, a=self.agent.a)
        for body, data in zip(self.agent.nanflat_body_a, nanflat_data_a):
            e, b = body.e, body.b
            data_a[(e, b)] = data
        return data_a

    @lab_api
    def save(self, ckpt=None):
        '''Save net models for algorithm given the required property self.net_names'''
        if not hasattr(self, 'net_names'):
            logger.info('No net declared in self.net_names in init_nets(); no models to save.')
        else:
            net_util.save_algorithm(self, ckpt=ckpt)

    @lab_api
    def load(self):
        '''Load net models for algorithm given the required property self.net_names'''
        if not hasattr(self, 'net_names'):
            logger.info('No net declared in self.net_names in init_nets(); no models to load.')
        else:
            net_util.load_algorithm(self)
        # set decayable variables to final values
        for k, v in vars(self).items():
            if k.endswith('_scheduler'):
                var_name = k.replace('_scheduler', '')
                if hasattr(v, 'end_val'):
                    setattr(self.body, var_name, v.end_val)

    # NOTE optional extension for multi-agent-env

    @lab_api
    def space_act(self, state_a):
        '''Interface-level agent act method for all its bodies. Resolves state to state; get action and compose into action.'''
        data_names = ('action',)
        action_a, = self.agent.agent_space.aeb_space.init_data_s(data_names, a=self.agent.a)
        for eb, body in util.ndenumerate_nonan(self.agent.body_a):
            state = state_a[eb]
            self.body = body
            action_a[eb] = self.act(state)
        # set body reference back to default
        self.body = self.agent.nanflat_body_a[0]
        return action_a

    @lab_api
    def space_sample(self):
        '''Samples a batch from memory'''
        batches = []
        for body in self.agent.nanflat_body_a:
            self.body = body
            batches.append(self.sample())
        # set body reference back to default
        self.body = self.agent.nanflat_body_a[0]
        batch = util.concat_batches(batches)
        batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
        return batch

    @lab_api
    def space_train(self):
        if util.in_eval_lab_modes():
            return np.nan
        losses = []
        for body in self.agent.nanflat_body_a:
            self.body = body
            losses.append(self.train())
        # set body reference back to default
        self.body = self.agent.nanflat_body_a[0]
        loss_a = self.nanflat_to_data_a('loss', losses)
        return loss_a

    @lab_api
    def space_update(self):
        explore_vars = []
        for body in self.agent.nanflat_body_a:
            self.body = body
            explore_vars.append(self.update())
        # set body reference back to default
        self.body = self.agent.nanflat_body_a[0]
        explore_var_a = self.nanflat_to_data_a('explore_var', explore_vars)
        return explore_var_a


# Modified by Microsoft Corporation.
# Licensed under the MIT license.

import numpy as np
import torch

from convlab.agent import memory
from convlab.agent import net
from convlab.agent.algorithm.sarsa import SARSA
from convlab.agent.net import net_util
from convlab.lib import logger, util
from convlab.lib.decorator import lab_api

logger = logger.get_logger(__name__)


class VanillaDQN(SARSA):
    '''
    Implementation of a simple DQN algorithm.
    Algorithm:
        1. Collect some examples by acting in the environment and store them in a replay memory
        2. Every K steps sample N examples from replay memory
        3. For each example calculate the target (bootstrapped estimate of the discounted value of the state and action taken), y, using a neural network to approximate the Q function. s' is the next state following the action actually taken.
                y_t = r_t + gamma * max_a Q(s_t', a)
        4. For each example calculate the current estimate of the discounted value of the state and action taken
                x_t = Q(s_t, a_t)
        5. Calculate L(x, y) where L is a regression loss (eg. mse)
        6. Calculate the gradient of L with respect to all the parameters in the network and update the network parameters using the gradient
        7. Repeat steps 3 - 6 M times
        8. Repeat steps 2 - 7 Z times
        9. Repeat steps 1 - 8

    For more information on Q-Learning see Sergey Levine's lectures 6 and 7 from CS294-112 Fall 2017
    https://www.youtube.com/playlist?list=PLkFD6_40KJIznC9CDbVTjAF2oyt8_VAe3

    e.g. algorithm_spec
    "algorithm": {
        "name": "VanillaDQN",
        "action_pdtype": "Argmax",
        "action_policy": "epsilon_greedy",
        "explore_var_spec": {
            "name": "linear_decay",
            "start_val": 1.0,
            "end_val": 0.1,
            "start_step": 10,
            "end_step": 1000,
        },
        "gamma": 0.99,
        "training_batch_iter": 8,
        "training_iter": 4,
        "training_frequency": 10,
        "training_start_step": 10,
    }
    '''

    @lab_api
    def init_algorithm_params(self):
        # set default
        util.set_attr(self, dict(
            action_pdtype='Argmax',
            action_policy='epsilon_greedy',
            explore_var_spec=None,
        ))
        util.set_attr(self, self.algorithm_spec, [
            'action_pdtype',
            'action_policy',
            # explore_var is epsilon, tau or etc. depending on the action policy
            # these control the trade-off between exploration and exploitation
            'explore_var_spec',
            'gamma',  # the discount factor
            'training_batch_iter',  # how many gradient updates per batch
            'training_iter',  # how many batches to train each time
            'training_frequency',  # how often to train (once every few timesteps)
            'training_start_step',  # how long before starting training
        ])
        super().init_algorithm_params()

    @lab_api
    def init_nets(self, global_nets=None):
        '''Initialize the neural network used to learn the Q function from the spec'''
        if self.algorithm_spec['name'] == 'VanillaDQN':
            assert all(k not in self.net_spec for k in ['update_type', 'update_frequency', 'polyak_coef']), 'Network update not available for VanillaDQN; use DQN.'
        in_dim = self.body.state_dim
        out_dim = net_util.get_out_dim(self.body)
        NetClass = getattr(net, self.net_spec['type'])
        self.net = NetClass(self.net_spec, in_dim, out_dim)
        self.net_names = ['net']
        # init net optimizer and its lr scheduler
        self.optim = net_util.get_optim(self.net, self.net.optim_spec)
        self.lr_scheduler = net_util.get_lr_scheduler(self.optim, self.net.lr_scheduler_spec)
        net_util.set_global_nets(self, global_nets)
        self.post_init_nets()

    def calc_q_loss(self, batch):
        '''Compute the Q value loss using predicted and target Q values from the appropriate networks'''
        states = batch['states']
        next_states = batch['next_states']
        q_preds = self.net(states)
        with torch.no_grad():
            next_q_preds = self.net(next_states)
        act_q_preds = q_preds.gather(-1, batch['actions'].long().unsqueeze(-1)).squeeze(-1)
        # Bellman equation: compute max_q_targets using reward and max estimated Q values (0 if no next_state)
        max_next_q_preds, _ = next_q_preds.max(dim=-1, keepdim=True)
        max_q_targets = batch['rewards'] + self.gamma * (1 - batch['dones']) * max_next_q_preds
        logger.debug(f'act_q_preds: {act_q_preds}\nmax_q_targets: {max_q_targets}')
        q_loss = self.net.loss_fn(act_q_preds, max_q_targets)

        # TODO use the same loss_fn but do not reduce yet
        if 'Prioritized' in util.get_class_name(self.body.memory):  # PER
            errors = (max_q_targets - act_q_preds.detach()).abs().cpu().numpy()
            self.body.memory.update_priorities(errors)
        return q_loss

    @lab_api
    def act(self, state):
        '''Selects and returns a discrete action for body using the action policy'''
        return super().act(state)

    @lab_api
    def sample(self):
        '''Samples a batch from memory of size self.memory_spec['batch_size']'''
        batch = self.body.memory.sample()
        batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
        return batch

    @lab_api
    def train(self):
        '''
        Completes one training step for the agent if it is time to train.
        i.e. the environment timestep is greater than the minimum training timestep and a multiple of the training_frequency.
        Each training step consists of sampling n batches from the agent's memory.
        For each of the batches, the target Q values (q_targets) are computed and a single training step is taken k times.
        Otherwise this function does nothing.
        '''
        if util.in_eval_lab_modes():
            return np.nan
        clock = self.body.env.clock
        if self.to_train == 1:
            total_loss = torch.tensor(0.0)
            for _ in range(self.training_iter):
                batch = self.sample()
                clock.set_batch_size(len(batch))
                for _ in range(self.training_batch_iter):
                    loss = self.calc_q_loss(batch)
                    self.net.train_step(loss, self.optim, self.lr_scheduler, clock=clock, global_net=self.global_net)
                    total_loss += loss
            loss = total_loss / (self.training_iter * self.training_batch_iter)
            # reset
            self.to_train = 0
            logger.debug(f'Trained {self.name} at epi: {clock.epi}, frame: {clock.frame}, t: {clock.t}, total_reward so far: {self.body.total_reward}, loss: {loss:g}')
            return loss.item()
        else:
            return np.nan

    @lab_api
    def update(self):
        '''Update the agent after training'''
        return super().update()

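# --- Illustrative sketch (not part of the original source) -------------------
# Standalone numeric example of the Bellman target computed in calc_q_loss
# above, assuming a batch of 3 transitions and 2 discrete actions. The target
# bootstraps from the max next-state Q value and is zeroed when done = 1.
import torch

gamma = 0.99
q_next = torch.tensor([[1.0, 2.0],   # Q(s', .) for each transition
                       [0.5, 0.1],
                       [3.0, 4.0]])
rewards = torch.tensor([1.0, 0.0, 1.0])
dones = torch.tensor([0.0, 0.0, 1.0])
max_next_q = q_next.max(dim=-1).values           # [2.0, 0.5, 4.0]
targets = rewards + gamma * (1 - dones) * max_next_q
print(targets)                                    # [2.98, 0.495, 1.0]
# ------------------------------------------------------------------------------
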
class DQNBase(VanillaDQN):
    '''
    Implementation of the base DQN algorithm.
    The algorithm follows the same general approach as VanillaDQN but is more general since it allows
    for two different networks (through self.net and self.target_net).

    self.net is used to act, and is the network that is trained.
    self.target_net is used to estimate the maximum value of the Q-function in the next state when calculating the target (see VanillaDQN comments).
    self.target_net is updated periodically to either match self.net (self.net.update_type = "replace") or to be a weighted average of self.net and the previous self.target_net (self.net.update_type = "polyak").
    If desired, self.target_net can be updated slowly, and this can help to stabilize learning.

    It also allows for different nets to be used to select the action in the next state and to evaluate the value of that action through self.online_net and self.eval_net. This can help reduce the tendency of DQN to overestimate the value of the Q-function. Following this approach leads to the DoubleDQN algorithm.

    Setting all nets to self.net reduces to the VanillaDQN case.
    '''

    @lab_api
    def init_nets(self, global_nets=None):
        '''Initialize networks'''
        if self.algorithm_spec['name'] == 'DQNBase':
            assert all(k not in self.net_spec for k in ['update_type', 'update_frequency', 'polyak_coef']), 'Network update not available for DQNBase; use DQN.'
        in_dim = self.body.state_dim
        out_dim = net_util.get_out_dim(self.body)
        NetClass = getattr(net, self.net_spec['type'])
        self.net = NetClass(self.net_spec, in_dim, out_dim)
        self.target_net = NetClass(self.net_spec, in_dim, out_dim)
        self.net_names = ['net', 'target_net']
        # init net optimizer and its lr scheduler
        self.optim = net_util.get_optim(self.net, self.net.optim_spec)
        self.lr_scheduler = net_util.get_lr_scheduler(self.optim, self.net.lr_scheduler_spec)
        net_util.set_global_nets(self, global_nets)
        self.post_init_nets()
        self.online_net = self.target_net
        self.eval_net = self.target_net

    def calc_q_loss(self, batch):
        '''Compute the Q value loss using predicted and target Q values from the appropriate networks'''
        states = batch['states']
        next_states = batch['next_states']
        q_preds = self.net(states)
        with torch.no_grad():
            # Use online_net to select actions in next state
            online_next_q_preds = self.online_net(next_states)
            # Use eval_net to calculate next_q_preds for actions chosen by online_net
            next_q_preds = self.eval_net(next_states)
        act_q_preds = q_preds.gather(-1, batch['actions'].long().unsqueeze(-1)).squeeze(-1)
        online_actions = online_next_q_preds.argmax(dim=-1, keepdim=True)
        max_next_q_preds = next_q_preds.gather(-1, online_actions).squeeze(-1)
        max_q_targets = batch['rewards'] + self.gamma * (1 - batch['dones']) * max_next_q_preds
        logger.debug(f'act_q_preds: {act_q_preds}\nmax_q_targets: {max_q_targets}')
        q_loss = self.net.loss_fn(act_q_preds, max_q_targets)

        # TODO use the same loss_fn but do not reduce yet
        if 'Prioritized' in util.get_class_name(self.body.memory):  # PER
            errors = (max_q_targets - act_q_preds.detach()).abs().cpu().numpy()
            self.body.memory.update_priorities(errors)
        return q_loss

    def update_nets(self):
        if util.frame_mod(self.body.env.clock.frame, self.net.update_frequency, self.body.env.num_envs):
            if self.net.update_type == 'replace':
                net_util.copy(self.net, self.target_net)
            elif self.net.update_type == 'polyak':
                net_util.polyak_update(self.net, self.target_net, self.net.polyak_coef)
            else:
                raise ValueError('Unknown net.update_type. Should be "replace" or "polyak". Exiting.')

    @lab_api
    def update(self):
        '''Updates self.target_net and the explore variables'''
        self.update_nets()
        return super().update()
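
# --- Illustrative sketch (not part of the original source) -------------------
# The select/evaluate split in DQNBase.calc_q_loss above is what enables
# DoubleDQN: one set of Q values picks the next action (argmax), another set
# scores that action. Dummy tensors with 2 discrete actions.
import torch

online_next_q = torch.tensor([[1.0, 5.0], [2.0, 0.0]])  # used only to pick actions
eval_next_q = torch.tensor([[0.5, 1.5], [3.0, 0.2]])    # used only to score them
online_actions = online_next_q.argmax(dim=-1, keepdim=True)     # [[1], [0]]
max_next_q = eval_next_q.gather(-1, online_actions).squeeze(-1)  # [1.5, 3.0]
print(max_next_q.tolist())

# A minimal version of the "polyak" (soft) target update used by update_nets:
# target <- coef * target + (1 - coef) * source, parameter by parameter.
# `soft_update` here is a hypothetical helper, not the convlab net_util API.
import torch.nn as nn

def soft_update(source: nn.Module, target: nn.Module, coef: float = 0.995):
    with torch.no_grad():
        for src_p, tgt_p in zip(source.parameters(), target.parameters()):
            tgt_p.mul_(coef).add_((1.0 - coef) * src_p)

net, target_net = nn.Linear(4, 2), nn.Linear(4, 2)
soft_update(net, target_net, coef=0.9)
# ------------------------------------------------------------------------------
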

# Source code for convlab.agent.algorithm.policy_util
# Modified by Microsoft Corporation.
# Licensed under the MIT license.

import numpy as np
import torch
import torch.nn.functional as F
# Action policy module
# Constructs action probability distribution used by agent to sample action and calculate log_prob, entropy, etc.
from gym import spaces
from torch import distributions

# from convlab.env.wrapper import LazyFrames
from convlab.lib import distribution, logger, math_util, util

logger = logger.get_logger(__name__)

# register custom distributions
setattr(distributions, 'Argmax', distribution.Argmax)
setattr(distributions, 'GumbelCategorical', distribution.GumbelCategorical)
setattr(distributions, 'MultiCategorical', distribution.MultiCategorical)
# probability distributions constraints for different action types; the first in the list is the default
ACTION_PDS = {
    'continuous': ['Normal', 'Beta', 'Gumbel', 'LogNormal'],
    'multi_continuous': ['MultivariateNormal'],
    'discrete': ['Categorical', 'Argmax', 'GumbelCategorical'],
    'multi_discrete': ['MultiCategorical'],
    'multi_binary': ['Bernoulli'],
}


def get_action_type(action_space):
    '''Method to get the action type to choose prob. dist. to sample actions from NN logits output'''
    if isinstance(action_space, spaces.Box):
        shape = action_space.shape
        assert len(shape) == 1
        if shape[0] == 1:
            return 'continuous'
        else:
            return 'multi_continuous'
    elif isinstance(action_space, spaces.Discrete):
        return 'discrete'
    elif isinstance(action_space, spaces.MultiDiscrete):
        return 'multi_discrete'
    elif isinstance(action_space, spaces.MultiBinary):
        return 'multi_binary'
    else:
        raise NotImplementedError


# action_policy base methods


def get_action_pd_cls(action_pdtype, action_type):
    '''
    Verify and get the action prob. distribution class for construction
    Called by body at init to set its own ActionPD
    '''
    pdtypes = ACTION_PDS[action_type]
    assert action_pdtype in pdtypes, f'Pdtype {action_pdtype} is not compatible/supported with action_type {action_type}. Options are: {pdtypes}'
    ActionPD = getattr(distributions, action_pdtype)
    return ActionPD


def guard_tensor(state, body):
    '''Guard-cast tensor before being input to network'''
    # if isinstance(state, LazyFrames):
    #     state = state.__array__()  # realize data
    state = torch.from_numpy(state.astype(np.float32))
    if not body.env.is_venv or util.in_eval_lab_modes():
        # singleton state, unsqueeze as minibatch for net input
        state = state.unsqueeze(dim=0)
    return state


def calc_pdparam(state, algorithm, body):
    '''
    Prepare the state and run algorithm.calc_pdparam to get pdparam for action_pd
    @param tensor:state For pdparam = net(state)
    @param algorithm The algorithm containing self.net
    @param body Body which links algorithm to the env which the action is for
    @returns tensor:pdparam
    @example

    pdparam = calc_pdparam(state, algorithm, body)
    action_pd = ActionPD(logits=pdparam)  # e.g. ActionPD is Categorical
    action = action_pd.sample()
    '''
    if not torch.is_tensor(state):  # dont need to cast from numpy
        state = guard_tensor(state, body)
        state = state.to(algorithm.net.device)
    pdparam = algorithm.calc_pdparam(state)
    return pdparam


def init_action_pd(ActionPD, pdparam):
    '''
    Initialize the action_pd for discrete or continuous actions:
    - discrete: action_pd = ActionPD(logits)
    - continuous: action_pd = ActionPD(loc, scale)
    '''
    if 'logits' in ActionPD.arg_constraints:  # discrete
        action_pd = ActionPD(logits=pdparam)
    else:  # continuous, args = loc and scale
        if isinstance(pdparam, list):  # split output
            loc, scale = pdparam
        else:
            loc, scale = pdparam.transpose(0, 1)
        # scale (stdev) must be > 0, use softplus with positive
        scale = F.softplus(scale) + 1e-8
        if isinstance(pdparam, list):  # split output
            # construct covars from a batched scale tensor
            covars = torch.diag_embed(scale)
            action_pd = ActionPD(loc=loc, covariance_matrix=covars)
        else:
            action_pd = ActionPD(loc=loc, scale=scale)
    return action_pd


def sample_action(ActionPD, pdparam):
    '''
    Convenience method to sample action(s) from action_pd = ActionPD(pdparam)
    Works with batched pdparam too
    @returns tensor:action Sampled action(s)
    @example

    # policy contains:
    pdparam = calc_pdparam(state, algorithm, body)
    action = sample_action(body.ActionPD, pdparam)
    '''
    action_pd = init_action_pd(ActionPD, pdparam)
    action = action_pd.sample()
    return action

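# --- Illustrative sketch (not part of the original source) -------------------
# What init_action_pd/sample_action above amount to for the two action kinds,
# using torch.distributions directly with dummy pdparams.
import torch
import torch.nn.functional as F
from torch import distributions

# discrete: pdparam are logits
discrete_pd = distributions.Categorical(logits=torch.tensor([[2.0, 0.5, 0.1]]))
print(discrete_pd.sample(), discrete_pd.entropy())

# continuous: pdparam are (loc, scale), with scale forced positive via softplus
loc, raw_scale = torch.tensor([0.0]), torch.tensor([-1.0])
continuous_pd = distributions.Normal(loc=loc, scale=F.softplus(raw_scale) + 1e-8)
print(continuous_pd.sample(), continuous_pd.log_prob(torch.tensor([0.1])))
# ------------------------------------------------------------------------------
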
# action_policy used by agent


def default(state, algorithm, body):
    '''Plain policy by direct sampling from a default action probability defined by body.ActionPD'''
    pdparam = calc_pdparam(state, algorithm, body)
    action = sample_action(body.ActionPD, pdparam)
    return action


def random(state, algorithm, body):
    '''Random action using gym.action_space.sample(), with the same format as default()'''
    if body.env.is_venv and not util.in_eval_lab_modes():
        _action = [body.action_space.sample() for _ in range(body.env.num_envs)]
    else:
        _action = body.action_space.sample()
    action = torch.tensor([_action])
    return action


def epsilon_greedy(state, algorithm, body):
    '''Epsilon-greedy policy: with probability epsilon, do random action, otherwise do default sampling.'''
    epsilon = body.explore_var
    if epsilon > np.random.rand():
        return random(state, algorithm, body)
    else:
        return default(state, algorithm, body)


def boltzmann(state, algorithm, body):
    '''
    Boltzmann policy: adjust pdparam with temperature tau; the higher the more randomness/noise in action.
    '''
    tau = body.explore_var
    pdparam = calc_pdparam(state, algorithm, body)
    pdparam /= tau
    action = sample_action(body.ActionPD, pdparam)
    return action
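
# --- Illustrative sketch (not part of the original source) -------------------
# How the Boltzmann temperature tau above reshapes the action distribution:
# dividing logits by a large tau flattens the softmax (more exploration),
# a small tau sharpens it (more exploitation). Dummy logits only.
import torch
import torch.nn.functional as F

logits = torch.tensor([2.0, 1.0, 0.1])
for tau in (0.5, 1.0, 5.0):
    probs = F.softmax(logits / tau, dim=-1)
    print(tau, probs.tolist())
# tau=0.5 concentrates on the best action; tau=5.0 is close to uniform.
# ------------------------------------------------------------------------------
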

# Modified by Microsoft Corporation.
# Licensed under the MIT license.

from copy import deepcopy

import numpy as np
import torch

from convlab.agent.algorithm import policy_util
from convlab.agent.algorithm.actor_critic import ActorCritic
from convlab.agent.net import net_util
from convlab.lib import logger, math_util, util
from convlab.lib.decorator import lab_api

logger = logger.get_logger(__name__)


class PPO(ActorCritic):
    '''
    Implementation of PPO
    This is actually just ActorCritic with a custom loss function
    Original paper: "Proximal Policy Optimization Algorithms"
    https://arxiv.org/pdf/1707.06347.pdf

    Adapted from OpenAI baselines, CPU version https://github.com/openai/baselines/tree/master/baselines/ppo1
    Algorithm:
    for iteration = 1, 2, 3, ... do
        for actor = 1, 2, 3, ..., N do
            run policy pi_old in env for T timesteps
            compute advantage A_1, ..., A_T
        end for
        optimize surrogate L wrt theta, with K epochs and minibatch size M <= NT
    end for

    e.g. algorithm_spec
    "algorithm": {
        "name": "PPO",
        "action_pdtype": "default",
        "action_policy": "default",
        "explore_var_spec": null,
        "gamma": 0.99,
        "lam": 1.0,
        "clip_eps_spec": {
            "name": "linear_decay",
            "start_val": 0.01,
            "end_val": 0.001,
            "start_step": 100,
            "end_step": 5000,
        },
        "entropy_coef_spec": {
            "name": "linear_decay",
            "start_val": 0.01,
            "end_val": 0.001,
            "start_step": 100,
            "end_step": 5000,
        },
        "minibatch_size": 256,
        "training_frequency": 1,
        "training_epoch": 8,
    }

    e.g. special net_spec param "shared" to share/separate Actor/Critic
    "net": {
        "type": "MLPNet",
        "shared": true,
        ...
    '''

    @lab_api
    def init_algorithm_params(self):
        '''Initialize other algorithm parameters'''
        # set default
        util.set_attr(self, dict(
            action_pdtype='default',
            action_policy='default',
            explore_var_spec=None,
            entropy_coef_spec=None,
            minibatch_size=4,
            val_loss_coef=1.0,
        ))
        util.set_attr(self, self.algorithm_spec, [
            'action_pdtype',
            'action_policy',
            # theoretically, PPO does not have policy update; but in this implementation we have such option
            'explore_var_spec',
            'gamma',
            'lam',
            'clip_eps_spec',
            'entropy_coef_spec',
            'val_loss_coef',
            'minibatch_size',
            'training_frequency',  # horizon
            'training_epoch',
        ])
        self.to_train = 0
        self.action_policy = getattr(policy_util, self.action_policy)
        self.explore_var_scheduler = policy_util.VarScheduler(self.explore_var_spec)
        self.body.explore_var = self.explore_var_scheduler.start_val
        # extra variable decays for PPO
        self.clip_eps_scheduler = policy_util.VarScheduler(self.clip_eps_spec)
        self.body.clip_eps = self.clip_eps_scheduler.start_val
        if self.entropy_coef_spec is not None:
            self.entropy_coef_scheduler = policy_util.VarScheduler(self.entropy_coef_spec)
            self.body.entropy_coef = self.entropy_coef_scheduler.start_val
        # PPO uses GAE
        self.calc_advs_v_targets = self.calc_gae_advs_v_targets

    @lab_api
    def init_nets(self, global_nets=None):
        '''PPO uses old and new to calculate ratio for loss'''
        super().init_nets(global_nets)
        # create old net to calculate ratio
        self.old_net = deepcopy(self.net)
        assert id(self.old_net) != id(self.net)

    def calc_policy_loss(self, batch, pdparams, advs):
        '''
        The PPO loss function (subscript t is omitted)
        L^{CLIP+VF+S} = E[ L^CLIP - c1 * L^VF + c2 * S[pi](s) ]

        Breakdown piecewise,
        1. L^CLIP = E[ min(ratio * A, clip(ratio, 1-eps, 1+eps) * A) ]
        where ratio = pi(a|s) / pi_old(a|s)

        2. L^VF = E[ mse(V(s_t), V^target) ]

        3. S = E[ entropy ]
        '''
        clip_eps = self.body.clip_eps
        action_pd = policy_util.init_action_pd(self.body.ActionPD, pdparams)
        states = batch['states']
        actions = batch['actions']
        if self.body.env.is_venv:
            states = math_util.venv_unpack(states)
            actions = math_util.venv_unpack(actions)

        # L^CLIP
        log_probs = action_pd.log_prob(actions)
        with torch.no_grad():
            old_pdparams = self.calc_pdparam(states, net=self.old_net)
            old_action_pd = policy_util.init_action_pd(self.body.ActionPD, old_pdparams)
            old_log_probs = old_action_pd.log_prob(actions)
        assert log_probs.shape == old_log_probs.shape
        ratios = torch.exp(log_probs - old_log_probs)  # clip to prevent overflow
        logger.debug(f'ratios: {ratios}')
        sur_1 = ratios * advs
        sur_2 = torch.clamp(ratios, 1.0 - clip_eps, 1.0 + clip_eps) * advs
        # flip sign because need to maximize
        clip_loss = -torch.min(sur_1, sur_2).mean()
        logger.debug(f'clip_loss: {clip_loss}')

        # L^VF (inherit from ActorCritic)

        # S entropy bonus
        entropy = action_pd.entropy().mean()
        self.body.mean_entropy = entropy  # update logging variable
        ent_penalty = -self.body.entropy_coef * entropy
        logger.debug(f'ent_penalty: {ent_penalty}')

        policy_loss = clip_loss + ent_penalty
        logger.debug(f'PPO Actor policy loss: {policy_loss:g}')
        return policy_loss

    def train(self):
        if util.in_eval_lab_modes():
            return np.nan
        clock = self.body.env.clock
        if self.to_train == 1:
            net_util.copy(self.net, self.old_net)  # update old net
            batch = self.sample()
            clock.set_batch_size(len(batch))
            _pdparams, v_preds = self.calc_pdparam_v(batch)
            advs, v_targets = self.calc_advs_v_targets(batch, v_preds)
            # piggy back on batch, but remember to not pack or unpack
            batch['advs'], batch['v_targets'] = advs, v_targets
            if self.body.env.is_venv:  # unpack if venv for minibatch sampling
                for k, v in batch.items():
                    if k not in ('advs', 'v_targets'):
                        batch[k] = math_util.venv_unpack(v)
            total_loss = torch.tensor(0.0)
            for _ in range(self.training_epoch):
                minibatches = util.split_minibatch(batch, self.minibatch_size)
                for minibatch in minibatches:
                    if self.body.env.is_venv:  # re-pack to restore proper shape
                        for k, v in minibatch.items():
                            if k not in ('advs', 'v_targets'):
                                minibatch[k] = math_util.venv_pack(v, self.body.env.num_envs)
                    advs, v_targets = minibatch['advs'], minibatch['v_targets']
                    pdparams, v_preds = self.calc_pdparam_v(minibatch)
                    policy_loss = self.calc_policy_loss(minibatch, pdparams, advs)  # from actor
                    val_loss = self.calc_val_loss(v_preds, v_targets)  # from critic
                    if self.shared:  # shared network
                        loss = policy_loss + val_loss
                        self.net.train_step(loss, self.optim, self.lr_scheduler, clock=clock, global_net=self.global_net)
                    else:
                        self.net.train_step(policy_loss, self.optim, self.lr_scheduler, clock=clock, global_net=self.global_net)
                        self.critic_net.train_step(val_loss, self.critic_optim, self.critic_lr_scheduler, clock=clock, global_net=self.global_critic_net)
                        loss = policy_loss + val_loss
                    total_loss += loss
            loss = total_loss / self.training_epoch / len(minibatches)
            # reset
            self.to_train = 0
            logger.debug(f'Trained {self.name} at epi: {clock.epi}, frame: {clock.frame}, t: {clock.t}, total_reward so far: {self.body.total_reward}, loss: {loss:g}')
            return loss.item()
        else:
            return np.nan
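
# --- Illustrative sketch (not part of the original source) -------------------
# The clipped surrogate from calc_policy_loss above, on dummy numbers: each
# sample contributes min(ratio * A, clip(ratio, 1-eps, 1+eps) * A), which
# removes the incentive to push the ratio far from 1 when doing so would
# increase the objective.
import torch

eps = 0.2
ratios = torch.tensor([0.5, 1.0, 1.5])
advs = torch.tensor([1.0, 1.0, 1.0])
sur_1 = ratios * advs
sur_2 = torch.clamp(ratios, 1.0 - eps, 1.0 + eps) * advs
clip_loss = -torch.min(sur_1, sur_2).mean()  # negated because we maximize
print(torch.min(sur_1, sur_2).tolist(), clip_loss.item())
# [0.5, 1.0, 1.2]: the ratio 1.5 is clipped to 1.2 since the advantage is positive.
# ------------------------------------------------------------------------------
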

# Modified by Microsoft Corporation.
# Licensed under the MIT license.

import numpy as np

# The random agent algorithm
# For basic dev purpose
from convlab.agent.algorithm.base import Algorithm
from convlab.lib import logger, util
from convlab.lib.decorator import lab_api

logger = logger.get_logger(__name__)


class Random(Algorithm):
    '''
    Example Random agent that works in both discrete and continuous envs
    '''


# Modified by Microsoft Corporation.
# Licensed under the MIT license.

import numpy as np

from convlab.agent import net
from convlab.agent.algorithm import policy_util
from convlab.agent.algorithm.base import Algorithm
from convlab.agent.net import net_util
from convlab.lib import logger, math_util, util
from convlab.lib.decorator import lab_api

logger = logger.get_logger(__name__)


class Reinforce(Algorithm):
    '''
    Implementation of REINFORCE (Williams, 1992) with baseline for discrete or continuous actions http://www-anw.cs.umass.edu/~barto/courses/cs687/williams92simple.pdf
    Adapted from https://github.com/pytorch/examples/blob/master/reinforcement_learning/reinforce.py
    Algorithm:
        0. Collect n episodes of data
        1. At each timestep in an episode
            - Calculate the advantage of that timestep
            - Multiply the advantage by the negative of the log probability of the action taken
        2. Sum all the values above.
        3. Calculate the gradient of this value with respect to all of the parameters of the network
        4. Update the network parameters using the gradient

    e.g. algorithm_spec:
    "algorithm": {
        "name": "Reinforce",
        "action_pdtype": "default",
        "action_policy": "default",
        "explore_var_spec": null,
        "gamma": 0.99,
        "entropy_coef_spec": {
            "name": "linear_decay",
            "start_val": 0.01,
            "end_val": 0.001,
            "start_step": 100,
            "end_step": 5000,
        },
        "training_frequency": 1,
    }
    '''

    @lab_api
    def init_algorithm_params(self):
        '''Initialize other algorithm parameters'''
        # set default
        util.set_attr(self, dict(
            action_pdtype='default',
            action_policy='default',
            explore_var_spec=None,
            entropy_coef_spec=None,
            policy_loss_coef=1.0,
        ))
        util.set_attr(self, self.algorithm_spec, [
            'action_pdtype',
            'action_policy',
            # theoretically, REINFORCE does not have policy update; but in this implementation we have such option
            'explore_var_spec',
            'gamma',  # the discount factor
            'entropy_coef_spec',
            'policy_loss_coef',
            'training_frequency',
        ])
        self.to_train = 0
        self.action_policy = getattr(policy_util, self.action_policy)
        self.explore_var_scheduler = policy_util.VarScheduler(self.explore_var_spec)
        self.body.explore_var = self.explore_var_scheduler.start_val
        if self.entropy_coef_spec is not None:
            self.entropy_coef_scheduler = policy_util.VarScheduler(self.entropy_coef_spec)
            self.body.entropy_coef = self.entropy_coef_scheduler.start_val

    @lab_api
    def init_nets(self, global_nets=None):
        '''
        Initialize the neural network used to learn the policy function from the spec
        Below we automatically select an appropriate net for a discrete or continuous action space if the setting is of the form 'MLPNet'. Otherwise the correct type of network is assumed to be specified in the spec.
        Networks for continuous action spaces have two heads and return two values, the first is a tensor containing the mean of the action policy, the second is a tensor containing the std deviation of the action policy. The distribution is assumed to be a Gaussian (Normal) distribution.
        Networks for discrete action spaces have a single head and return the logits for a categorical probability distribution over the discrete actions
        '''
        in_dim = self.body.state_dim
        out_dim = net_util.get_out_dim(self.body)
        NetClass = getattr(net, self.net_spec['type'])
        self.net = NetClass(self.net_spec, in_dim, out_dim)
        self.net_names = ['net']
        # init net optimizer and its lr scheduler
        self.optim = net_util.get_optim(self.net, self.net.optim_spec)
        self.lr_scheduler = net_util.get_lr_scheduler(self.optim, self.net.lr_scheduler_spec)
        net_util.set_global_nets(self, global_nets)
        self.post_init_nets()

    @lab_api
    def calc_pdparam(self, x, net=None):
        '''
        The pdparam will be the logits for discrete prob. dist., or the mean and std for continuous prob. dist.
        '''
        net = self.net if net is None else net
        pdparam = net(x)
        return pdparam

    @lab_api
    def sample(self):
        '''Samples a batch from memory'''
        batch = self.body.memory.sample()
        batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
        return batch

    def calc_pdparam_batch(self, batch):
        '''Efficiently forward to get pdparam by batch for loss computation'''
        states = batch['states']
        if self.body.env.is_venv:
            states = math_util.venv_unpack(states)
        pdparam = self.calc_pdparam(states)
        return pdparam

    def calc_ret_advs(self, batch):
        '''Calculate plain returns, which are generalized to advantages in ActorCritic'''
        rets = math_util.calc_returns(batch['rewards'], batch['dones'], self.gamma)
        advs = rets
        if self.body.env.is_venv:
            advs = math_util.venv_unpack(advs)
        logger.debug(f'advs: {advs}')
        return advs
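
# --- Illustrative sketch (not part of the original source) -------------------
# The plain returns used above are the discounted reward-to-go
# R_t = r_t + gamma * R_{t+1}, accumulated backwards and reset at episode ends.
# Dummy numbers; the actual implementation lives in convlab.lib.math_util.
import torch

def discounted_returns(rewards, dones, gamma=0.99):
    rets = torch.zeros_like(rewards)
    future_ret = 0.0
    for t in reversed(range(len(rewards))):
        future_ret = rewards[t] + gamma * (1.0 - dones[t]) * future_ret
        rets[t] = future_ret
    return rets

print(discounted_returns(torch.tensor([1.0, 1.0, 1.0]), torch.tensor([0.0, 0.0, 1.0])))
# tensor([2.9701, 1.9900, 1.0000])
# ------------------------------------------------------------------------------
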

class WarmUpReinforce(Reinforce):
    '''
    Implementation of REINFORCE (Williams, 1992) with baseline for discrete or continuous actions http://www-anw.cs.umass.edu/~barto/courses/cs687/williams92simple.pdf
    Adapted from https://github.com/pytorch/examples/blob/master/reinforcement_learning/reinforce.py
    Algorithm:
        0. Collect n episodes of data
        1. At each timestep in an episode
            - Calculate the advantage of that timestep
            - Multiply the advantage by the negative of the log probability of the action taken
        2. Sum all the values above.
        3. Calculate the gradient of this value with respect to all of the parameters of the network
        4. Update the network parameters using the gradient

    e.g. algorithm_spec:
    "algorithm": {
        "name": "Reinforce",
        "action_pdtype": "default",
        "action_policy": "default",
        "warmup_epi": 300,
        "explore_var_spec": null,
        "gamma": 0.99,
        "entropy_coef_spec": {
            "name": "linear_decay",
            "start_val": 0.01,
            "end_val": 0.001,
            "start_step": 100,
            "end_step": 5000,
        },
        "training_frequency": 1,
    }
    '''

    def __init__(self, agent, global_nets=None):
        super().__init__(agent, global_nets)
        util.set_attr(self, self.algorithm_spec, [
            'warmup_epi',
        ])

# Modified by Microsoft Corporation.
# Licensed under the MIT license.

import numpy as np
import torch

from convlab.agent import net
from convlab.agent.algorithm import policy_util
from convlab.agent.algorithm.base import Algorithm
from convlab.agent.net import net_util
from convlab.lib import logger, math_util, util
from convlab.lib.decorator import lab_api

logger = logger.get_logger(__name__)


class SARSA(Algorithm):
    '''
    Implementation of SARSA.

    Algorithm:
    Repeat:
        1. Collect some examples by acting in the environment and store them in an on-policy replay memory (either batch or episodic)
        2. For each example calculate the target (bootstrapped estimate of the discounted value of the state and action taken), y, using a neural network to approximate the Q function. s_t' is the next state following the action actually taken, a_t. a_t' is the action actually taken in the next state s_t'.
                y_t = r_t + gamma * Q(s_t', a_t')
        3. For each example calculate the current estimate of the discounted value of the state and action taken
                x_t = Q(s_t, a_t)
        4. Calculate L(x, y) where L is a regression loss (eg. mse)
        5. Calculate the gradient of L with respect to all the parameters in the network and update the network parameters using the gradient

    e.g. algorithm_spec
    "algorithm": {
        "name": "SARSA",
        "action_pdtype": "default",
        "action_policy": "boltzmann",
        "explore_var_spec": {
            "name": "linear_decay",
            "start_val": 1.0,
            "end_val": 0.1,
            "start_step": 10,
            "end_step": 1000,
        },
        "gamma": 0.99,
        "training_frequency": 10,
    }
    '''

    @lab_api
    def init_algorithm_params(self):
        '''Initialize other algorithm parameters.'''
        # set default
        util.set_attr(self, dict(
            action_pdtype='default',
            action_policy='default',
            explore_var_spec=None,
        ))
        util.set_attr(self, self.algorithm_spec, [
            'action_pdtype',
            'action_policy',
            # explore_var is epsilon, tau or etc. depending on the action policy
            # these control the trade-off between exploration and exploitation
            'explore_var_spec',
            'gamma',  # the discount factor
            'training_frequency',  # how often to train for batch training (once every training_frequency time steps)
        ])
        self.to_train = 0
        self.action_policy = getattr(policy_util, self.action_policy)
        self.explore_var_scheduler = policy_util.VarScheduler(self.explore_var_spec)
        self.body.explore_var = self.explore_var_scheduler.start_val

    @lab_api
    def init_nets(self, global_nets=None):
        '''Initialize the neural network used to learn the Q function from the spec'''
        if 'Recurrent' in self.net_spec['type']:
            self.net_spec.update(seq_len=self.net_spec['seq_len'])
        in_dim = self.body.state_dim
        out_dim = net_util.get_out_dim(self.body)
        NetClass = getattr(net, self.net_spec['type'])
        self.net = NetClass(self.net_spec, in_dim, out_dim)
        self.net_names = ['net']
        # init net optimizer and its lr scheduler
        self.optim = net_util.get_optim(self.net, self.net.optim_spec)
        self.lr_scheduler = net_util.get_lr_scheduler(self.optim, self.net.lr_scheduler_spec)
        net_util.set_global_nets(self, global_nets)
        self.post_init_nets()

    @lab_api
    def calc_pdparam(self, x, net=None):
        '''
        To get the pdparam for action policy sampling, do a forward pass of the appropriate net, and pick the correct outputs.
        The pdparam will be the logits for discrete prob. dist., or the mean and std for continuous prob. dist.
        '''
        net = self.net if net is None else net
        pdparam = net(x)
        return pdparam

    @lab_api
    def act(self, state):
        '''Note, SARSA is discrete-only'''
        body = self.body
        action = self.action_policy(state, self, body)
        return action.cpu().squeeze().numpy()  # squeeze to handle scalar

    @lab_api
    def sample(self):
        '''Samples a batch from memory'''
        batch = self.body.memory.sample()
        # this is safe for next_action at done since the calculated act_next_q_preds will be multiplied by (1 - batch['dones'])
        batch['next_actions'] = np.zeros_like(batch['actions'])
        batch['next_actions'][:-1] = batch['actions'][1:]
        batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
        return batch

    def calc_q_loss(self, batch):
        '''Compute the Q value loss using predicted and target Q values from the appropriate networks'''
        states = batch['states']
        next_states = batch['next_states']
        if self.body.env.is_venv:
            states = math_util.venv_unpack(states)
            next_states = math_util.venv_unpack(next_states)
        q_preds = self.net(states)
        with torch.no_grad():
            next_q_preds = self.net(next_states)
        if self.body.env.is_venv:
            q_preds = math_util.venv_pack(q_preds, self.body.env.num_envs)
            next_q_preds = math_util.venv_pack(next_q_preds, self.body.env.num_envs)
        act_q_preds = q_preds.gather(-1, batch['actions'].long().unsqueeze(-1)).squeeze(-1)
        act_next_q_preds = next_q_preds.gather(-1, batch['next_actions'].long().unsqueeze(-1)).squeeze(-1)
        act_q_targets = batch['rewards'] + self.gamma * (1 - batch['dones']) * act_next_q_preds
        logger.debug(f'act_q_preds: {act_q_preds}\nact_q_targets: {act_q_targets}')
        q_loss = self.net.loss_fn(act_q_preds, act_q_targets)
        return q_loss

    @lab_api
    def train(self):
        '''
        Completes one training step for the agent if it is time to train.
        Otherwise this function does nothing.
        '''
        if util.in_eval_lab_modes():
            return np.nan
        clock = self.body.env.clock
        if self.to_train == 1:
            batch = self.sample()
            clock.set_batch_size(len(batch))
            loss = self.calc_q_loss(batch)
            self.net.train_step(loss, self.optim, self.lr_scheduler, clock=clock, global_net=self.global_net)
            # reset
            self.to_train = 0
            logger.debug(f'Trained {self.name} at epi: {clock.epi}, frame: {clock.frame}, t: {clock.t}, total_reward so far: {self.body.total_reward}, loss: {loss:g}')
            return loss.item()
        else:
            return np.nan

    @lab_api
    def update(self):
        '''Update the agent after training'''
        self.body.explore_var = self.explore_var_scheduler.update(self, self.body.env.clock)
        return self.body.explore_var
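
# --- Illustrative sketch (not part of the original source) -------------------
# The on-policy SARSA target computed in calc_q_loss above uses the Q value of
# the action actually taken next, rather than the max over actions as in DQN.
# Dummy tensors with 2 discrete actions.
import torch

gamma = 0.99
next_q = torch.tensor([[1.0, 2.0], [0.5, 0.1]])
next_actions = torch.tensor([0, 1])           # actions actually taken at t+1
rewards = torch.tensor([1.0, 0.0])
dones = torch.tensor([0.0, 0.0])
act_next_q = next_q.gather(-1, next_actions.unsqueeze(-1)).squeeze(-1)   # [1.0, 0.1]
sarsa_targets = rewards + gamma * (1 - dones) * act_next_q               # [1.99, 0.099]
dqn_targets = rewards + gamma * (1 - dones) * next_q.max(dim=-1).values  # [2.98, 0.495]
print(sarsa_targets.tolist(), dqn_targets.tolist())
# ------------------------------------------------------------------------------
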

# Modified by Microsoft Corporation.
# Licensed under the MIT license.

from abc import ABC, abstractmethod

from convlab.lib import logger

logger = logger.get_logger(__name__)


class Memory(ABC):
    '''Abstract Memory class to define the API methods'''

    def __init__(self, memory_spec, body):
        '''
        @param {*} body is the unit that stores its experience in this memory. Each body has a distinct memory.
        '''
        self.memory_spec = memory_spec
        self.body = body
        # declare what data keys to store
        self.data_keys = ['states', 'actions', 'rewards', 'next_states', 'dones', 'priorities']

    @abstractmethod
    def reset(self):
        '''Method to fully reset the memory storage and related variables'''
        raise NotImplementedError

    @abstractmethod
    def update(self, state, action, reward, next_state, done):
        '''Implement memory update given the full info from the latest timestep. NOTE: guard for np.nan reward and done when individual env resets.'''
        raise NotImplementedError
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+fromconvlab.agent.memory.baseimportMemory
+fromconvlab.libimportlogger,util
+fromconvlab.lib.decoratorimportlab_api
+
+logger=logger.get_logger(__name__)
+
+
+
[docs]classOnPolicyReplay(Memory):
+ '''
+ Stores agent experiences and returns them in a batch for agent training.
+
+ An experience consists of
+ - state: representation of a state
+ - action: action taken
+ - reward: scalar value
+ - next state: representation of next state (should be same as state)
+ - done: 0 / 1 representing if the current state is the last in an episode
+
+ The memory does not have a fixed size. Instead the memory stores data from N episodes, where N is determined by the user. After N episodes, all of the examples are returned to the agent to learn from.
+
+ When the examples are returned to the agent, the memory is cleared to prevent the agent from learning from off policy experiences. This memory is intended for on policy algorithms.
+
+ Differences vs. Replay memory:
+ - Experiences are nested into episodes. In Replay experiences are flat, and episode is not tracked
+ - The entire memory constitutes a batch. In Replay, batches are sampled from memory.
+ - The memory is cleared automatically when a batch is given to the agent.
+
+ e.g. memory_spec
+ "memory": {
+ "name": "OnPolicyReplay"
+ }
+ '''
+
+ def__init__(self,memory_spec,body):
+ super().__init__(memory_spec,body)
+ # NOTE for OnPolicy replay, frequency = episode; for other classes below frequency = frames
+ util.set_attr(self,self.body.agent.agent_spec['algorithm'],['training_frequency'])
+ # Don't want total experiences reset when memory is
+ self.is_episodic=True
+ self.size=0# total experiences stored
+ self.seen_size=0# total experiences seen cumulatively
+ # declare what data keys to store
+ self.data_keys=['states','actions','rewards','next_states','dones']
+ self.reset()
+
+
[docs]@lab_api
+ defreset(self):
+ '''Resets the memory. Also used to initialize memory vars'''
+ forkinself.data_keys:
+ setattr(self,k,[])
+ self.cur_epi_data={k:[]forkinself.data_keys}
+ self.most_recent=(None,)*len(self.data_keys)
+ self.size=0
[docs]defadd_experience(self,state,action,reward,next_state,done):
+ '''Interface helper method for update() to add experience to memory'''
+ self.most_recent=(state,action,reward,next_state,done)
+ foridx,kinenumerate(self.data_keys):
+ self.cur_epi_data[k].append(self.most_recent[idx])
+ # If episode ended, add to memory and clear cur_epi_data
+ ifutil.epi_done(done):
+ forkinself.data_keys:
+ getattr(self,k).append(self.cur_epi_data[k])
+ self.cur_epi_data={k:[]forkinself.data_keys}
+ # If agent has collected the desired number of episodes, it is ready to train
+ # length is num of epis due to nested structure
+ # if len(self.states) == self.body.agent.algorithm.training_frequency:
+ iflen(self.states)%self.body.agent.algorithm.training_frequency==0:
+ self.body.agent.algorithm.to_train=1
+ # Track memory size and num experiences
+ self.size+=1
+ self.seen_size+=1
+
+
[docs]defget_most_recent_experience(self):
+ '''Returns the most recent experience'''
+ returnself.most_recent
+
+
[docs]defsample(self):
+ '''
+ Returns all the examples from memory in a single batch. Batch is stored as a dict.
+ Keys are the names of the different elements of an experience. Values are nested lists of the corresponding sampled elements. Elements are nested into episodes
+ e.g.
+ batch = {
+ 'states' : [[s_epi1], [s_epi2], ...],
+ 'actions' : [[a_epi1], [a_epi2], ...],
+ 'rewards' : [[r_epi1], [r_epi2], ...],
+ 'next_states': [[ns_epi1], [ns_epi2], ...],
+ 'dones' : [[d_epi1], [d_epi2], ...]}
+ '''
+ batch={k:getattr(self,k)forkinself.data_keys}
+ self.reset()
+ returnbatch
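+
+# Illustrative only (not part of ConvLab): how OnPolicyReplay nests experiences into episodes,
+# using plain dicts and lists (no agent/body objects) to mirror add_experience() and sample().
+data_keys = ['states', 'actions', 'rewards', 'next_states', 'dones']
+memory = {k: [] for k in data_keys}    # episode-nested storage, as in reset()
+cur_epi = {k: [] for k in data_keys}   # current-episode buffer, as in cur_epi_data
+
+for s, a, r, ns, d in [(0, 1, 0.0, 1, 0), (1, 0, 1.0, 2, 1)]:   # a hypothetical 2-step episode
+    for k, v in zip(data_keys, (s, a, r, ns, d)):
+        cur_epi[k].append(v)
+    if d:                              # episode done: nest it into memory and clear the buffer
+        for k in data_keys:
+            memory[k].append(cur_epi[k])
+        cur_epi = {k: [] for k in data_keys}
+
+batch = {k: memory[k] for k in data_keys}   # e.g. batch['rewards'] == [[0.0, 1.0]]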
+
+
+
[docs]classOnPolicyBatchReplay(OnPolicyReplay):
+ '''
+ Same as OnPolicyReplay Memory with the following difference.
+
+ The memory does not have a fixed size. Instead the memory stores data from N experiences, where N is determined by the user. After N experiences or if an episode has ended, all of the examples are returned to the agent to learn from.
+
+ In contrast, OnPolicyReplay stores entire episodes and stores them in a nested structure. OnPolicyBatchReplay stores experiences in a flat structure.
+
+ e.g. memory_spec
+ "memory": {
+ "name": "OnPolicyBatchReplay"
+ }
+ * batch_size is training_frequency provided by algorithm_spec
+ '''
+
+ def__init__(self,memory_spec,body):
+ super().__init__(memory_spec,body)
+ self.is_episodic=False
+
+
[docs]defadd_experience(self,state,action,reward,next_state,done):
+ '''Interface helper method for update() to add experience to memory'''
+ self.most_recent=[state,action,reward,next_state,done]
+ foridx,kinenumerate(self.data_keys):
+ getattr(self,k).append(self.most_recent[idx])
+ # Track memory size and num experiences
+ self.size+=1
+ self.seen_size+=1
+ # Decide if agent is to train
+ iflen(self.states)==self.body.agent.algorithm.training_frequency:
+ self.body.agent.algorithm.to_train=1
+
+
[docs]defsample(self):
+ '''
+ Returns all the examples from memory in a single batch. Batch is stored as a dict.
+ Keys are the names of the different elements of an experience. Values are a list of the corresponding sampled elements
+ e.g.
+ batch = {
+ 'states' : states,
+ 'actions' : actions,
+ 'rewards' : rewards,
+ 'next_states': next_states,
+ 'dones' : dones}
+ '''
+ returnsuper().sample()
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+importrandom
+
+importnumpyasnp
+
+fromconvlab.agent.memory.replayimportReplay
+fromconvlab.libimportutil
+
+
+
[docs]classSumTree:
+ '''
+ Helper class for PrioritizedReplay
+
+ This implementation is, with minor adaptations, Jaromír Janisch's. The license is reproduced below.
+ For more information see his excellent blog series "Let's make a DQN" https://jaromiru.com/2016/09/27/lets-make-a-dqn-theory/
+
+ MIT License
+
+ Copyright (c) 2018 Jaromír Janisch
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+ '''
+ write=0
+
+ def__init__(self,capacity):
+ self.capacity=capacity
+ self.tree=np.zeros(2*capacity-1)# Stores the priorities and sums of priorities
+ self.indices=np.zeros(capacity)# Stores the indices of the experiences
+
+ def_propagate(self,idx,change):
+ parent=(idx-1)//2
+
+ self.tree[parent]+=change
+
+ ifparent!=0:
+ self._propagate(parent,change)
+
+ def_retrieve(self,idx,s):
+ left=2*idx+1
+ right=left+1
+
+ ifleft>=len(self.tree):
+ returnidx
+
+ ifs<=self.tree[left]:
+ returnself._retrieve(left,s)
+ else:
+ returnself._retrieve(right,s-self.tree[left])
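+
+# Note: PrioritizedReplay below also calls SumTree.total(), add(), update() and get(), which are
+# not shown in this excerpt. The standalone sketch below (an assumption, following Jaromír
+# Janisch's reference implementation cited above) shows what those methods are expected to do.
+import numpy as np
+
+class SumTreeSketch:
+    def __init__(self, capacity):
+        self.capacity = capacity
+        self.write = 0                            # next leaf slot to write
+        self.tree = np.zeros(2 * capacity - 1)    # internal nodes hold sums, leaves hold priorities
+        self.indices = np.zeros(capacity)         # memory slot index per leaf
+
+    def _propagate(self, idx, change):
+        parent = (idx - 1) // 2
+        self.tree[parent] += change
+        if parent != 0:
+            self._propagate(parent, change)
+
+    def _retrieve(self, idx, s):
+        left, right = 2 * idx + 1, 2 * idx + 2
+        if left >= len(self.tree):
+            return idx
+        if s <= self.tree[left]:
+            return self._retrieve(left, s)
+        return self._retrieve(right, s - self.tree[left])
+
+    def total(self):
+        return self.tree[0]                       # root stores the sum of all priorities
+
+    def add(self, p, index):
+        idx = self.write + self.capacity - 1      # leaf position in the flat tree array
+        self.indices[self.write] = index
+        self.update(idx, p)
+        self.write = (self.write + 1) % self.capacity
+
+    def update(self, idx, p):
+        change = p - self.tree[idx]
+        self.tree[idx] = p
+        self._propagate(idx, change)
+
+    def get(self, s):
+        idx = self._retrieve(0, s)                # tree index of the sampled leaf
+        return (idx, self.tree[idx], self.indices[idx - self.capacity + 1])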
+
+
[docs]classPrioritizedReplay(Replay):
+ '''
+ Prioritized Experience Replay
+
+ Implementation follows the approach in the paper "Prioritized Experience Replay", Schaul et al 2015" https://arxiv.org/pdf/1511.05952.pdf and is Jaromír Janisch's with minor adaptations.
+ See memory_util.py for the license and link to Jaromír's excellent blog
+
+ Stores agent experiences and samples from them for agent training according to each experience's priority
+
+ The memory has the same behaviour and storage structure as Replay memory with the addition of a SumTree to store and sample the priorities.
+
+ e.g. memory_spec
+ "memory": {
+ "name": "PrioritizedReplay",
+ "alpha": 1,
+ "epsilon": 0,
+ "batch_size": 32,
+ "max_size": 10000,
+ "use_cer": true
+ }
+ '''
+
+ def__init__(self,memory_spec,body):
+ util.set_attr(self,memory_spec,[
+ 'alpha',
+ 'epsilon',
+ 'batch_size',
+ 'max_size',
+ 'use_cer',
+ ])
+ super().__init__(memory_spec,body)
+
+ self.epsilon=np.full((1,),self.epsilon)
+ self.alpha=np.full((1,),self.alpha)
+ # add a per-experience 'priorities' scalar to the data_keys and call reset() again
+ self.data_keys=['states','actions','rewards','next_states','dones','priorities']
+ self.reset()
+
+
[docs]defadd_experience(self,state,action,reward,next_state,done,error=100000):
+ '''
+ Implementation for update() to add experience to memory, expanding the memory size if necessary.
+ All experiences are added with a high priority to increase the likelihood that they are sampled at least once.
+ '''
+ super().add_experience(state,action,reward,next_state,done)
+ priority=self.get_priority(error)
+ self.priorities[self.head]=priority
+ self.tree.add(priority,self.head)
+
+
[docs]defget_priority(self,error):
+ '''Takes in the error of one or more examples and returns the proportional priority'''
+ returnnp.power(error+self.epsilon,self.alpha).squeeze()
+
+
[docs]defsample_idxs(self,batch_size):
+ '''Samples batch_size indices from memory in proportion to their priority.'''
+ batch_idxs=np.zeros(batch_size)
+ tree_idxs = np.zeros(batch_size, dtype=int)  # use builtin int; np.int is deprecated
+
+ foriinrange(batch_size):
+ s=random.uniform(0,self.tree.total())
+ (tree_idx,p,idx)=self.tree.get(s)
+ batch_idxs[i]=idx
+ tree_idxs[i]=tree_idx
+
+ batch_idxs=np.asarray(batch_idxs).astype(int)
+ self.tree_idxs=tree_idxs
+ ifself.use_cer:# add the latest sample
+ batch_idxs[-1]=self.head
+ returnbatch_idxs
+
+
[docs]defupdate_priorities(self,errors):
+ '''
+ Updates the priorities from the most recent batch
+ Assumes the relevant batch indices are stored in self.batch_idxs
+ '''
+ priorities=self.get_priority(errors)
+ assertlen(priorities)==self.batch_idxs.size
+ foridx,pinzip(self.batch_idxs,priorities):
+ self.priorities[idx]=p
+ forp,iinzip(priorities,self.tree_idxs):
+ self.tree.update(i,p)
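+
+# Illustrative only (not part of ConvLab): proportional prioritization as in get_priority() and
+# sample_idxs() above, p_i = (error_i + epsilon)^alpha with P(i) = p_i / sum_j p_j, on toy numbers.
+import numpy as np
+toy_errors = np.array([0.1, 1.0, 5.0])
+toy_epsilon, toy_alpha = 0.01, 1.0
+toy_priorities = np.power(toy_errors + toy_epsilon, toy_alpha)
+toy_probs = toy_priorities / toy_priorities.sum()    # ~[0.018, 0.165, 0.817]
+sampled = np.random.choice(len(toy_errors), size=2, p=toy_probs)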
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+importnumpyasnp
+
+fromconvlab.agent.memory.baseimportMemory
+fromconvlab.libimportlogger,util
+fromconvlab.lib.decoratorimportlab_api
+
+logger=logger.get_logger(__name__)
+
+
+
[docs]defsample_next_states(head,max_size,ns_idx_offset,batch_idxs,states,ns_buffer):
+ '''Method to sample next_states from states, with proper guard for next_state idx being out of bound'''
+ # idxs for next state is state idxs with offset, modded
+ ns_batch_idxs=(batch_idxs+ns_idx_offset)%max_size
+ # if head < ns_idx <= head + ns_idx_offset, ns is stored in ns_buffer
+ ns_batch_idxs=ns_batch_idxs%max_size
+ buffer_ns_locs=np.argwhere(
+ (head<ns_batch_idxs)&(ns_batch_idxs<=head+ns_idx_offset)).flatten()
+ # find if there is any idxs to get from buffer
+ to_replace=buffer_ns_locs.size!=0
+ ifto_replace:
+ # extract the buffer_idxs first for replacement later
+ # given head < ns_idx <= head + offset, and valid buffer idx is [0, offset)
+ # get 0 < ns_idx - head <= offset, or equiv.
+ # get -1 < ns_idx - head - 1 <= offset - 1, i.e.
+ # get 0 <= ns_idx - head - 1 < offset, hence:
+ buffer_idxs=ns_batch_idxs[buffer_ns_locs]-head-1
+ # set them to 0 first to allow sampling, then replace later with buffer
+ ns_batch_idxs[buffer_ns_locs]=0
+ # guard all against overrun idxs from offset
+ ns_batch_idxs=ns_batch_idxs%max_size
+ next_states=util.batch_get(states,ns_batch_idxs)
+ ifto_replace:
+ # now replace using buffer_idxs and ns_buffer
+ buffer_ns=util.batch_get(ns_buffer,buffer_idxs)
+ next_states[buffer_ns_locs]=buffer_ns
+ returnnext_states
+
+
+
[docs]classReplay(Memory):
+ '''
+ Stores agent experiences and samples from them for agent training
+
+ An experience consists of
+ - state: representation of a state
+ - action: action taken
+ - reward: scalar value
+ - next state: representation of next state (should be same as state)
+ - done: 0 / 1 representing if the current state is the last in an episode
+
+ The memory has a size of N. When capacity is reached, the oldest experience
+ is deleted to make space for the latest experience.
+ - This is implemented as a circular buffer so that inserting an experience is O(1)
+ - Each element of an experience is stored as a separate array of size N * element dim
+
+ When a batch of experiences is requested, K experiences are sampled according to a random uniform distribution.
+
+ If 'use_cer', sampling will add the latest experience.
+
+ e.g. memory_spec
+ "memory": {
+ "name": "Replay",
+ "batch_size": 32,
+ "max_size": 10000,
+ "use_cer": true
+ }
+ '''
+
+ def__init__(self,memory_spec,body):
+ super().__init__(memory_spec,body)
+ util.set_attr(self,self.memory_spec,[
+ 'batch_size',
+ 'max_size',
+ 'use_cer',
+ ])
+ self.is_episodic=False
+ self.batch_idxs=None
+ self.size=0# total experiences stored
+ self.seen_size=0# total experiences seen cumulatively
+ self.head=-1# index of most recent experience
+ # generic next_state buffer to store last next_states (allow for multiple for venv)
+ # self.ns_idx_offset = self.body.env.num_envs if body.env.is_venv else 1
+ # self.ns_buffer = deque(maxlen=self.ns_idx_offset)
+ # declare what data keys to store
+ self.data_keys=['states','actions','rewards','next_states','dones']
+ self.reset()
+
+
[docs]defreset(self):
+ '''Initializes the memory arrays, size and head pointer'''
+ # set self.states, self.actions, ...
+ forkinself.data_keys:
+ setattr(self,k,[None]*self.max_size)
+ # if k != 'next_states': # reuse self.states
+ # # list add/sample is over 10x faster than np, also simpler to handle
+ # setattr(self, k, [None] * self.max_size)
+ self.size=0
+ self.head=-1
[docs]defadd_experience(self,state,action,reward,next_state,done):
+ '''Implementation for update() to add experience to memory, expanding the memory size if necessary'''
+ # Move head pointer. Wrap around if necessary
+ self.head=(self.head+1)%self.max_size
+ self.states[self.head]=state.astype(np.float16)
+ self.actions[self.head]=action
+ self.rewards[self.head]=reward
+ self.next_states[self.head]=next_state
+ # self.ns_buffer.append(next_state.astype(np.float16))
+ self.dones[self.head]=done
+
+ # Actually occupied size of memory
+ ifself.size<self.max_size:
+ self.size+=1
+ self.seen_size+=1
+ # set to_train using memory counters head, seen_size instead of tick since clock will step by num_envs when on venv; to_train will be set to 0 after training step
+ algorithm=self.body.agent.algorithm
+ algorithm.to_train = algorithm.to_train or (self.seen_size > algorithm.training_start_step and self.head % algorithm.training_frequency == 0)
+
+
[docs]@lab_api
+ defsample(self):
+ '''
+ Returns a batch of batch_size samples. Batch is stored as a dict.
+ Keys are the names of the different elements of an experience. Values are an array of the corresponding sampled elements
+ e.g.
+ batch = {
+ 'states' : states,
+ 'actions' : actions,
+ 'rewards' : rewards,
+ 'next_states': next_states,
+ 'dones' : dones}
+ '''
+ self.batch_idxs=self.sample_idxs(self.batch_size)
+ batch={}
+ forkinself.data_keys:
+ batch[k]=util.batch_get(getattr(self,k),self.batch_idxs)
+ # if k == 'next_states':
+ # batch[k] = sample_next_states(self.head, self.max_size, self.ns_idx_offset, self.batch_idxs, self.states, self.ns_buffer)
+ # else:
+ # batch[k] = util.batch_get(getattr(self, k), self.batch_idxs)
+ returnbatch
+
+
[docs]defsample_idxs(self,batch_size):
+ '''Batch indices are sampled uniformly at random'''
+ batch_idxs=np.random.randint(self.size,size=batch_size)
+ ifself.use_cer:# add the latest sample
+ batch_idxs[-1]=self.head
+ returnbatch_idxs
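+
+# Illustrative only (not part of ConvLab): the circular head pointer and CER behaviour used by
+# Replay above, with a hypothetical max_size of 4 to show the wrap-around.
+import numpy as np
+max_size, head, buf_size = 4, -1, 0
+for _ in range(6):                      # insert 6 experiences into a buffer of capacity 4
+    head = (head + 1) % max_size        # head visits 0, 1, 2, 3, 0, 1
+    buf_size = min(buf_size + 1, max_size)
+batch_idxs = np.random.randint(buf_size, size=3)
+batch_idxs[-1] = head                   # combined experience replay: always include the latest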
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+fromabcimportABC
+
+importpydashasps
+importtorch
+importtorch.nnasnn
+
+fromconvlab.agent.netimportnet_util
+
+
+
[docs]classNet(ABC):
+ '''Abstract Net class to define the API methods'''
+
+ def__init__(self,net_spec,in_dim,out_dim):
+ '''
+ @param {dict} net_spec is the spec for the net
+ @param {int|list} in_dim is the input dimension(s) for the network. Usually use in_dim=body.state_dim
+ @param {int|list} out_dim is the output dimension(s) for the network. Usually use out_dim=body.action_dim
+ '''
+ self.net_spec=net_spec
+ self.in_dim=in_dim
+ self.out_dim=out_dim
+ self.grad_norms=None# for debugging
+ ifself.net_spec.get('gpu'):
+ iftorch.cuda.device_count():
+ self.device=f'cuda:{net_spec.get("cuda_id", 0)}'
+ else:
+ self.device='cpu'
+ else:
+ self.device='cpu'
+
+
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+importpydashasps
+importtorch
+importtorch.nnasnn
+
+fromconvlab.agent.netimportnet_util
+fromconvlab.agent.net.baseimportNet
+fromconvlab.libimportmath_util,util
+
+
+
[docs]classConvNet(Net,nn.Module):
+ '''
+ Class for generating arbitrary sized convolutional neural network,
+ with optional batch normalization
+
+ Assumes that a single input example is organized into a 3D tensor.
+ The entire model consists of three parts:
+ 1. self.conv_model
+ 2. self.fc_model
+ 3. self.model_tails
+
+ e.g. net_spec
+ "net": {
+ "type": "ConvNet",
+ "shared": true,
+ "conv_hid_layers": [
+ [32, 8, 4, 0, 1],
+ [64, 4, 2, 0, 1],
+ [64, 3, 1, 0, 1]
+ ],
+ "fc_hid_layers": [512],
+ "hid_layers_activation": "relu",
+ "out_layer_activation": "tanh",
+ "init_fn": null,
+ "normalize": false,
+ "batch_norm": false,
+ "clip_grad_val": 1.0,
+ "loss_spec": {
+ "name": "SmoothL1Loss"
+ },
+ "optim_spec": {
+ "name": "Adam",
+ "lr": 0.02
+ },
+ "lr_scheduler_spec": {
+ "name": "StepLR",
+ "step_size": 30,
+ "gamma": 0.1
+ },
+ "update_type": "replace",
+ "update_frequency": 10000,
+ "polyak_coef": 0.9,
+ "gpu": true
+ }
+ '''
+
+ def__init__(self,net_spec,in_dim,out_dim):
+ '''
+ net_spec:
+ conv_hid_layers: list containing dimensions of the convolutional hidden layers, each is a list representing hid_layer = out_d, kernel, stride, padding, dilation.
+ Assumed to all come before the flat layers.
+ Note: a convolutional layer should specify the in_channel, out_channels, kernel_size, stride (of kernel steps), padding, and dilation (spacing between kernel points) E.g. [3, 16, (5, 5), 1, 0, (2, 2)]
+ For more details, see http://pytorch.org/docs/master/nn.html#conv2d and https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
+ fc_hid_layers: list of fc layers following the convolutional layers
+ hid_layers_activation: activation function for the hidden layers
+ out_layer_activation: activation function for the output layer, same shape as out_dim
+ init_fn: weight initialization function
+ normalize: whether to divide by 255.0 to normalize image input
+ batch_norm: whether to add batch normalization after each convolutional layer, excluding the input layer.
+ clip_grad_val: clip gradient norm if value is not None
+ loss_spec: measure of error between model predictions and correct outputs
+ optim_spec: parameters for initializing the optimizer
+ lr_scheduler_spec: Pytorch optim.lr_scheduler
+ update_type: method to update network weights: 'replace' or 'polyak'
+ update_frequency: how many total timesteps per update
+ polyak_coef: ratio of polyak weight update
+ gpu: whether to train using a GPU. Note this will only work if a GPU is available, otherwise setting gpu=True does nothing
+ '''
+ assertlen(in_dim)==3# image shape (c,w,h)
+ nn.Module.__init__(self)
+ super().__init__(net_spec,in_dim,out_dim)
+ # set default
+ util.set_attr(self,dict(
+ out_layer_activation=None,
+ init_fn=None,
+ normalize=False,
+ batch_norm=True,
+ clip_grad_val=None,
+ loss_spec={'name':'MSELoss'},
+ optim_spec={'name':'Adam'},
+ lr_scheduler_spec=None,
+ update_type='replace',
+ update_frequency=1,
+ polyak_coef=0.0,
+ gpu=False,
+ ))
+ util.set_attr(self,self.net_spec,[
+ 'conv_hid_layers',
+ 'fc_hid_layers',
+ 'hid_layers_activation',
+ 'out_layer_activation',
+ 'init_fn',
+ 'normalize',
+ 'batch_norm',
+ 'clip_grad_val',
+ 'loss_spec',
+ 'optim_spec',
+ 'lr_scheduler_spec',
+ 'update_type',
+ 'update_frequency',
+ 'polyak_coef',
+ 'gpu',
+ ])
+
+ # conv body
+ self.conv_model=self.build_conv_layers(self.conv_hid_layers)
+ self.conv_out_dim=self.get_conv_output_size()
+
+ # fc body
+ ifps.is_empty(self.fc_hid_layers):
+ tail_in_dim=self.conv_out_dim
+ else:
+ # fc body from flattened conv
+ self.fc_model=net_util.build_fc_model([self.conv_out_dim]+self.fc_hid_layers,self.hid_layers_activation)
+ tail_in_dim=self.fc_hid_layers[-1]
+
+ # tails. avoid list for single-tail for compute speed
+ ifps.is_integer(self.out_dim):
+ self.model_tail=net_util.build_fc_model([tail_in_dim,self.out_dim],self.out_layer_activation)
+ else:
+ ifnotps.is_list(self.out_layer_activation):
+ self.out_layer_activation=[self.out_layer_activation]*len(out_dim)
+ assertlen(self.out_layer_activation)==len(self.out_dim)
+ tails=[]
+ forout_d,out_activinzip(self.out_dim,self.out_layer_activation):
+ tail=net_util.build_fc_model([tail_in_dim,out_d],out_activ)
+ tails.append(tail)
+ self.model_tails=nn.ModuleList(tails)
+
+ net_util.init_layers(self,self.init_fn)
+ self.loss_fn=net_util.get_loss_fn(self,self.loss_spec)
+ self.to(self.device)
+ self.train()
+
+
[docs]defget_conv_output_size(self):
+ '''Helper function to calculate the size of the flattened features after the final convolutional layer'''
+ withtorch.no_grad():
+ x=torch.ones(1,*self.in_dim)
+ x=self.conv_model(x)
+ returnx.numel()
+
+
[docs]defbuild_conv_layers(self,conv_hid_layers):
+ '''
+ Builds all of the convolutional layers in the network and store in a Sequential model
+ '''
+ conv_layers=[]
+ in_d=self.in_dim[0]# input channel
+ fori,hid_layerinenumerate(conv_hid_layers):
+ hid_layer = [tuple(e) if ps.is_list(e) else e for e in hid_layer]  # guard list-to-tuple
+ # hid_layer = out_d, kernel, stride, padding, dilation
+ conv_layers.append(nn.Conv2d(in_d,*hid_layer))
+ ifself.hid_layers_activationisnotNone:
+ conv_layers.append(net_util.get_activation_fn(self.hid_layers_activation))
+ # Don't include batch norm in the first layer
+ if self.batch_norm and i != 0:
+ conv_layers.append(nn.BatchNorm2d(in_d))
+ in_d=hid_layer[0]# update to out_d
+ conv_model=nn.Sequential(*conv_layers)
+ returnconv_model
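+
+# Illustrative only (not part of ConvLab): how one conv_hid_layers row [out_d, kernel, stride,
+# padding, dilation] expands into nn.Conv2d arguments in build_conv_layers above, for a
+# hypothetical 3x84x84 input.
+import torch
+import torch.nn as nn
+toy_in_d = 3
+toy_hid_layer = [32, 8, 4, 0, 1]                # out_d, kernel, stride, padding, dilation
+toy_conv = nn.Conv2d(toy_in_d, *toy_hid_layer)  # == nn.Conv2d(3, 32, kernel_size=8, stride=4, padding=0, dilation=1)
+toy_out = toy_conv(torch.ones(1, 3, 84, 84))    # shape (1, 32, 20, 20)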
+
+
[docs]defforward(self,x):
+ '''
+ The feedforward step
+ Note that PyTorch takes (c,h,w) but gym provides (h,w,c), so preprocessing must be done before passing to network
+ '''
+ ifself.normalize:
+ x=x/255.0
+ x=self.conv_model(x)
+ x=x.view(x.size(0),-1)# to (batch_size, -1)
+ ifhasattr(self,'fc_model'):
+ x=self.fc_model(x)
+ # return tensor if single tail, else list of tail tensors
+ ifhasattr(self,'model_tails'):
+ outs=[]
+ formodel_tailinself.model_tails:
+ outs.append(model_tail(x))
+ returnouts
+ else:
+ returnself.model_tail(x)
+
+
+
[docs]classDuelingConvNet(ConvNet):
+ '''
+ Class for generating arbitrary sized convolutional neural network,
+ with optional batch normalization, and with dueling heads. Intended for Q-Learning algorithms only.
+ Implementation based on "Dueling Network Architectures for Deep Reinforcement Learning" http://proceedings.mlr.press/v48/wangf16.pdf
+
+ Assumes that a single input example is organized into a 3D tensor.
+ The entire model consists of three parts:
+ 1. self.conv_model
+ 2. self.fc_model
+ 3. self.model_tails
+
+ e.g. net_spec
+ "net": {
+ "type": "DuelingConvNet",
+ "shared": true,
+ "conv_hid_layers": [
+ [32, 8, 4, 0, 1],
+ [64, 4, 2, 0, 1],
+ [64, 3, 1, 0, 1]
+ ],
+ "fc_hid_layers": [512],
+ "hid_layers_activation": "relu",
+ "init_fn": "xavier_uniform_",
+ "normalize": false,
+ "batch_norm": false,
+ "clip_grad_val": 1.0,
+ "loss_spec": {
+ "name": "SmoothL1Loss"
+ },
+ "optim_spec": {
+ "name": "Adam",
+ "lr": 0.02
+ },
+ "lr_scheduler_spec": {
+ "name": "StepLR",
+ "step_size": 30,
+ "gamma": 0.1
+ },
+ "update_type": "replace",
+ "update_frequency": 10000,
+ "polyak_coef": 0.9,
+ "gpu": true
+ }
+ '''
+
+ def__init__(self,net_spec,in_dim,out_dim):
+ assertlen(in_dim)==3# image shape (c,w,h)
+ nn.Module.__init__(self)
+ Net.__init__(self,net_spec,in_dim,out_dim)
+ # set default
+ util.set_attr(self,dict(
+ init_fn=None,
+ normalize=False,
+ batch_norm=False,
+ clip_grad_val=None,
+ loss_spec={'name':'MSELoss'},
+ optim_spec={'name':'Adam'},
+ lr_scheduler_spec=None,
+ update_type='replace',
+ update_frequency=1,
+ polyak_coef=0.0,
+ gpu=False,
+ ))
+ util.set_attr(self,self.net_spec,[
+ 'conv_hid_layers',
+ 'fc_hid_layers',
+ 'hid_layers_activation',
+ 'init_fn',
+ 'normalize',
+ 'batch_norm',
+ 'clip_grad_val',
+ 'loss_spec',
+ 'optim_spec',
+ 'lr_scheduler_spec',
+ 'update_type',
+ 'update_frequency',
+ 'polyak_coef',
+ 'gpu',
+ ])
+
+ # Guard against inappropriate algorithms and environments
+ assertisinstance(out_dim,int)
+
+ # conv body
+ self.conv_model=self.build_conv_layers(self.conv_hid_layers)
+ self.conv_out_dim=self.get_conv_output_size()
+
+ # fc body
+ ifps.is_empty(self.fc_hid_layers):
+ tail_in_dim=self.conv_out_dim
+ else:
+ # fc layer from flattened conv
+ self.fc_model=net_util.build_fc_model([self.conv_out_dim]+self.fc_hid_layers,self.hid_layers_activation)
+ tail_in_dim=self.fc_hid_layers[-1]
+
+ # tails. avoid list for single-tail for compute speed
+ self.v=nn.Linear(tail_in_dim,1)# state value
+ self.adv=nn.Linear(tail_in_dim,out_dim)# action dependent raw advantage
+ self.model_tails = nn.ModuleList([self.v, self.adv])  # nn.ModuleList takes a single iterable of modules
+
+ net_util.init_layers(self,self.init_fn)
+ self.loss_fn=net_util.get_loss_fn(self,self.loss_spec)
+ self.to(self.device)
+ self.train()
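+
+# The dueling forward pass is not shown in this excerpt; per Wang et al. 2016 (and assumed
+# here), the v/adv tails above are typically combined as Q = V + A - mean(A), e.g.:
+import torch
+toy_v = torch.tensor([[1.0]])                   # state value, shape (batch, 1)
+toy_adv = torch.tensor([[0.5, -0.5, 1.0]])      # raw advantages, shape (batch, num_actions)
+toy_q = toy_v + toy_adv - toy_adv.mean(dim=-1, keepdim=True)   # [[1.1667, 0.1667, 1.6667]]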
+
+
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+importnumpyasnp
+importpydashasps
+importtorch
+importtorch.nnasnn
+
+fromconvlab.agent.netimportnet_util
+fromconvlab.agent.net.baseimportNet
+fromconvlab.libimportmath_util,util
+
+
+
[docs]classMLPNet(Net,nn.Module):
+ '''
+ Class for generating arbitrary sized feedforward neural network
+ If more than 1 output tensors, will create a self.model_tails instead of making last layer part of self.model
+
+ e.g. net_spec
+ "net": {
+ "type": "MLPNet",
+ "shared": true,
+ "hid_layers": [32],
+ "hid_layers_activation": "relu",
+ "out_layer_activation": null,
+ "init_fn": "xavier_uniform_",
+ "clip_grad_val": 1.0,
+ "loss_spec": {
+ "name": "MSELoss"
+ },
+ "optim_spec": {
+ "name": "Adam",
+ "lr": 0.02
+ },
+ "lr_scheduler_spec": {
+ "name": "StepLR",
+ "step_size": 30,
+ "gamma": 0.1
+ },
+ "update_type": "replace",
+ "update_frequency": 1,
+ "polyak_coef": 0.9,
+ "gpu": true
+ }
+ '''
+
+ def__init__(self,net_spec,in_dim,out_dim):
+ '''
+ net_spec:
+ hid_layers: list containing dimensions of the hidden layers
+ hid_layers_activation: activation function for the hidden layers
+ out_layer_activation: activation function for the output layer, same shape as out_dim
+ init_fn: weight initialization function
+ clip_grad_val: clip gradient norm if value is not None
+ loss_spec: measure of error between model predictions and correct outputs
+ optim_spec: parameters for initializing the optimizer
+ lr_scheduler_spec: Pytorch optim.lr_scheduler
+ update_type: method to update network weights: 'replace' or 'polyak'
+ update_frequency: how many total timesteps per update
+ polyak_coef: ratio of polyak weight update
+ gpu: whether to train using a GPU. Note this will only work if a GPU is available, otherwise setting gpu=True does nothing
+ '''
+ nn.Module.__init__(self)
+ super().__init__(net_spec,in_dim,out_dim)
+ # set default
+ util.set_attr(self,dict(
+ out_layer_activation=None,
+ init_fn=None,
+ clip_grad_val=None,
+ loss_spec={'name':'MSELoss'},
+ optim_spec={'name':'Adam'},
+ lr_scheduler_spec=None,
+ update_type='replace',
+ update_frequency=1,
+ polyak_coef=0.0,
+ gpu=False,
+ ))
+ util.set_attr(self,self.net_spec,[
+ 'shared',
+ 'hid_layers',
+ 'hid_layers_activation',
+ 'out_layer_activation',
+ 'init_fn',
+ 'clip_grad_val',
+ 'loss_spec',
+ 'optim_spec',
+ 'lr_scheduler_spec',
+ 'update_type',
+ 'update_frequency',
+ 'polyak_coef',
+ 'gpu',
+ ])
+
+ dims=[self.in_dim]+self.hid_layers
+ self.model=net_util.build_fc_model(dims,self.hid_layers_activation)
+ # add last layer with no activation
+ # tails. avoid list for single-tail for compute speed
+ ifps.is_integer(self.out_dim):
+ self.model_tail=net_util.build_fc_model([dims[-1],self.out_dim],self.out_layer_activation)
+ else:
+ ifnotps.is_list(self.out_layer_activation):
+ self.out_layer_activation=[self.out_layer_activation]*len(out_dim)
+ assertlen(self.out_layer_activation)==len(self.out_dim)
+ tails=[]
+ forout_d,out_activinzip(self.out_dim,self.out_layer_activation):
+ tail=net_util.build_fc_model([dims[-1],out_d],out_activ)
+ tails.append(tail)
+ self.model_tails=nn.ModuleList(tails)
+
+ net_util.init_layers(self,self.init_fn)
+ self.loss_fn=net_util.get_loss_fn(self,self.loss_spec)
+ self.to(self.device)
+ self.train()
+
+
[docs]classHydraMLPNet(Net,nn.Module):
+ '''
+ Class for generating arbitrary sized feedforward neural network with multiple state and action heads, and a single shared body.
+
+ e.g. net_spec
+ "net": {
+ "type": "HydraMLPNet",
+ "shared": true,
+ "hid_layers": [
+ [[32],[32]], # 2 heads with hidden layers
+ [64], # body
+ [] # tail, no hidden layers
+ ],
+ "hid_layers_activation": "relu",
+ "out_layer_activation": null,
+ "init_fn": "xavier_uniform_",
+ "clip_grad_val": 1.0,
+ "loss_spec": {
+ "name": "MSELoss"
+ },
+ "optim_spec": {
+ "name": "Adam",
+ "lr": 0.02
+ },
+ "lr_scheduler_spec": {
+ "name": "StepLR",
+ "step_size": 30,
+ "gamma": 0.1
+ },
+ "update_type": "replace",
+ "update_frequency": 1,
+ "polyak_coef": 0.9,
+ "gpu": true
+ }
+ '''
+
+ def__init__(self,net_spec,in_dim,out_dim):
+ '''
+ Multi state processing heads, single shared body, and multi action tails.
+ There is one state and action head per body/environment
+ Example:
+
+ env 1 state env 2 state
+ _______|______ _______|______
+ | head 1 | | head 2 |
+ |______________| |______________|
+ | |
+ |__________________|
+ ________________|_______________
+ | Shared body |
+ |________________________________|
+ |
+ ________|_______
+ | |
+ _______|______ ______|_______
+ | tail 1 | | tail 2 |
+ |______________| |______________|
+ | |
+ env 1 action env 2 action
+ '''
+ nn.Module.__init__(self)
+ super().__init__(net_spec,in_dim,out_dim)
+ # set default
+ util.set_attr(self,dict(
+ out_layer_activation=None,
+ init_fn=None,
+ clip_grad_val=None,
+ loss_spec={'name':'MSELoss'},
+ optim_spec={'name':'Adam'},
+ lr_scheduler_spec=None,
+ update_type='replace',
+ update_frequency=1,
+ polyak_coef=0.0,
+ gpu=False,
+ ))
+ util.set_attr(self,self.net_spec,[
+ 'hid_layers',
+ 'hid_layers_activation',
+ 'out_layer_activation',
+ 'init_fn',
+ 'clip_grad_val',
+ 'loss_spec',
+ 'optim_spec',
+ 'lr_scheduler_spec',
+ 'update_type',
+ 'update_frequency',
+ 'polyak_coef',
+ 'gpu',
+ ])
+ assertlen(self.hid_layers)==3,'Your hidden layers must specify [*heads], [body], [*tails]. If not, use MLPNet'
+ assertisinstance(self.in_dim,list),'Hydra network needs in_dim as list'
+ assertisinstance(self.out_dim,list),'Hydra network needs out_dim as list'
+ self.head_hid_layers=self.hid_layers[0]
+ self.body_hid_layers=self.hid_layers[1]
+ self.tail_hid_layers=self.hid_layers[2]
+ iflen(self.head_hid_layers)==1:
+ self.head_hid_layers=self.head_hid_layers*len(self.in_dim)
+ iflen(self.tail_hid_layers)==1:
+ self.tail_hid_layers=self.tail_hid_layers*len(self.out_dim)
+
+ self.model_heads=self.build_model_heads(in_dim)
+ heads_out_dim = np.sum([head_hid_layers[-1] for head_hid_layers in self.head_hid_layers])
+ dims=[heads_out_dim]+self.body_hid_layers
+ self.model_body=net_util.build_fc_model(dims,self.hid_layers_activation)
+ self.model_tails=self.build_model_tails(self.out_dim,self.out_layer_activation)
+
+ net_util.init_layers(self,self.init_fn)
+ self.loss_fn=net_util.get_loss_fn(self,self.loss_spec)
+ self.to(self.device)
+ self.train()
+
+
[docs]defbuild_model_heads(self,in_dim):
+ '''Build each model_head. These are stored as Sequential models in model_heads'''
+ assertlen(self.head_hid_layers)==len(in_dim),'Hydra head hid_params inconsistent with number in dims'
+ model_heads=nn.ModuleList()
+ forin_d,hid_layersinzip(in_dim,self.head_hid_layers):
+ dims=[in_d]+hid_layers
+ model_head=net_util.build_fc_model(dims,self.hid_layers_activation)
+ model_heads.append(model_head)
+ returnmodel_heads
+
+
[docs]defbuild_model_tails(self,out_dim,out_layer_activation):
+ '''Build each model_tail. These are stored as Sequential models in model_tails'''
+ ifnotps.is_list(out_layer_activation):
+ out_layer_activation=[out_layer_activation]*len(out_dim)
+ model_tails=nn.ModuleList()
+ ifps.is_empty(self.tail_hid_layers):
+ forout_d,out_activinzip(out_dim,out_layer_activation):
+ tail=net_util.build_fc_model([self.body_hid_layers[-1],out_d],out_activ)
+ model_tails.append(tail)
+ else:
+ assertlen(self.tail_hid_layers)==len(out_dim),'Hydra tail hid_params inconsistent with number out dims'
+ forout_d,out_activ,hid_layersinzip(out_dim,out_layer_activation,self.tail_hid_layers):
+ dims=hid_layers
+ model_tail=net_util.build_fc_model(dims,self.hid_layers_activation)
+ tail_out=net_util.build_fc_model([dims[-1],out_d],out_activ)
+ model_tail.add_module(str(len(model_tail)),tail_out)
+ model_tails.append(model_tail)
+ returnmodel_tails
[docs]defget_lr(self):
+ ifhasattr(self.optim,'defaults'):
+ returnself.optim.defaults['lr']
+ else:# TODO retrieve lr more generally
+ returnself.optim.param_groups[0]['lr']
+
+
+
[docs]defbuild_fc_model(dims,activation=None):
+ '''Build a fully-connected model by interleaving nn.Linear and activation layers'''
+ assertlen(dims)>=2,'dims need to at least contain input, output'
+ # shift dims and make pairs of (in, out) dims per layer
+ dim_pairs=list(zip(dims[:-1],dims[1:]))
+ layers=[]
+ forin_d,out_dindim_pairs:
+ layers.append(nn.Linear(in_d,out_d))
+ ifactivationisnotNone:
+ layers.append(get_activation_fn(activation))
+ model=nn.Sequential(*layers)
+ returnmodel
+
+
+
[docs]defget_nn_name(uncased_name):
+ '''Helper to get the proper name in PyTorch nn given a case-insensitive name'''
+ fornn_nameinnn.__dict__:
+ ifuncased_name.lower()==nn_name.lower():
+ returnnn_name
+ raiseValueError(f'Name {uncased_name} not found in {nn.__dict__}')
+
+
+
[docs]defget_activation_fn(activation):
+ '''Helper to generate activation function layers for net'''
+ ActivationClass=getattr(nn,get_nn_name(activation))
+ returnActivationClass()
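+
+# Illustrative only (not part of ConvLab): a minimal use of build_fc_model()/get_activation_fn()
+# defined above. Note that build_fc_model appends the activation after every Linear, including
+# the last; pass activation=None for a purely linear stack.
+import torch
+toy_model = build_fc_model([4, 32, 2], 'relu')  # Linear(4,32) -> ReLU -> Linear(32,2) -> ReLU
+toy_y = toy_model(torch.ones(2, 4))             # shape (2, 2)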
+
+
+
[docs]defget_loss_fn(cls,loss_spec):
+ '''Helper to parse loss param and construct loss_fn for net'''
+ LossClass=getattr(nn,get_nn_name(loss_spec['name']))
+ loss_spec=ps.omit(loss_spec,'name')
+ loss_fn=LossClass(**loss_spec)
+ returnloss_fn
[docs]defget_optim(net,optim_spec):
+ '''Helper to parse optim param and construct optim for net'''
+ OptimClass=getattr(torch.optim,optim_spec['name'])
+ optim_spec=ps.omit(optim_spec,'name')
+ optim=OptimClass(net.parameters(),**optim_spec)
+ returnoptim
+
+
+
[docs]defget_policy_out_dim(body):
+ '''Helper method to construct the policy network out_dim for a body according to is_discrete, action_type'''
+ action_dim=body.action_dim
+ ifbody.is_discrete:
+ ifbody.action_type=='multi_discrete':
+ assertps.is_list(action_dim),action_dim
+ policy_out_dim=action_dim
+ else:
+ assertps.is_integer(action_dim),action_dim
+ policy_out_dim=action_dim
+ else:
+ assertps.is_integer(action_dim),action_dim
+ ifaction_dim==1:# single action, use [loc, scale]
+ policy_out_dim=2
+ else:# multi-action, use [locs], [scales]
+ policy_out_dim=[action_dim,action_dim]
+ returnpolicy_out_dim
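+
+# Illustrative only (not part of ConvLab): get_policy_out_dim() on hypothetical bodies, using
+# SimpleNamespace stand-ins for the real body objects (assumes the module's own pydash import).
+from types import SimpleNamespace
+toy_discrete_body = SimpleNamespace(action_dim=5, is_discrete=True, action_type='discrete')
+toy_continuous_body = SimpleNamespace(action_dim=4, is_discrete=False, action_type='continuous')
+assert get_policy_out_dim(toy_discrete_body) == 5           # logits for 5 actions
+assert get_policy_out_dim(toy_continuous_body) == [4, 4]    # [locs], [scales]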
+
+
+
[docs]defget_out_dim(body,add_critic=False):
+ '''Construct the NetClass out_dim for a body according to is_discrete, action_type, and whether to add a critic unit'''
+ policy_out_dim=get_policy_out_dim(body)
+ ifadd_critic:
+ ifps.is_list(policy_out_dim):
+ out_dim=policy_out_dim+[1]
+ else:
+ out_dim=[policy_out_dim,1]
+ else:
+ out_dim=policy_out_dim
+ returnout_dim
+
+
+
[docs]definit_layers(net,init_fn_name):
+ '''Primary method to initialize the weights of the layers of a network'''
+ ifinit_fn_nameisNone:
+ return
+
+ # get nonlinearity
+ nonlinearity=get_nn_name(net.hid_layers_activation).lower()
+ ifnonlinearity=='leakyrelu':
+ nonlinearity='leaky_relu'# guard name
+
+ # get init_fn and add arguments depending on nonlinearity
+ init_fn=getattr(nn.init,init_fn_name)
+ if'kaiming'ininit_fn_name:# has 'nonlinearity' as arg
+ assertnonlinearityin['relu','leaky_relu'],f'Kaiming initialization not supported for {nonlinearity}'
+ init_fn=partial(init_fn,nonlinearity=nonlinearity)
+ elif'orthogonal'ininit_fn_nameor'xavier'ininit_fn_name:# has 'gain' as arg
+ gain=nn.init.calculate_gain(nonlinearity)
+ init_fn=partial(init_fn,gain=gain)
+ else:
+ pass
+
+ # finally, apply init_params to each layer in its modules
+ net.apply(partial(init_params,init_fn=init_fn))
+
+
+
[docs]definit_params(module,init_fn):
+ '''Initialize module's weights using init_fn, and biases to 0.0'''
+ bias_init=0.0
+ classname=util.get_class_name(module)
+ if'Net'inclassname:# skip if it's a net, not pytorch layer
+ pass
+ elifany(kinclassnameforkin('BatchNorm','Conv','Linear')):
+ init_fn(module.weight)
+ nn.init.constant_(module.bias,bias_init)
+ elif'GRU'inclassname:
+ forname,paraminmodule.named_parameters():
+ if'weight'inname:
+ init_fn(param)
+ elif'bias'inname:
+ nn.init.constant_(param,bias_init)
+ else:
+ pass
+
+
+# params methods
+
+
+
[docs]defsave(net,model_path):
+ '''Save model weights to path'''
+ torch.save(net.state_dict(),util.smart_path(model_path))
+
+
+
[docs]defsave_algorithm(algorithm,ckpt=None):
+ '''Save all the nets for an algorithm'''
+ agent=algorithm.agent
+ net_names=algorithm.net_names
+ model_prepath=agent.spec['meta']['model_prepath']
+ ifckptisnotNone:
+ model_prepath=f'{model_prepath}_ckpt-{ckpt}'
+ fornet_nameinnet_names:
+ net=getattr(algorithm,net_name)
+ model_path=f'{model_prepath}_{net_name}_model.pt'
+ save(net,model_path)
+ optim_name=net_name.replace('net','optim')
+ optim=getattr(algorithm,optim_name,None)
+ ifoptimisnotNone:# only trainable net has optim
+ optim_path=f'{model_prepath}_{net_name}_optim.pt'
+ save(optim,optim_path)
+ logger.debug(f'Saved algorithm {util.get_class_name(algorithm)} nets {net_names} to {model_prepath}_*.pt')
+
+
+
[docs]defload(net,model_path):
+ '''Load model weights from a path into a net module'''
+ device = None if torch.cuda.is_available() else 'cpu'
+ net.load_state_dict(torch.load(util.smart_path(model_path),map_location=device))
+
+
+
[docs]defload_algorithm(algorithm):
+ '''Load all the nets for an algorithm'''
+ agent=algorithm.agent
+ net_names=algorithm.net_names
+ ifutil.in_eval_lab_modes():
+ # load specific model in eval mode
+ model_prepath=agent.spec['meta']['eval_model_prepath']
+ else:
+ model_prepath=agent.spec['meta']['model_prepath']
+ logger.info(f'Loading algorithm {util.get_class_name(algorithm)} nets {net_names} from {model_prepath}_*.pt')
+ fornet_nameinnet_names:
+ net=getattr(algorithm,net_name)
+ model_path=f'{model_prepath}_{net_name}_model.pt'
+ load(net,model_path)
+ optim_name=net_name.replace('net','optim')
+ optim=getattr(algorithm,optim_name,None)
+ ifoptimisnotNone:# only trainable net has optim
+ optim_path=f'{model_prepath}_{net_name}_optim.pt'
+ load(optim,optim_path)
+
+
+
[docs]defcopy(src_net,tar_net):
+ '''Copy model weights from src to target'''
+ tar_net.load_state_dict(src_net.state_dict())
+
+
+
[docs]defpolyak_update(src_net,tar_net,old_ratio=0.5):
+ '''
+ Polyak (soft) weight update of a target tar_net: move it towards src_net by old_ratio, i.e.
+ target <- old_ratio * source + (1 - old_ratio) * target
+ '''
+ forsrc_param,tar_paraminzip(src_net.parameters(),tar_net.parameters()):
+ tar_param.data.copy_(old_ratio*src_param.data+(1.0-old_ratio)*tar_param.data)
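+
+# Illustrative only (not part of ConvLab): polyak_update() on two tiny linear nets; with
+# old_ratio=0.1 the target moves 10% of the way towards the source on each call (soft update).
+import torch.nn as nn
+toy_src, toy_tar = nn.Linear(2, 2), nn.Linear(2, 2)
+polyak_update(toy_src, toy_tar, old_ratio=0.1)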
+
+
+
[docs]defto_check_train_step():
+ '''Condition for running assert_trained'''
+ returnos.environ.get('PY_ENV')=='test'orutil.get_lab_mode()=='dev'
+
+
+
[docs]defdev_check_train_step(fn):
+ '''
+ Decorator to check if net.train_step actually updates the network weights properly
+ Triggers only if to_check_train_step is True (dev/test mode)
+ @example
+
+ @net_util.dev_check_train_step
+ def train_step(self, ...):
+ ...
+ '''
+ @wraps(fn)
+ defcheck_fn(*args,**kwargs):
+ ifnotto_check_train_step():
+ returnfn(*args,**kwargs)
+
+ net=args[0]# first arg self
+ # get pre-update parameters to compare
+ pre_params=[param.clone()forparaminnet.parameters()]
+
+ # run train_step, get loss
+ loss=fn(*args,**kwargs)
+ assertnottorch.isnan(loss).any(),loss
+
+ # get post-update parameters to compare
+ post_params=[param.clone()forparaminnet.parameters()]
+ ifloss==0.0:
+ # if loss is 0, there should be no updates
+ # TODO if without momentum, parameters should not change too
+ forp_name,paraminnet.named_parameters():
+ assertparam.grad.norm()==0
+ else:
+ # check parameter updates
+ try:
+ assertnotall(torch.equal(w1,w2)forw1,w2inzip(pre_params,post_params)),f'Model parameter is not updated in train_step(), check if your tensor is detached from graph. Loss: {loss:g}'
+ logger.info(f'Model parameter is updated in train_step(). Loss: {loss: g}')
+ exceptExceptionase:
+ logger.error(e)
+ ifos.environ.get('PY_ENV')=='test':
+ # raise error if in unit test
+ raise(e)
+
+ # check grad norms
+ min_norm,max_norm=0.0,1e5
+ forp_name,paraminnet.named_parameters():
+ try:
+ grad_norm=param.grad.norm()
+ assertmin_norm<grad_norm<max_norm,f'Gradient norm for {p_name} is {grad_norm:g}, fails the extreme value check {min_norm} < grad_norm < {max_norm}. Loss: {loss:g}. Check your network and loss computation.'
+ exceptExceptionase:
+ logger.warning(e)
+ logger.info(f'Gradient norms passed value check.')
+ logger.debug('Passed network parameter update check.')
+ # store grad norms for debugging
+ net.store_grad_norms()
+ returnloss
+ returncheck_fn
+
+
+
[docs]defget_grad_norms(algorithm):
+ '''Gather all the net's grad norms of an algorithm for debugging'''
+ grad_norms=[]
+ fornet_nameinalgorithm.net_names:
+ net=getattr(algorithm,net_name)
+ ifnet.grad_normsisnotNone:
+ grad_norms.extend(net.grad_norms)
+ returngrad_norms
+
+
+
[docs]definit_global_nets(algorithm):
+ '''
+ Initialize global_nets for Hogwild using an identical instance of an algorithm from an isolated Session
+ in spec.meta.distributed, specify either:
+ - 'shared': global network parameters are shared all the time. In this mode, the algorithm's local network is replaced directly by global_net via an override of the identically named attribute
+ - 'synced': global network parameters are periodically synced to the local network after each gradient push. In this mode, the algorithm keeps a separate `global_{net}` reference for each of its networks
+ '''
+ dist_mode=algorithm.agent.spec['meta']['distributed']
+ assert dist_mode in ('shared', 'synced'), f'Unrecognized distributed mode {dist_mode}'
+ global_nets={}
+ fornet_nameinalgorithm.net_names:
+ optim_name=net_name.replace('net','optim')
+ ifnothasattr(algorithm,optim_name):# only for trainable network, i.e. has an optim
+ continue
+ g_net=getattr(algorithm,net_name)
+ g_net.share_memory()# make net global
+ ifdist_mode=='shared':# use the same name to override the local net
+ global_nets[net_name]=g_net
+ else:# keep a separate reference for syncing
+ global_nets[f'global_{net_name}']=g_net
+ # if optim is Global, set to override the local optim and its scheduler
+ optim=getattr(algorithm,optim_name)
+ if'Global'inutil.get_class_name(optim):
+ optim.share_memory()# make optim global
+ global_nets[optim_name]=optim
+ lr_scheduler_name=net_name.replace('net','lr_scheduler')
+ lr_scheduler=getattr(algorithm,lr_scheduler_name)
+ global_nets[lr_scheduler_name]=lr_scheduler
+ logger.info(f'Initialized global_nets attr {list(global_nets.keys())} for Hogwild')
+ returnglobal_nets
+
+
+
[docs]defset_global_nets(algorithm,global_nets):
+ '''For Hogwild, set attr built in init_global_nets above. Use in algorithm init.'''
+ # set attr first so algorithm always has self.global_{net} to pass into train_step
+ fornet_nameinalgorithm.net_names:
+ setattr(algorithm,f'global_{net_name}',None)
+ # set attr created in init_global_nets
+ ifglobal_netsisnotNone:
+ util.set_attr(algorithm,global_nets)
+ logger.info(f'Set global_nets attr {list(global_nets.keys())} for Hogwild')
+
+
+
[docs]defpush_global_grads(net,global_net):
+ '''Push gradients to global_net, call inside train_step between loss.backward() and optim.step()'''
+ forparam,global_paraminzip(net.parameters(),global_net.parameters()):
+ ifglobal_param.gradisnotNone:
+ return# quick skip
+ global_param._grad=param.grad
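+
+# Sketch (assumption, not the actual Net.train_step implementation): per the docstring above,
+# a Hogwild train step is expected to use push_global_grads roughly in this order:
+#   optim.zero_grad()
+#   loss.backward()
+#   push_global_grads(net, global_net)   # share the local gradients with the global parameters
+#   optim.step()
+#   copy(global_net, net)                # then pull the updated global weights back locally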
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+importpydashasps
+importtorch.nnasnn
+
+fromconvlab.agent.netimportnet_util
+fromconvlab.agent.net.baseimportNet
+fromconvlab.libimportutil
+
+
+
[docs]classRecurrentNet(Net,nn.Module):
+ '''
+ Class for generating arbitrary sized recurrent neural networks which take a sequence of states as input.
+
+ Assumes that a single input example is organized into a 3D tensor
+ batch_size x seq_len x state_dim
+ The entire model consists of three parts:
+ 1. self.fc_model (state processing)
+ 2. self.rnn_model
+ 3. self.model_tails
+
+ e.g. net_spec
+ "net": {
+ "type": "RecurrentNet",
+ "shared": true,
+ "cell_type": "GRU",
+ "fc_hid_layers": [],
+ "hid_layers_activation": "relu",
+ "out_layer_activation": null,
+ "rnn_hidden_size": 32,
+ "rnn_num_layers": 1,
+ "bidirectional": False,
+ "seq_len": 4,
+ "init_fn": "xavier_uniform_",
+ "clip_grad_val": 1.0,
+ "loss_spec": {
+ "name": "MSELoss"
+ },
+ "optim_spec": {
+ "name": "Adam",
+ "lr": 0.01
+ },
+ "lr_scheduler_spec": {
+ "name": "StepLR",
+ "step_size": 30,
+ "gamma": 0.1
+ },
+ "update_type": "replace",
+ "update_frequency": 1,
+ "polyak_coef": 0.9,
+ "gpu": true
+ }
+ '''
+
+ def__init__(self,net_spec,in_dim,out_dim):
+ '''
+ net_spec:
+ cell_type: any of RNN, LSTM, GRU
+ fc_hid_layers: list of fc layers preceding the RNN layers
+ hid_layers_activation: activation function for the fc hidden layers
+ out_layer_activation: activation function for the output layer, same shape as out_dim
+ rnn_hidden_size: rnn hidden_size
+ rnn_num_layers: number of recurrent layers
+ bidirectional: if RNN should be bidirectional
+ seq_len: length of the state history passed to the net
+ init_fn: weight initialization function
+ clip_grad_val: clip gradient norm if value is not None
+ loss_spec: measure of error between model predictions and correct outputs
+ optim_spec: parameters for initializing the optimizer
+ lr_scheduler_spec: Pytorch optim.lr_scheduler
+ update_type: method to update network weights: 'replace' or 'polyak'
+ update_frequency: how many total timesteps per update
+ polyak_coef: ratio of polyak weight update
+ gpu: whether to train using a GPU. Note this will only work if a GPU is available, otherwise setting gpu=True does nothing
+ '''
+ nn.Module.__init__(self)
+ super().__init__(net_spec,in_dim,out_dim)
+ # set default
+ util.set_attr(self,dict(
+ out_layer_activation=None,
+ cell_type='GRU',
+ rnn_num_layers=1,
+ bidirectional=False,
+ init_fn=None,
+ clip_grad_val=None,
+ loss_spec={'name':'MSELoss'},
+ optim_spec={'name':'Adam'},
+ lr_scheduler_spec=None,
+ update_type='replace',
+ update_frequency=1,
+ polyak_coef=0.0,
+ gpu=False,
+ ))
+ util.set_attr(self,self.net_spec,[
+ 'cell_type',
+ 'fc_hid_layers',
+ 'hid_layers_activation',
+ 'out_layer_activation',
+ 'rnn_hidden_size',
+ 'rnn_num_layers',
+ 'bidirectional',
+ 'seq_len',
+ 'init_fn',
+ 'clip_grad_val',
+ 'loss_spec',
+ 'optim_spec',
+ 'lr_scheduler_spec',
+ 'update_type',
+ 'update_frequency',
+ 'polyak_coef',
+ 'gpu',
+ ])
+ # restore proper in_dim from env stacked state_dim (stack_len, *raw_state_dim)
+ self.in_dim = in_dim[1:] if len(in_dim) > 2 else in_dim[1]
+ # fc body: state processing model
+ ifps.is_empty(self.fc_hid_layers):
+ self.rnn_input_dim=self.in_dim
+ else:
+ fc_dims=[self.in_dim]+self.fc_hid_layers
+ self.fc_model=net_util.build_fc_model(fc_dims,self.hid_layers_activation)
+ self.rnn_input_dim=fc_dims[-1]
+
+ # RNN model
+ self.rnn_model=getattr(nn,net_util.get_nn_name(self.cell_type))(
+ input_size=self.rnn_input_dim,
+ hidden_size=self.rnn_hidden_size,
+ num_layers=self.rnn_num_layers,
+ batch_first=True,bidirectional=self.bidirectional)
+
+ # tails. avoid list for single-tail for compute speed
+ ifps.is_integer(self.out_dim):
+ self.model_tail=net_util.build_fc_model([self.rnn_hidden_size,self.out_dim],self.out_layer_activation)
+ else:
+ ifnotps.is_list(self.out_layer_activation):
+ self.out_layer_activation=[self.out_layer_activation]*len(out_dim)
+ assertlen(self.out_layer_activation)==len(self.out_dim)
+ tails=[]
+ forout_d,out_activinzip(self.out_dim,self.out_layer_activation):
+ tail=net_util.build_fc_model([self.rnn_hidden_size,out_d],out_activ)
+ tails.append(tail)
+ self.model_tails=nn.ModuleList(tails)
+
+ net_util.init_layers(self,self.init_fn)
+ self.loss_fn=net_util.get_loss_fn(self,self.loss_spec)
+ self.to(self.device)
+ self.train()
+
+
[docs]defforward(self,x):
+ '''The feedforward step. Input is batch_size x seq_len x state_dim'''
+ # Unstack input to (batch_size x seq_len) x state_dim in order to transform all state inputs
+ batch_size=x.size(0)
+ x=x.view(-1,self.in_dim)
+ ifhasattr(self,'fc_model'):
+ x=self.fc_model(x)
+ # Restack to batch_size x seq_len x rnn_input_dim
+ x=x.view(-1,self.seq_len,self.rnn_input_dim)
+ ifself.cell_type=='LSTM':
+ _output,(h_n,c_n)=self.rnn_model(x)
+ else:
+ _output,h_n=self.rnn_model(x)
+ hid_x=h_n[-1]# get final time-layer
+ # return tensor if single tail, else list of tail tensors
+ ifhasattr(self,'model_tails'):
+ outs=[]
+ formodel_tailinself.model_tails:
+ outs.append(model_tail(hid_x))
+ returnouts
+ else:
+ returnself.model_tail(hid_x)
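+
+# Illustrative only (not part of ConvLab): the tensor shapes flowing through RecurrentNet.forward
+# above, for a hypothetical batch of 2, seq_len 4, state_dim 6 and rnn_hidden_size 8.
+import torch
+import torch.nn as nn
+toy_x = torch.ones(2, 4, 6)                 # batch_size x seq_len x state_dim
+toy_rnn = nn.GRU(input_size=6, hidden_size=8, num_layers=1, batch_first=True)
+_toy_output, toy_h_n = toy_rnn(toy_x)       # _toy_output: (2, 4, 8), toy_h_n: (1, 2, 8)
+toy_hid_x = toy_h_n[-1]                     # (2, 8), fed to the model tail(s)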
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+'''
+The environment module
+Contains graduated components from experiments for building/using environment.
+Provides the rich experience for agent embodiment, reflects the curriculum and allows teaching (possibly allows teacher to enter).
+To be designed by human and evolution module, based on the curriculum and fitness metrics.
+'''
+importpydashasps
+
+fromconvlab.env.baseimportClock,ENV_DATA_NAMES
+fromconvlab.libimportlogger,util
+fromconvlab.lib.decoratorimportlab_api
+
+logger=logger.get_logger(__name__)
+
+
+
[docs]defget_base_clock(self):
+ '''Get the clock with the finest time unit, i.e. ticks the most cycles in a given time, or the highest clock_speed'''
+ fastest_env=ps.max_by(self.envs,lambdaenv:env.clock_speed)
+ clock=fastest_env.clock
+ returnclock
[docs]classClock:
+ '''Clock class for each env and space to keep track of relative time. Ticking and control loop is such that reset is at t=0 and epi=0'''
+
+ def__init__(self,max_frame=int(1e7),clock_speed=1):
+ self.max_frame=max_frame
+ self.clock_speed=int(clock_speed)
+ self.reset()
+
+
[docs]defreset(self):
+ self.t=0
+ self.frame=0# i.e. total_t
+ self.epi=0
+ self.start_wall_t=time.time()
+ self.batch_size=1# multiplier to accurately count opt steps
+ self.opt_step=0# count the number of optimizer updates
[docs]defget_elapsed_wall_t(self):
+ '''Calculate the elapsed wall time (int seconds) since self.start_wall_t'''
+ returnint(time.time()-self.start_wall_t)
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+importos
+importpickle
+importrandom
+fromcollectionsimportdefaultdict
+fromcopyimportdeepcopy
+
+importnumpyasnp
+importpydashasps
+fromgymimportspaces
+
+fromconvlab.env.baseimportBaseEnv,ENV_DATA_NAMES,set_gym_space_attr
+# from convlab.env.registration import get_env_path
+fromconvlab.libimportlogger,util
+fromconvlab.lib.decoratorimportlab_api
+
+logger=logger.get_logger(__name__)
+
+
+################################################################################
+# Parameters for Agents
+################################################################################
+agent_params={}
+agent_params['max_turn']=40
+agent_params['agent_run_mode']=1
+agent_params['agent_act_level']=0
+
+
+################################################################################
+# Parameters for User Simulators
+################################################################################
+usersim_params={}
+usersim_params['max_turn']=40
+usersim_params['slot_err_probability']=0
+usersim_params['slot_err_mode']=0
+usersim_params['intent_err_probability']=0
+usersim_params['simulator_run_mode']=1
+usersim_params['simulator_act_level']=0
+usersim_params['learning_phase']='all'
+
+DATAPATH=os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),"data/movie")
+
+dict_path=os.path.join(DATAPATH,'dicts.v3.p')
+goal_file_path=os.path.join(DATAPATH,'user_goals_first_turn_template.part.movie.v1.p')
+
+# load the user goals from .p file
+all_goal_set=pickle.load(open(goal_file_path,'rb'))
+
+# split goal set
+split_fold=5
+goal_set={'train':[],'valid':[],'test':[],'all':[]}
+foru_goal_id,u_goalinenumerate(all_goal_set):
+ if u_goal_id % split_fold == 1: goal_set['test'].append(u_goal)
+ else: goal_set['train'].append(u_goal)
+ goal_set['all'].append(u_goal)
+# end split goal set
+
+movie_kb_path=os.path.join(DATAPATH,'movie_kb.1k.p')
+# movie_kb = pickle.load(open(movie_kb_path, 'rb'), encoding='latin1')
+movie_dictionary=pickle.load(open(movie_kb_path,'rb'),encoding='latin1')
+
+
[docs]deftext_to_dict(path):
+ """ Read in a text file as a dictionary where keys are text and values are indices (line numbers) """
+
+ slot_set={}
+ withopen(path,'r')asf:
+ index=0
+ forlineinf.readlines():
+ slot_set[line.strip('\n').strip('\r')]=index
+ index+=1
+ returnslot_set
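+
+# Illustrative only (not part of ConvLab): text_to_dict maps each line of a file to its line
+# index, e.g. a hypothetical acts file with lines "inform" and "request" yields
+# {'inform': 0, 'request': 1}.
+import tempfile
+with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False) as _f:
+    _f.write('inform\nrequest\n')
+assert text_to_dict(_f.name) == {'inform': 0, 'request': 1}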
+
+act_set=text_to_dict(os.path.join(DATAPATH,'dia_acts.txt'))
+slot_set=text_to_dict(os.path.join(DATAPATH,'slot_set.txt'))
+
+################################################################################
+# a movie dictionary for user simulator - slot:possible values
+################################################################################
+# movie_dictionary = pickle.load(open(dict_path, 'rb'))
+
+sys_request_slots=['moviename','theater','starttime','date','numberofpeople','genre','state','city','zip','critic_rating','mpaa_rating','distanceconstraints','video_format','theater_chain','price','actor','description','other','numberofkids']
+sys_inform_slots=['moviename','theater','starttime','date','genre','state','city','zip','critic_rating','mpaa_rating','distanceconstraints','video_format','theater_chain','price','actor','description','other','numberofkids','taskcomplete','ticket']
+
+start_dia_acts={
+ #'greeting':[],
+ 'request':['moviename','starttime','theater','city','state','date','genre','ticket','numberofpeople']
+}
+
+################################################################################
+# Dialog status
+################################################################################
+FAILED_DIALOG=-1
+SUCCESS_DIALOG=1
+NO_OUTCOME_YET=0
+
+# Rewards
+SUCCESS_REWARD=50
+FAILURE_REWARD=0
+PER_TURN_REWARD=0
+
+################################################################################
+# Special Slot Values
+################################################################################
+I_DO_NOT_CARE="I do not care"
+NO_VALUE_MATCH="NO VALUE MATCHES!!!"
+TICKET_AVAILABLE='Ticket Available'
+
+################################################################################
+# Constraint Check
+################################################################################
+CONSTRAINT_CHECK_FAILURE=0
+CONSTRAINT_CHECK_SUCCESS=1
+
+################################################################################
+# NLG Beam Search
+################################################################################
+nlg_beam_size=10
+
+################################################################################
+# run_mode: 0 for dia-act; 1 for NL; 2 for no output
+################################################################################
+run_mode=3
+auto_suggest=0
+
+################################################################################
+# A Basic Set of Feasible actions to be Considered By an RL agent
+################################################################################
+feasible_actions=[
+ ############################################################################
+ # greeting actions
+ ############################################################################
+ #{'diaact':"greeting", 'inform_slots':{}, 'request_slots':{}},
+ ############################################################################
+ # confirm_question actions
+ ############################################################################
+ {'diaact':"confirm_question",'inform_slots':{},'request_slots':{}},
+ ############################################################################
+ # confirm_answer actions
+ ############################################################################
+ {'diaact':"confirm_answer",'inform_slots':{},'request_slots':{}},
+ ############################################################################
+ # thanks actions
+ ############################################################################
+ {'diaact':"thanks",'inform_slots':{},'request_slots':{}},
+ ############################################################################
+ # deny actions
+ ############################################################################
+ {'diaact':"deny",'inform_slots':{},'request_slots':{}},
+]
+############################################################################
+# Adding the inform actions
+############################################################################
+forslotinsys_inform_slots:
+ feasible_actions.append({'diaact':'inform','inform_slots':{slot:"PLACEHOLDER"},'request_slots':{}})
+
+############################################################################
+# Adding the request actions
+############################################################################
+forslotinsys_request_slots:
+ feasible_actions.append({'diaact':'request','inform_slots':{},'request_slots':{slot:"UNK"}})
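+
+# Illustrative sketch (not part of the original source; the helper name is hypothetical): the two
+# loops above append one inform action per sys_inform_slots entry and one request action per
+# sys_request_slots entry, so feasible_actions ends up containing entries such as the two checked below.
+def _demo_feasible_actions():
+    '''Sanity-check two of the generated actions.'''
+    assert {'diaact': 'inform', 'inform_slots': {'moviename': 'PLACEHOLDER'}, 'request_slots': {}} in feasible_actions
+    assert {'diaact': 'request', 'inform_slots': {}, 'request_slots': {'starttime': 'UNK'}} in feasible_actions
+    return len(feasible_actions)  # 4 hand-written actions + len(sys_inform_slots) + len(sys_request_slots)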
+
+
+
[docs]classUserSimulator:
+ """ Parent class for all user sims to inherit from """
+
+ def__init__(self,movie_dict=None,act_set=None,slot_set=None,start_set=None,params=None):
+ """ Constructor shared by all user simulators """
+
+ self.movie_dict=movie_dict
+ self.act_set=act_set
+ self.slot_set=slot_set
+ self.start_set=start_set
+
+ self.max_turn=params['max_turn']
+ self.slot_err_probability=params['slot_err_probability']
+ self.slot_err_mode=params['slot_err_mode']
+ self.intent_err_probability=params['intent_err_probability']
+
+
+
[docs]definitialize_episode(self):
+ """ Initialize a new episode (dialog)"""
+
+ print("initialize episode called, generating goal")
+ self.goal=random.choice(self.start_set)
+ self.goal['request_slots']['ticket']='UNK'
+ episode_over,user_action=self._sample_action()
+ assert(episode_over!=1),'episode cannot be over when it has just started'
+ returnuser_action
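+
+# Illustrative sketch (not part of the original source): a goal drawn from start_set is a dict
+# with 'request_slots' and 'inform_slots'; the slot values below are hypothetical examples of the
+# shape consumed by the response methods further down, e.g.
+#   {'request_slots': {'ticket': 'UNK', 'theater': 'UNK'},
+#    'inform_slots': {'moviename': 'some movie', 'date': 'tomorrow', 'numberofpeople': '2'}}
+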
[docs]defresponse_inform(self,system_action):
+ """ Response for Inform (System Action) """
+
+ if'taskcomplete'insystem_action['inform_slots'].keys():# check all the constraints from agents with user goal
+ self.state['diaact']="thanks"
+ #if 'ticket' in self.state['rest_slots']: self.state['request_slots']['ticket'] = 'UNK'
+ self.constraint_check=CONSTRAINT_CHECK_SUCCESS
+
+ ifsystem_action['inform_slots']['taskcomplete']==NO_VALUE_MATCH:
+ self.state['history_slots']['ticket']=NO_VALUE_MATCH
+ if'ticket'inself.state['rest_slots']:self.state['rest_slots'].remove('ticket')
+ if'ticket'inself.state['request_slots'].keys():delself.state['request_slots']['ticket']
+
+ forslotinself.goal['inform_slots'].keys():
+ # Deny, if the answers from agent can not meet the constraints of user
+ ifslotnotinsystem_action['inform_slots'].keys()or(self.goal['inform_slots'][slot].lower()!=system_action['inform_slots'][slot].lower()):
+ self.state['diaact']="deny"
+ self.state['request_slots'].clear()
+ self.state['inform_slots'].clear()
+ self.constraint_check=CONSTRAINT_CHECK_FAILURE
+ break
+ else:
+ forslotinsystem_action['inform_slots'].keys():
+ self.state['history_slots'][slot]=system_action['inform_slots'][slot]
+
+ ifslotinself.goal['inform_slots'].keys():
+ ifsystem_action['inform_slots'][slot]==self.goal['inform_slots'][slot]:
+ ifslotinself.state['rest_slots']:self.state['rest_slots'].remove(slot)
+
+ iflen(self.state['request_slots'])>0:
+ self.state['diaact']="request"
+ eliflen(self.state['rest_slots'])>0:
+ rest_slot_set=deepcopy(self.state['rest_slots'])
+ if'ticket'inrest_slot_set:
+ rest_slot_set.remove('ticket')
+
+ iflen(rest_slot_set)>0:
+ inform_slot=random.choice(rest_slot_set)# self.state['rest_slots']
+ ifinform_slotinself.goal['inform_slots'].keys():
+ self.state['inform_slots'][inform_slot]=self.goal['inform_slots'][inform_slot]
+ self.state['diaact']="inform"
+ self.state['rest_slots'].remove(inform_slot)
+ elifinform_slotinself.goal['request_slots'].keys():
+ self.state['request_slots'][inform_slot]='UNK'
+ self.state['diaact']="request"
+ else:
+ self.state['request_slots']['ticket']='UNK'
+ self.state['diaact']="request"
+ else:# how to reply here?
+ self.state['diaact']="thanks"# replies "closing"? or replies "confirm_answer"
+ else:# != value Should we deny here or ?
+ ########################################################################
+ # TODO When the agent informs slot=value and the value differs from the constraint in the user goal, should we deny or just inform the correct value?
+ ########################################################################
+ self.state['diaact']="inform"
+ self.state['inform_slots'][slot]=self.goal['inform_slots'][slot]
+ ifslotinself.state['rest_slots']:self.state['rest_slots'].remove(slot)
+ else:
+ ifslotinself.state['rest_slots']:
+ self.state['rest_slots'].remove(slot)
+ ifslotinself.state['request_slots'].keys():
+ delself.state['request_slots'][slot]
+
+ iflen(self.state['request_slots'])>0:
+ request_set=list(self.state['request_slots'].keys())
+ if'ticket'inrequest_set:
+ request_set.remove('ticket')
+
+ iflen(request_set)>0:
+ request_slot=random.choice(request_set)
+ else:
+ request_slot='ticket'
+
+ self.state['request_slots'][request_slot]="UNK"
+ self.state['diaact']="request"
+ eliflen(self.state['rest_slots'])>0:
+ rest_slot_set=deepcopy(self.state['rest_slots'])
+ if'ticket'inrest_slot_set:
+ rest_slot_set.remove('ticket')
+
+ iflen(rest_slot_set)>0:
+ inform_slot=random.choice(rest_slot_set)#self.state['rest_slots']
+ ifinform_slotinself.goal['inform_slots'].keys():
+ self.state['inform_slots'][inform_slot]=self.goal['inform_slots'][inform_slot]
+ self.state['diaact']="inform"
+ self.state['rest_slots'].remove(inform_slot)
+
+ if'ticket'inself.state['rest_slots']:
+ self.state['request_slots']['ticket']='UNK'
+ self.state['diaact']="request"
+ elifinform_slotinself.goal['request_slots'].keys():
+ self.state['request_slots'][inform_slot]=self.goal['request_slots'][inform_slot]
+ self.state['diaact']="request"
+ else:
+ self.state['request_slots']['ticket']='UNK'
+ self.state['diaact']="request"
+ else:
+ self.state['diaact']="thanks"# or replies "confirm_answer"
+
+
+
[docs]classStateTracker:
+ """ The state tracker maintains a record of which request slots are filled and which inform slots are filled """
+
+ def__init__(self,act_set,slot_set,movie_dictionary):
+ """ constructor for statetracker takes movie knowledge base and initializes a new episode
+
+ Arguments:
+ act_set -- The set of all acts available
+ slot_set -- The total set of available slots
+ movie_dictionary -- A representation of all the available movies. Generally this object is accessed via the KBHelper class
+
+ Class Variables:
+ history_vectors -- A record of the current dialog so far in vector format (act-slot, but no values)
+ history_dictionaries -- A record of the current dialog in dictionary format
+ current_slots -- A dictionary that keeps a running record of which slots are filled current_slots['inform_slots'] and which are requested current_slots['request_slots'] (but not filled)
+ action_dimension -- # TODO indicates the dimensionality of the vector representation of the action
+ kb_result_dimension -- A single integer denoting the dimension of the kb_results features.
+ turn_count -- A running count of which turn we are at in the present dialog
+ """
+ self.movie_dictionary=movie_dictionary
+ self.initialize_episode()
+ self.history_vectors=None
+ self.history_dictionaries=None
+ self.current_slots=None
+ self.action_dimension=10# TODO REPLACE WITH REAL VALUE
+ self.kb_result_dimension=10# TODO REPLACE WITH REAL VALUE
+ self.turn_count=0
+ self.kb_helper=KBHelper(movie_dictionary)
+
+
+
[docs]definitialize_episode(self):
+ """ Initialize a new episode (dialog), flush the current state and tracked slots """
+
+ self.action_dimension=10
+ self.history_vectors=np.zeros((1,self.action_dimension))
+ self.history_dictionaries=[]
+ self.turn_count=0
+ self.current_slots={}
+
+ self.current_slots['inform_slots']={}
+ self.current_slots['request_slots']={}
+ self.current_slots['proposed_slots']={}
+ self.current_slots['agent_request_slots']={}
+
+
+
[docs]defdialog_history_vectors(self):
+ """ Return the dialog history (both user and agent actions) in vector representation """
+ returnself.history_vectors
+
+
+
[docs]defdialog_history_dictionaries(self):
+ """ Return the dictionary representation of the dialog history (includes values) """
+ returnself.history_dictionaries
+
+
+
[docs]defkb_results_for_state(self):
+ """ Return the information about the database results based on the currently informed slots """
+ ########################################################################
+ # TODO Calculate results based on current informed slots
+ ########################################################################
+ kb_results=self.kb_helper.database_results_for_agent(self.current_slots)# replace this with something less ridiculous
+ # TODO turn results into vector (from dictionary)
+ results=np.zeros((0,self.kb_result_dimension))
+ returnresults
+
+
+
[docs]defget_state_for_agent(self):
+ """ Get the state representatons to send to agent """
+ #state = {'user_action': self.history_dictionaries[-1], 'current_slots': self.current_slots, 'kb_results': self.kb_results_for_state()}
+ state={'user_action':self.history_dictionaries[-1],'current_slots':self.current_slots,#'kb_results': self.kb_results_for_state(),
+ 'kb_results_dict':self.kb_helper.database_results_for_agent(self.current_slots),'turn':self.turn_count,'history':self.history_dictionaries,
+ 'agent_action':self.history_dictionaries[-2]iflen(self.history_dictionaries)>1elseNone}
+ returndeepcopy(state)
+
+
[docs]defget_suggest_slots_values(self,request_slots):
+ """ Get the suggested values for request slots """
+
+ suggest_slot_vals={}
+ iflen(request_slots)>0:
+ suggest_slot_vals=self.kb_helper.suggest_slot_values(request_slots,self.current_slots)
+
+ returnsuggest_slot_vals
+
+
[docs]defget_current_kb_results(self):
+ """ get the kb_results for current state """
+ kb_results=self.kb_helper.available_results_from_kb(self.current_slots)
+ returnkb_results
+
+
+
[docs]defupdate(self,agent_action=None,user_action=None):
+ """ Update the state based on the latest action """
+
+ ########################################################################
+ # Make sure that the function was called properly
+ ########################################################################
+ assert(not(user_actionandagent_action))
+ assert(user_actionoragent_action)
+
+ ########################################################################
+ # Update state to reflect a new action by the agent.
+ ########################################################################
+ ifagent_action:
+
+ ####################################################################
+ # Handles the act_slot response (with values needing to be filled)
+ ####################################################################
+ ifagent_action['act_slot_response']:
+ response=deepcopy(agent_action['act_slot_response'])
+
+ inform_slots=self.kb_helper.fill_inform_slots(response['inform_slots'],self.current_slots)# TODO this doesn't actually work yet, remove this warning when kb_helper is functional
+ agent_action_values={'turn':self.turn_count,'speaker':"agent",'diaact':response['diaact'],'inform_slots':inform_slots,'request_slots':response['request_slots']}
+
+ agent_action['act_slot_response'].update({'diaact':response['diaact'],'inform_slots':inform_slots,'request_slots':response['request_slots'],'turn':self.turn_count})
+
+ elifagent_action['act_slot_value_response']:
+ agent_action_values=deepcopy(agent_action['act_slot_value_response'])
+ # print("Updating state based on act_slot_value action from agent")
+ agent_action_values['turn']=self.turn_count
+ agent_action_values['speaker']="agent"
+
+ ####################################################################
+ # This code should execute regardless of which kind of action the agent produced
+ ####################################################################
+ forslotinagent_action_values['inform_slots'].keys():
+ self.current_slots['proposed_slots'][slot]=agent_action_values['inform_slots'][slot]
+ self.current_slots['inform_slots'][slot]=agent_action_values['inform_slots'][slot]# add into inform_slots
+ ifslotinself.current_slots['request_slots'].keys():
+ delself.current_slots['request_slots'][slot]
+
+ forslotinagent_action_values['request_slots'].keys():
+ ifslotnotinself.current_slots['agent_request_slots']:
+ self.current_slots['agent_request_slots'][slot]="UNK"
+
+ self.history_dictionaries.append(agent_action_values)
+ current_agent_vector=np.ones((1,self.action_dimension))
+ self.history_vectors=np.vstack([self.history_vectors,current_agent_vector])
+
+ ########################################################################
+ # Update the state to reflect a new action by the user
+ ########################################################################
+ elifuser_action:
+
+ ####################################################################
+ # Update the current slots
+ ####################################################################
+ forslotinuser_action['inform_slots'].keys():
+ self.current_slots['inform_slots'][slot]=user_action['inform_slots'][slot]
+ ifslotinself.current_slots['request_slots'].keys():
+ delself.current_slots['request_slots'][slot]
+
+ forslotinuser_action['request_slots'].keys():
+ ifslotnotinself.current_slots['request_slots']:
+ self.current_slots['request_slots'][slot]="UNK"
+
+ self.history_vectors=np.vstack([self.history_vectors,np.zeros((1,self.action_dimension))])
+ new_move={'turn':self.turn_count,'speaker':"user",'request_slots':user_action['request_slots'],'inform_slots':user_action['inform_slots'],'diaact':user_action['diaact']}
+ self.history_dictionaries.append(deepcopy(new_move))
+
+ ########################################################################
+ # This should never happen if the asserts passed
+ ########################################################################
+ else:
+ pass
+
+ ########################################################################
+ # This code should execute after update code regardless of what kind of action (agent/user)
+ ########################################################################
+ self.turn_count+=1
+
+
+
[docs]classKBHelper:
+ """ An assistant to fill in values for the agent (which knows about slots of values) """
+
+ def__init__(self,movie_dictionary):
+ """ Constructor for a KBHelper """
+
+ self.movie_dictionary=movie_dictionary
+ self.cached_kb=defaultdict(list)
+ self.cached_kb_slot=defaultdict(list)
+
+
+
[docs]deffill_inform_slots(self,inform_slots_to_be_filled,current_slots):
+ """ Takes unfilled inform slots and current_slots, returns dictionary of filled informed slots (with values)
+
+ Arguments:
+ inform_slots_to_be_filled -- Something that looks like {starttime:None, theater:None} where starttime and theater are slots that the agent needs filled
+ current_slots -- Contains a record of all filled slots in the conversation so far - for now, just use current_slots['inform_slots'] which is a dictionary of the already filled-in slots
+
+ Returns:
+ filled_in_slots -- A dictionary of form {slot1:value1, slot2:value2} for each sloti in inform_slots_to_be_filled
+ """
+
+ kb_results=self.available_results_from_kb(current_slots)
+ ifauto_suggest==1:
+ print('Number of movies in KB satisfying current constraints: ',len(kb_results))
+
+ filled_in_slots={}
+ if'taskcomplete'ininform_slots_to_be_filled.keys():
+ filled_in_slots.update(current_slots['inform_slots'])
+
+ forslotininform_slots_to_be_filled.keys():
+ ifslot=='numberofpeople':
+ ifslotincurrent_slots['inform_slots'].keys():
+ filled_in_slots[slot]=current_slots['inform_slots'][slot]
+ elifslotininform_slots_to_be_filled.keys():
+ filled_in_slots[slot]=inform_slots_to_be_filled[slot]
+ continue
+
+ ifslot=='ticket'orslot=='taskcomplete':
+ filled_in_slots[slot]=TICKET_AVAILABLEiflen(kb_results)>0elseNO_VALUE_MATCH
+ continue
+
+ ifslot=='closing':continue
+
+ ####################################################################
+ # Grab the value for the slot with the highest count and fill it
+ ####################################################################
+ values_dict=self.available_slot_values(slot,kb_results)
+
+ values_counts=[(v,values_dict[v])forvinvalues_dict.keys()]
+ iflen(values_counts)>0:
+ filled_in_slots[slot]=sorted(values_counts,key=lambdax:-x[1])[0][0]
+ else:
+ filled_in_slots[slot]=NO_VALUE_MATCH#"NO VALUE MATCHES SNAFU!!!"
+
+ returnfilled_in_slots
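+
+# Minimal usage sketch (comment only, not part of the original source; slot names and values are hypothetical):
+#   current_slots = {'inform_slots': {'moviename': 'some movie'}, 'request_slots': {}}
+#   kb_helper.fill_inform_slots({'starttime': None, 'theater': None}, current_slots)
+#   # => e.g. {'starttime': '7:30pm', 'theater': 'some theater'} when the KB has matches,
+#   #    or NO_VALUE_MATCH values when it does not.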
+
+
+
[docs]defavailable_slot_values(self,slot,kb_results):
+ """ Return the set of values available for the slot based on the current constraints """
+
+ slot_values={}
+ formovie_idinkb_results.keys():
+ ifslotinkb_results[movie_id].keys():
+ slot_val=kb_results[movie_id][slot]
+ ifslot_valinslot_values.keys():
+ slot_values[slot_val]+=1
+ else:slot_values[slot_val]=1
+ returnslot_values
+
+
[docs]defavailable_results_from_kb(self,current_slots):
+ """ Return the available movies in the movie_kb based on the current constraints """
+
+ ret_result=[]
+ current_slots=current_slots['inform_slots']
+ constrain_keys=current_slots.keys()
+
+ constrain_keys=filter(lambdak:k!='ticket'and \
+ k!='numberofpeople'and \
+ k!='taskcomplete'and \
+ k!='closing',constrain_keys)
+ constrain_keys=[kforkinconstrain_keysifcurrent_slots[k]!=I_DO_NOT_CARE]
+
+ query_idx_keys=frozenset(current_slots.items())
+ cached_kb_ret=self.cached_kb[query_idx_keys]
+
+ cached_kb_length=len(cached_kb_ret)ifcached_kb_ret!=Noneelse-1
+ ifcached_kb_length>0:
+ returndict(cached_kb_ret)
+ elifcached_kb_length==-1:
+ returndict([])
+
+ # kb_results = copy.deepcopy(self.movie_dictionary)
+ foridinself.movie_dictionary.keys():
+ kb_keys=self.movie_dictionary[id].keys()
+ # equivalent readability rewrite: every constraint key must be present in this KB record
+ if set(constrain_keys).issubset(kb_keys):
+ match=True
+ foridx,kinenumerate(constrain_keys):
+ ifstr(current_slots[k]).lower()==str(self.movie_dictionary[id][k]).lower():
+ continue
+ else:
+ match=False
+ ifmatch:
+ self.cached_kb[query_idx_keys].append((id,self.movie_dictionary[id]))
+ ret_result.append((id,self.movie_dictionary[id]))
+
+ # for slot in current_slots['inform_slots'].keys():
+ # if slot == 'ticket' or slot == 'numberofpeople' or slot == 'taskcomplete' or slot == 'closing': continue
+ # if current_slots['inform_slots'][slot] == dialog_config.I_DO_NOT_CARE: continue
+ #
+ # if slot not in self.movie_dictionary[movie_id].keys():
+ # if movie_id in kb_results.keys():
+ # del kb_results[movie_id]
+ # else:
+ # if current_slots['inform_slots'][slot].lower() != self.movie_dictionary[movie_id][slot].lower():
+ # if movie_id in kb_results.keys():
+ # del kb_results[movie_id]
+
+ iflen(ret_result)==0:
+ self.cached_kb[query_idx_keys]=None
+
+ ret_result=dict(ret_result)
+ returnret_result
[docs]defdatabase_results_for_agent(self,current_slots):
+ """ A dictionary of the number of results matching each current constraint. The agent needs this to decide what to do next. """
+
+ database_results={}# { date:100, distanceconstraints:60, theater:30, matching_all_constraints: 5}
+ database_results=self.available_results_from_kb_for_slots(current_slots['inform_slots'])
+ returndatabase_results
[docs]defstep(self,action):
+ ########################################################################
+ # Register AGENT action with the state_tracker
+ ########################################################################
+ agent_action=self.action_decode(action)
+ self.state_tracker.update(agent_action=agent_action)
+ self.print_function(agent_action=agent_action['act_slot_response'])
+
+ ########################################################################
+ # CALL USER TO TAKE HER TURN
+ ########################################################################
+ sys_action=self.state_tracker.dialog_history_dictionaries()[-1]
+ user_action,session_over,dialog_status=self.user.next(sys_action)
+ reward=self.reward_function(dialog_status)
+
+ ########################################################################
+ # Update state tracker with latest user action
+ ########################################################################
+ ifsession_over!=True:
+ self.state_tracker.update(user_action=user_action)
+ self.print_function(user_action=user_action)
+ else:
+ ifreward>0:
+ self.stat['success']+=1
+ else:self.stat['fail']+=1
+
+ state_vector=self.prepare_state_representation(self.state_tracker.get_state_for_agent())
+ self.env_info=[State(state_vector,reward,session_over)]
+
+ returnself.env_info
+
+
[docs]defreward_function(self,dialog_status):
+ """ Reward Function 1: a reward function based on the dialog_status """
+ ifdialog_status==FAILED_DIALOG:
+ reward=-self.user.max_turn#10
+ elifdialog_status==SUCCESS_DIALOG:
+ reward=2*self.user.max_turn#20
+ else:
+ reward=-1
+ returnreward
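+
+# Worked example (comment only, not part of the original source): with self.user.max_turn == 40
+# the reward_function above yields -40 for a failed dialog, 2 * 40 = 80 for a successful dialog,
+# and -1 for every other (ongoing) turn.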
+
+
[docs]defreward_function_without_penalty(self,dialog_status):
+ """ Reward Function 2: a reward function without penalty on per turn and failure dialog """
+ ifdialog_status==FAILED_DIALOG:
+ reward=0
+ elifdialog_status==SUCCESS_DIALOG:
+ reward=2*self.user.max_turn
+ else:
+ reward=0
+ returnreward
+
+
[docs]definitialize_episode(self):
+ """ Initialize a new episode. This function is called every time a new episode is run. """
+
+ self.current_slot_id=0
+ self.phase=0
+ self.request_set=['moviename','starttime','city','date','theater','numberofpeople']
[docs]defprepare_state_representation(self,state):
+ """ Create the representation for each state """
+
+ user_action=state['user_action']
+ current_slots=state['current_slots']
+ kb_results_dict=state['kb_results_dict']
+ agent_last=state['agent_action']
+
+ ########################################################################
+ # Create one-hot of acts to represent the current user action
+ ########################################################################
+ user_act_rep=np.zeros((1,self.act_cardinality))
+ user_act_rep[0,self.act_set[user_action['diaact']]]=1.0
+
+ ########################################################################
+ # Create bag of inform slots representation to represent the current user action
+ ########################################################################
+ user_inform_slots_rep=np.zeros((1,self.slot_cardinality))
+ forslotinuser_action['inform_slots'].keys():
+ user_inform_slots_rep[0,self.slot_set[slot]]=1.0
+
+ ########################################################################
+ # Create bag of request slots representation to represent the current user action
+ ########################################################################
+ user_request_slots_rep=np.zeros((1,self.slot_cardinality))
+ forslotinuser_action['request_slots'].keys():
+ user_request_slots_rep[0,self.slot_set[slot]]=1.0
+
+ ########################################################################
+ # Create a bag of filled-in slots based on the current_slots
+ ########################################################################
+ current_slots_rep=np.zeros((1,self.slot_cardinality))
+ forslotincurrent_slots['inform_slots']:
+ current_slots_rep[0,self.slot_set[slot]]=1.0
+
+ ########################################################################
+ # Encode last agent act
+ ########################################################################
+ agent_act_rep=np.zeros((1,self.act_cardinality))
+ ifagent_last:
+ agent_act_rep[0,self.act_set[agent_last['diaact']]]=1.0
+
+ ########################################################################
+ # Encode last agent inform slots
+ ########################################################################
+ agent_inform_slots_rep=np.zeros((1,self.slot_cardinality))
+ ifagent_last:
+ forslotinagent_last['inform_slots'].keys():
+ agent_inform_slots_rep[0,self.slot_set[slot]]=1.0
+
+ ########################################################################
+ # Encode last agent request slots
+ ########################################################################
+ agent_request_slots_rep=np.zeros((1,self.slot_cardinality))
+ ifagent_last:
+ forslotinagent_last['request_slots'].keys():
+ agent_request_slots_rep[0,self.slot_set[slot]]=1.0
+
+ turn_rep=np.zeros((1,1))+state['turn']/10.
+
+ ########################################################################
+ # One-hot representation of the turn count?
+ ########################################################################
+ turn_onehot_rep=np.zeros((1,self.max_turn))
+ turn_onehot_rep[0,state['turn']]=1.0
+
+ ########################################################################
+ # Representation of KB results (scaled counts)
+ ########################################################################
+ kb_count_rep=np.zeros((1,self.slot_cardinality+1))+kb_results_dict['matching_all_constraints']/100.
+ forslotinkb_results_dict:
+ ifslotinself.slot_set:
+ kb_count_rep[0,self.slot_set[slot]]=kb_results_dict[slot]/100.
+
+ ########################################################################
+ # Representation of KB results (binary)
+ ########################################################################
+ kb_binary_rep=np.zeros((1,self.slot_cardinality+1))+np.sum(kb_results_dict['matching_all_constraints']>0.)
+ forslotinkb_results_dict:
+ ifslotinself.slot_set:
+ kb_binary_rep[0,self.slot_set[slot]]=np.sum(kb_results_dict[slot]>0.)
+
+ self.final_representation=np.squeeze(np.hstack([user_act_rep,user_inform_slots_rep,user_request_slots_rep,agent_act_rep,agent_inform_slots_rep,agent_request_slots_rep,current_slots_rep,turn_rep,turn_onehot_rep,kb_binary_rep,kb_count_rep]))
+ returnself.final_representation
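+
+# Dimension note (not part of the original source): the hstack above concatenates two act-sized
+# one-hots, five slot-sized bag vectors, two (slot_cardinality + 1)-sized KB vectors, one scalar
+# turn feature and one max_turn-sized turn one-hot, so the final state vector has length
+# 2 * act_cardinality + 7 * slot_cardinality + max_turn + 3.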
+
+
[docs]defaction_index(self,act_slot_response):
+ """ Return the index of action """
+
+ for(i,action)inenumerate(self.feasible_actions):
+ ifact_slot_response==action:
+ returni
+ print(act_slot_response)
+ raiseException("action index not found")
+ returnNone
[docs]classMovieEnv(BaseEnv):
+ '''
+ Wrapper for the rule-based movie-booking dialog environment (MovieActInActOutEnvironment) to work with the Lab.
+
+ e.g. env_spec
+ "env": [{
+ "name": "gridworld",
+ "max_t": 20,
+ "max_tick": 3,
+ "unity": {
+ "gridSize": 6,
+ "numObstacles": 2,
+ "numGoals": 1
+ }
+ }],
+ '''
+
+ def__init__(self,spec,e=None,env_space=None):
+ super(MovieEnv,self).__init__(spec,e,env_space)
+ util.set_attr(self,self.env_spec,[
+ 'observation_dim',
+ 'action_dim',
+ ])
+ worker_id=int(f'{os.getpid()}{self.e+int(ps.unique_id())}'[-4:])
+ # TODO dynamically compose components according to env_spec
+ self.u_env=MovieActInActOutEnvironment(worker_id)
+ self.patch_gym_spaces(self.u_env)
+ self._set_attr_from_u_env(self.u_env)
+ # assert self.max_t is not None
+ ifenv_spaceisNone:# singleton mode
+ pass
+ else:
+ self.space_init(env_space)
+
+ logger.info(util.self_desc(self))
+
+
[docs]defpatch_gym_spaces(self,u_env):
+ '''
+ For standardization, use gym spaces to represent observation and action spaces.
+ This method iterates through the multiple brains (multiagent) then constructs and returns lists of observation_spaces and action_spaces
+ '''
+ observation_shape=(self.env_spec.get('observation_dim'),)
+ observation_space=spaces.Box(low=0,high=1,shape=observation_shape,dtype=np.int32)
+ set_gym_space_attr(observation_space)
+ action_space=spaces.Discrete(self.env_spec.get('action_dim'))
+ set_gym_space_attr(action_space)
+ # set for singleton
+ u_env.observation_space=observation_space
+ u_env.action_space=action_space
[docs]defpatch_gym_spaces(self,u_env):
+ '''
+ For standardization, use gym spaces to represent observation and action spaces.
+ This method iterates through the multiple brains (multiagent) then constructs and returns lists of observation_spaces and action_spaces
+ '''
+ observation_shape=(self.observation_dim,)
+ observation_space=spaces.Box(low=0,high=1,shape=observation_shape,dtype=np.int32)
+ set_gym_space_attr(observation_space)
+ action_space=spaces.Discrete(self.action_dim)
+ set_gym_space_attr(action_space)
+ # set for singleton
+ u_env.observation_space=observation_space
+ u_env.action_space=action_space
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+importshutil
+
+importnumpyasnp
+importpandasaspd
+importpydashasps
+importtorch
+
+fromconvlab.libimportlogger,util,viz
+
+NUM_EVAL=4
+METRICS_COLS=[
+ 'strength','max_strength','final_strength',
+ 'sample_efficiency','training_efficiency',
+ 'stability','consistency',
+]
+
+logger=logger.get_logger(__name__)
+
+
+# methods to generate returns (total rewards)
+
+
[docs]defgen_return(agent,env):
+ '''Generate return for an agent and an env in eval mode'''
+ obs=env.reset()
+ agent.reset(obs)
+ done=False
+ total_reward=0
+ env.clock.tick('epi')
+ env.clock.tick('t')
+ whilenotdone:
+ action=agent.act(obs)
+ next_obs,reward,done,info=env.step(action)
+ agent.update(obs,action,reward,next_obs,done)
+ obs=next_obs
+ total_reward+=reward
+ env.clock.tick('t')
+ returntotal_reward
+
+
+
[docs]defgen_avg_return(agent,env,num_eval=NUM_EVAL):
+ '''Generate average return for agent and an env'''
+ withutil.ctx_lab_mode('eval'):# enter eval context
+ agent.algorithm.update()# set explore_var etc. to end_val under ctx
+ withtorch.no_grad():
+ returns=[gen_return(agent,env)foriinrange(num_eval)]
+ # exit eval context, restore variables simply by updating
+ agent.algorithm.update()
+ returnnp.mean(returns)
+
+
+
[docs]defgen_result(agent,env):
+ '''Generate a single return for an agent and an env in eval mode'''
+ withutil.ctx_lab_mode('eval'):# enter eval context
+ agent.algorithm.update()# set explore_var etc. to end_val under ctx
+ withtorch.no_grad():
+ _return=gen_return(agent,env)
+ # exit eval context, restore variables simply by updating
+ agent.algorithm.update()
+ return _return
[docs]defcalc_session_metrics(session_df,env_name,info_prepath=None,df_mode=None):
+ '''
+ Calculate the session metrics: strength, efficiency, stability
+ @param DataFrame:session_df Dataframe containing reward, frame, opt_step
+ @param str:env_name Name of the environment to get its random baseline
+ @param str:info_prepath Optional info_prepath to auto-save the output to
+ @param str:df_mode Optional df_mode to save with info_prepath
+ @returns dict:metrics Consists of scalar metrics and series local metrics
+ '''
+ mean_return=session_df['avg_return']ifdf_mode=='eval'elsesession_df['avg_return']
+ mean_length=session_df['avg_len']ifdf_mode=='eval'elseNone
+ mean_success=session_df['avg_success']ifdf_mode=='eval'elseNone
+ frames=session_df['frame']
+ opt_steps=session_df['opt_step']
+
+ # all the session local metrics
+ local={
+ 'mean_return':mean_return,
+ 'mean_length':mean_length,
+ 'mean_success':mean_success,
+ 'frames':frames,
+ 'opt_steps':opt_steps,
+ }
+ metrics={
+ 'local':local,
+ }
+ ifinfo_prepathisnotNone:# auto-save if info_prepath is given
+ util.write(metrics,f'{info_prepath}_session_metrics_{df_mode}.pkl')
+ returnmetrics
+
+
+
[docs]defcalc_trial_metrics(session_metrics_list,info_prepath=None):
+ '''
+ Calculate the trial metrics: mean(strength), mean(efficiency), mean(stability), consistency
+ @param list:session_metrics_list The metrics collected from each session; format: {session_index: {'scalar': {...}, 'local': {...}}}
+ @param str:info_prepath Optional info_prepath to auto-save the output to
+ @returns dict:metrics Consists of scalar metrics and series local metrics
+ '''
+ # calculate mean of session metrics
+ mean_return_list=[sm['local']['mean_return']forsminsession_metrics_list]
+ mean_length_list=[sm['local']['mean_length']forsminsession_metrics_list]
+ mean_success_list=[sm['local']['mean_success']forsminsession_metrics_list]
+ frames=session_metrics_list[0]['local']['frames']
+ opt_steps=session_metrics_list[0]['local']['opt_steps']
+
+ # for plotting: gather all local series of sessions
+ local={
+ 'mean_return':mean_return_list,
+ 'mean_length':mean_length_list,
+ 'mean_success':mean_success_list,
+ 'frames':frames,
+ 'opt_steps':opt_steps,
+ }
+ metrics={
+ 'local':local,
+ }
+ ifinfo_prepathisnotNone:# auto-save if info_prepath is given
+ util.write(metrics,f'{info_prepath}_trial_metrics.pkl')
+ returnmetrics
+
+
+
[docs]defcalc_experiment_df(trial_data_dict,info_prepath=None):
+ '''Collect all trial data (metrics and config) from trials into a dataframe'''
+ experiment_df=pd.DataFrame(trial_data_dict).transpose()
+ cols=METRICS_COLS
+ config_cols=sorted(ps.difference(experiment_df.columns.tolist(),cols))
+ sorted_cols=config_cols+cols
+ experiment_df=experiment_df.reindex(sorted_cols,axis=1)
+ experiment_df.sort_values(by=['strength'],ascending=False,inplace=True)
+ ifinfo_prepathisnotNone:
+ util.write(experiment_df,f'{info_prepath}_experiment_df.csv')
+ # save important metrics in info_prepath directly
+ util.write(experiment_df,f'{info_prepath.replace("info/", "")}_experiment_df.csv')
+ returnexperiment_df
+
+
+# interface analyze methods
+
+
[docs]defanalyze_session(session_spec,session_df,df_mode):
+ '''Analyze session and save data, then return metrics. Note there are 2 types of session_df: body.eval_df and body.train_df'''
+ info_prepath=session_spec['meta']['info_prepath']
+ session_df=session_df.copy()
+ assertlen(session_df)>1,f'Need more than 1 datapoint to calculate metrics'
+ util.write(session_df,f'{info_prepath}_session_df_{df_mode}.csv')
+ # calculate metrics
+ session_metrics=calc_session_metrics(session_df,ps.get(session_spec,'env.0.name'),info_prepath,df_mode)
+ # plot graph
+ viz.plot_session(session_spec,session_metrics,session_df,df_mode)
+ returnsession_metrics
+
+
+
[docs]defanalyze_trial(trial_spec,session_metrics_list):
+ '''Analyze trial and save data, then return metrics'''
+ info_prepath=trial_spec['meta']['info_prepath']
+ # calculate metrics
+ trial_metrics=calc_trial_metrics(session_metrics_list,info_prepath)
+ # plot graphs
+ viz.plot_trial(trial_spec,trial_metrics)
+ # zip files
+ ifutil.get_lab_mode()=='train':
+ predir,_,_,_,_,_=util.prepath_split(info_prepath)
+ shutil.make_archive(predir,'zip',predir)
+ logger.info(f'All trial data zipped to {predir}.zip')
+ returntrial_metrics
+
+
+
[docs]defanalyze_experiment(spec,trial_data_dict):
+ '''Analyze experiment and save data'''
+ info_prepath=spec['meta']['info_prepath']
+ util.write(trial_data_dict,f'{info_prepath}_trial_data_dict.json')
+ # calculate experiment df
+ experiment_df=calc_experiment_df(trial_data_dict,info_prepath)
+ # plot graph
+ viz.plot_experiment(spec,experiment_df,METRICS_COLS)
+ # zip files
+ predir,_,_,_,_,_=util.prepath_split(info_prepath)
+ shutil.make_archive(predir,'zip',predir)
+ logger.info(f'All experiment data zipped to {predir}.zip')
+ returnexperiment_df
+
+
+def_retro_analyze_session(session_spec_path):
+ '''Method to retro analyze a single session given only a path to its spec'''
+ session_spec=util.read(session_spec_path)
+ info_prepath=session_spec['meta']['info_prepath']
+ fordf_modein('eval','train'):
+ session_df=util.read(f'{info_prepath}_session_df_{df_mode}.csv')
+ analysis.analyze_session(session_spec,session_df,df_mode)
+
+
+
+
+
+def_retro_analyze_trial(trial_spec_path):
+ '''Method to retro analyze a single trial given only a path to its spec'''
+ trial_spec=util.read(trial_spec_path)
+ meta_spec=trial_spec['meta']
+ info_prepath=meta_spec['info_prepath']
+ session_metrics_list=[util.read(f'{info_prepath}_s{s}_session_metrics_eval.pkl')forsinrange(meta_spec['max_session'])]
+ analysis.analyze_trial(trial_spec,session_metrics_list)
+
+
+
[docs]defretro_analyze_experiment(predir):
+ '''Retro analyze an experiment'''
+ logger.info('Running retro_analyze_experiment')
+ trial_spec_paths=glob(f'{predir}/*_t*_spec.json')
+ # remove trial and session spec paths
+ experiment_spec_paths=ps.difference(glob(f'{predir}/*_spec.json'),trial_spec_paths)
+ experiment_spec_path=experiment_spec_paths[0]
+ spec=util.read(experiment_spec_path)
+ info_prepath=spec['meta']['info_prepath']
+ ifnotos.path.exists(f'{info_prepath}_trial_data_dict.json'):
+ return# only run analysis if the experiment has actually been run
+ trial_data_dict=util.read(f'{info_prepath}_trial_data_dict.json')
+ analysis.analyze_experiment(spec,trial_data_dict)
+
+
+
[docs]defretro_analyze(predir):
+ '''
+ Method to analyze experiment/trial from files after it ran.
+ @example
+
+ yarn retro_analyze data/reinforce_cartpole_2018_01_22_211751/
+ '''
+ predir=predir.strip('/')# sanitary
+ os.environ['LOG_PREPATH']=f'{predir}/log/retro_analyze'# to prevent overwriting log file
+ logger.info(f'Running retro-analysis on {predir}')
+ retro_analyze_sessions(predir)
+ retro_analyze_trials(predir)
+ retro_analyze_experiment(predir)
+ logger.info('Finished retro-analysis')
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+importtime
+fromfunctoolsimportwraps
+
+fromconvlab.libimportlogger
+
+logger=logger.get_logger(__name__)
+
+
+
[docs]deflab_api(fn):
+ '''
+ Function decorator to label and check Lab API methods
+ @example
+
+ from convlab.lib.decorator import lab_api
+ @lab_api
+ def foo():
+ print('foo')
+ '''
+ returnfn
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+importtorch
+# Custom PyTorch distribution classes to be registered in policy_util.py
+# Mainly used by policy_util action distribution
+fromtorchimportdistributions
+
+
+
[docs]classArgmax(distributions.Categorical):
+ '''
+ Special distribution class for argmax sampling, where probability is always 1 for the argmax.
+ NOTE although argmax is not a sampling distribution, this implementation is for API consistency.
+ '''
+
+ def__init__(self,probs=None,logits=None,validate_args=None):
+ ifprobsisnotNone:
+ new_probs=torch.zeros_like(probs,dtype=torch.float)
+ new_probs[probs==probs.max(dim=-1,keepdim=True)[0]]=1.0
+ probs=new_probs
+ eliflogitsisnotNone:
+ new_logits=torch.full_like(logits,-1e8,dtype=torch.float)
+ new_logits[logits==logits.max(dim=-1,keepdim=True)[0]]=1.0
+ logits=new_logits
+
+ super().__init__(probs=probs,logits=logits,validate_args=validate_args)
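+
+# Minimal usage sketch (not part of the original source; the helper name is hypothetical):
+# Argmax behaves like a Categorical whose probability mass sits entirely on the largest input.
+def _demo_argmax():
+    dist = Argmax(probs=torch.tensor([0.1, 0.7, 0.2]))
+    assert dist.sample().item() == 1  # always samples the argmax index
+    return dist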
+
+
+
[docs]classGumbelCategorical(distributions.Categorical):
+ '''
+ Special Categorical using Gumbel distribution to simulate softmax categorical for discrete action.
+ Similar to OpenAI's https://github.com/openai/baselines/blob/98257ef8c9bd23a24a330731ae54ed086d9ce4a7/baselines/a2c/utils.py#L8-L10
+ Explanation http://amid.fish/assets/gumbel.html
+ '''
+
+
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+frompathlibimportPath
+
+fromallennlp.common.file_utilsimportcached_pathasallennlp_cached_path
+
+
+
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+importlogging
+importos
+importsys
+importwarnings
+
+importcolorlog
+importpandasaspd
+
+
+
[docs]classFixedList(list):
+ '''fixed-list to restrict addition to root logger handler'''
+
+
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+# Various math calculations used by algorithms
+importnumpyasnp
+importtorch
+
+
+# general math methods
+
+
[docs]defstandardize(v):
+ '''Method to standardize a rank-1 np array'''
+ # assert len(v) > 1, 'Cannot standardize vector of size 1'
+ iflen(v)==1:
+ returnv
+
+ v_std=(v-v.mean())/(v.std()+1e-08)
+ returnv_std
+
+
+
[docs]defto_one_hot(data,max_val):
+ '''Convert an int list of data into one-hot vectors'''
+ returnnp.eye(max_val)[np.array(data)]
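+
+# Worked example (not part of the original source; the helper name is hypothetical):
+def _demo_to_one_hot():
+    '''to_one_hot([0, 2], 3) yields the 1st and 3rd rows of a 3x3 identity matrix.'''
+    one_hot = to_one_hot([0, 2], 3)
+    assert np.array_equal(one_hot, np.array([[1., 0., 0.], [0., 0., 1.]]))
+    return one_hot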
+
+
+
[docs]defvenv_pack(batch_tensor,num_envs):
+ '''Apply the reverse of venv_unpack to pack a batch tensor from (b*num_envs, *shape) to (b, num_envs, *shape)'''
+ shape=list(batch_tensor.shape)
+ iflen(shape)<2:# scalar data (b, num_envs,)
+ returnbatch_tensor.view(-1,num_envs)
+ else:# non-scalar data (b, num_envs, *shape)
+ pack_shape=[-1,num_envs]+shape[1:]
+ returnbatch_tensor.view(pack_shape)
+
+
+
[docs]defvenv_unpack(batch_tensor):
+ '''
+ Unpack a sampled vec env batch tensor
+ e.g. for a state with original shape (4, ), vec env should return vec state with shape (num_envs, 4) to store in memory
+ When sampled with batch_size b, we should get shape (b, num_envs, 4). But we need to unpack the num_envs dimension to get (b * num_envs, 4) for passing to a network. This method does that.
+ '''
+ shape=list(batch_tensor.shape)
+ iflen(shape)<3:# scalar data (b, num_envs,)
+ returnbatch_tensor.view(-1)
+ else:# non-scalar data (b, num_envs, *shape)
+ unpack_shape=[-1]+shape[2:]
+ returnbatch_tensor.view(unpack_shape)
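+
+# Shape sketch (not part of the original source; the helper name and sizes are hypothetical):
+# venv_unpack flattens the num_envs dimension for the network, venv_pack restores it.
+def _demo_venv_pack_unpack():
+    b, num_envs, state_dim = 8, 4, 5
+    batch = torch.zeros(b, num_envs, state_dim)
+    flat = venv_unpack(batch)           # (b * num_envs, state_dim) == (32, 5)
+    packed = venv_pack(flat, num_envs)  # back to (b, num_envs, state_dim) == (8, 4, 5)
+    assert flat.shape == (32, 5) and packed.shape == (8, 4, 5)
+    return packed
+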
[docs]defcalc_returns(rewards,dones,gamma):
+ '''
+ Calculate the simple returns (full rollout) i.e. sum discounted rewards up till termination
+ '''
+ T=len(rewards)
+ rets=torch.zeros_like(rewards)
+ future_ret=torch.tensor(0.0,dtype=rewards.dtype)
+ not_dones=1-dones
+ fortinreversed(range(T)):
+ rets[t]=future_ret=rewards[t]+gamma*future_ret*not_dones[t]
+ returnrets
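+
+# Worked example (not part of the original source; the helper name is hypothetical): with
+# gamma = 0.9, rewards [1, 1, 1] and a terminal flag only on the last step, the returns are
+# [1 + 0.9 * (1 + 0.9 * 1), 1 + 0.9 * 1, 1] = [2.71, 1.9, 1.0].
+def _demo_calc_returns():
+    rewards = torch.tensor([1.0, 1.0, 1.0])
+    dones = torch.tensor([0.0, 0.0, 1.0])
+    rets = calc_returns(rewards, dones, gamma=0.9)
+    assert torch.allclose(rets, torch.tensor([2.71, 1.9, 1.0]))
+    return rets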
+
+
+
[docs]defcalc_nstep_returns(rewards,dones,next_v_pred,gamma,n):
+ '''
+ Calculate the n-step returns for advantage. Ref: http://www-anw.cs.umass.edu/~barto/courses/cs687/Chapter%207.pdf
+ Also see Algorithm S3 from A3C paper https://arxiv.org/pdf/1602.01783.pdf for the calculation used below
+ R^(n)_t = r_{t} + gamma r_{t+1} + ... + gamma^(n-1) r_{t+n-1} + gamma^(n) V(s_{t+n})
+ '''
+ rets=torch.zeros_like(rewards)
+ future_ret=next_v_pred
+ not_dones=1-dones
+ fortinreversed(range(n)):
+ rets[t]=future_ret=rewards[t]+gamma*future_ret*not_dones[t]
+ returnrets
+
+
+
[docs]defcalc_gaes(rewards,dones,v_preds,gamma,lam):
+ '''
+ Calculate GAE from Schulman et al. https://arxiv.org/pdf/1506.02438.pdf
+ v_preds are values predicted for current states, with one last element as the final next_state
+ delta is defined as r + gamma * V(s') - V(s) in eqn 10
+ GAE is defined in eqn 16
+ This method computes in torch tensor to prevent unnecessary moves between devices (e.g. GPU tensor to CPU numpy)
+ NOTE any standardization is done outside of this method
+ '''
+ T=len(rewards)
+ assertT+1==len(v_preds)# v_preds includes states and 1 last next_state
+ gaes=torch.zeros_like(rewards)
+ future_gae=torch.tensor(0.0,dtype=rewards.dtype)
+ # to multiply with not_dones to handle episode boundary (last state has no V(s'))
+ not_dones=1-dones
+ fortinreversed(range(T)):
+ delta=rewards[t]+gamma*v_preds[t+1]*not_dones[t]-v_preds[t]
+ gaes[t]=future_gae=delta+gamma*lam*not_dones[t]*future_gae
+ returngaes
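+
+# Worked example (not part of the original source; the helper name is hypothetical): with all-zero
+# value predictions and gamma = lam = 1, GAE reduces to the plain un-discounted return, here [2, 1].
+def _demo_calc_gaes():
+    rewards = torch.tensor([1.0, 1.0])
+    dones = torch.tensor([0.0, 1.0])
+    v_preds = torch.zeros(3)  # one extra element for the final next_state
+    gaes = calc_gaes(rewards, dones, v_preds, gamma=1.0, lam=1.0)
+    assert torch.allclose(gaes, torch.tensor([2.0, 1.0]))
+    return gaes
+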
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+# Custom PyTorch optimizer classes, to be registered in net_util.py
+importmath
+
+importtorch
+
+
+
[docs]classGlobalAdam(torch.optim.Adam):
+ '''
+ Global Adam algorithm with shared states for Hogwild.
+ Adapted from https://github.com/ikostrikov/pytorch-a3c/blob/master/my_optim.py (MIT)
+ '''
+
+ def__init__(self,params,lr=1e-3,betas=(0.9,0.999),eps=1e-8,weight_decay=0):
+ super().__init__(params,lr,betas,eps,weight_decay)
+
+ forgroupinself.param_groups:
+ forpingroup['params']:
+ state=self.state[p]
+ state['step']=torch.zeros(1)
+ state['exp_avg']=p.data.new().resize_as_(p.data).zero_()
+ state['exp_avg_sq']=p.data.new().resize_as_(p.data).zero_()
+
+
[docs]defstep(self,closure=None):
+ loss=None
+ ifclosureisnotNone:
+ loss=closure()
+
+ forgroupinself.param_groups:
+ forpingroup['params']:
+ ifp.gradisNone:
+ continue
+ grad=p.grad.data
+ state=self.state[p]
+ exp_avg,exp_avg_sq=state['exp_avg'],state['exp_avg_sq']
+ beta1,beta2=group['betas']
+ state['step']+=1
+ ifgroup['weight_decay']!=0:
+ grad=grad.add(group['weight_decay'],p.data)
+
+ # Decay the first and second moment running average coefficient
+ exp_avg.mul_(beta1).add_(1-beta1,grad)
+ exp_avg_sq.mul_(beta2).addcmul_(1-beta2,grad,grad)
+ denom=exp_avg_sq.sqrt().add_(group['eps'])
+ bias_correction1=1-beta1**state['step'].item()
+ bias_correction2=1-beta2**state['step'].item()
+ step_size=group['lr']*math.sqrt(
+ bias_correction2)/bias_correction1
+ p.data.addcdiv_(-step_size,exp_avg,denom)
+ returnloss
+
+
+
[docs]classGlobalRMSprop(torch.optim.RMSprop):
+ '''
+ Global RMSprop algorithm with shared states for Hogwild.
+ Adapted from https://github.com/jingweiz/pytorch-rl/blob/master/optims/sharedRMSprop.py (MIT)
+ '''
+
+ def__init__(self,params,lr=1e-2,alpha=0.99,eps=1e-8,weight_decay=0):
+ super().__init__(params,lr=lr,alpha=alpha,eps=eps,weight_decay=weight_decay,momentum=0,centered=False)
+
+ # State initialisation (must be done before step, else will not be shared between threads)
+ forgroupinself.param_groups:
+ forpingroup['params']:
+ state=self.state[p]
+ state['step']=p.data.new().resize_(1).zero_()
+ state['square_avg']=p.data.new().resize_as_(p.data).zero_()
+
+
[docs]defbatch_get(arr,idxs):
+ '''Get multi-idxs from an array depending if it's a python list or np.array'''
+ ifisinstance(arr,(list,deque)):
+ returnnp.array(operator.itemgetter(*idxs)(arr))
+ else:
+ returnarr[idxs]
+
+
+
[docs]defcalc_srs_mean_std(sr_list):
+ '''Given a list of series, calculate their mean and std'''
+ cat_df=pd.DataFrame(dict(enumerate(sr_list)))
+ mean_sr=cat_df.mean(axis=1)
+ std_sr=cat_df.std(axis=1)
+ returnmean_sr,std_sr
+
+
+
[docs]defcalc_ts_diff(ts2,ts1):
+ '''
+ Calculate the time from tss ts1 to ts2
+ @param {str} ts2 Later ts in the FILE_TS_FORMAT
+ @param {str} ts1 Earlier ts in the FILE_TS_FORMAT
+ @returns {str} delta_t in %H:%M:%S format
+ @example
+
+ ts1 = '2017_10_17_084739'
+ ts2 = '2017_10_17_084740'
+ ts_diff = util.calc_ts_diff(ts2, ts1)
+ # => '0:00:01'
+ '''
+ delta_t=datetime.strptime(ts2,FILE_TS_FORMAT)-datetime.strptime(ts1,FILE_TS_FORMAT)
+ returnstr(delta_t)
+
+
+
[docs]defcast_df(val):
+ '''missing pydash method to cast value as DataFrame'''
+ ifisinstance(val,pd.DataFrame):
+ returnval
+ returnpd.DataFrame(val)
+
+
+
[docs]defcast_list(val):
+ '''missing pydash method to cast value as list'''
+ ifps.is_list(val):
+ returnval
+ else:
+ return[val]
[docs]defconcat_batches(batches):
+ '''
+ Concat batch objects from body.memory.sample() into one batch, when all bodies experience similar envs
+ Also concat any nested epi sub-batches into flat batch
+ {k: arr1} + {k: arr2} = {k: arr1 + arr2}
+ '''
+ # if is nested, then is episodic
+ is_episodic=isinstance(batches[0]['dones'][0],(list,np.ndarray))
+ concat_batch={}
+ forkinbatches[0]:
+ datas=[]
+ forbatchinbatches:
+ data=batch[k]
+ ifis_episodic:# make into plain batch instead of nested
+ data=np.concatenate(data)
+ datas.append(data)
+ concat_batch[k]=np.concatenate(datas)
+ returnconcat_batch
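+
+# Minimal sketch (not part of the original source; the helper name and data are hypothetical):
+# two flat batches with the same keys are concatenated key-wise.
+def _demo_concat_batches():
+    b1 = {'dones': np.array([0, 1]), 'rewards': np.array([1.0, 2.0])}
+    b2 = {'dones': np.array([0, 0]), 'rewards': np.array([3.0, 4.0])}
+    out = concat_batches([b1, b2])
+    assert np.array_equal(out['rewards'], np.array([1.0, 2.0, 3.0, 4.0]))
+    return out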
+
+
+
[docs]defdowncast_float32(df):
+ '''Downcast any float64 col to float32 to allow safer pandas comparison'''
+ forcolindf.columns:
+ ifdf[col].dtype=='float':
+ df[col]=df[col].astype('float32')
+ returndf
+
+
+
[docs]defepi_done(done):
+ '''
+ General method to check if episode is done for both single and vectorized env
+ Only return True for singleton done since vectorized env does not have a natural episode boundary
+ '''
+ returnnp.isscalar(done)anddone
+
+
+
[docs]deffind_ckpt(prepath):
+ '''Find the ckpt-lorem-ipsum in a string and return lorem-ipsum'''
+ if'ckpt'inprepath:
+ ckpt_str=ps.find(prepath.split('_'),lambdas:s.startswith('ckpt'))
+ ckpt=ckpt_str.replace('ckpt-','')
+ else:
+ ckpt=None
+ returnckpt
+
+
+
[docs]defframe_mod(frame,frequency,num_envs):
+ '''
+ Generic mod for (frame % frequency == 0) for when num_envs is 1 or more,
+ since frame will increase multiple ticks for vector env, use the remainder'''
+ remainder=num_envsor1
+ return(frame%frequency<remainder)
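+
+# Worked example (not part of the original source; the helper name is hypothetical): with
+# frequency = 10 and num_envs = 4 the frame counter advances 4 per step, so frames 40-43 all
+# pass the check while frame 44 does not.
+def _demo_frame_mod():
+    assert all(frame_mod(f, 10, 4) for f in (40, 41, 42, 43))
+    assert not frame_mod(44, 10, 4)
+    return True
+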
[docs]defget_class_name(obj,lower=False):
+ '''Get the class name of an object'''
+ class_name=obj.__class__.__name__
+ iflower:
+ class_name=class_name.lower()
+ returnclass_name
+
+
+
[docs]defget_class_attr(obj):
+ '''Get the class attr of an object as dict'''
+ attr_dict={}
+ fork,vinobj.__dict__.items():
+ ifhasattr(v,'__dict__')orps.is_tuple(v):
+ val=str(v)
+ else:
+ val=v
+ attr_dict[k]=val
+ returnattr_dict
+
+
+
[docs]defget_file_ext(data_path):
+ '''get the `.ext` of file.ext'''
+ returnos.path.splitext(data_path)[-1]
+
+
+
[docs]defget_fn_list(a_cls):
+ '''
+ Get the callable, non-private functions of a class
+ @returns {[*str]} A list of strings of fn names
+ '''
+ fn_list=ps.filter_(dir(a_cls),lambdafn:notfn.endswith('__')andcallable(getattr(a_cls,fn)))
+ returnfn_list
[docs]defget_ts(pattern=FILE_TS_FORMAT):
+ '''
+ Get current ts, defaults to format used for filename
+ @param {str} pattern To format the ts
+ @returns {str} ts
+ @example
+
+ util.get_ts()
+ # => '2017_10_17_084739'
+ '''
+ ts_obj=datetime.now()
+ ts=ts_obj.strftime(pattern)
+ assertRE_FILE_TS.search(ts)
+ returnts
+
+
+
[docs]definsert_folder(prepath,folder):
+ '''Insert a folder into prepath'''
+ split_path=prepath.split('/')
+ prename=split_path.pop()
+ split_path+=[folder,prename]
+ return'/'.join(split_path)
+
+
+
[docs]defin_eval_lab_modes():
+ '''Check if lab_mode is one of EVAL_MODES'''
+ returnget_lab_mode()inEVAL_MODES
+
+
+
[docs]defis_jupyter():
+ '''Check if process is in Jupyter kernel'''
+ try:
+ get_ipython().config
+ returnTrue
+ exceptNameError:
+ returnFalse
+ returnFalse
+
+
+
[docs]@contextmanager
+defctx_lab_mode(lab_mode):
+ '''
+ Creates context to run method with a specific lab_mode
+ @example
+ with util.ctx_lab_mode('eval'):
+ foo()
+
+ @util.ctx_lab_mode('eval')
+ def foo():
+ ...
+ '''
+ prev_lab_mode=os.environ.get('lab_mode')
+ os.environ['lab_mode']=lab_mode
+ yield
+ ifprev_lab_modeisNone:
+ delos.environ['lab_mode']
+ else:
+ os.environ['lab_mode']=prev_lab_mode
+
+
+
[docs]defmonkey_patch(base_cls,extend_cls):
+ '''Monkey patch a base class with methods from extend_cls'''
+ ext_fn_list=get_fn_list(extend_cls)
+ forfninext_fn_list:
+ setattr(base_cls,fn,getattr(extend_cls,fn))
+
+
+
[docs]defparallelize(fn,args,num_cpus=NUM_CPUS):
+ '''
+ Parallelize a method fn, args and return results with order preserved per args.
+ args should be a list of tuples.
+ @returns {list} results Order preserved output from fn.
+ '''
+ pool=mp.Pool(num_cpus,maxtasksperchild=1)
+ results=pool.starmap(fn,args)
+ pool.close()
+ pool.join()
+ returnresults
+
+
+
[docs]defprepath_split(prepath):
+ '''
+ Split prepath into useful names. Works with predir (prename will be None)
+ prepath: output/dqn_pong_2018_12_02_082510/dqn_pong_t0_s0
+ predir: output/dqn_pong_2018_12_02_082510
+ prefolder: dqn_pong_2018_12_02_082510
+ prename: dqn_pong_t0_s0
+ spec_name: dqn_pong
+ experiment_ts: 2018_12_02_082510
+ ckpt: ckpt-best of dqn_pong_t0_s0_ckpt-best if available
+ '''
+ prepath=prepath.strip('_')
+ tail=prepath.split('output/')[-1]
+ ckpt=find_ckpt(tail)
+ ifckptisnotNone:# separate ckpt
+ tail=tail.replace(f'_ckpt-{ckpt}','')
+ if'/'intail:# tail = prefolder/prename
+ prefolder,prename=tail.split('/',1)
+ else:
+ prefolder,prename=tail,None
+ predir=f'output/{prefolder}'
+ spec_name=RE_FILE_TS.sub('',prefolder).strip('_')
+ experiment_ts=RE_FILE_TS.findall(prefolder)[0]
+ returnpredir,prefolder,prename,spec_name,experiment_ts,ckpt
+
+
+
[docs]defprepath_to_idxs(prepath):
+ '''Extract trial index and session index from prepath if available'''
+ _,_,prename,spec_name,_,_=prepath_split(prepath)
+ idxs_tail=prename.replace(spec_name,'').strip('_')
+ idxs_strs=ps.compact(idxs_tail.split('_')[:2])
+ ifps.is_empty(idxs_strs):
+ returnNone,None
+ tidx=idxs_strs[0]
+ asserttidx.startswith('t')
+ trial_index=int(tidx.strip('t'))
+ iflen(idxs_strs)==1:# no session index present
+ session_index=None
+ else:
+ sidx=idxs_strs[1]
+ assertsidx.startswith('s')
+ session_index=int(sidx.strip('s'))
+ returntrial_index,session_index
+
+
+
[docs]defprepath_to_spec(prepath):
+ '''
+ Given a prepath, read the correct spec and recover the meta_spec that will return the same prepath for eval lab modes
+ example: output/a2c_cartpole_2018_06_13_220436/a2c_cartpole_t0_s0
+ '''
+ predir,_,prename,_,experiment_ts,ckpt=prepath_split(prepath)
+ sidx_res=re.search(r'_s\d+',prename)
+ ifsidx_res:# replace the _s0 if any
+ prename=prename.replace(sidx_res[0],'')
+ spec_path=f'{predir}/{prename}_spec.json'
+ # read the spec of prepath
+ spec=read(spec_path)
+ # recover meta_spec
+ trial_index,session_index=prepath_to_idxs(prepath)
+ meta_spec=spec['meta']
+ meta_spec['experiment_ts']=experiment_ts
+ meta_spec['ckpt']=ckpt
+ meta_spec['experiment']=0
+ meta_spec['trial']=trial_index
+ meta_spec['session']=session_index
+ check_prepath=get_prepath(spec,unit='session')
+ assertcheck_prepathinprepath,f'{check_prepath}, {prepath}'
+ returnspec
+
+
+
[docs]defread(data_path,**kwargs):
+ '''
+ Universal data reading method with smart data parsing
+ - {.csv} to DataFrame
+ - {.json} to dict, list
+ - {.yml} to dict
+ - {*} to str
+ @param {str} data_path The data path to read from
+ @returns {data} The read data in sensible format
+ @example
+
+ data_df = util.read('test/fixture/lib/util/test_df.csv')
+ # => <DataFrame>
+
+ data_dict = util.read('test/fixture/lib/util/test_dict.json')
+ data_dict = util.read('test/fixture/lib/util/test_dict.yml')
+ # => <dict>
+
+ data_list = util.read('test/fixture/lib/util/test_list.json')
+ # => <list>
+
+ data_str = util.read('test/fixture/lib/util/test_str.txt')
+ # => <str>
+ '''
+ data_path=smart_path(data_path)
+ try:
+ assertos.path.isfile(data_path)
+ exceptAssertionError:
+ raiseFileNotFoundError(data_path)
+ ext=get_file_ext(data_path)
+ ifext=='.csv':
+ data=read_as_df(data_path,**kwargs)
+ elifext=='.pkl':
+ data=read_as_pickle(data_path,**kwargs)
+ else:
+ data=read_as_plain(data_path,**kwargs)
+ returndata
+
+
+
[docs]defread_as_df(data_path,**kwargs):
+ '''Submethod to read data as DataFrame'''
+ ext=get_file_ext(data_path)
+ data=pd.read_csv(data_path,**kwargs)
+ returndata
+
+
+
[docs]defread_as_pickle(data_path,**kwargs):
+ '''Submethod to read data as pickle'''
+ withopen(data_path,'rb')asf:
+ data=pickle.load(f)
+ returndata
+
+
+
[docs]defread_as_plain(data_path,**kwargs):
+ '''Submethod to read data as plain type'''
+ open_file=open(data_path,'r')
+ ext=get_file_ext(data_path)
+ ifext=='.json':
+ data=ujson.load(open_file,**kwargs)
+ elifext=='.yml':
+ data=yaml.load(open_file,**kwargs)
+ else:
+ data=open_file.read()
+ open_file.close()
+ returndata
[docs]defrun_cmd_wait(proc):
+ '''Wait on a running process created by util.run_cmd and print its stdout'''
+ forlineinproc.stdout:
+ print(line.decode(),end='')
+ output=proc.communicate()[0]
+ ifproc.returncode!=0:
+ raisesubprocess.CalledProcessError(proc.returncode,proc.args,output)
+ else:
+ returnoutput
+
+
+
[docs]defself_desc(cls):
+ '''Method to get self description, used at init.'''
+ desc_list=[f'{get_class_name(cls)}:']
+ fork,vinget_class_attr(cls).items():
+ ifk=='spec':
+ desc_v=v['name']
+ elifps.is_dict(v)orps.is_dict(ps.head(v)):
+ desc_v=pformat(v)
+ else:
+ desc_v=v
+ desc_list.append(f'- {k} = {desc_v}')
+ desc='\n'.join(desc_list)
+ returndesc
+
+
+
[docs]defset_attr(obj,attr_dict,keys=None):
+ '''Set attribute of an object from a dict'''
+ ifkeysisnotNone:
+ attr_dict=ps.pick(attr_dict,keys)
+ forattr,valinattr_dict.items():
+ setattr(obj,attr,val)
+ returnobj
+
+
+
[docs]defset_cuda_id(spec):
+ '''Use trial and session id to hash and modulo cuda device count for a cuda_id to maximize device usage. Sets the net_spec for the base Net class to pick up.'''
+ # Don't trigger any cuda call if not using GPU. Otherwise will break multiprocessing on machines with CUDA.
+ # see issues https://github.com/pytorch/pytorch/issues/334 https://github.com/pytorch/pytorch/issues/3491 https://github.com/pytorch/pytorch/issues/9996
+ foragent_specinspec['agent']:
+ if'net'notinagent_specornotagent_spec['net'].get('gpu'):
+ return
+ meta_spec=spec['meta']
+ trial_idx=meta_spec['trial']or0
+ session_idx=meta_spec['session']or0
+ ifmeta_spec['distributed']=='shared':# shared hogwild uses only global networks, offset them to idx 0
+ session_idx=0
+ job_idx=trial_idx*meta_spec['max_session']+session_idx
+ job_idx+=meta_spec['cuda_offset']
+ device_count=torch.cuda.device_count()
+ cuda_id=Noneifnotdevice_countelsejob_idx%device_count
+
+ foragent_specinspec['agent']:
+ agent_spec['net']['cuda_id']=cuda_id
+
+
+
[docs]defset_logger(spec,logger,unit=None):
+ '''Set the logger for a lab unit given its spec'''
+ os.environ['LOG_PREPATH']=insert_folder(get_prepath(spec,unit=unit),'log')
+ reload(logger)# to set session-specific logger
+
+
+
[docs]def set_random_seed(spec):
+    '''Generate and set random seed for relevant modules, and record it in spec.meta.random_seed'''
+    torch.set_num_threads(1)  # prevent multithread slowdown, set again for hogwild
+    trial = spec['meta']['trial']
+    session = spec['meta']['session']
+    random_seed = int(1e5 * (trial or 0) + 1e3 * (session or 0) + time.time())
+    torch.cuda.manual_seed_all(random_seed)
+    torch.manual_seed(random_seed)
+    np.random.seed(random_seed)
+    spec['meta']['random_seed'] = random_seed
+    return random_seed
+
+
+def _sizeof(obj, seen=None):
+    '''Recursively finds size of objects'''
+    size = sys.getsizeof(obj)
+    if seen is None:
+        seen = set()
+    obj_id = id(obj)
+    if obj_id in seen:
+        return 0
+    # Important: mark as seen *before* entering recursion to gracefully handle
+    # self-referential objects
+    seen.add(obj_id)
+    if isinstance(obj, dict):
+        size += sum([_sizeof(v, seen) for v in obj.values()])
+        size += sum([_sizeof(k, seen) for k in obj.keys()])
+    elif hasattr(obj, '__dict__'):
+        size += _sizeof(obj.__dict__, seen)
+    elif hasattr(obj, '__iter__') and not isinstance(obj, (str, bytes, bytearray)):
+        size += sum([_sizeof(i, seen) for i in obj])
+    return size
+
+
+
[docs]def sizeof(obj, divisor=1e6):
+    '''Return the size of object, in MB by default'''
+    return _sizeof(obj) / divisor
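+
+# Added usage sketch (not part of the original module): sizeof() walks nested
+# containers via _sizeof(), so it accounts for list/dict contents rather than
+# just the outer object header. The batch dict below is illustrative.
+def _demo_sizeof():
+    batch = {'states': [list(range(100)) for _ in range(10)], 'rewards': list(range(10))}
+    mb = sizeof(batch)               # size in MB (divisor=1e6)
+    kb = sizeof(batch, divisor=1e3)  # same measurement reported in KB
+    assert kb > mb > 0
+    return mb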
+
+
+
[docs]def smart_path(data_path, as_dir=False):
+    '''
+    Resolve data_path into abspath with fallback to join from ROOT_DIR
+    @param {str} data_path The input data path to resolve
+    @param {bool} as_dir Whether to return as dirname
+    @returns {str} The normalized absolute data_path
+    @example
+
+    util.smart_path('convlab/lib')
+    # => '/Users/ANON/Documents/convlab/convlab/lib'
+
+    util.smart_path('/tmp')
+    # => '/tmp'
+    '''
+    if not os.path.isabs(data_path):
+        abs_path = os.path.abspath(data_path)
+        if os.path.exists(abs_path):
+            data_path = abs_path
+        else:
+            data_path = os.path.join(ROOT_DIR, data_path)
+    if as_dir:
+        data_path = os.path.dirname(data_path)
+    return os.path.normpath(data_path)
+
+
+
[docs]def split_minibatch(batch, mb_size):
+    '''Split a batch into minibatches of mb_size or smaller, without replacement'''
+    size = len(batch['rewards'])
+    assert mb_size < size, f'Minibatch size {mb_size} must be < batch size {size}'
+    idxs = np.arange(size)
+    np.random.shuffle(idxs)
+    chunks = int(size / mb_size)
+    nested_idxs = np.array_split(idxs, chunks)
+    mini_batches = []
+    for minibatch_idxs in nested_idxs:
+        minibatch = {k: v[minibatch_idxs] for k, v in batch.items()}
+        mini_batches.append(minibatch)
+    return mini_batches
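+
+# Added usage sketch (not part of the original module): split_minibatch expects a
+# dict of equal-length numpy arrays (indexable by an index array) and returns a
+# list of smaller dicts covering every sample exactly once.
+def _demo_split_minibatch():
+    batch = {
+        'states': np.arange(16).reshape(8, 2),
+        'rewards': np.ones(8),
+    }
+    mini_batches = split_minibatch(batch, mb_size=3)
+    assert sum(len(mb['rewards']) for mb in mini_batches) == 8
+    return mini_batches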
+
+
+
[docs]def to_json(d, indent=2):
+    '''Shorthand method for stringify JSON with indent'''
+    return json.dumps(d, indent=indent, cls=LabJsonEncoder)
+
+
[docs]def to_torch_batch(batch, device, is_episodic):
+    '''Mutate a batch (dict) to make its values from numpy into PyTorch tensor'''
+    for k in batch:
+        if is_episodic:  # for episodic format
+            batch[k] = np.concatenate(batch[k])
+        elif ps.is_list(batch[k]):
+            batch[k] = np.array(batch[k])
+        batch[k] = torch.from_numpy(batch[k].astype(np.float32)).to(device)
+    return batch
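+
+# Added usage sketch (not part of the original module): a replay-style batch of
+# numpy arrays (or plain lists) is cast in place to float32 tensors on the chosen
+# device. The batch contents are illustrative.
+def _demo_to_torch_batch():
+    device = torch.device('cpu')
+    batch = {
+        'states': np.zeros((4, 3)),
+        'rewards': [0.0, 1.0, 0.0, 1.0],  # a plain list goes through np.array first
+    }
+    batch = to_torch_batch(batch, device, is_episodic=False)
+    assert batch['rewards'].dtype == torch.float32
+    return batch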
+
+
+
[docs]def write(data, data_path):
+    '''
+    Universal data writing method with smart data parsing
+    - {.csv} from DataFrame
+    - {.json} from dict, list
+    - {.yml} from dict
+    - {*} from str(*)
+    @param {*} data The data to write
+    @param {str} data_path The data path to write to
+    @returns {data_path} The data path written to
+    @example
+
+    data_path = util.write(data_df, 'test/fixture/lib/util/test_df.csv')
+
+    data_path = util.write(data_dict, 'test/fixture/lib/util/test_dict.json')
+    data_path = util.write(data_dict, 'test/fixture/lib/util/test_dict.yml')
+
+    data_path = util.write(data_list, 'test/fixture/lib/util/test_list.json')
+
+    data_path = util.write(data_str, 'test/fixture/lib/util/test_str.txt')
+    '''
+    data_path = smart_path(data_path)
+    data_dir = os.path.dirname(data_path)
+    os.makedirs(data_dir, exist_ok=True)
+    ext = get_file_ext(data_path)
+    if ext == '.csv':
+        write_as_df(data, data_path)
+    elif ext == '.pkl':
+        write_as_pickle(data, data_path)
+    else:
+        write_as_plain(data, data_path)
+    return data_path
+
+
+
[docs]def write_as_df(data, data_path):
+    '''Submethod to write data as DataFrame'''
+    df = cast_df(data)
+    ext = get_file_ext(data_path)
+    df.to_csv(data_path, index=False)
+    return data_path
+
+
+
[docs]def write_as_pickle(data, data_path):
+    '''Submethod to write data as pickle'''
+    with open(data_path, 'wb') as f:
+        pickle.dump(data, f)
+    return data_path
+
+
+
[docs]def write_as_plain(data, data_path):
+    '''Submethod to write data as plain type'''
+    open_file = open(data_path, 'w')
+    ext = get_file_ext(data_path)
+    if ext == '.json':
+        json.dump(data, open_file, indent=2, cls=LabJsonEncoder)
+    elif ext == '.yml':
+        yaml.dump(data, open_file)
+    else:
+        open_file.write(str(data))
+    open_file.close()
+    return data_path
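+
+# Added usage sketch (not part of the original module): write() and read() pair up
+# through the shared extension dispatch, so a dict round-trips through JSON. The
+# path under /tmp is hypothetical.
+def _demo_write_read_roundtrip():
+    spec = {'name': 'demo_spec', 'meta': {'max_session': 2}}
+    data_path = write(spec, '/tmp/convlab_demo/spec.json')
+    restored = read(data_path)
+    assert restored == spec
+    return restored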
[docs]def normalize_image(im):
+    '''Normalize an image by dividing by the max pixel value 255'''
+    # NOTE: beware in its application, may cause loss to be 255 times lower due to smaller input values
+    return np.divide(im, 255.0)
[docs]def debug_image(im):
+    '''
+    Renders an image for debugging; pauses process until key press
+    Handles tensor/numpy and conventions among libraries
+    '''
+    if torch.is_tensor(im):  # if PyTorch tensor, get numpy
+        im = im.cpu().numpy()
+    im = to_opencv_image(im)
+    im = im.astype(np.uint8)  # typecast guard
+    if im.shape[0] == 3:  # RGB image
+        # accommodate from RGB (numpy convention) to BGR (cv2 convention); the channel swap is symmetric
+        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
+    cv2.imshow('debug image', im)
+    cv2.waitKey(0)
+
+
+
[docs]def mpl_debug_image(im):
+    '''Uses matplotlib to plot image with bigger size, axes, and false color on greyscaled images'''
+    import matplotlib.pyplot as plt
+    plt.figure()
+    plt.imshow(im)
+    plt.show()
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+import os
+
+import colorlover as cl
+import pydash as ps
+# The data visualization module
+# Defines plotting methods for analysis
+from plotly import graph_objs as go, io as pio, tools
+from plotly.offline import init_notebook_mode, iplot
+
+from convlab.lib import logger, util
+
+logger = logger.get_logger(__name__)
+
+# warn orca failure only once
+orca_warn_once = ps.once(lambda e: logger.warning(f'Failed to generate graph. Run retro-analysis to generate graphs later.'))
+if util.is_jupyter():
+    init_notebook_mode(connected=True)
+
+
+
[docs]def get_palette(size):
+    '''Get the suitable palette of a certain size'''
+    if size <= 8:
+        palette = cl.scales[str(max(3, size))]['qual']['Set2']
+    else:
+        palette = cl.interp(cl.scales['8']['qual']['Set2'], size)
+    return palette
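+
+# Added usage sketch (not part of the original module): for up to 8 series the
+# palette comes straight from colorlover's qualitative Set2 scale; larger sizes
+# are interpolated from the 8-color scale (exact interpolated length may vary).
+def _demo_get_palette():
+    small = get_palette(4)   # 4 discrete Set2 colors
+    large = get_palette(12)  # colors interpolated from the 8-color Set2 scale
+    assert len(small) == 4 and len(large) >= 8
+    return small, large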
[docs]def normalize_value(value_set, domain, slot, value):
+    """
+    Normalize a value produced by the NLU module to map it into the ontology value space.
+    Args:
+        value_set (dict): The value set of the task ontology.
+        domain (str): The domain of the slot-value pairs.
+        slot (str): The slot of the value.
+        value (str): The raw value detected by the NLU module.
+    Returns:
+        value (str): The normalized value, which fits the domain ontology.
+    """
+    slot = slot.lower()
+    value = value.lower()
+    value = ' '.join(value.split())
+    try:
+        assert domain in value_set
+    except AssertionError:
+        raise Exception('domain <{}> not found in value set'.format(domain))
+    if slot not in value_set[domain]:
+        raise Exception('slot <{}> not found in db_values[{}]'.format(slot, domain))
+    value_list = value_set[domain][slot]
+    # exact match or containing match
+    v = _match_or_contain(value, value_list)
+    if v is not None:
+        return v
+    # some transformations
+    cand_values = _transform_value(value)
+    for cv in cand_values:
+        v = _match_or_contain(cv, value_list)
+        if v is not None:
+            return v
+    # special value matching
+    v = special_match(domain, slot, value)
+    if v is not None:
+        return v
+    _log('Failed: domain {} slot {} value {}, raw value returned.'.format(domain, slot, value))
+    return value
+
+def _transform_value(value):
+    cand_list = []
+    # a 's -> a's
+    if " 's" in value:
+        cand_list.append(value.replace(" 's", "'s"))
+    # a - b -> a-b
+    if " - " in value:
+        cand_list.append(value.replace(" - ", "-"))
+    # center <-> centre
+    if value == 'center':
+        cand_list.append('centre')
+    elif value == 'centre':
+        cand_list.append('center')
+    # the + value
+    if not value.startswith('the '):
+        cand_list.append('the ' + value)
+    return cand_list
+
+def _match_or_contain(value, value_list):
+    """Match value by exact match or containment"""
+    if value in value_list:
+        return value
+    for v in value_list:
+        if v in value or value in v:
+            return v
+    # fuzzy match, when len(value) is large and distance(v1, v2) is small
+    for v in value_list:
+        d = minDistance(value, v)
+        if (d <= 2 and len(value) >= 10) or (d <= 3 and len(value) >= 15):
+            return v
+    return None
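+
+# Added usage sketch (not part of the original module): with a toy ontology, the
+# helper chain resolves exact matches, containment, and simple transformations
+# such as the center/centre spelling swap. The value_set below is hypothetical.
+def _demo_normalize_value():
+    value_set = {'attraction': {'area': ['centre', 'north', 'south', 'east', 'west']}}
+    assert normalize_value(value_set, 'attraction', 'area', 'north') == 'north'    # exact match
+    assert normalize_value(value_set, 'attraction', 'area', 'center') == 'centre'  # via _transform_value
+    return value_set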
+
+
Source code for convlab.modules.dst.multiwoz.evaluate
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import json
+
+from convlab.modules.dst.multiwoz.dst_util import minDistance
+from convlab.modules.dst.multiwoz.rule_dst import RuleDST
+from convlab.modules.nlu.multiwoz.onenet.nlu import OneNetLU
+
+
+
Source code for convlab.modules.dst.multiwoz.rule_dst
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import copy
+import json
+import os
+
+import convlab
+from convlab.modules.dst.multiwoz.dst_util import init_state
+from convlab.modules.dst.multiwoz.dst_util import normalize_value
+from convlab.modules.dst.state_tracker import Tracker
+from convlab.modules.util.multiwoz.multiwoz_slot_trans import REF_SYS_DA
+
+
+
[docs]class RuleDST(Tracker):
+    """Rule-based DST which trivially updates new values from the NLU result into the state."""
+    def __init__(self):
+        Tracker.__init__(self)
+        self.state = init_state()
+        prefix = os.path.dirname(os.path.dirname(convlab.__file__))
+        self.value_dict = json.load(open(prefix + '/data/multiwoz/value_dict.json'))
+
+
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+"""
+
+
[docs]class Tracker:
+    """Base class for dialog state tracker models."""
+    def __init__(self):
+        """The constructor of Tracker class."""
+        pass
+
+
[docs]def update(self, user_act=None):
+        """
+        Update the dialog state based on a new user dialog act.
+        Args:
+            user_act (dict or str): The dialog act (or utterance) of the user input. The type of user_act depends on
+                the state tracker: for the rule-based tracker it is a dict, while for MDBT it is a str.
+        Returns:
+            new_state (dict): Updated dialog state, with the same form as the previous state. Note that the dialog
+                state is also kept as a private data member.
+        """
+        pass
+
+
[docs]def init_session(self):
+        """Init the Tracker to start a new session."""
+        pass
Source code for convlab.modules.e2e.multiwoz.Mem2Seq.utils.measures
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import os
+import re
+import subprocess
+import tempfile
+
+import numpy
+import numpy as np
+from six.moves import urllib
+
+
+
[docs]def wer(r, h):
+    """
+    Calculate the word error rate (WER) between a reference r and a hypothesis h, as used in ASR.
+    Example: wer("what is it".split(), "what is".split())
+    """
+    # build the edit-distance matrix
+    # NOTE: uint8 keeps the original behavior but will overflow for sequences with more than 255 edits
+    d = numpy.zeros((len(r) + 1) * (len(h) + 1), dtype=numpy.uint8).reshape((len(r) + 1, len(h) + 1))
+    for i in range(len(r) + 1):
+        for j in range(len(h) + 1):
+            if i == 0:
+                d[0][j] = j
+            elif j == 0:
+                d[i][0] = i
+    for i in range(1, len(r) + 1):
+        for j in range(1, len(h) + 1):
+            if r[i - 1] == h[j - 1]:
+                d[i][j] = d[i - 1][j - 1]
+            else:
+                substitute = d[i - 1][j - 1] + 1
+                insert = d[i][j - 1] + 1
+                delete = d[i - 1][j] + 1
+                d[i][j] = min(substitute, insert, delete)
+    result = float(d[len(r)][len(h)]) / len(r) * 100
+    # result = str("%.2f" % result) + "%"
+    return result
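+
+# Added worked example (not part of the original module): deleting one word out
+# of a three-word reference gives WER = 1/3 * 100 ≈ 33.33.
+def _demo_wer():
+    score = wer("what is it".split(), "what is".split())
+    assert abs(score - 100.0 / 3) < 1e-6
+    return score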
+
+# -*- coding: utf-8 -*-
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""BLEU metric implementation.
+"""
+
+
+
[docs]def moses_multi_bleu(hypotheses, references, lowercase=False):
+    """Calculate the BLEU score for hypotheses and references
+    using the MOSES multi-bleu.perl script.
+    Args:
+        hypotheses: A numpy array of strings where each string is a single example.
+        references: A numpy array of strings where each string is a single example.
+        lowercase: If true, pass the "-lc" flag to the multi-bleu script
+    Returns:
+        The BLEU score as a float32 value.
+    """
+
+    if np.size(hypotheses) == 0:
+        return np.float32(0.0)
+
+    # Get MOSES multi-bleu script
+    try:
+        multi_bleu_path, _ = urllib.request.urlretrieve(
+            "https://raw.githubusercontent.com/moses-smt/mosesdecoder/"
+            "master/scripts/generic/multi-bleu.perl")
+        os.chmod(multi_bleu_path, 0o744)
+    except:  # pylint: disable=W0702
+        print("Unable to fetch multi-bleu.perl script, using local.")
+        metrics_dir = os.path.dirname(os.path.realpath(__file__))
+        bin_dir = os.path.abspath(os.path.join(metrics_dir, "..", "..", "bin"))
+        multi_bleu_path = os.path.join(bin_dir, "tools/multi-bleu.perl")
+
+    # Dump hypotheses and references to tempfiles
+    hypothesis_file = tempfile.NamedTemporaryFile()
+    hypothesis_file.write("\n".join(hypotheses).encode("utf-8"))
+    hypothesis_file.write(b"\n")
+    hypothesis_file.flush()
+    reference_file = tempfile.NamedTemporaryFile()
+    reference_file.write("\n".join(references).encode("utf-8"))
+    reference_file.write(b"\n")
+    reference_file.flush()
+
+    # Calculate BLEU using multi-bleu script
+    with open(hypothesis_file.name, "r") as read_pred:
+        bleu_cmd = [multi_bleu_path]
+        if lowercase:
+            bleu_cmd += ["-lc"]
+        bleu_cmd += [reference_file.name]
+        try:
+            bleu_out = subprocess.check_output(bleu_cmd, stdin=read_pred, stderr=subprocess.STDOUT)
+            bleu_out = bleu_out.decode("utf-8")
+            bleu_score = re.search(r"BLEU = (.+?),", bleu_out).group(1)
+            bleu_score = float(bleu_score)
+        except subprocess.CalledProcessError as error:
+            if error.output is not None:
+                print("multi-bleu.perl script returned non-zero exit code")
+                print(error.output)
+            bleu_score = np.float32(0.0)
+
+    # Close temp files
+    hypothesis_file.close()
+    reference_file.close()
+    return bleu_score
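+
+# Added usage sketch (not part of the original module): the score is corpus-level,
+# so hypotheses and references must be aligned line by line. Identical corpora
+# score 100.0, assuming the multi-bleu.perl script can be fetched or found locally.
+def _demo_moses_multi_bleu():
+    hypotheses = np.array(["the hotel is in the centre", "there is free parking"])
+    references = np.array(["the hotel is in the centre", "there is free parking"])
+    return moses_multi_bleu(hypotheses, references, lowercase=True)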
[docs]class BLEUScorer(object):
+    # BLEU score calculator via the GentScorer interface;
+    # it calculates corpus-level BLEU-4 by taking in the entire corpus,
+    # i.e. multiple candidates are scored against multiple references
+    def __init__(self):
+        pass
+
+
[docs]@report
+ defmatch_rate_metric(self,data,sub='match',bspans='./data/kvret/test.bspan.pkl'):
+ dials=self.pack_dial(data)
+ match,total=0,1e-8
+ #bspan_data = pickle.load(open(bspans,'rb'))
+ # find out the last placeholder and see whether that is correct
+ # if no such placeholder, see the final turn, because it can be a yes/no question or scheduling conversation
+ fordial_idindials:
+ dial=dials[dial_id]
+ gen_bspan,truth_cons,gen_cons=None,None,set()
+ truth_turn_num=-1
+ forturn_num,turninenumerate(dial):
+ if'SLOT'inturn['generated_response']:
+ gen_bspan=turn['generated_bspan']
+ gen_cons=self._extract_constraint(gen_bspan)
+ if'SLOT'inturn['response']:
+ truth_cons=self._extract_constraint(turn['bspan'])
+
+ # KVRET dataset includes "scheduling" (so often no SLOT decoded in ground truth)
+ ifnottruth_cons:
+ truth_bspan=dial[-1]['bspan']
+ truth_cons=self._extract_constraint(truth_bspan)
+ ifnotgen_cons:
+ gen_bspan=dial[-1]['generated_bspan']
+ gen_cons=self._extract_constraint(gen_bspan)
+
+ iftruth_cons:
+ ifself.constraint_same(gen_cons,truth_cons):
+ match+=1
+ #print(gen_cons, truth_cons, '+')
+ else:
+ print(gen_cons,truth_cons,'-')
+ total+=1
+
+ returnmatch/total
+
+ def_tokenize(self,sent):
+ return' '.join(word_tokenize(sent))
+
+ def_lemmatize(self,sent):
+ words=[wn.lemmatize(_)for_insent.split()]
+ #for idx,w in enumerate(words):
+ # if w !=
+ return' '.join(words)
+
+
[docs]def pad_sequences(sequences, maxlen=None, dtype='int32',
+                  padding='pre', truncating='pre', value=0.):
+    if not hasattr(sequences, '__len__'):
+        raise ValueError('`sequences` must be iterable.')
+    lengths = []
+    for x in sequences:
+        if not hasattr(x, '__len__'):
+            raise ValueError('`sequences` must be a list of iterables. '
+                             'Found non-iterable: ' + str(x))
+        lengths.append(len(x))
+
+    num_samples = len(sequences)
+    seq_maxlen = np.max(lengths)
+    if maxlen is not None and cfg.truncated:
+        maxlen = min(seq_maxlen, maxlen)
+    else:
+        maxlen = seq_maxlen
+    # take the sample shape from the first non-empty sequence
+    # checking for consistency in the main loop below.
+    sample_shape = tuple()
+    for s in sequences:
+        if len(s) > 0:
+            sample_shape = np.asarray(s).shape[1:]
+            break
+
+    x = (np.ones((num_samples, maxlen) + sample_shape) * value).astype(dtype)
+    for idx, s in enumerate(sequences):
+        if not len(s):
+            continue  # empty list/array was found
+        if truncating == 'pre':
+            trunc = s[-maxlen:]
+        elif truncating == 'post':
+            trunc = s[:maxlen]
+        else:
+            raise ValueError('Truncating type "%s" not understood' % truncating)
+
+        # check `trunc` has expected shape
+        trunc = np.asarray(trunc, dtype=dtype)
+        if trunc.shape[1:] != sample_shape:
+            raise ValueError('Shape of sample %s of sequence at position %s is different from expected shape %s' %
+                             (trunc.shape[1:], idx, sample_shape))
+
+        if padding == 'post':
+            x[idx, :len(trunc)] = trunc
+        elif padding == 'pre':
+            x[idx, -len(trunc):] = trunc
+        else:
+            raise ValueError('Padding type "%s" not understood' % padding)
+    return x
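+
+# Added usage sketch (not part of the original module): with maxlen=None the pad
+# length is the longest sequence, and the default 'pre' padding left-pads with
+# the fill value.
+def _demo_pad_sequences():
+    padded = pad_sequences([[1, 2, 3], [4, 5]], value=0.)
+    assert padded.tolist() == [[1, 2, 3], [0, 4, 5]]
+    return padded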
Source code for convlab.modules.nlg.multiwoz.multiwoz_template_nlg.multiwoz_template_nlg
+"""
+template NLG for multiwoz dataset. templates are in `multiwoz_template_nlg/` dir.
+See `example` function in this file for usage.
+"""
+importjson
+importos
+importrandom
+frompprintimportpprint
+
+fromconvlab.modules.nlg.nlgimportNLG
+
+
+
[docs]classMultiwozTemplateNLG(NLG):
+ def__init__(self,is_user,mode="manual"):
+ """
+ :param is_user: if dialog_act from user or system
+ :param mode: `auto`: templates extracted from data without manual modification, may have no match;
+ `manual`: templates with manual modification, sometimes verbose;
+ `auto_manual`: use auto templates first. When fails, use manual templates.
+ both template are dict, *_template[dialog_act][slot] is a list of templates.
+ """
+ super().__init__()
+ self.is_user=is_user
+ self.mode=mode
+ template_dir=os.path.dirname(os.path.abspath(__file__))
+ self.auto_user_template=read_json(os.path.join(template_dir,'auto_user_template_nlg.json'))
+ self.auto_system_template=read_json(os.path.join(template_dir,'auto_system_template_nlg.json'))
+ self.manual_user_template=read_json(os.path.join(template_dir,'manual_user_template_nlg.json'))
+ self.manual_system_template=read_json(os.path.join(template_dir,'manual_system_template_nlg.json'))
+
+
Source code for convlab.modules.nlg.multiwoz.sc_lstm.bleu
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+import argparse
+import json
+import sys
+import time
+
+from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
+
+
+#def delexicalise(sent,dact): # for domain4
+# feat = SoftDActFormatter().parse(dact,keepValues=True)
+# return ExactMatchDataLexicaliser().delexicalise(sent,feat['s2v'])
+#
+#
+#def lexicalise(sent,dact): # for domain4
+# feat = SoftDActFormatter().parse(dact,keepValues=True)
+# return ExactMatchDataLexicaliser().lexicalise(sent,feat['s2v'])
+#
+#
+#def parse_sr(sr, domain): # for domain4
+# '''
+# input da: 'inform(name=piperade;goodformeal=dinner;food=basque)'
+# return : a str 'domain|da|slot1, slot2, ...'
+# Note: cannot deal with repeat slots, e.g. slot_name*2 will has the same sr as slot_name*1
+# '''
+# da = sr.split('(')[0]
+# _sr = sr.split('(')[1].split(')')[0].split(';')
+# slots = []
+# for sv in _sr:
+# slots.append(sv.split('=')[0])
+# slots = sorted(slots)
+#
+# res = domain + '|' + da + '|'
+# for slot in slots:
+# res += (slot+',')
+# res = (res[:-1]) # remove last ,
+# return res
+#
+#
+
+# def score_domain4(res_file):
+# # parse test set to have semantic representation of each target
+# target2sr = {} # target sentence to a defined str of sr
+# sr2content = {}
+# domains = ['restaurant', 'hotel', 'tv', 'laptop']
+# repeat_count = 0
+# for domain in domains:
+# with open('data/domain4/original/'+domain+'/test.json') as f:
+# for i in range(5):
+# f.readline()
+# data = json.load(f)
+#
+# for sr, target, base in data:
+# target = delexicalise( normalize(re.sub(' [\.\?\!]$','',target)),sr)
+# target = lexicalise(target, sr)
+#
+# sr = parse_sr(sr, domain)
+# if target in target2sr:
+# repeat_count += 1
+# continue
+# if target[-1] == ' ':
+# target = target[:-1]
+# target2sr[target] = sr
+#
+# if sr not in sr2content:
+# sr2content[sr] = [[], [], []] # [ [refs], [bases], [gens] ]
+#
+# with open(res_file) as f:
+# for line in f:
+# if 'Target' in line:
+# target = line.strip().split(':')[1][1:]
+# sr = target2sr[target]
+# sr2content[sr][0].append(target)
+#
+# if 'Base' in line:
+# base = line.strip().split(':')[1][1:]
+# if base[-1] == ' ':
+# base = base[:-1]
+# sr2content[sr][1].append(base)
+#
+# if 'Gen' in line:
+# gen = line.strip().split(':')[1][1:]
+# sr2content[sr][2].append(gen)
+#
+# return sr2content
+
+
+
[docs]def mergeDicts(d0, d1):
+    """For all k in d1: d0[k] += d1[k], or copy d1[k] if k is new. The d's are dictionaries of key -> numpy array."""
+    for k in d1:
+        if k in d0:
+            d0[k] += d1[k]
+        else:
+            d0[k] = d1[k]
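+
+# Added usage sketch (not part of the original module): accumulating per-batch
+# count arrays into a running total, the typical use of mergeDicts. The keys and
+# values below are illustrative.
+def _demo_mergeDicts():
+    import numpy as np
+    totals = {'sent': np.array([1.0, 2.0])}
+    batch = {'sent': np.array([0.5, 0.5]), 'slot_error': np.array([3.0])}
+    mergeDicts(totals, batch)
+    assert totals['sent'].tolist() == [1.5, 2.5] and 'slot_error' in totals
+    return totals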
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+"""
+
+
[docs]class NLG:
+    """Base class for NLG model."""
+    def __init__(self):
+        """Constructor for the NLG class."""
+        pass
+
+
[docs]def generate(self, dialog_act):
+        """
+        Generate a natural language utterance conditioned on the dialog act produced by the Agenda or Policy.
+        Args:
+            dialog_act (dict): The dialog act of the following system response. The dialog act can be produced
+                either by the user agenda or by the system policy module.
+        Returns:
+            response (str): The natural language utterance for the input dialog_act.
+        """
+        pass
Source code for convlab.modules.nlu.multiwoz.milu.dai_f1_measure
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+fromtypingimportDict,List,Any
+
+fromallennlp.training.metrics.metricimportMetric
+
+
+
[docs]classDialogActItemF1Measure(Metric):
+ """
+ """
+ def__init__(self)->None:
+ """
+ Parameters
+ ----------
+ """
+ # These will hold per label span counts.
+ self._true_positives=0
+ self._false_positives=0
+ self._false_negatives=0
+
+
+ def__call__(self,
+ predictions:List[Dict[str,Any]],
+ gold_labels:List[Dict[str,Any]]):
+ """
+ Parameters
+ ----------
+ predictions : ``torch.Tensor``, required.
+ A tensor of predictions of shape (batch_size, sequence_length, num_classes).
+ gold_labels : ``torch.Tensor``, required.
+ A tensor of integer class label of shape (batch_size, sequence_length). It must be the same
+ shape as the ``predictions`` tensor without the ``num_classes`` dimension.
+ """
+ forprediction,gold_labelinzip(predictions,gold_labels):
+ fordatinprediction:
+ forsvinprediction[dat]:
+ ifdatnotingold_labelorsvnotingold_label[dat]:
+ self._false_positives+=1
+ else:
+ self._true_positives+=1
+ fordatingold_label:
+ forsvingold_label[dat]:
+ ifdatnotinpredictionorsvnotinprediction[dat]:
+ self._false_negatives+=1
+
+
+
[docs]defget_metric(self,reset:bool=False):
+ """
+ Returns
+ -------
+ A Dict per label containing following the span based metrics:
+ precision : float
+ recall : float
+ f1-measure : float
+
+ Additionally, an ``overall`` key is included, which provides the precision,
+ recall and f1-measure for all spans.
+ """
+ # Compute the precision, recall and f1 for all spans jointly.
+ precision,recall,f1_measure=self._compute_metrics(self._true_positives,
+ self._false_positives,
+ self._false_negatives)
+ metrics={}
+ metrics["precision"]=precision
+ metrics["recall"]=recall
+ metrics["f1-measure"]=f1_measure
+ ifreset:
+ self.reset()
+ returnmetrics
Source code for convlab.modules.nlu.multiwoz.milu.dataset_reader
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+importjson
+importlogging
+importos
+importrandom
+importzipfile
+fromtypingimportDict,List,Any
+
+fromallennlp.data.dataset_readers.dataset_readerimportDatasetReader
+fromallennlp.data.fieldsimportTextField,SequenceLabelField,MultiLabelField,MetadataField,Field
+fromallennlp.data.instanceimportInstance
+fromallennlp.data.token_indexersimportTokenIndexer,SingleIdTokenIndexer
+fromallennlp.data.tokenizersimportToken
+fromoverridesimportoverrides
+
+fromconvlab.lib.file_utilimportcached_path
+
+logger=logging.getLogger(__name__)# pylint: disable=invalid-name
+
+
+
[docs]@DatasetReader.register("milu")
+class MILUDatasetReader(DatasetReader):
+    """
+    Reads dialogs from a MultiWOZ-style annotated JSON file (optionally inside a zip archive)
+    and converts each turn into an ``Instance`` with tokens, BIO tags derived from ``span_info``,
+    intent labels, and the full dialog act, suitable for joint intent classification and slot tagging.
+
+    Parameters
+    ----------
+    context_size : ``int``, optional (default=``0``)
+        Maximum number of previous turns to include as context tokens.
+    agent : ``str``, optional (default=``None``)
+        If ``"user"`` or ``"system"``, only turns from that side of the dialog are yielded.
+    random_context_size : ``bool``, optional (default=``True``)
+        If ``True``, a random context length in ``[0, context_size]`` is sampled per turn.
+    token_delimiter : ``str``, optional (default=``None``)
+        The text that separates tokens. If ``None``, turns are split on whitespace.
+    token_indexers : ``Dict[str, TokenIndexer]``, optional (default=``{"tokens": SingleIdTokenIndexer()}``)
+        We use this to define the input representation for the text. See :class:`TokenIndexer`.
+        Note that the `output` tags will always correspond to single token IDs based on how they
+        are pre-tokenised in the data file.
+    """
+ def__init__(self,
+ context_size:int=0,
+ agent:str=None,
+ random_context_size:bool=True,
+ token_delimiter:str=None,
+ token_indexers:Dict[str,TokenIndexer]=None,
+ lazy:bool=False)->None:
+ super().__init__(lazy)
+ self._context_size=context_size
+ self._agent=agent
+ self._random_context_size=random_context_size
+ self._token_indexers=token_indexersor{'tokens':SingleIdTokenIndexer()}
+ self._token_delimiter=token_delimiter
+
+ @overrides
+ def_read(self,file_path):
+ # if `file_path` is a URL, redirect to the cache
+ file_path=cached_path(file_path)
+
+ iffile_path.endswith("zip"):
+ archive=zipfile.ZipFile(file_path,"r")
+ data_file=archive.open(os.path.basename(file_path)[:-4])
+ else:
+ data_file=open(file_path,"r")
+
+ logger.info("Reading instances from lines in file at: %s",file_path)
+
+ dialogs=json.load(data_file)
+
+ fordial_nameindialogs:
+ dialog=dialogs[dial_name]["log"]
+ context_tokens_list=[]
+ fori,turninenumerate(dialog):
+ tokens=turn["text"].split()
+
+ dialog_act={}
+ fordactsinturn["span_info"]:
+ ifdacts[0]notindialog_act:
+ dialog_act[dacts[0]]=[]
+ dialog_act[dacts[0]].append([dacts[1]," ".join(tokens[dacts[3]:dacts[4]+1])])
+
+ spans=turn["span_info"]
+ tags=[]
+ foriinrange(len(tokens)):
+ forspaninspans:
+ ifi==span[3]:
+ tags.append("B-"+span[0]+"+"+span[1])
+ break
+ ifi>span[3]andi<=span[4]:
+ tags.append("I-"+span[0]+"+"+span[1])
+ break
+ else:
+ tags.append("O")
+
+ intents=[]
+ fordactsinturn["dialog_act"]:
+ fordactinturn["dialog_act"][dacts]:
+ ifdactsnotindialog_actordact[0]notin[sv[0]forsvindialog_act[dacts]]:
+ ifdact[1]in["none","?","yes","no","do nt care","do n't care"]:
+ intents.append(dacts+"+"+dact[0]+"*"+dact[1])
+
+ fordactsinturn["dialog_act"]:
+ fordactinturn["dialog_act"][dacts]:
+ ifdactsnotindialog_act:
+ dialog_act[dacts]=turn["dialog_act"][dacts]
+ break
+ elifdact[0]notin[sv[0]forsvindialog_act[dacts]]:
+ dialog_act[dacts].append(dact)
+
+ num_context=random.randint(0,self._context_size)ifself._random_context_sizeelseself._context_size
+ iflen(context_tokens_list)>0andnum_context>0:
+ wrapped_context_tokens=[Token(token)forcontext_tokensincontext_tokens_list[-num_context:]fortokenincontext_tokens]
+ else:
+ wrapped_context_tokens=[Token("SENT_END")]
+ wrapped_tokens=[Token(token)fortokenintokens]
+ context_tokens_list.append(tokens+["SENT_END"])
+
+ ifself._agentandself._agent=="user"andi%2!=1:
+ continue
+ ifself._agentandself._agent=="system"andi%2!=0:
+ continue
+ yieldself.text_to_instance(wrapped_context_tokens,wrapped_tokens,tags,intents,dialog_act)
+
+
+
[docs]deftext_to_instance(self,context_tokens:List[Token],tokens:List[Token],tags:List[str]=None,
+ intents:List[str]=None,dialog_act:Dict[str,Any]=None)->Instance:# type: ignore
+ """
+ We take `pre-tokenized` input here, because we don't have a tokenizer in this class.
+ """
+ # pylint: disable=arguments-differ
+ fields:Dict[str,Field]={}
+ # print([t.text for t in context_tokens])
+ fields["context_tokens"]=TextField(context_tokens,self._token_indexers)
+ fields["tokens"]=TextField(tokens,self._token_indexers)
+ fields["metadata"]=MetadataField({"words":[x.textforxintokens]})
+ iftagsisnotNone:
+ fields["tags"]=SequenceLabelField(tags,fields["tokens"])
+ ifintentsisnotNone:
+ fields["intents"]=MultiLabelField(intents,label_namespace="intent_labels")
+ ifdialog_actisnotNone:
+ fields["metadata"]=MetadataField({"words":[x.textforxintokens],
+ 'dialog_act':dialog_act})
+ else:
+ fields["metadata"]=MetadataField({"words":[x.textforxintokens],'dialog_act':{}})
+ returnInstance(fields)
Source code for convlab.modules.nlu.multiwoz.milu.evaluate
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+The ``evaluate`` subcommand can be used to
+evaluate a trained model against a dataset
+and report any metrics calculated by the model.
+"""
+importargparse
+importjson
+importlogging
+fromtypingimportDict,Any
+
+fromallennlp.commonimportParams
+fromallennlp.common.utilimportprepare_environment
+fromallennlp.data.dataset_readers.dataset_readerimportDatasetReader
+fromallennlp.data.iteratorsimportDataIterator
+fromallennlp.models.archivalimportload_archive
+fromallennlp.training.utilimportevaluate
+
+fromconvlab.modules.nlu.multiwoz.miluimportdataset_reader,model
+
+logger=logging.getLogger(__name__)# pylint: disable=invalid-name
+
+
+argparser=argparse.ArgumentParser(description="Evaluate the specified model + dataset.")
+argparser.add_argument('archive_file',type=str,help='path to an archived trained model')
+
+argparser.add_argument('input_file',type=str,help='path to the file containing the evaluation data')
+
+argparser.add_argument('--output-file',type=str,help='path to output file')
+
+argparser.add_argument('--weights-file',
+ type=str,
+ help='a path that overrides which weights file to use')
+
+cuda_device=argparser.add_mutually_exclusive_group(required=False)
+cuda_device.add_argument('--cuda-device',
+ type=int,
+ default=-1,
+ help='id of GPU to use (if any)')
+
+argparser.add_argument('-o','--overrides',
+ type=str,
+ default="",
+ help='a JSON structure used to override the experiment configuration')
+
+argparser.add_argument('--batch-weight-key',
+ type=str,
+ default="",
+ help='If non-empty, name of metric used to weight the loss on a per-batch basis.')
+
+argparser.add_argument('--extend-vocab',
+ action='store_true',
+ default=False,
+ help='if specified, we will use the instances in your new dataset to '
+ 'extend your vocabulary. If pretrained-file was used to initialize '
+ 'embedding layers, you may also need to pass --embedding-sources-mapping.')
+
+argparser.add_argument('--embedding-sources-mapping',
+ type=str,
+ default="",
+ help='a JSON dict defining mapping from embedding module path to embedding'
+ 'pretrained-file used during training. If not passed, and embedding needs to be '
+ 'extended, we will try to use the original file paths used during training. If '
+ 'they are not available we will use random vectors for embedding extension.')
+
+
+
[docs]defevaluate_from_args(args:argparse.Namespace)->Dict[str,Any]:
+ # Disable some of the more verbose logging statements
+ logging.getLogger('allennlp.common.params').disabled=True
+ logging.getLogger('allennlp.nn.initializers').disabled=True
+ logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)
+
+ # Load from archive
+ archive=load_archive(args.archive_file,args.cuda_device,args.overrides,args.weights_file)
+ config=archive.config
+ prepare_environment(config)
+ model=archive.model
+ model.eval()
+
+ # Load the evaluation data
+
+ # Try to use the validation dataset reader if there is one - otherwise fall back
+ # to the default dataset_reader used for both training and validation.
+ validation_dataset_reader_params=config.pop('validation_dataset_reader',None)
+ ifvalidation_dataset_reader_paramsisnotNone:
+ dataset_reader=DatasetReader.from_params(validation_dataset_reader_params)
+ else:
+ dataset_reader=DatasetReader.from_params(config.pop('dataset_reader'))
+ evaluation_data_path=args.input_file
+ logger.info("Reading evaluation data from %s",evaluation_data_path)
+ instances=dataset_reader.read(evaluation_data_path)
+
+ embedding_sources:Dict[str,str]=(json.loads(args.embedding_sources_mapping)
+ ifargs.embedding_sources_mappingelse{})
+ ifargs.extend_vocab:
+ logger.info("Vocabulary is being extended with test instances.")
+ model.vocab.extend_from_instances(Params({}),instances=instances)
+ model.extend_embedder_vocab(embedding_sources)
+
+ iterator_params=config.pop("validation_iterator",None)
+ ifiterator_paramsisNone:
+ iterator_params=config.pop("iterator")
+ iterator=DataIterator.from_params(iterator_params)
+ iterator.index_with(model.vocab)
+
+ metrics=evaluate(model,instances,iterator,args.cuda_device,args.batch_weight_key)
+
+ logger.info("Finished evaluating.")
+ logger.info("Metrics:")
+ forkey,metricinmetrics.items():
+ logger.info("%s: %s",key,metric)
+
+ output_file=args.output_file
+ ifoutput_file:
+ withopen(output_file,"w")asfile:
+ json.dump(metrics,file,indent=4)
+ returnmetrics
[docs]defget_predicted_tags(self,sequence_logits:torch.Tensor)->torch.Tensor:
+ """
+ Does a simple position-wise argmax over each token, converts indices to string labels, and
+ adds a ``"tags"`` key to the dictionary with the result.
+ """
+ all_predictions=sequence_logits
+ all_predictions=all_predictions.detach().cpu().numpy()
+ ifall_predictions.ndim==3:
+ predictions_list=[all_predictions[i]foriinrange(all_predictions.shape[0])]
+ else:
+ predictions_list=[all_predictions]
+ all_tags=[]
+ forpredictionsinpredictions_list:
+ tags=np.argmax(predictions,axis=-1)
+ all_tags.append(tags)
+ returnall_tags
+
+
+
[docs]@overrides
+ defdecode(self,output_dict:Dict[str,torch.Tensor])->Dict[str,torch.Tensor]:
+ """
+ Converts the tag ids to the actual tags.
+ ``output_dict["tags"]`` is a list of lists of tag_ids,
+ so we use an ugly nested list comprehension.
+ """
+ output_dict["tags"]=[
+ [self.vocab.get_token_from_index(tag,namespace=self.sequence_label_namespace)
+ fortagininstance_tags]
+ forinstance_tagsinoutput_dict["tags"]
+ ]
+ output_dict["intents"]=[
+ [self.vocab.get_token_from_index(intent[0],namespace=self.intent_label_namespace)
+ forintentininstance_intents.nonzero().tolist()]
+ forinstance_intentsinoutput_dict["intents"]
+ ]
+
+ output_dict["dialog_act"]=[]
+ fori,tagsinenumerate(output_dict["tags"]):
+ seq_len=len(output_dict["words"][i])
+ spans=bio_tags_to_spans(tags[:seq_len])
+ dialog_act={}
+ forspaninspans:
+ domain_act=span[0].split("+")[0]
+ slot=span[0].split("+")[1]
+ value=" ".join(output_dict["words"][i][span[1][0]:span[1][1]+1])
+ ifdomain_actnotindialog_act:
+ dialog_act[domain_act]=[[slot,value]]
+ else:
+ dialog_act[domain_act].append([slot,value])
+ forintentinoutput_dict["intents"][i]:
+ if"+"inintent:
+ if"*"inintent:
+ intent,value=intent.split("*",1)
+ else:
+ value="?"
+ domain_act=intent.split("+")[0]
+ ifdomain_actnotindialog_act:
+ dialog_act[domain_act]=[[intent.split("+")[1],value]]
+ else:
+ dialog_act[domain_act].append([intent.split("+")[1],value])
+ else:
+ dialog_act[intent]=[["none","none"]]
+ output_dict["dialog_act"].append(dialog_act)
+
+ returnoutput_dict
[docs]defparse(self,utterance,context=[]):
+ """
+ Predict the dialog act of a natural language utterance and apply error model.
+ Args:
+ utterance (str): A natural language utterance.
+ Returns:
+ output (dict): The dialog act of utterance.
+ """
+ iflen(utterance)==0:
+ return{}
+
+ ifself.context_size>0andlen(context)>0:
+ context_tokens=sum([self.tokenizer.split_words(utterance+" SENT_END")forutteranceincontext[-self.context_size:]],[])
+ else:
+ context_tokens=self.tokenizer.split_words("SENT_END")
+ tokens=self.tokenizer.split_words(utterance)
+ instance=self.dataset_reader.text_to_instance(context_tokens,tokens)
+ outputs=self.model.forward_on_instance(instance)
+
+ returnoutputs["dialog_act"]
+
+
+if__name__=="__main__":
+ nlu=MILU()
+ test_contexts=[
+ "SENT_END",
+ "SENT_END",
+ "SENT_END",
+ "SENT_END",
+ "SENT_END",
+ "SENT_END",
+ "SENT_END",
+ "SENT_END",
+ "SENT_END",
+ "SENT_END",
+ "SENT_END",
+ "SENT_END",
+ ]
+ test_utterances=[
+ "What type of accommodations are they. No , i just need their address . Can you tell me if the hotel has internet available ?",
+ "What type of accommodations are they.",
+ "No , i just need their address .",
+ "Can you tell me if the hotel has internet available ?",
+ "you're welcome! enjoy your visit! goodbye.",
+ "yes. it should be moderately priced.",
+ "i want to book a table for 6 at 18:45 on thursday",
+ "i will be departing out of stevenage.",
+ "What is the Name of attraction ?",
+ "Can I get the name of restaurant?",
+ "Can I get the address and phone number of the restaurant?",
+ "do you have a specific area you want to stay in?"
+ ]
+ forctxt,uttinzip(test_contexts,test_utterances):
+ print(ctxt)
+ print(utt)
+ pprint(nlu.parse(utt))
+ # pprint(nlu.parse(utt.lower()))
+
+ test_contexts=[
+ "The phone number of the hotel is 12345678",
+ "I have many that meet your requests",
+ "The phone number of the hotel is 12345678",
+ "I found one hotel room",
+ "thank you",
+ "Is it moderately priced?",
+ "Can I help you with booking?",
+ "Where are you departing from?",
+ "I found an attraction",
+ "I found a restaurant",
+ "I found a restaurant",
+ "I'm looking for a place to stay.",
+ ]
+ forctxt,uttinzip(test_contexts,test_utterances):
+ print(ctxt)
+ print(utt)
+ pprint(nlu.parse(utt,[ctxt]))
+ # pprint(nlu.parse(utt.lower(), ctxt.lower()))
+
Source code for convlab.modules.nlu.multiwoz.milu.train
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+The ``train`` subcommand can be used to train a model.
+It requires a configuration file and a directory in
+which to write the results.
+"""
+
+importargparse
+importlogging
+importos
+
+fromallennlp.commonimportParams
+fromallennlp.common.checksimportcheck_for_gpu
+fromallennlp.common.utilimportprepare_environment,prepare_global_logging,cleanup_global_logging,dump_metrics
+fromallennlp.models.archivalimportarchive_model,CONFIG_NAME
+fromallennlp.models.modelimportModel,_DEFAULT_WEIGHTS
+fromallennlp.training.trainerimportTrainer,TrainerPieces
+fromallennlp.training.trainer_baseimportTrainerBase
+fromallennlp.training.utilimportcreate_serialization_dir,evaluate
+
+fromconvlab.modules.nlu.multiwoz.miluimportdataset_reader,model
+
+logger=logging.getLogger(__name__)# pylint: disable=invalid-name
+
+
+argparser=argparse.ArgumentParser(description="Train a model.")
+argparser.add_argument('param_path',
+ type=str,
+ help='path to parameter file describing the model to be trained')
+argparser.add_argument('-s','--serialization-dir',
+ required=True,
+ type=str,
+ help='directory in which to save the model and its logs')
+argparser.add_argument('-r','--recover',
+ action='store_true',
+ default=False,
+ help='recover training from the state in serialization_dir')
+argparser.add_argument('-f','--force',
+ action='store_true',
+ required=False,
+ help='overwrite the output directory if it exists')
+argparser.add_argument('-o','--overrides',
+ type=str,
+ default="",
+ help='a JSON structure used to override the experiment configuration')
+argparser.add_argument('--file-friendly-logging',
+ action='store_true',
+ default=False,
+ help='outputs tqdm status on separate lines and slows tqdm refresh rate')
+
+
+
+
[docs]deftrain_model_from_args(args:argparse.Namespace):
+ """
+ Just converts from an ``argparse.Namespace`` object to string paths.
+ """
+ train_model_from_file(args.param_path,
+ args.serialization_dir,
+ args.overrides,
+ args.file_friendly_logging,
+ args.recover,
+ args.force)
+
+
+
[docs]deftrain_model_from_file(parameter_filename:str,
+ serialization_dir:str,
+ overrides:str="",
+ file_friendly_logging:bool=False,
+ recover:bool=False,
+ force:bool=False)->Model:
+ """
+ A wrapper around :func:`train_model` which loads the params from a file.
+
+ Parameters
+ ----------
+ parameter_filename : ``str``
+ A json parameter file specifying an AllenNLP experiment.
+ serialization_dir : ``str``
+ The directory in which to save results and logs. We just pass this along to
+ :func:`train_model`.
+ overrides : ``str``
+ A JSON string that we will use to override values in the input parameter file.
+ file_friendly_logging : ``bool``, optional (default=False)
+ If ``True``, we make our output more friendly to saved model files. We just pass this
+ along to :func:`train_model`.
+ recover : ``bool`, optional (default=False)
+ If ``True``, we will try to recover a training run from an existing serialization
+ directory. This is only intended for use when something actually crashed during the middle
+ of a run. For continuing training a model on new data, see the ``fine-tune`` command.
+ force : ``bool``, optional (default=False)
+ If ``True``, we will overwrite the serialization directory if it already exists.
+ """
+ # Load the experiment config from a file and pass it to ``train_model``.
+ params=Params.from_file(parameter_filename,overrides)
+ returntrain_model(params,serialization_dir,file_friendly_logging,recover,force)
+
+
+
[docs]deftrain_model(params:Params,
+ serialization_dir:str,
+ file_friendly_logging:bool=False,
+ recover:bool=False,
+ force:bool=False)->Model:
+ """
+ Trains the model specified in the given :class:`Params` object, using the data and training
+ parameters also specified in that object, and saves the results in ``serialization_dir``.
+
+ Parameters
+ ----------
+ params : ``Params``
+ A parameter object specifying an AllenNLP Experiment.
+ serialization_dir : ``str``
+ The directory in which to save results and logs.
+ file_friendly_logging : ``bool``, optional (default=False)
+ If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
+ down tqdm's output to only once every 10 seconds.
+ recover : ``bool``, optional (default=False)
+ If ``True``, we will try to recover a training run from an existing serialization
+ directory. This is only intended for use when something actually crashed during the middle
+ of a run. For continuing training a model on new data, see the ``fine-tune`` command.
+ force : ``bool``, optional (default=False)
+ If ``True``, we will overwrite the serialization directory if it already exists.
+
+ Returns
+ -------
+ best_model: ``Model``
+ The model with the best epoch weights.
+ """
+ prepare_environment(params)
+ create_serialization_dir(params,serialization_dir,recover,force)
+ stdout_handler=prepare_global_logging(serialization_dir,file_friendly_logging)
+
+ cuda_device=params.params.get('trainer').get('cuda_device',-1)
+ check_for_gpu(cuda_device)
+
+ params.to_file(os.path.join(serialization_dir,CONFIG_NAME))
+
+ evaluate_on_test=params.pop_bool("evaluate_on_test",False)
+
+ trainer_type=params.get("trainer",{}).get("type","default")
+
+ iftrainer_type=="default":
+ # Special logic to instantiate backward-compatible trainer.
+ pieces=TrainerPieces.from_params(params,serialization_dir,recover)# pylint: disable=no-member
+ trainer=Trainer.from_params(
+ model=pieces.model,
+ serialization_dir=serialization_dir,
+ iterator=pieces.iterator,
+ train_data=pieces.train_dataset,
+ validation_data=pieces.validation_dataset,
+ params=pieces.params,
+ validation_iterator=pieces.validation_iterator)
+ evaluation_iterator=pieces.validation_iteratororpieces.iterator
+ evaluation_dataset=pieces.test_dataset
+
+ else:
+ trainer=TrainerBase.from_params(params,serialization_dir,recover)
+ # TODO(joelgrus): handle evaluation in the general case
+ evaluation_iterator=evaluation_dataset=None
+
+ params.assert_empty('base train command')
+
+ try:
+ metrics=trainer.train()
+ exceptKeyboardInterrupt:
+ # if we have completed an epoch, try to create a model archive.
+ ifos.path.exists(os.path.join(serialization_dir,_DEFAULT_WEIGHTS)):
+ logging.info("Training interrupted by the user. Attempting to create "
+ "a model archive using the current best epoch weights.")
+ archive_model(serialization_dir,files_to_archive=params.files_to_archive)
+ raise
+
+ # Evaluate
+ ifevaluation_datasetandevaluate_on_test:
+ logger.info("The model will be evaluated using the best epoch weights.")
+ test_metrics=evaluate(trainer.model,evaluation_dataset,evaluation_iterator,
+ cuda_device=trainer._cuda_devices[0],# pylint: disable=protected-access,
+ # TODO(brendanr): Pass in an arg following Joel's trainer refactor.
+ batch_weight_key="")
+
+ forkey,valueintest_metrics.items():
+ metrics["test_"+key]=value
+
+ elifevaluation_dataset:
+ logger.info("To evaluate on the test set after training, pass the "
+ "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")
+
+ cleanup_global_logging(stdout_handler)
+
+ # Now tar up results
+ archive_model(serialization_dir,files_to_archive=params.files_to_archive)
+ dump_metrics(os.path.join(serialization_dir,"metrics.json"),metrics,log=True)
+
+ # We count on the trainer to have the model with best weights
+ returntrainer.model
Source code for convlab.modules.nlu.multiwoz.onenet.dai_f1_measure
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+fromtypingimportDict,List,Any
+
+fromallennlp.training.metrics.metricimportMetric
+
+
+
[docs]classDialogActItemF1Measure(Metric):
+ """
+ """
+ def__init__(self)->None:
+ """
+ Parameters
+ ----------
+ """
+ # These will hold per label span counts.
+ self._true_positives=0
+ self._false_positives=0
+ self._false_negatives=0
+
+
+ def__call__(self,
+ predictions:List[Dict[str,Any]],
+ gold_labels:List[Dict[str,Any]]):
+ """
+ Parameters
+ ----------
+ predictions : ``torch.Tensor``, required.
+ A tensor of predictions of shape (batch_size, sequence_length, num_classes).
+ gold_labels : ``torch.Tensor``, required.
+ A tensor of integer class label of shape (batch_size, sequence_length). It must be the same
+ shape as the ``predictions`` tensor without the ``num_classes`` dimension.
+ """
+ forprediction,gold_labelinzip(predictions,gold_labels):
+ fordatinprediction:
+ forsvinprediction[dat]:
+ ifdatnotingold_labelorsvnotingold_label[dat]:
+ self._false_positives+=1
+ else:
+ self._true_positives+=1
+ fordatingold_label:
+ forsvingold_label[dat]:
+ ifdatnotinpredictionorsvnotinprediction[dat]:
+ self._false_negatives+=1
+
+
+
[docs]defget_metric(self,reset:bool=False):
+ """
+ Returns
+ -------
+ A Dict per label containing following the span based metrics:
+ precision : float
+ recall : float
+ f1-measure : float
+
+ Additionally, an ``overall`` key is included, which provides the precision,
+ recall and f1-measure for all spans.
+ """
+ # Compute the precision, recall and f1 for all spans jointly.
+ precision,recall,f1_measure=self._compute_metrics(self._true_positives,
+ self._false_positives,
+ self._false_negatives)
+ metrics={}
+ metrics["precision"]=precision
+ metrics["recall"]=recall
+ metrics["f1-measure"]=f1_measure
+ ifreset:
+ self.reset()
+ returnmetrics
Source code for convlab.modules.nlu.multiwoz.onenet.evaluate
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+The ``evaluate`` subcommand can be used to
+evaluate a trained model against a dataset
+and report any metrics calculated by the model.
+"""
+importargparse
+importjson
+importlogging
+fromtypingimportDict,Any
+
+fromallennlp.commonimportParams
+fromallennlp.common.utilimportprepare_environment
+fromallennlp.data.dataset_readers.dataset_readerimportDatasetReader
+fromallennlp.data.iteratorsimportDataIterator
+fromallennlp.models.archivalimportload_archive
+fromallennlp.training.utilimportevaluate
+
+fromconvlab.modules.nlu.multiwoz.onenetimportdataset_reader,model
+
+logger=logging.getLogger(__name__)# pylint: disable=invalid-name
+
+
+argparser=argparse.ArgumentParser(description="Evaluate the specified model + dataset.")
+argparser.add_argument('archive_file',type=str,help='path to an archived trained model')
+
+argparser.add_argument('input_file',type=str,help='path to the file containing the evaluation data')
+
+argparser.add_argument('--output-file',type=str,help='path to output file')
+
+argparser.add_argument('--weights-file',
+ type=str,
+ help='a path that overrides which weights file to use')
+
+cuda_device=argparser.add_mutually_exclusive_group(required=False)
+cuda_device.add_argument('--cuda-device',
+ type=int,
+ default=-1,
+ help='id of GPU to use (if any)')
+
+argparser.add_argument('-o','--overrides',
+ type=str,
+ default="",
+ help='a JSON structure used to override the experiment configuration')
+
+argparser.add_argument('--batch-weight-key',
+ type=str,
+ default="",
+ help='If non-empty, name of metric used to weight the loss on a per-batch basis.')
+
+argparser.add_argument('--extend-vocab',
+ action='store_true',
+ default=False,
+ help='if specified, we will use the instances in your new dataset to '
+ 'extend your vocabulary. If pretrained-file was used to initialize '
+ 'embedding layers, you may also need to pass --embedding-sources-mapping.')
+
+argparser.add_argument('--embedding-sources-mapping',
+ type=str,
+ default="",
+ help='a JSON dict defining mapping from embedding module path to embedding'
+ 'pretrained-file used during training. If not passed, and embedding needs to be '
+ 'extended, we will try to use the original file paths used during training. If '
+ 'they are not available we will use random vectors for embedding extension.')
+
+
+
[docs]defevaluate_from_args(args:argparse.Namespace)->Dict[str,Any]:
+ # Disable some of the more verbose logging statements
+ logging.getLogger('allennlp.common.params').disabled=True
+ logging.getLogger('allennlp.nn.initializers').disabled=True
+ logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)
+
+ # Load from archive
+ archive=load_archive(args.archive_file,args.cuda_device,args.overrides,args.weights_file)
+ config=archive.config
+ prepare_environment(config)
+ model=archive.model
+ model.eval()
+
+ # Load the evaluation data
+
+ # Try to use the validation dataset reader if there is one - otherwise fall back
+ # to the default dataset_reader used for both training and validation.
+ validation_dataset_reader_params=config.pop('validation_dataset_reader',None)
+ ifvalidation_dataset_reader_paramsisnotNone:
+ dataset_reader=DatasetReader.from_params(validation_dataset_reader_params)
+ else:
+ dataset_reader=DatasetReader.from_params(config.pop('dataset_reader'))
+ evaluation_data_path=args.input_file
+ logger.info("Reading evaluation data from %s",evaluation_data_path)
+ instances=dataset_reader.read(evaluation_data_path)
+
+ embedding_sources:Dict[str,str]=(json.loads(args.embedding_sources_mapping)
+ ifargs.embedding_sources_mappingelse{})
+ ifargs.extend_vocab:
+ logger.info("Vocabulary is being extended with test instances.")
+ model.vocab.extend_from_instances(Params({}),instances=instances)
+ model.extend_embedder_vocab(embedding_sources)
+
+ iterator_params=config.pop("validation_iterator",None)
+ ifiterator_paramsisNone:
+ iterator_params=config.pop("iterator")
+ iterator=DataIterator.from_params(iterator_params)
+ iterator.index_with(model.vocab)
+
+ metrics=evaluate(model,instances,iterator,args.cuda_device,args.batch_weight_key)
+
+ logger.info("Finished evaluating.")
+ logger.info("Metrics:")
+ forkey,metricinmetrics.items():
+ logger.info("%s: %s",key,metric)
+
+ output_file=args.output_file
+ ifoutput_file:
+ withopen(output_file,"w")asfile:
+ json.dump(metrics,file,indent=4)
+ returnmetrics
[docs]defget_predicted_tags(self,sequence_logits:torch.Tensor)->torch.Tensor:
+ """
+ Does a simple position-wise argmax over each token, converts indices to string labels, and
+ adds a ``"tags"`` key to the dictionary with the result.
+ """
+ all_predictions=sequence_logits
+ all_predictions=all_predictions.detach().cpu().numpy()
+ ifall_predictions.ndim==3:
+ predictions_list=[all_predictions[i]foriinrange(all_predictions.shape[0])]
+ else:
+ predictions_list=[all_predictions]
+ all_tags=[]
+ forpredictionsinpredictions_list:
+ tags=np.argmax(predictions,axis=-1)
+ all_tags.append(tags)
+ returnall_tags
+
+
+
[docs]@overrides
+ defdecode(self,output_dict:Dict[str,torch.Tensor])->Dict[str,torch.Tensor]:
+ """
+ Converts the tag ids to the actual tags.
+ ``output_dict["tags"]`` is a list of lists of tag_ids,
+ so we use an ugly nested list comprehension.
+ """
+ output_dict["tags"]=[
+ [self.vocab.get_token_from_index(tag,namespace=self.tag_label_namespace)
+ fortagininstance_tags]
+ forinstance_tagsinoutput_dict["tags"]
+ ]
+
+ argmax_indices=np.argmax(output_dict["domain_probs"].detach().cpu().numpy(),axis=-1)
+ output_dict["domain"]=[self.vocab.get_token_from_index(x,namespace="domain_labels")
+ forxinargmax_indices]
+
+ argmax_indices=np.argmax(output_dict["intent_probs"].detach().cpu().numpy(),axis=-1)
+ output_dict["intent"]=[self.vocab.get_token_from_index(x,namespace="intent_labels")
+ forxinargmax_indices]
+
+ output_dict["dialog_act"]=[]
+ fori,domaininenumerate(output_dict["domain"]):
+ if"+"notinoutput_dict["intent"][i]:
+ tags=[]
+ seq_len=len(output_dict["words"][i])
+ forspaninbio_tags_to_spans(output_dict["tags"][i][:seq_len]):
+ tags.append([span[0]," ".join(output_dict["words"][i][span[1][0]:span[1][1]+1])])
+ intent=output_dict["intent"][i]iflen(tags)>0else"None"
+ else:
+ intent,value=output_dict["intent"][i].split("*",1)
+ intent,slot=intent.split("+")
+ tags=[[slot,value]]
+ dialog_act={domain+"-"+intent:tags}ifdomain!="None"andintent!="None"else{}
+ output_dict["dialog_act"].append(dialog_act)
+
+ returnoutput_dict
[docs]defparse(self,utterance,context=[]):
+ """
+ Predict the dialog act of a natural language utterance and apply error model.
+ Args:
+ utterance (str): A natural language utterance.
+ Returns:
+ output (dict): The dialog act of utterance.
+ """
+ # print("nlu input:")
+ # pprint(utterance)
+
+ iflen(utterance)==0:
+ return{}
+
+ tokens=self.tokenizer.split_words(utterance)
+ instance=self.dataset_reader.text_to_instance(tokens)
+ outputs=self.model.forward_on_instance(instance)
+
+ returnoutputs["dialog_act"]
+
+
+if__name__=="__main__":
+ nlu=OneNetLU()
+ test_utterances=[
+ "What type of accommodations are they. No , i just need their address . Can you tell me if the hotel has internet available ?",
+ "What type of accommodations are they.",
+ "No , i just need their address .",
+ "Can you tell me if the hotel has internet available ?",
+ "you're welcome! enjoy your visit! goodbye.",
+ "yes. it should be moderately priced.",
+ "i want to book a table for 6 at 18:45 on thursday",
+ "i will be departing out of stevenage.",
+ "What is the Name of attraction ?",
+ "Can I get the name of restaurant?",
+ "do you have a specific area you want to stay in?"
+ ]
+ foruttintest_utterances:
+ print(utt)
+ pprint(nlu.parse(utt))
+
Source code for convlab.modules.nlu.multiwoz.onenet.train
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+The ``train`` subcommand can be used to train a model.
+It requires a configuration file and a directory in
+which to write the results.
+"""
+
+importargparse
+importlogging
+importos
+
+fromallennlp.commonimportParams
+fromallennlp.common.checksimportcheck_for_gpu
+fromallennlp.common.utilimportprepare_environment,prepare_global_logging,cleanup_global_logging,dump_metrics
+fromallennlp.models.archivalimportarchive_model,CONFIG_NAME
+fromallennlp.models.modelimportModel,_DEFAULT_WEIGHTS
+fromallennlp.training.trainerimportTrainer,TrainerPieces
+fromallennlp.training.trainer_baseimportTrainerBase
+fromallennlp.training.utilimportcreate_serialization_dir,evaluate
+
+fromconvlab.modules.nlu.multiwoz.onenetimportdataset_reader,model
+
+logger=logging.getLogger(__name__)# pylint: disable=invalid-name
+
+
+argparser=argparse.ArgumentParser(description="Train a model.")
+argparser.add_argument('param_path',
+ type=str,
+ help='path to parameter file describing the model to be trained')
+argparser.add_argument('-s','--serialization-dir',
+ required=True,
+ type=str,
+ help='directory in which to save the model and its logs')
+argparser.add_argument('-r','--recover',
+ action='store_true',
+ default=False,
+ help='recover training from the state in serialization_dir')
+argparser.add_argument('-f','--force',
+ action='store_true',
+ required=False,
+ help='overwrite the output directory if it exists')
+argparser.add_argument('-o','--overrides',
+ type=str,
+ default="",
+ help='a JSON structure used to override the experiment configuration')
+argparser.add_argument('--file-friendly-logging',
+ action='store_true',
+ default=False,
+ help='outputs tqdm status on separate lines and slows tqdm refresh rate')
+
+
+
+
[docs]deftrain_model_from_args(args:argparse.Namespace):
+ """
+ Just converts from an ``argparse.Namespace`` object to string paths.
+ """
+ train_model_from_file(args.param_path,
+ args.serialization_dir,
+ args.overrides,
+ args.file_friendly_logging,
+ args.recover,
+ args.force)
+
+
+
[docs]deftrain_model_from_file(parameter_filename:str,
+ serialization_dir:str,
+ overrides:str="",
+ file_friendly_logging:bool=False,
+ recover:bool=False,
+ force:bool=False)->Model:
+ """
+ A wrapper around :func:`train_model` which loads the params from a file.
+
+ Parameters
+ ----------
+ parameter_filename : ``str``
+ A json parameter file specifying an AllenNLP experiment.
+ serialization_dir : ``str``
+ The directory in which to save results and logs. We just pass this along to
+ :func:`train_model`.
+ overrides : ``str``
+ A JSON string that we will use to override values in the input parameter file.
+ file_friendly_logging : ``bool``, optional (default=False)
+ If ``True``, we make our output more friendly to saved model files. We just pass this
+ along to :func:`train_model`.
+ recover : ``bool``, optional (default=False)
+ If ``True``, we will try to recover a training run from an existing serialization
+ directory. This is only intended for use when something actually crashed during the middle
+ of a run. For continuing training a model on new data, see the ``fine-tune`` command.
+ force : ``bool``, optional (default=False)
+ If ``True``, we will overwrite the serialization directory if it already exists.
+ """
+ # Load the experiment config from a file and pass it to ``train_model``.
+ params=Params.from_file(parameter_filename,overrides)
+ returntrain_model(params,serialization_dir,file_friendly_logging,recover,force)
+
+
+
[docs]deftrain_model(params:Params,
+ serialization_dir:str,
+ file_friendly_logging:bool=False,
+ recover:bool=False,
+ force:bool=False)->Model:
+ """
+ Trains the model specified in the given :class:`Params` object, using the data and training
+ parameters also specified in that object, and saves the results in ``serialization_dir``.
+
+ Parameters
+ ----------
+ params : ``Params``
+ A parameter object specifying an AllenNLP Experiment.
+ serialization_dir : ``str``
+ The directory in which to save results and logs.
+ file_friendly_logging : ``bool``, optional (default=False)
+ If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
+ down tqdm's output to only once every 10 seconds.
+ recover : ``bool``, optional (default=False)
+ If ``True``, we will try to recover a training run from an existing serialization
+ directory. This is only intended for use when something actually crashed during the middle
+ of a run. For continuing training a model on new data, see the ``fine-tune`` command.
+ force : ``bool``, optional (default=False)
+ If ``True``, we will overwrite the serialization directory if it already exists.
+
+ Returns
+ -------
+ best_model: ``Model``
+ The model with the best epoch weights.
+ """
+ prepare_environment(params)
+ create_serialization_dir(params,serialization_dir,recover,force)
+ stdout_handler=prepare_global_logging(serialization_dir,file_friendly_logging)
+
+ cuda_device=params.params.get('trainer').get('cuda_device',-1)
+ check_for_gpu(cuda_device)
+
+ params.to_file(os.path.join(serialization_dir,CONFIG_NAME))
+
+ evaluate_on_test=params.pop_bool("evaluate_on_test",False)
+
+ trainer_type=params.get("trainer",{}).get("type","default")
+
+ iftrainer_type=="default":
+ # Special logic to instantiate backward-compatible trainer.
+ pieces=TrainerPieces.from_params(params,serialization_dir,recover)# pylint: disable=no-member
+ trainer=Trainer.from_params(
+ model=pieces.model,
+ serialization_dir=serialization_dir,
+ iterator=pieces.iterator,
+ train_data=pieces.train_dataset,
+ validation_data=pieces.validation_dataset,
+ params=pieces.params,
+ validation_iterator=pieces.validation_iterator)
+ evaluation_iterator=pieces.validation_iteratororpieces.iterator
+ evaluation_dataset=pieces.test_dataset
+
+ else:
+ trainer=TrainerBase.from_params(params,serialization_dir,recover)
+ # TODO(joelgrus): handle evaluation in the general case
+ evaluation_iterator=evaluation_dataset=None
+
+ params.assert_empty('base train command')
+
+ try:
+ metrics=trainer.train()
+ exceptKeyboardInterrupt:
+ # if we have completed an epoch, try to create a model archive.
+ ifos.path.exists(os.path.join(serialization_dir,_DEFAULT_WEIGHTS)):
+ logging.info("Training interrupted by the user. Attempting to create "
+ "a model archive using the current best epoch weights.")
+ archive_model(serialization_dir,files_to_archive=params.files_to_archive)
+ raise
+
+ # Evaluate
+ ifevaluation_datasetandevaluate_on_test:
+ logger.info("The model will be evaluated using the best epoch weights.")
+ test_metrics=evaluate(trainer.model,evaluation_dataset,evaluation_iterator,
+ cuda_device=trainer._cuda_devices[0],# pylint: disable=protected-access,
+ # TODO(brendanr): Pass in an arg following Joel's trainer refactor.
+ batch_weight_key="")
+
+ forkey,valueintest_metrics.items():
+ metrics["test_"+key]=value
+
+ elifevaluation_dataset:
+ logger.info("To evaluate on the test set after training, pass the "
+ "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")
+
+ cleanup_global_logging(stdout_handler)
+
+ # Now tar up results
+ archive_model(serialization_dir,files_to_archive=params.files_to_archive)
+ dump_metrics(os.path.join(serialization_dir,"metrics.json"),metrics,log=True)
+
+ # We count on the trainer to have the model with best weights
+ returntrainer.model
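As a usage sketch (the config path and serialization directory below are hypothetical placeholders), the OneNet trainer can also be driven programmatically through train_model_from_file instead of the argparser:

from convlab.modules.nlu.multiwoz.onenet.train import train_model_from_file

# Hypothetical paths: any AllenNLP experiment config for OneNet and any writable output dir.
model = train_model_from_file("configs/onenet.jsonnet",
                              "output/onenet",
                              force=True)  # overwrite the serialization dir if it already exists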
[docs]defexport(self,models_fname,dictionary_fname,config_fname):
+ print("exporting Classifier for Caesar to read")
+ print("models to be saved in",models_fname)
+ print("dictionary to be saved in",dictionary_fname)
+ print("config to be saved in",config_fname)
+
+ ifself.type!="svm":
+ print("Only know how to export SVMs")
+ return
+ lines=[]
+ forthis_tupleinself.classifiers:
+ if self.classifiers[this_tuple] is not None:
+ t=this_tuple
+ ifTuples.is_generic(this_tuple[-1]):
+ t=this_tuple[:-1]+("<generic_value>",)
+ lines+=['('+','.join(t)+')']
+ lines+=sutils.svm_to_libsvm(self.classifiers[this_tuple].model)
+ lines+=[".",""]
+ models_savefile = open(models_fname, "w")
+ forlineinlines:
+ models_savefile.write(line+"\n")
+ models_savefile.close()
+
+ # save dictionary
+ json_dictionary=[]
+ dictionary_items = sorted(self.dictionary.items(), key=lambda x: x[1])
+ assert [x[1] for x in dictionary_items] == list(range(len(self.dictionary)))
+ keys=[list(x[0])forxindictionary_items]
+
+ json.dump(keys,open(dictionary_fname,"w"))
+
+
+ # save config
+ config_savefile=open(config_fname,"w")
+ config_savefile.write("# Automatically generated by CNetTrain scripts\n")
+ options={
+ "FEATURES":json.dumps(self.features),
+ "MAX_ACTIVE_TUPLES":str(self.tuples.max_active),
+ "TAIL_CUTOFF":str(self.tuples.tail_cutoff),
+ "MODELS":os.path.join(os.getcwd(),models_fname),
+ "DICTIONARY":os.path.join(os.getcwd(),dictionary_fname),
+
+ }
+ if"cnet"inself.features:
+ index=self.features.index("cnet")
+ cnf=self.feature_extractors[index]
+ options["MAX_NGRAM_LENGTH"]=str(cnf.max_length)
+ options["MAX_NGRAMS"]=str(cnf.max_ngrams)
+ forkeyinoptions:
+ this_line="CNET : %s"%key
+ this_line=this_line.ljust(30)
+ this_line+="= "+options[key]
+ config_savefile.write("\t"+this_line+"\n")
+ config_savefile.close()
+ print("exported Classifier.")
+
+
+
[docs]deftoSparse(baseX,X,dictionary):
+ # convert baseX & X (a list of dictionaries), to a sparse matrix, using dictionary to map to indices
+ out=lil_matrix((len(X),len(dictionary)))
+ fori,(basex,x)inenumerate(zip(baseX,X)):
+ forkeyinbasex:
+ ifkeynotindictionary:
+ continue
+ out[i,dictionary[key]]=basex[key]
+ forkeyinx:
+ ifkeynotindictionary:
+ continue
+ out[i,dictionary[key]]=x[key]
+
+ out=out.tocsr()
+ returnout
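A toy illustration of toSparse (values chosen arbitrarily, assuming the function defined above is in scope): two pairs of feature dictionaries are mapped into a 2 x 3 CSR matrix, and keys absent from the dictionary are silently skipped.

dictionary = {"hello": 0, "world": 1, "bye": 2}   # feature -> column index
baseX = [{"hello": 1.0}, {"bye": 2.0}]
X = [{"world": 0.5}, {"unseen": 1.0}]             # "unseen" is not in dictionary, so it is dropped
mat = toSparse(baseX, X, dictionary)              # scipy.sparse CSR matrix of shape (2, 3)
print(mat.toarray())                              # [[1.  0.5 0. ]
                                                  #  [0.  0.  2. ]]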
+
+
+# classifiers define :
+# train(X,y)
+# predict(X)
+# params()
+# load(params)
+# X is a sparse matrix, y is a vector of class labels (ints)
+fromsklearnimportsvm
+
Source code for convlab.modules.nlu.multiwoz.svm.Features
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+
+importitertools
+importmath
+importjson
+fromcollectionsimportdefaultdict
+
+fromconvlab.modules.nlu.multiwoz.svmimportTuples
+
+
+
Source code for convlab.modules.nlu.multiwoz.svm.Tuples
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+# deal with tuples and dialog acts
+importmath
+importos
+importre
+importjson
+
+fromconvlab.modules.nlu.multiwoz.svmimportsutils
+
+
+
+
+if__name__=="__main__":
+ nlu=SVMNLU()
+ test_utterances=[
+ "What type of accommodations are they. No , i just need their address . Can you tell me if the hotel has internet available ?",
+ "What type of accommodations are they.",
+ "No , i just need their address .",
+ "Can you tell me if the hotel has internet available ?"
+ "you're welcome! enjoy your visit! goodbye.",
+ "yes. it should be moderately priced.",
+ "i want to book a table for 6 at 18:45 on thursday",
+ "i will be departing out of stevenage.",
+ "What is the Name of attraction ?",
+ "Can I get the name of restaurant?",
+ "Can I get the address and phone number of the restaurant?",
+ "do you have a specific area you want to stay in?"
+ ]
+ foruttintest_utterances:
+ print(utt)
+ print(nlu.parse(utt))
+
Source code for convlab.modules.nlu.multiwoz.svm.sutils
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+# misc useful functions
+
+importimp
+importos
+
+
+
[docs]defdataset_walker(dataset=None,dataroot=None,labels=None):
+ # we assume that the dataset_walker class in dataroot/../../scripts
+ # is the one to use
+ scripts_folder=os.path.join(dataroot,'../..',"scripts")
+ # print(scripts_folder)
+ _dw=imp.load_source('dataset_walker',os.path.join(scripts_folder,"dataset_walker.py"))
+ return_dw.dataset_walker(dataset,dataroot=dataroot,labels=labels)
Source code for convlab.modules.nlu.multiwoz.svm.train
+# Modified by Microsoft Corporation.
+# Licensed under the MIT license.
+
+
+importconfigparser
+importos
+importpprint
+importsys
+importzipfile
+
+fromconvlab.modules.nlu.multiwoz.svmimportClassifier,sutils
+
+
+
[docs]defmergeDicts(d0,d1):
+ """ for all k in d0, d0 += d1 . d's are dictionaries of key -> numpy array """
+ forkind1:
+ ifkind0:d0[k]+=d1[k]
+ else:d0[k]=d1[k]
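A small hand-written illustration of mergeDicts: keys present in both dictionaries are summed element-wise, and keys that only exist in d1 are copied into d0.

import numpy as np

d0 = {"food": np.array([1, 0]), "area": np.array([0, 1])}
d1 = {"food": np.array([0, 2]), "price": np.array([1, 1])}
mergeDicts(d0, d1)
# d0 is now {"food": array([1, 2]), "area": array([0, 1]), "price": array([1, 1])}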
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+"""
+
+
[docs]classNLU:
+ """Base class for NLU model."""
+
+ def__init__(self):
+ """ Constructor for NLU class. """
+
+
[docs]defparse(self,utterance,context=None):
+ """
+ Predict the dialog act of a natural language utterance and apply error model.
+ Args:
+ utterance (str): The user input, a natural language utterance.
+ Returns:
+ output (dict): The parsed dialog act of the input NL utterance.
+ """
+ pass
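A hypothetical minimal subclass (not part of ConvLab, shown only to illustrate the contract): parse() returns a dict mapping "Domain-Intent" strings to lists of [slot, value] pairs, matching the dialog_act structure produced by the NLU models above.

class KeywordNLU(NLU):
    """Toy NLU that only spots address requests for the hotel domain."""

    def parse(self, utterance, context=None):
        if "address" in utterance.lower():
            return {"Hotel-Request": [["Addr", "?"]]}
        return {}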
Source code for convlab.modules.policy.system.multiwoz.vanilla_mle.evaluate
+"""
+The ``evaluate`` subcommand can be used to
+evaluate a trained model against a dataset
+and report any metrics calculated by the model.
+"""
+importargparse
+importjson
+importlogging
+fromtypingimportDict,Any
+
+fromallennlp.commonimportParams
+fromallennlp.common.utilimportprepare_environment
+fromallennlp.data.dataset_readers.dataset_readerimportDatasetReader
+fromallennlp.data.iteratorsimportDataIterator
+fromallennlp.models.archivalimportload_archive
+fromallennlp.training.utilimportevaluate
+
+fromconvlab.modules.policy.system.multiwoz.vanilla_mleimportdataset_reader,model
+
+logger=logging.getLogger(__name__)# pylint: disable=invalid-name
+
+
+argparser=argparse.ArgumentParser(description="Evaluate the specified model + dataset.")
+argparser.add_argument('archive_file',type=str,help='path to an archived trained model')
+
+argparser.add_argument('input_file',type=str,help='path to the file containing the evaluation data')
+
+argparser.add_argument('--output-file',type=str,help='path to output file')
+
+argparser.add_argument('--weights-file',
+ type=str,
+ help='a path that overrides which weights file to use')
+
+cuda_device=argparser.add_mutually_exclusive_group(required=False)
+cuda_device.add_argument('--cuda-device',
+ type=int,
+ default=-1,
+ help='id of GPU to use (if any)')
+
+argparser.add_argument('-o','--overrides',
+ type=str,
+ default="",
+ help='a JSON structure used to override the experiment configuration')
+
+argparser.add_argument('--batch-weight-key',
+ type=str,
+ default="",
+ help='If non-empty, name of metric used to weight the loss on a per-batch basis.')
+
+argparser.add_argument('--extend-vocab',
+ action='store_true',
+ default=False,
+ help='if specified, we will use the instances in your new dataset to '
+ 'extend your vocabulary. If pretrained-file was used to initialize '
+ 'embedding layers, you may also need to pass --embedding-sources-mapping.')
+
+argparser.add_argument('--embedding-sources-mapping',
+ type=str,
+ default="",
+ help='a JSON dict defining mapping from embedding module path to embedding'
+ 'pretrained-file used during training. If not passed, and embedding needs to be '
+ 'extended, we will try to use the original file paths used during training. If '
+ 'they are not available we will use random vectors for embedding extension.')
+
+
+
[docs]defevaluate_from_args(args:argparse.Namespace)->Dict[str,Any]:
+ # Disable some of the more verbose logging statements
+ logging.getLogger('allennlp.common.params').disabled=True
+ logging.getLogger('allennlp.nn.initializers').disabled=True
+ logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)
+
+ # Load from archive
+ archive=load_archive(args.archive_file,args.cuda_device,args.overrides,args.weights_file)
+ config=archive.config
+ prepare_environment(config)
+ model=archive.model
+ model.eval()
+
+ # Load the evaluation data
+
+ # Try to use the validation dataset reader if there is one - otherwise fall back
+ # to the default dataset_reader used for both training and validation.
+ validation_dataset_reader_params=config.pop('validation_dataset_reader',None)
+ ifvalidation_dataset_reader_paramsisnotNone:
+ dataset_reader=DatasetReader.from_params(validation_dataset_reader_params)
+ else:
+ dataset_reader=DatasetReader.from_params(config.pop('dataset_reader'))
+ evaluation_data_path=args.input_file
+ logger.info("Reading evaluation data from %s",evaluation_data_path)
+ instances=dataset_reader.read(evaluation_data_path)
+
+ embedding_sources:Dict[str,str]=(json.loads(args.embedding_sources_mapping)ifargs.embedding_sources_mappingelse{})
+ ifargs.extend_vocab:
+ logger.info("Vocabulary is being extended with test instances.")
+ model.vocab.extend_from_instances(Params({}),instances=instances)
+ model.extend_embedder_vocab(embedding_sources)
+
+ iterator_params=config.pop("validation_iterator",None)
+ ifiterator_paramsisNone:
+ iterator_params=config.pop("iterator")
+ iterator=DataIterator.from_params(iterator_params)
+ iterator.index_with(model.vocab)
+
+ metrics=evaluate(model,instances,iterator,args.cuda_device,args.batch_weight_key)
+
+ logger.info("Finished evaluating.")
+ logger.info("Metrics:")
+ forkey,metricinmetrics.items():
+ logger.info("%s: %s",key,metric)
+
+ output_file=args.output_file
+ ifoutput_file:
+ withopen(output_file,"w")asfile:
+ json.dump(metrics,file,indent=4)
+ returnmetrics
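A sketch of how this evaluator is typically invoked (the archive and data paths below are hypothetical), reusing the argparser defined above:

args = argparser.parse_args([
    "output/vanilla_mle/model.tar.gz",            # archive_file (hypothetical path)
    "data/multiwoz/test.json",                    # input_file (hypothetical path)
    "--output-file", "output/test_metrics.json",
    "--cuda-device=-1",                           # evaluate on CPU
])
metrics = evaluate_from_args(args)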
[docs]@overrides
+ defdecode(self,output_dict:Dict[str,torch.Tensor])->Dict[str,torch.Tensor]:
+ """
+ Does a simple argmax over the class probabilities.
+ """
+ predictions=output_dict["probs"].detach().cpu().numpy()
+ argmax_indices=np.argmax(predictions,axis=-1)
+ output_dict["actions"]=argmax_indices
+
+ returnoutput_dict
Source code for convlab.modules.policy.system.multiwoz.vanilla_mle.train
+"""
+The ``train`` subcommand can be used to train a model.
+It requires a configuration file and a directory in
+which to write the results.
+"""
+
+importargparse
+importlogging
+importos
+
+fromallennlp.commonimportParams
+fromallennlp.common.checksimportcheck_for_gpu
+fromallennlp.common.utilimportprepare_environment,prepare_global_logging,cleanup_global_logging,dump_metrics
+fromallennlp.models.archivalimportarchive_model,CONFIG_NAME
+fromallennlp.models.modelimportModel,_DEFAULT_WEIGHTS
+fromallennlp.training.trainerimportTrainer,TrainerPieces
+fromallennlp.training.trainer_baseimportTrainerBase
+fromallennlp.training.utilimportcreate_serialization_dir,evaluate
+
+fromconvlab.modules.policy.system.multiwoz.vanilla_mleimportdataset_reader,model
+
+logger=logging.getLogger(__name__)# pylint: disable=invalid-name
+
+
+argparser=argparse.ArgumentParser(description="Train a model.")
+argparser.add_argument('param_path',
+ type=str,
+ help='path to parameter file describing the model to be trained')
+argparser.add_argument('-s','--serialization-dir',
+ required=True,
+ type=str,
+ help='directory in which to save the model and its logs')
+argparser.add_argument('-r','--recover',
+ action='store_true',
+ default=False,
+ help='recover training from the state in serialization_dir')
+argparser.add_argument('-f','--force',
+ action='store_true',
+ required=False,
+ help='overwrite the output directory if it exists')
+argparser.add_argument('-o','--overrides',
+ type=str,
+ default="",
+ help='a JSON structure used to override the experiment configuration')
+argparser.add_argument('--file-friendly-logging',
+ action='store_true',
+ default=False,
+ help='outputs tqdm status on separate lines and slows tqdm refresh rate')
+
+
+
+
[docs]deftrain_model_from_args(args:argparse.Namespace):
+ """
+ Just converts from an ``argparse.Namespace`` object to string paths.
+ """
+ train_model_from_file(args.param_path,
+ args.serialization_dir,
+ args.overrides,
+ args.file_friendly_logging,
+ args.recover,
+ args.force)
+
+
+
[docs]deftrain_model_from_file(parameter_filename:str,
+ serialization_dir:str,
+ overrides:str="",
+ file_friendly_logging:bool=False,
+ recover:bool=False,
+ force:bool=False)->Model:
+ """
+ A wrapper around :func:`train_model` which loads the params from a file.
+
+ Parameters
+ ----------
+ parameter_filename : ``str``
+ A json parameter file specifying an AllenNLP experiment.
+ serialization_dir : ``str``
+ The directory in which to save results and logs. We just pass this along to
+ :func:`train_model`.
+ overrides : ``str``
+ A JSON string that we will use to override values in the input parameter file.
+ file_friendly_logging : ``bool``, optional (default=False)
+ If ``True``, we make our output more friendly to saved model files. We just pass this
+ along to :func:`train_model`.
+ recover : ``bool``, optional (default=False)
+ If ``True``, we will try to recover a training run from an existing serialization
+ directory. This is only intended for use when something actually crashed during the middle
+ of a run. For continuing training a model on new data, see the ``fine-tune`` command.
+ force : ``bool``, optional (default=False)
+ If ``True``, we will overwrite the serialization directory if it already exists.
+ """
+ # Load the experiment config from a file and pass it to ``train_model``.
+ params=Params.from_file(parameter_filename,overrides)
+ returntrain_model(params,serialization_dir,file_friendly_logging,recover,force)
+
+
+
[docs]deftrain_model(params:Params,
+ serialization_dir:str,
+ file_friendly_logging:bool=False,
+ recover:bool=False,
+ force:bool=False)->Model:
+ """
+ Trains the model specified in the given :class:`Params` object, using the data and training
+ parameters also specified in that object, and saves the results in ``serialization_dir``.
+
+ Parameters
+ ----------
+ params : ``Params``
+ A parameter object specifying an AllenNLP Experiment.
+ serialization_dir : ``str``
+ The directory in which to save results and logs.
+ file_friendly_logging : ``bool``, optional (default=False)
+ If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
+ down tqdm's output to only once every 10 seconds.
+ recover : ``bool``, optional (default=False)
+ If ``True``, we will try to recover a training run from an existing serialization
+ directory. This is only intended for use when something actually crashed during the middle
+ of a run. For continuing training a model on new data, see the ``fine-tune`` command.
+ force : ``bool``, optional (default=False)
+ If ``True``, we will overwrite the serialization directory if it already exists.
+
+ Returns
+ -------
+ best_model: ``Model``
+ The model with the best epoch weights.
+ """
+ prepare_environment(params)
+ create_serialization_dir(params,serialization_dir,recover,force)
+ stdout_handler=prepare_global_logging(serialization_dir,file_friendly_logging)
+
+ cuda_device=params.params.get('trainer').get('cuda_device',-1)
+ check_for_gpu(cuda_device)
+
+ params.to_file(os.path.join(serialization_dir,CONFIG_NAME))
+
+ evaluate_on_test=params.pop_bool("evaluate_on_test",False)
+
+ trainer_type=params.get("trainer",{}).get("type","default")
+
+ iftrainer_type=="default":
+ # Special logic to instantiate backward-compatible trainer.
+ pieces=TrainerPieces.from_params(params,serialization_dir,recover)# pylint: disable=no-member
+ trainer=Trainer.from_params(
+ model=pieces.model,
+ serialization_dir=serialization_dir,
+ iterator=pieces.iterator,
+ train_data=pieces.train_dataset,
+ validation_data=pieces.validation_dataset,
+ params=pieces.params,
+ validation_iterator=pieces.validation_iterator)
+ evaluation_iterator=pieces.validation_iteratororpieces.iterator
+ evaluation_dataset=pieces.test_dataset
+
+ else:
+ trainer=TrainerBase.from_params(params,serialization_dir,recover)
+ # TODO(joelgrus): handle evaluation in the general case
+ evaluation_iterator=evaluation_dataset=None
+
+ params.assert_empty('base train command')
+
+ try:
+ metrics=trainer.train()
+ exceptKeyboardInterrupt:
+ # if we have completed an epoch, try to create a model archive.
+ ifos.path.exists(os.path.join(serialization_dir,_DEFAULT_WEIGHTS)):
+ logging.info("Training interrupted by the user. Attempting to create "
+ "a model archive using the current best epoch weights.")
+ archive_model(serialization_dir,files_to_archive=params.files_to_archive)
+ raise
+
+ # Evaluate
+ ifevaluation_datasetandevaluate_on_test:
+ logger.info("The model will be evaluated using the best epoch weights.")
+ test_metrics=evaluate(trainer.model,evaluation_dataset,evaluation_iterator,
+ cuda_device=trainer._cuda_devices[0],# pylint: disable=protected-access,
+ # TODO(brendanr): Pass in an arg following Joel's trainer refactor.
+ batch_weight_key="")
+
+ forkey,valueintest_metrics.items():
+ metrics["test_"+key]=value
+
+ elifevaluation_dataset:
+ logger.info("To evaluate on the test set after training, pass the "
+ "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")
+
+ cleanup_global_logging(stdout_handler)
+
+ # Now tar up results
+ archive_model(serialization_dir,files_to_archive=params.files_to_archive)
+ dump_metrics(os.path.join(serialization_dir,"metrics.json"),metrics,log=True)
+
+ # We count on the trainer to have the model with best weights
+ returntrainer.model
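For illustration only (the paths are placeholders), the same entry point also accepts a JSON overrides string, which Params.from_file merges into the experiment config; for example, forcing CPU training:

from convlab.modules.policy.system.multiwoz.vanilla_mle.train import train_model_from_file

overrides = '{"trainer": {"cuda_device": -1}}'               # override the configured device
model = train_model_from_file("configs/vanilla_mle.jsonnet",  # hypothetical config path
                              "output/vanilla_mle",           # hypothetical output dir
                              overrides=overrides,
                              force=True)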
Source code for convlab.modules.policy.system.policy
+"""
+The policy base class for system bot.
+"""
+
+
+
[docs]classSysPolicy:
+ """Base class for system policy model."""
+
+ def__init__(self):
+ """ Constructor for SysPolicy class. """
+ pass
+
+
[docs]defpredict(self,state):
+ """
+ Predict the system action (dialog act) given state.
+ Args:
+ state (dict): Dialog state. For more details about the each field of the dialog state, please refer to
+ the init_state method in convlab/dst/dst_util.py
+ Returns:
+ action (dict): The dialog act of the current turn system response, which is then passed to NLG module to
+ generate a NL utterance.
+ """
+ pass
+
+
[docs]definit_session(self):
+ """Init the SysPolicy module to start a new session."""
+ pass
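A hypothetical minimal subclass (for illustration only) showing the contract: predict() maps the tracked dialog state to a system dialog act in the usual dict format of "Domain-Intent" keys and [slot, value] pairs.

class AlwaysRequestAreaPolicy(SysPolicy):
    """Toy policy that always asks the user which area they want."""

    def predict(self, state):
        return {"Hotel-Request": [["Area", "?"]]}

    def init_session(self):
        pass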
Source code for convlab.modules.policy.user.multiwoz.policy_agenda_multiwoz
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+"""
+
+__time__='2019/1/31 10:24'
+
+importcopy
+importjson
+importos
+importrandom
+importre
+
+fromconvlab.libimportlogger
+fromconvlab.modules.policy.user.policyimportUserPolicy
+fromconvlab.modules.usr.multiwoz.goal_generatorimportGoalGenerator
+fromconvlab.modules.util.multiwoz.multiwoz_slot_transimportREF_USR_DA,REF_SYS_DA
+
+logger=logger.get_logger(__name__)
+
+DEF_VAL_UNK='?'# Unknown
+DEF_VAL_DNC='don\'t care'# Do not care
+DEF_VAL_NUL='none'# for none
+DEF_VAL_BOOKED='yes'# for booked
+DEF_VAL_NOBOOK='no'# for booked
+NOT_SURE_VALS=[DEF_VAL_UNK,DEF_VAL_DNC,DEF_VAL_NUL,DEF_VAL_NOBOOK]
+
+# import reflect table
+REF_USR_DA_M=copy.deepcopy(REF_USR_DA)
+REF_SYS_DA_M={}
+fordom,ref_slotsinREF_SYS_DA.items():
+ dom=dom.lower()
+ REF_SYS_DA_M[dom]={}
+ forslot_a,slot_binref_slots.items():
+ REF_SYS_DA_M[dom][slot_a.lower()]=slot_b
+ REF_SYS_DA_M[dom]['none']=None
+
+# def book slot
+BOOK_SLOT=['people','day','stay','time']
+
+
[docs]classUserPolicyAgendaMultiWoz(UserPolicy):
+ """ The rule-based user policy model by agenda. Derived from the UserPolicy class """
+
+ # load stand value
+ stand_value_dict=json.load(open(os.path.join(os.path.dirname(
+ os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))),
+ 'data/value_set.json')))
+
+ def__init__(self,max_goal_num=100,seed=2019):
+ """
+ Constructor for User_Policy_Agenda class.
+ """
+ self.max_turn=40
+ self.max_initiative=4
+
+ self.goal_generator=GoalGenerator(corpus_path='data/multiwoz/annotated_user_da_with_span_full.json')
+
+ self.__turn=0
+ self.goal=None
+ self.agenda=None
+
+ random.seed(seed)
+ self.goal_seeds = [random.randint(1, int(1e7)) for i in range(max_goal_num)]
+
+ #UserPolicy.__init__(self, act_types, slots, slot_dict)
+ UserPolicy.__init__(self)
+
+
[docs]definit_session(self):
+ """ Build new Goal and Agenda for next session """
+ self.__turn=0
+ iflen(self.goal_seeds)>1:
+ self.goal=Goal(self.goal_generator,self.goal_seeds[0])
+ self.goal_seeds=self.goal_seeds[1:]
+ else:
+ self.goal=Goal(self.goal_generator)
+ self.domain_goals=self.goal.domain_goals
+ self.agenda=Agenda(self.goal)
+
+
[docs]defpredict(self,state,sys_action):
+ """
+ Predict a user act based on the state and the preceding system action.
+ Args:
+ state (tuple): Dialog state.
+ sys_action (tuple): Preceding system action.
+ Returns:
+ action (tuple): User act.
+ session_over (boolean): True to terminate session, otherwise session continues.
+ reward (float): Reward given by user.
+ """
+ self.__turn+=2
+
+ # At the beginning of a dialog when there is no NLU.
+ ifsys_action=="null":
+ sys_action={}
+
+ ifself.__turn>self.max_turn:
+ self.agenda.close_session()
+ else:
+ sys_action=self._transform_sysact_in(sys_action)
+ self.agenda.update(sys_action,self.goal)
+ ifself.goal.task_complete():
+ self.agenda.close_session()
+
+ # A -> A' + user_action
+ # action = self.agenda.get_action(random.randint(1, self.max_initiative))
+ action=self.agenda.get_action(self.max_initiative)
+
+ # Is there any action to say?
+ session_over=self.agenda.is_empty()
+
+ # reward
+ reward=self._reward()
+
+ # transform to DA
+ action=self._transform_usract_out(action)
+
+ returnaction,session_over,reward
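A minimal interaction sketch (assuming the MultiWOZ data files referenced in the constructor are available): the policy is reset at the start of a dialog, and the very first call passes the literal string "null" as the system action, exactly as handled above.

policy = UserPolicyAgendaMultiWoz()
policy.init_session()
# First turn: there is no NLU output yet, so the system action is the placeholder "null".
user_act, session_over, reward = policy.predict(None, "null")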
Source code for convlab.modules.policy.user.policy
+"""
+The policy base class for user bot.
+"""
+
+
+
[docs]classUserPolicy:
+ """Base model for user policy model."""
+ def__init__(self):
+ """ Constructor for UserPolicy class. """
+ pass
+
+
[docs]defpredict(self,state,sys_action):
+ """
+ Predict a user act based on the state and the preceding system action.
+ Args:
+ state (tuple): Dialog state.
+ sys_action (tuple): Preceding system action.
+ Returns:
+ action (tuple): User act.
+ session_over (boolean): True to terminate session, otherwise session continues.
+ reward (float): Reward given by the user.
+ """
+ pass
+
+
[docs]definit_session(self):
+ """
+ Restore after one session
+ """
+ pass
Source code for convlab.modules.state_encoder.multiwoz.multiwoz_state_encoder
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+importnumpyasnp
+
+fromconvlab.modules.util.multiwoz.dbqueryimportquery
+fromconvlab.modules.util.multiwoz.multiwoz_slot_transimportREF_SYS_DA,REF_USR_DA
+
+
+
Source code for convlab.modules.usr.multiwoz.goal_generator
+"""
+"""
+
+importjson
+importos
+importpickle
+importrandom
+fromcollectionsimportCounter
+fromcopyimportdeepcopy
+
+importnumpyasnp
+
+fromconvlab.modules.util.multiwozimportdbquery
+
+domains={'attraction','hotel','restaurant','train','taxi','hospital','police'}
+days=['monday','tuesday','wednesday','thursday','friday','saturday','sunday']
+domain_keywords={
+ 'restaurant':'place to dine',
+ 'train':'train',
+ 'hotel':'place to stay',
+ 'attraction':'places to go',
+ 'police':'help',
+ 'taxi':'taxi',
+ 'hospital':'hospital'
+}
+request_slot_string_map={
+ 'phone':'phone number',
+ 'pricerange':'price range',
+ 'duration':'travel time',
+ 'arriveBy':'arrival time',
+ 'leaveAt':'departure time',
+ 'trainID':'train ID'
+}
+templates={
+ 'intro':'You are looking for information in Cambridge.',
+ 'restaurant':{
+ 'intro':'You are looking forward to trying local restaurants.',
+ 'request':'Once you find a restaurant, make sure you get {}.',
+ 'area':'The restaurant should be in the {}.',
+ 'food':'The restaurant should serve {} food.',
+ 'name':'You are looking for a particular restaurant. Its name is called {}.',
+ 'pricerange':'The restaurant should be in the {} price range.',
+ 'book':'Once you find the restaurant you want to book a table {}.',
+ 'fail_info food':'If there is no such restaurant, how about one that serves {} food.',
+ 'fail_info area':'If there is no such restaurant, how about one in the {} area.',
+ 'fail_info pricerange':'If there is no such restaurant, how about one in the {} price range.',
+ 'fail_book time':'If the booking fails how about {}.',
+ 'fail_book day':'If the booking fails how about {}.'
+ },
+ 'hotel':{
+ 'intro':'You are looking for a place to stay.',
+ 'request':'Once you find a hotel, make sure you get {}.',
+ 'stars':'The hotel should have a star of {}.',
+ 'area':'The hotel should be in the {}.',
+ 'type':'The hotel should be in the type of {}.',
+ 'pricerange':'The hotel should be in the {} price range.',
+ 'name':'You are looking for a particular hotel. Its name is called {}.',
+ 'internet yes':'The hotel should include free wifi.',
+ 'internet no':'The hotel does not need to include free wifi.',
+ 'parking yes':'The hotel should include free parking.',
+ 'parking no':'The hotel does not need to include free parking.',
+ 'book':'Once you find the hotel you want to book it {}.',
+ 'fail_info type':'If there is no such hotel, how about one that is in the type of {}.',
+ 'fail_info area':'If there is no such hotel, how about one that is in the {} area.',
+ 'fail_info stars':'If there is no such hotel, how about one that has a star of {}.',
+ 'fail_info pricerange':'If there is no such hotel, how about one that is in the {} price range.',
+ 'fail_info parking yes':'If there is no such hotel, how about one that has free parking.',
+ 'fail_info parking no':'If there is no such hotel, how about one that does not have free parking.',
+ 'fail_info internet yes':'If there is no such hotel, how about one that has free wifi.',
+ 'fail_info internet no':'If there is no such hotel, how about one that does not have free wifi.',
+ 'fail_book stay':'If the booking fails how about {} nights.',
+ 'fail_book day':'If the booking fails how about {}.'
+ },
+ 'attraction':{
+ 'intro':'You are excited about seeing local tourist attractions.',
+ 'request':'Once you find an attraction, make sure you get {}.',
+ 'area':'The attraction should be in the {}.',
+ 'type':'The attraction should be in the type of {}.',
+ 'name':'You are looking for a particular attraction. Its name is called {}.',
+ 'fail_info type':'If there is no such attraction, how about one that is in the type of {}.',
+ 'fail_info area':'If there is no such attraction, how about one in the {} area.'
+ },
+ 'taxi':{
+ 'intro':'You are also looking for a taxi.',
+ 'commute':'You also want to book a taxi to commute between the two places.',
+ 'restaurant':'You want to make sure it arrives at the restaurant by the booked time.',
+ 'request':'Once you find a taxi, make sure you get {}.',
+ 'departure':'The taxi should depart from {}.',
+ 'destination':'The taxi should go to {}.',
+ 'leaveAt':'The taxi should leave after {}.',
+ 'arriveBy':'The taxi should arrive by {}.'
+ },
+ 'train':{
+ 'intro':'You are also looking for a train.',
+ 'request':'Once you find a train, make sure you get {}.',
+ 'departure':'The train should depart from {}.',
+ 'destination':'The train should go to {}.',
+ 'day':'The train should leave on {}.',
+ 'leaveAt':'The train should leave after {}.',
+ 'arriveBy':'The train should arrive by {}.',
+ 'book':'Once you find the train you want to make a booking {}.'
+ },
+ 'police':{
+ 'intro':'You were robbed and are looking for help.',
+ 'request':'Make sure you get {}.'
+ },
+ 'hospital':{
+ 'intro':'You got injured and are looking for a hospital nearby.',
+ 'request':'Make sure you get {}.',
+ 'department':'The hospital should have the {} department.'
+ }
+}
+
+pro_correction={
+ # "info": 0.2,
+ "info":0.0,
+ # "reqt": 0.2,
+ "reqt":0.0,
+ # "book": 0.2
+ "book":0.0
+}
+
+
+
[docs]defget_user_goal(self,seed=None):
+ ifseedisnotNone:
+ random.seed(seed)
+ np.random.seed(seed)
+ domain_ordering=()
+ whilelen(domain_ordering)<=0:
+ domain_ordering=nomial_sample(self.domain_ordering_dist)
+ # domain_ordering = ('restaurant',)
+
+ user_goal={dom:self._get_domain_goal(dom)fordomindomain_ordering}
+ assertlen(user_goal.keys())>0
+
+ # using taxi to commute between places, removing destination and departure.
+ if'taxi'indomain_ordering:
+ places=[domfordomindomain_ordering[:domain_ordering.index('taxi')]if'address'inself.ind_slot_dist[dom]['reqt'].keys()]
+ iflen(places)>=1:
+ deluser_goal['taxi']['info']['destination']
+ user_goal[places[-1]]['reqt']=list(set(user_goal[places[-1]].get('reqt',[])).union({'address'}))
+ ifplaces[-1]=='restaurant'and'book'inuser_goal['restaurant']:
+ user_goal['taxi']['info']['arriveBy']=user_goal['restaurant']['book']['time']
+ if'leaveAt'inuser_goal['taxi']['info']:
+ deluser_goal['taxi']['info']['leaveAt']
+ iflen(places)>=2:
+ deluser_goal['taxi']['info']['departure']
+ user_goal[places[-2]]['reqt']=list(set(user_goal[places[-2]].get('reqt',[])).union({'address'}))
+
+ # match area of attraction and restaurant
+ if'restaurant'indomain_orderingand \
+ 'attraction'indomain_orderingand \
+ 'fail_info'notinuser_goal['restaurant']and \
+ domain_ordering.index('restaurant')>domain_ordering.index('attraction')and \
+ 'area'inuser_goal['restaurant']['info']and'area'inuser_goal['attraction']['info']:
+ adjusted_restaurant_goal=deepcopy(user_goal['restaurant']['info'])
+ adjusted_restaurant_goal['area']=user_goal['attraction']['info']['area']
+ iflen(dbquery.query('restaurant',adjusted_restaurant_goal.items()))>0andrandom.random()<0.5:
+ user_goal['restaurant']['info']['area']=user_goal['attraction']['info']['area']
+
+ # match day and people of restaurant and hotel
+ if'restaurant'indomain_orderingand'hotel'indomain_orderingand \
+ 'book'inuser_goal['restaurant']and'book'inuser_goal['hotel']:
+ ifrandom.random()<0.5:
+ user_goal['restaurant']['book']['people']=user_goal['hotel']['book']['people']
+ if'fail_book'inuser_goal['restaurant']:
+ user_goal['restaurant']['fail_book']['people']=user_goal['hotel']['book']['people']
+ ifrandom.random()<1.0:
+ user_goal['restaurant']['book']['day']=user_goal['hotel']['book']['day']
+ if'fail_book'inuser_goal['restaurant']:
+ user_goal['restaurant']['fail_book']['day']=user_goal['hotel']['book']['day']
+ ifuser_goal['restaurant']['book']['day']==user_goal['restaurant']['fail_book']['day']and \
+ user_goal['restaurant']['book']['time']==user_goal['restaurant']['fail_book']['time']and \
+ user_goal['restaurant']['book']['people']==user_goal['restaurant']['fail_book']['people']:
+ deluser_goal['restaurant']['fail_book']
+
+ # match day and people of hotel and train
+ if'hotel'indomain_orderingand'train'indomain_orderingand \
+ 'book'inuser_goal['hotel']and'info'inuser_goal['train']:
+ ifuser_goal['train']['info']['destination']=='cambridge'and \
+ 'day'inuser_goal['hotel']['book']:
+ user_goal['train']['info']['day']=user_goal['hotel']['book']['day']
+ elifuser_goal['train']['info']['departure']=='cambridge'and \
+ 'day'inuser_goal['hotel']['book']and'stay'inuser_goal['hotel']['book']:
+ user_goal['train']['info']['day']=days[
+ (days.index(user_goal['hotel']['book']['day'])+int(
+ user_goal['hotel']['book']['stay']))%7]
+ # In case, we have no query results with adjusted train goal, we simply drop the train goal.
+ iflen(dbquery.query('train',user_goal['train']['info'].items()))==0:
+ deluser_goal['train']
+ # list.remove() returns None, so rebuild the tuple explicitly
+ remaining_domains = list(domain_ordering)
+ remaining_domains.remove('train')
+ domain_ordering = tuple(remaining_domains)
+
+ user_goal['domain_ordering']=domain_ordering
+
+ returnuser_goal
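For orientation, a hand-written (not generated) example of the returned structure: each domain carries 'info' constraints plus optional 'reqt' request slots and 'book' slots, 'fail_info'/'fail_book' hold backup constraints when present, and 'domain_ordering' records the sampled domain sequence.

user_goal = {
    "restaurant": {
        "info": {"area": "centre", "pricerange": "expensive"},
        "reqt": ["address", "phone"],
        "book": {"people": "2", "day": "thursday", "time": "18:45"},
    },
    "domain_ordering": ("restaurant",),
}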
+
+ def_adjust_info(self,domain,info):
+ # adjust one of the slots of the info
+ adjusted_info=deepcopy(info)
+ slot=random.choice(list(info.keys()))
+ adjusted_info[slot]=random.choice(list(self.ind_slot_value_dist[domain]['info'][slot].keys()))
+ returnadjusted_info
+
+
[docs]defbuild_message(self,user_goal,boldify=null_boldify):
+ message=[]
+ state=deepcopy(user_goal)
+
+ fordominuser_goal['domain_ordering']:
+ dom_msg=[]
+ state=deepcopy(user_goal[dom])
+ num_acts_in_unit=0
+
+ ifnot(dom=='taxi'andlen(state['info'])==1):
+ # intro
+ m=[templates[dom]['intro']]
+
+ # info
+ deffill_info_template(user_goal,domain,slot,info):
+ ifslot!='area'ornot('restaurant'inuser_goaland
+ 'attraction'inuser_goaland
+ infoinuser_goal['restaurant'].keys()and
+ infoinuser_goal['attraction'].keys()and
+ 'area'inuser_goal['restaurant'][info]and
+ 'area'inuser_goal['attraction'][info]and
+ user_goal['restaurant'][info]['area']==user_goal['attraction'][info]['area']):
+ returntemplates[domain][slot].format(self.boldify(user_goal[domain][info][slot]))
+ else:
+ restaurant_index=user_goal['domain_ordering'].index('restaurant')
+ attraction_index=user_goal['domain_ordering'].index('attraction')
+ ifrestaurant_index>attraction_indexanddomain=='restaurant':
+ returntemplates[domain][slot].format(self.boldify('same area as the attraction'))
+ elifattraction_index>restaurant_indexanddomain=='attraction':
+ returntemplates[domain][slot].format(self.boldify('same area as the restaurant'))
+ returntemplates[domain][slot].format(self.boldify(user_goal[domain][info][slot]))
+
+ info='info'
+ if'fail_info'inuser_goal[dom]:
+ info='fail_info'
+ ifdom=='taxi'andlen(state[info])==1:
+ taxi_index=user_goal['domain_ordering'].index('taxi')
+ places=[domfordominuser_goal['domain_ordering'][:taxi_index]if
+ domin['attraction','hotel','restaurant']]
+ iflen(places)>=2:
+ random.shuffle(places)
+ m.append(templates['taxi']['commute'])
+ if'arriveBy'instate[info]:
+ m.append('The taxi should arrive at the {} from the {} by {}.'.format(self.boldify(places[0]),
+ self.boldify(places[1]),
+ self.boldify(state[info]['arriveBy'])))
+ elif'leaveAt'instate[info]:
+ m.append('The taxi should leave from the {} to the {} after {}.'.format(self.boldify(places[0]),
+ self.boldify(places[1]),
+ self.boldify(state[info]['leaveAt'])))
+ message.append(' '.join(m))
+ else:
+ whilelen(state[info])>0:
+ num_acts=random.randint(1,min(len(state[info]),3))
+ slots=random.sample(list(state[info].keys()),num_acts)
+ sents=[fill_info_template(user_goal,dom,slot,info)forslotinslotsifslotnotin['parking','internet']]
+ if'parking'inslots:
+ sents.append(templates[dom]['parking '+state[info]['parking']])
+ if'internet'inslots:
+ sents.append(templates[dom]['internet '+state[info]['internet']])
+ m.extend(sents)
+ message.append(' '.join(m))
+ m=[]
+ forslotinslots:
+ delstate[info][slot]
+
+ # fail_info
+ if'fail_info'inuser_goal[dom]:
+ # if 'fail_info' in user_goal[dom]:
+ adjusted_slot=list(filter(lambdax:x[0][1]!=x[1][1],
+ zip(user_goal[dom]['info'].items(),user_goal[dom]['fail_info'].items())))[0][0][0]
+ ifadjusted_slotin['internet','parking']:
+ message.append(templates[dom]['fail_info '+adjusted_slot+' '+user_goal[dom]['info'][adjusted_slot]])
+ else:
+ message.append(templates[dom]['fail_info '+adjusted_slot].format(self.boldify(user_goal[dom]['info'][adjusted_slot])))
+
+ # reqt
+ if'reqt'instate:
+ slot_strings=[]
+ forslotinstate['reqt']:
+ ifslotin['internet','parking','food']:
+ continue
+ slot_strings.append(slotifslotnotinrequest_slot_string_mapelserequest_slot_string_map[slot])
+ iflen(slot_strings)>0:
+ message.append(templates[dom]['request'].format(self.boldify(', '.join(slot_strings))))
+ if'internet'instate['reqt']:
+ message.append('Make sure to ask if the hotel includes free wifi.')
+ if'parking'instate['reqt']:
+ message.append('Make sure to ask if the hotel includes free parking.')
+ if'food'instate['reqt']:
+ message.append('Make sure to ask about what food it serves.')
+
+ defget_same_people_domain(user_goal,domain,slot):
+ ifslotnotin['day','people']:
+ returnNone
+ domain_index=user_goal['domain_ordering'].index(domain)
+ previous_domains=user_goal['domain_ordering'][:domain_index]
+ forprevinprevious_domains:
+ ifprevin['restaurant','hotel','train']and'book'inuser_goal[prev]and \
+ slotinuser_goal[prev]['book']anduser_goal[prev]['book'][slot]== \
+ user_goal[domain]['book'][slot]:
+ returnprev
+ returnNone
+
+ # book
+ book='book'
+ if'fail_book'inuser_goal[dom]:
+ book='fail_book'
+ if'book'instate:
+ slot_strings=[]
+ forslotin['people','time','day','stay']:
+ ifslotinstate[book]:
+ ifslot=='people':
+ same_people_domain=get_same_people_domain(user_goal,dom,slot)
+ ifsame_people_domainisNone:
+ slot_strings.append('for {} people'.format(self.boldify(state[book][slot])))
+ else:
+ slot_strings.append(self.boldify(
+ 'for the same group of people as the {} booking'.format(same_people_domain)))
+ elifslot=='time':
+ slot_strings.append('at {}'.format(self.boldify(state[book][slot])))
+ elifslot=='day':
+ same_people_domain=get_same_people_domain(user_goal,dom,slot)
+ ifsame_people_domainisNone:
+ slot_strings.append('on {}'.format(self.boldify(state[book][slot])))
+ else:
+ slot_strings.append(
+ self.boldify('on the same day as the {} booking'.format(same_people_domain)))
+ elifslot=='stay':
+ slot_strings.append('for {} nights'.format(self.boldify(state[book][slot])))
+ delstate[book][slot]
+
+ assertlen(state[book])<=0,state[book]
+
+ iflen(slot_strings)>0:
+ message.append(templates[dom]['book'].format(' '.join(slot_strings)))
+
+ # fail_book
+ if'fail_book'inuser_goal[dom]:
+ adjusted_slot=list(filter(lambdax:x[0][1]!=x[1][1],zip(user_goal[dom]['book'].items(),
+ user_goal[dom]['fail_book'].items())))[0][0][0]
+
+ ifadjusted_slotin['internet','parking']:
+ message.append(
+ templates[dom]['fail_book '+adjusted_slot+' '+user_goal[dom]['book'][adjusted_slot]])
+ else:
+ message.append(templates[dom]['fail_book '+adjusted_slot].format(
+ self.boldify(user_goal[dom]['book'][adjusted_slot])))
+
+ ifboldify==do_boldify:
+ fori,minenumerate(message):
+ message[i]=message[i].replace('wifi',"<b>wifi</b>")
+ message[i]=message[i].replace('internet',"<b>internet</b>")
+ message[i]=message[i].replace('parking',"<b>parking</b>")
+
+ returnmessage
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+"""
+
+
[docs]classUserSimulator:
+ """An aggregation of user simulator components."""
+ def__init__(self,nlu_model,policy,nlg_model):
+ """
+ The constructor of UserSimulator class. The input are the models of each component.
+ Args:
+ nlu_model (NLU): An instance of NLU class.
+ policy (UserPolicy): An instance of Policy class.
+ nlg_model (NLG): An instance of NLG class.
+ """
+ self.nlu_model=nlu_model
+ # self.tracker = tracker
+ self.policy=policy
+ self.nlg_model=nlg_model
+
+ self.sys_act=None
+ self.current_action=None
+ self.policy.init_session()
+
+
[docs]defresponse(self,input,context=[]):
+ """
+ Generate the user response.
+ Args:
+ input (str or dict): Preceding system output. The type is str if system.nlg is not None, else dict.
+ Returns:
+ output (str or dict): User response. If the nlg component is None, type(output) == dict, else str.
+ action (dict): The dialog act of output. Note that if the nlg component is None, the output and action are
+ identical.
+ session_over (boolean): True to terminate session, else session continues.
+ reward (float): The reward given by the user.
+ """
+
+ ifself.nlu_modelisnotNone:
+ sys_act=self.nlu_model.parse(input,context)
+ else:
+ sys_act=input
+ self.sys_act=sys_act
+ action,session_over,reward=self.policy.predict(None,sys_act)
+ ifself.nlg_modelisnotNone:
+ output=self.nlg_model.generate(action)
+ else:
+ output=action
+
+ self.current_action=action
+
+ returnoutput,action,session_over,reward
+
+
[docs]definit_session(self):
+ """Init the parameters for a new session by calling the init_session methods of policy component."""
+ self.policy.init_session()
+ self.current_action=None
+
+
[docs]definit_response(self):
+ """Return a init response of the user."""
+ ifself.nlg_modelisnotNone:
+ output=self.nlg_model.generate({})
+ else:
+ output={}
+ returnoutput
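A wiring sketch with a toy stand-in policy (the stub below is hypothetical, not a ConvLab class): with nlu_model=None the simulator expects dialog-act input, and with nlg_model=None it returns a dialog act rather than a natural-language utterance.

class _ToyUserPolicy:
    """Hypothetical stand-in for a UserPolicy implementation."""

    def init_session(self):
        pass

    def predict(self, state, sys_action):
        # Always ask for the hotel address, never end the session, give zero reward.
        return {"Hotel-Request": [["Addr", "?"]]}, False, 0.0

user_sim = UserSimulator(nlu_model=None, policy=_ToyUserPolicy(), nlg_model=None)
output, action, session_over, reward = user_sim.response({"general-greet": [["none", "none"]]})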
Source code for convlab.modules.word_dst.multiwoz.evaluate
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+importjson
+
+fromconvlab.modules.dst.multiwoz.dst_utilimportminDistance
+fromconvlab.modules.word_dst.multiwoz.mdbtimportMDBTTracker
+
+
+
[docs]classWord_DST:
+ """A temporary semi-finishingv agent for word_dst testing, which takes as input utterances and output dialog state."""
+ def__init__(self):
+ self.dst=MDBTTracker(data_dir='../../../../data/mdbt')
+ self.nlu=None
+
+
[docs]defupdate(self,action,observation):
+ # update history
+ self.dst.state['history'].append([str(action)])
+
+ # NLU parsing
+ input_act=self.nlu.parse(observation,sum(self.dst.state['history'],[]))ifself.nluelseobservation
+
+ # state tracking
+ self.dst.update(input_act)
+ self.dst.state['history'][-1].append(observation)
+
+ # update history
+ returnself.dst.state
[docs]defrestore_model(self,sess,saver):
+ saver.restore(sess,self.model_url)
+ print('Loading trained MDBT model from ',self.model_url)
+ self.param_restored=True
+
+
[docs]deftrain(self):
+ """
+ Train the model.
+ Model saved to
+ """
+ num_hid,bidir,net_type,n2p,batch_size,model_url,graph_url,dev= \
+ None,True,None,None,None,None,None,None
+ globaltrain_batch_size,MODEL_URL,GRAPH_URL,device,TRAIN_MODEL_URL,TRAIN_GRAPH_URL
+
+ ifbatch_size:
+ train_batch_size=batch_size
+ print("Setting up the batch size to {}.........................".format(batch_size))
+ ifmodel_url:
+ TRAIN_MODEL_URL=model_url
+ print("Setting up the model url to {}.........................".format(TRAIN_MODEL_URL))
+ ifgraph_url:
+ TRAIN_GRAPH_URL=graph_url
+ print("Setting up the graph url to {}.........................".format(TRAIN_GRAPH_URL))
+
+ ifdev:
+ device=dev
+ print("Setting up the device to {}.........................".format(device))
+
+ # 1 Load and process the input data including the ontology
+ # Load the word embeddings
+ word_vectors=load_word_vectors(self.word_vectors_url)
+
+ # Load the ontology and extract the feature vectors
+ ontology,ontology_vectors,slots=load_ontology(self.ontology_url,word_vectors)
+
+ # Load and process the training data
+ dialogues,_=load_woz_data(self.training_url,word_vectors,ontology)
+ no_dialogues=len(dialogues)
+
+ # Load and process the validation data
+ val_dialogues,_=load_woz_data(self.validation_url,word_vectors,ontology)
+
+ # Generate the validation batch data
+ val_data=generate_batch(val_dialogues,0,len(val_dialogues),len(ontology))
+ val_iterations=int(len(val_dialogues)/train_batch_size)
+
+ # 2 Initialise and set up the model graph
+ # Initialise the model
+ graph=tf.Graph()
+ withgraph.as_default():
+ model_variables=model_definition(ontology_vectors,len(ontology),slots,num_hidden=num_hid,bidir=bidir,
+ net_type=net_type,dev=device)
+ (user,sys_res,no_turns,user_uttr_len,sys_uttr_len,labels,domain_labels,domain_accuracy,
+ slot_accuracy,value_accuracy,value_f1,train_step,keep_prob,_,_,_)=model_variables
+ [precision,recall,value_f1]=value_f1
+ saver=tf.train.Saver()
+ ifdevice=='gpu':
+ config=tf.ConfigProto(allow_soft_placement=True)
+ config.gpu_options.allow_growth=True
+ else:
+ config=tf.ConfigProto(device_count={'GPU':0})
+
+ sess=tf.Session(config=config)
+ ifos.path.exists(TRAIN_MODEL_URL+".index"):
+ saver.restore(sess,TRAIN_MODEL_URL)
+ print("Loading from an existing model {} ....................".format(TRAIN_MODEL_URL))
+ else:
+ ifnotos.path.exists(TRAIN_MODEL_URL):
+ os.makedirs('/'.join(TRAIN_MODEL_URL.split('/')[:-1]))
+ os.makedirs('/'.join(TRAIN_GRAPH_URL.split('/')[:-1]))
+ init=tf.global_variables_initializer()
+ sess.run(init)
+ print("Create new model parameters.....................................")
+ merged=tf.summary.merge_all()
+ val_accuracy=tf.summary.scalar('validation_accuracy',value_accuracy)
+ val_f1=tf.summary.scalar('validation_f1_score',value_f1)
+ train_writer=tf.summary.FileWriter(TRAIN_GRAPH_URL,graph)
+ train_writer.flush()
+
+ # 3 Perform an epoch of training
+ last_update=-1
+ best_f_score=-1
+ forepochinrange(no_epochs):
+
+ batch_size=train_batch_size
+ sys.stdout.flush()
+ iterations=math.ceil(no_dialogues/train_batch_size)
+ start_time=time.time()
+ val_i=0
+ shuffle(dialogues)
+ forbatch_idinrange(iterations):
+
+ ifbatch_id==iterations-1andno_dialogues%iterations!=0:
+ batch_size=no_dialogues%train_batch_size
+
+ batch_user,batch_sys,batch_labels,batch_domain_labels,batch_user_uttr_len,batch_sys_uttr_len, \
+ batch_no_turns=generate_batch(dialogues,batch_id,batch_size,len(ontology))
+
+ [_,summary,da,sa,va,vf,pr,re]=sess.run([train_step,merged,domain_accuracy,slot_accuracy,
+ value_accuracy,value_f1,precision,recall],
+ feed_dict={user:batch_user,sys_res:batch_sys,
+ labels:batch_labels,
+ domain_labels:batch_domain_labels,
+ user_uttr_len:batch_user_uttr_len,
+ sys_uttr_len:batch_sys_uttr_len,
+ no_turns:batch_no_turns,
+ keep_prob:0.5})
+
+ print("The accuracies for domain is {:.2f}, slot {:.2f}, value {:.2f}, f1_score {:.2f} precision {:.2f}"
+ " recall {:.2f} for batch {}".format(da,sa,va,vf,pr,re,batch_id+iterations*epoch))
+
+ train_writer.add_summary(summary,start_batch+batch_id+iterations*epoch)
+
+ # ================================ VALIDATION ==============================================
+
+ ifbatch_id%batches_per_eval==0orbatch_id==0:
+ ifbatch_id==0:
+ print("Batch","0","to",batch_id,"took",round(time.time()-start_time,2),"seconds.")
+
+ else:
+ print("Batch",batch_id+iterations*epoch-batches_per_eval,"to",
+ batch_id+iterations*epoch,"took",
+ round(time.time()-start_time,3),"seconds.")
+ start_time=time.time()
+
+ _,_,v_acc,f1_score,sm1,sm2=evaluate_model(sess,model_variables,val_data,
+ [val_accuracy,val_f1],batch_id,val_i)
+ val_i+=1
+ val_i%=val_iterations
+ train_writer.add_summary(sm1,start_batch+batch_id+iterations*epoch)
+ train_writer.add_summary(sm2,start_batch+batch_id+iterations*epoch)
+ stime=time.time()
+ current_metric=f1_score
+ print(" Validation metric:",round(current_metric,5)," eval took",
+ round(time.time()-stime,2),"last update at:",last_update,"/",iterations)
+
+ # and if we got a new high score for validation f-score, we need to save the parameters:
+ ifcurrent_metric>best_f_score:
+ last_update=batch_id+iterations*epoch+1
+ print("\n ====================== New best validation metric:",round(current_metric,4),
+ " - saving these parameters. Batch is:",last_update,"/",iterations,
+ "---------------- =========== \n")
+
+ best_f_score=current_metric
+
+ saver.save(sess,TRAIN_MODEL_URL)
+
+ print("The best parameters achieved a validation metric of",round(best_f_score,4))
+
+
[docs]deftest(self,sess):
+ """Test the MDBT model on mdbt dataset. Almost the same as original code."""
+ ifnotos.path.exists("../../data/mdbt/results"):
+ os.makedirs("../../data/mdbt/results")
+
+ globaltrain_batch_size,MODEL_URL,GRAPH_URL
+
+ model_variables=self.model_variables
+ (user,sys_res,no_turns,user_uttr_len,sys_uttr_len,labels,domain_labels,domain_accuracy,
+ slot_accuracy,value_accuracy,value_f1,train_step,keep_prob,predictions,
+ true_predictions,[y,_])=model_variables
+ [precision,recall,value_f1]=value_f1
+ # print("\tMDBT: Loading from an existing model {} ....................".format(MODEL_URL))
+
+ iterations=math.ceil(self.no_dialogues/train_batch_size)
+ batch_size=train_batch_size
+ [slot_acc,tot_accuracy]=[np.zeros(len(self.ontology),dtype="float32"),0]
+ slot_accurac=0
+ # value_accurac = np.zeros((len(slots),), dtype="float32")
+ value_accurac=0
+ joint_accuracy=0
+ f1_score=0
+ preci=0
+ recal=0
+ processed_dialogues=[]
+ # np.set_printoptions(threshold=np.nan)
+ forbatch_idinrange(int(iterations)):
+
+ ifbatch_id==iterations-1:
+ batch_size=self.no_dialogues-batch_id*train_batch_size
+
+ batch_user,batch_sys,batch_labels,batch_domain_labels,batch_user_uttr_len,batch_sys_uttr_len, \
+ batch_no_turns=generate_batch(self.dialogues,batch_id,batch_size,len(self.ontology))
+
+ [da,sa,va,vf,pr,re,pred,true_pred,y_pred]=sess.run(
+ [domain_accuracy,slot_accuracy,value_accuracy,
+ value_f1,precision,recall,predictions,
+ true_predictions,y],
+ feed_dict={user:batch_user,sys_res:batch_sys,
+ labels:batch_labels,
+ domain_labels:batch_domain_labels,
+ user_uttr_len:batch_user_uttr_len,
+ sys_uttr_len:batch_sys_uttr_len,
+ no_turns:batch_no_turns,
+ keep_prob:1.0})
+
+ true=sum([1ifnp.array_equal(pred[k,:],true_pred[k,:])andsum(true_pred[k,:])>0else0
+ forkinrange(true_pred.shape[0])])
+ actual=sum([1ifsum(true_pred[k,:])>0else0forkinrange(true_pred.shape[0])])
+ ja=true/actual
+ tot_accuracy+=da
+ # joint_accuracy += ja
+ slot_accurac+=sa
+ ifmath.isnan(pr):
+ pr=0
+ preci+=pr
+ recal+=re
+ ifmath.isnan(vf):
+ vf=0
+ f1_score+=vf
+ # value_accurac += va
+ slot_acc+=np.mean(np.asarray(np.equal(pred,true_pred),dtype="float32"),axis=0)
+
+ dialgs,va1,ja=track_dialogue(self.actual_dialogues[batch_id*train_batch_size:
+ batch_id*train_batch_size+batch_size],
+ self.ontology,pred,y_pred)
+ processed_dialogues+=dialgs
+ joint_accuracy+=ja
+ value_accurac+=va1
+
+ print(
+ "The accuracies for domain is {:.2f}, slot {:.2f}, value {:.2f}, other value {:.2f}, f1_score {:.2f} precision {:.2f}"
+ " recall {:.2f} for batch {}".format(da,sa,np.mean(va),va1,vf,pr,re,batch_id))
+
+ print(
+ "End of evaluating the test set...........................................................................")
+
+ slot_acc/=iterations
+ # print("The accuracies for each slot:")
+ # print(value_accurac/iterations)
+ print("The overall accuracies for domain is"
+ " {}, slot {}, value {}, f1_score {}, precision {},"
+ " recall {}, joint accuracy {}".format(tot_accuracy/iterations,slot_accurac/iterations,
+ value_accurac/iterations,f1_score/iterations,
+ preci/iterations,recal/iterations,
+ joint_accuracy/iterations))
+
+ withopen(self.results_url,'w')asf:
+ json.dump(processed_dialogues,f,indent=4)
+
+
+
[docs]deftest_update():
+ os.environ["CUDA_VISIBLE_DEVICES"]='0'
+ _config=tf.ConfigProto()
+ _config.gpu_options.allow_growth=True
+ _config.allow_soft_placement=True
+ start_time=time.time()
+ mdbt=MDBTTracker()
+ print('\tMDBT: model build time: {:.2f} seconds'.format(time.time()-start_time))
+ saver=tf.train.Saver()
+ mdbt.restore_model(mdbt.sess,saver)
+ # demo state history
+ mdbt.state['history']=[['null','I\'m trying to find an expensive restaurant in the centre part of town.'],
+ [
+ 'The Cambridge Chop House is a good expensive restaurant in the centre of town. Would you like me to book it for you?',
+ 'Yes, a table for 1 at 16:15 on sunday. I need the reference number.']]
+ new_state=mdbt.update(None,'hi, this is not good')
+ print(json.dumps(new_state,indent=4))
+ print('all time: {:.2f} seconds'.format(time.time()-start_time))
[docs]deflstm_model(text_input,utterance_length,num_hidden,name,net_type,bidir):
+ '''
+ Define an LSTM/GRU/RNN encoder that runs across the user input and system act
+ :param text_input: [batch_size, max_num_turns, max_utterance_size, vector_dimension]
+ :param utterance_length: number of words in every utterance [batch_size, max_num_turns, 1]
+ :param num_hidden: -- int --
+ :param name: the variable scope name of the recurrent network
+ :param net_type: type of the network ("lstm" or "gru" or "rnn")
+ :param bidir: use a bidirectional network -- bool --
+ :return: output of the final state [batch_size, max_num_turns, num_hidden]
+ '''
+ withtf.variable_scope(name):
+
+ text_input=tf.reshape(text_input,[-1,max_utterance_length,vector_dimension])
+ utterance_length=tf.reshape(utterance_length,[-1])
+
+ defrnn(net_typ,num_units):
+ ifnet_typ=="lstm":
+ returntf.nn.rnn_cell.LSTMCell(num_units)
+ elifnet_typ=="gru":
+ returntf.nn.rnn_cell.GRUCell(num_units)
+ else:
+ returntf.nn.rnn_cell.BasicRNNCell(num_units)
+
+ ifbidir:
+ assertnum_hidden%2==0
+ rev_cell=rnn(net_type,num_hidden//2)
+ cell=rnn(net_type,num_hidden//2)
+ _,lspd=tf.nn.bidirectional_dynamic_rnn(cell,rev_cell,text_input,dtype=tf.float32,
+ sequence_length=utterance_length)
+ ifnet_type=="lstm":
+ lspd=(lspd[0].h,lspd[1].h)
+
+ last_state=tf.concat(lspd,1)
+ else:
+ cell=rnn(net_type,num_hidden)
+ _,last_state=tf.nn.dynamic_rnn(cell,text_input,dtype=tf.float32,sequence_length=utterance_length)
+ ifnet_type=="lstm":
+ last_state=last_state.h
+
+ last_state=tf.reshape(last_state,[-1,max_no_turns,num_hidden])
+
+ returnlast_state
+
+
+
def model_definition(ontology, num_slots, slots, num_hidden=None, net_type=None, bidir=None, test=False, dev=None):
    '''
    Create neural belief tracker model that is defined in my notes. It consists of encoding the user and system input,
    then use the ontology to decode the encoder in a manner that detects if a domain-slot-value class is mentioned
    :param ontology: numpy array of the embedded vectors of the ontology [num_slots, 3*vector_dimension]
    :param num_slots: number of ontology classes --int--
    :param slots: indices of the values of each slot list of lists of ints
    :param num_hidden: Number of hidden units or dimension of the hidden space
    :param net_type: The type of the encoder network cnn, lstm, gru, rnn ...etc
    :param bidir: For recurrent networks should it be bidirectional
    :param test: This is testing mode (no back-propagation)
    :param dev: Device to run the model on (cpu or gpu)
    :return: All input variable/placeholders output metrics (precision, recall, f1-score) and trainer
    '''
    # print('model definition')
    # print(ontology, num_slots, slots, num_hidden, net_type, bidir, test, dev)
    global lstm_num_hidden

    if not net_type:
        net_type = network
    else:
        print("\tMDBT: Setting up the type of the network to {}..............................".format(net_type))
    if bidir is None:
        bidir = bidirect
    else:
        pass
        # print("\tMDBT: Setting up type of the recurrent network to bidirectional {}...........................".format(bidir))
    if num_hidden:
        lstm_num_hidden = num_hidden
        print("\tMDBT: Setting up type of the dimension of the hidden space to {}.........................".format(num_hidden))

    ontology = tf.constant(ontology, dtype=tf.float32)

    # ----------------------------------- Define the input variables --------------------------------------------------
    user_input = tf.placeholder(tf.float32, [None, max_no_turns, max_utterance_length, vector_dimension], name="user")
    system_input = tf.placeholder(tf.float32, [None, max_no_turns, max_utterance_length, vector_dimension], name="sys")
    num_turns = tf.placeholder(tf.int32, [None], name="num_turns")
    user_utterance_lengths = tf.placeholder(tf.int32, [None, max_no_turns], name="user_sen_len")
    sys_utterance_lengths = tf.placeholder(tf.int32, [None, max_no_turns], name="sys_sen_len")
    labels = tf.placeholder(tf.float32, [None, max_no_turns, num_slots], name="labels")
    domain_labels = tf.placeholder(tf.float32, [None, max_no_turns, num_slots], name="domain_labels")
    # dropout placeholder, 0.5 for training, 1.0 for validation/testing:
    keep_prob = tf.placeholder("float")

    # ------------------------------------ Create the Encoder networks ------------------------------------------------
    devs = ['/device:CPU:0']
    if dev == 'gpu':
        devs = get_available_devs()

    if net_type == "cnn":
        with tf.device(devs[1 % len(devs)]):
            # Encode the domain of the user input using a CNN network
            usr_dom_en = define_CNN_model(user_input, num_filters=lstm_num_hidden, name="h_u_d")
            # Encode the domain of the system act using a CNN network
            sys_dom_en = define_CNN_model(system_input, num_filters=lstm_num_hidden, name="h_s_d")

        with tf.device(devs[2 % len(devs)]):
            # Encode the slot of the user input using a CNN network
            usr_slot_en = define_CNN_model(user_input, num_filters=lstm_num_hidden, name="h_u_s")
            # Encode the slot of the system act using a CNN network
            sys_slot_en = define_CNN_model(system_input, num_filters=lstm_num_hidden, name="h_s_s")
            # Encode the value of the user input using a CNN network
            usr_val_en = define_CNN_model(user_input, num_filters=lstm_num_hidden, name="h_u_v")
            # Encode the value of the system act using a CNN network
            sys_val_en = define_CNN_model(system_input, num_filters=lstm_num_hidden, name="h_s_v")
            # Encode the user using a CNN network
            usr_en = define_CNN_model(user_input, num_filters=lstm_num_hidden // 5, name="h_u")

    else:

        with tf.device(devs[1 % len(devs)]):
            # Encode the domain of the user input using a LSTM network
            usr_dom_en = lstm_model(user_input, user_utterance_lengths, lstm_num_hidden, "h_u_d", net_type, bidir)
            usr_dom_en = tf.nn.dropout(usr_dom_en, keep_prob, name="h_u_d_out")
            # Encode the domain of the system act using a LSTM network
            sys_dom_en = lstm_model(system_input, sys_utterance_lengths, lstm_num_hidden, "h_s_d", net_type, bidir)
            sys_dom_en = tf.nn.dropout(sys_dom_en, keep_prob, name="h_s_d_out")

        with tf.device(devs[2 % len(devs)]):
            # Encode the slot of the user input using a LSTM network
            usr_slot_en = lstm_model(user_input, user_utterance_lengths, lstm_num_hidden, "h_u_s", net_type, bidir)
            usr_slot_en = tf.nn.dropout(usr_slot_en, keep_prob, name="h_u_s_out")
            # Encode the slot of the system act using a LSTM network
            sys_slot_en = lstm_model(system_input, sys_utterance_lengths, lstm_num_hidden, "h_s_s", net_type, bidir)
            sys_slot_en = tf.nn.dropout(sys_slot_en, keep_prob, name="h_s_s_out")
            # Encode the value of the user input using a LSTM network
            usr_val_en = lstm_model(user_input, user_utterance_lengths, lstm_num_hidden, "h_u_v", net_type, bidir)
            usr_val_en = tf.nn.dropout(usr_val_en, keep_prob, name="h_u_v_out")
            # Encode the value of the system act using a LSTM network
            sys_val_en = lstm_model(system_input, sys_utterance_lengths, lstm_num_hidden, "h_s_v", net_type, bidir)
            sys_val_en = tf.nn.dropout(sys_val_en, keep_prob, name="h_s_v_out")
            # Encode the user using a LSTM network
            usr_en = lstm_model(user_input, user_utterance_lengths, lstm_num_hidden // 5, "h_u", net_type, bidir)
            usr_en = tf.nn.dropout(usr_en, keep_prob, name="h_u_out")

    with tf.device(devs[1 % len(devs)]):
        usr_dom_en = tf.tile(tf.expand_dims(usr_dom_en, axis=2), [1, 1, num_slots, 1], name="h_u_d")
        sys_dom_en = tf.tile(tf.expand_dims(sys_dom_en, axis=2), [1, 1, num_slots, 1], name="h_s_d")
    with tf.device(devs[2 % len(devs)]):
        usr_slot_en = tf.tile(tf.expand_dims(usr_slot_en, axis=2), [1, 1, num_slots, 1], name="h_u_s")
        sys_slot_en = tf.tile(tf.expand_dims(sys_slot_en, axis=2), [1, 1, num_slots, 1], name="h_s_s")
        usr_val_en = tf.tile(tf.expand_dims(usr_val_en, axis=2), [1, 1, num_slots, 1], name="h_u_v")
        sys_val_en = tf.tile(tf.expand_dims(sys_val_en, axis=2), [1, 1, num_slots, 1], name="h_s_v")
        usr_en = tf.tile(tf.expand_dims(usr_en, axis=2), [1, 1, num_slots, 1], name="h_u")

    # All encoding vectors have size [batch_size, max_turns, num_slots, num_hidden]

    # Matrix that transforms the ontology from the embedding space to the hidden representation
    with tf.device(devs[1 % len(devs)]):
        W_onto_domain = tf.Variable(tf.random_normal([vector_dimension, lstm_num_hidden]), name="W_onto_domain")
        W_onto_slot = tf.Variable(tf.random_normal([vector_dimension, lstm_num_hidden]), name="W_onto_slot")
        W_onto_value = tf.Variable(tf.random_normal([vector_dimension, lstm_num_hidden]), name="W_onto_value")

        # And biases
        b_onto_domain = tf.Variable(tf.zeros([lstm_num_hidden]), name="b_onto_domain")
        b_onto_slot = tf.Variable(tf.zeros([lstm_num_hidden]), name="b_onto_slot")
        b_onto_value = tf.Variable(tf.zeros([lstm_num_hidden]), name="b_onto_value")

        # Apply the transformation from the embedding space of the ontology to the hidden space
        domain_vec = tf.slice(ontology, begin=[0, 0], size=[-1, vector_dimension])
        slot_vec = tf.slice(ontology, begin=[0, vector_dimension], size=[-1, vector_dimension])
        value_vec = tf.slice(ontology, begin=[0, 2 * vector_dimension], size=[-1, vector_dimension])
        # Each [num_slots, vector_dimension]
        d = tf.nn.dropout(tf.tanh(tf.matmul(domain_vec, W_onto_domain) + b_onto_domain), keep_prob, name="d")
        s = tf.nn.dropout(tf.tanh(tf.matmul(slot_vec, W_onto_slot) + b_onto_slot), keep_prob, name="s")
        v = tf.nn.dropout(tf.tanh(tf.matmul(value_vec, W_onto_value) + b_onto_value), keep_prob, name="v")
        # Each [num_slots, num_hidden]

        # Apply the comparison mechanism for all the user and system utterances and ontology values
        domain_user = tf.multiply(usr_dom_en, d, name="domain_user")
        domain_sys = tf.multiply(sys_dom_en, d, name="domain_sys")
        slot_user = tf.multiply(usr_slot_en, s, name="slot_user")
        slot_sys = tf.multiply(sys_slot_en, s, name="slot_sys")
        value_user = tf.multiply(usr_val_en, v, name="value_user")
        value_sys = tf.multiply(sys_val_en, v, name="value_sys")
        # All of size [batch_size, max_turns, num_slots, num_hidden]

        # -------------- Domain Detection -------------------------------------------------------------------------
        W_domain = tf.Variable(tf.random_normal([2 * lstm_num_hidden]), name="W_domain")
        b_domain = tf.Variable(tf.zeros([1]), name="b_domain")
        y_d = tf.sigmoid(tf.reduce_sum(tf.multiply(tf.concat([domain_user, domain_sys], axis=3), W_domain), axis=3)
                         + b_domain)  # [batch_size, max_turns, num_slots]

        # -------- Run through each of the 3 case ( inform, request, confirm) and decode the inferred state ---------
        # 1 Inform (User is informing the system about the goal, e.g. "I am looking for a place to stay in the centre")
        W_inform = tf.Variable(tf.random_normal([2 * lstm_num_hidden]), name="W_inform")
        b_inform = tf.Variable(tf.random_normal([1]), name="b_inform")
        inform = tf.add(tf.reduce_sum(tf.multiply(tf.concat([slot_user, value_user], axis=3), W_inform), axis=3), b_inform,
                        name="inform")  # [batch_size, max_turns, num_slots]

    # 2 Request (The system is requesting information from the user, e.g. "what type of food would you like?")
    with tf.device(devs[2 % len(devs)]):
        W_request = tf.Variable(tf.random_normal([2 * lstm_num_hidden]), name="W_request")
        b_request = tf.Variable(tf.random_normal([1]), name="b_request")
        request = tf.add(tf.reduce_sum(tf.multiply(tf.concat([slot_sys, value_user], axis=3), W_request), axis=3),
                         b_request, name="request")  # [batch_size, max_turns, num_slots]

    # 3 Confirm (The system is confirming values given by the user, e.g. "How about turkish food?")
    with tf.device(devs[3 % len(devs)]):
        size = 2 * lstm_num_hidden + lstm_num_hidden // 5
        W_confirm = tf.Variable(tf.random_normal([size]), name="W_confirm")
        b_confirm = tf.Variable(tf.random_normal([1]), name="b_confirm")
        confirm = tf.add(
            tf.reduce_sum(tf.multiply(tf.concat([slot_sys, value_sys, usr_en], axis=3), W_confirm), axis=3),
            b_confirm, name="confirm")  # [batch_size, max_turns, num_slots]

    output = inform + request + confirm

    # -------------------- Adding the belief update RNN with memory cell (Taken from previous model) -------------------
    with tf.device(devs[2 % len(devs)]):
        domain_memory = tf.Variable(tf.random_normal([1, 1]), name="domain_memory")
        domain_current = tf.Variable(tf.random_normal([1, 1]), name="domain_current")
        domain_M_h = tf.Variable(tf.random_normal([1, 1]), name="domain_M_h")
        domain_W_m = tf.Variable(tf.random_normal([1, 1], name="domain_W_m"))
        domain_U_m = tf.Variable(tf.random_normal([1, 1]), name="domain_U_m")
        a_memory = tf.Variable(tf.random_normal([1, 1]), name="a_memory")
        b_memory = tf.Variable(tf.random_normal([1, 1]), name="b_memory")
        a_current = tf.Variable(tf.random_normal([1, 1]), name="a_current")
        b_current = tf.Variable(tf.random_normal([1, 1]), name="b_current")
        M_h_a = tf.Variable(tf.random_normal([1, 1]), name="M_h_a")
        M_h_b = tf.Variable(tf.random_normal([1, 1]), name="M_h_b")
        W_m_a = tf.Variable(tf.random_normal([1, 1]), name="W_m_a")
        W_m_b = tf.Variable(tf.random_normal([1, 1]), name="W_m_b")
        U_m_a = tf.Variable(tf.random_normal([1, 1]), name="U_m_a")
        U_m_b = tf.Variable(tf.random_normal([1, 1]), name="U_m_b")

    # ---------------------------------- Unroll the domain over time --------------------------------------------------
    with tf.device(devs[1 % len(devs)]):
        cell = GRU(domain_memory * tf.diag(tf.ones(num_slots)), domain_current * tf.diag(tf.ones(num_slots)),
                   domain_M_h * tf.diag(tf.ones(num_slots)), domain_W_m * tf.diag(tf.ones(num_slots)),
                   domain_U_m * tf.diag(tf.ones(num_slots)), num_slots,
                   binary_output=True)

        y_d, _ = tf.nn.dynamic_rnn(cell, y_d, sequence_length=num_turns, dtype=tf.float32)

        domain_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=domain_labels, logits=y_d), axis=2,
                                    name="domain_loss") / (num_slots / len(slots))

        y_d = tf.sigmoid(y_d)

    with tf.device(devs[0 % len(devs)]):

        loss = [None for _ in range(len(slots))]
        slot_pred = [None for _ in range(len(slots))]
        slot_label = [None for _ in range(len(slots))]
        val_pred = [None for _ in range(len(slots))]
        val_label = [None for _ in range(len(slots))]
        y = [None for _ in range(len(slots))]
        y_pred = [None for _ in range(len(slots))]
        for i in range(len(slots)):

            num_values = slots[i] + 1  # For the none case
            size = sum(slots[:i + 1]) - slots[i]
            if test:
                domain_output = tf.slice(tf.round(y_d), begin=[0, 0, size], size=[-1, -1, slots[i]])
            else:
                domain_output = tf.slice(domain_labels, begin=[0, 0, size], size=[-1, -1, slots[i]])
            max_val = tf.expand_dims(tf.reduce_max(domain_output, axis=2), axis=2)
            # tf.assert_less_equal(max_val, 1.0)
            # tf.assert_equal(tf.round(max_val), max_val)
            domain_output = tf.concat([tf.zeros(tf.shape(domain_output)), 1 - max_val], axis=2)

            slot_output = tf.slice(output, begin=[0, 0, size], size=[-1, -1, slots[i]])
            slot_output = tf.concat([slot_output, tf.zeros([tf.shape(output)[0], max_no_turns, 1])], axis=2)

            labels_output = tf.slice(labels, begin=[0, 0, size], size=[-1, -1, slots[i]])
            max_val = tf.expand_dims(tf.reduce_max(labels_output, axis=2), axis=2)
            # tf.assert_less_equal(max_val, 1.0)
            # tf.assert_equal(tf.round(max_val), max_val)
            slot_label[i] = max_val
            # [Batch_size, max_turns, 1]
            labels_output = tf.argmax(tf.concat([labels_output, 1 - max_val], axis=2), axis=2)
            # [Batch_size, max_turns]
            val_label[i] = tf.cast(tf.expand_dims(labels_output, axis=2), dtype="float")
            # [Batch_size, max_turns, 1]

            diag_memory = a_memory * tf.diag(tf.ones(num_values))
            non_diag_memory = tf.matrix_set_diag(b_memory * tf.ones([num_values, num_values]), tf.zeros(num_values))
            W_memory = diag_memory + non_diag_memory

            diag_current = a_current * tf.diag(tf.ones(num_values))
            non_diag_current = tf.matrix_set_diag(b_current * tf.ones([num_values, num_values]), tf.zeros(num_values))
            W_current = diag_current + non_diag_current

            diag_M_h = M_h_a * tf.diag(tf.ones(num_values))
            non_diag_M_h = tf.matrix_set_diag(M_h_b * tf.ones([num_values, num_values]), tf.zeros(num_values))
            M_h = diag_M_h + non_diag_M_h

            diag_U_m = U_m_a * tf.diag(tf.ones(num_values))
            non_diag_U_m = tf.matrix_set_diag(U_m_b * tf.ones([num_values, num_values]), tf.zeros(num_values))
            U_m = diag_U_m + non_diag_U_m

            diag_W_m = W_m_a * tf.diag(tf.ones(num_values))
            non_diag_W_m = tf.matrix_set_diag(W_m_b * tf.ones([num_values, num_values]), tf.zeros(num_values))
            W_m = diag_W_m + non_diag_W_m

            cell = GRU(W_memory, W_current, M_h, W_m, U_m, num_values)
            y_predict, _ = tf.nn.dynamic_rnn(cell, slot_output, sequence_length=num_turns, dtype=tf.float32)

            y_predict = y_predict + 1000000.0 * domain_output
            # [Batch_size, max_turns, num_values]

            y[i] = tf.nn.softmax(y_predict)
            val_pred[i] = tf.cast(tf.expand_dims(tf.argmax(y[i], axis=2), axis=2), dtype="float32")
            # [Batch_size, max_turns, 1]
            y_pred[i] = tf.slice(tf.one_hot(tf.argmax(y[i], axis=2), dtype=tf.float32, depth=num_values),
                                 begin=[0, 0, 0], size=[-1, -1, num_values - 1])
            y[i] = tf.slice(y[i], begin=[0, 0, 0], size=[-1, -1, num_values - 1])
            slot_pred[i] = tf.cast(tf.reduce_max(y_pred[i], axis=2, keep_dims=True), dtype="float32")
            # [Batch_size, max_turns, 1]
            loss[i] = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels_output, logits=y_predict)
            # [Batch_size, max_turns]

    # ---------------- Compute the output and the loss function (cross_entropy) and add to optimizer--------------------
    cross_entropy = tf.add_n(loss, name="cross_entropy")
    # Add the error from the domains
    cross_entropy = tf.add(cross_entropy, domain_loss, name="total_loss")

    y = tf.concat(y, axis=2, name="y")

    mask = tf.cast(tf.sequence_mask(num_turns, maxlen=max_no_turns), dtype=tf.float32)
    mask_extended = tf.tile(tf.expand_dims(mask, axis=2), [1, 1, num_slots])
    cross_entropy = tf.reduce_sum(mask * cross_entropy, axis=1) / tf.cast(num_turns, dtype=tf.float32)

    optimizer = tf.train.AdamOptimizer(0.001)
    train_step = optimizer.minimize(cross_entropy, colocate_gradients_with_ops=True)

    # ----------------- Get the precision, recall f1-score and accuracy -----------------------------------------------

    # Domain accuracy
    true_predictions = tf.reshape(domain_labels, [-1, num_slots])
    predictions = tf.reshape(tf.round(y_d) * mask_extended, [-1, num_slots])

    y_d = tf.reshape(y_d * mask_extended, [-1, num_slots])

    _, _, _, domain_accuracy = get_metrics(predictions, true_predictions, num_turns, mask_extended, num_slots)

    mask_extended_2 = tf.tile(tf.expand_dims(mask, axis=2), [1, 1, len(slots)])

    # Slot accuracy
    true_predictions = tf.reshape(tf.concat(slot_label, axis=2), [-1, len(slots)])
    predictions = tf.reshape(tf.concat(slot_pred, axis=2) * mask_extended_2, [-1, len(slots)])

    _, _, _, slot_accuracy = get_metrics(predictions, true_predictions, num_turns, mask_extended_2, len(slots))

    # Value accuracy
    if test:
        value_accuracy = []
        mask_extended_3 = tf.expand_dims(mask, axis=2)
        for i in range(len(slots)):
            true_predictions = tf.reshape(val_label[i] * mask_extended_3, [-1, 1])
            predictions = tf.reshape(val_pred[i] * mask_extended_3, [-1, 1])

            _, _, _, value_acc = get_metrics(predictions, true_predictions, num_turns, mask_extended_3, 1)
            value_accuracy.append(value_acc)

        value_accuracy = tf.stack(value_accuracy)
    else:
        true_predictions = tf.reshape(tf.concat(val_label, axis=2) * mask_extended_2, [-1, len(slots)])
        predictions = tf.reshape(tf.concat(val_pred, axis=2) * mask_extended_2, [-1, len(slots)])

        _, _, _, value_accuracy = get_metrics(predictions, true_predictions, num_turns, mask_extended_2, len(slots))

    # Value f1-score
    true_predictions = tf.reshape(labels, [-1, num_slots])
    predictions = tf.reshape(tf.concat(y_pred, axis=2) * mask_extended, [-1, num_slots])

    precision, recall, value_f1_score, _ = get_metrics(predictions, true_predictions, num_turns,
                                                       mask_extended, num_slots)

    y_ = tf.reshape(y, [-1, num_slots])

    # -------------------- Summarise the statistics of training to be viewed in tensorboard-----------------------------
    tf.summary.scalar("domain_accuracy", domain_accuracy)
    tf.summary.scalar("slot_accuracy", slot_accuracy)
    tf.summary.scalar("value_accuracy", value_accuracy)
    tf.summary.scalar("value_f1_score", value_f1_score)
    tf.summary.scalar("cross_entropy", tf.reduce_mean(cross_entropy))

    value_f1_score = [precision, recall, value_f1_score]

    return user_input, system_input, num_turns, user_utterance_lengths, sys_utterance_lengths, labels, domain_labels, \
        domain_accuracy, slot_accuracy, value_accuracy, value_f1_score, train_step, keep_prob, predictions, \
        true_predictions, [y_, y_d]
def normalise_word_vectors(word_vectors, norm=1.0):
    """
    This method normalises the collection of word vectors provided in the word_vectors dictionary.
    """
    for word in word_vectors:
        word_vectors[word] /= math.sqrt(sum(word_vectors[word] ** 2) + 1e-6)
        word_vectors[word] *= norm
    return word_vectors

def xavier_vector(word, D=300):
    """
    Returns a D-dimensional vector for the word.

    We hash the word to always get the same vector for the given word.
    """
    def hash_string(_s):
        return abs(hash(_s)) % (10 ** 8)

    seed_value = hash_string(word)
    np.random.seed(seed_value)

    neg_value = -math.sqrt(6) / math.sqrt(D)
    pos_value = math.sqrt(6) / math.sqrt(D)

    rsample = np.random.uniform(low=neg_value, high=pos_value, size=(D,))
    norm = np.linalg.norm(rsample)
    rsample_normed = rsample / norm

    return rsample_normed

def load_ontology(url, word_vectors):
    '''
    Load the ontology from a file
    :param url: to the ontology
    :param word_vectors: dictionary of the word embeddings [words, vector_dimension]
    :return: list([domain-slot-value]), [no_slots, vector_dimension]
    '''
    global num_slots
    # print("\tMDBT: Loading the ontology....................")
    data = json.load(open(url, mode='r', encoding='utf8'), object_pairs_hook=OrderedDict)
    slot_values = []
    ontology = []
    slots_values = []
    ontology_vectors = []
    for slots in data:
        [domain, slot] = slots.split('-')
        if domain not in domains or slot == 'name':
            continue
        values = data[slots]
        if "book" in slot:
            [slot, value] = slot.split(" ")
            booking_slots[domain + '-' + value] = values
            values = [value]
        elif slot == "departure" or slot == "destination":
            values = ["place"]
        domain_vec = np.sum(process_text(domain, word_vectors), axis=0)
        if domain not in word_vectors:
            word_vectors[domain.replace(" ", "")] = domain_vec
        slot_vec = np.sum(process_text(slot, word_vectors), axis=0)
        if domain + '-' + slot not in slots_values:
            slots_values.append(domain + '-' + slot)
        if slot not in word_vectors:
            word_vectors[slot.replace(" ", "")] = slot_vec
        slot_values.append(len(values))
        for value in values:
            ontology.append(domain + '-' + slot + '-' + value)
            value_vec = np.sum(process_text(value, word_vectors, print_mode=True), axis=0)
            if value not in word_vectors:
                word_vectors[value.replace(" ", "")] = value_vec
            ontology_vectors.append(np.concatenate((domain_vec, slot_vec, value_vec)))

    num_slots = len(slots_values)
    # print("\tMDBT: We have about {} values".format(len(ontology)))
    # print("\tMDBT: The Full Ontology is:")
    # print(ontology)
    # print("\tMDBT: The slots in this ontology:")
    # print(slots_values)
    return ontology, np.asarray(ontology_vectors, dtype='float32'), slot_values

def load_word_vectors(url):
    '''
    Load the word embeddings from the url
    :param url: to the word vectors
    :return: dict of word and vector values
    '''
    word_vectors = {}
    # print("Loading the word embeddings....................")
    # print('abs path: ', os.path.abspath(url))
    with open(url, mode='r', encoding='utf8') as f:
        for line in f:
            line = line.split(" ", 1)
            key = line[0]
            word_vectors[key] = np.fromstring(line[1], dtype="float32", sep=" ")
    # print("\tMDBT: The vocabulary contains about {} word embeddings".format(len(word_vectors)))
    return normalise_word_vectors(word_vectors)
def forward(self, input_tensor, input_lengths, target_tensor, target_lengths, db_tensor, bs_tensor):
    """Given the user sentence, user belief state and database pointer,
    encode the sentence, decide what policy vector to construct and
    feed it as the first hidden state to the decoder."""
    target_length = target_tensor.size(1)

    # for fixed encoding this is zero so it does not contribute
    batch_size, seq_len = input_tensor.size()

    # ENCODER
    encoder_outputs, encoder_hidden = self.encoder(input_tensor, input_lengths)

    # POLICY
    decoder_hidden = self.policy(encoder_hidden, db_tensor, bs_tensor)

    # GENERATOR
    # Teacher forcing: Feed the target as the next input
    _, target_len = target_tensor.size()
    decoder_input = torch.LongTensor([[SOS_token] for _ in range(batch_size)], device=self.device)

    proba = torch.zeros(batch_size, target_length, self.vocab_size)  # [B, T, V]

    for t in range(target_len):
        decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_outputs)

        use_teacher_forcing = True if random.random() < self.args.teacher_ratio else False
        if use_teacher_forcing:
            decoder_input = target_tensor[:, t].view(-1, 1)  # [B, 1] Teacher forcing
        else:
            # Without teacher forcing: use its own predictions as the next input
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

        proba[:, t, :] = decoder_output

    decoded_sent = None

    return proba, None, decoded_sent
def normalize(text):
    # lower case every word
    text = text.lower()

    # replace white spaces in front and end
    text = re.sub(r'^\s*|\s*$', '', text)

    # hotel domain pfb30
    text = re.sub(r"b&b", "bed and breakfast", text)
    text = re.sub(r"b and b", "bed and breakfast", text)

    # normalize phone number
    ms = re.findall('\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4,5})', text)
    if ms:
        sidx = 0
        for m in ms:
            sidx = text.find(m[0], sidx)
            if text[sidx - 1] == '(':
                sidx -= 1
            eidx = text.find(m[-1], sidx) + len(m[-1])
            text = text.replace(text[sidx:eidx], ''.join(m))

    # normalize postcode
    ms = re.findall('([a-z]{1}[\. ]?[a-z]{1}[\. ]?\d{1,2}[, ]+\d{1}[\. ]?[a-z]{1}[\. ]?[a-z]{1}|[a-z]{2}\d{2}[a-z]{2})',
                    text)
    if ms:
        sidx = 0
        for m in ms:
            sidx = text.find(m, sidx)
            eidx = sidx + len(m)
            text = text[:sidx] + re.sub('[,\. ]', '', m) + text[eidx:]

    # weird unicode bug
    text = re.sub(u"(\u2018|\u2019)", "'", text)

    # replace time and price
    text = re.sub(timepat, ' [value_time] ', text)
    text = re.sub(pricepat, ' [value_price] ', text)
    # text = re.sub(pricepat2, '[value_price]', text)

    # replace st.
    text = text.replace(';', ',')
    text = re.sub('$\/', '', text)
    text = text.replace('/', ' and ')

    # replace other special characters
    text = text.replace('-', ' ')
    text = re.sub('[\":\<>@\(\)]', '', text)

    # insert white space before and after tokens:
    for token in ['?', '.', ',', '!']:
        text = insertSpace(token, text)

    # insert white space for 's
    text = insertSpace('\'s', text)

    # replace it's, does't, you'd ... etc
    text = re.sub('^\'', '', text)
    text = re.sub('\'$', '', text)
    text = re.sub('\'\s', ' ', text)
    text = re.sub('\s\'', ' ', text)
    for fromx, tox in replacements:
        text = ' ' + text + ' '
        text = text.replace(fromx, tox)[1:-1]

    # remove multiple spaces
    text = re.sub(' +', ' ', text)

    # concatenate numbers
    tmp = text
    tokens = text.split()
    i = 1
    while i < len(tokens):
        if re.match(u'^\d+$', tokens[i]) and \
                re.match(u'\d+$', tokens[i - 1]):
            tokens[i - 1] += tokens[i]
            del tokens[i]
        else:
            i += 1
    text = ' '.join(tokens)

    return text

class BLEUScorer(object):
    ## BLEU score calculator via GentScorer interface
    ## it calculates the BLEU-4 by taking the entire corpus in
    ## Calculate based on multiple candidates against multiple references
    def __init__(self):
        pass

def check_comp_spec(comp_spec, comp_spec_format):
    '''Base method to check component spec'''
    for spec_k, spec_format_v in comp_spec_format.items():
        comp_spec_v = comp_spec[spec_k]
        if ps.is_list(spec_format_v):
            v_set = spec_format_v
            assert comp_spec_v in v_set, f'Component spec value {ps.pick(comp_spec, spec_k)} needs to be one of {util.to_json(v_set)}'
        else:
            v_type = spec_format_v
            assert isinstance(comp_spec_v, v_type), f'Component spec {ps.pick(comp_spec, spec_k)} needs to be of type: {v_type}'
            if isinstance(v_type, tuple) and int in v_type and isinstance(comp_spec_v, float):
                # cast if it can be int
                comp_spec[spec_k] = int(comp_spec_v)

def check_body_spec(spec):
    '''Base method to check body spec for multi-agent multi-env'''
    ae_product = ps.get(spec, 'body.product')
    body_num = ps.get(spec, 'body.num')
    if ae_product == 'outer':
        pass
    elif ae_product == 'inner':
        agent_num = len(spec['agent'])
        env_num = len(spec['env'])
        # use an f-string so the counts are actually interpolated into the message
        assert agent_num == env_num, f'Agent and Env spec length must be equal for body `inner` product. Given {agent_num}, {env_num}'
    else:  # custom
        assert ps.is_list(body_num)

def check_compatibility(spec):
    '''Check compatibility among spec setups'''
    # TODO expand to be more comprehensive
    if spec['meta'].get('distributed') == 'synced':
        assert ps.get(spec, 'agent.0.net.gpu') == False, f'Distributed mode "synced" works with CPU only. Set gpu: false.'
def save(spec, unit='experiment'):
    '''Save spec to proper path. Called at Experiment or Trial init.'''
    prepath = util.get_prepath(spec, unit)
    util.write(spec, f'{prepath}_spec.json')

def tick(spec, unit):
    '''
    Method to tick lab unit (experiment, trial, session) in meta spec to advance their indices
    Reset lower lab indices to -1 so that they tick to 0
    spec_util.tick(spec, 'session')
    session = Session(spec)
    '''
    meta_spec = spec['meta']
    if unit == 'experiment':
        meta_spec['experiment_ts'] = util.get_ts()
        meta_spec['experiment'] += 1
        meta_spec['trial'] = -1
        meta_spec['session'] = -1
    elif unit == 'trial':
        if meta_spec['experiment'] == -1:
            meta_spec['experiment'] += 1
        meta_spec['trial'] += 1
        meta_spec['session'] = -1
    elif unit == 'session':
        if meta_spec['experiment'] == -1:
            meta_spec['experiment'] += 1
        if meta_spec['trial'] == -1:
            meta_spec['trial'] += 1
        meta_spec['session'] += 1
    else:
        raise ValueError(f'Unrecognized lab unit to tick: {unit}')
    # set prepath since it is determined at this point
    meta_spec['prepath'] = prepath = util.get_prepath(spec, unit)
    for folder in ('graph', 'info', 'log', 'model'):
        folder_prepath = util.insert_folder(prepath, folder)
        os.makedirs(os.path.dirname(util.smart_path(folder_prepath)), exist_ok=True)
        meta_spec[f'{folder}_prepath'] = folder_prepath
    return spec
Implementation of single threaded Advantage Actor Critic
+Original paper: “Asynchronous Methods for Deep Reinforcement Learning”
+https://arxiv.org/abs/1602.01783
+Algorithm specific spec param:
+memory.name: batch (through OnPolicyBatchReplay memory class) or episodic (through OnPolicyReplay memory class)
+lam: if not null, used as the lambda value of generalized advantage estimation (GAE) introduced in "High-Dimensional Continuous Control Using Generalized Advantage Estimation" https://arxiv.org/abs/1506.02438. This lambda controls the bias-variance tradeoff for GAE. Floating point value between 0 and 1. Lower values correspond to more bias, less variance. Higher values to more variance, less bias. Algorithm becomes A2C(GAE).
+num_step_returns: if lam is null and this is not null, specifies the number of steps for N-step returns from “Asynchronous Methods for Deep Reinforcement Learning”. The algorithm becomes A2C(Nstep).
+If both lam and num_step_returns are null, use the default TD error. Then the algorithm stays as AC.
+net.type: whether the actor and critic should share params (e.g. through ‘MLPNetShared’) or have separate params (e.g. through ‘MLPNetSeparate’). If param sharing is used then there is also the option to control the weight given to the policy and value components of the loss function through ‘policy_loss_coef’ and ‘val_loss_coef’
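
The keys above might be combined roughly as in the sketch below. Only the parameter names (memory.name, lam, num_step_returns, net.type, policy_loss_coef, val_loss_coef) come from the text; the exact nesting, the algorithm name string and the example values are assumptions.

# Hypothetical fragment of an agent spec illustrating the A2C-related keys described above.
a2c_gae_spec = {
    "algorithm": {
        "name": "ActorCritic",      # assumed class name
        "lam": 0.95,                # GAE lambda -> A2C(GAE); null/None would disable GAE
        "num_step_returns": None,   # used only when lam is null -> A2C(Nstep)
        "policy_loss_coef": 1.0,    # only relevant when actor/critic share params
        "val_loss_coef": 0.5,
    },
    "memory": {"name": "OnPolicyBatchReplay"},
    "net": {"type": "MLPNetShared"},
}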
+Algorithm - separate actor and critic:
+
+Repeat:
+
+1. Collect k examples
+2. Train the critic network using these examples
+3. Calculate the advantage of each example using the critic
+4. Multiply the advantage by the negative of the log probability of the action taken, and sum all the values. This is the policy loss.
+5. Calculate the gradient of the parameters of the actor network with respect to the policy loss
+6. Update the actor network parameters using the gradient
+
Algorithm - shared parameters:
+
+Repeat:
+
+1. Collect k examples
+2. Calculate the target for each example for the critic
+3. Compute the current estimate of the state-value for each example using the critic
+4. Calculate the critic loss using a regression loss (e.g. square loss) between the target and the estimate of the state-value for each example
+5. Calculate the advantage of each example using the rewards and critic
+6. Multiply the advantage by the negative of the log probability of the action taken, and sum all the values. This is the policy loss.
+7. Compute the total loss by summing the value and policy losses
+8. Calculate the gradient of the parameters of the shared network with respect to the total loss
+9. Update the shared network parameters using the gradient
+
+A minimal loss sketch for the shared-parameter variant is given below.
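
The following is a minimal PyTorch sketch of steps 4-7 of the shared-parameter variant. The coefficient names policy_loss_coef and val_loss_coef come from the spec description above; the function name, tensor shapes and the mse reduction are assumptions, not the library's implementation.

import torch

def a2c_shared_loss(log_probs, v_preds, v_targets, val_loss_coef=0.5, policy_loss_coef=1.0):
    '''Illustrative total loss for the shared-parameter update described above (sketch only).'''
    advs = (v_targets - v_preds).detach()                         # step 5: advantage per example
    policy_loss = -(log_probs * advs).sum()                       # step 6: -log_prob * advantage, summed
    val_loss = torch.nn.functional.mse_loss(v_preds, v_targets)   # step 4: regression loss
    return policy_loss_coef * policy_loss + val_loss_coef * val_loss  # step 7: total loss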
Initialize the neural networks used to learn the actor and critic from the spec
+Below we automatically select an appropriate net based on three different conditions
+
+1. If the action space is discrete or continuous
+
+Networks for continuous action spaces have two heads and return two values, the first is a tensor containing the mean of the action policy, the second is a tensor containing the std deviation of the action policy. The distribution is assumed to be a Gaussian (Normal) distribution.
+
+Networks for discrete action spaces have a single head and return the logits for a categorical probability distribution over the discrete actions
+
+2. If the actor and critic are separate or share weights
+
+If the networks share weights then the single network returns a list.
+
+Continuous action spaces: The return list contains 3 elements: The first element contains the mean output for the actor (policy), the second element the std dev of the policy, and the third element is the state-value estimated by the network.
+
+Discrete action spaces: The return list contains 2 elements. The first element is a tensor containing the logits for a categorical probability distribution over the actions. The second element contains the state-value estimated by the network.
+
+3. If the network type is feedforward, convolutional, or recurrent
+
+Feedforward and convolutional networks take a single state as input and require an OnPolicyReplay or OnPolicyBatchReplay memory
+
+Recurrent networks take n states as input and require env spec "frame_op": "concat", "frame_op_len": seq_len
+
+A hedged sketch of how the shared network's output list might be unpacked follows.
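
This sketch only illustrates the output structure described above for a shared actor-critic network; the function and variable names are hypothetical, not the library's API.

def unpack_shared_net_output(out_list, discrete):
    '''Illustrative unpacking of a shared actor-critic forward pass (names are hypothetical).'''
    if discrete:
        logits, v_pred = out_list        # 2 elements for discrete action spaces
        return {'pdparam': logits, 'v_pred': v_pred}
    mean, std, v_pred = out_list         # 3 elements for continuous action spaces
    return {'pdparam': (mean, std), 'v_pred': v_pred}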
Abstract class ancestor to all Algorithms,
+specifies the necessary design blueprint for agent to work in Lab.
+Mostly, implement just the abstract methods and properties.
To get the pdparam for action policy sampling, do a forward pass of the appropriate net, and pick the correct outputs.
+The pdparam will be the logits for discrete prob. dist., or the mean and std for continuous prob. dist.
Implementation of the base DQN algorithm.
+The algorithm follows the same general approach as VanillaDQN but is more general since it allows
+for two different networks (through self.net and self.target_net).
+
self.net is used to act, and is the network trained.
+self.target_net is used to estimate the maximum value of the Q-function in the next state when calculating the target (see VanillaDQN comments).
+self.target_net is updated periodically to either match self.net (self.net.update_type = “replace”) or to be a weighted average of self.net and the previous self.target_net (self.net.update_type = “polyak”)
+If desired, self.target_net can be updated slowly, and this can help to stabilize learning.
+
It also allows for different nets to be used to select the action in the next state and to evaluate the value of that action through self.online_net and self.eval_net. This can help reduce the tendency of DQN’s to overestimate the value of the Q-function. Following this approach leads to the DoubleDQN algorithm.
+
Setting all nets to self.net reduces to the VanillaDQN case.
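
A minimal PyTorch sketch of the two target-network update modes described above; only "replace" and "polyak" come from the text, while the function name and the averaging coefficient beta are assumptions.

import torch

def update_target_net(net, target_net, update_type='replace', beta=0.995):
    '''Sketch: copy or polyak-average self.net into self.target_net (beta is an assumed coefficient).'''
    with torch.no_grad():
        for p, tp in zip(net.parameters(), target_net.parameters()):
            if update_type == 'replace':
                tp.copy_(p)                            # target_net matches net exactly
            else:  # 'polyak'
                tp.mul_(beta).add_(p, alpha=1.0 - beta)  # weighted average of old target and net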
Implementation of a simple DQN algorithm.
+Algorithm:
+
+1. Collect some examples by acting in the environment and store them in a replay memory
+2. Every K steps sample N examples from replay memory
+3. For each example calculate the target (bootstrapped estimate of the discounted value of the state and action taken), y, using a neural network to approximate the Q function. s' is the next state following the action actually taken.
+
+y_t = r_t + gamma * max_a Q(s_t', a)
+
+4. For each example calculate the current estimate of the discounted value of the state and action taken
+
+x_t = Q(s_t, a_t)
+
+5. Calculate L(x, y) where L is a regression loss (eg. mse)
+6. Calculate the gradient of L with respect to all the parameters in the network and update the network parameters using the gradient
+
+A hedged sketch of the target computation in steps 3-5 follows.
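
The sketch below transcribes steps 3-5 in PyTorch. The batch dictionary keys mirror the memory docstrings later on this page, and the net mapping states to Q-values is assumed; this is not the library's train method.

import torch

def dqn_loss(batch, net, gamma=0.99):
    '''Sketch of steps 3-5 above: build the bootstrapped target y and the regression loss L(x, y).'''
    q_preds = net(batch['states'])                                        # Q(s_t, .)
    x = q_preds.gather(1, batch['actions'].long().unsqueeze(1)).squeeze(1)  # x_t = Q(s_t, a_t)
    with torch.no_grad():
        next_q = net(batch['next_states']).max(dim=1)[0]                  # max_a Q(s_t', a)
        y = batch['rewards'] + gamma * (1.0 - batch['dones']) * next_q    # y_t
    return torch.nn.functional.mse_loss(x, y)                             # L(x, y)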
Completes one training step for the agent if it is time to train.
+i.e. the environment timestep is greater than the minimum training timestep and a multiple of the training_frequency.
+Each training step consists of sampling n batches from the agent’s memory.
+For each of the batches, the target Q values (q_targets) are computed and a single training step is taken k times
+Otherwise this function does nothing.
Prepare the state and run algorithm.calc_pdparam to get pdparam for action_pd
+@param tensor:state For pdparam = net(state)
+@param algorithm The algorithm containing self.net
+@param body Body which links algorithm to the env which the action is for
+@returns tensor:pdparam
+@example
+
pdparam = calc_pdparam(state, algorithm, body)
+action_pd = ActionPD(logits=pdparam) # e.g. ActionPD is Categorical
+action = action_pd.sample()
Convenience method to sample action(s) from action_pd = ActionPD(pdparam)
+Works with batched pdparam too
+@returns tensor:action Sampled action(s)
+@example
Implementation of PPO
+This is actually just ActorCritic with a custom loss function
+Original paper: “Proximal Policy Optimization Algorithms”
+https://arxiv.org/pdf/1707.06347.pdf
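
The "custom loss function" referred to above is the clipped surrogate objective from the cited paper. Below is a minimal PyTorch sketch; the function name and the clip range argument clip_eps are assumptions.

import torch

def ppo_clip_loss(log_probs, old_log_probs, advantages, clip_eps=0.2):
    '''Clipped surrogate objective from the PPO paper cited above (sketch only).'''
    ratio = torch.exp(log_probs - old_log_probs)                           # r_t(theta)
    surr1 = ratio * advantages
    surr2 = torch.clamp(ratio, 1.0 - clip_eps, 1.0 + clip_eps) * advantages
    return -torch.min(surr1, surr2).mean()                                 # maximize objective -> minimize its negative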
Initialize the neural network used to learn the policy function from the spec
+Below we automatically select an appropriate net for a discrete or continuous action space if the setting is of the form ‘MLPNet’. Otherwise the correct type of network is assumed to be specified in the spec.
+Networks for continuous action spaces have two heads and return two values, the first is a tensor containing the mean of the action policy, the second is a tensor containing the std deviation of the action policy. The distribution is assumed to be a Gaussian (Normal) distribution.
+Networks for discrete action spaces have a single head and return the logits for a categorical probability distribution over the discrete actions
Collect some examples by acting in the environment and store them in an on policy replay memory (either batch or episodic)
+
+For each example calculate the target (bootstrapped estimate of the discounted value of the state and action taken), y, using a neural network to approximate the Q function. s_t' is the next state following the action actually taken, a_t. a_t' is the action actually taken in the next state s_t'.
+
+y_t = r_t + gamma * Q(s_t', a_t')
+
+For each example calculate the current estimate of the discounted value of the state and action taken
+
+x_t = Q(s_t, a_t)
+
+Calculate L(x, y) where L is a regression loss (eg. mse)
+
+Calculate the gradient of L with respect to all the parameters in the network and update the network parameters using the gradient
To get the pdparam for action policy sampling, do a forward pass of the appropriate net, and pick the correct outputs.
+The pdparam will be the logits for discrete prob. dist., or the mean and std for continuous prob. dist.
Calculate the SIL policy losses for actor and critic
+sil_policy_loss = -log_prob * max(R - v_pred, 0)
+sil_val_loss = (max(R - v_pred, 0)^2) / 2
+This is called on a randomly-sampled batch from experience replay
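
A direct transcription of the two SIL formulas above in PyTorch; the mean reduction over the sampled batch is an assumption.

import torch

def sil_losses(log_probs, returns, v_preds):
    '''Sketch: self-imitation losses exactly as written above, averaged over the batch (assumed reduction).'''
    clipped_adv = torch.clamp(returns - v_preds, min=0.0)       # max(R - v_pred, 0)
    sil_policy_loss = (-log_probs * clipped_adv.detach()).mean()
    sil_val_loss = (clipped_adv.pow(2) / 2).mean()
    return sil_policy_loss, sil_val_loss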
The algorithm module
+Contains implementations of reinforcement learning algorithms.
+Uses the nets module to build neural networks as the relevant function approximators
Body of an agent inside an environment, it:
+- enables the automatic dimension inference for constructing network input/output
+- acts as reference bridge between agent and environment (useful for multi-agent, multi-env)
+- acts as non-gradient variable storage for monitoring and analysis
Same as OnPolicyReplay Memory with the following difference.
+
The memory does not have a fixed size. Instead the memory stores data from N experiences, where N is determined by the user. After N experiences or if an episode has ended, all of the examples are returned to the agent to learn from.
+
In contrast, OnPolicyReplay stores entire episodes and stores them in a nested structure. OnPolicyBatchReplay stores experiences in a flat structure.
+
+e.g. memory_spec
+"memory": {
+    "name": "OnPolicyBatchReplay"
+}
+* batch_size is training_frequency provided by algorithm_spec
Returns all the examples from memory in a single batch. Batch is stored as a dict.
+Keys are the names of the different elements of an experience. Values are a list of the corresponding sampled elements
+e.g.
+batch = {
Stores agent experiences and returns them in a batch for agent training.
+
+
An experience consists of
+
+
state: representation of a state
+
action: action taken
+
reward: scalar value
+
next state: representation of next state (should be same as state)
+
done: 0 / 1 representing if the current state is the last in an episode
+
+
+
+
The memory does not have a fixed size. Instead the memory stores data from N episodes, where N is determined by the user. After N episodes, all of the examples are returned to the agent to learn from.
+
When the examples are returned to the agent, the memory is cleared to prevent the agent from learning from off policy experiences. This memory is intended for on policy algorithms.
+
+
Differences vs. Replay memory:
+
+
Experiences are nested into episodes. In Replay experiences are flat, and episode is not tracked
+
The entire memory constitutes a batch. In Replay batches are sampled from memory.
+
The memory is cleared automatically when a batch is given to the agent.
Returns all the examples from memory in a single batch. Batch is stored as a dict.
+Keys are the names of the different elements of an experience. Values are nested lists of the corresponding sampled elements. Elements are nested into episodes
+e.g.
+batch = {
Implementation follows the approach in the paper “Prioritized Experience Replay”, Schaul et al 2015” https://arxiv.org/pdf/1511.05952.pdf and is Jaromír Janisch’s with minor adaptations.
+See memory_util.py for the license and link to Jaromír’s excellent blog
+
Stores agent experiences and samples from them for agent training according to each experience’s priority
+
The memory has the same behaviour and storage structure as Replay memory with the addition of a SumTree to store and sample the priorities.
Implementation for update() to add experience to memory, expanding the memory size if necessary.
+All experiences are added with a high priority to increase the likelihood that they are sampled at least once.
This implementation is, with minor adaptations, Jaromír Janisch’s. The license is reproduced below.
+For more information see his excellent blog series “Let’s make a DQN” https://jaromiru.com/2016/09/27/lets-make-a-dqn-theory/
+
MIT License
+
Copyright (c) 2018 Jaromír Janisch
+
Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the “Software”), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
Returns a batch of batch_size samples. Batch is stored as a dict.
+Keys are the names of the different elements of an experience. Values are an array of the corresponding sampled elements
+e.g.
+batch = {
Class for generating arbitrary sized convolutional neural network,
+with optional batch normalization, and with dueling heads. Intended for Q-Learning algorithms only.
+Implementation based on “Dueling Network Architectures for Deep Reinforcement Learning” http://proceedings.mlr.press/v48/wangf16.pdf
+
Assumes that a single input example is organized into a 3D tensor.
+The entire model consists of three parts:
Class for generating arbitrary sized feedforward neural network, with dueling heads. Intended for Q-Learning algorithms only.
+Implementation based on “Dueling Network Architectures for Deep Reinforcement Learning” http://proceedings.mlr.press/v48/wangf16.pdf
Class for generating arbitrary sized feedforward neural network
+If more than 1 output tensors, will create a self.model_tails instead of making last layer part of self.model
Decorator to check if net.train_step actually updates the network weights properly
+Triggers only if to_check_train_step is True (dev/test mode)
+@example
Initialize global_nets for Hogwild using an identical instance of an algorithm from an isolated Session
+in spec.meta.distributed, specify either:
+- ‘shared’: global network parameter is shared all the time. In this mode, the algorithm's local network is replaced directly by global_net via the matching attribute name
+- ‘synced’: global network parameter is periodically synced to local network after each gradient push. In this mode, algorithm will keep a separate reference to global_{net} for each of its network
Takes unfilled inform slots and current_slots, returns dictionary of filled informed slots (with values)
+
Arguments:
+inform_slots_to_be_filled – Something that looks like {starttime:None, theater:None} where starttime and theater are slots that the agent needs filled
+current_slots – Contains a record of all filled slots in the conversation so far - for now, just use current_slots[‘inform_slots’] which is a dictionary of the already filled-in slots
+
Returns:
+filled_in_slots – A dictionary of form {slot1:value1, slot2:value2} for each sloti in inform_slots_to_be_filled
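
A minimal sketch of the behaviour described above. The argument and key names (inform_slots_to_be_filled, current_slots['inform_slots']) come from the docstring; the function body is an assumed illustration, not the actual implementation.

def fill_inform_slots(inform_slots_to_be_filled, current_slots):
    '''Sketch: copy values for requested slots from current_slots['inform_slots'] when already filled.'''
    filled_in_slots = {}
    for slot in inform_slots_to_be_filled:
        if slot in current_slots['inform_slots']:
            filled_in_slots[slot] = current_slots['inform_slots'][slot]
    return filled_in_slots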
For standardization, use gym spaces to represent observation and action spaces.
+This method iterates through the multiple brains (multiagent) then constructs and returns lists of observation_spaces and action_spaces
Initialize a new episode (dialog)
+state[‘history_slots’]: keeps all the informed_slots
+state[‘rest_slots’]: keeps all the slots that are still left in the stack (not yet handled)
For standardization, use gym spaces to represent observation and action spaces.
+This method iterates through the multiple brains (multiagent) then constructs and returns lists of observation_spaces and action_spaces
The environment module
+Contains graduated components from experiments for building/using environment.
+Provides the rich experience for agent embodiment, reflects the curriculum and allows teaching (possibly allows teacher to enter).
+To be designed by human and evolution module, based on the curriculum and fitness metrics.
Subspace of AEBSpace, collection of all envs, with interface to Session logic; same methods as singleton envs.
+Access AgentSpace properties by: AgentSpace - AEBSpace - EnvSpace - Envs
Calculate the session metrics: strength, efficiency, stability
+@param DataFrame:session_df Dataframe containing reward, frame, opt_step
+@param str:env_name Name of the environment to get its random baseline
+@param str:info_prepath Optional info_prepath to auto-save the output to
+@param str:df_mode Optional df_mode to save with info_prepath
+@returns dict:metrics Consists of scalar metrics and series local metrics
Calculate the trial metrics: mean(strength), mean(efficiency), mean(stability), consistency
+@param list:session_metrics_list The metrics collected from each session; format: {session_index: {‘scalar’: {…}, ‘local’: {…}}}
+@param str:info_prepath Optional info_prepath to auto-save the output to
+@returns dict:metrics Consists of scalar metrics and series local metrics
This task consists of an MTurk agent evaluating a chit-chat model. They
+are asked to chat to the model adopting a specific persona. After their
+conversation, they are asked to evaluate their partner on several metrics.
+convlab.human_eval.task_config.task_config = {'hit_description': 'You will chat to a tour information bot and then evaluate that bot.', 'hit_keywords': 'chat,dialog', 'hit_title': 'Chat and evaluate bot!', 'task_description': '\n (You can keep accepting new HITs after you finish your current one, so keep working on it if you like the task!)\n <br>\n <b>In this task you will chat with an information desk clerk bot to plan your tour according to a given goal.</b>\n <br>\n For example, your given goal and expected conversation could be: <br><br> \n <table border="1" cellpadding="10">\n <tr><th>Your goal</th><th>Expected conversation</th></tr>\n <tr><td>\n <ul>\n <li>You are looking for a <b>place to stay</b>. The hotel should be in the <b>cheap</b> price range and should be in the type of <b>hotel</b></li>\n <li>The hotel should include <b>free parking</b> and should include <b>free wifi</b></li>\n <li>Once you find the hotel, you want to book it for <b>6</b> people and <b>3</b> nights</b> starting from <b>tuesday</b></li>\n <li>If the booking fails how about <b>2</b> nights</li>\n <li>Make sure you get the <b>reference number</b></li>\n </ul>\n </td>\n <td>\n <b>You: </b>I am looking for a place to to stay that has cheap price range it should be in a type of hotel<br>\n <b>Info desk: </b>Okay, do you have a specific area you want to stay in?<br>\n <b>You: </b>no, i just need to make sure it\'s cheap. oh, and i need parking<br>\n <b>Info desk: </b>I found 1 cheap hotel for you that includes parking. Do you like me to book it?<br>\n <b>You: </b>Yes, please. 6 people 3 nights starting on tuesday.<br>\n <b>Info desk: </b>I am sorry but I wasn\'t able to book that for you for Tuesday. Is there another day you would like to stay or perhaps a shorter stay?<br>\n <b>You: </b>how about only 2 nights.<br>\n <b>Info desk: </b>Booking was successful.\nReference number is : 7GAWK763. Anything else I can do for you?<br>\n <b>You: </b>No, that will be all. Good bye.<br>\n <b>Info desk: </b>Thank you for using our services.<br>\n </td>\n </table>\n <br><br>\n Chat with the bot naturally and stick to your own goal but <b>do not trivially copy the goal descriptions into the message.</b>\n <br>\n Once the conversation is done, you will be asked to rate the bot on metrics like <b>goal accomplishment, language understanding, and response naturalness</b>.\n <br>\n There is a <b>2 min</b> time limit for each turn.\n <br>\n <br>\n - Do not reference the task or MTurk itself during the conversation.\n <br>\n <b><span style="color:red">- No racism, sexism or otherwise offensive comments, or the submission will be rejected and we will report to Amazon.</b></span>\n <br>\n <br>\n '}¶
+
A short and descriptive title about the kind of task the HIT contains.
+On the Amazon Mechanical Turk web site, the HIT title appears in search results,
+and everywhere the HIT is mentioned.
Special distribution class for argmax sampling, where probability is always 1 for the argmax.
+NOTE although argmax is not a sampling distribution, this implementation is for API consistency.
Returns tensor containing all values supported by a discrete
+distribution. The result will enumerate over dimension 0, so the shape
+of the result will be (cardinality,) + batch_shape + event_shape
+(where event_shape = () for univariate distributions).
+
Note that this enumerates over all batched tensors in lock-step
+[[0, 0], [1, 1], …]. With expand=False, enumeration happens
+along dim 0, but with the remaining batch dimensions being
+singleton dimensions, [[0], [1], ...
+
To iterate over the full Cartesian product use
+itertools.product(m.enumerate_support()).
+
+
+
+
+
Parameters:
expand (bool) – whether to expand the support over the
+batch dims to match the distribution’s batch_shape.
Calculate GAE from Schulman et al. https://arxiv.org/pdf/1506.02438.pdf
+v_preds are values predicted for current states, with one last element as the final next_state
+delta is defined as r + gamma * V(s’) - V(s) in eqn 10
+GAE is defined in eqn 16
+This method computes in torch tensor to prevent unnecessary moves between devices (e.g. GPU tensor to CPU numpy)
+NOTE any standardization is done outside of this method
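
A sketch of the GAE recursion described above (delta from eqn 10, the exponentially weighted sum from eqn 16), computed in torch as stated. The function name is hypothetical, and masking by episode-termination flags is an added assumption not mentioned in the text.

import torch

def calc_gaes(rewards, dones, v_preds, gamma=0.99, lam=0.95):
    '''Sketch: v_preds has one extra trailing element for the final next_state, as stated above.'''
    T = len(rewards)
    gaes = torch.zeros(T)
    future_gae = 0.0
    not_dones = 1.0 - dones                     # assumed 0/1 float flags
    for t in reversed(range(T)):
        delta = rewards[t] + gamma * v_preds[t + 1] * not_dones[t] - v_preds[t]   # eqn 10
        gaes[t] = future_gae = delta + gamma * lam * not_dones[t] * future_gae    # eqn 16
    return gaes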
Linearly decaying sinusoid that decays in roughly 10 iterations until explore_anneal_epi
+Plot the equation below to see the pattern
+suppose sinusoidal decay, start_val = 1, end_val = 0.2, stop after 60 unscaled x steps
+then we get 0.2+0.5*(1-0.2)(1 + cos x)*(1-x/60)
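
Reading the example formula literally, a general form might look like the sketch below; the argument names, the clamping and the treatment of x as (step - start_step) are assumptions, not the library's signature.

import numpy as np

def sinusoid_decay(start_val, end_val, start_step, end_step, step):
    '''Sketch generalizing the stated example: end_val + 0.5*(start_val-end_val)*(1+cos x)*(1-x/frame).'''
    if step >= end_step:
        return end_val
    x = step - start_step            # unscaled step count
    frame = end_step - start_step
    val = end_val + 0.5 * (start_val - end_val) * (1 + np.cos(x)) * (1 - x / frame)
    return float(np.clip(val, end_val, start_val))  # keep the value inside [end_val, start_val]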
Unpack a sampled vec env batch tensor
+e.g. for a state with original shape (4, ), vec env should return vec state with shape (num_envs, 4) to store in memory
+When sampled with batch_size b, we should get shape (b, num_envs, 4). But we need to unpack the num_envs dimension to get (b * num_envs, 4) for passing to a network. This method does that.
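
The reshape described above is a one-liner; the function name below is hypothetical.

import numpy as np

def unpack_vec_batch(arr):
    '''Sketch: (b, num_envs, *shape) -> (b * num_envs, *shape), as described above.'''
    b, num_envs = arr.shape[:2]
    return arr.reshape(b * num_envs, *arr.shape[2:])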
Calculate the time from timestamps ts1 to ts2
+@param {str} ts2 Later ts in the FILE_TS_FORMAT
+@param {str} ts1 Earlier ts in the FILE_TS_FORMAT
+@returns {str} delta_t in %H:%M:%S format
+@example
Concat batch objects from body.memory.sample() into one batch, when all bodies experience similar envs
+Also concat any nested epi sub-batches into flat batch
+{k: arr1} + {k: arr2} = {k: arr1 + arr2}
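
A minimal sketch of the dict concatenation rule {k: arr1} + {k: arr2} = {k: arr1 + arr2}; the function name is hypothetical and the additional flattening of nested episodic sub-batches mentioned above is omitted here.

def concat_dict_batches(batches):
    '''Sketch: merge a list of batch dicts key-by-key by concatenating their values.'''
    out = {k: [] for k in batches[0]}
    for batch in batches:
        for k, v in batch.items():
            out[k] += list(v)   # nested epi sub-batch flattening is not shown
    return out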
General method to check if episode is done for both single and vectorized env
+Only return True for singleton done since vectorized env does not have a natural episode boundary
Parallelize a method fn, args and return results with order preserved per args.
+args should be a list of tuples.
+@returns {list} results Order preserved output from fn.
Split prepath into useful names. Works with predir (prename will be None)
+prepath: output/dqn_pong_2018_12_02_082510/dqn_pong_t0_s0
+predir: output/dqn_pong_2018_12_02_082510
+prefolder: dqn_pong_2018_12_02_082510
+prename: dqn_pong_t0_s0
+spec_name: dqn_pong
+experiment_ts: 2018_12_02_082510
+ckpt: ckpt-best of dqn_pong_t0_s0_ckpt-best if available
Given a prepath, read the correct spec and recover the meta_spec that will return the same prepath for eval lab modes
+example: output/a2c_cartpole_2018_06_13_220436/a2c_cartpole_t0_s0
Universal data reading method with smart data parsing
+- {.csv} to DataFrame
+- {.json} to dict, list
+- {.yml} to dict
+- {*} to str
+@param {str} data_path The data path to read from
+@returns {data} The read data in sensible format
+@example
Use trial and session id to hash and modulo cuda device count for a cuda_id to maximize device usage. Sets the net_spec for the base Net class to pick up.
Resolve data_path into abspath with fallback to join from ROOT_DIR
+@param {str} data_path The input data path to resolve
+@param {bool} as_dir Whether to return as dirname
+@returns {str} The normalized absolute data_path
+@example
Universal data writing method with smart data parsing
+- {.csv} from DataFrame
+- {.json} from dict, list
+- {.yml} from dict
+- {*} from str(*)
+@param {*} data The data to write
+@param {str} data_path The data path to write to
+@returns {data_path} The data path written to
+@example