-
Notifications
You must be signed in to change notification settings - Fork 3
/
evaluation.py
57 lines (44 loc) · 1.95 KB
/
evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import time
import numpy as np
import torch
from a2c_ppo_acktr import utils
from a2c_ppo_acktr.envs import make_vec_envs
def evaluate(actor_critic, ob_rms, env_name, seed, num_processes, eval_log_dir,
             device, num_runs=100, render=False,
             width=84, height=84):
    """Roll out ``actor_critic`` deterministically and report episode rewards.

    Fresh evaluation environments are built with ``make_vec_envs`` using the
    seed ``seed + num_processes``. If ``utils.get_vec_normalize`` returns an
    object for them, it is switched to eval mode and its ``ob_rms`` is
    replaced by the supplied ``ob_rms``. The policy is then stepped with
    ``deterministic=True`` until at least ``num_runs`` episodes have reported
    a reward, after which the environments are closed and a summary line is
    printed.

    Args:
        actor_critic: Policy exposing ``act(obs, hidden, masks,
            deterministic=...)`` and ``recurrent_hidden_state_size``.
        ob_rms: Observation statistics assigned to the normalizer, if any.
        env_name: Environment identifier forwarded to ``make_vec_envs``.
        seed: Base seed; the environments receive ``seed + num_processes``.
        num_processes: Number of parallel environments.
        eval_log_dir: Log directory forwarded to ``make_vec_envs``.
        device: Torch device for the hidden state and mask tensors.
        num_runs: Minimum number of finished episodes to collect.
        render: If true, render every step and sleep 1/60 s afterwards.
        width: Forwarded to ``make_vec_envs`` as ``width``.
        height: Forwarded to ``make_vec_envs`` as ``height``.

    Returns:
        A tuple ``(mean_reward, max_reward, episode_rewards)``. The list can
        hold more than ``num_runs`` entries because several environments may
        finish an episode during the same step.

    Raises:
        ValueError: If ``num_runs`` is not positive (no episode would be
            collected, so mean/max would be undefined).
    """
    # With num_runs <= 0 the rollout loop never runs and np.max([]) would
    # raise a ValueError only after the environments had been spawned; fail
    # early with the same exception type and a clearer message.
    if num_runs <= 0:
        raise ValueError(
            "num_runs must be positive, got {}".format(num_runs))

    # NOTE(review): the positional None / True presumably map to gamma and
    # allow_early_resets as in upstream pytorch-a2c-ppo-acktr - confirm
    # against this project's make_vec_envs signature.
    eval_envs = make_vec_envs(env_name, seed + num_processes, num_processes,
                              None, eval_log_dir, device, True,
                              width=width, height=height)

    eval_episode_rewards = []
    try:
        vec_norm = utils.get_vec_normalize(eval_envs)
        if vec_norm is not None:
            vec_norm.eval()
            vec_norm.ob_rms = ob_rms

        obs = eval_envs.reset()
        eval_recurrent_hidden_states = torch.zeros(
            num_processes, actor_critic.recurrent_hidden_state_size,
            device=device)
        eval_masks = torch.zeros(num_processes, 1, device=device)

        while len(eval_episode_rewards) < num_runs:
            with torch.no_grad():
                _, action, _, eval_recurrent_hidden_states = actor_critic.act(
                    obs,
                    eval_recurrent_hidden_states,
                    eval_masks,
                    deterministic=True)
            # act() hands back the recurrent state as a 2-item sequence here;
            # only the first item is fed back in on the next step.
            # NOTE(review): the second item looks like auxiliary metadata -
            # confirm it is safe to discard.
            eval_recurrent_hidden_states, _ = eval_recurrent_hidden_states

            if render:
                eval_envs.render()
                time.sleep(1.0/60)

            # Observe reward and next obs
            obs, _, done, infos = eval_envs.step(action)

            # Mask is 0.0 for environments whose episode just ended and 1.0
            # otherwise.
            eval_masks = torch.tensor(
                [[0.0] if done_ else [1.0] for done_ in done],
                dtype=torch.float32,
                device=device)

            for info in infos:
                if 'episode' in info:
                    eval_episode_rewards.append(info['episode']['r'])
    finally:
        # Always release the environments, even if the rollout raised.
        eval_envs.close()

    mean_reward = np.mean(eval_episode_rewards)
    max_reward = np.max(eval_episode_rewards)
    print(" Evaluation using {} episodes: mean reward {:.5f}, max reward {:.5f}\n".format(
        len(eval_episode_rewards), mean_reward, max_reward))
    return mean_reward, max_reward, eval_episode_rewards