#!/usr/bin/env python
# -*- coding: utf-8 -*-
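"""Evaluate a trained target-driven visual navigation agent in AI2-THOR.

Restores the latest A3C checkpoint from CHECKPOINT_DIR, runs NUM_EVAL_EPISODES
evaluation episodes for each scene/task defined in TASK_LIST, and prints the
mean episode reward, episode length, and collision count per task.

Usage: python evaluate.py
"""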
import ai2thor.controller
from agent_RL_controller import RLController
# NOTE: this script uses TF1-style graph/session code, so import the v1
# compatibility API; v2 behavior is disabled at the top of main().
import tensorflow.compat.v1 as tf
import numpy as np
import random
import sys
import json
from network import ActorCriticFFNetwork
from training_thread import A3CTrainingThread
from utils.ops import sample_action
from utils.tools import SimpleImageViewer
from constants import ACTION_SIZE
from constants import CHECKPOINT_DIR
from constants import NUM_EVAL_EPISODES
from constants import VERBOSE
from constants import TASK_TYPE
from constants import TASK_LIST
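# Goal specification for evaluation: GOAL_FILE stores the goal pose recorded in
# FloorPlan402; its "agent_position" entry is loaded in main().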
GOAL_FILE = "data/FP402_goal_towel.json"
GOAL_POS = None
def main():
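  """Restore the trained policy and run evaluation episodes for each task."""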
  # disable all v2 behavior
  tf.disable_v2_behavior()
  tf.disable_eager_execution()

  device = "/cpu:0"  # use CPU for display tool
  network_scope = TASK_TYPE  # always 'navigation'
  list_of_tasks = TASK_LIST
  scene_scopes = list_of_tasks.keys()

  global_network = ActorCriticFFNetwork(action_size=ACTION_SIZE,
                                        device=device,
                                        network_scope=network_scope,
                                        scene_scopes=scene_scopes)
  sess = tf.Session()
  init = tf.global_variables_initializer()
  sess.run(init)

  saver = tf.train.Saver()
  checkpoint = tf.train.get_checkpoint_state(CHECKPOINT_DIR)

  # restore weights saved by a previous training run, if a checkpoint exists
  if checkpoint and checkpoint.model_checkpoint_path:
    saver.restore(sess, checkpoint.model_checkpoint_path)
    print("checkpoint loaded: {}".format(checkpoint.model_checkpoint_path))
  else:
    print("Could not find old checkpoint")
  scene_stats = dict()
  for scene_scope in scene_scopes:
    # TODO: remove this override; evaluation is currently pinned to FloorPlan402
    scene_scope = "FloorPlan402"
    scene_stats[scene_scope] = []

    for task_scope in list_of_tasks[scene_scope]:
      # each task is a goal position in the scene
      # env = ai2thor.controller.Controller(scene="FloorPlan227", gridSize=0.25, width=1000, height=1000)
      with open(GOAL_FILE, 'r') as f:
        GOAL_DATA = json.load(f)
      GOAL_POS = GOAL_DATA["agent_position"]

      env = RLController({
        'scene': scene_scope,
        'terminal_state_id': int(task_scope),
        'goal_pos': GOAL_POS,
        'goal_image_fpath': "data/FP402_goal_towel.png"
      })
      env.docker_enabled = True

      ep_rewards = []
      ep_lengths = []
      ep_collisions = []

      scopes = [network_scope, scene_scope]
      for i_episode in range(NUM_EVAL_EPISODES):
        env.reset()
        terminal = False
        ep_reward = 0
        ep_collision = 0
        ep_t = 0

        while not terminal:
          # action set mirrors the paper; the ordering must match scene_loader,
          # since the sampled action is an index into this list
          list_of_actions = ["MoveAhead", "RotateRight", "RotateLeft", "MoveBack"]
          pi_values = global_network.run_policy(sess, env.curr_state, env.target, scopes)
          action = sample_action(pi_values)
          print("Ep_t: {} \n\tCollided?: {} \n\tAction: {} \n\tValue: {} \n\tAll Action Values: {}".format(
              ep_t, env.collided, list_of_actions[action], pi_values[action], pi_values))
          env.step(list_of_actions[action])
          env.update()

          terminal = env.terminal
          if ep_t == 10000: break
          if env.collided: ep_collision += 1
          ep_reward += env.reward
          ep_t += 1

        ep_lengths.append(ep_t)
        ep_rewards.append(ep_reward)
        ep_collisions.append(ep_collision)
      print('evaluation: %s %s' % (scene_scope, task_scope))
      print('mean episode reward: %.2f' % np.mean(ep_rewards))
      print('mean episode length: %.2f' % np.mean(ep_lengths))
      print('mean episode collision: %.2f' % np.mean(ep_collisions))


if __name__ == "__main__":
  main()