# 5k_test.py
# Forked from XHN-1/Cassie_mujoco_RL.
from tools.test_commands import *
from tools.eval_perturb import *
from tools.eval_mission import *
from tools.compare_pols import *
from tools.eval_sensitivity import *
from collections import OrderedDict
from util import env_factory
from cassie.cassiemujoco import CassieSim
import torch
import pickle
import os, sys, argparse
import numpy as np
import copy, time, psutil
import ray
import fpdf
@ray.remote
class test_worker(object):
    """Ray actor that owns one environment/policy copy and runs single 5k tests.

    Each worker builds its own environment from ``env_fn`` and deep-copies the
    policy, so tests dispatched to different workers do not share state.
    """

    def __init__(self, id_num, env_fn, policy, mission_data):
        """Set up the worker.

        Args:
            id_num: Identifier echoed back with every result so the driver
                knows which worker to hand the next test to.
            env_fn: Zero-argument callable that returns the environment.
            policy: Policy object; a deep copy is kept by the worker.
            mission_data: Dictionary containing all mission data to be tested
                across all workers, keyed by ``mission + str(mission_speed)``.
        """
        self.id_num = id_num
        self.cassie_env = env_fn()
        self.policy = copy.deepcopy(policy)
        self.mission_data = mission_data
        torch.set_num_threads(1)

    def test_5k(self, mission, mission_speed, terrain, friction, foot_mass):
        """Run one mission on one terrain/friction/foot-mass configuration.

        Args:
            mission: Mission name (first half of the ``mission_data`` key).
            mission_speed: Mission speed (second half of the key).
            terrain: Either a ``.npy`` height-field file name, an ``.xml``
                name (flat ground model is loaded), or ``"<direction>_<deg>"``
                with direction in left/right/up/down to tilt the floor.
            friction: Value passed to ``set_geom_friction`` for the floor.
            foot_mass: Mass applied to both feet.

        Returns:
            Tuple ``(id_num, passed, mission, mission_speed, terrain,
            friction, foot_mass)``; ``passed`` is False as soon as
            ``qpos()[2]`` drops below 0.4.

        Raises:
            ValueError: If ``terrain`` is a tilt spec that cannot be parsed.
        """
        env = self.cassie_env
        if ".npy" in terrain:
            env.sim = CassieSim("./cassie/cassiemujoco/cassie_hfield.xml", reinit=True)
            hfield_data = np.load(os.path.join("./cassie/cassiemujoco/terrains/", terrain))
            env.sim.set_hfield_data(hfield_data.flatten())
        else:
            env.sim = CassieSim("./cassie/cassiemujoco/cassie.xml", reinit=True)
            if ".xml" not in terrain:
                # Not an xml file: interpret as "<direction>_<angle in degrees>".
                direct, angle = terrain.split("_")
                # split() yields strings; convert before negating / deg2rad.
                tilt = np.deg2rad(float(angle))
                if direct == "left":
                    floor_quat = euler2quat(z=0, x=tilt, y=0)
                elif direct == "right":
                    floor_quat = euler2quat(z=0, x=-tilt, y=0)
                elif direct == "up":
                    floor_quat = euler2quat(z=0, x=0, y=-tilt)
                elif direct == "down":
                    floor_quat = euler2quat(z=0, x=0, y=tilt)
                else:
                    # Raising (instead of returning a bare int) keeps the
                    # driver from failing later on a confusing unpack error.
                    raise ValueError("Error: Terrain type not understood: {}".format(terrain))
                env.sim.set_geom_quat(floor_quat, name="floor")
        env.sim.set_geom_friction(friction, "floor")
        env.sim.set_body_mass(foot_mass, "right-foot")
        env.sim.set_body_mass(foot_mass, "left-foot")

        # Missions are preloaded by the driver and shared through mission_data.
        mission_commands = self.mission_data[mission+str(mission_speed)]
        speeds = mission_commands['speed']
        orients = mission_commands['orient']

        result_tail = (mission, mission_speed, terrain, friction, foot_mass)
        state = env.reset_for_test()
        for i, speed in enumerate(speeds):
            env.update_speed(speed)
            env.orient_add = orients[i]
            with torch.no_grad():
                action = self.policy.forward(torch.Tensor(state), deterministic=True).detach().numpy()
            state = env.step_basic(action)
            if env.sim.qpos()[2] < 0.4:
                # Failed, done testing
                return (self.id_num, False) + result_tail
        return (self.id_num, True) + result_tail
# Visualizes a 5k test using the inputted env and policy for the given mission, terrain (xml model file)
# ground friction (3-long array), and foot mass (float)
def vis_5k_test(cassie_env, policy, mission, terrain, friction, foot_mass):
    """Replay a mission in the environment's viewer.

    Args:
        cassie_env: Environment whose ``sim`` is replaced and then stepped.
        policy: Policy queried with ``forward(..., deterministic=True)``.
        mission: Path of a pickle file holding 'speed' and 'orient' commands.
        terrain: Model file handed to ``CassieSim``.
        friction: Accepted for interface compatibility; not read in this body.
        foot_mass: Accepted for interface compatibility; not read in this body.
    """
    # Reload CassieSim object for new terrain
    cassie_env.sim = CassieSim(terrain, reinit=True)

    # Load in mission
    with open(mission, 'rb') as mission_file:
        commands = pickle.load(mission_file)
    speed_cmds = commands['speed']
    orient_cmds = commands['orient']
    n_commands = len(speed_cmds)

    frame_period = 1000 / 30000  # target seconds per loop iteration
    state = cassie_env.reset_for_test()
    keep_rendering = cassie_env.render()
    step = 0
    while keep_rendering and step < n_commands:
        tick = time.time()
        # Only advance the mission while the viewer is not paused
        if not cassie_env.vis.ispaused():
            cassie_env.speed = speed_cmds[step]
            cassie_env.orient_add = orient_cmds[step]
            obs = torch.Tensor(state)
            action = policy.forward(obs, deterministic=True).detach().numpy()
            state, _reward, _done, _info = cassie_env.step(action)
            step += 1
        keep_rendering = cassie_env.render()
        tock = time.time()
        # Sleep off whatever is left of the frame budget
        time.sleep(max(0, frame_period - (tock-tick)))
# Runs a 5k test using the inputted env and policy for the given mission, terrain (xml model file)
# ground friction (3-long array), and foot mass (float)
def sim_5k_test(cassie_env, policy, mission, mission_speed, terrain, friction, foot_mass):
    """Run one mission without rendering and report whether the robot stayed up.

    Mission commands are looked up in the module-level ``mission_dict`` under
    the key ``mission + str(mission_speed)``. ``friction`` and ``foot_mass``
    are accepted but not read in this body.

    Returns:
        False as soon as ``qpos()[2]`` falls below 0.4, otherwise True once
        every command has been executed.
    """
    t_begin = time.time()

    # Reload CassieSim object for new terrain
    cassie_env.sim = CassieSim(terrain, reinit=True)

    # Look up the preloaded mission
    commands = mission_dict[mission+str(mission_speed)]
    speed_cmds = commands['speed']
    print(len(speed_cmds))
    orient_cmds = commands['orient']

    survived = True
    state = cassie_env.reset_for_test()
    for step, speed in enumerate(speed_cmds):
        cassie_env.speed = speed
        cassie_env.orient_add = orient_cmds[step]
        with torch.no_grad():
            obs = torch.Tensor(state)
            action = policy.forward(obs, deterministic=True).detach().numpy()
        state = cassie_env.step_basic(action)
        if cassie_env.sim.qpos()[2] < 0.4:
            # Failed: stop stepping
            survived = False
            break

    print("eval time: ", time.time()-t_begin)
    return survived
def _pass_rate_by_group(pass_data, labels):
    """Return ``{label: pass rate}`` for each distinct label, in sorted label order.

    ``pass_data`` is a NumPy array of pass flags; ``labels`` is a parallel
    sequence of hashable, mutually orderable labels.
    """
    groups = {}
    for ind, label in enumerate(labels):
        groups.setdefault(label, []).append(ind)
    return {label: np.sum(pass_data[groups[label]]) / len(groups[label])
            for label in sorted(groups)}


def calc_stats(pass_data, terrain_data, mission_data, mission_speed_data, friction_data, mass_data):
    """Compute the overall pass rate and per-factor pass-rate breakdowns.

    All six arguments are parallel sequences with one entry per test.

    Args:
        pass_data: Pass/fail flag of each test.
        terrain_data: Terrain name/path of each test (reported by basename).
        mission_data: Mission name of each test.
        mission_speed_data: Mission speed of each test.
        friction_data: Friction setting of each test (rows are compared
            element-wise).
        mass_data: Foot mass of each test.

    Returns:
        ``(avg_pass, terrain_dict, mission_dict, fric_dict, mass_dict)`` where
        each dict maps a printable label to the pass rate of the tests sharing
        that label. Mission labels are ``"<mission> <speed>"``; only
        mission/speed pairs that were actually tested appear. With no tests at
        all the result is ``(nan, {}, {}, {}, {})``.
    """
    pass_data = np.array(pass_data)
    test_len = len(pass_data)
    if test_len == 0:
        # No tests were run: there is no rate to compute and nothing to group.
        return float("nan"), {}, {}, {}, {}
    friction_data = np.array(friction_data)
    avg_pass = np.sum(pass_data) / test_len

    # Terrain breakdown
    terrain_dict = {os.path.basename(terrain): rate
                    for terrain, rate in _pass_rate_by_group(pass_data, terrain_data).items()}

    # Mission breakdown
    # Compose mission with each speed, i.e. treat a mission at a single speed as its own
    # mission. Grouping on the (mission, speed) pairs that actually occur avoids 0/0
    # for combinations that were never tested.
    pair_rates = _pass_rate_by_group(pass_data, list(zip(mission_data, mission_speed_data)))
    mission_dict = {"{} {}".format(mission, speed): rate
                    for (mission, speed), rate in pair_rates.items()}

    # Friction breakdown (entries may be arrays, so match them element-wise)
    fric_dict = {}
    for fric in np.unique(friction_data, axis=0):
        fric_inds = [i for i, x in enumerate(friction_data) if np.all(x == fric)]
        fric_dict[np.array2string(fric)] = np.sum(pass_data[fric_inds]) / len(fric_inds)

    # Foot mass breakdown
    mass_dict = {str(round(mass, 6)): rate
                 for mass, rate in _pass_rate_by_group(pass_data, mass_data).items()}

    return avg_pass, terrain_dict, mission_dict, fric_dict, mass_dict
def report_stats(path):
    """Build "5k_test.pdf" in ``path`` from the results saved in "5k_test.pkl".

    The pickle in ``path`` is unpacked straight into ``calc_stats``; the
    resulting overall pass rate and the four breakdown tables are written to
    a letter-sized PDF in the same folder.
    """
    with open(os.path.join(path, "5k_test.pkl"), "rb") as datafile:
        saved_results = pickle.load(datafile)
    avg_pass, terrain_dict, mission_dict, fric_dict, mass_dict = calc_stats(*saved_results)

    # Initial PDF setup
    pdf = fpdf.FPDF(format='letter', unit='in')
    pdf.add_page()
    pdf.set_font('Times', '', 12.0)
    # Effective page width and text height at the body font size
    epw = pdf.w - 2*pdf.l_margin
    th = pdf.font_size
    row_h = 2*th

    # Title
    policy_name = os.path.basename(path)
    pdf.set_font('Times', '', 18.0)
    pdf.cell(epw, row_h, "5K Test Report", 0, 1, "C")
    pdf.ln(row_h)

    # Summary lines
    pdf.set_font('Times', '', 12.0)
    pdf.cell(epw, row_h, "Policy: {}".format(policy_name), 0, 1)
    pdf.ln(row_h)
    pdf.cell(epw, row_h, "Total Pass Rate: {}".format(avg_pass), 0, 1)
    pdf.ln(row_h)

    # One heading + table per breakdown: terrain, mission, friction, foot mass
    sections = (
        ("Terrain Breakdown", terrain_dict, "Terrain"),
        ("Mission Breakdown", mission_dict, "Mission"),
        ("Friction Breakdown", fric_dict, "Friction"),
        ("Foot Mass Breakdown", mass_dict, "Foot Mass"),
    )
    for heading, table, column_title in sections:
        pdf.cell(epw, row_h, heading, 0, 1)
        pdf.ln(th)
        print_table(pdf, table, column_title)
        pdf.ln(row_h)

    pdf.output(os.path.join(path, "5k_test.pdf"))
# Print table for the inputted data dictionary. Gives the necessary width for the strings in the
# dict's keys (and the column title), and gives the rest of the width to the values (rel pass rates)
def print_table(pdf, data_dict, title):
    """Draw a two-column bordered table of ``data_dict`` at the current PDF position.

    Args:
        pdf: PDF object providing ``w``, ``l_margin``, ``font_size``,
            ``get_string_width``, ``cell`` and ``ln``.
        data_dict: Mapping of row label (string) to value; values are printed
            with ``str``. May be empty, in which case only the header row is
            drawn.
        title: Header text of the first column.
    """
    epw = pdf.w - 2*pdf.l_margin
    row_h = 2*pdf.font_size

    # Size the first column for its widest entry. The title is included so the
    # header never overflows its cell and an empty dict still has a width.
    first_col_text = [title] + list(data_dict)
    col1_width = max(map(pdf.get_string_width, first_col_text)) + .2
    col2_width = epw - col1_width

    # Header row
    pdf.cell(col1_width, row_h, title, border=1, align="C")
    pdf.cell(col2_width, row_h, "Relative Pass Rate", border=1, align="C")
    pdf.ln(row_h)

    # One row per entry
    for key, value in data_dict.items():
        pdf.cell(col1_width, row_h, key, border=1, align="C")
        pdf.cell(col2_width, row_h, str(value), border=1, align="C")
        pdf.ln(row_h)
# Get policy to test from args, load policy and env
parser = argparse.ArgumentParser()
# General args
parser.add_argument("--path", type=str, default="./trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2", help="path to folder containing policy and run details")
parser.add_argument("--n_procs", type=int, default=4, help="Number of procs to use for multi-processing")
parser.add_argument("--lite", dest='full', default=True, action="store_false", help="run the lite test instead of full test")
parser.add_argument("--eval", default=True, action="store_false", help="Whether to call policy.eval() or not")
parser.add_argument("--vis", default=False, action="store_true", help="Whether to visualize test or not")
# NOTE(review): --report is parsed but never read anywhere below.
parser.add_argument("--report", default=False, action="store_true", help="Whether to report stats or not")
args = parser.parse_args()

# Load the arguments of the training run stored next to the policy.
# NOTE(review): pickle can execute arbitrary code on load; only use trusted run folders.
with open(os.path.join(args.path, "experiment.pkl"), "rb") as run_args_file:
    run_args = pickle.load(run_args_file)

# Make mirror False so that env_factory returns a regular wrap env function and not a symmetric env function that can be called to return
# a cassie environment (symmetric env cannot be called to make another env)
env_kwargs = dict(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta,
                  dynamics_randomization=run_args.dyn_random, mirror=False, clock_based=run_args.clock_based,
                  reward=run_args.reward, history=run_args.history)
if hasattr(run_args, 'simrate'):
    # Only forward simrate when the saved run actually recorded one
    env_kwargs["simrate"] = run_args.simrate
env_fn = env_factory(run_args.env_name, **env_kwargs)
cassie_env = env_fn()
policy = torch.load(os.path.join(args.path, "actor.pt"))
if args.eval:
    policy.eval()
if hasattr(policy, 'init_hidden_state'):
    policy.init_hidden_state()

num_procs = args.n_procs
print("num cpus:", psutil.cpu_count())
torch.set_num_threads(1)

model_dir = "./cassie/cassiemujoco"
mission_dir = "./cassie/missions/"
default_fric = np.array([1, 5e-3, 1e-4])
default_mass = .1498
if args.full:
    print("Running full test")
    # Run all terrains and missions
    terrains = ["cassie.xml", "noise1.npy", "noise2.npy", "noise3.npy", "rand_hill1.npy", "rand_hill2.npy", "rand_hill3.npy",
                "left_3", "right_3", "up_3", "down_3"]
    missions = ["curvy", "straight", "90_left", "90_right"]
    mission_speeds = [0.5, 0.9, 1.4, 1.9, 2.3, 2.8]
    # Sweep 80%..120% of the defaults; [1:] drops the duplicated default value
    frictions = np.linspace(.8*default_fric, default_fric, 10)
    frictions = np.concatenate((frictions, np.linspace(default_fric, 1.2*default_fric, 10)[1:]), axis=0)
    masses = np.linspace(.8*default_mass, default_mass, 10)
    masses = np.append(masses, np.linspace(default_mass, default_mass*1.2, 10)[1:])
else:
    print("Running lite test")
    # Only run flat, noisy, and hill terrain with straight and curvy missions
    terrains = ["cassie.xml", "noise1.npy", "rand_hill1.npy"]
    missions = ["curvy", "straight"]
    mission_speeds = [0.5, 0.9, 1.4, 1.9, 2.8]
    frictions = [default_fric]
    masses = [default_mass]

# Load missions (kept at module level: sim_5k_test reads mission_dict as a global)
mission_dict = {}
for mission in missions:
    for speed in mission_speeds:
        with open(os.path.join(mission_dir, mission+"/command_trajectory_{}.pkl".format(speed)), 'rb') as mission_file:
            mission_dict[mission+str(speed)] = pickle.load(mission_file)

# Make list of test args
test_args = [(mission, mission_speed, terrain, friction, mass)
             for terrain in terrains for mission in missions for mission_speed in mission_speeds
             for friction in frictions for mass in masses]

# If visualizing, only use 1 process, don't start any workers
if args.vis:
    for arg in test_args:
        mission, mission_speed, terrain, friction, mass = arg
        print("Testing ", arg)
        # vis_5k_test takes a mission pickle path and a model file path (and no
        # separate speed argument), so translate the test tuple accordingly.
        if not terrain.endswith(".xml"):
            # Height-field and tilted-floor terrains have no standalone model file to load here.
            print("Skipping visualization of non-xml terrain ", terrain)
            continue
        mission_path = os.path.join(mission_dir, mission, "command_trajectory_{}.pkl".format(mission_speed))
        vis_5k_test(cassie_env, policy, mission_path, os.path.join(model_dir, terrain), friction, mass)
else:
    num_args = len(test_args)
    # Never start more workers than there are tests, otherwise seeding the
    # workers below would index past the end of test_args.
    num_procs = min(num_procs, num_args)

    # Make and start all workers
    print("Using {} processes".format(num_procs))
    ray.shutdown()
    ray.init(num_cpus=num_procs)
    workers = [test_worker.remote(i, env_fn, policy, mission_dict) for i in range(num_procs)]
    print("made workers")
    result_ids = [workers[i].test_5k.remote(*test_args[i]) for i in range(num_procs)]
    print("started workers")
    curr_arg_ind = num_procs

    # Results are stored in completion order, one entry per finished test
    pass_data = []
    terrain_data = []
    mission_data = []
    mission_speed_data = []
    friction_data = []
    mass_data = []
    arg_count = 0
    sys.stdout.write("Finished {} out of {} tests".format(arg_count, num_args))
    sys.stdout.flush()

    start_t = time.time()
    while result_ids:
        done_id = ray.wait(result_ids, num_returns=1, timeout=None)[0][0]
        worker_id, success, mission, mission_speed, terrain, friction, mass = ray.get(done_id)
        pass_data.append(success)
        terrain_data.append(terrain)
        mission_data.append(mission)
        mission_speed_data.append(mission_speed)
        friction_data.append(friction)
        mass_data.append(mass)
        result_ids.remove(done_id)
        # Hand the worker that just finished the next pending test, if any
        if curr_arg_ind < num_args:
            result_ids.append(workers[worker_id].test_5k.remote(*test_args[curr_arg_ind]))
            curr_arg_ind += 1
        arg_count += 1
        elapsed_time = time.time() - start_t
        time_left = elapsed_time/arg_count * (num_args-arg_count)
        sys.stdout.write("\rFinished {} out of {} tests. {:.1f}s elapsed, {:.1f}s left".format(arg_count, num_args, elapsed_time, time_left))
        sys.stdout.flush()

    print()
    print("Total time: ", time.time() - start_t)
    ray.shutdown()

    # Save the raw results, then build the PDF report from them
    with open(os.path.join(args.path, "5k_test.pkl"), 'wb') as savefile:
        pickle.dump([pass_data, terrain_data, mission_data, mission_speed_data, friction_data, mass_data], savefile)
    report_stats(args.path)