-
Notifications
You must be signed in to change notification settings - Fork 0
/
train.py
54 lines (47 loc) · 1.69 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import gym
from docking import Docking
import numpy as np
from stable_baselines import PPO2
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
# Environment configuration for the Docking simulation.
# Keys fall into three groups: simulation setup, reward-shaping weights,
# and observation-scaling bounds (observations are normalized to [-max, max]).
config = {
    # --- simulation setup ---
    "t_step": 0.01,                          # integration timestep length
    "env_size": 100,                         # environment is env_size x env_size meters (?)
    "desired_vel": 1,                        # target cruise velocity [m/s]
    # --- reward-shaping weights (all penalties, hence negative) ---
    "r_prop_change": -0.05,                  # penalty weight: large propeller-speed changes
    "r_rudder_change": -0.05,                # penalty weight: large rudder-angle changes
    "r_vel_error": -0.05,                    # penalty weight: velocity error
    "r_track_error": -0.05,                  # penalty weight: cross-track error
    "r_heading_error": -0.05,                # penalty weight: heading error
    # --- observation-scaling bounds ---
    "max_prop_vel": 200,                     # propeller speed bound [rev/s] (?)
    "max_prop_vel_change": 100,              # propeller speed change bound [rev/s] (?)
    "max_rudder_angle": 35*(np.pi/180),      # rudder angle bound [rad] (?)
    "max_rudder_angle_change": np.pi/10,     # rudder angle change bound [rad] (?)
    "max_velocity": 20,                      # velocity bound [m/s]
    "max_yaw": np.pi,                        # yaw bound [rad]
    "max_yaw_rate": np.pi/8,                 # yaw rate bound [rad/s]
    "max_velocity_error": 2,                 # velocity error bound [m/s]
    "max_track_error": 25,                   # cross-track error bound [m]
    "max_heading_error": np.pi,              # heading error bound [rad]
    "max_wave_amp": 2,                       # wave amplitude bound [m]
    "max_wave_direction": np.pi,             # wave direction bound [rad]
}
# PPO2 training hyperparameters, passed straight through to the
# stable-baselines PPO2 constructor via keyword expansion.
hyperparams = {
    'n_steps': 6144,          # environment steps collected per update
    'nminibatches': 1024,     # minibatches per update (n_steps must divide evenly)
    'learning_rate': 5e-5,    # optimizer learning rate
    'lam': 0.95,              # GAE lambda
    'gamma': 0.999,           # discount factor
    'noptepochs': 4,          # optimization epochs per update
    'cliprange': 0.2,         # PPO clipping range
    'ent_coef': 0.01,         # entropy bonus coefficient
    'verbose': 2,             # stable-baselines logging verbosity
}
if __name__ == '__main__':
    # Wrap the custom Docking environment in a single-process vectorized env,
    # as required by the stable-baselines PPO2 API.
    vec_env = DummyVecEnv([lambda: Docking(config)])
    # Build the PPO2 agent with an MLP policy and the hyperparameters above.
    agent = PPO2(MlpPolicy, vec_env, **hyperparams)
    # Train for a fixed budget of environment steps, then persist the model.
    total_steps = int(1e5)
    agent.learn(total_timesteps=total_steps)
    agent.save("./model.pkl")