-
Notifications
You must be signed in to change notification settings - Fork 1
/
moreinforce_split.py
55 lines (44 loc) · 1.3 KB
/
moreinforce_split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import torch
import torch.nn as nn
import torch.nn.functional as F
class Actor(nn.Module):
    """Policy network producing action log-probabilities.

    The first input column is read as an integer state index and one-hot
    encoded over ``2 * tunnel + 6`` classes. All remaining columns are
    appended to that encoding unchanged, and the result is passed through
    a two-layer MLP (tanh hidden activation) followed by ``log_softmax``.

    Args:
        tunnel: Size parameter. Determines the number of one-hot classes
            (``2 * tunnel + 6``) and the hidden width (``2 * tunnel``).
        return_dim: Number of input columns that follow the state index
            (the ``return_`` slice in ``forward``). Defaults to 2, the
            value that was previously hard-coded.
        n_actions: Number of output logits. Defaults to 2, the value that
            was previously hard-coded.
    """

    def __init__(self, tunnel, return_dim=2, n_actions=2):
        super().__init__()
        self.tunnel = tunnel
        # Number of one-hot classes for the state index; kept in one place
        # so the layer size and the encoding in forward() cannot drift.
        self.n_states = 2 * tunnel + 6
        hidden = 2 * tunnel
        self.out = nn.Sequential(
            nn.Linear(self.n_states + return_dim, hidden),
            nn.Tanh(),
            nn.Linear(hidden, n_actions),
        )

    def forward(self, x):
        """Return log-probabilities over actions for each row of ``x``.

        Args:
            x: 2-D tensor whose column 0 holds the state index and whose
                remaining ``return_dim`` columns are passed through as-is.

        Returns:
            Tensor of shape ``(rows, n_actions)`` with log-softmax applied
            over the last dimension.
        """
        state, return_ = x[:, 0].flatten().long(), x[:, 1:]
        encoded = F.one_hot(state, num_classes=self.n_states).float()
        features = torch.cat((encoded, return_), 1)
        return F.log_softmax(self.out(features), dim=-1)
def utility_mul(rewards):
    """Scalarise rewards by multiplying column 0 with column 1.

    Args:
        rewards: Tensor indexed as ``rewards[:, 0]`` and ``rewards[:, 1]``
            (presumably one row per sample and one column per objective;
            confirm against the caller).

    Returns:
        The element-wise product of the two columns, viewed as a column
        tensor of shape ``(-1, 1)``.
    """
    first, second = rewards[:, 0], rewards[:, 1]
    product = torch.mul(first, second)
    return product.view(-1, 1)
if __name__ == '__main__':
    # Project-local imports live under the entry-point guard, so importing
    # this module only requires torch.
    from policies.policy import Categorical
    from memory.memory import EpisodeMemory
    from agents.moreinforce import MOReinforce
    from envs.split import SplitEnv
    from log.plotter import Plotter

    tunnel = 10
    # Single directory shared by the trainer (logdir=) and the plotter.
    run_dir = f'runs/split_env/tunnel_{tunnel}'

    split_env = SplitEnv(tunnel)
    policy_net = Actor(tunnel)

    trainer = MOReinforce(
        split_env,
        Categorical(),
        EpisodeMemory(),
        policy_net,
        lr=1e-3,
        utility=utility_mul,
        logdir=run_dir,
    )
    # eval_freq is not passed; the original call carried a commented-out
    # `eval_freq=0.1` argument.
    trainer.train(episodes=500)

    # Plotter is constructed on the same directory the trainer logged to
    # (presumably it renders the training curves; confirm in log.plotter).
    Plotter(run_dir)