#!/usr/bin/env python
"""Run Atari Environment with double-DQN."""
import argparse
import os
import random
import time

import numpy as np
import tensorflow as tf
from keras.layers import (Activation, Convolution2D, Dense, Flatten, Input,
                          Dropout, Permute)
from keras.models import Model
from keras.optimizers import Adam
from keras import losses
import gym
from PIL import Image

import deeprl_hw2 as tfrl
from deeprl_hw2.dqn import DQNAgent
from deeprl_hw2.objectives import mean_huber_loss


def create_model(window, input_shape, num_actions,
                 model_name='q_network'):  # noqa: D103
    """Create the Q-network model.

    Use Keras to construct a keras.models.Model instance (you can also
    use the Sequential model class).

    We highly recommend that you use tf.name_scope as discussed in
    class when creating the model and the layers. This will make it
    far easier to understand your network architecture if you are
    logging with tensorboard.

    Parameters
    ----------
    window: int
      Each input to the network is a sequence of frames. This value
      defines how many frames are in the sequence.
    input_shape: tuple(int, int)
      The expected input image size.
    num_actions: int
      Number of possible actions. Defined by the gym environment.
    model_name: str
      Useful when debugging. Makes the model show up nicer in tensorboard.

    Returns
    -------
    keras.models.Model
      The Q-model.
    """
    # Build the deep Q-network under a TensorFlow name scope.
    with tf.name_scope(model_name):
        # Input shape is (batch, window) + input_shape, i.e. channels first.
        input_img = Input(shape=(window,) + input_shape)
        conv1 = Convolution2D(16, (8, 8), data_format='channels_first',
                              strides=(4, 4), padding='valid')(input_img)
        conv1 = Activation('relu')(conv1)
        conv2 = Convolution2D(32, (4, 4), data_format='channels_first',
                              strides=(2, 2), padding='valid')(conv1)
        conv2 = Activation('relu')(conv2)
        # Flatten the convolutional feature maps before the fully connected layers.
        flat = Flatten()(conv2)
        full = Dense(256)(flat)
        full = Activation('relu')(full)
        # The output layer has one node per action (one Q-value per action).
        out = Dense(num_actions)(full)
        model = Model(inputs=input_img, outputs=out)
    return model
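

# Illustrative sketch only, not used by this script: the double-DQN update
# that DQNAgent.fit_double is expected to perform selects the greedy next
# action with the online network but evaluates it with the target network.
# The argument names below are assumptions for illustration, not part of
# the deeprl_hw2 API.
def _double_dqn_targets(online_net, target_net, rewards, next_states,
                        terminals, gamma=0.99):
    """Sketch of the double-DQN regression targets for a sampled batch."""
    next_q_online = online_net.predict(next_states)   # (batch, num_actions)
    best_actions = np.argmax(next_q_online, axis=1)   # action selection: online net
    next_q_target = target_net.predict(next_states)   # action evaluation: target net
    batch_idx = np.arange(best_actions.shape[0])
    return rewards + gamma * (1.0 - terminals) * next_q_target[batch_idx, best_actions]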


def get_output_folder(parent_dir, env_name):
    """Return save folder.

    Assumes folders in the parent_dir have suffix -run{run
    number}. Finds the highest run number and sets the output folder
    to that number + 1. This is just convenient so that if you run the
    same script multiple times tensorboard can plot all of the results
    on the same plots with different names.

    Parameters
    ----------
    parent_dir: str
      Path of the directory containing all experiment runs.

    Returns
    -------
    parent_dir/run_dir
      Path to this run's save directory.
    """
    os.makedirs(parent_dir, exist_ok=True)
    experiment_id = 0
    for folder_name in os.listdir(parent_dir):
        if not os.path.isdir(os.path.join(parent_dir, folder_name)):
            continue
        try:
            run_id = int(folder_name.split('-run')[-1])
            if run_id > experiment_id:
                experiment_id = run_id
        except ValueError:
            pass
    experiment_id += 1
    parent_dir = os.path.join(parent_dir, env_name)
    parent_dir = parent_dir + '-run{}'.format(experiment_id)
    return parent_dir
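

# Example (using the argparse defaults below): with --output 'double-deepQ' and
# --env 'SpaceInvaders-v0', and an existing 'double-deepQ/SpaceInvaders-v0-run1'
# directory, get_output_folder returns 'double-deepQ/SpaceInvaders-v0-run2'.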


def main():  # noqa: D103
    parser = argparse.ArgumentParser(description='Run DQN on Atari Breakout')
    parser.add_argument('--env', default='SpaceInvaders-v0',
                        help='Atari env name')
    parser.add_argument('-o', '--output', default='double-deepQ',
                        help='Directory to save data to')
    parser.add_argument('--seed', default=703, type=int, help='Random seed')
    args = parser.parse_args()

    # Seed the Python and NumPy RNGs so the parsed --seed flag has an effect.
    random.seed(args.seed)
    np.random.seed(args.seed)

    args.output = get_output_folder(args.output, args.env)
    # args.output = '/home/thupxd/deeprl_for_atari_games/' + args.output  # Comment out when running locally!
    os.makedirs(args.output, exist_ok=True)

    # Start up a session, create the DQN agent and its model,
    # then run the fit method.

    # Make the environment.
    env = gym.make(args.env)
    # input('************************** Hit to begin training... ******************************')

    # Create the Q-network.
    num_actions = env.action_space.n
    q_net = create_model(4, (84, 84), num_actions, model_name='Double_Deep_Q_Net')

    # Initialize a preprocessor sequence object.
    atari_preprocessor = tfrl.preprocessors.AtariPreprocessor((84, 84))

    # Initialize a replay memory.
    replay_memory = tfrl.core.ReplayMemory(1000000, 4)

    # Initialize a linearly decaying epsilon-greedy policy.
    _policy = tfrl.policy.GreedyEpsilonPolicy(0.05, num_actions)
    policy = tfrl.policy.LinearDecayGreedyEpsilonPolicy(_policy, 1, 0.1, 1000000)

    # Initialize the DQN agent.
    agent = tfrl.dqn.DQNAgent(q_net, atari_preprocessor, replay_memory, policy,
                              gamma=0.99, target_update_freq=10000,
                              num_burn_in=100000, train_freq=4,
                              batch_size=32, window_size=4)

    # Compile the Q-network, then train it with the double-DQN update.
    adam = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    q_net.compile(optimizer=adam, loss=mean_huber_loss)
    agent.fit_double(env, args.env, args.output, 5000000, 100000)


if __name__ == '__main__':
    main()
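
# Example invocation (assuming gym with Atari support and the deeprl_hw2
# package are installed and importable):
#   python double_dqn_atari.py --env SpaceInvaders-v0 -o double-deepQ --seed 703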