Add noisy MLP
maliesa96 committed Oct 22, 2020
1 parent a6c3438 commit 2a7996a
Showing 8 changed files with 342 additions and 45 deletions.
27 changes: 15 additions & 12 deletions examples/torch/dqn_atari.py
@@ -42,7 +42,9 @@
buffer_batch_size=32,
max_epsilon=1.0,
double=True,
dueling=True,
dueling=False,
noisy=False,
noisy_sigma=0.5,
min_epsilon=0.01,
decay_ratio=0.1,
buffer_size=int(1e4),
@@ -157,27 +159,28 @@ def dqn_atari(ctxt=None,

qf = DiscreteCNNQFunction(
env_spec=env.spec,
hidden_channels=hyperparams['hidden_channels'],
kernel_sizes=hyperparams['kernel_sizes'],
strides=hyperparams['strides'],
dueling=hyperparams['dueling'],
noisy=hyperparams['noisy'],
noisy_sigma=hyperparams['noisy_sigma'],
hidden_w_init=(
lambda x: torch.nn.init.orthogonal_(x, gain=np.sqrt(2))),
hidden_sizes=hyperparams['hidden_sizes'],
is_image=True)

policy = DiscreteQFArgmaxPolicy(env_spec=env.spec, qf=qf)
exploration_policy = EpsilonGreedyPolicy(
env_spec=env.spec,
policy=policy,
total_timesteps=num_timesteps,
max_epsilon=hyperparams['max_epsilon'],
min_epsilon=hyperparams['min_epsilon'],
decay_ratio=hyperparams['decay_ratio'])

exploration_policy = policy
if not hyperparams['noisy']:
exploration_policy = EpsilonGreedyPolicy(
env_spec=env.spec,
policy=policy,
total_timesteps=num_timesteps,
max_epsilon=hyperparams['max_epsilon'],
min_epsilon=hyperparams['min_epsilon'],
decay_ratio=hyperparams['decay_ratio'])

algo = DQN(env_spec=env.spec,
policy=policy,
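With noisy=True, exploration comes from the Q-network's own learned parameter noise, which is why the launcher above now skips the EpsilonGreedyPolicy wrapper and acts greedily with respect to the noisy Q-function. For reference, below is a minimal sketch of a factorized noisy linear layer in the style of Fortunato et al. (2018); the class and its details are illustrative only, not garage's actual NoisyMLPModule internals.

import math

import torch
from torch import nn


class FactorizedNoisyLinear(nn.Module):
    """Illustrative sketch of a linear layer with factorized Gaussian noise."""

    def __init__(self, in_features, out_features, sigma_naught=0.5):
        super().__init__()
        self._in, self._out = in_features, out_features
        # Learnable means and per-parameter noise scales.
        self.weight_mu = nn.Parameter(torch.empty(out_features, in_features))
        self.weight_sigma = nn.Parameter(
            torch.empty(out_features, in_features))
        self.bias_mu = nn.Parameter(torch.empty(out_features))
        self.bias_sigma = nn.Parameter(torch.empty(out_features))
        bound = 1. / math.sqrt(in_features)
        self.weight_mu.data.uniform_(-bound, bound)
        self.bias_mu.data.uniform_(-bound, bound)
        # Sigmas start at sigma_0 / sqrt(fan-in), as in the paper.
        self.weight_sigma.data.fill_(sigma_naught / math.sqrt(in_features))
        self.bias_sigma.data.fill_(sigma_naught / math.sqrt(in_features))

    @staticmethod
    def _f(x):
        # Noise-shaping function f(x) = sgn(x) * sqrt(|x|).
        return x.sign() * x.abs().sqrt()

    def forward(self, x):
        # Factorized noise: one vector per input and one per output,
        # combined by an outer product (p + q draws instead of p * q).
        eps_in = self._f(torch.randn(self._in, device=x.device))
        eps_out = self._f(torch.randn(self._out, device=x.device))
        weight = self.weight_mu + self.weight_sigma * (
            eps_out.unsqueeze(1) * eps_in.unsqueeze(0))
        bias = self.bias_mu + self.bias_sigma * eps_out
        return nn.functional.linear(x, weight, bias)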
3 changes: 3 additions & 0 deletions src/garage/torch/algos/dqn.py
@@ -227,6 +227,9 @@ def _log_eval_results(self, epoch):
tabular.record('QFunction/MaxY', np.max(self._epoch_ys))
tabular.record('QFunction/AverageAbsY',
np.mean(np.abs(self._epoch_ys)))
# Log current noise levels when using a NoisyNet;
# this is a no-op if NoisyNet is not in use.
self._qf.log_noise('QFunction/Noisy-Sigma')

def _optimize_qf(self, timesteps):
"""Perform algorithm optimizing.
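Given the 'QFunction/Noisy-Sigma' prefix, log_noise (added to DiscreteCNNModule further down) records one root-mean-square sigma per noisy layer, so the resulting tabular columns look something like this (values illustrative):

QFunction/Noisy-Sigma_layer_1  0.021
QFunction/Noisy-Sigma_layer_2  0.008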
2 changes: 2 additions & 0 deletions src/garage/torch/modules/__init__.py
@@ -10,6 +10,7 @@
from garage.torch.modules.gaussian_mlp_module import GaussianMLPModule
from garage.torch.modules.mlp_module import MLPModule
from garage.torch.modules.multi_headed_mlp_module import MultiHeadedMLPModule
from garage.torch.modules.noisy_mlp_module import NoisyMLPModule
# DiscreteCNNModule must go after MLPModule
from garage.torch.modules.discrete_cnn_module import DiscreteCNNModule
# yapf: enable
@@ -20,6 +21,7 @@
'DiscreteCNNModule',
'MLPModule',
'MultiHeadedMLPModule',
'NoisyMLPModule',
'GaussianMLPModule',
'GaussianMLPIndependentStdModule',
'GaussianMLPTwoHeadedModule',
129 changes: 98 additions & 31 deletions src/garage/torch/modules/discrete_cnn_module.py
@@ -1,8 +1,9 @@
"""Discrete CNN Q Function."""
from dowel import tabular
import torch
from torch import nn

from garage.torch.modules import CNNModule, MLPModule
from garage.torch.modules import CNNModule, MLPModule, NoisyMLPModule


# pytorch v1.6 issue, see https://github.com/pytorch/pytorch/issues/42305
@@ -33,6 +34,13 @@ class DiscreteCNNModule(nn.Module):
of two hidden layers, each with 32 hidden units.
dueling (bool): Whether to use a dueling architecture for the
fully-connected layer.
noisy (bool): Whether to use parameter noise for the fully-connected
layers. If True, hidden_w_init, hidden_b_init, output_w_init, and
output_b_init are ignored.
noisy_sigma (float): Level of scaling to apply to the parameter noise.
This is ignored if noisy is set to False.
std_noise (float): Standard deviation of the Gaussian parameter
    noise. This is ignored if noisy is set to False.
mlp_hidden_nonlinearity (callable): Activation function for
intermediate dense layer(s) in the MLP. It should return
a torch.Tensor. Set it to None to maintain a linear activation.
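For reference, a factorized NoisyNet layer (Fortunato et al., 2018) replaces each weight and bias with a learned mean and scale perturbed by factorized Gaussian noise:

w = \mu^w + \sigma^w \odot \big(f(\varepsilon^{out})\, f(\varepsilon^{in})^\top\big),
\qquad b = \mu^b + \sigma^b \odot f(\varepsilon^{out}),
\qquad f(x) = \operatorname{sgn}(x)\sqrt{|x|},

with each \sigma initialized to \sigma_0 / \sqrt{\text{fan-in}}. The noisy_sigma argument plays the role of \sigma_0, passed through as sigma_naught in the constructor calls below.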
@@ -81,6 +89,9 @@ def __init__(self,
hidden_w_init=nn.init.xavier_uniform_,
hidden_b_init=nn.init.zeros_,
paddings=0,
noisy=True,
noisy_sigma=0.5,
std_noise=1.,
padding_mode='zeros',
max_pool=False,
pool_shape=None,
@@ -94,6 +105,8 @@
super().__init__()

self._dueling = dueling
self._noisy = noisy
self._noisy_layers = None

input_var = torch.zeros(input_shape)
cnn_module = CNNModule(input_var=input_var,
@@ -116,26 +129,49 @@
flat_dim = torch.flatten(cnn_out, start_dim=1).shape[1]

if dueling:
self._val = MLPModule(flat_dim,
1,
hidden_sizes,
hidden_nonlinearity=mlp_hidden_nonlinearity,
hidden_w_init=hidden_w_init,
hidden_b_init=hidden_b_init,
output_nonlinearity=output_nonlinearity,
output_w_init=output_w_init,
output_b_init=output_b_init,
layer_normalization=layer_normalization)
self._act = MLPModule(flat_dim,
output_dim,
hidden_sizes,
hidden_nonlinearity=mlp_hidden_nonlinearity,
hidden_w_init=hidden_w_init,
hidden_b_init=hidden_b_init,
output_nonlinearity=output_nonlinearity,
output_w_init=output_w_init,
output_b_init=output_b_init,
layer_normalization=layer_normalization)
if noisy:
self._val = NoisyMLPModule(
flat_dim,
1,
hidden_sizes,
sigma_naught=noisy_sigma,
std_noise=std_noise,
hidden_nonlinearity=mlp_hidden_nonlinearity,
output_nonlinearity=output_nonlinearity)
self._act = NoisyMLPModule(
flat_dim,
output_dim,
hidden_sizes,
sigma_naught=noisy_sigma,
std_noise=std_noise,
hidden_nonlinearity=mlp_hidden_nonlinearity,
output_nonlinearity=output_nonlinearity)
self._noisy_layers = [self._val, self._act]
else:
self._val = MLPModule(
flat_dim,
1,
hidden_sizes,
hidden_nonlinearity=mlp_hidden_nonlinearity,
hidden_w_init=hidden_w_init,
hidden_b_init=hidden_b_init,
output_nonlinearity=output_nonlinearity,
output_w_init=output_w_init,
output_b_init=output_b_init,
layer_normalization=layer_normalization)

self._act = MLPModule(
flat_dim,
output_dim,
hidden_sizes,
hidden_nonlinearity=mlp_hidden_nonlinearity,
hidden_w_init=hidden_w_init,
hidden_b_init=hidden_b_init,
output_nonlinearity=output_nonlinearity,
output_w_init=output_w_init,
output_b_init=output_b_init,
layer_normalization=layer_normalization)

if mlp_hidden_nonlinearity is None:
self._module = nn.Sequential(cnn_module, nn.Flatten())
else:
@@ -144,16 +180,29 @@ def __init__(self,
nn.Flatten())

else:
mlp_module = MLPModule(flat_dim,
output_dim,
hidden_sizes,
hidden_nonlinearity=mlp_hidden_nonlinearity,
hidden_w_init=hidden_w_init,
hidden_b_init=hidden_b_init,
output_nonlinearity=output_nonlinearity,
output_w_init=output_w_init,
output_b_init=output_b_init,
layer_normalization=layer_normalization)
mlp_module = None
if noisy:
mlp_module = NoisyMLPModule(
flat_dim,
output_dim,
hidden_sizes,
sigma_naught=noisy_sigma,
std_noise=std_noise,
hidden_nonlinearity=mlp_hidden_nonlinearity,
output_nonlinearity=output_nonlinearity)
self._noisy_layers = [mlp_module]
else:
mlp_module = MLPModule(
flat_dim,
output_dim,
hidden_sizes,
hidden_nonlinearity=mlp_hidden_nonlinearity,
hidden_w_init=hidden_w_init,
hidden_b_init=hidden_b_init,
output_nonlinearity=output_nonlinearity,
output_w_init=output_w_init,
output_b_init=output_b_init,
layer_normalization=layer_normalization)

if mlp_hidden_nonlinearity is None:
self._module = nn.Sequential(cnn_module, nn.Flatten(),
@@ -182,3 +231,21 @@ def forward(self, inputs):
return val + act

return self._module(inputs)
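The hunk above elides part of the dueling branch of forward. The usual dueling aggregation (Wang et al., 2016) centers the advantage stream before summing, so that the value and advantage components are identifiable:

Q(s, a) = V(s) + A(s, a) - \frac{1}{|\mathcal{A}|} \sum_{a'} A(s, a')

In this code's terms, that corresponds to something like act = act - act.mean(dim=1, keepdim=True) ahead of return val + act; stated here as an assumption, since those lines are not visible in this hunk.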

def log_noise(self, key):
"""Log sigma levels for noisy layers.
Args:
key (str): Prefix to use for logging.
"""
if self._noisy:
layer_num = 0
for layer in self._noisy_layers:
for name, param in layer.named_parameters():
if name.endswith('weight_sigma'):
layer_num += 1
sigma_mean = float(
(param**2).mean().sqrt().data.cpu().numpy())
tabular.record(key + '_layer_' + str(layer_num),
sigma_mean)
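Each recorded value is the root-mean-square of one layer's weight_sigma tensor, a scalar summary of how much parameter noise that layer still injects. Assuming each noisy layer exposes a weight_sigma parameter, as the loop above expects, the per-layer quantity reduces to:

# Equivalent computation for a single noisy layer (illustrative).
rms_sigma = float((layer.weight_sigma ** 2).mean().sqrt())

A value that falls over training suggests the network is learning to damp its own exploration noise.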
4 changes: 2 additions & 2 deletions src/garage/torch/modules/multi_headed_mlp_module.py
@@ -7,6 +7,8 @@
from garage.torch import NonLinearity


# pytorch v1.6 issue, see https://github.com/pytorch/pytorch/issues/42305
# pylint: disable=abstract-method
class MultiHeadedMLPModule(nn.Module):
"""MultiHeadedMLPModule Model.
Expand Down Expand Up @@ -71,8 +73,6 @@ def __init__(self,
output_nonlinearities = self._check_parameter_for_output_layer(
'output_nonlinearities', output_nonlinearities, n_heads)

self._layers = nn.ModuleList()

prev_size = input_dim
for size in hidden_sizes:
hidden_layers = nn.Sequential()