From b3fd098cd3349d1aacb46cfc28d2b4bb6047b616 Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 14:39:19 +0100 Subject: [PATCH 01/57] reset_last_layer in atari_lib.py --- dopamine/discrete_domains/atari_lib.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dopamine/discrete_domains/atari_lib.py b/dopamine/discrete_domains/atari_lib.py index 91a5a4ce..61038f45 100644 --- a/dopamine/discrete_domains/atari_lib.py +++ b/dopamine/discrete_domains/atari_lib.py @@ -158,6 +158,11 @@ def __init__(self, num_actions, name=None): name='fully_connected') self.dense2 = tf.keras.layers.Dense(num_actions, name='fully_connected') + # Modification + def reset_last_layer(self): + """Reset the last layer of the network.""" + self.dense2 = tf.keras.layers.Dense(self.num_actions, name='fully_connected') + def call(self, state): """Creates the output tensor/op given the state tensor as input. From 78a8670ed2a62bd4588ca33c8fd2ffee7b88d8d8 Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 14:49:50 +0100 Subject: [PATCH 02/57] Update atari_lib.py --- dopamine/discrete_domains/atari_lib.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dopamine/discrete_domains/atari_lib.py b/dopamine/discrete_domains/atari_lib.py index 61038f45..ddd3c8d6 100644 --- a/dopamine/discrete_domains/atari_lib.py +++ b/dopamine/discrete_domains/atari_lib.py @@ -160,8 +160,8 @@ def __init__(self, num_actions, name=None): # Modification def reset_last_layer(self): - """Reset the last layer of the network.""" - self.dense2 = tf.keras.layers.Dense(self.num_actions, name='fully_connected') + """Reset the last layer of the network.""" + self.dense2 = tf.keras.layers.Dense(self.num_actions, name='fully_connected') def call(self, state): """Creates the output tensor/op given the state tensor as input. 
From 222642e6b8914974e2b0d4db4078a2ab2054265e Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 14:56:03 +0100 Subject: [PATCH 03/57] ResetLastLayers call in run_experiment.py --- dopamine/discrete_domains/run_experiment.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dopamine/discrete_domains/run_experiment.py b/dopamine/discrete_domains/run_experiment.py index 7e5d9ffc..4c84b40e 100644 --- a/dopamine/discrete_domains/run_experiment.py +++ b/dopamine/discrete_domains/run_experiment.py @@ -603,6 +603,9 @@ def run_experiment(self): return for iteration in range(self._start_iteration, self._num_iterations): + # Modified + if iteration % 25 == 0: + self._agent.ResetLastLayers() statistics = self._run_one_iteration(iteration) if self._use_legacy_logger: self._log_experiment(iteration, statistics) From 0cc2a278d28c7dec89c4bda5f73fbacf15a33d5a Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 15:07:14 +0100 Subject: [PATCH 04/57] ResetLastLayers in dqn_agent.py --- dopamine/agents/dqn/dqn_agent.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index 2dafafb1..9d62e420 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -231,6 +231,12 @@ def __init__(self, self.summary_writer.add_graph(graph=tf.compat.v1.get_default_graph()) self._sess.run(tf.compat.v1.global_variables_initializer()) + # Modified + def ResetLastLayers(self): + self.online_convnet.reset_last_layer() + self.target_convnet.reset_last_layer() + self._net_outputs = self.online_convnet(self.state_ph) + def _create_network(self, name): """Builds the convolutional network used to compute the agent's Q-values. 
From bc98e5271b67da90dc1d26659026557a923c6552 Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 16:12:24 +0100 Subject: [PATCH 05/57] Update run_experiment.py --- dopamine/discrete_domains/run_experiment.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dopamine/discrete_domains/run_experiment.py b/dopamine/discrete_domains/run_experiment.py index 4c84b40e..94fffcc7 100644 --- a/dopamine/discrete_domains/run_experiment.py +++ b/dopamine/discrete_domains/run_experiment.py @@ -606,6 +606,7 @@ def run_experiment(self): # Modified if iteration % 25 == 0: self._agent.ResetLastLayers() + sys.stdout.write("PATRIK") statistics = self._run_one_iteration(iteration) if self._use_legacy_logger: self._log_experiment(iteration, statistics) From 42956420281a7d1b72f0d86c577369d36439bfb0 Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 16:13:05 +0100 Subject: [PATCH 06/57] Update run_experiment.py --- dopamine/discrete_domains/run_experiment.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dopamine/discrete_domains/run_experiment.py b/dopamine/discrete_domains/run_experiment.py index 94fffcc7..89b46829 100644 --- a/dopamine/discrete_domains/run_experiment.py +++ b/dopamine/discrete_domains/run_experiment.py @@ -607,6 +607,7 @@ def run_experiment(self): if iteration % 25 == 0: self._agent.ResetLastLayers() sys.stdout.write("PATRIK") + sys.stdout.flush() statistics = self._run_one_iteration(iteration) if self._use_legacy_logger: self._log_experiment(iteration, statistics) From d587d56df05376a8f2dfc82fb03e4f7fc0b4deee Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 16:19:59 +0100 Subject: [PATCH 07/57] Update run_experiment.py --- dopamine/discrete_domains/run_experiment.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/dopamine/discrete_domains/run_experiment.py b/dopamine/discrete_domains/run_experiment.py index 89b46829..c36bda57 100644 --- a/dopamine/discrete_domains/run_experiment.py +++ b/dopamine/discrete_domains/run_experiment.py @@ -604,9 +604,10 @@ def run_experiment(self): for iteration in range(self._start_iteration, self._num_iterations): # Modified + sys.stdout.write("PATRIK") if iteration % 25 == 0: self._agent.ResetLastLayers() - sys.stdout.write("PATRIK") + sys.stdout.write("PATRIKkkk") sys.stdout.flush() statistics = self._run_one_iteration(iteration) if self._use_legacy_logger: From e5634e4f70a08cb08c97a5fe2199f8c5ec589e06 Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 16:28:33 +0100 Subject: [PATCH 08/57] Update run_experiment.py --- dopamine/discrete_domains/run_experiment.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dopamine/discrete_domains/run_experiment.py b/dopamine/discrete_domains/run_experiment.py index c36bda57..89b46829 100644 --- a/dopamine/discrete_domains/run_experiment.py +++ b/dopamine/discrete_domains/run_experiment.py @@ -604,10 +604,9 @@ def run_experiment(self): for iteration in range(self._start_iteration, self._num_iterations): # Modified - sys.stdout.write("PATRIK") if iteration % 25 == 0: self._agent.ResetLastLayers() - sys.stdout.write("PATRIKkkk") + sys.stdout.write("PATRIK") sys.stdout.flush() statistics = self._run_one_iteration(iteration) if self._use_legacy_logger: From 4efe9d003590607c9645ef951a0696b4536beefa Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 16:41:03 +0100 Subject: [PATCH 09/57] Update run_experiment.py --- dopamine/discrete_domains/run_experiment.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dopamine/discrete_domains/run_experiment.py b/dopamine/discrete_domains/run_experiment.py index 89b46829..94fffcc7 100644 --- 
a/dopamine/discrete_domains/run_experiment.py +++ b/dopamine/discrete_domains/run_experiment.py @@ -607,7 +607,6 @@ def run_experiment(self): if iteration % 25 == 0: self._agent.ResetLastLayers() sys.stdout.write("PATRIK") - sys.stdout.flush() statistics = self._run_one_iteration(iteration) if self._use_legacy_logger: self._log_experiment(iteration, statistics) From a2e5c1778ddd47dbdccaceb3b557b4e18d070a9e Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 16:45:30 +0100 Subject: [PATCH 10/57] Update run_experiment.py --- dopamine/discrete_domains/run_experiment.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dopamine/discrete_domains/run_experiment.py b/dopamine/discrete_domains/run_experiment.py index 94fffcc7..d8344768 100644 --- a/dopamine/discrete_domains/run_experiment.py +++ b/dopamine/discrete_domains/run_experiment.py @@ -607,6 +607,7 @@ def run_experiment(self): if iteration % 25 == 0: self._agent.ResetLastLayers() sys.stdout.write("PATRIK") + self._collector_dispatcher.write("PATRIK") statistics = self._run_one_iteration(iteration) if self._use_legacy_logger: self._log_experiment(iteration, statistics) From b3f962cb8cca8983d8153e4916965cd72ee22890 Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 16:51:20 +0100 Subject: [PATCH 11/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index 4ff8b9c6..7c7c4e5c 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -126,6 +126,9 @@ def __init__(self, min_vals, max_vals, num_actions, else: self.last_layer = tf.keras.layers.Dense(num_actions * num_atoms, name='fully_connected') + def reset_last_layer(self): + """Reset the last layer of the network.""" + self.dense2 = 
tf.keras.layers.Dense(self.num_actions, name='fully_connected') def call(self, state): """Creates the output tensor/op given the state tensor as input.""" From a4ec827d3719fe656bacf7d1b95be7ee174a41d2 Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 16:52:29 +0100 Subject: [PATCH 12/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index 7c7c4e5c..cd1bf380 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -129,6 +129,8 @@ def __init__(self, min_vals, max_vals, num_actions, def reset_last_layer(self): """Reset the last layer of the network.""" self.dense2 = tf.keras.layers.Dense(self.num_actions, name='fully_connected') + sys.stdout.write("PATRIK in gym") + sys.stdout.flush() def call(self, state): """Creates the output tensor/op given the state tensor as input.""" From 50f2cfd95d8283f954a414a090ab5bf86402f1ef Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 16:57:14 +0100 Subject: [PATCH 13/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index cd1bf380..6ef48eb7 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -126,6 +126,8 @@ def __init__(self, min_vals, max_vals, num_actions, else: self.last_layer = tf.keras.layers.Dense(num_actions * num_atoms, name='fully_connected') + + # Modified def reset_last_layer(self): """Reset the last layer of the network.""" self.dense2 = tf.keras.layers.Dense(self.num_actions, name='fully_connected') @@ -163,6 +165,10 @@ def __init__(self, num_actions, name=None): self.net = BasicDiscreteDomainNetwork( 
CARTPOLE_MIN_VALS, CARTPOLE_MAX_VALS, num_actions) + # Modified + def reset_last_layer(self): + self.net.reset_last_layer() + def call(self, state): """Creates the output tensor/op given the state tensor as input.""" x = self.net(state) From 2e2cad54b0ac24af9d476fbf0f9120c752319dcf Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 16:58:52 +0100 Subject: [PATCH 14/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index 6ef48eb7..6b00c2ab 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -131,8 +131,6 @@ def __init__(self, min_vals, max_vals, num_actions, def reset_last_layer(self): """Reset the last layer of the network.""" self.dense2 = tf.keras.layers.Dense(self.num_actions, name='fully_connected') - sys.stdout.write("PATRIK in gym") - sys.stdout.flush() def call(self, state): """Creates the output tensor/op given the state tensor as input.""" From d0f1bb53e152ea2a32220c5ef59207eb5ada4972 Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 17:01:24 +0100 Subject: [PATCH 15/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index 6b00c2ab..b053937e 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -130,8 +130,8 @@ def __init__(self, min_vals, max_vals, num_actions, # Modified def reset_last_layer(self): """Reset the last layer of the network.""" - self.dense2 = tf.keras.layers.Dense(self.num_actions, name='fully_connected') - + self.last_layer = tf.keras.layers.Dense(num_actions * num_atoms, + name='fully_connected') def 
call(self, state): """Creates the output tensor/op given the state tensor as input.""" x = tf.cast(state, tf.float32) From e576019818927356063690aaea83522fdbf4d614 Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 17:03:53 +0100 Subject: [PATCH 16/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index b053937e..edf34517 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -130,7 +130,11 @@ def __init__(self, min_vals, max_vals, num_actions, # Modified def reset_last_layer(self): """Reset the last layer of the network.""" - self.last_layer = tf.keras.layers.Dense(num_actions * num_atoms, + if self.num_atoms is None: + self.last_layer = tf.keras.layers.Dense(self.num_actions, + name='fully_connected') + else: + self.last_layer = tf.keras.layers.Dense(self.num_actions * self.num_atoms, name='fully_connected') def call(self, state): """Creates the output tensor/op given the state tensor as input.""" From bd8fd6a7c1a1dc182b2994dc1518d8fec306eb0d Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 17:05:56 +0100 Subject: [PATCH 17/57] Update run_experiment.py --- dopamine/discrete_domains/run_experiment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dopamine/discrete_domains/run_experiment.py b/dopamine/discrete_domains/run_experiment.py index d8344768..89b46829 100644 --- a/dopamine/discrete_domains/run_experiment.py +++ b/dopamine/discrete_domains/run_experiment.py @@ -607,7 +607,7 @@ def run_experiment(self): if iteration % 25 == 0: self._agent.ResetLastLayers() sys.stdout.write("PATRIK") - self._collector_dispatcher.write("PATRIK") + sys.stdout.flush() statistics = 
self._run_one_iteration(iteration) if self._use_legacy_logger: self._log_experiment(iteration, statistics) From dc11db3505feedfb62760a23dbeb9c603199cf1b Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 17:15:58 +0100 Subject: [PATCH 18/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index edf34517..4b3406b0 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -114,6 +114,7 @@ def __init__(self, min_vals, max_vals, num_actions, self.num_atoms = num_atoms self.min_vals = min_vals self.max_vals = max_vals + self.activation_fn = activation_fn # Defining layers. self.flatten = tf.keras.layers.Flatten() self.dense1 = tf.keras.layers.Dense(512, activation=activation_fn, @@ -129,7 +130,10 @@ def __init__(self, min_vals, max_vals, num_actions, # Modified def reset_last_layer(self): - """Reset the last layer of the network.""" + """Reset the last layer(s) of the network.""" + + self.dense2 = tf.keras.layers.Dense(512, activation=self.activation_fn, + name='fully_connected') if self.num_atoms is None: self.last_layer = tf.keras.layers.Dense(self.num_actions, name='fully_connected') From dd1c694ba688909bdad7ff839ebde6d2206dd7cb Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 17:21:43 +0100 Subject: [PATCH 19/57] Update dqn_agent.py --- dopamine/agents/dqn/dqn_agent.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index 9d62e420..8f42903f 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -236,6 +236,10 @@ def ResetLastLayers(self): self.online_convnet.reset_last_layer() self.target_convnet.reset_last_layer() 
self._net_outputs = self.online_convnet(self.state_ph) + self._q_argmax = tf.argmax(self._net_outputs.q_values, axis=1)[0] + self._replay_net_outputs = self.online_convnet(self._replay.states) + self._replay_next_target_net_outputs = self.target_convnet( + self._replay.next_states) def _create_network(self, name): """Builds the convolutional network used to compute the agent's Q-values. From d5927b6816f2471c08e66bf081498f10c4de4e6e Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 17:25:32 +0100 Subject: [PATCH 20/57] Update dqn_agent.py --- dopamine/agents/dqn/dqn_agent.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index 8f42903f..38da1106 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -235,11 +235,11 @@ def __init__(self, def ResetLastLayers(self): self.online_convnet.reset_last_layer() self.target_convnet.reset_last_layer() - self._net_outputs = self.online_convnet(self.state_ph) - self._q_argmax = tf.argmax(self._net_outputs.q_values, axis=1)[0] - self._replay_net_outputs = self.online_convnet(self._replay.states) - self._replay_next_target_net_outputs = self.target_convnet( - self._replay.next_states) + #self._net_outputs = self.online_convnet(self.state_ph) + #self._q_argmax = tf.argmax(self._net_outputs.q_values, axis=1)[0] + #self._replay_net_outputs = self.online_convnet(self._replay.states) + #self._replay_next_target_net_outputs = self.target_convnet( + # self._replay.next_states) def _create_network(self, name): """Builds the convolutional network used to compute the agent's Q-values. 
From 5419474054d5db6af444467dcbd28c32081fb32a Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 17:32:41 +0100 Subject: [PATCH 21/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index 4b3406b0..dcc8cab0 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -131,6 +131,10 @@ def __init__(self, min_vals, max_vals, num_actions, # Modified def reset_last_layer(self): """Reset the last layer(s) of the network.""" + + self.flatten = tf.keras.layers.Flatten() + self.dense1 = tf.keras.layers.Dense(512, activation=activation_fn, + name='fully_connected') self.dense2 = tf.keras.layers.Dense(512, activation=self.activation_fn, name='fully_connected') From cef39751eace511dc0d7adbcebf585f9eb2168af Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 17:34:23 +0100 Subject: [PATCH 22/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index dcc8cab0..e61f42f6 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -133,7 +133,7 @@ def reset_last_layer(self): """Reset the last layer(s) of the network.""" self.flatten = tf.keras.layers.Flatten() - self.dense1 = tf.keras.layers.Dense(512, activation=activation_fn, + self.dense1 = tf.keras.layers.Dense(512, activation=self.activation_fn, name='fully_connected') self.dense2 = tf.keras.layers.Dense(512, activation=self.activation_fn, From 0d735ac9d93f085f9d3fcc75a603a5b5dfe22d56 Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 
17:43:09 +0100 Subject: [PATCH 23/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index e61f42f6..b096ae59 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -128,22 +128,14 @@ def __init__(self, min_vals, max_vals, num_actions, self.last_layer = tf.keras.layers.Dense(num_actions * num_atoms, name='fully_connected') + # Modified: saving the initial weights to load them after + model.save_weights('model.h5') + # Modified def reset_last_layer(self): """Reset the last layer(s) of the network.""" - - self.flatten = tf.keras.layers.Flatten() - self.dense1 = tf.keras.layers.Dense(512, activation=self.activation_fn, - name='fully_connected') - - self.dense2 = tf.keras.layers.Dense(512, activation=self.activation_fn, - name='fully_connected') - if self.num_atoms is None: - self.last_layer = tf.keras.layers.Dense(self.num_actions, - name='fully_connected') - else: - self.last_layer = tf.keras.layers.Dense(self.num_actions * self.num_atoms, - name='fully_connected') + self.dense2.reset_states() + self.last_layer.reset_states() def call(self, state): """Creates the output tensor/op given the state tensor as input.""" x = tf.cast(state, tf.float32) From ff9b7798996e2f31c76660233c92166624d4ece9 Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 17:43:29 +0100 Subject: [PATCH 24/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index b096ae59..8449fb70 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -129,7 +129,7 @@ def __init__(self, min_vals, max_vals, num_actions, 
name='fully_connected') # Modified: saving the initial weights to load them after - model.save_weights('model.h5') + # model.save_weights('model.h5') # Modified def reset_last_layer(self): From eba9aa2f9448ed7878ff34a5b2c7d2f9e0b5f742 Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 17:48:17 +0100 Subject: [PATCH 25/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index 8449fb70..7cc05227 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -134,8 +134,17 @@ def __init__(self, min_vals, max_vals, num_actions, # Modified def reset_last_layer(self): """Reset the last layer(s) of the network.""" - self.dense2.reset_states() - self.last_layer.reset_states() + session = K.get_session() + layer = self.last_layer + for v in layer.__dict__: + v_arg = getattr(layer,v) + if hasattr(v_arg,'initializer'): + initializer_method = getattr(v_arg, 'initializer') + initializer_method.run(session=session) + print('reinitializing layer {}.{}'.format(layer.name, v)) + + + def call(self, state): """Creates the output tensor/op given the state tensor as input.""" x = tf.cast(state, tf.float32) From ae778e1b17ba172f48a1ea88eaf0e3cccd8ba51d Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 17:51:15 +0100 Subject: [PATCH 26/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index 7cc05227..1e8d9806 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -134,16 +134,14 @@ def __init__(self, min_vals, max_vals, num_actions, # Modified 
def reset_last_layer(self): """Reset the last layer(s) of the network.""" - session = K.get_session() layer = self.last_layer for v in layer.__dict__: v_arg = getattr(layer,v) if hasattr(v_arg,'initializer'): initializer_method = getattr(v_arg, 'initializer') - initializer_method.run(session=session) + initializer_method.run() print('reinitializing layer {}.{}'.format(layer.name, v)) - def call(self, state): """Creates the output tensor/op given the state tensor as input.""" From 63f46e368787ded575067c9df4d1e6075b46b224 Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 17:55:27 +0100 Subject: [PATCH 27/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index 1e8d9806..a5b2b330 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -134,13 +134,14 @@ def __init__(self, min_vals, max_vals, num_actions, # Modified def reset_last_layer(self): """Reset the last layer(s) of the network.""" - layer = self.last_layer - for v in layer.__dict__: - v_arg = getattr(layer,v) - if hasattr(v_arg,'initializer'): - initializer_method = getattr(v_arg, 'initializer') - initializer_method.run() - print('reinitializing layer {}.{}'.format(layer.name, v)) + self.dense1.set_weights([tf.keras.initializers.glorot_uniform()(self.dense1.get_weights()[0]), + tf.keras.initializers.zeros()(self.dense1.get_weights()[1])]) + + self.dense2.set_weights([tf.keras.initializers.glorot_uniform()(self.dense2.get_weights()[0]), + tf.keras.initializers.zeros()(self.dense2.get_weights()[1])]) + + self.last_layer.set_weights([tf.keras.initializers.glorot_uniform()(self.last_layer.get_weights()[0]), + tf.keras.initializers.zeros()(self.last_layer.get_weights()[1])]) def call(self, state): From 
43023e4210125b9c3aaec6eb5986e9d1719aed6c Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:07:19 +0100 Subject: [PATCH 28/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index a5b2b330..a6db0fc6 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -134,14 +134,10 @@ def __init__(self, min_vals, max_vals, num_actions, # Modified def reset_last_layer(self): """Reset the last layer(s) of the network.""" - self.dense1.set_weights([tf.keras.initializers.glorot_uniform()(self.dense1.get_weights()[0]), - tf.keras.initializers.zeros()(self.dense1.get_weights()[1])]) - - self.dense2.set_weights([tf.keras.initializers.glorot_uniform()(self.dense2.get_weights()[0]), - tf.keras.initializers.zeros()(self.dense2.get_weights()[1])]) - - self.last_layer.set_weights([tf.keras.initializers.glorot_uniform()(self.last_layer.get_weights()[0]), - tf.keras.initializers.zeros()(self.last_layer.get_weights()[1])]) + initializers = [tf.keras.initializers.glorot_uniform(), tf.keras.initializers.Zeros()] + self.dense1.set_weights([initializers[0]((512, 512)), initializers[1]((512,))]) + self.dense2.set_weights([initializers[0]((512, 512)), initializers[1]((512,))]) + self.last_layer.set_weights([initializers[0]((self.num_actions, 512)), initializers[1]((self.num_actions,))]) def call(self, state): From f64967565b297dcffb4ff6e4553f39def93abff3 Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:11:46 +0100 Subject: [PATCH 29/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py 
index a6db0fc6..5d8f6e14 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -135,7 +135,7 @@ def __init__(self, min_vals, max_vals, num_actions, def reset_last_layer(self): """Reset the last layer(s) of the network.""" initializers = [tf.keras.initializers.glorot_uniform(), tf.keras.initializers.Zeros()] - self.dense1.set_weights([initializers[0]((512, 512)), initializers[1]((512,))]) + self.dense1.set_weights([initializers[0]((4, 512)), initializers[1]((512,))]) self.dense2.set_weights([initializers[0]((512, 512)), initializers[1]((512,))]) self.last_layer.set_weights([initializers[0]((self.num_actions, 512)), initializers[1]((self.num_actions,))]) From 29a91fa474035ce5389341da547ac95eafd10bd4 Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:14:30 +0100 Subject: [PATCH 30/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index 5d8f6e14..392b2bcc 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -135,9 +135,17 @@ def __init__(self, min_vals, max_vals, num_actions, def reset_last_layer(self): """Reset the last layer(s) of the network.""" initializers = [tf.keras.initializers.glorot_uniform(), tf.keras.initializers.Zeros()] - self.dense1.set_weights([initializers[0]((4, 512)), initializers[1]((512,))]) - self.dense2.set_weights([initializers[0]((512, 512)), initializers[1]((512,))]) - self.last_layer.set_weights([initializers[0]((self.num_actions, 512)), initializers[1]((self.num_actions,))]) + dense1.set_weights([np.array(initializers[0]((4, 512)), dtype=np.float32), + np.array(initializers[1]((512,)), dtype=np.float32)]) + dense2.set_weights([np.array(initializers[0]((512, 512)), dtype=np.float32), + 
np.array(initializers[1]((512,)), dtype=np.float32)]) + + if num_atoms is None: + last_layer.set_weights([np.array(initializers[0]((self.num_actions, 512)), dtype=np.float32), + np.array(initializers[1]((self.num_actions,)), dtype=np.float32)]) + else: + last_layer.set_weights([np.array(initializers[0]((self.num_actions * num_atoms, 512)), dtype=np.float32), + np.array(initializers[1]((self.num_actions * num_atoms,), dtype=np.float32)]) def call(self, state): From d80df95c02a1d340dce15f4cd0b0bfa0e36eb485 Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:19:18 +0100 Subject: [PATCH 31/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index 392b2bcc..638bb2cb 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -145,7 +145,7 @@ def reset_last_layer(self): np.array(initializers[1]((self.num_actions,)), dtype=np.float32)]) else: last_layer.set_weights([np.array(initializers[0]((self.num_actions * num_atoms, 512)), dtype=np.float32), - np.array(initializers[1]((self.num_actions * num_atoms,), dtype=np.float32)]) + np.array(initializers[1]((self.num_actions * num_atoms,)), dtype=np.float32)]) def call(self, state): From bca846c9b76ce007e1196280b3f18eed9de75f14 Mon Sep 17 00:00:00 2001 From: Mattia Colbertaldo <100996597+Mattia-Colbertaldo@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:20:36 +0100 Subject: [PATCH 32/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index 638bb2cb..0acce3df 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -135,16 +135,16 @@ def __init__(self, min_vals, 
max_vals, num_actions, def reset_last_layer(self): """Reset the last layer(s) of the network.""" initializers = [tf.keras.initializers.glorot_uniform(), tf.keras.initializers.Zeros()] - dense1.set_weights([np.array(initializers[0]((4, 512)), dtype=np.float32), + self.dense1.set_weights([np.array(initializers[0]((4, 512)), dtype=np.float32), np.array(initializers[1]((512,)), dtype=np.float32)]) - dense2.set_weights([np.array(initializers[0]((512, 512)), dtype=np.float32), + self.dense2.set_weights([np.array(initializers[0]((512, 512)), dtype=np.float32), np.array(initializers[1]((512,)), dtype=np.float32)]) if num_atoms is None: - last_layer.set_weights([np.array(initializers[0]((self.num_actions, 512)), dtype=np.float32), + self.last_layer.set_weights([np.array(initializers[0]((self.num_actions, 512)), dtype=np.float32), np.array(initializers[1]((self.num_actions,)), dtype=np.float32)]) else: - last_layer.set_weights([np.array(initializers[0]((self.num_actions * num_atoms, 512)), dtype=np.float32), + self.last_layer.set_weights([np.array(initializers[0]((self.num_actions * num_atoms, 512)), dtype=np.float32), np.array(initializers[1]((self.num_actions * num_atoms,)), dtype=np.float32)]) From b6b3af588c146808ad95c46098f8368d969bb258 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20R=C3=A1c?= <69363378+patrikrac@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:24:45 +0100 Subject: [PATCH 33/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index 0acce3df..65faf350 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -131,21 +131,17 @@ def __init__(self, min_vals, max_vals, num_actions, # Modified: saving the initial weights to load them after # model.save_weights('model.h5') + # Modified + def reset_layer(self, layer): + a,b = 
layer.get_weights()[0].shape + layer.set_weights([np.random.randn(a,b), np.ones(layer.get_weights()[1].shape)]) + # Modified def reset_last_layer(self): """Reset the last layer(s) of the network.""" - initializers = [tf.keras.initializers.glorot_uniform(), tf.keras.initializers.Zeros()] - self.dense1.set_weights([np.array(initializers[0]((4, 512)), dtype=np.float32), - np.array(initializers[1]((512,)), dtype=np.float32)]) - self.dense2.set_weights([np.array(initializers[0]((512, 512)), dtype=np.float32), - np.array(initializers[1]((512,)), dtype=np.float32)]) - - if num_atoms is None: - self.last_layer.set_weights([np.array(initializers[0]((self.num_actions, 512)), dtype=np.float32), - np.array(initializers[1]((self.num_actions,)), dtype=np.float32)]) - else: - self.last_layer.set_weights([np.array(initializers[0]((self.num_actions * num_atoms, 512)), dtype=np.float32), - np.array(initializers[1]((self.num_actions * num_atoms,)), dtype=np.float32)]) + self.reset_layers(self.dense1) + self.reset_layers(self.dense2) + self.reset_layers(self.last_layer) def call(self, state): From 3f35f75a52b351512917ca7ab4670086fb0fdb66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20R=C3=A1c?= <69363378+patrikrac@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:25:23 +0100 Subject: [PATCH 34/57] Update gym_lib.py --- dopamine/discrete_domains/gym_lib.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index 65faf350..b61716d0 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -139,9 +139,9 @@ def reset_layer(self, layer): # Modified def reset_last_layer(self): """Reset the last layer(s) of the network.""" - self.reset_layers(self.dense1) - self.reset_layers(self.dense2) - self.reset_layers(self.last_layer) + self.reset_layer(self.dense1) + self.reset_layer(self.dense2) + self.reset_layer(self.last_layer) def call(self, state): From 
c101f648328821a41ff122e3c2bbf5bde9c09902 Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Sat, 11 Nov 2023 19:59:19 +0100 Subject: [PATCH 35/57] Added a reset weights method to the DQNAgent class --- dopamine/agents/dqn/dqn_agent.py | 24 ++++++++++++++++++++- dopamine/discrete_domains/run_experiment.py | 8 +++---- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index 38da1106..8c77cc54 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -101,7 +101,8 @@ def __init__(self, centered=True), summary_writer=None, summary_writing_frequency=500, - allow_partial_reload=False): + allow_partial_reload=False, + reset_period=10000): """Initializes the agent and constructs the components of its graph. Args: @@ -182,6 +183,11 @@ def __init__(self, self.eval_mode = eval_mode self.training_steps = 0 self.optimizer = optimizer + #Modified + self.optimizer_state = [self.optimizer.iterations, self.optimizer.lr,self.optimizer.beta_1, + self.optimizer.beta_2, self.optimizer.decay] + + self.reset_period = reset_period tf.compat.v1.disable_v2_behavior() if isinstance(summary_writer, str): # If we're passing in directory name. 
self.summary_writer = tf.compat.v1.summary.FileWriter(summary_writer) @@ -210,6 +216,8 @@ def __init__(self, self._build_networks() + self.online_convnet_state = self.online_convnet.get_weights() + self._train_op = self._build_train_op() self._sync_qt_ops = self._build_sync_op() @@ -460,8 +468,22 @@ def _train_step(self): if self.training_steps % self.target_update_period == 0: self._sess.run(self._sync_qt_ops) + if self.training_steps % self.reset_period == 0: + print("Resetting last layers...") + self.ResetWeights() + self.training_steps += 1 + def ResetWeights(self): + #Reset the weights of the last layer + self.online_convnet.set_weights(self.online_convnet_state) + self.target_convnet.set_weights(self.online_convnet_state) + self._sess.run(tf.compat.v1.global_variables_initializer()) + #Reset the optimizer state + optimizer_reset = tf.compat.v1.variables_initializer(self.optimizer_state) + self._sess.run(optimizer_reset) + + def _record_observation(self, observation): """Records an observation and update state. 
diff --git a/dopamine/discrete_domains/run_experiment.py b/dopamine/discrete_domains/run_experiment.py index 89b46829..444d64bc 100644 --- a/dopamine/discrete_domains/run_experiment.py +++ b/dopamine/discrete_domains/run_experiment.py @@ -604,10 +604,10 @@ def run_experiment(self): for iteration in range(self._start_iteration, self._num_iterations): # Modified - if iteration % 25 == 0: - self._agent.ResetLastLayers() - sys.stdout.write("PATRIK") - sys.stdout.flush() + #if iteration % 25 == 0: + # self._agent.ResetLastLayers() + # sys.stdout.write("PATRIK") + # sys.stdout.flush() statistics = self._run_one_iteration(iteration) if self._use_legacy_logger: self._log_experiment(iteration, statistics) From 6f42a1005218154ac97dbb84cf1c215fd1618460 Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Sat, 11 Nov 2023 20:01:09 +0100 Subject: [PATCH 36/57] Added a reset weights method to the DQNAgent class --- dopamine/agents/dqn/dqn_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index 8c77cc54..f237a76c 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -102,7 +102,7 @@ def __init__(self, summary_writer=None, summary_writing_frequency=500, allow_partial_reload=False, - reset_period=10000): + reset_period=100): """Initializes the agent and constructs the components of its graph. 
Args: From 4835b95c885545287d3da66a125aea47d89bf667 Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Sat, 11 Nov 2023 20:10:41 +0100 Subject: [PATCH 37/57] Added a reset weights method to the DQNAgent class --- dopamine/agents/dqn/dqn_agent.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index f237a76c..110e5e4c 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -102,7 +102,7 @@ def __init__(self, summary_writer=None, summary_writing_frequency=500, allow_partial_reload=False, - reset_period=100): + reset_period=500): """Initializes the agent and constructs the components of its graph. Args: @@ -184,8 +184,7 @@ def __init__(self, self.training_steps = 0 self.optimizer = optimizer #Modified - self.optimizer_state = [self.optimizer.iterations, self.optimizer.lr,self.optimizer.beta_1, - self.optimizer.beta_2, self.optimizer.decay] + self.optimizer_state = self.optimizer.variables() self.reset_period = reset_period tf.compat.v1.disable_v2_behavior() From d51f3ee2aeaa71e102ab5cb01c38e1a5bfd0a0be Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Mon, 13 Nov 2023 20:50:02 +0100 Subject: [PATCH 38/57] removed target net update --- dopamine/agents/dqn/dqn_agent.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index 110e5e4c..773fba0d 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -183,7 +183,7 @@ def __init__(self, self.eval_mode = eval_mode self.training_steps = 0 self.optimizer = optimizer - #Modified + # Modified self.optimizer_state = self.optimizer.variables() self.reset_period = reset_period @@ -474,15 +474,14 @@ def _train_step(self): self.training_steps += 1 def ResetWeights(self): - #Reset the weights of the last layer + # Reset the weights of the last layer 
self.online_convnet.set_weights(self.online_convnet_state) - self.target_convnet.set_weights(self.online_convnet_state) + # self.target_convnet.set_weights(self.online_convnet_state) self._sess.run(tf.compat.v1.global_variables_initializer()) - #Reset the optimizer state + # Reset the optimizer state optimizer_reset = tf.compat.v1.variables_initializer(self.optimizer_state) self._sess.run(optimizer_reset) - def _record_observation(self, observation): """Records an observation and update state. From 27693fce02f039f639cf55a23b283c2549a04eef Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Mon, 13 Nov 2023 21:16:15 +0100 Subject: [PATCH 39/57] Added some funky layer reset methodology --- dopamine/agents/dqn/dqn_agent.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index 773fba0d..f8954bf7 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -475,7 +475,9 @@ def _train_step(self): def ResetWeights(self): # Reset the weights of the last layer - self.online_convnet.set_weights(self.online_convnet_state) + # self.online_convnet.set_weights(self.online_convnet_state) + self.online_convnet.last_layer.kernel.initializer.run(session=self._sess) + self.online_convnet.last_layer.bias.initializer.run(session=self._sess) # self.target_convnet.set_weights(self.online_convnet_state) self._sess.run(tf.compat.v1.global_variables_initializer()) # Reset the optimizer state From 1046cae75240223325ceb371904ec20c0cb7f28b Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Mon, 13 Nov 2023 21:40:34 +0100 Subject: [PATCH 40/57] Some more tests --- dopamine/agents/dqn/dqn_agent.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index f8954bf7..30f46e7e 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -476,9 +476,16 @@ def 
_train_step(self): def ResetWeights(self): # Reset the weights of the last layer # self.online_convnet.set_weights(self.online_convnet_state) - self.online_convnet.last_layer.kernel.initializer.run(session=self._sess) - self.online_convnet.last_layer.bias.initializer.run(session=self._sess) # self.target_convnet.set_weights(self.online_convnet_state) + for layer in self.online_convnet.layers: + print(layer.name) + if layer.name == "last_layer": + layer.kernel.initializer.run(session=self._sess) + layer.bias.initializer.run(session=self._sess) + + #self.online_convnet.last_layer.kernel.initializer.run(session=self._sess) + #self.online_convnet.last_layer.bias.initializer.run(session=self._sess) + self._sess.run(tf.compat.v1.global_variables_initializer()) # Reset the optimizer state optimizer_reset = tf.compat.v1.variables_initializer(self.optimizer_state) From 772384a19e9b34e11aef43f0d257765657a7828e Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Mon, 13 Nov 2023 21:44:35 +0100 Subject: [PATCH 41/57] Tests --- dopamine/agents/dqn/dqn_agent.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index 30f46e7e..f1612aad 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -479,7 +479,10 @@ def ResetWeights(self): # self.target_convnet.set_weights(self.online_convnet_state) for layer in self.online_convnet.layers: print(layer.name) + for l in layer: + print(l.name) if layer.name == "last_layer": + print("Resetting last layer!") layer.kernel.initializer.run(session=self._sess) layer.bias.initializer.run(session=self._sess) From 5970b1c8fd58530e9151b588bc6fbc0b678fd6d1 Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Mon, 13 Nov 2023 21:46:30 +0100 Subject: [PATCH 42/57] Tests --- dopamine/agents/dqn/dqn_agent.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index f1612aad..52a19237 100644 --- 
a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -479,8 +479,6 @@ def ResetWeights(self): # self.target_convnet.set_weights(self.online_convnet_state) for layer in self.online_convnet.layers: print(layer.name) - for l in layer: - print(l.name) if layer.name == "last_layer": print("Resetting last layer!") layer.kernel.initializer.run(session=self._sess) From 8434aa0f3098ec25caf670a52d7b8771d22e65bc Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Mon, 13 Nov 2023 21:48:12 +0100 Subject: [PATCH 43/57] Tests --- dopamine/agents/dqn/dqn_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index 52a19237..a4ba27e2 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -487,7 +487,7 @@ def ResetWeights(self): #self.online_convnet.last_layer.kernel.initializer.run(session=self._sess) #self.online_convnet.last_layer.bias.initializer.run(session=self._sess) - self._sess.run(tf.compat.v1.global_variables_initializer()) + # self._sess.run(tf.compat.v1.global_variables_initializer()) # Reset the optimizer state optimizer_reset = tf.compat.v1.variables_initializer(self.optimizer_state) self._sess.run(optimizer_reset) From e9946a76020124ea11375c5369bec19a6c1c3f60 Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Mon, 13 Nov 2023 21:50:52 +0100 Subject: [PATCH 44/57] Tests --- dopamine/agents/dqn/dqn_agent.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index a4ba27e2..99f2f5e2 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -477,12 +477,11 @@ def ResetWeights(self): # Reset the weights of the last layer # self.online_convnet.set_weights(self.online_convnet_state) # self.target_convnet.set_weights(self.online_convnet_state) - for layer in self.online_convnet.layers: - print(layer.name) - if 
layer.name == "last_layer": - print("Resetting last layer!") - layer.kernel.initializer.run(session=self._sess) - layer.bias.initializer.run(session=self._sess) + for net in self.online_convnet.layers: + print(net.last_layer) + print("Resetting last layer!") + net.last_layer.kernel.initializer.run(session=self._sess) + net.last_layer.bias.initializer.run(session=self._sess) #self.online_convnet.last_layer.kernel.initializer.run(session=self._sess) #self.online_convnet.last_layer.bias.initializer.run(session=self._sess) From 017cf3f9bb31147e40bdaaf1b07f93a7f8492642 Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Mon, 13 Nov 2023 22:10:24 +0100 Subject: [PATCH 45/57] Tests --- dopamine/agents/dqn/dqn_agent.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index 99f2f5e2..53363b7a 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -483,8 +483,11 @@ def ResetWeights(self): net.last_layer.kernel.initializer.run(session=self._sess) net.last_layer.bias.initializer.run(session=self._sess) - #self.online_convnet.last_layer.kernel.initializer.run(session=self._sess) - #self.online_convnet.last_layer.bias.initializer.run(session=self._sess) + for layer in self.online_convnet.layers.layers: + print(layer) + + # self.online_convnet.last_layer.kernel.initializer.run(session=self._sess) + # self.online_convnet.last_layer.bias.initializer.run(session=self._sess) # self._sess.run(tf.compat.v1.global_variables_initializer()) # Reset the optimizer state From f364c285706be42f9229477d3fea9d2899d04c23 Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Mon, 13 Nov 2023 22:13:27 +0100 Subject: [PATCH 46/57] Tests --- dopamine/agents/dqn/dqn_agent.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index 53363b7a..ae9ad87c 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ 
b/dopamine/agents/dqn/dqn_agent.py @@ -482,9 +482,8 @@ def ResetWeights(self): print("Resetting last layer!") net.last_layer.kernel.initializer.run(session=self._sess) net.last_layer.bias.initializer.run(session=self._sess) - - for layer in self.online_convnet.layers.layers: - print(layer) + for layer in net: + print(layer.name) # self.online_convnet.last_layer.kernel.initializer.run(session=self._sess) # self.online_convnet.last_layer.bias.initializer.run(session=self._sess) From 7e58add436bedf6a3dfe425d66850ffa14b1ba5f Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Mon, 13 Nov 2023 22:16:08 +0100 Subject: [PATCH 47/57] Tests --- dopamine/agents/dqn/dqn_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index ae9ad87c..a35c48c7 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -482,7 +482,7 @@ def ResetWeights(self): print("Resetting last layer!") net.last_layer.kernel.initializer.run(session=self._sess) net.last_layer.bias.initializer.run(session=self._sess) - for layer in net: + for layer in net.layers: print(layer.name) # self.online_convnet.last_layer.kernel.initializer.run(session=self._sess) From 107d4a3f7736e773bb148d0fc914ea5b2e09f341 Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Mon, 13 Nov 2023 22:21:22 +0100 Subject: [PATCH 48/57] Tests --- dopamine/agents/dqn/dqn_agent.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index a35c48c7..9fb67dc1 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -482,8 +482,7 @@ def ResetWeights(self): print("Resetting last layer!") net.last_layer.kernel.initializer.run(session=self._sess) net.last_layer.bias.initializer.run(session=self._sess) - for layer in net.layers: - print(layer.name) + print(dir(net)) # 
self.online_convnet.last_layer.kernel.initializer.run(session=self._sess) # self.online_convnet.last_layer.bias.initializer.run(session=self._sess) From 97ee39157d784f31f09950a9ad1e55eccd29506d Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Mon, 13 Nov 2023 22:27:39 +0100 Subject: [PATCH 49/57] Tests --- dopamine/agents/dqn/dqn_agent.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index 9fb67dc1..f22ed1bf 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -478,12 +478,18 @@ def ResetWeights(self): # self.online_convnet.set_weights(self.online_convnet_state) # self.target_convnet.set_weights(self.online_convnet_state) for net in self.online_convnet.layers: - print(net.last_layer) + # print(net.last_layer) print("Resetting last layer!") net.last_layer.kernel.initializer.run(session=self._sess) net.last_layer.bias.initializer.run(session=self._sess) - print(dir(net)) + print("Resetting dense1 layer!") + net.dense1.kernel.initializer.run(session=self._sess) + net.dense1.bias.initializer.run(session=self._sess) + + print("Resetting dense2 layer!") + net.dense2.kernel.initializer.run(session=self._sess) + net.dense2.bias.initializer.run(session=self._sess) # self.online_convnet.last_layer.kernel.initializer.run(session=self._sess) # self.online_convnet.last_layer.bias.initializer.run(session=self._sess) From 605722132fecd32a47473578b18c9fc2c5ab4819 Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Mon, 13 Nov 2023 22:29:20 +0100 Subject: [PATCH 50/57] Tests --- dopamine/agents/dqn/dqn_agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index f22ed1bf..2561c66a 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -477,7 +477,7 @@ def ResetWeights(self): # Reset the weights of the last layer # 
self.online_convnet.set_weights(self.online_convnet_state) # self.target_convnet.set_weights(self.online_convnet_state) - for net in self.online_convnet.layers: + with self.online_convnet.layers[-1] as net: # print(net.last_layer) print("Resetting last layer!") net.last_layer.kernel.initializer.run(session=self._sess) @@ -486,7 +486,7 @@ def ResetWeights(self): print("Resetting dense1 layer!") net.dense1.kernel.initializer.run(session=self._sess) net.dense1.bias.initializer.run(session=self._sess) - + print("Resetting dense2 layer!") net.dense2.kernel.initializer.run(session=self._sess) net.dense2.bias.initializer.run(session=self._sess) From 7d6a99b737d2d91257dd71234b61e15a9fcfbe47 Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Mon, 13 Nov 2023 22:32:27 +0100 Subject: [PATCH 51/57] Tests --- dopamine/agents/dqn/dqn_agent.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index 2561c66a..998b35c3 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -477,19 +477,18 @@ def ResetWeights(self): # Reset the weights of the last layer # self.online_convnet.set_weights(self.online_convnet_state) # self.target_convnet.set_weights(self.online_convnet_state) - with self.online_convnet.layers[-1] as net: - # print(net.last_layer) - print("Resetting last layer!") - net.last_layer.kernel.initializer.run(session=self._sess) - net.last_layer.bias.initializer.run(session=self._sess) - - print("Resetting dense1 layer!") - net.dense1.kernel.initializer.run(session=self._sess) - net.dense1.bias.initializer.run(session=self._sess) - - print("Resetting dense2 layer!") - net.dense2.kernel.initializer.run(session=self._sess) - net.dense2.bias.initializer.run(session=self._sess) + # print(net.last_layer) + print("Resetting last layer!") + self.online_convnet.layers[-1].last_layer.kernel.initializer.run(session=self._sess) + 
self.online_convnet.layers[-1].last_layer.bias.initializer.run(session=self._sess) + + print("Resetting dense1 layer!") + self.online_convnet.layers[-1].dense1.kernel.initializer.run(session=self._sess) + self.online_convnet.layers[-1].dense1.bias.initializer.run(session=self._sess) + + print("Resetting dense2 layer!") + self.online_convnet.layers[-1].dense2.kernel.initializer.run(session=self._sess) + self.online_convnet.layers[-1].dense2.bias.initializer.run(session=self._sess) # self.online_convnet.last_layer.kernel.initializer.run(session=self._sess) # self.online_convnet.last_layer.bias.initializer.run(session=self._sess) From 82f47eae0123739173cb7a9351b395c5403828a8 Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Mon, 13 Nov 2023 22:48:25 +0100 Subject: [PATCH 52/57] Added three new system variables to the agent --- dopamine/agents/dqn/dqn_agent.py | 39 +++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index 998b35c3..5f188fee 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -102,7 +102,10 @@ def __init__(self, summary_writer=None, summary_writing_frequency=500, allow_partial_reload=False, - reset_period=500): + reset_period=500, + reset_dense1=False, + reset_dense2=False, + reset_last_layer=False): """Initializes the agent and constructs the components of its graph. Args: @@ -187,6 +190,10 @@ def __init__(self, self.optimizer_state = self.optimizer.variables() self.reset_period = reset_period + self.reset_dense1 = reset_dense1 + self.reset_dense2 = reset_dense2 + self.reset_last_layer = reset_last_layer + tf.compat.v1.disable_v2_behavior() if isinstance(summary_writer, str): # If we're passing in directory name. 
self.summary_writer = tf.compat.v1.summary.FileWriter(summary_writer) @@ -477,18 +484,24 @@ def ResetWeights(self): # Reset the weights of the last layer # self.online_convnet.set_weights(self.online_convnet_state) # self.target_convnet.set_weights(self.online_convnet_state) - # print(net.last_layer) - print("Resetting last layer!") - self.online_convnet.layers[-1].last_layer.kernel.initializer.run(session=self._sess) - self.online_convnet.layers[-1].last_layer.bias.initializer.run(session=self._sess) - - print("Resetting dense1 layer!") - self.online_convnet.layers[-1].dense1.kernel.initializer.run(session=self._sess) - self.online_convnet.layers[-1].dense1.bias.initializer.run(session=self._sess) - - print("Resetting dense2 layer!") - self.online_convnet.layers[-1].dense2.kernel.initializer.run(session=self._sess) - self.online_convnet.layers[-1].dense2.bias.initializer.run(session=self._sess) + + print("Resetting weights...") + if self.reset_last_layer: + print("Resetting last layer!") + self.online_convnet.layers[-1].last_layer.kernel.initializer.run(session=self._sess) + self.online_convnet.layers[-1].last_layer.bias.initializer.run(session=self._sess) + + if self.reset_dense1: + print("Resetting dense1 layer!") + self.online_convnet.layers[-1].dense1.kernel.initializer.run(session=self._sess) + self.online_convnet.layers[-1].dense1.bias.initializer.run(session=self._sess) + + if self.reset_dense2: + print("Resetting dense2 layer!") + self.online_convnet.layers[-1].dense2.kernel.initializer.run(session=self._sess) + self.online_convnet.layers[-1].dense2.bias.initializer.run(session=self._sess) + + # Legacy code # self.online_convnet.last_layer.kernel.initializer.run(session=self._sess) # self.online_convnet.last_layer.bias.initializer.run(session=self._sess) From bd3b1a93f93aa7455a4ab8d40741693d65b5515c Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Tue, 14 Nov 2023 23:24:16 +0100 Subject: [PATCH 53/57] Some minor adjustments on the branch --- 
dopamine/agents/dqn/dqn_agent.py | 12 ++++++++++-- dopamine/discrete_domains/run_experiment.py | 7 +++---- dopamine/jax/agents/sac/sac_agent.py | 6 +++++- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index 5f188fee..774d4cc9 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -105,7 +105,8 @@ def __init__(self, reset_period=500, reset_dense1=False, reset_dense2=False, - reset_last_layer=False): + reset_last_layer=False, + reset_max=3): """Initializes the agent and constructs the components of its graph. Args: @@ -193,6 +194,8 @@ def __init__(self, self.reset_dense1 = reset_dense1 self.reset_dense2 = reset_dense2 self.reset_last_layer = reset_last_layer + self.reset_max = reset_max + self.reset_counter = 0 tf.compat.v1.disable_v2_behavior() if isinstance(summary_writer, str): # If we're passing in directory name. @@ -474,7 +477,8 @@ def _train_step(self): if self.training_steps % self.target_update_period == 0: self._sess.run(self._sync_qt_ops) - if self.training_steps % self.reset_period == 0: + if self.training_steps % self.reset_period == 0\ + and self.reset_counter < self.reset_max: print("Resetting last layers...") self.ResetWeights() @@ -484,6 +488,8 @@ def ResetWeights(self): # Reset the weights of the last layer # self.online_convnet.set_weights(self.online_convnet_state) # self.target_convnet.set_weights(self.online_convnet_state) + if self.reset_counter >= self.reset_max: + return print("Resetting weights...") if self.reset_last_layer: @@ -510,6 +516,8 @@ def ResetWeights(self): optimizer_reset = tf.compat.v1.variables_initializer(self.optimizer_state) self._sess.run(optimizer_reset) + self.reset_counter += 1 + def _record_observation(self, observation): """Records an observation and update state. 
diff --git a/dopamine/discrete_domains/run_experiment.py b/dopamine/discrete_domains/run_experiment.py index 444d64bc..d035838d 100644 --- a/dopamine/discrete_domains/run_experiment.py +++ b/dopamine/discrete_domains/run_experiment.py @@ -604,10 +604,9 @@ def run_experiment(self): for iteration in range(self._start_iteration, self._num_iterations): # Modified - #if iteration % 25 == 0: - # self._agent.ResetLastLayers() - # sys.stdout.write("PATRIK") - # sys.stdout.flush() + if iteration % 40 == 0: + self._agent.ResetWeights() + statistics = self._run_one_iteration(iteration) if self._use_legacy_logger: self._log_experiment(iteration, statistics) diff --git a/dopamine/jax/agents/sac/sac_agent.py b/dopamine/jax/agents/sac/sac_agent.py index 9eea489d..ae29e348 100644 --- a/dopamine/jax/agents/sac/sac_agent.py +++ b/dopamine/jax/agents/sac/sac_agent.py @@ -283,7 +283,8 @@ def __init__(self, summary_writing_frequency=500, allow_partial_reload=False, seed=None, - collector_allowlist=('tensorboard')): + collector_allowlist=('tensorboard'), + reset_period=None): r"""Initializes the agent and constructs the necessary components. Args: @@ -387,6 +388,9 @@ def __init__(self, self.allow_partial_reload = allow_partial_reload self._collector_allowlist = collector_allowlist + # Reset period is used to reset the agent's state every reset_period steps. 
+ self.reset_period = reset_period + self._rng = jax.random.PRNGKey(seed) state_shape = self.observation_shape + (stack_size,) self.state = onp.zeros(state_shape) From 2e2aa11a7401a4d970d56a08540e5e2519b48091 Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Wed, 15 Nov 2023 08:58:14 +0100 Subject: [PATCH 54/57] Stopped agent from reseting at itertaion 0 --- dopamine/discrete_domains/run_experiment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dopamine/discrete_domains/run_experiment.py b/dopamine/discrete_domains/run_experiment.py index d035838d..7af073f3 100644 --- a/dopamine/discrete_domains/run_experiment.py +++ b/dopamine/discrete_domains/run_experiment.py @@ -604,9 +604,9 @@ def run_experiment(self): for iteration in range(self._start_iteration, self._num_iterations): # Modified - if iteration % 40 == 0: + if iteration != 0 and iteration % 30 == 0: self._agent.ResetWeights() - + statistics = self._run_one_iteration(iteration) if self._use_legacy_logger: self._log_experiment(iteration, statistics) From 012e1d55b524110a53e55f4cb67b911ab804c374 Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Wed, 15 Nov 2023 09:49:25 +0100 Subject: [PATCH 55/57] Stopped agent from reseting at itertaion 0 --- dopamine/discrete_domains/run_experiment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dopamine/discrete_domains/run_experiment.py b/dopamine/discrete_domains/run_experiment.py index 7af073f3..042986b7 100644 --- a/dopamine/discrete_domains/run_experiment.py +++ b/dopamine/discrete_domains/run_experiment.py @@ -604,7 +604,7 @@ def run_experiment(self): for iteration in range(self._start_iteration, self._num_iterations): # Modified - if iteration != 0 and iteration % 30 == 0: + if iteration != 0 and iteration % 40 == 0: self._agent.ResetWeights() statistics = self._run_one_iteration(iteration) From 536e219059d593da606eade4c038e1eca779f94b Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Sat, 18 Nov 2023 15:36:44 +0100 Subject: 
[PATCH 56/57] Added some much-needed consistency to the resetting --- dopamine/agents/dqn/dqn_agent.py | 9 --------- dopamine/discrete_domains/run_experiment.py | 10 +++++++--- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index 774d4cc9..1fa60d16 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -248,15 +248,6 @@ def __init__(self, self.summary_writer.add_graph(graph=tf.compat.v1.get_default_graph()) self._sess.run(tf.compat.v1.global_variables_initializer()) - # Modified - def ResetLastLayers(self): - self.online_convnet.reset_last_layer() - self.target_convnet.reset_last_layer() - #self._net_outputs = self.online_convnet(self.state_ph) - #self._q_argmax = tf.argmax(self._net_outputs.q_values, axis=1)[0] - #self._replay_net_outputs = self.online_convnet(self._replay.states) - #self._replay_next_target_net_outputs = self.target_convnet( - # self._replay.next_states) def _create_network(self, name): """Builds the convolutional network used to compute the agent's Q-values. diff --git a/dopamine/discrete_domains/run_experiment.py b/dopamine/discrete_domains/run_experiment.py index 042986b7..a78b3e5f 100644 --- a/dopamine/discrete_domains/run_experiment.py +++ b/dopamine/discrete_domains/run_experiment.py @@ -173,7 +173,8 @@ def __init__(self, max_steps_per_episode=27000, clip_rewards=True, use_legacy_logger=True, - fine_grained_print_to_console=True): + fine_grained_print_to_console=True, + reset_period=None): """Initialize the Runner object in charge of running a full experiment. Args: @@ -234,6 +235,8 @@ def __init__(self, self._initialize_checkpointer_and_maybe_resume(checkpoint_file_prefix) + self._reset_period = reset_period + # Create a collector dispatcher for metrics reporting. 
self._collector_dispatcher = collector_dispatcher.CollectorDispatcher( self._base_dir) @@ -603,8 +606,9 @@ def run_experiment(self): return for iteration in range(self._start_iteration, self._num_iterations): - # Modified - if iteration != 0 and iteration % 40 == 0: + # Modified: Check if the reset period is reached, and if so, reset the weights. + if (self._reset_period is not None and + iteration != 0 and iteration % self._reset_period == 0): self._agent.ResetWeights() statistics = self._run_one_iteration(iteration) From 3a57732b6958aab3c71608de6fdc62c932cbc091 Mon Sep 17 00:00:00 2001 From: Patrik Rac Date: Sat, 18 Nov 2023 15:48:05 +0100 Subject: [PATCH 57/57] Some more qol changes --- dopamine/agents/dqn/dqn_agent.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py index 1fa60d16..ee2f28ac 100644 --- a/dopamine/agents/dqn/dqn_agent.py +++ b/dopamine/agents/dqn/dqn_agent.py @@ -102,7 +102,7 @@ def __init__(self, summary_writer=None, summary_writing_frequency=500, allow_partial_reload=False, - reset_period=500, + reset_period=None, reset_dense1=False, reset_dense2=False, reset_last_layer=False, @@ -468,8 +468,9 @@ def _train_step(self): if self.training_steps % self.target_update_period == 0: self._sess.run(self._sync_qt_ops) - if self.training_steps % self.reset_period == 0\ - and self.reset_counter < self.reset_max: + if (self.reset_period is not None and + self.training_steps % self.reset_period == 0\ + and self.reset_counter < self.reset_max): print("Resetting last layers...") self.ResetWeights()