From c5c43cca7488291e49eb8912ccc48aaa514e161b Mon Sep 17 00:00:00 2001
From: Markel Sanz Ausin <markelsanz14@gmail.com>
Date: Fri, 10 Jul 2020 18:27:35 -0400
Subject: [PATCH 1/2] Updates the call to Dreamer to ensure it only trains in
 training mode

I believe the current implementation also trains the RSSM model during evaluation, i.e. when training=False is passed. I'm not sure whether this is the intended behavior, but it looks like a bug to me.
---
 dreamer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dreamer.py b/dreamer.py
index fa6236d..9fbcb6a 100644
--- a/dreamer.py
+++ b/dreamer.py
@@ -113,7 +113,7 @@ def __call__(self, obs, reset, state=None, training=True):
     if state is not None and reset.any():
       mask = tf.cast(1 - reset, self._float)[:, None]
       state = tf.nest.map_structure(lambda x: x * mask, state)
-    if self._should_train(step):
+    if self._should_train(step) and training:
       log = self._should_log(step)
       n = self._c.pretrain if self._should_pretrain() else self._c.train_steps
       print(f'Training for {n} steps.')

From 4ce18ed347a5064f53b97a7b5137bd00b1f4a42c Mon Sep 17 00:00:00 2001
From: Markel Sanz Ausin <markelsanz14@gmail.com>
Date: Fri, 10 Jul 2020 18:30:35 -0400
Subject: [PATCH 2/2] NotImplementedError on ActionDecoder uses a None
 variable. Replace with the string of the passed distribution.

Minor bug fix. The local variable dist has not been assigned at that point in the code (the else branch). Passing self._dist instead makes the error message clear when an unsupported distribution is passed.
---
 models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/models.py b/models.py
index 0f40316..0b2d9bb 100644
--- a/models.py
+++ b/models.py
@@ -172,5 +172,5 @@ def __call__(self, features):
       x = self.get(f'hout', tfkl.Dense, self._size)(x)
       dist = tools.OneHotDist(x)
     else:
-      raise NotImplementedError(dist)
+      raise NotImplementedError(self._dist)
     return dist