From c5c43cca7488291e49eb8912ccc48aaa514e161b Mon Sep 17 00:00:00 2001 From: Markel Sanz Ausin Date: Fri, 10 Jul 2020 18:27:35 -0400 Subject: [PATCH 1/2] Updates the call to Dreamer to ensure it only trains in training mode. I believe the current implementation will also train the RSSM model during evaluation, when training=False is passed. I'm not sure if this is the intended behavior, but it looks like a bug to me. --- dreamer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dreamer.py b/dreamer.py index fa6236d..9fbcb6a 100644 --- a/dreamer.py +++ b/dreamer.py @@ -113,7 +113,7 @@ def __call__(self, obs, reset, state=None, training=True): if state is not None and reset.any(): mask = tf.cast(1 - reset, self._float)[:, None] state = tf.nest.map_structure(lambda x: x * mask, state) - if self._should_train(step): + if self._should_train(step) and training: log = self._should_log(step) n = self._c.pretrain if self._should_pretrain() else self._c.train_steps print(f'Training for {n} steps.') From 4ce18ed347a5064f53b97a7b5137bd00b1f4a42c Mon Sep 17 00:00:00 2001 From: Markel Sanz Ausin Date: Fri, 10 Jul 2020 18:30:35 -0400 Subject: [PATCH 2/2] NotImplementedError on ActionDecoder uses an undefined variable. Replace with the string of the passed distribution. Minor bug fix. dist does not exist at that location in the code. Using self._dist would make the error clear if an incorrect distribution is passed. --- models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models.py b/models.py index 0f40316..0b2d9bb 100644 --- a/models.py +++ b/models.py @@ -172,5 +172,5 @@ def __call__(self, features): x = self.get(f'hout', tfkl.Dense, self._size)(x) dist = tools.OneHotDist(x) else: - raise NotImplementedError(dist) + raise NotImplementedError(self._dist) return dist