From e46a3a841f973d9f45bcb69526a057fd55ffc353 Mon Sep 17 00:00:00 2001
From: zsdonghao
Date: Mon, 24 Jul 2017 20:41:03 +0100
Subject: [PATCH] add comment, GLIE

---
 example/tutorial_frozenlake_dqn.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/example/tutorial_frozenlake_dqn.py b/example/tutorial_frozenlake_dqn.py
index fb3580990..3db40ee40 100644
--- a/example/tutorial_frozenlake_dqn.py
+++ b/example/tutorial_frozenlake_dqn.py
@@ -5,7 +5,7 @@
 from tensorlayer.layers import *
 import matplotlib.pyplot as plt
 
-""" Q-Network Q(a, s) - TD Learning, Off-Policy, e-Greedy Exploration
+""" Q-Network Q(a, s) - TD Learning, Off-Policy, e-Greedy Exploration (GLIE)
 
 Q(S, A) <- Q(S, A) + alpha * (R + lambda * Q(newS, newA) - Q(S, A))
 delta_w = R + lambda * Q(newS, newA)
@@ -90,7 +90,7 @@ def to_one_hot(i, n_classes=None):
             s = s1
             ## Reduce chance of random action if an episode is done.
             if d == True:
-                e = 1./((i/50) + 10) # reduce e
+                e = 1./((i/50) + 10) # reduce e, GLIE: Greedy in the Limit with Infinite Exploration
                 break
 
     ## Note that, the rewards here with random action
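
Below is a minimal, self-contained sketch of the GLIE-style e-greedy schedule this
patch documents. It is for illustration only: the tabular Q array, the state/action
sizes, and the bare episode loop are stand-ins, not the tutorial's FrozenLake
environment or its TensorLayer Q-network.

import numpy as np

# Illustrative stand-ins (the tutorial uses gym's FrozenLake-v0 and a
# TensorLayer Q-network; a plain table is enough to show the schedule).
n_states, n_actions = 16, 4
Q = np.zeros((n_states, n_actions))
rng = np.random.default_rng(0)

def choose_action(s, e):
    # e-greedy: explore with probability e, otherwise act greedily.
    if rng.random() < e:
        return int(rng.integers(n_actions))  # random exploratory action
    return int(np.argmax(Q[s]))              # greedy action w.r.t. Q

e = 0.1  # = 1./(0/50 + 10), the schedule's starting value
for i in range(10000):  # i indexes episodes, as in the tutorial
    # ... run one episode here, updating Q with the TD rule ...
    # GLIE decay: e ~ 50/i for large i, so the per-episode exploration
    # probabilities sum to infinity ("Infinite Exploration") while
    # e -> 0 makes the policy "Greedy in the Limit".
    e = 1. / ((i / 50) + 10)

The harmonic-like decay is the point of the GLIE comment: a faster schedule
(e.g. e proportional to 0.99**i) would stop exploring too quickly to keep visiting
every state-action pair, whereas this one decays to zero yet never cuts
exploration off.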