a = np.argmax(Q[s, :] + np.random.randn(1, env.action_space.n) * (1. / (i + 1)))
IndexError: only integers, slices (:), ellipsis (...), numpy.newaxis (None) and integer or boolean arrays are valid indices
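This error means the value used to index the Q table is not an integer. One likely cause (an assumption, not confirmed in this thread): with gym >= 0.26, env.reset() returns a (state, info) tuple rather than a bare state, and env.step() returns five values (observation, reward, terminated, truncated, info), so Q[s, :] ends up being indexed with a tuple. The full script as posted is below, followed by a sketch of the compatibility fix.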
import argparse
import os
import time

import gym
import matplotlib.pyplot as plt
import numpy as np

parser = argparse.ArgumentParser()
parser.add_argument('--train', dest='train', action='store_true', default=True)
parser.add_argument('--test', dest='test', action='store_true', default=True)
parser.add_argument(
    '--save_path', default=None, help='folder to save if mode == train else model path, '
    'qnet will be saved once target net update'
)
parser.add_argument('--seed', help='random seed', type=int, default=0)
parser.add_argument('--env_id', default='FrozenLake-v1')
args = parser.parse_args()
# Load the environment
alg_name = 'Qlearning'
env_id = args.env_id
env = gym.make(env_id)
render = False  # display the game environment

##================= Implement Q-Table learning algorithm =====================##
# Initialize table with all zeros
Q = np.zeros([env.observation_space.n, env.action_space.n])
# Set learning parameters
lr = .85  # alpha, if use value function approximation, we can ignore it
lambd = .99  # decay factor
num_episodes = 10000
t0 = time.time()
if args.train:
    all_episode_reward = []
    for i in range(num_episodes):
        ## Reset environment and get first new observation
        s = env.reset()
        rAll = 0
        ## The Q-Table learning algorithm
        for j in range(99):
            if render: env.render()
            ## Choose an action by greedily (with noise) picking from Q table
            a = np.argmax(Q[s, :] + np.random.randn(1, env.action_space.n) * (1. / (i + 1)))
            ## Get new state and reward from environment
            s1, r, d, _ = env.step(a)
            ## Update Q-Table with new knowledge
            Q[s, a] = Q[s, a] + lr * (r + lambd * np.max(Q[s1, :]) - Q[s, a])
            rAll += r
            s = s1
            if d is True:
                break
        print(
            'Training | Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format(
                i + 1, num_episodes, rAll,
                time.time() - t0
            )
        )
        if i == 0:
            all_episode_reward.append(rAll)
        else:
            all_episode_reward.append(all_episode_reward[-1] * 0.9 + rAll * 0.1)
    # save
    path = os.path.join('model', '_'.join([alg_name, env_id]))
    if not os.path.exists(path):
        os.makedirs(path)
    np.save(os.path.join(path, 'Q_table.npy'), Q)

    plt.plot(all_episode_reward)
    if not os.path.exists('image'):
        os.makedirs('image')
    plt.savefig(os.path.join('image', '_'.join([alg_name, env_id])))
    # print("Final Q-Table Values:\n %s" % Q)
if args.test:
    path = os.path.join('model', '_'.join([alg_name, env_id]))
    Q = np.load(os.path.join(path, 'Q_table.npy'))
    for i in range(num_episodes):
        ## Reset environment and get first new observation
        s = env.reset()
        rAll = 0
        ## Run the learned policy
        for j in range(99):
            ## Choose an action greedily from the Q table (no exploration noise at test time)
            a = np.argmax(Q[s, :])
            ## Get new state and reward from environment
            s1, r, d, _ = env.step(a)
            ## Accumulate reward and move to the next state (no Q-Table update at test time)
            rAll += r
            s = s1
            if d is True:
                break
        print(
            'Testing | Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format(
                i + 1, num_episodes, rAll,
                time.time() - t0
            )
        )
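If the installed gym follows the >= 0.26 API, a minimal sketch of the adjusted training inner loop might look like the following. This is an assumption about the environment version, not something stated in the issue; variable names match the script above.

# Sketch only: adapt the inner loop to the gym >= 0.26 API (assumed version).
s, _ = env.reset()                                   # reset() now returns (state, info)
rAll = 0
for j in range(99):
    ## Choose an action by greedily (with noise) picking from Q table
    a = np.argmax(Q[s, :] + np.random.randn(1, env.action_space.n) * (1. / (i + 1)))
    s1, r, terminated, truncated, _ = env.step(a)    # step() now returns five values
    d = terminated or truncated                      # combine both end-of-episode flags
    Q[s, a] = Q[s, a] + lr * (r + lambd * np.max(Q[s1, :]) - Q[s, a])
    rAll += r
    s = s1
    if d:
        break

Note also that, as written, --train and --test both use action='store_true' with default=True, so both branches run on every invocation regardless of which flags are passed.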