This repository has been archived by the owner on Oct 31, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 13
/
play_gym.py
executable file
·60 lines (53 loc) · 2.39 KB
/
play_gym.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import time
import argparse
import gym
import random
import getch
import rtfm.tasks
from rtfm.dynamics.monster import Player
from rtfm import featurizer as F
if __name__ == '__main__':
    # Script entry point: parse the command-line options, build the requested
    # 'rtfm:<env>-v0' gym environment, then play episode after episode while
    # keeping a running tally of wins. With --control the moves come from the
    # keyboard; otherwise a random valid move is taken once per second.
    cli = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument('--env', default='rock_paper_scissors', help='environment to use')
    cli.add_argument('--height', default=12, type=int, help='room size')
    cli.add_argument('--width', default=22, type=int, help='room size')
    cli.add_argument('--seed', default=42, type=int, help='seed')
    cli.add_argument('-p', '--partially_observable', action='store_true', help='only show partial observability')
    cli.add_argument('-c', '--control', action='store_true', help='assume direct control')
    cli.add_argument('-w', '--shuffle_wiki', action='store_true', help='shuffle facts in the wiki')
    args = cli.parse_args()

    # Observation pipeline handed to the environment at construction time.
    featurizer = F.Concat([F.Progress(), F.ValidMoves(), F.Terminal()])
    env_id = 'rtfm:{}-v0'.format(args.env)
    env = gym.make(
        env_id,
        featurizer=featurizer,
        partially_observable=args.partially_observable,
        room_shape=(args.height, args.width),
        shuffle_wiki=args.shuffle_wiki,
    )

    # Seeds Python's `random` module, which drives the random-move policy below.
    random.seed(args.seed)

    wins = episodes = 0
    while True:  # one iteration per episode; only the 'x' key (or a signal) ends it
        env.reset()
        while True:  # one iteration per environment step
            if args.control:
                # Keep prompting until a key that maps to a move is pressed.
                key = None
                while key not in Player.keymap:
                    print('Current score {} out of {}'.format(wins, episodes))
                    print('Enter your command. x to quit.')
                    key = getch.getch()
                    if key == 'x':
                        # Same effect as sys.exit(0): terminate with status 0.
                        raise SystemExit(0)
                action = Player.keymap[key]
            else:
                action = random.choice(Player.valid_moves)

            feat, reward, finished, won = env.step(action)
            if not args.control:
                # Slow the random agent down so each step can be followed.
                time.sleep(1)

            print(feat)
            print(
                'Reward = {}'.format(reward),
                'Finished = {}'.format(finished),
                'Won = {}'.format(won),
                sep='\n',
            )

            if finished:
                # NOTE(review): `won` is added arithmetically, so it is
                # presumably a bool/0-1 flag -- confirm against env.step.
                wins += won
                episodes += 1
                break