forked from algolab-inc/tf-dqn-simple
-
Notifications
You must be signed in to change notification settings - Fork 0
/
catch_ball.py
79 lines (65 loc) · 2.16 KB
/
catch_ball.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os
import numpy as np
class CatchBall:
    """Minimal "catch the falling ball" grid environment.

    A ball starts in the top row at a random column and falls one row per
    call to ``update``.  A horizontal paddle ("player") of width
    ``player_length`` sits in the bottom row and can stay still, move left,
    or move right by one column per step.  The episode ends when the ball
    reaches the bottom row: the reward is ``1`` if the ball's column is
    covered by the paddle and ``-1`` otherwise.

    Args:
        screen_n_rows: Number of rows of the screen grid (at least 2 so the
            ball has at least one row to fall).
        screen_n_cols: Number of columns of the screen grid.
        player_length: Paddle width in cells, between 1 and
            ``screen_n_cols`` inclusive.
        frame_rate: Stored as ``self.frame_rate``; not used by this class.

    Raises:
        ValueError: If the grid or paddle dimensions are inconsistent.
    """

    def __init__(self, screen_n_rows=8, screen_n_cols=8, player_length=3,
                 frame_rate=5):
        if screen_n_rows < 2 or screen_n_cols < 1:
            raise ValueError(
                "screen_n_rows must be >= 2 and screen_n_cols must be >= 1")
        if not 1 <= player_length <= screen_n_cols:
            raise ValueError(
                "player_length must be between 1 and screen_n_cols")

        # parameters
        # name is this module's file name without its extension
        self.name = os.path.splitext(os.path.basename(__file__))[0]
        self.screen_n_rows = screen_n_rows
        self.screen_n_cols = screen_n_cols
        self.player_length = player_length
        self.enable_actions = (0, 1, 2)
        self.frame_rate = frame_rate

        # variables
        self.reset()

    def update(self, action):
        """Advance the game by one step.

        action:
            0: do nothing
            1: move left
            2: move right

        Any other value is treated as "do nothing".  After the call,
        ``self.reward`` and ``self.terminal`` describe the new state.
        """
        # update player position, clamped so the paddle stays on screen
        if action == self.enable_actions[1]:
            # move left
            self.player_col = max(0, self.player_col - 1)
        elif action == self.enable_actions[2]:
            # move right
            rightmost = self.screen_n_cols - self.player_length
            self.player_col = min(self.player_col + 1, rightmost)
        # any other action: do nothing

        # update ball position
        self.ball_row += 1

        # collision detection
        self.reward = 0
        self.terminal = False
        if self.ball_row == self.screen_n_rows - 1:
            # the ball reached the paddle's row: the episode is over
            self.terminal = True
            paddle_end = self.player_col + self.player_length
            if self.player_col <= self.ball_col < paddle_end:
                # catch
                self.reward = 1
            else:
                # drop
                self.reward = -1

    def draw(self):
        """Render the current state into ``self.screen``.

        The screen is a fresh ``(screen_n_rows, screen_n_cols)`` array of
        zeros in which the paddle cells and the ball cell are set to 1.
        """
        # reset screen
        self.screen = np.zeros((self.screen_n_rows, self.screen_n_cols))

        # draw player
        paddle_end = self.player_col + self.player_length
        self.screen[self.player_row, self.player_col:paddle_end] = 1

        # draw ball
        self.screen[self.ball_row, self.ball_col] = 1

    def observe(self):
        """Redraw the screen and return ``(screen, reward, terminal)``."""
        self.draw()
        return self.screen, self.reward, self.terminal

    def execute_action(self, action):
        """Apply ``action`` for one step; equivalent to ``update(action)``."""
        self.update(action)

    def reset(self):
        """Start a new episode with random paddle and ball columns."""
        # reset player position
        self.player_row = self.screen_n_rows - 1
        # np.random.randint's upper bound is exclusive, so add 1 to make the
        # rightmost legal paddle position (screen_n_cols - player_length)
        # reachable as a starting position
        self.player_col = np.random.randint(
            self.screen_n_cols - self.player_length + 1)

        # reset ball position
        self.ball_row = 0
        self.ball_col = np.random.randint(self.screen_n_cols)

        # reset other variables
        self.reward = 0
        self.terminal = False