-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgridworld.py
136 lines (117 loc) · 4.01 KB
/
gridworld.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import numpy as np
def randPair(s,e):
    """Draw two independent random integers, each from the range [s, e).

    Returns:
        A ``(first, second)`` tuple; ``first`` is drawn before ``second``.
    """
    first = np.random.randint(s, e)
    second = np.random.randint(s, e)
    return first, second
#finds an array in the "depth" dimension of the grid
def findLoc(state, obj):
    """Locate the first grid cell whose depth vector equals ``obj`` exactly.

    Cells are scanned row by row over the first two axes of ``state``. The
    grid dimensions are read from ``state.shape`` instead of being fixed at
    4x4, so grids of any size are searched completely.

    Args:
        state: Array indexed as ``state[row, col]`` to obtain a depth vector.
        obj: Vector compared element-wise against each cell's depth vector.

    Returns:
        ``(row, col)`` of the first matching cell, or ``None`` when no cell
        matches.
    """
    n_rows, n_cols = state.shape[0], state.shape[1]
    for i in range(n_rows):
        for j in range(n_cols):
            # Every element must agree: a cell whose vector differs from
            # ``obj`` in any position is not a match.
            if (state[i, j] == obj).all():
                return i, j
    return None
#Initialize stationary grid, all items are placed deterministically
def initGrid():
    """Build the fixed starting grid.

    Returns:
        A new 4x4x4 array of zeros in which four cells carry a one-hot depth
        vector: player at (0, 1), wall at (2, 2), pit at (1, 1) and goal at
        (3, 3).
    """
    state = np.zeros((4, 4, 4))
    # (row, col) -> depth channel switched on for the object in that cell
    layout = {
        (0, 1): 3,  # player
        (2, 2): 2,  # wall
        (1, 1): 1,  # pit
        (3, 3): 0,  # goal
    }
    for (row, col), channel in layout.items():
        state[row, col, channel] = 1
    return state
#Initialize player in random location, but keep wall, goal and pit stationary
def initGridPlayer():
    """Build a grid with a randomly placed player and fixed wall, pit and goal.

    The wall sits at (2, 2), the pit at (1, 1) and the goal at (1, 2). The
    player cell is drawn with ``randPair(0, 4)``; a draw is rejected and the
    grid rebuilt whenever any of the four objects cannot be found afterwards.

    Returns:
        A 4x4x4 array containing all four objects.
    """
    player = np.array([0, 0, 0, 1])
    wall = np.array([0, 0, 1, 0])
    pit = np.array([0, 1, 0, 0])
    goal = np.array([1, 0, 0, 0])

    while True:
        state = np.zeros((4, 4, 4))
        state[randPair(0, 4)] = player
        # The fixed objects are written after the player, so a player drawn
        # onto one of their cells is overwritten and no longer findable.
        state[2, 2] = wall
        state[1, 1] = pit
        state[1, 2] = goal

        found = [findLoc(state, marker) for marker in (player, wall, goal, pit)]
        if all(found):
            return state
        # Invalid grid: loop around and rebuild from scratch.
#Initialize grid so that goal, pit, wall, player are all randomly placed
def initGridRand():
    """Build a grid in which player, wall, pit and goal are all placed randomly.

    Each object's cell is drawn with ``randPair(0, 4)``, in the order player,
    wall, pit, goal. The grid is rebuilt from scratch until every object can
    be found in it.

    Returns:
        A 4x4x4 array containing all four objects.
    """
    markers = (
        np.array([0, 0, 0, 1]),  # player
        np.array([0, 0, 1, 0]),  # wall
        np.array([0, 1, 0, 0]),  # pit
        np.array([1, 0, 0, 0]),  # goal
    )

    while True:
        state = np.zeros((4, 4, 4))
        for marker in markers:
            # A later object drawn onto an occupied cell replaces the earlier one.
            state[randPair(0, 4)] = marker

        locations = [findLoc(state, marker) for marker in markers]
        if all(locations):
            return state
        # At least one object was overwritten: re-place everything.
def _findChannelLoc(state, channel):
    """Return (row, col) of the first cell whose ``channel`` entry is 1, else None."""
    hits = np.argwhere(state[:, :, channel] == 1)
    if len(hits) == 0:
        return None
    return int(hits[0][0]), int(hits[0][1])


def makeMove(state, action):
    """Return the grid that results from moving the player one step.

    Objects are located by their depth channel (goal 0, pit 1, wall 2,
    player 3), so the move also works when the player currently shares a
    cell with the pit or the goal. The grid dimensions are taken from
    ``state.shape``.

    Args:
        state: Grid array indexed as ``state[row, col, channel]``. It is not
            modified.
        action: Index into the move table: 0 = row - 1 (up), 1 = row + 1
            (down), 2 = column - 1 (left), 3 = column + 1 (right).

    Returns:
        A new array of the same shape. The player is at the target cell, or
        at the previous cell when the target is the wall or lies outside the
        grid. Pit, wall and goal are re-placed at their previous cells.

    Raises:
        ValueError: If ``state`` contains no player.
    """
    player_loc = _findChannelLoc(state, 3)
    if player_loc is None:
        raise ValueError("state contains no player (channel 3 is empty)")
    wall = _findChannelLoc(state, 2)
    goal = _findChannelLoc(state, 0)
    pit = _findChannelLoc(state, 1)

    n_rows, n_cols = state.shape[0], state.shape[1]
    new_state = np.zeros(state.shape)

    actions = [[-1,0],[1,0],[0,-1],[0,1]]
    #e.g. up => (player row - 1, player column + 0)
    d_row, d_col = actions[action]
    new_loc = (player_loc[0] + d_row, player_loc[1] + d_col)

    in_bounds = 0 <= new_loc[0] < n_rows and 0 <= new_loc[1] < n_cols
    if not in_bounds or new_loc == wall:
        # Blocked move: the player stays where it was.
        new_loc = player_loc
    new_state[new_loc[0], new_loc[1], 3] = 1

    # Re-place the static objects. A missing object is skipped: indexing the
    # grid with None would address the whole array instead of a single cell.
    for loc, channel in ((pit, 1), (wall, 2), (goal, 0)):
        if loc is not None:
            new_state[loc[0], loc[1], channel] = 1

    return new_state
def getLoc(state, level):
    """Locate the first grid cell whose depth entry ``level`` equals 1.

    Only the single entry at index ``level`` is inspected, so a cell holding
    several objects still matches. The grid dimensions are read from
    ``state.shape`` instead of being fixed at 4x4.

    Args:
        state: Array indexed as ``state[row, col]`` to obtain a depth vector.
        level: Index into the depth vector to test.

    Returns:
        ``(row, col)`` of the first matching cell in row-by-row scan order,
        or ``None`` when no cell matches.
    """
    n_rows, n_cols = state.shape[0], state.shape[1]
    for i in range(n_rows):
        for j in range(n_cols):
            if state[i, j][level] == 1:
                return i, j
    return None
def getReward(state):
    """Score a grid from the player's position.

    Positions are looked up with ``getLoc`` on depth channels 3 (player),
    1 (pit) and 0 (goal).

    Returns:
        -10 when the player's location equals the pit's, 10 when it equals
        the goal's, and -1 otherwise. The pit comparison is made first.
    """
    player_loc, pit, goal = (getLoc(state, level) for level in (3, 1, 0))
    if player_loc == pit:
        return -10
    return 10 if player_loc == goal else -1
def dispGrid(state):
    """Render the grid as a 2-D array of single-character strings.

    Symbols: ``'P'`` player (channel 3), ``'W'`` wall (channel 2), ``'+'``
    goal (channel 0), ``'-'`` pit (channel 1); every other cell is ``' '``.
    Objects are found per depth channel, so a cell shared by the player and
    the pit or goal is still drawn. Symbols are written in the order P, W,
    +, -, so on a shared cell the later symbol is the one shown.

    Args:
        state: Grid array indexed as ``state[row, col, channel]``.

    Returns:
        A string array with the same row and column counts as ``state``.
    """
    grid = np.full(state.shape[:2], ' ')
    for channel, symbol in ((3, 'P'), (2, 'W'), (0, '+'), (1, '-')):
        grid[state[:, :, channel] == 1] = symbol
    return grid