-
Notifications
You must be signed in to change notification settings - Fork 0
/
Control_Policy.py
500 lines (395 loc) · 19.3 KB
/
Control_Policy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 29 12:59:55 2021
@author: georg
%% uses an algorithmic, state based method to locate maxima within a 3D
matrix. To simulate the tactile exploration of an object
for an example, run find_highs_2D(nearest_empty_2D, stimulus, 1)
"""
# method is a function that operates on an actor (sensor) that is interacting
# with a stimulus (3D matrix) until a finishing condition (state of information) is
# reached
# a policy mpas all history to a new choice
# To do:
# Make predictor, i.e. predict the cell from current knowledge
# Govern control policy from certainty, rather than knowledge (mostly done)
#certainty should be a combination of integrating over the pdf and using neighbouring cells to predict
#the strength of how well cells can predict their neighbours can be trained over multiple data
# Make reward and "punishment" function
# This could be displayed in a matrix too - hot areas would be areas of interest (boundaries)
# is there a critical point for the sd at which one approach becomes more efficient?
# Information (in bits) I = log_2 (1/p) = -log_2(p) ----p is probability
# we can add bits of sequential information to get overall value for info in bits
#E(I) = sum p(x) * -log_2 (p)
#p is expectancy/ prob of measurement x
#E(I) is expected information from a measurement (entropy), where x is
#distribution of all possible outcomes of the measurement
#Consider greedy (next step) vs global
#Wordle https://www.youtube.com/watch?v=v68zYyaEmEA
import numpy as np
import scipy.integrate as integrate
import pygame
from time import sleep as sleep
import imageio
#import png as rgb array
stimulus = imageio.imread('2D_stim_no_lines_40.png')
# generate random stimulus:
#stimulus = np.random.randint(5,45,size=(10,10))
#knowledge = np.zeros([50,50])
#state = Sensor_state
#state.x = 25
#state.y = 25
# example:
def find_highs(method, stimulus, render):
# render the original input
# render the exploration/return values
# apply method to stimulus
return locs, time
# trial with 2D, i.e. Minesweeper
res_m = 4 # resolution multiplier
def find_highs_2D(method, stimulus, render):
sol_space = np.shape(stimulus)
#print(sol_space)
res_m = 4 # resolution multiplier
# obtained values, third index is the 4 RGBA colour, so doesdn't multiply
knowledge = np.ones((sol_space[0]*res_m, sol_space[1]*res_m, sol_space[2]))*-1
# how certain we can be that the knowledge represents the stimulus.
# e.g. if 8 values are taken that tightly agree with each other, we don't
# need the other 8
# certainty should be inversely proportional to the range of the 95% CI
certainty = {}
certainty = np.zeros((sol_space[0], sol_space[1], sol_space[2]))
#print(certainty)
'''
for i in range(0,sol_space[0]):
for j in range(0, sol_space[1]):
certainty[i,j] = [0, 0, 0]
'''
#certainty = [[0,0,0]]*(sol_space)
state = Sensor_state
state.x = round(sol_space[0]*res_m/2)
state.y = round(sol_space[1]*res_m/2)
#override
#state.x = 28
cell_size = 8
##### details to visualise the interaction ###########
# render the original input
background_colour = (255,255,255)
(width, height) = (((sol_space[0]+3)*5)*cell_size, sol_space[1]*cell_size)
screen = pygame.display.set_mode((width, height))
screen.fill(background_colour)
pygame.display.set_caption('Rendering of knowledge')
#dictionary
cell={}
for i in range(sol_space[0],sol_space[0]+sol_space[0]):
for j in range(sol_space[1]):
cell[i,j] = Particle(((i+3)*cell_size, j*cell_size), cell_size, stimulus[i-sol_space[0],j])#[0, 0, 0])
#cell[i,j].update(5*stimulus[i-sol_space[0],j])
cell[i,j].display(screen)
pygame.display.flip()
for event in pygame.event.get():
if event.type == pygame.QUIT:
running = False
#######################################################
# apply method to stimulus
while(1):
(state, knowledge, certainty) = method(state, knowledge, stimulus, certainty, res_m)
#### Render the current knowledge state
for i in range(0,res_m):
for j in range(0,res_m):
# make sure we're not looking outside the region
if state.x + i >= 0 and state.x + i < res_m*sol_space[0] and state.y + j >= 0 and state.y + j < res_m*sol_space[1]:
if knowledge[state.x+i,state.y+j][0] != -1:
cell[state.x+i,state.y+j] = Particle(((state.x+i)*cell_size/res_m, (state.y+j)*cell_size/res_m), cell_size/res_m, knowledge[state.x+i,state.y+j])
#cell[state.x,state.y].update(5*knowledge[state.x,state.y])
cell[state.x+i,state.y+j].display(screen)
#pygame.display.flip()
#sleep(0.01)
#### Render the certainty state, black is known, green is implied,
# white is unknown
#space = np.shape(stimulus)
for i in [2*sol_space[0]+int(state.x/res_m) - 1,
2*sol_space[0]+int(state.x/res_m),
2*sol_space[0]+int(state.x/res_m) + 1]:
for j in [int(state.y/res_m)-1,
int(state.y/res_m),
int(state.y/res_m)+1]:
# don't draw outside the area
if i >= 3*sol_space[0] or j >= sol_space[1] or i < 2*sol_space[0] or j < 0:
pass
#print("passing")
#else:
else:
#print("not passing")
#print(cell_size*int(2*(sol_space[0]+3)+(i+state.x/res_m)), cell_size*int(j/res_m))
cell[i,j] = Particle(((i+6)*cell_size,j*cell_size),#(2*(res_m*(sol_space[0]+3)) + cell_size*i,
# cell_size*int(j)),
cell_size,
certainty[i-2*sol_space[0],j])
#print(certainty[i-2*sol_space[0],j])
cell[i,j].display(screen)
#### Render the averaged colour for each area
# - this redraws the whole space each iteration, could be more efficient
#for i in range(sol_space[0]):
# for j in range(sol_space[1]):
for i in [int(state.x/res_m), int(state.x/res_m)]:
for j in [int(state.y/res_m), int(state.y/res_m)]:
### get average over the values
total = 0
count = 0
colour = 0
#merge the data that lands in the same cell
for ind1 in range(i*res_m, (i+1)*res_m):
for ind2 in range(j*res_m, (j+1)*res_m):
if knowledge[ind1,ind2][0] != -1:
total = total + knowledge[ind1,ind2]
count = count + 1
if count !=0:
colour = total/count
#print(total)
#print(colour)
cell[i,j] = Particle(((i+3*(sol_space[0]+3))*cell_size, j*cell_size), cell_size, colour[0:3])
#cell[i,j].update(5*colour)
cell[i,j].display(screen)
#### Render the error between averaged and truth
# - this redraws the whole space each iteration, could be more efficient
#for i in range(sol_space[0]):
# for j in range(sol_space[1]):
for i in [int(state.x/res_m), int(state.x/res_m)]:
for j in [int(state.y/res_m), int(state.y/res_m)]:
### get average over the values
total = 0
count = 0
colour = 0
for ind1 in range(i*res_m, (i+1)*res_m):
for ind2 in range(j*res_m, (j+1)*res_m):
if knowledge[ind1,ind2][0] !=-1:
total = total + knowledge[ind1,ind2]
count = count + 1
if count !=0:
colour = abs(stimulus[i,j] - total/count)#*5
cell[i,j] = Particle(((i+4*(sol_space[0]+3))*cell_size, j*cell_size), cell_size, colour[0:3])
#print(colour[0:3])
#cell[i,j].update(5*colour)
cell[i,j].display(screen)
pygame.display.flip()
# pause so that it's visible
#sleep(0.05)
Finishing_condition = np.count_nonzero(knowledge==-1) < 3
if Finishing_condition:
break
return (state, knowledge)
class Sensor_state:
x = 0
y = 0
z = 0
rot_x = 0
rot_y = 0
rot_z = 0
reading = 0
######### 2D methods ####################################
# to draw the changing knowledge and stimulus
# This class represents a single cell to render on screen
class Particle:
def __init__(self, position, size, colour): # ,colour
self.x, self.y = position
self.size = size
self.colour = (255-colour[0], 255-colour[1], 255-colour[2]) # , make this a function of colour
self.thickness = 0
def display(self, screen):
pygame.draw.rect(screen, self.colour, ((self.x, self.y), (self.size, self.size))) #, self.size, self.thickness
def update(self, R):
#print(R)
#self.colour = (5*R, 0, 255 - 5*R)
self.colour = (R, 0, 255 - R)
#############################
#def method2D(state, knowledge, stimulus):
#
# knowledge[state.x, state.y, state.z] = stimulus[state.x, state.y, state.z]
#
# return (knowledge, state)
# if Finishing_condition:
# return(state)
# else:
# method2D(state, knowledge, stimulus)
def nearest_empty_2D(state, knowledge, stimulus, certainty, res_m):
(state.x, state.y) = find_nearest_empty(state, knowledge, 0)
sd = 50
measurement=[0,0,0,255]
for i in [0, 1, 2]:
measurement[i] = np.random.normal(loc=stimulus[int(state.x/res_m), int(state.y/res_m)][i], scale = sd)
if measurement[i] < 0:
measurement[i] = 0
if measurement[i] > 255:
measurement[i] = 255
knowledge[state.x, state.y] = measurement
#knowledge[state.x, state.y] = stimulus[int(state.x/res_m), int(state.y/res_m)]
for x in [int(state.x/res_m)-1, int(state.x/res_m), int(state.x/res_m)+1]:
for y in [int(state.y/res_m)-1, int(state.y/res_m), int(state.y/res_m)+1]:
space = np.shape(stimulus)
# don't draw outside the area
if x >= space[0] or y >= space[1] or x < 0 or y < 0:
pass
# update the current location equally
elif x == int(state.x/res_m) and y == int(state.y/res_m):
#print(certainty[x,y])
#print(x, y)
certainty[x, y] = [certainty[x, y][0]+255/(2*res_m*res_m),
certainty[x, y][1]+255/(2*res_m*res_m),
certainty[x, y][2]+255/(2*res_m*res_m),
1]
# update the neighbours with a green tint
else:
#print(certainty[x, y])
#print(certainty[x, y][0])
#print(certainty[x, y][1]+255/(32*res_m*res_m))
#print(certainty[x, y][2])
certainty[x, y] = [certainty[x, y][0],
certainty[x, y][1]+255/(32*res_m*res_m),
certainty[x, y][2],
1]
# also colour its neighbours (certainty)
return (state, knowledge, certainty)
#subsidary function
def find_nearest_empty(state, knowledge, threshold, certforknow=0):
size = np.shape(knowledge)
loc = [state.x, state.y]
#search in rings, dist is 1 off because of index 0
dist_to_edge = max(state.x, state.y, size[0]-1-state.x, size[1]-1-state.y)
# quick algorithm to find the coords of nearest nodes in ascending order
radii = []
for i in range(0, dist_to_edge+1): # to edge
for j in range(0, i+1):
radii.append(np.sqrt(pow(i,2)+pow(j,2)))
radii.sort()
nodes = []
# check for unexplored places in each radius
for rad in radii:
# list integers up to this distance: x cannot be more than the radius
x = range(0, int(rad + 1));
for v in x:
y = np.sqrt(pow(rad,2) - pow(v,2))
# discount tiny computational error, then see if the answer is an integer
if abs(y - round(y)) < pow(10, -10):
nodes.append((v, int(round(y))))
for i in [-v, v]:
if loc[0]+i < size[0] and loc[0]+i >= 0:
for j in [int(round(y)), -int(round(y))]:
#if it is within range
if loc[1]+j < size[1] and loc[1]+j >= 0:
#print(loc[0]+i, loc[1]+j)
#print(knowledge[loc[0]+i, loc[1]+j][0:3])
#print(sum(knowledge[loc[0]+i, loc[1]+j][0:3]))
#print(threshold)
#if sum(knowledge[loc[0]+i, loc[1]+j][0:3])/3 > threshold:
# print("no", loc[0]+i, loc[1]+j)
if sum(knowledge[loc[0]+i, loc[1]+j][0:3])/3 < threshold:
# print(knowledge[loc[0]+i, loc[1]+j][0:3])
# print("yes", loc[0]+i, loc[1]+j)
return (loc[0]+i, loc[1]+j)
# includes a certainty matrix, which increases as repeat measurements are
# gathered over the same region, particularly if they are in accordance
# perhaps the BAyes approach could focus on regions, and group 'certain' regions
# as one, to minimise computation
# we also want to render the certainty matrix
def Bayesian_2D(state, knowledge, stimulus, certainty, res_m):
(state.x, state.y) = (int(state.x/res_m), int(state.y/res_m))
(state.x, state.y) = find_nearest_empty(state, certainty, 0.6*255, 1)
(state.x, state.y) = (state.x*res_m, state.y*res_m)
#knowledge[state.x, state.y] = stimulus[int(state.x/res_m), int(state.y/res_m)]
sd = 50
measurement=[0,0,0,255]
for i in [0, 1, 2]:
#take measurement with some gaussian noise
measurement[i] = np.random.normal(loc=stimulus[int(state.x/res_m), int(state.y/res_m)][i], scale = sd)
if measurement[i] < 0:
measurement[i] = 0
if measurement[i] > 255:
measurement[i] = 255
#fill in the next available knowledge "slot"
done = 0
for i in range(0,res_m):
for j in range(0,res_m):
if knowledge[state.x+i, state.y+j][0] == -1 and done == 0:
knowledge[state.x+i, state.y+j] = measurement
done = 1
#look in the neighbouring region
for x in [int(state.x/res_m)-1, int(state.x/res_m), int(state.x/res_m)+1]:
for y in [int(state.y/res_m)-1, int(state.y/res_m), int(state.y/res_m)+1]:
space = np.shape(stimulus)
# don't draw outside the area
if x >= space[0] or y >= space[1] or x < 0 or y < 0:
pass
# update the certainty at the current location equally
elif x == int(state.x/res_m) and y == int(state.y/res_m):
#this should be a combination of integrating over the pdf and
#using neighbouring cells to predict
# - the strength of how well cells can predict their neighbours
# can be trained over multiple data
certainty[x, y] = [certainty[x, y][0]+255/(2*res_m*res_m),
certainty[x, y][1]+255/(2*res_m*res_m),
certainty[x, y][2]+255/(2*res_m*res_m),
255]
# update the neighbours with a green tint
else:
certainty[x, y] = [certainty[x, y][0],
certainty[x, y][1]+255/(32*res_m*res_m),
certainty[x, y][2],
255]
# also colour its neighbours (certainty)
# Choose an action to minimise entropy of belief
# beleif = P(O|h)
# (intent parameter | history)
# new belief = P(b'|y,a,b)
# new belief = (b'|latest info,latest choice,old beleif)
# What if my reward was guessing the stiffness/response, with diminishing
# returns as the measurements are repeated?
# Certainty is some function of number of measurements and s.d. between them
# Perhaps if a ceiling number of measurements is ascertained, then the
# current level of uncertainty can be compared to that. e.g. 20 readings
# always results in a "true" value, so the s.d. can be calculated knowing
# this
# Find the expected shape of the Bell-curve. Take few measurements, find
# the likliest COM/mean from these measurements
# Assuming a bell/gaussian curve:
# certainty[int(state.x/res_m), int(state.y/res_m)] =
'''
sigma = 10
mean = 50
# x axis is assumed density
(1/(sigma*sqrt(2*pi)))*exp(-((x-mean)/sigma)^2/2)
'''
return (state, knowledge, certainty)
# subsidiary function
def Gaussian_dist(x):
avg = 0 #avg doesn't need to be included - this is just for consistency with gaussian
sd = 10
prob_dens = (1/(sd*np.sqrt(2*np.pi)))*np.exp(-(((x-avg)/sd)**2)/2)
return prob_dens
# integrates over the tolerance to return the liklihood that the current value
# is within tolerance
def prob_from_gauss(tolerance, measurements):
sd = 10
cert = integrate.quad(lambda x: pow(Gaussian_dist(x),measurements)/ \
(integrate.quad(lambda x: pow(Gaussian_dist(x),measurements),\
-5*sd, 5*sd)[0]), \
-tolerance, tolerance)
return cert
######### 3D methods ####################################
# This method rewards itself when it correctly predicts the reaction from an
# unexplored region of the stimulus.
# The Bayesian/calculated matrix should have a finer level of detail
# (Nyquist-Shannon <f/2) than the resolution that is desired.
# It explores to minimise entropy in its belief over the whole stimulus.
# Belief function is initialised to have maximum entropy (min information).
# Finishing condition can be one of:
# minimum reward expected for any unknown region reaches threshold
# the region with max entropy reaches a minimum
def Bayes_Approach(state, knowledge, stimulus):
# selects and executes a movement, generating a new state, knowledge
if Finishing_condition:
return(state)
else:
Bayes_Approach(state, knowledge, stimulus)
# method moves a character in a worldstate. It has a choice of moving (1
# square) or "palpating". ------ This is similar to clicking in minesweeper(?)