---
title: "Game Theory Simulations"
author: "K. C. Enevoldsen & P. Waade"
date: "11/24/2018"
output: html_document
editor_options:
  chunk_output_type: console
---
#Very Nice to do:
- Is it called internal states or hidden states?
#Nice to do:
- Flag for Noisy - making the gradient prior = 1 for bias
- Perturb Learning
- Dilution/forgetting
- Add input/output/purpose descriptions (i.e. what it does) to all functions
- add "(Not an error)" to messages which might be misconstrued as an error message
- Add the recursive preparation function to be applied by default in the k_tom function if none is specified
- Test whether the compete function works without hidden states specified
- Do evolution
- Add a reinforcement learning agent
- Consider if we can save p_op_1_k and use that instead of p_op_1_k_approx
- Consider if we can use the decision functions that use volatility (see if a = 0.36 works with other volatility values)
- Handedness/Bias
- Saving in the compete function should be a list operation
#Packages and WD
```{r setup, include=FALSE}
# !diagnostics off
#^the above argument prevents the multiple 'unknown column' warnings (which are harmless)
knitr::opts_chunk$set(echo = TRUE)
#devtools::install_github("thomasp85/patchwork")
pacman::p_load(pacman, plyr, tidyverse, raster, reshape2, knitr, brms, boot, rethinking, groupdata2, patchwork, RColorBrewer)
```
#Function for fetching payoff-matrices
```{r payoff matrix}
fetch_p_matrix <- function(game, custom = c(0,0,0,0,0,0,0,0)) {
#Makes a payoff matrix for use in the simulation.
#String commands for getting pre-made matrices:
#"staghunt" - the stag hunt game
#"penny_cooperative" - a cooperative penny matching game
#"penny_competitive" - a competitive penny matching game
#"prisoners_dilemma" - the prisoner's dilemma game
#"party" - the party dilemma game
#"sexes" - the battle of the sexes game
#"chicken" - the chicken game
#"deadlock" - a deadlock game
#if the string "custom" is used, a custom matrix can be specified by another argument custom = c(a,b,c,d,e,f,g,h), where each number is an entry in the payoff matrix
if (game == "staghunt") {
p_matrix <-
data_frame(
p1_choice = c(1, 0, 1, 0),
p2_choice = c(1, 1, 0, 0),
p1_reward = c(5, 3, 0, 3),
p2_reward = c(5, 0, 3, 3),
game_title = c(game,
NA,NA,NA))
return(p_matrix)
} else if (game == "penny_cooperative") {
p_matrix <-
data_frame(
p1_choice = c(1, 0, 1, 0),
p2_choice = c(1, 1, 0, 0),
p1_reward = c(1, -1, -1, 1),
p2_reward = c(1, -1, -1, 1),
game_title = c(game,
NA,NA,NA))
return(p_matrix)
} else if (game == "penny_competitive") {
p_matrix <-
data_frame(
p1_choice = c( 1, 0, 1, 0),
p2_choice = c( 1, 1, 0, 0),
p1_reward = c(-1, 1, 1, -1),
p2_reward = c( 1, -1, -1, 1),
game_title = c(game,
NA,NA,NA))
return(p_matrix)
} else if (game == "prisoners_dilemma") {
p_matrix <-
data_frame(
p1_choice = c(1, 0, 1, 0),
p2_choice = c(1, 1, 0, 0),
p1_reward = c(2, 3,-1, 0),
p2_reward = c(2,-1, 3, 0),
game_title = c(game,
NA,NA,NA))
return(p_matrix)
} else if (game == "party") {
p_matrix <-
data_frame(
p1_choice = c(1, 0, 1, 0),
p2_choice = c(1, 1, 0, 0),
p1_reward = c(10, 0, 0, 5),
p2_reward = c(10, 0, 0, 5),
game_title = c(game,
NA,NA,NA))
return(p_matrix)
} else if (game == "sexes") {
p_matrix <-
data_frame(
p1_choice = c(1, 0, 1, 0),
p2_choice = c(1, 1, 0, 0),
p1_reward = c(5, 0, 0, 10),
p2_reward = c(10, 0, 0, 5),
game_title = c(game,
NA,NA,NA))
return(p_matrix)
} else if (game == "chicken") {
p_matrix <-
data_frame(
p1_choice = c(1, 0, 1, 0),
p2_choice = c(1, 1, 0, 0),
p1_reward = c(0, -1, 1, -1000),
p2_reward = c(0, 1, -1, -1000),
game_title = c(game,
NA,NA,NA))
return(p_matrix)
} else if (game == "deadlock") {
p_matrix <-
data_frame(
p1_choice = c(1, 0, 1, 0),
p2_choice = c(1, 1, 0, 0),
p1_reward = c(2, 0, 3, 1),
p2_reward = c(2, 3, 0, 1),
game_title = c(game,
NA,NA,NA))
return(p_matrix)
} else if (game == "custom") {
p_matrix <-
data_frame(
p1_choice = c(1, 0, 1, 0),
p2_choice = c(1, 1, 0, 0),
p1_reward = c(custom[1], custom[3], custom[5], custom[7]),
p2_reward = c(custom[2], custom[4], custom[6], custom[8]),
game_title = c(game,
NA,NA,NA))
return(p_matrix)
}
}
```
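For reference, a minimal usage sketch of the helper (assuming the chunk above has been run; the custom entries below are arbitrary example values):
```{r}
#fetch a pre-made payoff matrix
fetch_p_matrix("staghunt")
#specify a custom payoff matrix entry by entry
fetch_p_matrix("custom", custom = c(2, 2, 0, 3, 3, 0, 1, 1))
```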
#General functions
```{r general functions}
#Utility function
U <- function(c_self, c_op, player, p_matrix){
#Returns the reward for a player given a payoff matrix and choices
#INPUT
#c_self: player's own choice
#c_op: opponent's choice
#player: which side of the payoff matrix is used. 0: first player, 1: second player
#p_matrix: a given 2-by-2 payoff matrix
#OUTPUT
#The reward for the specified player
# get values from payoff matrix for player 1
a_1 <- p_matrix$p1_reward[p_matrix$p1_choice == 1 & p_matrix$p2_choice == 1] #value if both choose 1
b_1 <- p_matrix$p1_reward[p_matrix$p1_choice == 1 & p_matrix$p2_choice == 0] #value if self chooses 1 and opponent chooses 0
c_1 <- p_matrix$p1_reward[p_matrix$p1_choice == 0 & p_matrix$p2_choice == 1] #value if self chooses 0 and opponent chooses 1
d_1 <- p_matrix$p1_reward[p_matrix$p1_choice == 0 & p_matrix$p2_choice == 0] #value if both choose 0
# get values from payoff matrix for player 2
a_2 <- p_matrix$p2_reward[p_matrix$p1_choice == 1 & p_matrix$p2_choice == 1] #value if both choose 1
b_2 <- p_matrix$p2_reward[p_matrix$p1_choice == 0 & p_matrix$p2_choice == 1] #value if self chooses 1 and opponent chooses 0
c_2 <- p_matrix$p2_reward[p_matrix$p1_choice == 1 & p_matrix$p2_choice == 0] #value if self chooses 0 and opponent chooses 1
d_2 <- p_matrix$p2_reward[p_matrix$p1_choice == 0 & p_matrix$p2_choice == 0] #value if both choose 0
# calculate reward
reward <-
(1-player) * #for player 1
(c_self * c_op * a_1 + #if both choose 1
c_self * (1 - c_op) * b_1 + #if you choose 1 and opponent chooses 0
(1 - c_self) * c_op * c_1 + #if you choose 0 and the opponent chooses 1
(1 - c_self) * (1 - c_op) * d_1 #if both choose 0
) +
player * #for player 2
(c_self * c_op * a_2 + #if both choose 1
c_self * (1 - c_op) * b_2 + #if you choose 1 and opponent chooses 0
(1 - c_self) * c_op * c_2 + #if you choose 0 and the opponent chooses 1
(1 - c_self) * (1 - c_op) * d_2 #if both choose 0
)
return(reward)
}
#Expected payoff
expected_payoff_difference <- function(p_op_1, player, p_matrix) {
#Returns the expected value difference between choosing 1 and 0, averaging over the opponent's possible choices weighted by their estimated probabilities
#INPUT
#p_op_1: estimated choice probability of the opponent
#player: the current player
#p_matrix: a given payoff matrix
#OUTPUT
#An expected payoff difference for 1
e_payoff_dif <-
p_op_1 * (U(1, 1, player, p_matrix) - U(0, 1, player, p_matrix)) +
(1 - p_op_1) * (U(1, 0, player, p_matrix) - U(0, 0, player, p_matrix))
return(e_payoff_dif)
}
#Softmax function
softmax <- function(e_payoff_diff, behavioural_temperature){
#Returns a choice probability for 1 given an expected payoff
#INPUT
#e_payoff_diff: an expected payoff difference for 1
#behavioural_temperature: a randomizing temperature parameter
#OUTPUT
#A choice probability for 1
#prepare behavioural temperature
behavioural_temperature <- exp(behavioural_temperature)
#Calculate probability of choosing 1
p_self_1 <- 1 / (1 + exp(-(e_payoff_diff / behavioural_temperature)))
#Make an upper bound on softmax's output to avoid getting infinite values when transforming to logodds
if (p_self_1 > 0.999) {
p_self_1 = 0.999
}
#Similarly, make a lower bound
if (p_self_1 < 0.001) {
p_self_1 = 0.001
}
return(p_self_1)
}
```
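A minimal sketch of how the three helpers fit together (the probability and temperature values below are arbitrary examples):
```{r}
p_matrix <- fetch_p_matrix("staghunt")
#reward for player 1 when both players choose 1
U(c_self = 1, c_op = 1, player = 0, p_matrix = p_matrix)
#expected payoff difference for choosing 1 if the opponent chooses 1 with probability 0.7
e_diff <- expected_payoff_difference(p_op_1 = 0.7, player = 0, p_matrix = p_matrix)
#resulting choice probability at a behavioural temperature of -1 (log scale)
softmax(e_diff, behavioural_temperature = -1)
```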
#Simple Agents functions
```{r simple agents}
#A Human Player
PlaySelf <- function(params = NULL, hidden_states = NULL, player = NULL, p_matrix = NULL, choice_self = NULL, choice_op = NULL, return_hidden_states = F, messages = T){
#Lets a human player make a choice
#INPUT
#player: the current player role
#p_matrix: the current payoff matrix
#choice_self: player's own choice on last round
#choice_op: opponent's choice last round
#OUTPUT
#player's choice
if (messages){
#Report results from last round
if (is.null(choice_op) == F){ #if there was a previous round
#Print the choices and rewards from last round
message(paste("Last round you chose: ", choice_self,
". Your opponent chose: ", choice_op,
". Your score was: ", U(choice_self, choice_op, player, p_matrix),
".", sep = ""))
#Remind how to see the payoff matrix
message("If you want to print the payoff matrix again, respond 'print'")
} else {
#Say which player you are
message(paste("You are player", player + 1))
#Fetch the name of the game and report it
game_string <-p_matrix$game_title[1]
game_string <- str_replace(game_string, "_matrix", "")
message(paste("You are currently playing the ", game_string, " game.",sep = ""))
#Show the payoff matrix
message("The following is the payoff matrix for this game:")
print(p_matrix[1:4])
}
}
choice = NA
while(choice %in% c(1, 0) == F){ #waiting for player to make a choice
if (choice %in% c("print", "Print", "'print'")){ #If they write print
#Fetch the name of the game and report it
game_string <-p_matrix$game_title[1]
game_string <- str_replace(game_string, "_matrix", "")
message(paste("You are currently playing the ", game_string, " game.",sep = ""))
#Say which player the human is
message(paste("You are player", player + 1))
#Print the payoff matrix
message("The following is the payoff matrix for this game:")
print(p_matrix[1:4])
} else if (is.na(choice) == F) { #If they write something that doesn't make sense
message("invalid response, please try again.")
}
#Get choice from player
choice <- readline(prompt = "What do you choose this round? Respond 1 or 0: ")
}
if (return_hidden_states == T){
return(list(choice = as.numeric(choice), hidden_states = hidden_states))
} else {
return(as.numeric(choice))
}
}
#Random Choice with a Bias
RB <- function(params, hidden_states = NULL, player = NULL, p_matrix = NULL, choice_self = NULL, choice_op = NULL, return_hidden_states = F){
#RB: A Random Bias strategy choice. Selects randomly with a given probability
#INPUT
#params: a list of 1 element, RB's choice probability parameter
#OUTPUT
#RB's choice
#randomly select 1 or 0 (with the given bias)
choice <- rbinom(1, 1, prob = params$prop)
if (return_hidden_states == T){
return(list(choice = choice, hidden_states = hidden_states))
} else {
return(choice)
}
}
#Tit for Tat
TFT <- function(params, hidden_states = NULL, player = NULL, p_matrix = NULL, choice_self, choice_op, return_hidden_states = F) {
#A probabilistic Tit for Tat strategy. Copies the opponent's last choice with a given probability.
#INPUT
#params: list of 1 element: TFT's choice probability parameter
#OUTPUT
#TFT's choice
#The probability of TFT copying opponent's choice
copy_prob = params$copy_prob
if (is.null(choice_op)) { #initial round or missed trial
choice <- rbinom(1, 1, 0.5) #make random choice
} else {
#Decide whether TFT copies opponent
copy = rbinom(1, 1, copy_prob)
#Calculate resulting choice
choice = copy*choice_op + (1-copy)*(1-choice_op)
}
if (return_hidden_states == T){
return(list(choice = choice, hidden_states = hidden_states))
} else {
return(choice)
}
}
###Win-Stay-Lose-Shift
WSLS <- function(params, hidden_states = NULL, player, p_matrix, choice_self, choice_op, return_hidden_states = F){
#A probabilistic Win-Stay Lose-Shift strategy. Repeats its own previous choice if it won last round and does the opposite if it lost, each with a given probability
#INPUT
#params: a list of two elements, WSLS's win-stay probability parameter and lose-shift probability parameter
#OUTPUT
#WSLS's choice
if (is.null(choice_op)) { #initial round or missed trial
choice <- rbinom(1, 1, 0.5) #make random choice
} else {
#Read in parameters
stay_prob <- params$stay_prob
switch_prob <- params$switch_prob
#Read in score from last round
prev_reward <- U(choice_self, choice_op, player, p_matrix)
#Calculate the mean of all possible rewards for current player
mean_reward <- (1 - player) * mean(p_matrix$p1_reward) + player * mean(p_matrix$p2_reward)
#Calculate choice
if (prev_reward > mean_reward) { #if the agent won, i.e. got more than mean amount of points
stay <- rbinom(1, 1, stay_prob) #decide whether agent stays
choice <- stay * choice_self + (1-stay) * (1-choice_self) #staying -> same choice as last
} else if (prev_reward < mean_reward) { #if the agent lost, i.e. got less than mean score
switch <- rbinom(1, 1, switch_prob) #decide whether agent switches
choice <- switch * (1-choice_self) + (1-switch) * choice_self #switching -> opposite choice of last
} else if (prev_reward == mean_reward) { #if the agent got mean score
choice <- rbinom(1, 1, 0.5) #make random choice
}
}
if (return_hidden_states == T){
return(list(choice = choice, hidden_states = hidden_states))
} else {
return(choice)
}
}
```
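A minimal sketch of calling the simple agents directly (the parameter values below are arbitrary examples):
```{r}
p_matrix <- fetch_p_matrix("penny_competitive")
#a random bias agent choosing 1 with probability 0.7
RB(params = list(prop = 0.7))
#a tit-for-tat agent facing an opponent who chose 1 last round
TFT(params = list(copy_prob = 0.9), choice_self = 0, choice_op = 1)
#a win-stay lose-shift agent that got less than the mean payoff last round as player 1
WSLS(params = list(stay_prob = 0.9, switch_prob = 0.9),
     player = 0, p_matrix = p_matrix, choice_self = 1, choice_op = 1)
```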
#K-ToM functions
Learning function - 0-ToM estimate update
```{r}
basic_variance_update <- function(prev_hidden_states, params) {
#0-ToM updates the variance of its parameter estimate
#INPUT
#prev_hidden_states: a list structure containing the states from last round
#params: a list structure containing 0-ToM's volatility parameter
#OUTPUT
#An updated estimate variance
volatility <- params$volatility #the volatility parameter reduces learning, assuming that there is noise in the opponents decisions
prev_variance_basic <- prev_hidden_states$own_hidden_states$variance_basic #the uncertainty of opponent probability
prev_mean_basic <- prev_hidden_states$own_hidden_states$mean_basic #the mean estimate of opponent probability
#prepare volatility
volatility <- exp(volatility)
#prepare variance
prev_variance_basic <- exp(prev_variance_basic)
#calculate the new variance
variance_basic <-
1 / (
(1 / (volatility + prev_variance_basic)) +
inv.logit(prev_mean_basic) * (1 - inv.logit(prev_mean_basic)))
#transform back to log scale
variance_basic <- log(variance_basic)
return(variance_basic)
}
basic_mean_update <- function(prev_hidden_states, choices, variance_basic) {
#0-ToM updates the mean of its parameter estimate
#INPUT
#prev_hidden_states: a list structure containing the states from last round
#choices: a vector of [1] own choice and [2] opponent's choice from last round
#variance_basic: the updated estimate variance
#OUTPUT
#An updated mean estimate
prev_c_op <- choices[2] #opponent's choice
prev_mean_basic <- prev_hidden_states$own_hidden_states$mean_basic #the mean estimate of opponent probability
variance_basic #the uncertainty of opponent probability
#prepare variance
variance_basic <- exp(variance_basic)
#calculate the new mean
mean_basic <- prev_mean_basic + variance_basic * (prev_c_op - inv.logit(prev_mean_basic))
return(mean_basic)
}
```
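Read off the code above, the two 0-ToM updates are (with $s(\cdot)$ the logistic function; note that the code stores variances on a log scale):
$$\hat\sigma_t = \frac{1}{\dfrac{1}{\hat\sigma_{t-1} + \exp(\text{volatility})} + s(\hat\mu_{t-1})\bigl(1 - s(\hat\mu_{t-1})\bigr)}$$
$$\hat\mu_t = \hat\mu_{t-1} + \hat\sigma_t\,\bigl(c^{op}_{t-1} - s(\hat\mu_{t-1})\bigr)$$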
Learning function - update p(k)
```{r}
p_op_1_k_approx_fun <- function(prev_hidden_states, level){
#Approximates the estimated choice probability of the opponent on the previous round. A semi-analytical approximation derived in Daunizeau, J. (2017)
#INPUT
#prev_hidden_states: a list structure containing the states from last round
#level: k-ToM's sophistication level
#OUTPUT
#An approximation of the estimated choice probability of last round.
#input
#for each sophistication level
prev_mean <- prev_hidden_states$own_hidden_states$mean # mean of opponent probability estimation VECTOR
prev_variance <- prev_hidden_states$own_hidden_states$variance # the variance for each estimated parameter MATRIX
prev_gradient <- prev_hidden_states$own_hidden_states$gradient # gradients for each parameter MATRIX
#constants
a <- 0.205
b <- -0.319
c <- 0.781
d <- 0.870
#prepare variance
prev_variance_prepared <- NULL #make empty vector
#variance is exponentiated, gradient is squared
for (level_index in 1:level) { #for each level
#matrix-multiply the transposed variances with the squared gradients. This gives a single value per level
prev_variance_prepared[level_index] <- t(exp(prev_variance[level_index,])) %*% prev_gradient[level_index,]^2
}
#calculate estimated probability of opponent choice
p_op_1_k_approx <-
inv.logit((prev_mean + b * prev_variance_prepared^c) / sqrt(1 + a * prev_variance_prepared^d))
#log-transform
p_op_1_k_approx <- log(p_op_1_k_approx)
return(p_op_1_k_approx)
}
update_pk <- function(prev_hidden_states, choices, p_op_1_k_approx){
#Updates k-ToM's estimated probability of the opponent having each possible sophistication level.
#INPUT:
#prev_hidden_states: A list structure containing the states of last trial
#choices: a vector of [1] own choice and [2] opponent's choice from last round
#p_op_1_k_approx: an approximated opponent choice probability from last round
#OUTPUT
#The updated level probabilities
#input
prev_c_op <- choices[2] #opponent's choice
#for each sophistication level
prev_p_k <- prev_hidden_states$own_hidden_states$p_k # probability of sophistication level k VECTOR
p_op_1_k_approx # probability of opponent choosing 1, approximated semi-analytically VECTOR
#prepare probability
p_op_1_k_approx <- exp(p_op_1_k_approx)
#calculate probability of each possible sophistication level
p_k <-
prev_c_op* #if opponent chose 1
((prev_p_k*p_op_1_k_approx)/sum(prev_p_k*p_op_1_k_approx)) +
(1-prev_c_op)* #if opponent chose 0
(prev_p_k*(1-p_op_1_k_approx)/sum(prev_p_k*(1-p_op_1_k_approx)))
if (length(p_k) > 1) { #if higher than a 1-ToM
#Force 0-ToM probability so that the probabilities sum to 1
p_k[1] <-
1 - sum(p_k[2:length(p_k)])
}
return(p_k)
}
```
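In equation form, the semi-analytical approximation and the level-probability update above are (with $s(\cdot)$ the logistic function, $\nabla$ the stored gradients, and $a$, $b$, $c$, $d$ the constants above):
$$\tilde p_k = s\!\left(\frac{\hat\mu_k + b\,V_k^{\,c}}{\sqrt{1 + a\,V_k^{\,d}}}\right), \qquad V_k = \sum_j \exp(\hat\sigma_{kj})\,\nabla_{kj}^2$$
$$p(k) \propto p_{t-1}(k)\,\bigl[c^{op}\,\tilde p_k + (1 - c^{op})(1 - \tilde p_k)\bigr]$$
with the level probabilities renormalised so they sum to 1.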
Learning function - updating parameter estimates
```{r}
parameter_variance_update <- function(prev_hidden_states, params, p_k) {
#k-ToM updates the variance of its parameter estimates
#INPUT
#prev_hidden_states: a list structure containing the states from the last round
#params: a list structure containing k-ToM's volatility parameter and a dummy variable which decides which parameter estimates are affected by volatility
#p_k: a vector of level probabilities
#OUTPUT
#Updated variances of k-ToM's parameter estimates
#input
volatility <- params$volatility
volatility_dummy <- params$volatility_dummy #the dummy variable flags which parameters are affected by volatility
#for each k:
prev_param_mean <- prev_hidden_states$own_hidden_states$param_mean #the mean for each estimated parameter MATRIX
prev_variance <- prev_hidden_states$own_hidden_states$variance #the uncertainty for each estimated parameter MATRIX
prev_gradient <- prev_hidden_states$own_hidden_states$gradient #the gradient for each estimated parameter MATRIX
p_k #the probability of sophistication level k VECTOR
#prepare volatility
volatility <- exp(volatility)*volatility_dummy
#prepare variance
prev_variance <- exp(prev_variance)
#calculate new variance
variance <-
1 /
(1 / (prev_variance + volatility) +
p_k *
inv.logit(prev_param_mean) * (1 - inv.logit(prev_param_mean)) *
prev_gradient^2)
#transform back to log scale
variance <- log(variance)
return(variance)
}
parameter_mean_update <- function(prev_hidden_states, choices, p_k, variance){
#k-ToM updates the mean of its parameter estimates
#INPUT
#prev_hidden_states: a list structure containing the states from the last round
#choices: a vector of [1] own choice and [2] opponent's choice from last round
#p_k: a vector of level probabilities
#variance: k-ToM's variance on parameter estimates
#OUTPUT
#Updated means of k-ToM's parameter estimates
#input
prev_c_op <- choices[2] #opponent's choice
#for each sophistication level k:
prev_mean <- prev_hidden_states$own_hidden_states$mean #the mean of opponent probability estimation VECTOR
prev_param_mean <- prev_hidden_states$own_hidden_states$param_mean #the mean for each estimated parameter MATRIX
prev_gradient <- prev_hidden_states$own_hidden_states$gradient #the gradient for each estimated parameter MATRIX
p_k #the probability of sophistication level k VECTOR
variance #the variance of each estimated parameter MATRIX
#prepare variance
variance <- exp(variance)*prev_gradient
#calculate new mean estimates
param_mean <-
prev_param_mean + p_k * variance * (prev_c_op - inv.logit(prev_mean))
#?# "for numerical purposes" - unsure if necessary
#param_mean <- inv.logit(logit(param_mean))
return(param_mean)
}
```
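For each level $k$ and parameter $j$, the two updates above amount to (with $v_j = \exp(\text{volatility}) \cdot \text{dummy}_j$ and $s(\cdot)$ the logistic function):
$$\Sigma_{kj,t} = \frac{1}{\dfrac{1}{\Sigma_{kj,t-1} + v_j} + p(k)\,s(\mu_{kj,t-1})\bigl(1 - s(\mu_{kj,t-1})\bigr)\nabla_{kj}^2}$$
$$\mu_{kj,t} = \mu_{kj,t-1} + p(k)\,\Sigma_{kj,t}\,\nabla_{kj}\,\bigl(c^{op} - s(\hat\mu_{k,t-1})\bigr)$$
where $\hat\mu_{k,t-1}$ is the previous estimate (in log-odds) of the opponent's choice probability at level $k$.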
Learning function - updating gradient
```{r}
gradient_update <- function(opponent_prev_hidden_states, params, mean, param_mean, reverse_choices, opponent_level, opponent_player, p_matrix.) {
#k-ToM calculates the gradient between parameter estimates and choice probability estimates
#INPUT
#opponent_prev_hidden_states: the simulated opponent's hidden states from last round
#mean: the mean of k-ToM's choice probability estimate of its opponent
#param_mean: the means of k-ToM's parameter estimates
#reverse_choices: a vector of k-ToM's [1] opponent's choice and [2] own choice. Inserted as choices when simulating the opponent
#opponent_level: the level of the opponent for which the gradient is calculated
#opponent_player: the reverse player role of k-ToM's own. Inserted as player role when simulating the opponent
#p_matrix: a given 2-by-2 payoff matrix
#OUTPUT
#The gradient between each parameter estimate and the choice probability estimate
#input
opponent_prev_hidden_states #opponent's hidden states, for running the learning function
reverse_choices #opponent's perspective
opponent_level #the simulated opponent's sophistication level
opponent_player #the simulated opponent's player role
mean #the mean of opponent probability estimation VECTOR
param_mean #the mean for each estimated parameter MATRIX
#Make empty list for filling in gradients
gradient <- NULL
for (param in 1:length(param_mean)) {
#calculate increment
increment <- max(abs(1e-4*param_mean[param]), 1e-4)
#use the parameter estimates
param_mean_incremented <- param_mean
#but use one of the incremented instead
param_mean_incremented[param] <- param_mean[param] + increment
#Make a list for parameters to be inserted
opponent_params <- list(
behavioural_temperature = param_mean_incremented[2], #temperature is the second column in the matrix
volatility = param_mean_incremented[1], #volatility is the first column in the matrix
volatility_dummy = params$volatility_dummy
)
#run the opponent's learning function using param_mean_incremented as parameter values
opponent_hidden_states_incremented <- rec_learning_function(prev_hidden_states = opponent_prev_hidden_states,
params = opponent_params,
choices = reverse_choices,
level = opponent_level,
player = opponent_player,
p_matrix = p_matrix.)
#run the opponent's decision function using the incremented hidden states
mean_incremented <- decision_function(hidden_states = opponent_hidden_states_incremented,
params = opponent_params,
player = opponent_player,
level = opponent_level,
p_matrix = p_matrix.)
#calculate the gradient between parameter increment and probability estimate
gradient[param] <- (mean_incremented - mean)/increment
}
return(gradient)
}
```
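The loop above is a one-sided finite-difference approximation of the gradient. A stripped-down sketch of the same scheme on a toy function (the function and values here are purely illustrative):
```{r}
#one-sided finite differences, as used in gradient_update above
toy_function <- function(x) x^2
x <- 2
increment <- max(abs(1e-4 * x), 1e-4)
(toy_function(x + increment) - toy_function(x)) / increment #close to the analytical gradient of 4
```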
Full learning function
```{r}
rec_learning_function <- function(
prev_hidden_states,
params,
choices,
level,
player,
p_matrix
) {
#k-ToM's learning function, where it updates its level probability, parameter, choice probability and gradient estimates. This is called recursively.
#INPUT
#prev_hidden_states: a list structure containing the states from last round
#params: a list structure containing k-ToM's volatility parameter
#choices: a vector of [1] own choice and [2] opponent's choice from last round
#level: k-ToM's sophistication level
#player: k-ToM's player role, i.e. which side of the payoff matrix is used
#p_matrix: a given 2-by-2 payoff matrix
#OUTPUT:
#A list structure containing the updated estimates of k-ToM and all recursively simulated opponents
#p_matrix is stored under another name, to avoid recursion-related errors
p_matrix. <- p_matrix
#Make empty list for filling with updated values
new_hidden_states <- list()
if (level == 0) { #If the (simulated) agent is a 0-ToM
#Update 0-ToM's uncertainty of opponent choice probability
variance_basic <- basic_variance_update(prev_hidden_states, params)
#Update 0-ToM's mean estimate of opponent choice probability
mean_basic <- basic_mean_update(prev_hidden_states, choices, variance_basic)
#Gather own hidden states into one list
own_hidden_states <- list(mean_basic = mean_basic, variance_basic = variance_basic)
} else { #If the (simulated) agent is a K-ToM
#Update level probabilities (p_k) and parameter estimates
p_op_1_k_approx <- p_op_1_k_approx_fun(prev_hidden_states, level)
p_k <- update_pk(prev_hidden_states, choices, p_op_1_k_approx)
variance <- parameter_variance_update(prev_hidden_states, params, p_k)
param_mean <- parameter_mean_update(prev_hidden_states, choices, p_k, variance)
#Make empty structures for filling in new means
mean <- NULL
gradient <- matrix(NA, ncol = ncol(param_mean), nrow = level) #An empty matrix with a column for each parameter and a row for each level
#Prepare opponent's perspective
reverse_choices <- choices[2:1]
opponent_player <- 1-player
#Now we need to go through each possible opponent's level one at a time. Highest opponent level is 1 lower than own level
for (level_index in 1:level) {
#Set the simulated opponent's level. "level_index" is one higher than the actual level because R vectors cannot be indexed from 0
opponent_level <- level_index-1
#Extract the currently simulated opponent's hidden states
opponent_hidden_states <- prev_hidden_states[[level_index]]
#Extract the estimated parameters of the current opponent
opponent_params <- list(
behavioural_temperature = param_mean[level_index, 2], #temperature is the second column in the matrix
volatility = param_mean[level_index, 1], #volatility is the first column in the matrix
volatility_dummy = params$volatility_dummy
)
#Simulate opponent learning
new_opponent_hidden_states <- rec_learning_function(prev_hidden_states = opponent_hidden_states,
params = opponent_params,
choices = reverse_choices,
level = opponent_level,
player = opponent_player,
p_matrix = p_matrix.)
#Simulate opponent deciding
mean[level_index] <- decision_function(hidden_states = new_opponent_hidden_states,
params = opponent_params,
player = opponent_player,
level = opponent_level,
p_matrix = p_matrix.)
#Update gradient
gradient[level_index,] <- gradient_update(opponent_prev_hidden_states = opponent_hidden_states,
params = params,
mean = mean[level_index],
param_mean = param_mean[level_index,], #only input the param_mean for the current level
reverse_choices = reverse_choices,
opponent_level = opponent_level,
opponent_player = opponent_player,
p_matrix. = p_matrix.)
#Save the opponent's hidden states in the list structure, named by its level (e.g. ToM_0)
new_hidden_states[[paste0("ToM_", opponent_level)]] <- new_opponent_hidden_states
}
#Gather own hidden states into one list
own_hidden_states <- list(p_k = p_k, mean = mean, param_mean = param_mean, variance = variance, gradient = gradient)
}
#Save own updated hidden states to new hidden states
new_hidden_states$own_hidden_states <- own_hidden_states
return(new_hidden_states)
}
```
Decision function - 0-ToM opponent probability
```{r}
basic_p_op_1_fun <- function(hidden_states, params){
#0-ToM combines the mean and variance of its parameter estimate into a final choice probability estimate.
#NB: this is the function taken from the VBA package (Daunizeau 2014), which does not use 0-ToM's volatility parameter
#INPUT
#hidden_states: 0-ToM's updated estimates for this round
#OUTPUT
#The estimated choice probability of the opponent
#for each sophistication level k:
mean_basic <- hidden_states$own_hidden_states$mean_basic #mean opponent probability estimate VECTOR
variance_basic <- hidden_states$own_hidden_states$variance_basic #variance of parameter estimates MATRIX
a <- 0.36 #this number is taken from the matlab code in ObsRecGen
#Prepare variance
variance_basic <- exp(variance_basic)
#calculate opponent's probability of choosing 1
p_op_1_basic <- inv.logit(mean_basic / sqrt(1 + a * variance_basic))
return(p_op_1_basic)
}
# ##ALTERNATIVE VERSION
# basic_p_op_1_fun <- function(hidden_states, params){
# #0-ToM combines the mean and variance of its parameter estimate into a final choice probability estimate.
# #NB: this is the theoretically derived function shown in the Reading Wild Minds article (Devaine 2017), which does use 0-ToM's volatility parameter.
# #By default the other variant is used
# #INPUT
# #hidden_states: 0-ToM's updated estimates for this round
# #params: a list structure containing 0-ToM's volatility parameter
# #OUTPUT
# #The estimated choice probability of the opponent
#
# #input
# volatility <- params$volatility
# mean_basic <- hidden_states$own_hidden_states$mean_basic #mean opponent probability estimate VECTOR
# variance_basic <- hidden_states$own_hidden_states$variance_basic #variance of parameter estimates MATRIX
#
# #Prepare variance
# variance_basic <- exp(variance_basic)
#
# #prepare volatility
# volatility <- exp(volatility)
#
# #calculate opponent's probability of choosing 1
# p_op_1_basic <- inv.logit(mean_basic/sqrt(1+(variance_basic+volatility)*3/pi^2))
#
# return(p_op_1_basic)
# }
```
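In equation form, the estimate above is:
$$P(c^{op} = 1) = s\!\left(\frac{\hat\mu}{\sqrt{1 + a\,\hat\sigma^2}}\right), \qquad a = 0.36$$
so the more uncertain the estimate, the closer the predicted choice probability is pulled towards 0.5.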
Decision function - opponent probability
```{r}
#Probability of the opponent choosing 1
p_op_1_k_fun <- function(hidden_states, params){
#k-ToM combines the mean choice probability estimate and the variances of its parameter estimates into a final choice probability estimate.
#NB: this is the function taken from the VBA package (Daunizeau 2014), which does not use k-ToM's volatility parameter
#INPUT
#hidden_states: k-ToM's updated estimates for this round
#OUTPUT
#The estimated choice probabilities of the opponent, for each possible opponent level
#for each sophistication level k:
mean <- hidden_states$own_hidden_states$mean #mean opponent probability estimate VECTOR
variance <- hidden_states$own_hidden_states$variance #variance of parameter estimates MATRIX
gradient <- hidden_states$own_hidden_states$gradient
a <- 0.36 #this number is taken from the VBA package function ObsRecGen
#Prepare variance
variance <- rowSums(exp(variance) * gradient^2) #summing the variances of each parameter (after weighting by gradient). One sum per sophistication level
#calculate opponent's probability of choosing 1
p_op_1_k <- inv.logit(mean / sqrt(1 + a * variance))
return(p_op_1_k)
}
# ##ALTERNATIVE VERSION
# p_op_1_k_fun <- function(hidden_states, params){
# #k-ToM combines the mean choice probability estimate and the variances of its parameter estimates into a final choice probability estimate.
# #NB: this is the theoretically derived function shown in the Reading Wild Minds article (Devaine 2017), which does use k-ToM's volatility parameter.
# #By default the other variant is used
#
# #INPUT
# #hidden_states: k-ToM's updated estimates for this round
# #params: a list structure containing k-ToM's volatility parameter
# #OUTPUT
# #The estimated choice probabilities of the opponent, for each possible opponent level
#
# #input
# volatility <- params$volatility
# volatility_dummy <- params$volatility_dummy #the dummy variable flags which parameters are affected by volatility
# #for each sophistication level k:
# mean <- hidden_states$own_hidden_states$mean #mean opponent probability estimate VECTOR
# variance <- hidden_states$own_hidden_states$variance #variance of parameter estimates MATRIX
# gradient <- hidden_states$own_hidden_states$gradient
#
# #prepare volatility
# volatility <- exp(volatility)*volatility_dummy
#
# #Prepare variance
# variance <- sum(exp(variance)*gradient^2)
#
# #calculate opponent's probability of choosing 1
# p_op_1_k <- inv.logit(mean/sqrt(1+(variance+volatility)*3/pi^2))
#
# return(p_op_1_k)
# }
```
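The per-level version is analogous, with the parameter variances first collapsed to one value per level via the gradients:
$$P_k(c^{op} = 1) = s\!\left(\frac{\hat\mu_k}{\sqrt{1 + a\sum_j \exp(\hat\sigma_{kj})\,\nabla_{kj}^2}}\right), \qquad a = 0.36$$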
Full decision function
```{r}
decision_function <- function(
hidden_states,
params,
player,
level,
p_matrix
) {
#k-ToM's decision function, where it calculates its own choice probability based on the updated estimates
#INPUT
#hidden_states: k-ToM's updated estimates
#params: a list structure containing k-ToM's volatility and behavioural temperature parameters
#player: k-ToM's player role, i.e. which side of the payoff matrix is used
#level: k-ToM's sophistication level k
#p_matrix: a given 2-by-2 payoff matrix
#OUTPUT
#k-ToM's own choice probability
if (level == 0) { #If the (simulated) agent is a 0-ToM
#Calculate opponent probability of choosing 1
p_op_1 <- basic_p_op_1_fun(hidden_states, params)
} else { #If the (simulated) agent is a K-ToM
#Calculate opponent probability of choosing 1, for each k
p_op_1_k <- p_op_1_k_fun(hidden_states, params)
#extract probabilities for each opponent level
p_k <- hidden_states$own_hidden_states$p_k
#Weigh probabilities by corresponding level probabilities, to calculate an aggregate probability of opponent choosing 1
p_op_1 <- sum(p_op_1_k * p_k)
}
#Calculate the expected payoff difference
e_payoff_dif <- expected_payoff_difference(p_op_1, player, p_matrix)
#Put into the softmax function to calculate the probability of choosing 1
p_self_1 <- softmax(e_payoff_dif, params$behavioural_temperature)
#Make into logodds
p_self_1 <- logit(p_self_1)
return(p_self_1)
}
```
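Putting the pieces together, the decision function computes:
$$P(c^{op} = 1) = \sum_k p(k)\,P_k(c^{op} = 1), \qquad P(c^{self} = 1) = \frac{1}{1 + \exp\!\bigl(-\Delta V / \exp(\beta)\bigr)}$$
where $\Delta V$ is the expected payoff difference for choosing 1 and $\beta$ the behavioural temperature; the result is returned on a log-odds scale.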
Full k-ToM function
```{r}
k_ToM <- function(params = "default", hidden_states, player, level = NULL, p_matrix, choice_self = NULL, choice_op = NULL, return_hidden_states = T) {
#The full k-ToM function. First it updates its level probability, choice probability, parameter and gradient estimates. Then it calculates the estimated choice probability of the opponent and its own choice probability in response. The function also contains the simpler 0-ToM strategy, which only updates the opponent's choice probability and reacts to it.
#INPUT:
#params: a list structure containing k-ToM's volatility parameter, the dummy variable which decides which parameter estimates are affected by volatility, and the behavioural temperature. If a string is inputted, default values are used.
#hidden_states: the estimates from last round
#player: k-ToM's player role, i.e. which side of the payoff matrix is used
#level: k-ToM's sophistication level k
#p_matrix: a given 2-by-2 payoff matrix
#choice_self: k-ToM's choice from last round
#choice_op: opponent's choice from last round
#OUTPUT:
#a list structure containing k-ToM's choice and updated estimates
if (class(params) != "list"){ #Use default parameter values if nothing else is specified
message("No parameter values specified, using default values")
params <- list(behavioural_temperature = -1, # these are the values used in the Matlab script
volatility = -2,
volatility_dummy = c(1,0), #the dummy variable flags which parameters are affected by volatility
level = level
)
}
#if no level was specified as an argument, use the one from the params list
if (is.null(level)){
level <- params$level
}
#the input comes from last round
prev_hidden_states <- hidden_states
#bind choices together for easy reorganising
choices <- c(choice_self, choice_op)
if (is.null(choice_self)){ #If first round or missed trial
new_hidden_states <- prev_hidden_states #No update
} else {
#Update hidden states
new_hidden_states <-
rec_learning_function(
prev_hidden_states,
params,
choices,
level,
player,
p_matrix)
}