diff --git a/CFR_rnd_prop_sampling.py b/CFR_rnd_prop_sampling.py index 5ca18c5..5afd175 100644 --- a/CFR_rnd_prop_sampling.py +++ b/CFR_rnd_prop_sampling.py @@ -103,19 +103,30 @@ def Train(self): print("Player one avg strategy:") trainer.playerOneTree.PrintAvgStrategy() -print("Player one best resp strategy:") -trainer.playerOneTree.PrintBestResp() -print("Player one regrets:") -trainer.playerOneTree.PrintRegrets() +# print("Player one best resp strategy:") +# trainer.playerOneTree.PrintBestResp() +# print("Player one regrets:") +# trainer.playerOneTree.PrintRegrets() print("----------------------") print("Player two avg strategy:") -trainer.playerTwoTree.PrintAvgStrategy() -print("Player two best resp strategy:") -trainer.playerTwoTree.PrintBestResp() -print("Player two regrets:") -trainer.playerTwoTree.PrintRegrets() +# trainer.playerTwoTree.PrintAvgStrategy() +# print("Player two best resp strategy:") +# trainer.playerTwoTree.PrintBestResp() +# print("Player two regrets:") +# trainer.playerTwoTree.PrintRegrets() + + +if (trainer.kuhn.IsPlayerOneCloseToNash(trainer.playerOneTree)): + print("Player one is in Nash") +else: + print("Player one is not in Nash") + +if(trainer.kuhn.IsPlayerTwoCloseToNash(trainer.playerTwoTree)): + print("Player two is in Nash") +else: + print("Player two is not in Nash") print("done") diff --git a/CfrNode.py b/CfrNode.py index 57f92f3..9daeb23 100644 --- a/CfrNode.py +++ b/CfrNode.py @@ -13,7 +13,7 @@ def __init__(self, infoset): self.utilsCount = 0 self.TotalUtil = 0 - def GetUtilStrategy(self): + def GetUtilRegretStrategy(self): utilsSum = 0 for action in range(NUM_ACTIONS): @@ -47,6 +47,18 @@ def GetUtilStrategy(self): return self.strategy + + def GetUtilStrategy(self): + + utilsSum = 0 + for action in range(NUM_ACTIONS): + utilsSum += self.util[action] + + for a in range(NUM_ACTIONS): + self.strategy[a] = self.util[action] / utilsSum + + return self.strategy + def GetStrategy(self, realizationWeight): normalizingSum = 0 for a in range(NUM_ACTIONS): diff --git a/GameTree.py b/GameTree.py index 1218c09..f85a2d4 100644 --- a/GameTree.py +++ b/GameTree.py @@ -72,6 +72,21 @@ def PrintRegrets(self): def PrintStrategy(self): self._printFunc(lambda gameNode: gameNode.strategy) + def GetUtilStrategy(self): + self._printFunc(lambda gameNode: gameNode.GetUtilStrategy()) + + def PrintUtilRegretStrategy(self): + self._printFunc(lambda gameNode: gameNode.GetUtilRegretStrategy()) + + def PrintUtils(self): + self._printFunc(lambda gameNode: gameNode.util) + + + + + + + diff --git a/KuhnCFR.py b/KuhnCFR.py index 226e60c..1e394e0 100644 --- a/KuhnCFR.py +++ b/KuhnCFR.py @@ -2,14 +2,21 @@ from treelib import Node, Tree from CfrNode import CfrNode from GameTree import GameTree -from matplotlib import pyplot as plt +#from matplotlib import pyplot as plt import Utils +import math +from collections import Counter +from math import sqrt + +import time class CFRtrainer: - def __init__(self): + def __init__(self, alpha): self.playerOneTree = GameTree(CfrNode) self.playerTwoTree = GameTree(CfrNode) self.kuhn = KuhnPoker() + self.stats = Counter() + self.alpha = alpha # def HasChild(self, parentId, childTag, tree): # if(self.GetChildByTag(parentId, childTag, tree)): @@ -32,6 +39,8 @@ def CFR(self, p0, p1): return self.kuhn.GetPayoff(curPlayer) curPlayerProb = p0 if curPlayer == Players.one else p1 + opProb = p1 if curPlayer == Players.one else p0 + tree = self.playerOneTree if curPlayer == Players.one else self.playerTwoTree cfrNode = tree.GetOrCreateDataNode(self.kuhn, curPlayer) strategy = cfrNode.GetStrategy(curPlayerProb) @@ -39,42 +48,59 @@ def CFR(self, p0, p1): nodeUtil = 0 infosetStr = self.kuhn.GetInfoset(curPlayer) + + + if(infosetStr == '2 | pas;bet;uplayed'): + card = self.kuhn.GetPlayerCard(Players.two) + self.stats[card] += card * opProb + infosetBackup = self.kuhn.SaveInfoSet() #'1 | bet;bet;uplayed' #'1 | bet;pas;uplayed' - if(('1 | bet;bet' in infosetStr) and curPlayer == Players.one): - g = 6 - - if(('1 | bet;pas' in infosetStr) and curPlayer == Players.one): - g = 6 + # if(('1 | bet;bet' in infosetStr) and curPlayer == Players.one): + # g = 6 for action in range(NUM_ACTIONS): self.kuhn.MakeAction(action) if(curPlayer == Players.one): - util[action] = -self.CFR(p0 * strategy[action], p1) + util[action] += -self.CFR(p0 * strategy[action], p1) + #util[action] += -self.CFR(p0 * strategy[action], p1) else: - util[action] = -self.CFR(p0, p1 * strategy[action]) + util[action] += -self.CFR(p0, p1 * strategy[action]) + #util[action] += -self.CFR(p0, p1 * strategy[action]) + + #util[action] /= 2 nodeUtil += strategy[action] * util[action] self.kuhn.RestoreInfoSet(infosetBackup) for action in range(NUM_ACTIONS): - regret = util[action] - nodeUtil - opProb = p1 if curPlayer == Players.one else p0 - cfrNode.regretSum[action] += opProb * regret + regret = util[action] - nodeUtil + if(regret > 0): + regret = regret + else: + regret = 0 + + #regret = max(0, regret) + cfrNode.regretSum[action] = cfrNode.regretSum[action] + opProb * regret #0445733333 return nodeUtil + def running_mean(self, x, N): + cumsum = np.cumsum(np.insert(x, 0, 0)) + return (cumsum[N:] - cumsum[:-N]) / N + def Train(self): util = 0 cnt = 0 + start_time = time.time() # self.playerOneTree.GetOrCreateCFRNode(self.kuhn, Players.one) # self.playerTwoTree.GetOrCreateCFRNode(self.kuhn, Players.one) @@ -85,50 +111,58 @@ def Train(self): # if(cnt % 10 == 0): # print(util / cnt) results = [] - - for i in range(1, 5000): + # utils = [] + for i in range(1, 30000): self.kuhn.NewRound() - util += self.CFR(1, 1) + curUtil = self.CFR(1, 1) + # utils.append(curUtil) + util += curUtil if(cnt % 100 == 0): results.append(util / i) - print("Avg util:", util / i) - plt.plot(results) - plt.show() + # print("Time: ", time.time() - start_time) + # print("Avg util:", util / i) + # plt.plot(results) + # plt.show() + def CheckNash(self): + if (self.kuhn.IsPlayerOneCloseToNash(self.playerOneTree)): + print("Player one is in Nash") + else: + print("Player one is not in Nash") + + if(self.kuhn.IsPlayerTwoCloseToNash(self.playerTwoTree)): + print("Player two is in Nash") + else: + print("Player two is not in Nash") -trainer = CFRtrainer() -trainer.Train() +trainer = CFRtrainer(1) +trainer.Train() +trainer.CheckNash() print("Player one avg strategy:") trainer.playerOneTree.PrintAvgStrategy() -# print("Player one best resp strategy:") -# trainer.playerOneTree.PrintBestResp() -# print("Player one regrets:") -# trainer.playerOneTree.PrintRegrets() - - -print("----------------------") -print("Player two avg strategy:") -trainer.playerTwoTree.PrintAvgStrategy() +print("Player one best resp strategy:") +trainer.playerOneTree.PrintBestResp() +# +# # # print("Player one regrets:") +# # # trainer.playerOneTree.PrintRegrets() +# # +# # +# print("----------------------") +# print("Player two avg strategy:") +# trainer.playerTwoTree.PrintAvgStrategy() # print("Player two best resp strategy:") # trainer.playerTwoTree.PrintBestResp() -# print("Player two regrets:") -# trainer.playerTwoTree.PrintRegrets() - -if (trainer.kuhn.IsPlayerOneCloseToNash(trainer.playerOneTree)): - print("Player one is in Nash") -else: - print("Player one is not in Nash") - -if(trainer.kuhn.IsPlayerTwoCloseToNash(trainer.playerTwoTree)): - print("Player two is in Nash") -else: - print("Player two is not in Nash") - - -print("done") +# # print("Player two regrets:") +# # trainer.playerTwoTree.PrintRegrets() +# + +# +# print("Max dif: " , KuhnPoker.MaxDif) +# print("done") +# diff --git a/KuhnPoker.py b/KuhnPoker.py index a61ae37..fb45b69 100644 --- a/KuhnPoker.py +++ b/KuhnPoker.py @@ -32,7 +32,10 @@ def NextPlayer(currentPlayer): return Players.one + class KuhnPoker: + MaxDif = 0 + @staticmethod def JoinMoves(moves): res = ";".join(move.name for move in moves) @@ -221,3 +224,71 @@ def MakeOneHotMove(self, adHocMove): + + @staticmethod + def IsClose(real, target, tolerance): + dif = abs(real - target) + if(KuhnPoker.MaxDif < dif): + KuhnPoker.MaxDif = dif + + return dif <= tolerance + + @staticmethod + def IsPlayerTwoCloseToNash(playerTwoTree, tolerance=0.07): + playerTwoStrategy = playerTwoTree['1 | bet;uplayed;uplayed'].data.GetAverageStrategy() + if (not KuhnPoker.IsClose(playerTwoStrategy[1], 0.0, tolerance)): # when having a Jack, never calling + return False + + playerTwoStrategy = playerTwoTree['1 | pas;uplayed;uplayed'].data.GetAverageStrategy() + if (not KuhnPoker.IsClose(playerTwoStrategy[1], 1 / 3, tolerance)): # when having a Jack, betting with the probability of 1/3 + return False + + playerTwoStrategy = playerTwoTree['2 | pas;uplayed;uplayed'].data.GetAverageStrategy() + if (not KuhnPoker.IsClose(playerTwoStrategy[0], 1.0, tolerance)): # when having a Queen, checking if possible + return False + + playerTwoStrategy = playerTwoTree['2 | bet;uplayed;uplayed'].data.GetAverageStrategy() + if (not KuhnPoker.IsClose(playerTwoStrategy[1], 1 / 3, tolerance)): # otherwise calling with the probability of 1/3 + return False + + playerTwoStrategy = playerTwoTree['3 | pas;uplayed;uplayed'].data.GetAverageStrategy() + if (not KuhnPoker.IsClose(playerTwoStrategy[0], 0.0, tolerance)): # Always betting or calling when having a King + return False + + playerTwoStrategy = playerTwoTree['3 | bet;uplayed;uplayed'].data.GetAverageStrategy() + if (not KuhnPoker.IsClose(playerTwoStrategy[1], 1.0, tolerance)): # Always betting or calling when having a King + return False + + return True + + @staticmethod + def IsPlayerOneCloseToNash(playerOneTree, tolerance=0.07): + + playerStrategy = playerOneTree['1 | uplayed;uplayed;uplayed'].data.GetAverageStrategy() + alpha = playerStrategy[1] + if (alpha > (1/3 + tolerance) or alpha < 0): # freely chooses the probability alpha with which he will bet when having a Jack [0; 1/3] + return False + + playerStrategy = playerOneTree['1 | pas;bet;uplayed'].data.GetAverageStrategy() + if (not KuhnPoker.IsClose(playerStrategy[1], 0.0, tolerance)): + return False + + playerStrategy = playerOneTree['2 | uplayed;uplayed;uplayed'].data.GetAverageStrategy() + if (not KuhnPoker.IsClose(playerStrategy[0], 1.0, tolerance)): # he should always check when having a Queen + return False + + playerStrategy = playerOneTree['2 | pas;bet;uplayed'].data.GetAverageStrategy() + if (not KuhnPoker.IsClose(playerStrategy[1], (1/3 + alpha), tolerance)): # if the other player bets after this check, he should call with the probability of 1/3 + alpha + return False + + playerStrategy = playerOneTree['3 | uplayed;uplayed;uplayed'].data.GetAverageStrategy() + if (not KuhnPoker.IsClose(playerStrategy[1], 3 * alpha, tolerance)): # When having a King, he should bet with the probability of 3 * alpha + return False + + playerStrategy = playerOneTree['3 | pas;bet;uplayed'].data.GetAverageStrategy() + if (not KuhnPoker.IsClose(playerStrategy[1], 1.0, tolerance)): + return False + + return True + + diff --git a/KuhnReverse.py b/KuhnReverse.py new file mode 100644 index 0000000..8579731 --- /dev/null +++ b/KuhnReverse.py @@ -0,0 +1 @@ +#def BuildTree(self): \ No newline at end of file diff --git a/2.py b/QL_pure.py similarity index 64% rename from 2.py rename to QL_pure.py index 6652016..df027b4 100644 --- a/2.py +++ b/QL_pure.py @@ -3,6 +3,8 @@ from CfrNode import CfrNode from GameTree import GameTree from matplotlib import pyplot as plt +import Utils +import math class CFRtrainer: def __init__(self): @@ -40,8 +42,11 @@ def CFR(self, p0, p1): infosetStr = self.kuhn.GetInfoset(curPlayer) infosetBackup = self.kuhn.SaveInfoSet() - if(('3 | pas' in infosetStr) and curPlayer == Players.two): - g = 6 + #'1 | bet;bet;uplayed' + #'1 | bet;pas;uplayed' + + # if(('1 | bet;bet' in infosetStr) and curPlayer == Players.one): + # g = 6 for action in range(NUM_ACTIONS): self.kuhn.MakeAction(action) @@ -51,21 +56,14 @@ def CFR(self, p0, p1): else: util[action] = -self.CFR(p0, p1 * strategy[action]) + cfrNode.util[action] += util[action] + nodeUtil += strategy[action] * util[action] self.kuhn.RestoreInfoSet(infosetBackup) - # gamma = 0.95 - # if(cfrNode.TotalUtil > 0): - # cfrNode.TotalUtil = (gamma * cfrNode.TotalUtil + (1 - gamma) * nodeUtil) / 2 - # else: - # cfrNode.TotalUtil = nodeUtil - cfrNode.TotalUtil += nodeUtil - - cfrNode.utilsCount += 1 - for action in range(NUM_ACTIONS): - regret = util[action] - nodeUtil + regret = util[action] - nodeUtil opProb = p1 if curPlayer == Players.one else p0 cfrNode.regretSum[action] += opProb * regret @@ -74,6 +72,10 @@ def CFR(self, p0, p1): #0445733333 return nodeUtil + def running_mean(self, x, N): + cumsum = np.cumsum(np.insert(x, 0, 0)) + return (cumsum[N:] - cumsum[:-N]) / N + def Train(self): util = 0 cnt = 0 @@ -87,39 +89,66 @@ def Train(self): # if(cnt % 10 == 0): # print(util / cnt) results = [] - + # utils = [] for i in range(1, 10000): self.kuhn.NewRound() - util += self.CFR(1, 1) + curUtil = self.CFR(1, 1) + # utils.append(curUtil) + util += curUtil if(cnt % 100 == 0): results.append(util / i) print("Avg util:", util / i) - plt.plot(results) - plt.show() + # plt.plot(results) + # plt.show() + + def CheckNash(self): + if (self.kuhn.IsPlayerOneCloseToNash(self.playerOneTree)): + print("Player one is in Nash") + else: + print("Player one is not in Nash") + + if(self.kuhn.IsPlayerTwoCloseToNash(self.playerTwoTree)): + print("Player two is in Nash") + else: + print("Player two is not in Nash") + trainer = CFRtrainer() trainer.Train() - +# print("Player one avg strategy:") trainer.playerOneTree.PrintAvgStrategy() print("Player one best resp strategy:") trainer.playerOneTree.PrintBestResp() -print("Player one regrets:") -trainer.playerOneTree.PrintRegrets() +print("Player Util regret strategy:") +trainer.playerOneTree.PrintUtilRegretStrategy() -print("----------------------") -print("Player two avg strategy:") -trainer.playerTwoTree.PrintAvgStrategy() -print("Player two best resp strategy:") -trainer.playerTwoTree.PrintBestResp() -print("Player two regrets:") -trainer.playerTwoTree.PrintRegrets() +print("Player Util strategy:") +trainer.playerOneTree.GetUtilStrategy() +print("Player Utils:") +trainer.playerOneTree.PrintUtils() -print("done") +print("Player one regrets:") +trainer.playerOneTree.PrintRegrets() +# +# +# print("----------------------") +# print("Player two avg strategy:") +# trainer.playerTwoTree.PrintAvgStrategy() +# # print("Player two best resp strategy:") +# # trainer.playerTwoTree.PrintBestResp() +# # print("Player two regrets:") +# # trainer.playerTwoTree.PrintRegrets() +# + +# +# print("Max dif: " , KuhnPoker.MaxDif) +# print("done") +# diff --git a/ReverseCFR.py b/ReverseCFR.py new file mode 100644 index 0000000..e69de29 diff --git a/Tester.py b/Tester.py index 3c8c28a..c61cc48 100644 --- a/Tester.py +++ b/Tester.py @@ -1,8 +1,12 @@ from KuhnPoker import * import Utils -from matplotlib import pyplot as plt -from m3 import CFRtrainer as rndSampler +#from matplotlib import pyplot as plt +#from m3 import CFRtrainer as rndSampler from KuhnCFR import CFRtrainer as vanillaCFR +#from rnd_smapling_2 import RndSampler + +#from severalMove import CFRtrainer as RndSampler + def CFR(kuhn, playerOneTree, playerTwoTree): curPlayer = kuhn.GetCurrentPlayer() @@ -16,7 +20,7 @@ def CFR(kuhn, playerOneTree, playerTwoTree): action = Utils.MakeChoise(strategy, 1)[0] kuhn.MakeAction(action) - util = CFR(kuhn, playerOneTree, playerTwoTree) + util = -CFR(kuhn, playerOneTree, playerTwoTree) return util @@ -24,26 +28,54 @@ def Test(playerOneTree, playerTwoTree): kuhn = KuhnPoker() util = 0 - for i in range(1, 5000): + for i in range(1, 8000): kuhn.NewRound() - util += CFR(kuhn, playerOneTree, playerTwoTree) + curUtil = CFR(kuhn, playerOneTree, playerTwoTree) + util += curUtil return util / i print("Training vanillaCFR") -vanillaCFRtrainer = vanillaCFR() -vanillaCFRtrainer.Train() -util2 = Test(vanillaCFRtrainer.playerOneTree, vanillaCFRtrainer.playerTwoTree) +sumU = 0 +countU = 0 + +for i in range(10): + vanillaCFRtrainer1 = vanillaCFR(1.0) + vanillaCFRtrainer1.Train() + + vanillaCFRtrainer2 = vanillaCFR(10) + vanillaCFRtrainer2.Train() + + # vanillaCFRtrainer.CheckNash() + testUtil1 = Test(vanillaCFRtrainer1.playerOneTree, vanillaCFRtrainer2.playerTwoTree) + #print("Vanilla safe play test util: ", testUtil1) + + + testUtil2 = Test(vanillaCFRtrainer2.playerOneTree, vanillaCFRtrainer1.playerTwoTree) + sumU += testUtil1 + (-testUtil2) + countU += 1 + + +print("Avg Vanila profit 1: ", sumU / countU) +print(countU) + #print("Vanilla safe play test util: _2", testUtil2) + + + +#print("Vanila profit 1 ", testUtil1 + (-testUtil2)) # print("Training rndSampler") -# rndTrainer = rndSampler() +# rndTrainer = RndSampler() # rndTrainer.Train() # -# util1 = Test(vanillaCFRtrainer.playerOneTree, rndTrainer.playerTwoTree) -# print("Avg util vanillaCFR (p1) vs rndSampler (p2):", util1) - -#util2 = Test(rndTrainer.playerOneTree, vanillaCFRtrainer.playerTwoTree) -print("Avg util rndSampler (p1) vs rndSampler (p2):", util2) \ No newline at end of file +# vanila1 = Test(vanillaCFRtrainer.playerOneTree, rndTrainer.playerTwoTree) +# print("Avg util vanillaCFR (p1) vs rndSampler (p2):", vanila1) +# +# vanila2 = Test(rndTrainer.playerOneTree, vanillaCFRtrainer.playerTwoTree) +# print("Avg util rndSampler (p1) vs vanillaCFR (p2):", vanila2) +# +# +# print("Vanila profit", vanila1 + (-vanila2)) \ No newline at end of file diff --git a/Utils.py b/Utils.py index 7ea7061..14bc6ce 100644 --- a/Utils.py +++ b/Utils.py @@ -16,4 +16,64 @@ def MakeChoise(dist, batchSize): def MakeNormChoise(dist, batchSize): dist = Normalise(dist) - return MakeChoise(dist, batchSize) \ No newline at end of file + return MakeChoise(dist, batchSize) + + +def IsClose(real, target, tolerance): + dif = abs(real - target) + return dif <= tolerance + +def IsPlayerTwoCloseToNash(playerTwoTree, tolerance = 0.5): + playerTwoStrategy = playerTwoTree['1 | bet;uplayed;uplayed'].data.GetAverageStrategy() + if(not IsClose(playerTwoStrategy[1], 0.0, tolerance)): # when having a Jack, never calling + return False + + playerTwoStrategy = playerTwoTree['1 | pas;uplayed;uplayed'].data.GetAverageStrategy() + if(not IsClose(playerTwoStrategy[1], 1/3, tolerance)): # when having a Jack, betting with the probability of 1/3 + return False + + playerTwoStrategy = playerTwoTree['2 | pas;uplayed;uplayed'].data.GetAverageStrategy() + if(not IsClose(playerTwoStrategy[0], 1.0, tolerance)): # when having a Queen, checking if possible + return False + + playerTwoStrategy = playerTwoTree['2 | bet;uplayed;uplayed'].data.GetAverageStrategy() + if(not IsClose(playerTwoStrategy[1], 1/3, tolerance)): # otherwise calling with the probability of 1/3 + return False + + playerTwoStrategy = playerTwoTree['3 | pas;uplayed;uplayed'].data.GetAverageStrategy() + if (not IsClose(playerTwoStrategy[0], 0.0, tolerance)): # Always betting or calling when having a King + return False + + playerTwoStrategy = playerTwoTree['3 | bet;uplayed;uplayed'].data.GetAverageStrategy() + if (not IsClose(playerTwoStrategy[1], 1.0, tolerance)): # Always betting or calling when having a King + return False + + return True + + +def IsPlayerTwoCloseToNash(playerTwoTree, tolerance=0.5): + playerTwoStrategy = playerTwoTree['1 | bet;uplayed;uplayed'].data.GetAverageStrategy() + if (not IsClose(playerTwoStrategy[1], 0.0, tolerance)): # when having a Jack, never calling + return False + + playerTwoStrategy = playerTwoTree['1 | pas;uplayed;uplayed'].data.GetAverageStrategy() + if (not IsClose(playerTwoStrategy[1], 1 / 3, tolerance)): # when having a Jack, betting with the probability of 1/3 + return False + + playerTwoStrategy = playerTwoTree['2 | pas;uplayed;uplayed'].data.GetAverageStrategy() + if (not IsClose(playerTwoStrategy[0], 1.0, tolerance)): # when having a Queen, checking if possible + return False + + playerTwoStrategy = playerTwoTree['2 | bet;uplayed;uplayed'].data.GetAverageStrategy() + if (not IsClose(playerTwoStrategy[1], 1 / 3, tolerance)): # otherwise calling with the probability of 1/3 + return False + + playerTwoStrategy = playerTwoTree['3 | pas;uplayed;uplayed'].data.GetAverageStrategy() + if (not IsClose(playerTwoStrategy[0], 0.0, tolerance)): # Always betting or calling when having a King + return False + + playerTwoStrategy = playerTwoTree['3 | bet;uplayed;uplayed'].data.GetAverageStrategy() + if (not IsClose(playerTwoStrategy[1], 1.0, tolerance)): # Always betting or calling when having a King + return False + + return True \ No newline at end of file diff --git a/m3.py b/m3.py index f19d928..aadc738 100644 --- a/m3.py +++ b/m3.py @@ -29,7 +29,7 @@ def CFR(self, curSamplesCount): if (random.random() < CFRtrainer.BETA): strategy = np.array([0.5] * NUM_ACTIONS) else: - strategy = cfrNode.GetUtilStrategy() + strategy = cfrNode.GetUtilRegretStrategy() #CFRtrainer.BETA *= 0.9 sampleSize = max(int(round(curSamplesCount)), 1) diff --git a/rnd_smapling_2.py b/rnd_smapling_2.py new file mode 100644 index 0000000..4e40981 --- /dev/null +++ b/rnd_smapling_2.py @@ -0,0 +1,132 @@ +import random +import Utils +from KuhnPoker import * +from treelib import Node, Tree +from CfrNode import CfrNode +from GameTree import GameTree +from matplotlib import pyplot as plt + +class RndSampler: + BETA = 0.0 + SAMPLE_SIZE = 1 + + def __init__(self): + self.playerOneTree = GameTree(CfrNode) + self.playerTwoTree = GameTree(CfrNode) + self.kuhn = KuhnPoker() + + # def UpdateUtil(self, curPlayer, util, strategy, action, p0, p1, isP1Freeze): + # if (curPlayer == Players.one): + # util[action] = -self.CFR(p0 * strategy[action], p1, isP1Freeze) + # else: + # util[action] = -self.CFR(p0, p1 * strategy[action], isP1Freeze) + + def CFR(self, p0, p1): + curPlayer = self.kuhn.GetCurrentPlayer() + + if(self.kuhn.IsTerminateState()): + return self.kuhn.GetPayoff(curPlayer) + + curPlayerProb = p0 if curPlayer == Players.one else p1 + tree = self.playerOneTree if curPlayer == Players.one else self.playerTwoTree + cfrNode = tree.GetOrCreateDataNode(self.kuhn, curPlayer) + util = [0.0] * NUM_ACTIONS + nodeUtil = 0 + + strategy = cfrNode.GetStrategy(curPlayerProb) + + if (random.random() < RndSampler.BETA): + sampleStrategy = np.array([0.5] * NUM_ACTIONS) + else: + sampleStrategy = strategy + + sampleStrategy = Utils.Normalise(sampleStrategy) + + sampleSize = max(int(round(RndSampler.SAMPLE_SIZE)), 1) + actions = Utils.MakeChoise(sampleStrategy, sampleSize) + + + infosetStr = self.kuhn.GetInfoset(curPlayer) + + + repsCount = [0] * NUM_ACTIONS + for action in actions: + infosetBackup = self.kuhn.SaveInfoSet() + self.kuhn.MakeAction(action) + if (curPlayer == Players.one): + util[action] += -self.CFR(p0 * strategy[action], p1) + else: + util[action] += -self.CFR(p0, p1 * strategy[action]) + + repsCount[action] += 1 + + self.kuhn.RestoreInfoSet(infosetBackup) + + for action in range(NUM_ACTIONS): + if(repsCount[action] > 0): + util[action] /= repsCount[action] + nodeUtil += strategy[action] * util[action] + + opProb = p1 if curPlayer == Players.one else p0 + + for action in range(NUM_ACTIONS): + regret = util[action] - nodeUtil + cfrNode.regretSum[action] += opProb * regret + + return nodeUtil + + def Train(self): + util = 0 + cnt = 0 + + results = [] + + for i in range(1, 2000): + self.kuhn.NewRound() + util += self.CFR(1, 1) + if(cnt % 100 == 0): + results.append(util / i) + + # RndSampler.BETA *= 0.9999 + + print("Avg util:", util / i) + plt.plot(results) + plt.show() + print("Beta ", RndSampler.BETA) + + + + +trainer = RndSampler() +trainer.Train() + +print("Player one avg strategy:") +trainer.playerOneTree.PrintAvgStrategy() +# print("Player one best resp strategy:") +# trainer.playerOneTree.PrintBestResp() +# print("Player one regrets:") +# trainer.playerOneTree.PrintRegrets() + + +print("----------------------") +print("Player two avg strategy:") +trainer.playerTwoTree.PrintAvgStrategy() +# print("Player two best resp strategy:") +# trainer.playerTwoTree.PrintBestResp() +# print("Player two regrets:") +# trainer.playerTwoTree.PrintRegrets() + + +if (trainer.kuhn.IsPlayerOneCloseToNash(trainer.playerOneTree)): + print("Player one is in Nash") +else: + print("Player one is not in Nash") + +if(trainer.kuhn.IsPlayerTwoCloseToNash(trainer.playerTwoTree)): + print("Player two is in Nash") +else: + print("Player two is not in Nash") + + +print("done") + diff --git a/severalMove.py b/severalMove.py new file mode 100644 index 0000000..9b377c0 --- /dev/null +++ b/severalMove.py @@ -0,0 +1,158 @@ +from KuhnPoker import * +from treelib import Node, Tree +from CfrNode import CfrNode +from GameTree import GameTree +from matplotlib import pyplot as plt + +class CFRtrainer: + def __init__(self): + self.playerOneTree = GameTree(CfrNode) + self.playerTwoTree = GameTree(CfrNode) + self.kuhn = KuhnPoker() + + # def HasChild(self, parentId, childTag, tree): + # if(self.GetChildByTag(parentId, childTag, tree)): + # return True + # + # return False + # + # def GetChildByTag(self, parentId, childTag, tree): + # for childId in tree.children(parentId): + # childNode = tree[childId] + # if(childNode.tag == childTag): + # return childNode + # + # return None + + def CFR(self, p0, p1): + curPlayer = self.kuhn.GetCurrentPlayer() + + if(self.kuhn.IsTerminateState()): + return self.kuhn.GetPayoff(curPlayer) + + curPlayerProb = p0 if curPlayer == Players.one else p1 + tree = self.playerOneTree if curPlayer == Players.one else self.playerTwoTree + cfrNode = tree.GetOrCreateDataNode(self.kuhn, curPlayer) + strategy = cfrNode.GetStrategy(curPlayerProb) + util = [0.0] * NUM_ACTIONS + nodeUtil = 0 + + infosetStr = self.kuhn.GetInfoset(curPlayer) + infosetBackup = self.kuhn.SaveInfoSet() + + + # for action in range(NUM_ACTIONS): + # self.kuhn.MakeAction(action) + # + # if(curPlayer == Players.one): + # util[action] += -self.CFR(p0 * strategy[action], p1) + # else: + # util[action] += -self.CFR(p0, p1 * strategy[action]) + # + # self.kuhn.RestoreInfoSet(infosetBackup) + # + # + # nodeUtil += strategy[action] * util[action] + + self.kuhn.MakeAction(1) + + if (curPlayer == Players.one): + util[1] += -self.CFR(p0 * strategy[1], p1) + else: + util[1] += -self.CFR(p0, p1 * strategy[1]) + + self.kuhn.RestoreInfoSet(infosetBackup) + + nodeUtil += strategy[1] * util[1] + + self.kuhn.MakeAction(0) + if (curPlayer == Players.one): + util[0] += -self.CFR(p0 * strategy[0], p1) / 2 + util[0] += -self.CFR(p0 * strategy[0], p1) / 2 + else: + util[0] += -self.CFR(p0, p1 * strategy[0]) / 2 + util[0] += -self.CFR(p0, p1 * strategy[0]) / 2 + + self.kuhn.RestoreInfoSet(infosetBackup) + + + nodeUtil += strategy[0] * util[0] + # gamma = 0.95 + # if(cfrNode.TotalUtil > 0): + # cfrNode.TotalUtil = (gamma * cfrNode.TotalUtil + (1 - gamma) * nodeUtil) / 2 + # else: + # cfrNode.TotalUtil = nodeUtil + cfrNode.TotalUtil += nodeUtil + + cfrNode.utilsCount += 1 + + for action in range(NUM_ACTIONS): + regret = util[action] - nodeUtil + opProb = p1 if curPlayer == Players.one else p0 + cfrNode.regretSum[action] += opProb * regret + + + +#0445733333 + return nodeUtil + + def Train(self): + util = 0 + cnt = 0 + + # self.playerOneTree.GetOrCreateCFRNode(self.kuhn, Players.one) + # self.playerTwoTree.GetOrCreateCFRNode(self.kuhn, Players.one) + + # while (self.kuhn.NewRound() != 1): + # util += self.CFR(1, 1) + # cnt += 1 + # if(cnt % 10 == 0): + # print(util / cnt) + results = [] + + for i in range(1, 10000): + self.kuhn.NewRound() + util += self.CFR(1, 1) + if(cnt % 100 == 0): + results.append(util / i) + + print("Avg util:", util / i) + plt.plot(results) + plt.show() + + + + def CheckNash(self): + if (self.kuhn.IsPlayerOneCloseToNash(self.playerOneTree)): + print("Player one is in Nash") + else: + print("Player one is not in Nash") + + if(self.kuhn.IsPlayerTwoCloseToNash(self.playerTwoTree)): + print("Player two is in Nash") + else: + print("Player two is not in Nash") + +trainer = CFRtrainer() +trainer.Train() +trainer.CheckNash() + +print("Player one avg strategy:") +trainer.playerOneTree.PrintAvgStrategy() +# print("Player one best resp strategy:") +# trainer.playerOneTree.PrintBestResp() +# print("Player one regrets:") +# trainer.playerOneTree.PrintRegrets() + + +print("----------------------") +print("Player two avg strategy:") +trainer.playerTwoTree.PrintAvgStrategy() +# print("Player two best resp strategy:") +# trainer.playerTwoTree.PrintBestResp() +# print("Player two regrets:") +# trainer.playerTwoTree.PrintRegrets() + + +print("done") +