Skip to content

Commit

Permalink
temp save
Browse files Browse the repository at this point in the history
  • Loading branch information
snarb committed May 12, 2017
1 parent 4c7ffd8 commit 983474d
Show file tree
Hide file tree
Showing 13 changed files with 652 additions and 97 deletions.
29 changes: 20 additions & 9 deletions CFR_rnd_prop_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,19 +103,30 @@ def Train(self):

print("Player one avg strategy:")
trainer.playerOneTree.PrintAvgStrategy()
print("Player one best resp strategy:")
trainer.playerOneTree.PrintBestResp()
print("Player one regrets:")
trainer.playerOneTree.PrintRegrets()
# print("Player one best resp strategy:")
# trainer.playerOneTree.PrintBestResp()
# print("Player one regrets:")
# trainer.playerOneTree.PrintRegrets()


print("----------------------")
print("Player two avg strategy:")
trainer.playerTwoTree.PrintAvgStrategy()
print("Player two best resp strategy:")
trainer.playerTwoTree.PrintBestResp()
print("Player two regrets:")
trainer.playerTwoTree.PrintRegrets()
# trainer.playerTwoTree.PrintAvgStrategy()
# print("Player two best resp strategy:")
# trainer.playerTwoTree.PrintBestResp()
# print("Player two regrets:")
# trainer.playerTwoTree.PrintRegrets()


if (trainer.kuhn.IsPlayerOneCloseToNash(trainer.playerOneTree)):
print("Player one is in Nash")
else:
print("Player one is not in Nash")

if(trainer.kuhn.IsPlayerTwoCloseToNash(trainer.playerTwoTree)):
print("Player two is in Nash")
else:
print("Player two is not in Nash")


print("done")
Expand Down
14 changes: 13 additions & 1 deletion CfrNode.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def __init__(self, infoset):
self.utilsCount = 0
self.TotalUtil = 0

def GetUtilStrategy(self):
def GetUtilRegretStrategy(self):

utilsSum = 0
for action in range(NUM_ACTIONS):
Expand Down Expand Up @@ -47,6 +47,18 @@ def GetUtilStrategy(self):

return self.strategy


def GetUtilStrategy(self):
    """Set ``self.strategy`` proportionally to the per-action values in ``self.util``.

    Each action ``a`` receives ``self.util[a] / sum(self.util)``. When the
    utilities sum to exactly zero the proportional split is undefined, so
    the strategy falls back to a uniform distribution instead of raising
    ``ZeroDivisionError``.

    Returns:
        ``self.strategy``, updated in place.
    """
    utilsSum = sum(self.util[a] for a in range(NUM_ACTIONS))

    for a in range(NUM_ACTIONS):
        if utilsSum != 0:
            # Index with this loop's own variable. The previous version reused
            # the variable left over from the summing loop, which assigned the
            # last action's share to every action.
            self.strategy[a] = self.util[a] / utilsSum
        else:
            self.strategy[a] = 1.0 / NUM_ACTIONS

    return self.strategy

def GetStrategy(self, realizationWeight):
normalizingSum = 0
for a in range(NUM_ACTIONS):
Expand Down
15 changes: 15 additions & 0 deletions GameTree.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,21 @@ def PrintRegrets(self):
def PrintStrategy(self):
    """Call ``self._printFunc`` with a selector that reads ``gameNode.strategy``.

    How ``_printFunc`` walks the nodes and formats the output is defined
    outside this view.
    """
    self._printFunc(lambda gameNode: gameNode.strategy)

def GetUtilStrategy(self):
    """Call ``self._printFunc`` with a selector that invokes ``gameNode.GetUtilStrategy()``.

    NOTE(review): despite the ``Get`` prefix this goes through ``_printFunc``
    like the ``Print*`` siblings and returns nothing itself -- confirm the
    intended name with callers before renaming.
    """
    self._printFunc(lambda gameNode: gameNode.GetUtilStrategy())

def PrintUtilRegretStrategy(self):
    """Call ``self._printFunc`` with a selector that invokes ``gameNode.GetUtilRegretStrategy()``.

    The selector calls a method on the node rather than reading an attribute;
    whether that call mutates the node is not visible here.
    """
    self._printFunc(lambda gameNode: gameNode.GetUtilRegretStrategy())

def PrintUtils(self):
    """Call ``self._printFunc`` with a selector that reads ``gameNode.util``."""
    self._printFunc(lambda gameNode: gameNode.util)










Expand Down
122 changes: 78 additions & 44 deletions KuhnCFR.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,21 @@
from treelib import Node, Tree
from CfrNode import CfrNode
from GameTree import GameTree
from matplotlib import pyplot as plt
#from matplotlib import pyplot as plt
import Utils
import math
from collections import Counter
from math import sqrt

import time

class CFRtrainer:
def __init__(self):
def __init__(self, alpha):
self.playerOneTree = GameTree(CfrNode)
self.playerTwoTree = GameTree(CfrNode)
self.kuhn = KuhnPoker()
self.stats = Counter()
self.alpha = alpha

# def HasChild(self, parentId, childTag, tree):
# if(self.GetChildByTag(parentId, childTag, tree)):
Expand All @@ -32,49 +39,68 @@ def CFR(self, p0, p1):
return self.kuhn.GetPayoff(curPlayer)

curPlayerProb = p0 if curPlayer == Players.one else p1
opProb = p1 if curPlayer == Players.one else p0

tree = self.playerOneTree if curPlayer == Players.one else self.playerTwoTree
cfrNode = tree.GetOrCreateDataNode(self.kuhn, curPlayer)
strategy = cfrNode.GetStrategy(curPlayerProb)
util = [0.0] * NUM_ACTIONS
nodeUtil = 0

infosetStr = self.kuhn.GetInfoset(curPlayer)


if(infosetStr == '2 | pas;bet;uplayed'):
card = self.kuhn.GetPlayerCard(Players.two)
self.stats[card] += card * opProb

infosetBackup = self.kuhn.SaveInfoSet()

#'1 | bet;bet;uplayed'
#'1 | bet;pas;uplayed'

if(('1 | bet;bet' in infosetStr) and curPlayer == Players.one):
g = 6

if(('1 | bet;pas' in infosetStr) and curPlayer == Players.one):
g = 6
# if(('1 | bet;bet' in infosetStr) and curPlayer == Players.one):
# g = 6

for action in range(NUM_ACTIONS):
self.kuhn.MakeAction(action)

if(curPlayer == Players.one):
util[action] = -self.CFR(p0 * strategy[action], p1)
util[action] += -self.CFR(p0 * strategy[action], p1)
#util[action] += -self.CFR(p0 * strategy[action], p1)
else:
util[action] = -self.CFR(p0, p1 * strategy[action])
util[action] += -self.CFR(p0, p1 * strategy[action])
#util[action] += -self.CFR(p0, p1 * strategy[action])

#util[action] /= 2

nodeUtil += strategy[action] * util[action]

self.kuhn.RestoreInfoSet(infosetBackup)

for action in range(NUM_ACTIONS):
regret = util[action] - nodeUtil
opProb = p1 if curPlayer == Players.one else p0
cfrNode.regretSum[action] += opProb * regret
regret = util[action] - nodeUtil
if(regret > 0):
regret = regret
else:
regret = 0

#regret = max(0, regret)
cfrNode.regretSum[action] = cfrNode.regretSum[action] + opProb * regret



#0445733333
return nodeUtil

def running_mean(self, x, N):
    """Return the simple moving average of ``x`` over windows of ``N`` samples.

    A zero is prepended before taking the cumulative sum so that every window
    total is the difference of two prefix sums.

    Args:
        x: Sequence of numbers to smooth.
        N: Window length; must be at least 1.

    Returns:
        NumPy array holding one mean per full window (``len(x) - N + 1``
        values; empty when ``N`` is larger than ``len(x)``).

    Raises:
        ValueError: If ``N`` is smaller than 1. A negative window would
            otherwise silently produce meaningless values.
    """
    if N < 1:
        raise ValueError("running_mean window N must be >= 1")

    cumsum = np.cumsum(np.insert(x, 0, 0))
    return (cumsum[N:] - cumsum[:-N]) / N

def Train(self):
util = 0
cnt = 0
start_time = time.time()

# self.playerOneTree.GetOrCreateCFRNode(self.kuhn, Players.one)
# self.playerTwoTree.GetOrCreateCFRNode(self.kuhn, Players.one)
Expand All @@ -85,50 +111,58 @@ def Train(self):
# if(cnt % 10 == 0):
# print(util / cnt)
results = []

for i in range(1, 5000):
# utils = []
for i in range(1, 30000):
self.kuhn.NewRound()
util += self.CFR(1, 1)
curUtil = self.CFR(1, 1)
# utils.append(curUtil)
util += curUtil
if(cnt % 100 == 0):
results.append(util / i)

print("Avg util:", util / i)
plt.plot(results)
plt.show()
# print("Time: ", time.time() - start_time)
# print("Avg util:", util / i)
# plt.plot(results)
# plt.show()

def CheckNash(self):
    """Print, for each player in turn, whether their tree passes the Nash check.

    Player one is checked with ``self.kuhn.IsPlayerOneCloseToNash`` and
    reported before player two is checked with
    ``self.kuhn.IsPlayerTwoCloseToNash``.
    """
    playerOneOk = self.kuhn.IsPlayerOneCloseToNash(self.playerOneTree)
    print("Player one is in Nash" if playerOneOk else "Player one is not in Nash")

    playerTwoOk = self.kuhn.IsPlayerTwoCloseToNash(self.playerTwoTree)
    print("Player two is in Nash" if playerTwoOk else "Player two is not in Nash")




trainer = CFRtrainer()
trainer.Train()

trainer = CFRtrainer(1)
trainer.Train()
trainer.CheckNash()
print("Player one avg strategy:")
trainer.playerOneTree.PrintAvgStrategy()
# print("Player one best resp strategy:")
# trainer.playerOneTree.PrintBestResp()
# print("Player one regrets:")
# trainer.playerOneTree.PrintRegrets()


print("----------------------")
print("Player two avg strategy:")
trainer.playerTwoTree.PrintAvgStrategy()
print("Player one best resp strategy:")
trainer.playerOneTree.PrintBestResp()
#
# # # print("Player one regrets:")
# # # trainer.playerOneTree.PrintRegrets()
# #
# #
# print("----------------------")
# print("Player two avg strategy:")
# trainer.playerTwoTree.PrintAvgStrategy()
# print("Player two best resp strategy:")
# trainer.playerTwoTree.PrintBestResp()
# print("Player two regrets:")
# trainer.playerTwoTree.PrintRegrets()

if (trainer.kuhn.IsPlayerOneCloseToNash(trainer.playerOneTree)):
print("Player one is in Nash")
else:
print("Player one is not in Nash")

if(trainer.kuhn.IsPlayerTwoCloseToNash(trainer.playerTwoTree)):
print("Player two is in Nash")
else:
print("Player two is not in Nash")


print("done")
# # print("Player two regrets:")
# # trainer.playerTwoTree.PrintRegrets()
#

#
# print("Max dif: " , KuhnPoker.MaxDif)
# print("done")
#

71 changes: 71 additions & 0 deletions KuhnPoker.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ def NextPlayer(currentPlayer):

return Players.one


class KuhnPoker:
MaxDif = 0

@staticmethod
def JoinMoves(moves):
res = ";".join(move.name for move in moves)
Expand Down Expand Up @@ -221,3 +224,71 @@ def MakeOneHotMove(self, adHocMove):




@staticmethod
def IsClose(real, target, tolerance):
dif = abs(real - target)
if(KuhnPoker.MaxDif < dif):
KuhnPoker.MaxDif = dif

return dif <= tolerance

@staticmethod
def IsPlayerTwoCloseToNash(playerTwoTree, tolerance=0.07):
playerTwoStrategy = playerTwoTree['1 | bet;uplayed;uplayed'].data.GetAverageStrategy()
if (not KuhnPoker.IsClose(playerTwoStrategy[1], 0.0, tolerance)): # when having a Jack, never calling
return False

playerTwoStrategy = playerTwoTree['1 | pas;uplayed;uplayed'].data.GetAverageStrategy()
if (not KuhnPoker.IsClose(playerTwoStrategy[1], 1 / 3, tolerance)): # when having a Jack, betting with the probability of 1/3
return False

playerTwoStrategy = playerTwoTree['2 | pas;uplayed;uplayed'].data.GetAverageStrategy()
if (not KuhnPoker.IsClose(playerTwoStrategy[0], 1.0, tolerance)): # when having a Queen, checking if possible
return False

playerTwoStrategy = playerTwoTree['2 | bet;uplayed;uplayed'].data.GetAverageStrategy()
if (not KuhnPoker.IsClose(playerTwoStrategy[1], 1 / 3, tolerance)): # otherwise calling with the probability of 1/3
return False

playerTwoStrategy = playerTwoTree['3 | pas;uplayed;uplayed'].data.GetAverageStrategy()
if (not KuhnPoker.IsClose(playerTwoStrategy[0], 0.0, tolerance)): # Always betting or calling when having a King
return False

playerTwoStrategy = playerTwoTree['3 | bet;uplayed;uplayed'].data.GetAverageStrategy()
if (not KuhnPoker.IsClose(playerTwoStrategy[1], 1.0, tolerance)): # Always betting or calling when having a King
return False

return True

@staticmethod
def IsPlayerOneCloseToNash(playerOneTree, tolerance=0.07):

playerStrategy = playerOneTree['1 | uplayed;uplayed;uplayed'].data.GetAverageStrategy()
alpha = playerStrategy[1]
if (alpha > (1/3 + tolerance) or alpha < 0): # freely chooses the probability alpha with which he will bet when having a Jack [0; 1/3]
return False

playerStrategy = playerOneTree['1 | pas;bet;uplayed'].data.GetAverageStrategy()
if (not KuhnPoker.IsClose(playerStrategy[1], 0.0, tolerance)):
return False

playerStrategy = playerOneTree['2 | uplayed;uplayed;uplayed'].data.GetAverageStrategy()
if (not KuhnPoker.IsClose(playerStrategy[0], 1.0, tolerance)): # he should always check when having a Queen
return False

playerStrategy = playerOneTree['2 | pas;bet;uplayed'].data.GetAverageStrategy()
if (not KuhnPoker.IsClose(playerStrategy[1], (1/3 + alpha), tolerance)): # if the other player bets after this check, he should call with the probability of 1/3 + alpha
return False

playerStrategy = playerOneTree['3 | uplayed;uplayed;uplayed'].data.GetAverageStrategy()
if (not KuhnPoker.IsClose(playerStrategy[1], 3 * alpha, tolerance)): # When having a King, he should bet with the probability of 3 * alpha
return False

playerStrategy = playerOneTree['3 | pas;bet;uplayed'].data.GetAverageStrategy()
if (not KuhnPoker.IsClose(playerStrategy[1], 1.0, tolerance)):
return False

return True


1 change: 1 addition & 0 deletions KuhnReverse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#def BuildTree(self):
Loading

0 comments on commit 983474d

Please sign in to comment.