-
Notifications
You must be signed in to change notification settings - Fork 8
/
mcts.go
100 lines (87 loc) · 2.99 KB
/
mcts.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
package gomcts
import (
"math"
)
// monteCarloTreeSearchGameNode is a single node of the search tree built by
// MonteCarloTreeSearch.
type monteCarloTreeSearchGameNode struct {
	// parent is the node this one was expanded from (nil is passed for the root).
	parent *monteCarloTreeSearchGameNode
	// children are the child nodes registered on this node through addChild.
	children []*monteCarloTreeSearchGameNode
	// value is the game state this node stands for.
	value GameState
	// untriedActions are the legal actions of value that have not been
	// expanded into child nodes yet; they are consumed front to back.
	untriedActions []Action
	// causingAction is the action that was applied to the parent's state to
	// obtain value.
	causingAction Action
	// q accumulates the backpropagated results (signed by the parent's
	// NextToMove); n counts how many times backpropagate passed through
	// this node.
	q, n float64
}
// MonteCarloTreeSearch runs Monte Carlo Tree Search starting from state and
// returns the action leading to the best child of the root.
//
// Each of the requested simulations selects (or expands) a node with the tree
// policy, plays the game out from it using rolloutPolicy, and backpropagates
// the result towards the root. The final choice is made with an exploration
// weight of 0, so only the children's q/n ratio is compared.
//
// It returns nil when the root has no children to choose from, which happens
// when simulations is not positive or when no simulation expanded the root
// (for example because the initial state is already terminal).
func MonteCarloTreeSearch(state GameState, rolloutPolicy RolloutPolicy, simulations int) Action {
	root := newMCTSNode(nil, state, nil)
	for i := 0; i < simulations; i++ {
		leaf := root.treePolicy()
		leaf.backpropagate(leaf.rollout(rolloutPolicy))
	}
	// Without this guard uctBestChild would index an empty slice and panic.
	if len(root.children) == 0 {
		return nil
	}
	return root.uctBestChild(0.0).causingAction
}
// newMCTSNode builds a node for state whose parent is parentNode and which was
// reached through causingAction. The node starts with an empty (non-nil)
// children slice and with every legal action of state still untried.
func newMCTSNode(parentNode *monteCarloTreeSearchGameNode, state GameState, causingAction Action) monteCarloTreeSearchGameNode {
	return monteCarloTreeSearchGameNode{
		parent:         parentNode,
		children:       make([]*monteCarloTreeSearchGameNode, 0),
		value:          state,
		untriedActions: state.GetLegalActions(),
		causingAction:  causingAction,
	}
}
// rootMCTSNode builds a root node for state: a node with neither a parent nor
// a causing action.
func rootMCTSNode(state GameState) monteCarloTreeSearchGameNode {
	root := newMCTSNode(nil, state, nil)
	return root
}
// uctBestChild returns the child with the highest score
//
//	q/n + c*sqrt(2*ln(N)/n)
//
// where q and n belong to the child and N is the n of the receiver. The first
// child reaching the maximum wins ties. The receiver must have at least one
// child; indexing the empty children slice panics otherwise.
func (node *monteCarloTreeSearchGameNode) uctBestChild(c float64) *monteCarloTreeSearchGameNode {
	// The logarithmic term only depends on the receiver, so compute it once.
	logTerm := 2 * math.Log(node.n)

	bestIndex, bestScore := 0, -math.MaxFloat64
	for idx, candidate := range node.children {
		score := candidate.q/candidate.n + c*math.Sqrt(logTerm/candidate.n)
		if score > bestScore {
			bestIndex, bestScore = idx, score
		}
	}
	return node.children[bestIndex]
}
// rollout plays the game from this node's state until IsGameEnded reports
// true, asking policy for the action to apply at every step, and returns the
// result EvaluateGame reports for the final state.
func (node *monteCarloTreeSearchGameNode) rollout(policy RolloutPolicy) GameResult {
	state := node.value
	for {
		if state.IsGameEnded() {
			break
		}
		move := policy(state)
		state = move.ApplyTo(state)
	}
	// The second value returned by EvaluateGame is not used here; the loop
	// above only exits once IsGameEnded has returned true.
	outcome, _ := state.EvaluateGame()
	return outcome
}
// backpropagate walks from this node up to the root. Every non-root node on
// the way gets result, multiplied by the NextToMove value of its parent's
// state, added to q and its visit count n incremented. The root only has its
// visit count incremented.
func (node *monteCarloTreeSearchGameNode) backpropagate(result GameResult) {
	reward := float64(result)
	current := node
	for ; !current.isRoot(); current = current.parent {
		current.q += reward * float64(current.parent.value.NextToMove())
		current.n++
	}
	// current is the root at this point.
	current.n++
}
// isTerminal reports the boolean that EvaluateGame returns as its second value
// for this node's state.
func (node *monteCarloTreeSearchGameNode) isTerminal() bool {
	if _, over := node.value.EvaluateGame(); over {
		return true
	}
	return false
}
// isFullyExpanded reports whether no untried actions are left on this node.
func (node *monteCarloTreeSearchGameNode) isFullyExpanded() bool {
	remaining := len(node.untriedActions)
	return remaining == 0
}
// popFirstUntriedAction removes the first untried action from this node and
// returns it. It panics when there is no untried action left.
func (node *monteCarloTreeSearchGameNode) popFirstUntriedAction() Action {
	first, rest := node.untriedActions[0], node.untriedActions[1:]
	node.untriedActions = rest
	return first
}
// expand takes the next untried action of this node, applies it to the node's
// state, wraps the resulting state in a new node whose parent is this node,
// passes that node to addChild and returns it.
func (node *monteCarloTreeSearchGameNode) expand() *monteCarloTreeSearchGameNode {
	move := node.popFirstUntriedAction()
	nextState := move.ApplyTo(node.value)
	child := newMCTSNode(node, nextState, move)
	childPtr := &child
	node.addChild(childPtr)
	return childPtr
}
// treePolicy descends from this node and returns the node to run a rollout
// from: the first terminal node met, or the freshly expanded child of the
// first node that still has untried actions. Fully expanded nodes are
// descended through uctBestChild with c = 1.4.
func (node *monteCarloTreeSearchGameNode) treePolicy() *monteCarloTreeSearchGameNode {
	current := node
	for {
		if current.isTerminal() {
			return current
		}
		if !current.isFullyExpanded() {
			return current.expand()
		}
		current = current.uctBestChild(1.4)
	}
}