-
Notifications
You must be signed in to change notification settings - Fork 0
/
mab.rb
111 lines (91 loc) · 2.14 KB
/
mab.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env ruby
require 'bundler/inline'
gemfile do
source 'https://rubygems.org'
gem 'matrix'
gem 'rubystats'
end
module Mab
# One lever of the bandit.
#
# The stored +prob+ is read back when the arm is pulled and passed to the
# binomial distribution as its probability argument.
class Arm
  # @param prob probability associated with this arm; stored as given,
  #   without validation
  def initialize(prob)
    @prob = prob
  end

  # @return the value handed to the constructor
  attr_reader :prob
end
# Simulation behaviour shared by bandit classes.
#
# The including class is expected to set these instance variables:
#   @arms     - indexable collection whose elements respond to #prob
#   @rewards  - Hash mapping arm index => accumulated reward
#   @strategy - class instantiated as new(arms, rewards, epsilon) whose
#               instances respond to #choose and return an arm index
#   @t        - number of rounds played by #run
module Base
  # Pulls a single arm once.
  #
  # @param arms indexable collection of arms
  # @param choise index of the arm to pull
  # @return one draw from a one-trial binomial distribution parameterised by
  #   the arm's +prob+ (presumably 0 or 1 -- confirm against rubystats)
  def pull_arm(arms, choise)
    # Not named `p`, so Kernel#p is not shadowed inside this method.
    success_prob = arms[choise].prob
    Rubystats::BinomialDistribution.new(1, success_prob).rng
  end

  # Sets the accumulated reward of every arm index to zero.
  def init_reward
    @arms.each_index { |index| @rewards[index] = 0 }
  end

  # Plays @t rounds: ask the strategy for an arm, pull it, and add the
  # outcome to that arm's running total.
  #
  # @param epsilon exploration rate forwarded to the strategy constructor;
  #   defaults to the previously hard-coded 0.1
  def run(epsilon = 0.1)
    # Built once: the strategy receives the same @rewards hash that is
    # mutated below, so it observes every update without being rebuilt.
    strategy = @strategy.new(@arms, @rewards, epsilon)
    @t.times do
      chosen = strategy.choose
      # A missing total counts as zero, so #run also works when
      # #init_reward was not called beforehand.
      @rewards[chosen] = @rewards.fetch(chosen, 0) + pull_arm(@arms, chosen)
    end
  end
end
# Concrete bandit: holds the simulation state and gets its behaviour
# (#init_reward, #run, #pull_arm) from Mab::Base.
class Bandit
  include Mab::Base

  # @param arms     collection of arms to play
  # @param rewards  Hash of arm index => accumulated reward
  # @param strategy strategy class that #run instantiates
  # @param t        number of rounds #run plays
  def initialize(arms, rewards, strategy, t)
    @t = t
    @strategy = strategy
    @rewards = rewards
    @arms = arms
  end

  # Read-only access to the values given at construction time.
  attr_reader :arms, :rewards, :strategy, :t
end
# Arm-selection policies.
module Strategy
  # Shared state for every policy; subclasses must override #choose.
  class Base
    attr_reader :arms, :rewards, :epsilon

    # @param arms    collection of arms (only its size/indices are used here)
    # @param rewards Hash of arm index => accumulated reward
    # @param epsilon exploration probability
    def initialize(arms, rewards, epsilon = 0.1)
      @arms = arms
      @rewards = rewards
      @epsilon = epsilon
    end

    # @raise [StandardError] always; a subclass has to supply the policy
    def choose
      raise StandardError, 'Please implement strategy'
    end
  end

  # With probability +epsilon+ picks a uniformly random arm (explore);
  # otherwise picks the arm with the largest accumulated reward (exploit).
  class EpsilonGreedy < Base
    # @return [Integer, nil] index of the selected arm (nil when there are
    #   no arms to pick from)
    def choose
      epsilon_greedy_choose
    end

    private

    def epsilon_greedy_choose
      # Must be an explicit boolean test: a numeric 0/1 draw cannot be used
      # as the condition because Integer 0 is truthy in Ruby, which would
      # make the exploit branch unreachable.
      rand < @epsilon ? epsilon_choose : greedy_choose
    end

    # Index with the highest accumulated reward (argmax over @rewards).
    # Ties are broken uniformly at random so the all-zero start does not
    # favour one arm. Falls back to a random arm when no rewards are
    # recorded yet.
    def greedy_choose
      return epsilon_choose if @rewards.empty?

      best_total = @rewards.values.max
      @rewards.select { |_arm, total| total == best_total }.keys.sample
    end

    # Uniformly random arm index; nil when @arms is empty.
    def epsilon_choose
      @arms.each_index.to_a.sample
    end
  end
end
end
# Demo run, executed only when this file is the program entry point:
# three arms, epsilon-greedy selection, 2000 rounds, then dump the bandit.
if __FILE__ == $PROGRAM_NAME
  arms = [0.2, 0.5, 0.6].map { |prob| Mab::Arm.new(prob) }
  # The strategy is passed as a class; Bandit#run instantiates it itself.
  my_mab = Mab::Bandit.new(arms, {}, Mab::Strategy::EpsilonGreedy, 2000)
  my_mab.init_reward
  my_mab.run
  p my_mab
end