Skip to content

Commit

Permalink
Merge pull request #90 from idontgetoutmuch/master
Browse files Browse the repository at this point in the history
Binomial Sampler
  • Loading branch information
Shimuuar authored Jun 15, 2024
2 parents 33c89e1 + 4b2bc36 commit 08c6be9
Show file tree
Hide file tree
Showing 7 changed files with 560 additions and 4 deletions.
130 changes: 130 additions & 0 deletions System/Random/MWC/Distributions.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ module System.Random.MWC.Distributions
, geometric0
, geometric1
, bernoulli
, binomial
-- ** Multivariate
, dirichlet
-- * Permutations
Expand All @@ -51,6 +52,7 @@ import System.Random.Stateful (StatefulGen(..),Uniform(..),UniformRange(..),unif
import qualified Data.Vector.Unboxed as I
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Generic.Mutable as M
import Numeric.SpecFunctions (logFactorial)

-- Unboxed 2-tuple
data T = T {-# UNPACK #-} !Double {-# UNPACK #-} !Double
Expand Down Expand Up @@ -355,3 +357,131 @@ pkgError func msg = error $ "System.Random.MWC.Distributions." ++ func ++
-- (2007). Gaussian random number generators.
-- /ACM Computing Surveys/ 39(4).
-- <http://www.cse.cuhk.edu.hk/~phwl/mt/public/archives/papers/grng_acmcs07.pdf>
--
-- * Kachitvichyanukul, V. and Schmeiser, B. W. Binomial Random
-- Variate Generation. Communications of the ACM, 31, 2 (February,
-- 1988) 216. <https://dl.acm.org/doi/pdf/10.1145/42372.42381>
-- Here's an example of how the algorithm's sampling regions look
-- ![Something](docs/RecreateFigure.svg)

-- | Random variate generator for Binomial distribution
--
-- The probability of getting exactly k successes in n trials is
-- given by the probability mass function:
--
-- \[
-- f(k;n,p) = \Pr(X = k) = \binom n k p^k(1-p)^{n-k}
-- \]
binomial :: forall g m . StatefulGen g m
=> Int -- ^ Number of trials
-> Double -- ^ Probability of success (returning True)
-> g -- ^ Generator
-> m Int
{-# INLINE binomial #-}
binomial nTrials prob gen
| nTrials <= 0 = pkgError "binomial" "number of trials must be positive"
| prob < 0.0 || prob > 1.0 = pkgError "binomial" "probability must be >= 0 and <= 1"
| prob == 0.0 = return 0
| prob == 1.0 = return nTrials
| otherwise = do let (p', flipped) = if prob > 0.5 then (1.0 - prob, True) else (prob, False)
ix <- if fromIntegral nTrials * p' < bInvThreshold
then binomialInv nTrials p' gen
else binomialTPE nTrials p' gen
if flipped
then return $ nTrials - ix
else return ix

where
binomialTPE n p g =
let q = 1 - p
np = fromIntegral n * p
ffm = np + p
bigM = floor ffm
-- Half integer mean (tip of triangle)
xm = fromIntegral bigM + 0.5
npq = np * q

-- p1: the distance to the left and right edges of the triangle
-- region below the target distribution; since height=1, also:
-- area of region (half base * height)
p1 = fromIntegral (floor (2.195 * sqrt npq - 4.6 * q) :: Int) + 0.5
-- Left edge of triangle
xl = xm - p1
-- Right edge of triangle
xr = xm + p1
c = 0.134 + 20.5 / (15.3 + fromIntegral bigM)
-- p1 + area of parallelogram region
p2 = p1 * (1.0 + c + c)
al = (ffm - xl) / (ffm - xl * p)
lambdaL = al * (1.0 + 0.5 * al)
ar = (xr - ffm) / (xr * q)
lambdaR = ar * (1.0 + 0.5 * ar)

-- p2 + area of left tail
p3 = p2 + c / lambdaL
-- p3 + area of right tail
p4 = p3 + c / lambdaR

-- Acceptance / rejection comparison
step5 :: Int -> Double -> m Int
step5 ix v
| var <= accept = return $ if p > 0 then ix else n - ix
| otherwise = hh
where
var = log v
accept = logFactorial bigM + logFactorial (n - bigM) -
logFactorial ix - logFactorial (n - ix) +
fromIntegral (ix - bigM) * log (p / q)

h :: Double -> Double -> m Int
h u v | -- Triangular region
u <= p1 = return $ floor $ xm - p1 * v + u

-- Parallelogram region
| u <= p2 = do let x = xl + (u - p1) / c
w = v * c + 1.0 - abs (x - xm) / p1
if w > 1 || w <= 0
then hh
else do let ix = floor x
step5 ix w

-- Left tail
| u <= p3 = do let ix = floor $ xl + log v / lambdaL
if ix < 0
then hh
else do let w = v * (u - p2) * lambdaL
step5 ix w

-- Right tail
| otherwise = do let ix = floor $ xr - log v / lambdaR
if ix > n
then hh
else do let w = v * (u - p3) * lambdaR
step5 ix w

hh = do
u <- uniformRM (0.0, p4) g
v <- uniformDoublePositive01M g
h u v

in hh

binomialInv :: StatefulGen g m => Int -> Double -> g -> m Int
binomialInv n p g = do
let q = 1 - p
s = p / q
a = fromIntegral (n + 1) * s
r = q^n
f (rPrev, uPrev, xPrev) = (rNew, uNew, xNew)
where
uNew = uPrev - rPrev
xNew = xPrev + 1
rNew = rPrev * ((a / fromIntegral xNew) - s)
u <- uniformDoublePositive01M g
let (_, _, x) = until (\(t, v, _) -> v <= t) f (r, u, 0) in return x

-- Threshold for preferring the BINV algorithm / inverse cdf
-- logic. The paper suggests 10, Ranlib uses 30, R uses 30, Rust uses
-- 10 and GSL uses 14.
bInvThreshold :: Double
bInvThreshold = 10
16 changes: 16 additions & 0 deletions bench/Benchmark.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import Data.Word
import Data.Proxy
import qualified Data.Vector.Unboxed as U
import qualified System.Random as R
import System.Random.Stateful (StatefulGen)
import System.Random.MWC
import System.Random.MWC.Distributions
import System.Random.MWC.CondensedTable
Expand Down Expand Up @@ -91,6 +92,11 @@ main = do
, bench "gamma,a<1" $ whnfIO $ loop iter (gamma 0.5 1 mwc :: IO Double)
, bench "gamma,a>1" $ whnfIO $ loop iter (gamma 2 1 mwc :: IO Double)
, bench "chiSquare" $ whnfIO $ loop iter (chiSquare 4 mwc :: IO Double)
, bench "binomial" $ whnfIO $ loop iter (binomial 1400 0.4 mwc :: IO Int)
, bench "beta binomial 10" $ whnfIO $ loop iter (betaBinomial 600 400 10 mwc :: IO Int)
, bench "beta binomial 100" $ whnfIO $ loop iter (betaBinomial 600 400 100 mwc :: IO Int)
, bench "beta binomial table 10" $ whnfIO $ loop iter (betaBinomialTable 600 400 10 mwc :: IO Int)
, bench "beta binomial table 100" $ whnfIO $ loop iter (betaBinomialTable 600 400 100 mwc :: IO Int)
]
-- Test sampling performance. Table creation must be floated out!
, bgroup "CT/gen" $ concat
Expand Down Expand Up @@ -132,3 +138,13 @@ main = do
, bench "Int" $ whnfIO $ loop iter (M.random mtg :: IO Int)
]
]

betaBinomial :: StatefulGen g m => Double -> Double -> Int -> g -> m Int
betaBinomial a b n g = do
p <- beta a b g
binomial n p g

betaBinomialTable :: StatefulGen g m => Double -> Double -> Int -> g -> m Int
betaBinomialTable a b n g = do
p <- beta a b g
genFromTable (tableBinomial n p) g
9 changes: 9 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
## Changes in 0.15.1.0

* Additon of binomial sampler using the rejection sampling method in
Kachitvichyanukul, V. and Schmeiser, B. W. Binomial Random
Variate Generation. Communications of the ACM, 31, 2 (February,
1988) 216. <https://dl.acm.org/doi/pdf/10.1145/42372.42381>. A more
efficient basis for e.g. the beta binomial distribution:
`beta a b g >>= \p -> binomial n p g`.

## Changes in 0.15.0.2

* Doctests on 32-bit platforms are fixed. (#79)
Expand Down
Loading

0 comments on commit 08c6be9

Please sign in to comment.