Skip to content

Commit

Permalink
Merge pull request #95 from Shimuuar/binomial-bench
Browse files Browse the repository at this point in the history
Benchmarks and performance improvements for binomial
  • Loading branch information
Shimuuar authored Jul 9, 2024
2 parents 08c6be9 + b6c4ab2 commit 5fcf3ec
Show file tree
Hide file tree
Showing 2 changed files with 153 additions and 123 deletions.
263 changes: 141 additions & 122 deletions System/Random/MWC/Distributions.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE BangPatterns, CPP, GADTs, FlexibleContexts, ScopedTypeVariables #-}
-- |
-- Module : System.Random.MWC.Distributions
Expand Down Expand Up @@ -347,6 +348,146 @@ pkgError :: String -> String -> a
pkgError func msg = error $ "System.Random.MWC.Distributions." ++ func ++
": " ++ msg

-- | Random variate generator for Binomial distribution. Will throw
-- exception when parameters are out range.
--
-- The probability of getting exactly k successes in n trials is
-- given by the probability mass function:
--
-- \[
-- f(k;n,p) = \Pr(X = k) = \binom n k p^k(1-p)^{n-k}
-- \]
binomial :: forall g m . StatefulGen g m
=> Int -- ^ Number of trials, must be positive.
-> Double -- ^ Probability of success \(p \in [0,1]\)
-> g -- ^ Generator
-> m Int
{-# INLINE binomial #-}
binomial n p gen
| n <= 0 = pkgError "binomial" "number of trials must be positive"
| p < 0.0 || p > 1.0 = pkgError "binomial" "probability must be >= 0 and <= 1"
| p == 0.0 = return 0
| p == 1.0 = return n
| p <= 0.5 = if
| fromIntegral n * p < inv_thr -> binomialInv n p gen
| otherwise -> binomialTPE n p gen
| p > 0.5 = do
ix <- case 1 - p of
p' | fromIntegral n * p' < inv_thr -> binomialInv n p' gen
| otherwise -> binomialTPE n p' gen
pure $! n - ix
-- Reachable when p is NaN
| otherwise = pkgError "binomial" "probability must be >= 0 and <= 1"
where
-- Threshold for preferring the BINV algorithm / inverse cdf
-- logic. The paper suggests 10, Ranlib uses 30, R uses 30, Rust uses
-- 10 and GSL uses 14.
inv_thr = 10

-- Binomial-Triangle-Parallelogram-Exponential algorithm (BTPE)
-- described in Kachitvichyanukul1988
binomialTPE :: forall g m . StatefulGen g m => Int -> Double -> g -> m Int
{-# INLINE binomialTPE #-}
binomialTPE n p g = loop
where
-- Main accept/reject loop
loop = do
u <- uniformRM (0.0, p4) g
v <- uniformDoublePositive01M g
selectArea u v
-- Acceptance / rejection of sample [step 5]
acceptReject :: Int -> Double -> m Int
acceptReject !ix !v
| var <= accept = return ix
| otherwise = loop
where
var = log v
accept = logFactorial bigM + logFactorial (n - bigM) -
logFactorial ix - logFactorial (n - ix) +
fromIntegral (ix - bigM) * log (p / q)
-- Select area to be used [Steps 1-4]
selectArea :: Double -> Double -> m Int
selectArea !u !v
-- Triangular region
| u <= p1 = return $! floor $ xm - p1 * v + u
-- Parallelogram region
| u <= p2 = do let x = xl + (u - p1) / c
w = v * c + 1.0 - abs (x - xm) / p1
if w > 1 || w <= 0
then loop
else do let ix = floor x
acceptReject ix w
-- Left tail
| u <= p3 = case floor $ xl + log v / lambdaL of
ix | ix < 0 -> loop
| otherwise -> do let w = v * (u - p2) * lambdaL
acceptReject ix w
-- Right tail
| otherwise = case floor $ xr - log v / lambdaR of
ix | ix > n -> loop
| otherwise -> do let w = v * (u - p3) * lambdaR
acceptReject ix w
----------------------------------------
-- Constants used in algorithm. See [Step 0]
q = 1 - p
np = fromIntegral n * p
ffm = np + p
bigM = floor ffm
-- Half integer mean (tip of triangle)
xm = fromIntegral bigM + 0.5
-- p1: the distance to the left and right edges of the triangle
-- region below the target distribution; since height=1, also:
-- area of region (half base * height)
!p1 = let npq = np * q
in fromIntegral (floor (2.195 * sqrt npq - 4.6 * q) :: Int) + 0.5
xl = xm - p1 -- Left edge of triangle
xr = xm + p1 -- Right edge of triangle
c = 0.134 + 20.5 / (15.3 + fromIntegral bigM)
-- p1 + area of parallelogram region
!p2 = p1 * (1.0 + c + c)
--
lambdaL = let al = (ffm - xl) / (ffm - xl * p)
in al * (1.0 + 0.5 * al)
lambdaR = let ar = (xr - ffm) / (xr * q)
in ar * (1.0 + 0.5 * ar)
-- p2 + area of left tail
!p3 = p2 + c / lambdaL
-- p3 + area of right tail
!p4 = p3 + c / lambdaR


-- Compute binomial variate using inversion method (BINV in
-- Kachitvichyanukul1988)
binomialInv :: StatefulGen g m => Int -> Double -> g -> m Int
{-# INLINE binomialInv #-}
binomialInv n p g = do
u <- uniformDoublePositive01M g
return $! invertBinomial n p u

-- This function is defined on top level to avoid inlining it since it's rather
-- large and we don't need specializations since it's monomorphic anyway
invertBinomial
:: Int -- N of trials
-> Double -- probability of success
-> Double -- Output of PRNG
-> Int
invertBinomial !n !p !u0 = invert (q^n) u0 0
where
-- We forcing s&a in order to avoid allocating thunks. Those are
-- more expensive than computing them unconditionally
q = 1 - p
!s = p / q
!a = fromIntegral (n + 1) * s
--
invert !r !u !x
| u <= r = x
| otherwise = invert r' u' x'
where
u' = u - r
x' = x + 1
r' = r * ((a / fromIntegral x') - s)


-- $references
--
-- * Doornik, J.A. (2005) An improved ziggurat method to generate
Expand All @@ -363,125 +504,3 @@ pkgError func msg = error $ "System.Random.MWC.Distributions." ++ func ++
-- 1988) 216. <https://dl.acm.org/doi/pdf/10.1145/42372.42381>
-- Here's an example of how the algorithm's sampling regions look
-- ![Something](docs/RecreateFigure.svg)

-- | Random variate generator for Binomial distribution
--
-- The probability of getting exactly k successes in n trials is
-- given by the probability mass function:
--
-- \[
-- f(k;n,p) = \Pr(X = k) = \binom n k p^k(1-p)^{n-k}
-- \]
binomial :: forall g m . StatefulGen g m
=> Int -- ^ Number of trials
-> Double -- ^ Probability of success (returning True)
-> g -- ^ Generator
-> m Int
{-# INLINE binomial #-}
binomial nTrials prob gen
| nTrials <= 0 = pkgError "binomial" "number of trials must be positive"
| prob < 0.0 || prob > 1.0 = pkgError "binomial" "probability must be >= 0 and <= 1"
| prob == 0.0 = return 0
| prob == 1.0 = return nTrials
| otherwise = do let (p', flipped) = if prob > 0.5 then (1.0 - prob, True) else (prob, False)
ix <- if fromIntegral nTrials * p' < bInvThreshold
then binomialInv nTrials p' gen
else binomialTPE nTrials p' gen
if flipped
then return $ nTrials - ix
else return ix

where
binomialTPE n p g =
let q = 1 - p
np = fromIntegral n * p
ffm = np + p
bigM = floor ffm
-- Half integer mean (tip of triangle)
xm = fromIntegral bigM + 0.5
npq = np * q

-- p1: the distance to the left and right edges of the triangle
-- region below the target distribution; since height=1, also:
-- area of region (half base * height)
p1 = fromIntegral (floor (2.195 * sqrt npq - 4.6 * q) :: Int) + 0.5
-- Left edge of triangle
xl = xm - p1
-- Right edge of triangle
xr = xm + p1
c = 0.134 + 20.5 / (15.3 + fromIntegral bigM)
-- p1 + area of parallelogram region
p2 = p1 * (1.0 + c + c)
al = (ffm - xl) / (ffm - xl * p)
lambdaL = al * (1.0 + 0.5 * al)
ar = (xr - ffm) / (xr * q)
lambdaR = ar * (1.0 + 0.5 * ar)

-- p2 + area of left tail
p3 = p2 + c / lambdaL
-- p3 + area of right tail
p4 = p3 + c / lambdaR

-- Acceptance / rejection comparison
step5 :: Int -> Double -> m Int
step5 ix v
| var <= accept = return $ if p > 0 then ix else n - ix
| otherwise = hh
where
var = log v
accept = logFactorial bigM + logFactorial (n - bigM) -
logFactorial ix - logFactorial (n - ix) +
fromIntegral (ix - bigM) * log (p / q)

h :: Double -> Double -> m Int
h u v | -- Triangular region
u <= p1 = return $ floor $ xm - p1 * v + u

-- Parallelogram region
| u <= p2 = do let x = xl + (u - p1) / c
w = v * c + 1.0 - abs (x - xm) / p1
if w > 1 || w <= 0
then hh
else do let ix = floor x
step5 ix w

-- Left tail
| u <= p3 = do let ix = floor $ xl + log v / lambdaL
if ix < 0
then hh
else do let w = v * (u - p2) * lambdaL
step5 ix w

-- Right tail
| otherwise = do let ix = floor $ xr - log v / lambdaR
if ix > n
then hh
else do let w = v * (u - p3) * lambdaR
step5 ix w

hh = do
u <- uniformRM (0.0, p4) g
v <- uniformDoublePositive01M g
h u v

in hh

binomialInv :: StatefulGen g m => Int -> Double -> g -> m Int
binomialInv n p g = do
let q = 1 - p
s = p / q
a = fromIntegral (n + 1) * s
r = q^n
f (rPrev, uPrev, xPrev) = (rNew, uNew, xNew)
where
uNew = uPrev - rPrev
xNew = xPrev + 1
rNew = rPrev * ((a / fromIntegral xNew) - s)
u <- uniformDoublePositive01M g
let (_, _, x) = until (\(t, v, _) -> v <= t) f (r, u, 0) in return x

-- Threshold for preferring the BINV algorithm / inverse cdf
-- logic. The paper suggests 10, Ranlib uses 30, R uses 30, Rust uses
-- 10 and GSL uses 14.
bInvThreshold :: Double
bInvThreshold = 10
13 changes: 12 additions & 1 deletion bench/Benchmark.hs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,18 @@ main = do
, bench "gamma,a<1" $ whnfIO $ loop iter (gamma 0.5 1 mwc :: IO Double)
, bench "gamma,a>1" $ whnfIO $ loop iter (gamma 2 1 mwc :: IO Double)
, bench "chiSquare" $ whnfIO $ loop iter (chiSquare 4 mwc :: IO Double)
, bench "binomial" $ whnfIO $ loop iter (binomial 1400 0.4 mwc :: IO Int)
-- NOTE: We switch between algorithms when Np=10
, bgroup "binomial"
[ bench (show p ++ " " ++ show n) $ whnfIO $ loop iter (binomial n p mwc)
| (n,p) <- [ (2, 0.2), (2, 0.5), (2, 0.8)
, (10, 0.1), (10, 0.9)
, (20, 0.2), (20, 0.8)
--
, (60, 0.2), (60, 0.8)
, (600, 0.2), (600, 0.8)
, (6000, 0.2), (6000, 0.8)
]
]
, bench "beta binomial 10" $ whnfIO $ loop iter (betaBinomial 600 400 10 mwc :: IO Int)
, bench "beta binomial 100" $ whnfIO $ loop iter (betaBinomial 600 400 100 mwc :: IO Int)
, bench "beta binomial table 10" $ whnfIO $ loop iter (betaBinomialTable 600 400 10 mwc :: IO Int)
Expand Down

0 comments on commit 5fcf3ec

Please sign in to comment.