From f16f4d155d5cdc094c658cb8438f183a1d39eea7 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Mon, 29 Apr 2024 17:16:47 +0800 Subject: [PATCH 01/30] The start of a binomial sampler --- System/Random/MWC/Binomial.hs | 123 ++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 System/Random/MWC/Binomial.hs diff --git a/System/Random/MWC/Binomial.hs b/System/Random/MWC/Binomial.hs new file mode 100644 index 0000000..917dfc4 --- /dev/null +++ b/System/Random/MWC/Binomial.hs @@ -0,0 +1,123 @@ +import System.Random.Stateful +import System.Random.MWC +import System.Random.MWC.Distributions +import Control.Monad.Reader + +-- | Random variate generator for Binomial distribution +-- +-- The probability of getting exactly k successes in n trials is +-- given by the probability mass function: +-- +-- \[ +-- f(k;n,p) = \Pr(X = k) = \binom n k p^k(1-p)^{n-k} +-- \] +binomial :: StatefulGen g m + => Int -- ^ Number of trials + -> Double -- ^ Probability of success (returning True) + -> g -- ^ Generator + -> m Int +binomial n p g = + if p == 0.0 + then return 0 + else if p == 1.0 + then return n + else do + let q = 1 - p + if fromIntegral n * p < bInvThreshold + then do + let s = p / q + let a = fromIntegral (n + 1) * s + undefined + else undefined + +-- FIXME: The paper contains mysterious (to me at least) +-- constants. The Rust author seems to understand these and makes +-- comments such as +-- +-- * radius of triangle region, since height=1 also area of region +-- +-- The Rust author also comments +-- +-- * It is possible for BINV to get stuck, so we break if x > +-- BINV_MAX_X and try again. +-- +-- * It would be safer to set BINV_MAX_X to self.n, but it is +-- extremely unlikely to be relevant. +-- +-- * When n*p < 10, so is n*p*q which is the variance, so a result > +-- 110 would be 100 / sqrt(10) = 31 standard deviations away. + + +baz :: StatefulGen g m => Int -> Double -> g -> m Int +baz n p g = do + let q = if p <= 0.5 then p else 1 - p + np = fromIntegral n * p + npq = np * q + fm = np + p + m = floor fm + p1 = fromIntegral (floor (2.195 * sqrt npq - 4.6 * q)) + 0.5 + -- FIXME: This comment I understand and will come back to + -- Tip of triangle + xm = fromIntegral m + 0.5 + -- Left edge of triangle + xl = xm - p1 + -- Right edge of triangle + xr = xm + p1 + -- FIXME: I am not sure I understand this + -- p1 + area of parallelogram region + c = 0.134 + 20.5 / (15.3 + fromIntegral m) + p2 = p1 * (1 + 2 * c) + lambda a = a * (1 + 0.5 * a) + lambdaL = lambda ((fm - xl) / (fm - xl * p)) + lambdaR = lambda ((xr - fm) / (xr * q)) + -- FIXME: p1 + area of left tail + p3 = p2 + c / lambdaL + -- p1 + area of right tail + p4 = p3 + c / lambdaR + return undefined + +bar :: StatefulGen g m => Int -> Double -> g -> m Int +bar n p gen = do + let q = 1 - p + s = p / q + a = fromIntegral (n + 1) * s + r = q^n + f (rPrev, uPrev, xPrev) = (rNew, uNew, xNew) + where + uNew = uPrev - rPrev + xNew = xPrev + 1 + rNew = rPrev * ((a / fromIntegral xNew) - s) + u <- uniformDoublePositive01M gen + let (_, _, x) = until (\(r, u, _) -> u <= r) f (r, u, 0) in return x + +foo :: StatefulGen g m => Int -> Double -> g -> m Int +foo n p g = fmap sum $ fmap (fmap fromEnum) $ replicateM n $ bernoulli p g + +nSamples = 100000 + +test :: StatefulGen g m => g -> m (Double, Double) +test g = do + ss <- replicateM nSamples $ bar 20 0.4 g + tt <- replicateM nSamples $ foo 20 0.4 g + return $ ((fromIntegral (sum ss) / fromIntegral nSamples), (fromIntegral (sum tt) / fromIntegral nSamples)) + +main :: IO () +main = do + monadicGen <- create + x <- runReaderT (ReaderT test) monadicGen + print x + + -- if uPrev > rPrev + + -- if xPrev > bInvMaxX + -- then foo s a undefined + + +-- Threshold for preferring the BINV algorithm / inverse cdf +-- logic. The paper suggests 10, Ranlib uses 30, R uses 30, Rust uses +-- 10 and GSL uses 14. +bInvThreshold :: Double +bInvThreshold = 10 + +bInvMaxX :: Int +bInvMaxX = 110 From 908186f981d1a495cc4c173ada7b68d01c9010f2 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Wed, 1 May 2024 12:16:28 +0100 Subject: [PATCH 02/30] Some experiments --- System/Random/MWC/Binomial.hs | 156 ++++++++++++++--------------- System/Random/MWC/Distributions.hs | 121 ++++++++++++++++++++++ mwc-random.cabal | 16 +++ 3 files changed, 210 insertions(+), 83 deletions(-) diff --git a/System/Random/MWC/Binomial.hs b/System/Random/MWC/Binomial.hs index 917dfc4..1bc3303 100644 --- a/System/Random/MWC/Binomial.hs +++ b/System/Random/MWC/Binomial.hs @@ -16,68 +16,10 @@ binomial :: StatefulGen g m -> Double -- ^ Probability of success (returning True) -> g -- ^ Generator -> m Int -binomial n p g = - if p == 0.0 - then return 0 - else if p == 1.0 - then return n - else do - let q = 1 - p - if fromIntegral n * p < bInvThreshold - then do - let s = p / q - let a = fromIntegral (n + 1) * s - undefined - else undefined +binomial n p g = undefined --- FIXME: The paper contains mysterious (to me at least) --- constants. The Rust author seems to understand these and makes --- comments such as --- --- * radius of triangle region, since height=1 also area of region --- --- The Rust author also comments --- --- * It is possible for BINV to get stuck, so we break if x > --- BINV_MAX_X and try again. --- --- * It would be safer to set BINV_MAX_X to self.n, but it is --- extremely unlikely to be relevant. --- --- * When n*p < 10, so is n*p*q which is the variance, so a result > --- 110 would be 100 / sqrt(10) = 31 standard deviations away. - - -baz :: StatefulGen g m => Int -> Double -> g -> m Int -baz n p g = do - let q = if p <= 0.5 then p else 1 - p - np = fromIntegral n * p - npq = np * q - fm = np + p - m = floor fm - p1 = fromIntegral (floor (2.195 * sqrt npq - 4.6 * q)) + 0.5 - -- FIXME: This comment I understand and will come back to - -- Tip of triangle - xm = fromIntegral m + 0.5 - -- Left edge of triangle - xl = xm - p1 - -- Right edge of triangle - xr = xm + p1 - -- FIXME: I am not sure I understand this - -- p1 + area of parallelogram region - c = 0.134 + 20.5 / (15.3 + fromIntegral m) - p2 = p1 * (1 + 2 * c) - lambda a = a * (1 + 0.5 * a) - lambdaL = lambda ((fm - xl) / (fm - xl * p)) - lambdaR = lambda ((xr - fm) / (xr * q)) - -- FIXME: p1 + area of left tail - p3 = p2 + c / lambdaL - -- p1 + area of right tail - p4 = p3 + c / lambdaR - return undefined - -bar :: StatefulGen g m => Int -> Double -> g -> m Int -bar n p gen = do +inv :: StatefulGen g m => Int -> Double -> g -> m Int +inv n p gen = do let q = 1 - p s = p / q a = fromIntegral (n + 1) * s @@ -90,34 +32,82 @@ bar n p gen = do u <- uniformDoublePositive01M gen let (_, _, x) = until (\(r, u, _) -> u <= r) f (r, u, 0) in return x -foo :: StatefulGen g m => Int -> Double -> g -> m Int -foo n p g = fmap sum $ fmap (fmap fromEnum) $ replicateM n $ bernoulli p g +inv'' :: StatefulGen g m => Int -> Int -> Double -> g -> m [Int] +inv'' m n p gen = do + let q = 1 - p + s = p / q + a = fromIntegral (n + 1) * s + r = q^n + f (rPrev, uPrev, xPrev) = (rNew, uNew, xNew) + where + uNew = uPrev - rPrev + xNew = xPrev + 1 + rNew = rPrev * ((a / fromIntegral xNew) - s) + replicateM m $ do u <- uniformDoublePositive01M gen + let (_, _, x) = until (\(r, u, _) -> u <= r) f (r, u, 0) in return x + +inv' :: StatefulGen g m => g -> ReaderT (Double, Double, Double) m Int +inv' g = do + (s, a, r) <- ask + let f (rPrev, uPrev, xPrev) = (rNew, uNew, xNew) + where + uNew = uPrev - rPrev + xNew = xPrev + 1 + rNew = rPrev * ((a / fromIntegral xNew) - s) + u <- lift $ uniformDoublePositive01M g + let (_, _, x) = until (\(r, u, _) -> u <= r) f (r, u, 0) in return x + +ber :: StatefulGen g m => Int -> Double -> g -> m Int +ber n p g = fmap sum $ fmap (fmap fromEnum) $ replicateM n $ bernoulli p g nSamples = 100000 -test :: StatefulGen g m => g -> m (Double, Double) -test g = do - ss <- replicateM nSamples $ bar 20 0.4 g - tt <- replicateM nSamples $ foo 20 0.4 g - return $ ((fromIntegral (sum ss) / fromIntegral nSamples), (fromIntegral (sum tt) / fromIntegral nSamples)) +testInv :: StatefulGen g m => g -> m Double +testInv g = do + ss <- replicateM nSamples $ inv 1400 0.4 g + return $ fromIntegral (sum ss) / fromIntegral nSamples -main :: IO () -main = do - monadicGen <- create - x <- runReaderT (ReaderT test) monadicGen - print x +testInv'' :: StatefulGen g m => g -> m Double +testInv'' g = do + ss <- inv'' nSamples 1400 0.4 g + return $ fromIntegral (sum ss) / fromIntegral nSamples - -- if uPrev > rPrev +testInv' :: StatefulGen g m => g -> ReaderT (Double, Double, Double) m Double +testInv' g = do + ss <- replicateM nSamples $ inv' g + return $ fromIntegral (sum ss) / fromIntegral nSamples - -- if xPrev > bInvMaxX - -- then foo s a undefined +testBer :: StatefulGen g m => g -> m Double +testBer g = do + tt <- replicateM nSamples $ ber 1400 0.4 g + return $ fromIntegral (sum tt) / fromIntegral nSamples + +mainB :: IO () +mainB = do + monadicGen <- create + x <- runReaderT (ReaderT testBer) monadicGen + print x +mainI :: IO () +mainI = do + monadicGen <- create + x <- runReaderT (ReaderT testInv) monadicGen + print x --- Threshold for preferring the BINV algorithm / inverse cdf --- logic. The paper suggests 10, Ranlib uses 30, R uses 30, Rust uses --- 10 and GSL uses 14. -bInvThreshold :: Double -bInvThreshold = 10 +mainII :: IO () +mainII = do + monadicGen <- create + x <- runReaderT (ReaderT testInv'') monadicGen + print x -bInvMaxX :: Int -bInvMaxX = 110 +mainMI :: IO () +mainMI = do + let n = 1400 + p = 0.4 + q = 1 - p + s = p / q + a = fromIntegral (n + 1) * s + r = q^n + g <- create + x <- runReaderT (ReaderT (runReaderT (testInv' g))) (s, a, r) + print x diff --git a/System/Random/MWC/Distributions.hs b/System/Random/MWC/Distributions.hs index 013671c..51b0098 100644 --- a/System/Random/MWC/Distributions.hs +++ b/System/Random/MWC/Distributions.hs @@ -27,6 +27,8 @@ module System.Random.MWC.Distributions , geometric0 , geometric1 , bernoulli + , bar + , binomial -- ** Multivariate , dirichlet -- * Permutations @@ -355,3 +357,122 @@ pkgError func msg = error $ "System.Random.MWC.Distributions." ++ func ++ -- (2007). Gaussian random number generators. -- /ACM Computing Surveys/ 39(4). -- + +-- | Random variate generator for Binomial distribution +-- +-- The probability of getting exactly k successes in n trials is +-- given by the probability mass function: +-- +-- \[ +-- f(k;n,p) = \Pr(X = k) = \binom n k p^k(1-p)^{n-k} +-- \] +binomial :: forall g m . StatefulGen g m + => Int -- ^ Number of trials + -> Double -- ^ Probability of success (returning True) + -> g -- ^ Generator + -> m Int +binomial n p g = + if p == 0.0 + then return 0 + else if p == 1.0 + then return n + else do + let q = 1 - p + if fromIntegral n * p < bInvThreshold + then do + let s = p / q + let a = fromIntegral (n + 1) * s + bar n p g + else baz n p g + +-- FIXME: The paper contains mysterious (to me at least) +-- constants. The Rust author seems to understand these and makes +-- comments such as +-- +-- * radius of triangle region, since height=1 also area of region +-- +-- The Rust author also comments +-- +-- * It is possible for BINV to get stuck, so we break if x > +-- BINV_MAX_X and try again. +-- +-- * It would be safer to set BINV_MAX_X to self.n, but it is +-- extremely unlikely to be relevant. +-- +-- * When n*p < 10, so is n*p*q which is the variance, so a result > +-- 110 would be 100 / sqrt(10) = 31 standard deviations away. + + +baz :: forall g m . StatefulGen g m => Int -> Double -> g -> m Int +baz n p g = do + let q = if p <= 0.5 then p else 1 - p + np = fromIntegral n * p + npq = np * q + fm = np + p + m = floor fm + p1 = fromIntegral (floor (2.195 * sqrt npq - 4.6 * q)) + 0.5 + -- FIXME: This comment I understand and will come back to + -- Tip of triangle + xm = fromIntegral m + 0.5 + -- Left edge of triangle + xl = xm - p1 + -- Right edge of triangle + xr = xm + p1 + -- FIXME: I am not sure I understand this + -- p1 + area of parallelogram region + c = 0.134 + 20.5 / (15.3 + fromIntegral m) + p2 = p1 * (1.0 + 2.0 * c) + lambda a = a * (1.0 + 0.5 * a) + lambdaL = lambda ((fm - xl) / (fm - xl * p)) + lambdaR = lambda ((xr - fm) / (xr * q)) + -- FIXME: p1 + area of left tail + p3 = p2 + c / lambdaL + -- p1 + area of right tail + p4 = p3 + c / lambdaR + f :: m Int + f = do + u <- uniformRM (0.0, p4) g + v <- uniformDoublePositive01M g + y <- if u <= p1 + then return $ floor $ xm - p1 * v + u + else if u <= p2 + then undefined + else undefined + return undefined + -- then do let x = xl + (u - p1) / c + -- v = v * c + 1.0 - abs (x - xm) / p1 + -- if v > 1 + -- then f + -- else return $ floor x + -- else return undefined + return undefined + +bar :: StatefulGen g m => Int -> Double -> g -> m Int +bar n p gen = do + let q = 1 - p + s = p / q + a = fromIntegral (n + 1) * s + r = q^n + f (rPrev, uPrev, xPrev) = (rNew, uNew, xNew) + where + uNew = uPrev - rPrev + xNew = xPrev + 1 + rNew = rPrev * ((a / fromIntegral xNew) - s) + u <- uniformDoublePositive01M gen + let (_, _, x) = until (\(r, u, _) -> u <= r) f (r, u, 0) in return x + + + -- if uPrev > rPrev + + -- if xPrev > bInvMaxX + -- then foo s a undefined + + +-- Threshold for preferring the BINV algorithm / inverse cdf +-- logic. The paper suggests 10, Ranlib uses 30, R uses 30, Rust uses +-- 10 and GSL uses 14. +bInvThreshold :: Double +bInvThreshold = 10 + +bInvMaxX :: Int +bInvMaxX = 110 diff --git a/mwc-random.cabal b/mwc-random.cabal index c4c05e0..42ef234 100644 --- a/mwc-random.cabal +++ b/mwc-random.cabal @@ -141,3 +141,19 @@ test-suite mwc-doctests , primitive , vector >=0.11 , random >=1.2 + +executable binomial + main-is: main.hs + hs-source-dirs: apps + default-language: Haskell2010 + if impl(ghcjs) || impl(ghc < 8.0) + Buildable: False + build-depends: + base -any + , mtl + , mwc-random -any + , bytestring + , primitive + , vector >=0.11 + , random >=1.2 + From d27d6ede7592d539055b86a497906547b4c3ccee Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Wed, 1 May 2024 12:27:29 +0100 Subject: [PATCH 03/30] Nix setup --- .envrc | 1 + shell.nix | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 .envrc create mode 100644 shell.nix diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..1d953f4 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use nix diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..b1564ae --- /dev/null +++ b/shell.nix @@ -0,0 +1,36 @@ +let + +myHaskellPackageOverlay = self: super: { + myHaskellPackages = super.haskellPackages.override { + overrides = hself: hsuper: rec { + }; + }; +}; + +in + +{ nixpkgs ? import (fetchTarball "https://github.com/NixOS/nixpkgs/archive/refs/tags/23.11.tar.gz") + { + config.allowBroken = false; + overlays = [ myHaskellPackageOverlay ]; + } +}: + +let + + pkgs = nixpkgs; + + haskellDeps = ps: with ps; [ + base mwc-random random + ]; + +in + +pkgs.stdenv.mkDerivation { + name = "Whatever"; + + buildInputs = [ + (pkgs.myHaskellPackages.ghcWithPackages haskellDeps) + pkgs.cabal-install + ]; +} From 1ea8c7e3d8f21922e496ae284fa71394a288eca7 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Sat, 4 May 2024 12:27:33 +0100 Subject: [PATCH 04/30] Maybe needed to run the tests --- shell.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shell.nix b/shell.nix index b1564ae..6284450 100644 --- a/shell.nix +++ b/shell.nix @@ -21,7 +21,7 @@ let pkgs = nixpkgs; haskellDeps = ps: with ps; [ - base mwc-random random + base doctest math-functions mwc-random primitive random tasty-hunit tasty-quickcheck vector ]; in From a9048ee98360974dd84f54700ac4fc9c4fc8d634 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Thu, 2 May 2024 05:28:18 +0100 Subject: [PATCH 05/30] Use Binomial.hs as the development module for now --- System/Random/MWC/Binomial.hs | 67 ++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/System/Random/MWC/Binomial.hs b/System/Random/MWC/Binomial.hs index 1bc3303..e5c0c25 100644 --- a/System/Random/MWC/Binomial.hs +++ b/System/Random/MWC/Binomial.hs @@ -16,7 +16,19 @@ binomial :: StatefulGen g m -> Double -- ^ Probability of success (returning True) -> g -- ^ Generator -> m Int -binomial n p g = undefined +binomial n p g = + if p == 0.0 + then return 0 + else if p == 1.0 + then return n + else do + let q = 1 - p + if fromIntegral n * p < bInvThreshold + then do + let s = p / q + let a = fromIntegral (n + 1) * s + bar n p g + else baz n p g inv :: StatefulGen g m => Int -> Double -> g -> m Int inv n p gen = do @@ -111,3 +123,56 @@ mainMI = do g <- create x <- runReaderT (ReaderT (runReaderT (testInv' g))) (s, a, r) print x + +baz :: forall g m . StatefulGen g m => Int -> Double -> g -> m Int +baz n p g = do + let q = if p <= 0.5 then p else 1 - p + np = fromIntegral n * p + npq = np * q + fm = np + p + m = floor fm + p1 = fromIntegral (floor (2.195 * sqrt npq - 4.6 * q)) + 0.5 + -- FIXME: This comment I understand and will come back to + -- Tip of triangle + xm = fromIntegral m + 0.5 + -- Left edge of triangle + xl = xm - p1 + -- Right edge of triangle + xr = xm + p1 + -- FIXME: I am not sure I understand this + -- p1 + area of parallelogram region + c = 0.134 + 20.5 / (15.3 + fromIntegral m) + p2 = p1 * (1.0 + 2.0 * c) + lambda a = a * (1.0 + 0.5 * a) + lambdaL = lambda ((fm - xl) / (fm - xl * p)) + lambdaR = lambda ((xr - fm) / (xr * q)) + -- FIXME: p1 + area of left tail + p3 = p2 + c / lambdaL + -- p1 + area of right tail + p4 = p3 + c / lambdaR + f :: m Int + f = do + u <- uniformRM (0.0, p4) g + v <- uniformDoublePositive01M g + y <- if u <= p1 + then return $ floor $ xm - p1 * v + u + else if u <= p2 + then undefined + else undefined + return undefined + -- then do let x = xl + (u - p1) / c + -- v = v * c + 1.0 - abs (x - xm) / p1 + -- if v > 1 + -- then f + -- else return $ floor x + -- else return undefined + return undefined + +-- Threshold for preferring the BINV algorithm / inverse cdf +-- logic. The paper suggests 10, Ranlib uses 30, R uses 30, Rust uses +-- 10 and GSL uses 14. +bInvThreshold :: Double +bInvThreshold = 10 + +bInvMaxX :: Int +bInvMaxX = 110 From 030b284151f14ab0bd003c21af58d830b5998359 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Sun, 5 May 2024 15:55:09 +0100 Subject: [PATCH 06/30] The BTPE without the optimisations --- System/Random/MWC/Binomial.hs | 162 ++++++++++++++++++++++------------ 1 file changed, 104 insertions(+), 58 deletions(-) diff --git a/System/Random/MWC/Binomial.hs b/System/Random/MWC/Binomial.hs index e5c0c25..e271ffe 100644 --- a/System/Random/MWC/Binomial.hs +++ b/System/Random/MWC/Binomial.hs @@ -1,7 +1,8 @@ import System.Random.Stateful import System.Random.MWC -import System.Random.MWC.Distributions +import System.Random.MWC.Distributions {- FIXME -} hiding (binomial) import Control.Monad.Reader +import Numeric.SpecFunctions (logFactorial) -- | Random variate generator for Binomial distribution -- @@ -11,24 +12,101 @@ import Control.Monad.Reader -- \[ -- f(k;n,p) = \Pr(X = k) = \binom n k p^k(1-p)^{n-k} -- \] -binomial :: StatefulGen g m +binomial :: forall g m . StatefulGen g m => Int -- ^ Number of trials -> Double -- ^ Probability of success (returning True) -> g -- ^ Generator -> m Int binomial n p g = - if p == 0.0 - then return 0 - else if p == 1.0 - then return n - else do - let q = 1 - p - if fromIntegral n * p < bInvThreshold - then do - let s = p / q - let a = fromIntegral (n + 1) * s - bar n p g - else baz n p g + let q = 1 - p + np = fromIntegral n * p + ffm = np + p + bigM = floor ffm + -- Half integer mean (tip of triangle) + xm = fromIntegral bigM + 0.5 + npq = np * q + + -- p1: the distance to the left and right edges of the triangle + -- region below the target distribution; since height=1, also: + -- area of region (half base * height) + p1 = fromIntegral (floor (2.195 * sqrt npq - 4.6 * q)) + 0.5 + -- Left edge of triangle + xl = xm - p1 + -- Right edge of triangle + xr = xm + p1 + c = 0.134 + 20.5 / (15.3 + fromIntegral bigM) + -- p1 + area of parallelogram region + p2 = p1 * (1.0 + c + c) + al = (ffm - xl) / (ffm - xl * p) + lambdaL = al * (1.0 + 0.5 * al) + ar = (xr - ffm) / (xr * q) + lambdaR = ar * (1.0 + 0.5 * ar) + + -- p2 + area of left tail + p3 = p2 + c / lambdaL + -- p3 + area of right tail + p4 = p3 + c / lambdaR + + + -- Acceptance / rejection comparison + step5 :: Int -> Double -> m Int + step5 ix v = if var <= accept + then if p > 0 + then return ix + else return $ n - ix + else hh + where + var = log v + accept = logFactorial bigM + logFactorial (n - bigM) - + logFactorial ix - logFactorial (n - ix) + + fromIntegral (ix - bigM) * log (p / q) + + h :: Double -> Double -> m Int + h u v | -- Triangular region + u <= p1 = return $ floor $ xm - p1 * v + u + + -- Parallelogram region + | u <= p2 = do let x = xl + (u - p1) / c + w = v * c + 1.0 - abs (x - xm) / p1 + if w > 1 || w <= 0 + then hh + else do let ix = floor x + step5 ix w + + -- Left tail + | u <= p3 = do let ix = floor $ xl + log v / lambdaL + if ix < 0 + then hh + else do let w = v * (u - p2) * lambdaL + step5 ix w + + -- Right tail + | otherwise = do let ix = floor $ xr - log v / lambdaL + if ix > 0 && ix > n + then hh + else do let w = v * (u - p3) * lambdaR + step5 ix w + + hh = do + u <- uniformRM (0.0, p4) g + v <- uniformDoublePositive01M g + h u v + + in hh + +-- binomial n p g = +-- if p == 0.0 +-- then return 0 +-- else if p == 1.0 +-- then return n +-- else do +-- let q = 1 - p +-- if fromIntegral n * p < bInvThreshold +-- then do +-- let s = p / q +-- let a = fromIntegral (n + 1) * s +-- bar n p g +-- else baz n p g inv :: StatefulGen g m => Int -> Double -> g -> m Int inv n p gen = do @@ -72,13 +150,18 @@ inv' g = do ber :: StatefulGen g m => Int -> Double -> g -> m Int ber n p g = fmap sum $ fmap (fmap fromEnum) $ replicateM n $ bernoulli p g -nSamples = 100000 +nSamples = 1000000 testInv :: StatefulGen g m => g -> m Double testInv g = do ss <- replicateM nSamples $ inv 1400 0.4 g return $ fromIntegral (sum ss) / fromIntegral nSamples +testTPE :: StatefulGen g m => g -> m Double +testTPE g = do + ss <- replicateM nSamples $ binomial 1400 0.4 g + return $ fromIntegral (sum ss) / fromIntegral nSamples + testInv'' :: StatefulGen g m => g -> m Double testInv'' g = do ss <- inv'' nSamples 1400 0.4 g @@ -106,6 +189,12 @@ mainI = do x <- runReaderT (ReaderT testInv) monadicGen print x +mainTPE :: IO () +mainTPE = do + monadicGen <- create + x <- runReaderT (ReaderT testTPE) monadicGen + print x + mainII :: IO () mainII = do monadicGen <- create @@ -124,49 +213,6 @@ mainMI = do x <- runReaderT (ReaderT (runReaderT (testInv' g))) (s, a, r) print x -baz :: forall g m . StatefulGen g m => Int -> Double -> g -> m Int -baz n p g = do - let q = if p <= 0.5 then p else 1 - p - np = fromIntegral n * p - npq = np * q - fm = np + p - m = floor fm - p1 = fromIntegral (floor (2.195 * sqrt npq - 4.6 * q)) + 0.5 - -- FIXME: This comment I understand and will come back to - -- Tip of triangle - xm = fromIntegral m + 0.5 - -- Left edge of triangle - xl = xm - p1 - -- Right edge of triangle - xr = xm + p1 - -- FIXME: I am not sure I understand this - -- p1 + area of parallelogram region - c = 0.134 + 20.5 / (15.3 + fromIntegral m) - p2 = p1 * (1.0 + 2.0 * c) - lambda a = a * (1.0 + 0.5 * a) - lambdaL = lambda ((fm - xl) / (fm - xl * p)) - lambdaR = lambda ((xr - fm) / (xr * q)) - -- FIXME: p1 + area of left tail - p3 = p2 + c / lambdaL - -- p1 + area of right tail - p4 = p3 + c / lambdaR - f :: m Int - f = do - u <- uniformRM (0.0, p4) g - v <- uniformDoublePositive01M g - y <- if u <= p1 - then return $ floor $ xm - p1 * v + u - else if u <= p2 - then undefined - else undefined - return undefined - -- then do let x = xl + (u - p1) / c - -- v = v * c + 1.0 - abs (x - xm) / p1 - -- if v > 1 - -- then f - -- else return $ floor x - -- else return undefined - return undefined -- Threshold for preferring the BINV algorithm / inverse cdf -- logic. The paper suggests 10, Ranlib uses 30, R uses 30, Rust uses From d7fb0c36f571f2c181cdf9bc97a7ed4ef877295a Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Sat, 4 May 2024 06:49:59 +0100 Subject: [PATCH 07/30] Switch between inv and tpe --- System/Random/MWC/Distributions.hs | 123 ++++++++++++++++++----------- 1 file changed, 77 insertions(+), 46 deletions(-) diff --git a/System/Random/MWC/Distributions.hs b/System/Random/MWC/Distributions.hs index 51b0098..921e4f6 100644 --- a/System/Random/MWC/Distributions.hs +++ b/System/Random/MWC/Distributions.hs @@ -27,7 +27,6 @@ module System.Random.MWC.Distributions , geometric0 , geometric1 , bernoulli - , bar , binomial -- ** Multivariate , dirichlet @@ -53,6 +52,7 @@ import System.Random.Stateful (StatefulGen(..),Uniform(..),UniformRange(..),unif import qualified Data.Vector.Unboxed as I import qualified Data.Vector.Generic as G import qualified Data.Vector.Generic.Mutable as M +import Numeric.SpecFunctions (logFactorial) -- Unboxed 2-tuple data T = T {-# UNPACK #-} !Double {-# UNPACK #-} !Double @@ -382,15 +382,9 @@ binomial n p g = then do let s = p / q let a = fromIntegral (n + 1) * s - bar n p g - else baz n p g + binomialInv n p g + else binomialTPE n p g --- FIXME: The paper contains mysterious (to me at least) --- constants. The Rust author seems to understand these and makes --- comments such as --- --- * radius of triangle region, since height=1 also area of region --- -- The Rust author also comments -- -- * It is possible for BINV to get stuck, so we break if x > @@ -402,53 +396,90 @@ binomial n p g = -- * When n*p < 10, so is n*p*q which is the variance, so a result > -- 110 would be 100 / sqrt(10) = 31 standard deviations away. - -baz :: forall g m . StatefulGen g m => Int -> Double -> g -> m Int -baz n p g = do - let q = if p <= 0.5 then p else 1 - p - np = fromIntegral n * p - npq = np * q - fm = np + p - m = floor fm +binomialTPE :: forall g m . StatefulGen g m + => Int + -> Double + -> g + -> m Int +binomialTPE n p g = + let q = 1 - p + np = fromIntegral n * p + ffm = np + p + bigM = floor ffm + -- Half integer mean (tip of triangle) + xm = fromIntegral bigM + 0.5 + npq = np * q + + -- p1: the distance to the left and right edges of the triangle + -- region below the target distribution; since height=1, also: + -- area of region (half base * height) p1 = fromIntegral (floor (2.195 * sqrt npq - 4.6 * q)) + 0.5 - -- FIXME: This comment I understand and will come back to - -- Tip of triangle - xm = fromIntegral m + 0.5 -- Left edge of triangle xl = xm - p1 -- Right edge of triangle xr = xm + p1 - -- FIXME: I am not sure I understand this + c = 0.134 + 20.5 / (15.3 + fromIntegral bigM) -- p1 + area of parallelogram region - c = 0.134 + 20.5 / (15.3 + fromIntegral m) - p2 = p1 * (1.0 + 2.0 * c) - lambda a = a * (1.0 + 0.5 * a) - lambdaL = lambda ((fm - xl) / (fm - xl * p)) - lambdaR = lambda ((xr - fm) / (xr * q)) - -- FIXME: p1 + area of left tail + p2 = p1 * (1.0 + c + c) + al = (ffm - xl) / (ffm - xl * p) + lambdaL = al * (1.0 + 0.5 * al) + ar = (xr - ffm) / (xr * q) + lambdaR = ar * (1.0 + 0.5 * ar) + + -- p2 + area of left tail p3 = p2 + c / lambdaL - -- p1 + area of right tail + -- p3 + area of right tail p4 = p3 + c / lambdaR - f :: m Int - f = do + + + -- Acceptance / rejection comparison + step5 :: Int -> Double -> m Int + step5 ix v = if var <= accept + then if p > 0 + then return ix + else return $ n - ix + else hh + where + var = log v + accept = logFactorial bigM + logFactorial (n - bigM) - + logFactorial ix - logFactorial (n - ix) + + fromIntegral (ix - bigM) * log (p / q) + + h :: Double -> Double -> m Int + h u v | -- Triangular region + u <= p1 = return $ floor $ xm - p1 * v + u + + -- Parallelogram region + | u <= p2 = do let x = xl + (u - p1) / c + w = v * c + 1.0 - abs (x - xm) / p1 + if w > 1 || w <= 0 + then hh + else do let ix = floor x + step5 ix w + + -- Left tail + | u <= p3 = do let ix = floor $ xl + log v / lambdaL + if ix < 0 + then hh + else do let w = v * (u - p2) * lambdaL + step5 ix w + + -- Right tail + | otherwise = do let ix = floor $ xr - log v / lambdaL + if ix > 0 && ix > n + then hh + else do let w = v * (u - p3) * lambdaR + step5 ix w + + hh = do u <- uniformRM (0.0, p4) g v <- uniformDoublePositive01M g - y <- if u <= p1 - then return $ floor $ xm - p1 * v + u - else if u <= p2 - then undefined - else undefined - return undefined - -- then do let x = xl + (u - p1) / c - -- v = v * c + 1.0 - abs (x - xm) / p1 - -- if v > 1 - -- then f - -- else return $ floor x - -- else return undefined - return undefined - -bar :: StatefulGen g m => Int -> Double -> g -> m Int -bar n p gen = do + h u v + + in hh + +binomialInv :: StatefulGen g m => Int -> Double -> g -> m Int +binomialInv n p gen = do let q = 1 - p s = p / q a = fromIntegral (n + 1) * s From d675d35dd21a36acaf6c2ce5380c69bc8ea1415b Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Sat, 4 May 2024 12:25:04 +0100 Subject: [PATCH 08/30] Now the doctests pass --- System/Random/MWC.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/System/Random/MWC.hs b/System/Random/MWC.hs index 5cc616b..18b684a 100644 --- a/System/Random/MWC.hs +++ b/System/Random/MWC.hs @@ -27,11 +27,11 @@ -- 'createSystemSeed' or 'withSystemRandomST' (All examples assume -- that @System.Random.Stateful@ is imported) -- --- >>> g <- createSystemRandom --- >>> uniformM g :: IO Int +-- $$$ g <- createSystemRandom +-- $$$ uniformM g :: IO Int -- ... -- --- >>> withSystemRandomST $ \g -> uniformM g :: IO Int +-- $$$ withSystemRandomST $ \g -> uniformM g :: IO Int -- ... -- -- Deterministically create generator from given seed using @@ -546,10 +546,10 @@ withSystemRandomST act = do -- This function is unsafe and for example allows STRefs or any -- other mutable data structure to escape scope: -- --- >>> ref <- withSystemRandom $ \_ -> newSTRef 1 --- >>> withSystemRandom $ \_ -> modifySTRef ref succ >> readSTRef ref +-- $$$ ref <- withSystemRandom $ \_ -> newSTRef 1 +-- $$$ withSystemRandom $ \_ -> modifySTRef ref succ >> readSTRef ref -- 2 --- >>> withSystemRandom $ \_ -> modifySTRef ref succ >> readSTRef ref +-- $$$ withSystemRandom $ \_ -> modifySTRef ref succ >> readSTRef ref -- 3 withSystemRandom :: PrimBase m => (Gen (PrimState m) -> m a) -> IO a From 94b24cc68c0d667a4b313d5cad5ff62eff1a1d48 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Sun, 5 May 2024 15:57:00 +0100 Subject: [PATCH 09/30] Bump version and allow newer doctest --- mwc-random.cabal | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/mwc-random.cabal b/mwc-random.cabal index 42ef234..090693c 100644 --- a/mwc-random.cabal +++ b/mwc-random.cabal @@ -12,6 +12,7 @@ homepage: https://github.com/haskell/mwc-random bug-reports: https://github.com/haskell/mwc-random/issues category: Math, Statistics +version: 0.15.0.3 synopsis: Fast, high quality pseudo random number generation description: This package contains code for generating high quality random @@ -141,19 +142,3 @@ test-suite mwc-doctests , primitive , vector >=0.11 , random >=1.2 - -executable binomial - main-is: main.hs - hs-source-dirs: apps - default-language: Haskell2010 - if impl(ghcjs) || impl(ghc < 8.0) - Buildable: False - build-depends: - base -any - , mtl - , mwc-random -any - , bytestring - , primitive - , vector >=0.11 - , random >=1.2 - From 790f32ca6f88eba1720b93bddb14bb46f8a0495b Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Sat, 4 May 2024 12:27:02 +0100 Subject: [PATCH 10/30] Inv for small distributions and TPE for large --- System/Random/MWC/Distributions.hs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/System/Random/MWC/Distributions.hs b/System/Random/MWC/Distributions.hs index 921e4f6..01d55af 100644 --- a/System/Random/MWC/Distributions.hs +++ b/System/Random/MWC/Distributions.hs @@ -1,3 +1,5 @@ +{-# OPTIONS_GHC -Wall #-} + {-# LANGUAGE BangPatterns, CPP, GADTs, FlexibleContexts, ScopedTypeVariables #-} -- | -- Module : System.Random.MWC.Distributions @@ -376,14 +378,15 @@ binomial n p g = then return 0 else if p == 1.0 then return n - else do - let q = 1 - p - if fromIntegral n * p < bInvThreshold - then do - let s = p / q - let a = fromIntegral (n + 1) * s - binomialInv n p g - else binomialTPE n p g + else do let (p', flipped) = if p > 0.5 + then (1.0 - p, True) + else (p, False) + ix <- if fromIntegral n * p < bInvThreshold + then binomialInv n p' g + else binomialTPE n p' g + if flipped + then return $ n - ix + else return ix -- The Rust author also comments -- From 8dfbecb143389b4b101ff21abf51d26c67e3e0a2 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Sat, 4 May 2024 17:10:55 +0100 Subject: [PATCH 11/30] Add back in benmarks --- shell.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shell.nix b/shell.nix index 6284450..1a283a9 100644 --- a/shell.nix +++ b/shell.nix @@ -21,7 +21,7 @@ let pkgs = nixpkgs; haskellDeps = ps: with ps; [ - base doctest math-functions mwc-random primitive random tasty-hunit tasty-quickcheck vector + base doctest gauge math-functions mersenne-random mwc-random primitive random tasty-hunit tasty-quickcheck vector ]; in From ff481fa24eb331efd9720b66c56e955b82d366d0 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Sat, 4 May 2024 17:43:51 +0100 Subject: [PATCH 12/30] Benchmark and performance test --- System/Random/MWC/Binomial.hs | 117 ++++------------------------------ 1 file changed, 12 insertions(+), 105 deletions(-) diff --git a/System/Random/MWC/Binomial.hs b/System/Random/MWC/Binomial.hs index e271ffe..303397a 100644 --- a/System/Random/MWC/Binomial.hs +++ b/System/Random/MWC/Binomial.hs @@ -1,112 +1,10 @@ import System.Random.Stateful import System.Random.MWC -import System.Random.MWC.Distributions {- FIXME -} hiding (binomial) +import System.Random.MWC.Distributions import Control.Monad.Reader import Numeric.SpecFunctions (logFactorial) +import System.Random.MWC.CondensedTable --- | Random variate generator for Binomial distribution --- --- The probability of getting exactly k successes in n trials is --- given by the probability mass function: --- --- \[ --- f(k;n,p) = \Pr(X = k) = \binom n k p^k(1-p)^{n-k} --- \] -binomial :: forall g m . StatefulGen g m - => Int -- ^ Number of trials - -> Double -- ^ Probability of success (returning True) - -> g -- ^ Generator - -> m Int -binomial n p g = - let q = 1 - p - np = fromIntegral n * p - ffm = np + p - bigM = floor ffm - -- Half integer mean (tip of triangle) - xm = fromIntegral bigM + 0.5 - npq = np * q - - -- p1: the distance to the left and right edges of the triangle - -- region below the target distribution; since height=1, also: - -- area of region (half base * height) - p1 = fromIntegral (floor (2.195 * sqrt npq - 4.6 * q)) + 0.5 - -- Left edge of triangle - xl = xm - p1 - -- Right edge of triangle - xr = xm + p1 - c = 0.134 + 20.5 / (15.3 + fromIntegral bigM) - -- p1 + area of parallelogram region - p2 = p1 * (1.0 + c + c) - al = (ffm - xl) / (ffm - xl * p) - lambdaL = al * (1.0 + 0.5 * al) - ar = (xr - ffm) / (xr * q) - lambdaR = ar * (1.0 + 0.5 * ar) - - -- p2 + area of left tail - p3 = p2 + c / lambdaL - -- p3 + area of right tail - p4 = p3 + c / lambdaR - - - -- Acceptance / rejection comparison - step5 :: Int -> Double -> m Int - step5 ix v = if var <= accept - then if p > 0 - then return ix - else return $ n - ix - else hh - where - var = log v - accept = logFactorial bigM + logFactorial (n - bigM) - - logFactorial ix - logFactorial (n - ix) + - fromIntegral (ix - bigM) * log (p / q) - - h :: Double -> Double -> m Int - h u v | -- Triangular region - u <= p1 = return $ floor $ xm - p1 * v + u - - -- Parallelogram region - | u <= p2 = do let x = xl + (u - p1) / c - w = v * c + 1.0 - abs (x - xm) / p1 - if w > 1 || w <= 0 - then hh - else do let ix = floor x - step5 ix w - - -- Left tail - | u <= p3 = do let ix = floor $ xl + log v / lambdaL - if ix < 0 - then hh - else do let w = v * (u - p2) * lambdaL - step5 ix w - - -- Right tail - | otherwise = do let ix = floor $ xr - log v / lambdaL - if ix > 0 && ix > n - then hh - else do let w = v * (u - p3) * lambdaR - step5 ix w - - hh = do - u <- uniformRM (0.0, p4) g - v <- uniformDoublePositive01M g - h u v - - in hh - --- binomial n p g = --- if p == 0.0 --- then return 0 --- else if p == 1.0 --- then return n --- else do --- let q = 1 - p --- if fromIntegral n * p < bInvThreshold --- then do --- let s = p / q --- let a = fromIntegral (n + 1) * s --- bar n p g --- else baz n p g inv :: StatefulGen g m => Int -> Double -> g -> m Int inv n p gen = do @@ -150,7 +48,7 @@ inv' g = do ber :: StatefulGen g m => Int -> Double -> g -> m Int ber n p g = fmap sum $ fmap (fmap fromEnum) $ replicateM n $ bernoulli p g -nSamples = 1000000 +nSamples = 100000 testInv :: StatefulGen g m => g -> m Double testInv g = do @@ -172,6 +70,10 @@ testInv' g = do ss <- replicateM nSamples $ inv' g return $ fromIntegral (sum ss) / fromIntegral nSamples +testTab g = do + ss <- replicateM nSamples $ genFromTable (tableBinomial 1400 0.4) g + return $ fromIntegral (sum ss) / fromIntegral nSamples + testBer :: StatefulGen g m => g -> m Double testBer g = do tt <- replicateM nSamples $ ber 1400 0.4 g @@ -213,6 +115,11 @@ mainMI = do x <- runReaderT (ReaderT (runReaderT (testInv' g))) (s, a, r) print x +mainTab :: IO () +mainTab = do + g <- create + x <- testTab g + print x -- Threshold for preferring the BINV algorithm / inverse cdf -- logic. The paper suggests 10, Ranlib uses 30, R uses 30, Rust uses From 40b38d787215e05a4d90c5b9320d48ffcac64353 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Sat, 4 May 2024 17:53:09 +0100 Subject: [PATCH 13/30] ghc 8 complains about $$$ in comments --- System/Random/MWC.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/System/Random/MWC.hs b/System/Random/MWC.hs index 18b684a..926a73b 100644 --- a/System/Random/MWC.hs +++ b/System/Random/MWC.hs @@ -27,11 +27,11 @@ -- 'createSystemSeed' or 'withSystemRandomST' (All examples assume -- that @System.Random.Stateful@ is imported) -- --- $$$ g <- createSystemRandom --- $$$ uniformM g :: IO Int +-- AAA g <- createSystemRandom +-- AAA uniformM g :: IO Int -- ... -- --- $$$ withSystemRandomST $ \g -> uniformM g :: IO Int +-- AAA withSystemRandomST $ \g -> uniformM g :: IO Int -- ... -- -- Deterministically create generator from given seed using @@ -546,10 +546,10 @@ withSystemRandomST act = do -- This function is unsafe and for example allows STRefs or any -- other mutable data structure to escape scope: -- --- $$$ ref <- withSystemRandom $ \_ -> newSTRef 1 --- $$$ withSystemRandom $ \_ -> modifySTRef ref succ >> readSTRef ref +-- AAA ref <- withSystemRandom $ \_ -> newSTRef 1 +-- AAA withSystemRandom $ \_ -> modifySTRef ref succ >> readSTRef ref -- 2 --- $$$ withSystemRandom $ \_ -> modifySTRef ref succ >> readSTRef ref +-- AAA withSystemRandom $ \_ -> modifySTRef ref succ >> readSTRef ref -- 3 withSystemRandom :: PrimBase m => (Gen (PrimState m) -> m a) -> IO a From c0bbe1344ee435326b3bef79a70cc1f20b01e4bf Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Sat, 4 May 2024 17:43:51 +0100 Subject: [PATCH 14/30] Benchmark and performance test --- mwc-random.cabal | 3 +-- shell.nix | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/mwc-random.cabal b/mwc-random.cabal index 090693c..445eaa6 100644 --- a/mwc-random.cabal +++ b/mwc-random.cabal @@ -1,7 +1,7 @@ cabal-version: 3.0 build-type: Simple name: mwc-random -version: 0.15.0.2 +version: 0.15.0.3 license: BSD-2-Clause license-file: LICENSE copyright: 2009, 2010, 2011 Bryan O'Sullivan @@ -12,7 +12,6 @@ homepage: https://github.com/haskell/mwc-random bug-reports: https://github.com/haskell/mwc-random/issues category: Math, Statistics -version: 0.15.0.3 synopsis: Fast, high quality pseudo random number generation description: This package contains code for generating high quality random diff --git a/shell.nix b/shell.nix index 1a283a9..be54375 100644 --- a/shell.nix +++ b/shell.nix @@ -21,7 +21,7 @@ let pkgs = nixpkgs; haskellDeps = ps: with ps; [ - base doctest gauge math-functions mersenne-random mwc-random primitive random tasty-hunit tasty-quickcheck vector + base doctest gauge math-functions mersenne-random mwc-random primitive random tasty-bench tasty-hunit tasty-quickcheck vector ]; in From ea7c419ed3420f07df1138570c4ad751adb43e5b Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Mon, 6 May 2024 11:21:37 +0100 Subject: [PATCH 15/30] doctests fixed locally (my environment) --- System/Random/MWC.hs | 12 ++++++------ shell.nix | 9 ++++++++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/System/Random/MWC.hs b/System/Random/MWC.hs index 926a73b..5cc616b 100644 --- a/System/Random/MWC.hs +++ b/System/Random/MWC.hs @@ -27,11 +27,11 @@ -- 'createSystemSeed' or 'withSystemRandomST' (All examples assume -- that @System.Random.Stateful@ is imported) -- --- AAA g <- createSystemRandom --- AAA uniformM g :: IO Int +-- >>> g <- createSystemRandom +-- >>> uniformM g :: IO Int -- ... -- --- AAA withSystemRandomST $ \g -> uniformM g :: IO Int +-- >>> withSystemRandomST $ \g -> uniformM g :: IO Int -- ... -- -- Deterministically create generator from given seed using @@ -546,10 +546,10 @@ withSystemRandomST act = do -- This function is unsafe and for example allows STRefs or any -- other mutable data structure to escape scope: -- --- AAA ref <- withSystemRandom $ \_ -> newSTRef 1 --- AAA withSystemRandom $ \_ -> modifySTRef ref succ >> readSTRef ref +-- >>> ref <- withSystemRandom $ \_ -> newSTRef 1 +-- >>> withSystemRandom $ \_ -> modifySTRef ref succ >> readSTRef ref -- 2 --- AAA withSystemRandom $ \_ -> modifySTRef ref succ >> readSTRef ref +-- >>> withSystemRandom $ \_ -> modifySTRef ref succ >> readSTRef ref -- 3 withSystemRandom :: PrimBase m => (Gen (PrimState m) -> m a) -> IO a diff --git a/shell.nix b/shell.nix index be54375..bfcf03d 100644 --- a/shell.nix +++ b/shell.nix @@ -9,13 +9,20 @@ myHaskellPackageOverlay = self: super: { in -{ nixpkgs ? import (fetchTarball "https://github.com/NixOS/nixpkgs/archive/refs/tags/23.11.tar.gz") +{ nixpkgs ? import (fetchTarball "https://github.com/NixOS/nixpkgs/tarball/nixos-unstable") { config.allowBroken = false; overlays = [ myHaskellPackageOverlay ]; } }: +# { nixpkgs ? import (fetchTarball "https://github.com/NixOS/nixpkgs/archive/refs/tags/23.11.tar.gz") +# { +# config.allowBroken = false; +# overlays = [ myHaskellPackageOverlay ]; +# } +# }: + let pkgs = nixpkgs; From 439b70009d54121318c84b22ee78c3e12bbe11a8 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Tue, 7 May 2024 16:10:08 +0100 Subject: [PATCH 16/30] Not needed for PR --- System/Random/MWC/Binomial.hs | 131 ---------------------------------- shell.nix | 43 ----------- 2 files changed, 174 deletions(-) delete mode 100644 System/Random/MWC/Binomial.hs delete mode 100644 shell.nix diff --git a/System/Random/MWC/Binomial.hs b/System/Random/MWC/Binomial.hs deleted file mode 100644 index 303397a..0000000 --- a/System/Random/MWC/Binomial.hs +++ /dev/null @@ -1,131 +0,0 @@ -import System.Random.Stateful -import System.Random.MWC -import System.Random.MWC.Distributions -import Control.Monad.Reader -import Numeric.SpecFunctions (logFactorial) -import System.Random.MWC.CondensedTable - - -inv :: StatefulGen g m => Int -> Double -> g -> m Int -inv n p gen = do - let q = 1 - p - s = p / q - a = fromIntegral (n + 1) * s - r = q^n - f (rPrev, uPrev, xPrev) = (rNew, uNew, xNew) - where - uNew = uPrev - rPrev - xNew = xPrev + 1 - rNew = rPrev * ((a / fromIntegral xNew) - s) - u <- uniformDoublePositive01M gen - let (_, _, x) = until (\(r, u, _) -> u <= r) f (r, u, 0) in return x - -inv'' :: StatefulGen g m => Int -> Int -> Double -> g -> m [Int] -inv'' m n p gen = do - let q = 1 - p - s = p / q - a = fromIntegral (n + 1) * s - r = q^n - f (rPrev, uPrev, xPrev) = (rNew, uNew, xNew) - where - uNew = uPrev - rPrev - xNew = xPrev + 1 - rNew = rPrev * ((a / fromIntegral xNew) - s) - replicateM m $ do u <- uniformDoublePositive01M gen - let (_, _, x) = until (\(r, u, _) -> u <= r) f (r, u, 0) in return x - -inv' :: StatefulGen g m => g -> ReaderT (Double, Double, Double) m Int -inv' g = do - (s, a, r) <- ask - let f (rPrev, uPrev, xPrev) = (rNew, uNew, xNew) - where - uNew = uPrev - rPrev - xNew = xPrev + 1 - rNew = rPrev * ((a / fromIntegral xNew) - s) - u <- lift $ uniformDoublePositive01M g - let (_, _, x) = until (\(r, u, _) -> u <= r) f (r, u, 0) in return x - -ber :: StatefulGen g m => Int -> Double -> g -> m Int -ber n p g = fmap sum $ fmap (fmap fromEnum) $ replicateM n $ bernoulli p g - -nSamples = 100000 - -testInv :: StatefulGen g m => g -> m Double -testInv g = do - ss <- replicateM nSamples $ inv 1400 0.4 g - return $ fromIntegral (sum ss) / fromIntegral nSamples - -testTPE :: StatefulGen g m => g -> m Double -testTPE g = do - ss <- replicateM nSamples $ binomial 1400 0.4 g - return $ fromIntegral (sum ss) / fromIntegral nSamples - -testInv'' :: StatefulGen g m => g -> m Double -testInv'' g = do - ss <- inv'' nSamples 1400 0.4 g - return $ fromIntegral (sum ss) / fromIntegral nSamples - -testInv' :: StatefulGen g m => g -> ReaderT (Double, Double, Double) m Double -testInv' g = do - ss <- replicateM nSamples $ inv' g - return $ fromIntegral (sum ss) / fromIntegral nSamples - -testTab g = do - ss <- replicateM nSamples $ genFromTable (tableBinomial 1400 0.4) g - return $ fromIntegral (sum ss) / fromIntegral nSamples - -testBer :: StatefulGen g m => g -> m Double -testBer g = do - tt <- replicateM nSamples $ ber 1400 0.4 g - return $ fromIntegral (sum tt) / fromIntegral nSamples - -mainB :: IO () -mainB = do - monadicGen <- create - x <- runReaderT (ReaderT testBer) monadicGen - print x - -mainI :: IO () -mainI = do - monadicGen <- create - x <- runReaderT (ReaderT testInv) monadicGen - print x - -mainTPE :: IO () -mainTPE = do - monadicGen <- create - x <- runReaderT (ReaderT testTPE) monadicGen - print x - -mainII :: IO () -mainII = do - monadicGen <- create - x <- runReaderT (ReaderT testInv'') monadicGen - print x - -mainMI :: IO () -mainMI = do - let n = 1400 - p = 0.4 - q = 1 - p - s = p / q - a = fromIntegral (n + 1) * s - r = q^n - g <- create - x <- runReaderT (ReaderT (runReaderT (testInv' g))) (s, a, r) - print x - -mainTab :: IO () -mainTab = do - g <- create - x <- testTab g - print x - --- Threshold for preferring the BINV algorithm / inverse cdf --- logic. The paper suggests 10, Ranlib uses 30, R uses 30, Rust uses --- 10 and GSL uses 14. -bInvThreshold :: Double -bInvThreshold = 10 - -bInvMaxX :: Int -bInvMaxX = 110 diff --git a/shell.nix b/shell.nix deleted file mode 100644 index bfcf03d..0000000 --- a/shell.nix +++ /dev/null @@ -1,43 +0,0 @@ -let - -myHaskellPackageOverlay = self: super: { - myHaskellPackages = super.haskellPackages.override { - overrides = hself: hsuper: rec { - }; - }; -}; - -in - -{ nixpkgs ? import (fetchTarball "https://github.com/NixOS/nixpkgs/tarball/nixos-unstable") - { - config.allowBroken = false; - overlays = [ myHaskellPackageOverlay ]; - } -}: - -# { nixpkgs ? import (fetchTarball "https://github.com/NixOS/nixpkgs/archive/refs/tags/23.11.tar.gz") -# { -# config.allowBroken = false; -# overlays = [ myHaskellPackageOverlay ]; -# } -# }: - -let - - pkgs = nixpkgs; - - haskellDeps = ps: with ps; [ - base doctest gauge math-functions mersenne-random mwc-random primitive random tasty-bench tasty-hunit tasty-quickcheck vector - ]; - -in - -pkgs.stdenv.mkDerivation { - name = "Whatever"; - - buildInputs = [ - (pkgs.myHaskellPackages.ghcWithPackages haskellDeps) - pkgs.cabal-install - ]; -} From 1070c721fc96f22682e0fe9c765a93f74360ee22 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Wed, 8 May 2024 09:38:37 +0100 Subject: [PATCH 17/30] General tidy --- System/Random/MWC/Distributions.hs | 243 +++++++++++++---------------- 1 file changed, 108 insertions(+), 135 deletions(-) diff --git a/System/Random/MWC/Distributions.hs b/System/Random/MWC/Distributions.hs index 01d55af..c84e8e8 100644 --- a/System/Random/MWC/Distributions.hs +++ b/System/Random/MWC/Distributions.hs @@ -1,5 +1,3 @@ -{-# OPTIONS_GHC -Wall #-} - {-# LANGUAGE BangPatterns, CPP, GADTs, FlexibleContexts, ScopedTypeVariables #-} -- | -- Module : System.Random.MWC.Distributions @@ -373,140 +371,115 @@ binomial :: forall g m . StatefulGen g m -> Double -- ^ Probability of success (returning True) -> g -- ^ Generator -> m Int -binomial n p g = - if p == 0.0 +{-# INLINE binomial #-} +binomial nTrials prob gen = + if prob == 0.0 then return 0 - else if p == 1.0 - then return n - else do let (p', flipped) = if p > 0.5 - then (1.0 - p, True) - else (p, False) - ix <- if fromIntegral n * p < bInvThreshold - then binomialInv n p' g - else binomialTPE n p' g + else if prob == 1.0 + then return nTrials + else do let (p', flipped) = if prob > 0.5 + then (1.0 - prob, True) + else (prob, False) + ix <- if fromIntegral nTrials * p' < bInvThreshold + then binomialInv nTrials p' gen + else binomialTPE nTrials p' gen if flipped - then return $ n - ix + then return $ nTrials - ix else return ix --- The Rust author also comments --- --- * It is possible for BINV to get stuck, so we break if x > --- BINV_MAX_X and try again. --- --- * It would be safer to set BINV_MAX_X to self.n, but it is --- extremely unlikely to be relevant. --- --- * When n*p < 10, so is n*p*q which is the variance, so a result > --- 110 would be 100 / sqrt(10) = 31 standard deviations away. - -binomialTPE :: forall g m . StatefulGen g m - => Int - -> Double - -> g - -> m Int -binomialTPE n p g = - let q = 1 - p - np = fromIntegral n * p - ffm = np + p - bigM = floor ffm - -- Half integer mean (tip of triangle) - xm = fromIntegral bigM + 0.5 - npq = np * q - - -- p1: the distance to the left and right edges of the triangle - -- region below the target distribution; since height=1, also: - -- area of region (half base * height) - p1 = fromIntegral (floor (2.195 * sqrt npq - 4.6 * q)) + 0.5 - -- Left edge of triangle - xl = xm - p1 - -- Right edge of triangle - xr = xm + p1 - c = 0.134 + 20.5 / (15.3 + fromIntegral bigM) - -- p1 + area of parallelogram region - p2 = p1 * (1.0 + c + c) - al = (ffm - xl) / (ffm - xl * p) - lambdaL = al * (1.0 + 0.5 * al) - ar = (xr - ffm) / (xr * q) - lambdaR = ar * (1.0 + 0.5 * ar) - - -- p2 + area of left tail - p3 = p2 + c / lambdaL - -- p3 + area of right tail - p4 = p3 + c / lambdaR - - - -- Acceptance / rejection comparison - step5 :: Int -> Double -> m Int - step5 ix v = if var <= accept - then if p > 0 - then return ix - else return $ n - ix - else hh - where - var = log v - accept = logFactorial bigM + logFactorial (n - bigM) - - logFactorial ix - logFactorial (n - ix) + - fromIntegral (ix - bigM) * log (p / q) - - h :: Double -> Double -> m Int - h u v | -- Triangular region - u <= p1 = return $ floor $ xm - p1 * v + u - - -- Parallelogram region - | u <= p2 = do let x = xl + (u - p1) / c - w = v * c + 1.0 - abs (x - xm) / p1 - if w > 1 || w <= 0 - then hh - else do let ix = floor x - step5 ix w - - -- Left tail - | u <= p3 = do let ix = floor $ xl + log v / lambdaL - if ix < 0 - then hh - else do let w = v * (u - p2) * lambdaL - step5 ix w - - -- Right tail - | otherwise = do let ix = floor $ xr - log v / lambdaL - if ix > 0 && ix > n - then hh - else do let w = v * (u - p3) * lambdaR - step5 ix w - - hh = do - u <- uniformRM (0.0, p4) g - v <- uniformDoublePositive01M g - h u v - - in hh - -binomialInv :: StatefulGen g m => Int -> Double -> g -> m Int -binomialInv n p gen = do - let q = 1 - p - s = p / q - a = fromIntegral (n + 1) * s - r = q^n - f (rPrev, uPrev, xPrev) = (rNew, uNew, xNew) - where - uNew = uPrev - rPrev - xNew = xPrev + 1 - rNew = rPrev * ((a / fromIntegral xNew) - s) - u <- uniformDoublePositive01M gen - let (_, _, x) = until (\(r, u, _) -> u <= r) f (r, u, 0) in return x - - - -- if uPrev > rPrev - - -- if xPrev > bInvMaxX - -- then foo s a undefined - - --- Threshold for preferring the BINV algorithm / inverse cdf --- logic. The paper suggests 10, Ranlib uses 30, R uses 30, Rust uses --- 10 and GSL uses 14. -bInvThreshold :: Double -bInvThreshold = 10 - -bInvMaxX :: Int -bInvMaxX = 110 + where + binomialTPE n p g = + let q = 1 - p + np = fromIntegral n * p + ffm = np + p + bigM = floor ffm + -- Half integer mean (tip of triangle) + xm = fromIntegral bigM + 0.5 + npq = np * q + + -- p1: the distance to the left and right edges of the triangle + -- region below the target distribution; since height=1, also: + -- area of region (half base * height) + p1 = fromIntegral (floor (2.195 * sqrt npq - 4.6 * q) :: Int) + 0.5 + -- Left edge of triangle + xl = xm - p1 + -- Right edge of triangle + xr = xm + p1 + c = 0.134 + 20.5 / (15.3 + fromIntegral bigM) + -- p1 + area of parallelogram region + p2 = p1 * (1.0 + c + c) + al = (ffm - xl) / (ffm - xl * p) + lambdaL = al * (1.0 + 0.5 * al) + ar = (xr - ffm) / (xr * q) + lambdaR = ar * (1.0 + 0.5 * ar) + + -- p2 + area of left tail + p3 = p2 + c / lambdaL + -- p3 + area of right tail + p4 = p3 + c / lambdaR + + -- Acceptance / rejection comparison + step5 :: Int -> Double -> m Int + step5 ix v = if var <= accept + then if p > 0 + then return ix + else return $ n - ix + else hh + where + var = log v + accept = logFactorial bigM + logFactorial (n - bigM) - + logFactorial ix - logFactorial (n - ix) + + fromIntegral (ix - bigM) * log (p / q) + + h :: Double -> Double -> m Int + h u v | -- Triangular region + u <= p1 = return $ floor $ xm - p1 * v + u + + -- Parallelogram region + | u <= p2 = do let x = xl + (u - p1) / c + w = v * c + 1.0 - abs (x - xm) / p1 + if w > 1 || w <= 0 + then hh + else do let ix = floor x + step5 ix w + + -- Left tail + | u <= p3 = do let ix = floor $ xl + log v / lambdaL + if ix < 0 + then hh + else do let w = v * (u - p2) * lambdaL + step5 ix w + + -- Right tail + | otherwise = do let ix = floor $ xr - log v / lambdaL + if ix > 0 && ix > n + then hh + else do let w = v * (u - p3) * lambdaR + step5 ix w + + hh = do + u <- uniformRM (0.0, p4) g + v <- uniformDoublePositive01M g + h u v + + in hh + + binomialInv :: StatefulGen g m => Int -> Double -> g -> m Int + binomialInv n p g = do + let q = 1 - p + s = p / q + a = fromIntegral (n + 1) * s + r = q^n + f (rPrev, uPrev, xPrev) = (rNew, uNew, xNew) + where + uNew = uPrev - rPrev + xNew = xPrev + 1 + rNew = rPrev * ((a / fromIntegral xNew) - s) + u <- uniformDoublePositive01M g + let (_, _, x) = until (\(t, v, _) -> v <= t) f (r, u, 0) in return x + + -- Threshold for preferring the BINV algorithm / inverse cdf + -- logic. The paper suggests 10, Ranlib uses 30, R uses 30, Rust uses + -- 10 and GSL uses 14. + bInvThreshold :: Double + bInvThreshold = 10 From 49671d3814195731bf63a65336adfb2bad842340 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Wed, 8 May 2024 11:37:58 +0100 Subject: [PATCH 18/30] Add reference --- System/Random/MWC/Distributions.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/System/Random/MWC/Distributions.hs b/System/Random/MWC/Distributions.hs index c84e8e8..a87258f 100644 --- a/System/Random/MWC/Distributions.hs +++ b/System/Random/MWC/Distributions.hs @@ -357,6 +357,10 @@ pkgError func msg = error $ "System.Random.MWC.Distributions." ++ func ++ -- (2007). Gaussian random number generators. -- /ACM Computing Surveys/ 39(4). -- +-- +-- * Kachitvichyanukul, V. and Schmeiser, B. W. Binomial Random +-- Variate Generation. Communications of the ACM, 31, 2 (February, +-- 1988) 216. -- | Random variate generator for Binomial distribution -- From 3665635945aed970b13015a5a5f2742ca27973fb Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Fri, 10 May 2024 10:36:39 +0100 Subject: [PATCH 19/30] A test for beta and binomial --- tests/props.hs | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/tests/props.hs b/tests/props.hs index 997f464..3aced52 100644 --- a/tests/props.hs +++ b/tests/props.hs @@ -1,4 +1,5 @@ import Control.Monad +import Control.Monad.Reader import Data.Word import qualified Data.Vector.Unboxed as U @@ -8,6 +9,8 @@ import Test.Tasty.HUnit import Test.QuickCheck.Monadic import System.Random.MWC +import System.Random.MWC.Distributions +import System.Random.Stateful (StatefulGen) ---------------------------------------------------------------- @@ -25,6 +28,7 @@ main = do g <- create xs <- replicateM 513 (uniform g) assertEqual "[Word32]" xs golden + , testCase "beta binomial mean" $ prop_betaBinomialMean ] updateGenState :: GenIO -> IO () @@ -43,7 +47,7 @@ saveRestoreUserSeed = do let seed = toSeed $ U.replicate 258 0 seed' <- save =<< restore seed assertEqual "Seeds must be equal" seed' seed - + emptySeed :: IO () emptySeed = do let seed = toSeed U.empty @@ -118,3 +122,28 @@ golden = , 2487349478, 4174144946, 2793869557, 559398604, 1898140528, 991962870, 864792875, 3861665129 , 4024051364, 3383200293, 773730975, 33517291, 2660126073, 689133464, 2248134097, 3874737781 , 3358012678] + +-- We can test two for the price of one +betaBinomial :: StatefulGen g m => + Double -> Double -> Int -> g -> m Int +betaBinomial a b n g = do + p <- beta a b g + binomial n p g + +nSamples :: Int +nSamples = 10000 + +a, b :: Double +a = 600.0 +b = 400.0 + +n :: Int +n = 10 + +prop_betaBinomialMean :: IO () +prop_betaBinomialMean = do + g <- create + ss <- replicateM nSamples $ betaBinomial 600 400 10 g + let m = fromIntegral (sum ss) / fromIntegral nSamples + let x1 = fromIntegral n * a / (a + b) + assertBool ("Mean is " ++ show x1 ++ " but estimated as " ++ show m) (abs (m - x1) < 0.001) From 13d3c20e882a7dcccbd6e3f36a9272e3624356c6 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Fri, 10 May 2024 10:56:09 +0100 Subject: [PATCH 20/30] Fix dependencies for test --- mwc-random.cabal | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mwc-random.cabal b/mwc-random.cabal index 445eaa6..2d5f351 100644 --- a/mwc-random.cabal +++ b/mwc-random.cabal @@ -119,6 +119,8 @@ test-suite mwc-prop-tests , tasty >=1.3.1 , tasty-quickcheck , tasty-hunit + , random >=1.2 + , mtl test-suite mwc-doctests type: exitcode-stdio-1.0 From e3c2362d50eda2f2d66f8f57088006c174882626 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Fri, 10 May 2024 15:44:11 +0100 Subject: [PATCH 21/30] Leave benchmarks as they are and add one for binomial --- bench/Benchmark.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/bench/Benchmark.hs b/bench/Benchmark.hs index e2f46b1..a1f4aad 100644 --- a/bench/Benchmark.hs +++ b/bench/Benchmark.hs @@ -91,6 +91,7 @@ main = do , bench "gamma,a<1" $ whnfIO $ loop iter (gamma 0.5 1 mwc :: IO Double) , bench "gamma,a>1" $ whnfIO $ loop iter (gamma 2 1 mwc :: IO Double) , bench "chiSquare" $ whnfIO $ loop iter (chiSquare 4 mwc :: IO Double) + , bench "binomial" $ whnfIO $ loop iter (binomial 1400 0.4 mwc :: IO Int) ] -- Test sampling performance. Table creation must be floated out! , bgroup "CT/gen" $ concat From b57a2eb3135b1cb91247998caf99345936b7f623 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Fri, 10 May 2024 15:46:09 +0100 Subject: [PATCH 22/30] .envrc not needed in repo --- .envrc | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .envrc diff --git a/.envrc b/.envrc deleted file mode 100644 index 1d953f4..0000000 --- a/.envrc +++ /dev/null @@ -1 +0,0 @@ -use nix From bb03f5453b7a17b27862c6c396100d210abd67fe Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Sun, 12 May 2024 09:55:15 +0100 Subject: [PATCH 23/30] Response to comments --- System/Random/MWC/Distributions.hs | 44 ++++++++++++++---------------- mwc-random.cabal | 2 +- tests/props.hs | 15 +++++----- 3 files changed, 28 insertions(+), 33 deletions(-) diff --git a/System/Random/MWC/Distributions.hs b/System/Random/MWC/Distributions.hs index a87258f..b7e9114 100644 --- a/System/Random/MWC/Distributions.hs +++ b/System/Random/MWC/Distributions.hs @@ -376,20 +376,18 @@ binomial :: forall g m . StatefulGen g m -> g -- ^ Generator -> m Int {-# INLINE binomial #-} -binomial nTrials prob gen = - if prob == 0.0 - then return 0 - else if prob == 1.0 - then return nTrials - else do let (p', flipped) = if prob > 0.5 - then (1.0 - prob, True) - else (prob, False) - ix <- if fromIntegral nTrials * p' < bInvThreshold - then binomialInv nTrials p' gen - else binomialTPE nTrials p' gen - if flipped - then return $ nTrials - ix - else return ix +binomial nTrials prob gen + | nTrials <= 0 = pkgError "binomial" "number of trials must be positive" + | prob < 0.0 || prob > 1.0 = pkgError "binomial" "probability must be >= 0 and <= 1" + | prob == 0.0 = return 0 + | prob == 1.0 = return nTrials + | otherwise = do let (p', flipped) = if prob > 0.5 then (1.0 - prob, True) else (prob, False) + ix <- if fromIntegral nTrials * p' < bInvThreshold + then binomialInv nTrials p' gen + else binomialTPE nTrials p' gen + if flipped + then return $ nTrials - ix + else return ix where binomialTPE n p g = @@ -424,16 +422,14 @@ binomial nTrials prob gen = -- Acceptance / rejection comparison step5 :: Int -> Double -> m Int - step5 ix v = if var <= accept - then if p > 0 - then return ix - else return $ n - ix - else hh - where - var = log v - accept = logFactorial bigM + logFactorial (n - bigM) - - logFactorial ix - logFactorial (n - ix) + - fromIntegral (ix - bigM) * log (p / q) + step5 ix v + | var <= accept = return $ if p > 0 then ix else n - ix + | otherwise = hh + where + var = log v + accept = logFactorial bigM + logFactorial (n - bigM) - + logFactorial ix - logFactorial (n - ix) + + fromIntegral (ix - bigM) * log (p / q) h :: Double -> Double -> m Int h u v | -- Triangular region diff --git a/mwc-random.cabal b/mwc-random.cabal index 2d5f351..6542183 100644 --- a/mwc-random.cabal +++ b/mwc-random.cabal @@ -1,7 +1,7 @@ cabal-version: 3.0 build-type: Simple name: mwc-random -version: 0.15.0.3 +version: 0.15.1.0 license: BSD-2-Clause license-file: LICENSE copyright: 2009, 2010, 2011 Bryan O'Sullivan diff --git a/tests/props.hs b/tests/props.hs index 3aced52..86c13d1 100644 --- a/tests/props.hs +++ b/tests/props.hs @@ -1,5 +1,4 @@ import Control.Monad -import Control.Monad.Reader import Data.Word import qualified Data.Vector.Unboxed as U @@ -133,17 +132,17 @@ betaBinomial a b n g = do nSamples :: Int nSamples = 10000 -a, b :: Double -a = 600.0 -b = 400.0 +alpha, delta :: Double +alpha = 600.0 +delta = 400.0 -n :: Int -n = 10 +nTrials :: Int +nTrials = 10 prop_betaBinomialMean :: IO () prop_betaBinomialMean = do g <- create - ss <- replicateM nSamples $ betaBinomial 600 400 10 g + ss <- replicateM nSamples $ betaBinomial alpha delta nTrials g let m = fromIntegral (sum ss) / fromIntegral nSamples - let x1 = fromIntegral n * a / (a + b) + let x1 = fromIntegral nTrials * alpha / (alpha + delta) assertBool ("Mean is " ++ show x1 ++ " but estimated as " ++ show m) (abs (m - x1) < 0.001) From 8f1b2bfc490cd3759fe1eb6ef04dc9127054e627 Mon Sep 17 00:00:00 2001 From: Alexey Khudyakov Date: Wed, 29 May 2024 19:13:11 +0300 Subject: [PATCH 24/30] Add test that `binomial' really samples from binomial distribution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test fails for n_trials >≈ 25 --- mwc-random.cabal | 1 + tests/props.hs | 106 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 106 insertions(+), 1 deletion(-) diff --git a/mwc-random.cabal b/mwc-random.cabal index 6542183..7d7e326 100644 --- a/mwc-random.cabal +++ b/mwc-random.cabal @@ -121,6 +121,7 @@ test-suite mwc-prop-tests , tasty-hunit , random >=1.2 , mtl + , math-functions >=0.3.4 test-suite mwc-doctests type: exitcode-stdio-1.0 diff --git a/tests/props.hs b/tests/props.hs index 86c13d1..00ed03d 100644 --- a/tests/props.hs +++ b/tests/props.hs @@ -1,9 +1,15 @@ +{-# LANGUAGE ViewPatterns #-} import Control.Monad import Data.Word +import Data.Proxy import qualified Data.Vector.Unboxed as U +import qualified Data.Vector.Unboxed.Mutable as MVU +import Numeric.SpecFunctions (logChoose,incompleteGamma,log1p) import Test.Tasty import Test.Tasty.QuickCheck +import Test.Tasty.Runners +import Test.Tasty.Options import Test.Tasty.HUnit import Test.QuickCheck.Monadic @@ -11,6 +17,28 @@ import System.Random.MWC import System.Random.MWC.Distributions import System.Random.Stateful (StatefulGen) +---------------------------------------------------------------- +-- +---------------------------------------------------------------- + +-- | Average number of events per bin for binned statistical tests +newtype NPerBin = NPerBin Int + +instance IsOption NPerBin where + defaultValue = NPerBin 100 + parseValue = fmap NPerBin . safeRead + optionName = pure "n-per-bin" + optionHelp = pure "Average number of events per bin" + + +-- | P-value for statistical test. +newtype PValue = PValue Double + +instance IsOption PValue where + defaultValue = PValue 1e-9 + parseValue = fmap PValue . safeRead + optionName = pure "pvalue" + optionHelp = pure "P-value for statistical test" ---------------------------------------------------------------- -- @@ -18,8 +46,24 @@ import System.Random.Stateful (StatefulGen) main :: IO () main = do + -- Set up tasty + let tasty_opts = [ Option (Proxy :: Proxy NPerBin) + , Option (Proxy :: Proxy PValue) + , Option (Proxy :: Proxy QuickCheckTests) + , Option (Proxy :: Proxy QuickCheckReplay) + , Option (Proxy :: Proxy QuickCheckShowReplay) + , Option (Proxy :: Proxy QuickCheckMaxSize) + , Option (Proxy :: Proxy QuickCheckMaxRatio) + , Option (Proxy :: Proxy QuickCheckVerbose) + , Option (Proxy :: Proxy QuickCheckMaxShrinks) + ] + ingredients = includingOptions tasty_opts : defaultIngredients + opts <- parseOptions ingredients (testCase "Fake" (pure ())) + let n_per_bin = lookupOption opts :: NPerBin + p_val = lookupOption opts + -- g0 <- createSystemRandom - defaultMain $ testGroup "mwc" + defaultMainWithIngredients ingredients $ testGroup "mwc" [ testProperty "save/restore" $ prop_SeedSaveRestore g0 , testCase "user save/restore" $ saveRestoreUserSeed , testCase "empty seed data" $ emptySeed @@ -28,6 +72,7 @@ main = do xs <- replicateM 513 (uniform g) assertEqual "[Word32]" xs golden , testCase "beta binomial mean" $ prop_betaBinomialMean + , testProperty "binomial is binomial" $ prop_binomial_PMF n_per_bin p_val g0 ] updateGenState :: GenIO -> IO () @@ -146,3 +191,62 @@ prop_betaBinomialMean = do let m = fromIntegral (sum ss) / fromIntegral nSamples let x1 = fromIntegral nTrials * alpha / (alpha + delta) assertBool ("Mean is " ++ show x1 ++ " but estimated as " ++ show m) (abs (m - x1) < 0.001) + + + + +-- Test that `binomial` really samples from binomial distribution. +-- +-- If we have binomial random variate with number of trials N and +-- sample it M times. Then number of events with K successes is +-- described by multinomial distribution and we can test whether +-- experimental distribution is described using likelihood ratio test +prop_binomial_PMF :: NPerBin -> PValue -> GenIO -> Property +prop_binomial_PMF (NPerBin n_per_bin) (PValue p_val) g = property $ do + p <- choose (0, 1.0) -- Success probability + n_trial <- choose (2, 100) -- Number of trials in binomial distribution + -- Number of binomial samples to generate + let n_samples = n_trial * n_per_bin + n_samples' = fromIntegral n_samples + -- Compute number of outcomes + pure $ ioProperty $ do + hist <- do + buf <- MVU.new (n_trial + 1) + replicateM_ n_samples $ + MVU.modify buf (+(1::Int)) =<< binomial n_trial p g + U.unsafeFreeze buf + -- Here we compute twice log of likelihood ratio. Alternative + -- hypothesis is some distribution which fits data perfectly + -- + -- Asymtotically it's ditributed as χ² with n_trial-1 degrees of + -- freedom + let likelihood _ 0 + = 0 + likelihood k (fromIntegral -> n_obs) + = n_obs * (log (n_obs / n_samples') - logProbBinomial n_trial p k) + let logL = 2 * U.sum (U.imap likelihood hist) + let significance = 1 - cumulativeChi2 (n_trial - 1) logL + pure $ counterexample ("p = " ++ show p) + $ counterexample ("N = " ++ show n_trial) + $ counterexample ("p-val = " ++ show significance) + $ counterexample ("chi2 = " ++ show logL) + $ significance > p_val + + +---------------------------------------------------------------- +-- Statistical helpers +---------------------------------------------------------------- + +-- Logarithm of probability for binomial distribution +logProbBinomial :: Int -> Double -> Int -> Double +logProbBinomial n p k + = logChoose n k + log p * k' + log1p (-p) * nk' + where + k' = fromIntegral k + nk' = fromIntegral $ n - k + + +cumulativeChi2 :: Int -> Double -> Double +cumulativeChi2 (fromIntegral -> ndf) x + | x <= 0 = 0 + | otherwise = incompleteGamma (ndf/2) (x/2) From 4ebd2078c3a36906052c62fd0ef7ebd474e34671 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Mon, 10 Jun 2024 14:57:08 +0100 Subject: [PATCH 25/30] Fix failure of chi-squared test --- System/Random/MWC/Distributions.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/System/Random/MWC/Distributions.hs b/System/Random/MWC/Distributions.hs index b7e9114..be7b303 100644 --- a/System/Random/MWC/Distributions.hs +++ b/System/Random/MWC/Distributions.hs @@ -451,8 +451,8 @@ binomial nTrials prob gen step5 ix w -- Right tail - | otherwise = do let ix = floor $ xr - log v / lambdaL - if ix > 0 && ix > n + | otherwise = do let ix = floor $ xr - log v / lambdaR + if ix > n then hh else do let w = v * (u - p3) * lambdaR step5 ix w From 118b1b1dcdac23b4dc13f7707aeade6f24ee34d6 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Wed, 12 Jun 2024 07:52:45 +0100 Subject: [PATCH 26/30] Benchmark a "realistic" usage of binomial --- bench/Benchmark.hs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/bench/Benchmark.hs b/bench/Benchmark.hs index a1f4aad..352acfa 100644 --- a/bench/Benchmark.hs +++ b/bench/Benchmark.hs @@ -7,6 +7,7 @@ import Data.Word import Data.Proxy import qualified Data.Vector.Unboxed as U import qualified System.Random as R +import System.Random.Stateful (StatefulGen) import System.Random.MWC import System.Random.MWC.Distributions import System.Random.MWC.CondensedTable @@ -92,6 +93,10 @@ main = do , bench "gamma,a>1" $ whnfIO $ loop iter (gamma 2 1 mwc :: IO Double) , bench "chiSquare" $ whnfIO $ loop iter (chiSquare 4 mwc :: IO Double) , bench "binomial" $ whnfIO $ loop iter (binomial 1400 0.4 mwc :: IO Int) + , bench "beta binomial 10" $ whnfIO $ loop iter (betaBinomial 600 400 10 mwc :: IO Int) + , bench "beta binomial 100" $ whnfIO $ loop iter (betaBinomial 600 400 100 mwc :: IO Int) + , bench "beta binomial table 10" $ whnfIO $ loop iter (betaBinomialTable 600 400 10 mwc :: IO Int) + , bench "beta binomial table 100" $ whnfIO $ loop iter (betaBinomialTable 600 400 100 mwc :: IO Int) ] -- Test sampling performance. Table creation must be floated out! , bgroup "CT/gen" $ concat @@ -133,3 +138,13 @@ main = do , bench "Int" $ whnfIO $ loop iter (M.random mtg :: IO Int) ] ] + +betaBinomial :: StatefulGen g m => Double -> Double -> Int -> g -> m Int +betaBinomial a b n g = do + p <- beta a b g + binomial n p g + +betaBinomialTable :: StatefulGen g m => Double -> Double -> Int -> g -> m Int +betaBinomialTable a b n g = do + p <- beta a b g + genFromTable (tableBinomial n p) g From a7ee5a0d3a2da1b4491a70b46e105144ad074872 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Wed, 12 Jun 2024 13:51:09 +0100 Subject: [PATCH 27/30] Update the change log --- changelog.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/changelog.md b/changelog.md index fb3c060..bb35110 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,12 @@ +## Changes in 0.15.1.0 + + * Additon of binomial sampler using the rejection sampling method in + Kachitvichyanukul, V. and Schmeiser, B. W. Binomial Random + Variate Generation. Communications of the ACM, 31, 2 (February, + 1988) 216. . A more + efficient basis for e.g. the beta binomial distribution: + `beta a b g >>= \p -> binomial n p g`. + ## Changes in 0.15.0.2 * Doctests on 32-bit platforms are fixed. (#79) From e7c7fef64f813c7aa4616b7e4f5d02935946d04c Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Sat, 15 Jun 2024 09:16:48 +0100 Subject: [PATCH 28/30] A diagram showing the binomial sampling regions (no cabal) --- System/Random/MWC/Distributions.hs | 2 + docs/RecreateFigure.hs | 260 +++++++++++++++++++++++++++++ docs/RecreateFigure.svg | 3 + 3 files changed, 265 insertions(+) create mode 100644 docs/RecreateFigure.hs create mode 100644 docs/RecreateFigure.svg diff --git a/System/Random/MWC/Distributions.hs b/System/Random/MWC/Distributions.hs index be7b303..66d89ee 100644 --- a/System/Random/MWC/Distributions.hs +++ b/System/Random/MWC/Distributions.hs @@ -361,6 +361,8 @@ pkgError func msg = error $ "System.Random.MWC.Distributions." ++ func ++ -- * Kachitvichyanukul, V. and Schmeiser, B. W. Binomial Random -- Variate Generation. Communications of the ACM, 31, 2 (February, -- 1988) 216. +-- Here's an example of how the algorithm's sampling regions look +-- ![Something](docs/RecreateFigure.svg) -- | Random variate generator for Binomial distribution -- diff --git a/docs/RecreateFigure.hs b/docs/RecreateFigure.hs new file mode 100644 index 0000000..697ce0c --- /dev/null +++ b/docs/RecreateFigure.hs @@ -0,0 +1,260 @@ +{-# LANGUAGE ScopedTypeVariables #-} + +{-# OPTIONS_GHC -Wall #-} +{-# OPTIONS_GHC -Wno-type-defaults #-} + +module Main(main) where + +import System.FilePath () +import Data.Colour +import Diagrams.Backend.SVG.CmdLine +import Diagrams.Prelude hiding ( sample, render, Vector, offset ) +import System.Environment + + +majorizingFn :: Int -> Double -> Double -> Double +majorizingFn i pp x + | x <= xL - 0.5 = c * exp (-lambdaL * (xL - x - 0.5)) + | x <= xR - 0.5 = (1 + c) - abs (bigM - x) / p1 + | otherwise = c * exp (-lambdaR * (x + 0.5 - xR)) + where + (xL, xR, _, c, p1, bigM, fm, q, r) = xLRMc i pp + a = (fm - xL) / (fm - xL * r) + lambdaL = a * (1 + a / 2) + b = (xR - fm) / (xR * q) + lambdaR = b * (1 + b / 2) + +minorizingFn :: Int-> Double -> Double -> Double +minorizingFn i pp x + | x <= xL - 0.5 = 0.0 + | x <= xR - 0.5 = 1.0 - abs (bigM - x) / p1 + | otherwise = 0.0 + where + (xL, xR, _, _, p1, bigM, _, _, _) = xLRMc i pp + +xLRMc :: Int -> Double -> + (Double, Double, Double, Double, Double, Double, Double, Double, Double) +xLRMc i pp = (xL, xR, xM, c, p1, bigM, fm, q, r) + where + m :: Double + m = fromIntegral i + r = min pp (1 - pp) + q = 1 - r + fm = m * r + r + bigM :: Double + bigM = fromIntegral $ floor fm + p1 = (fromIntegral (floor (2.195 * sqrt (m * pp * q) - 4.6 * q) :: Int)) + 0.5 + xM = bigM + 0.5 + xL = xM - p1 + xR = xM + p1 + c = 0.134 + 20.5 / (15.3 + bigM) + +n :: Int +n = 200 + +p :: Double +p = 0.25 + +m1, sd :: Double +m1 = (fromIntegral n) * p + p +sd = sqrt $ (fromIntegral n) * p * (1- p) + +lb, ub :: Double +lb = m1 - 3 * sd +ub = m1 + 3 * sd + +tick, skala :: Double +tick = 0.01 +skala = 20.0 + +majors :: [(Double, Double)] +majors = [(x, majorizingFn n p x) | x <- [lb, lb + tick .. ub]] + +minors :: [(Double, Double)] +minors = [(x, minorizingFn n p x) | x <- [lb, lb + tick .. ub]] + +integralBinomialPDF :: (Integral a, Fractional b) => a -> b -> a -> b +integralBinomialPDF t q 0 = (1 - q)^t +integralBinomialPDF t q x = integralBinomialPDF t q (x - 1) * a * b + where + a = fromIntegral (t - x + 1) / fromIntegral x + b = q / (1 - q) + +binPdfs :: [Double] +binPdfs = map (/ v) $ map (integralBinomialPDF n p) $ map floor $ map (+ 0.5) [lb :: Double, lb + tick .. ub] + +v :: Double +v = integralBinomialPDF n p bigM + where + bigM = floor $ (fromIntegral n) * p + p + +majorVs :: [P2 Double] +majorVs = map p2 majors + +binPdfVs :: [P2 Double] +binPdfVs = map p2 $ zip (map fst majors) binPdfs + +minorVs :: [P2 Double] +minorVs = map p2 minors + +lineChart :: String -> IO () +lineChart fn = do + withArgs ["-h 800", "-w 800", "-o" ++ fn ++ ".svg"] (mainWith exampleQ) + +main :: IO () +main = lineChart "docs/RecreateFigure" + +exampleQ :: Diagram B +exampleQ = strutX 2 ||| mconcat + [ mconcat cs # fc purple # lw none + + -- x-axis + , strokeLocT xAxisT # lc black # lwL 0.01 # scaleY skala + , moveTo (p2 (xM - offset , - 2.0)) $ text "Number of Successes" + , strokeLocT brokenXAxisT # lc black # lwL 0.05 + + -- y-axis + , strokeLocT yAxisT # lc black # lwL 0.01 # scaleY skala + , moveTo (p2 (-sd - yAxixOffset - 3.0 , skala * 1.0)) $ text "Probability Density (Unnormalised)" # rotateBy (1/4) + + -- Key + , let majKey = fromVertices $ map p2 [ (xM - offset - 10.0, skala * 2.0 - 2.0) + , (xM - offset, skala * 2.0 - 2.0) + ] + in strokeLocT majKey # lc blue # lwL 0.1 === + moveTo (p2 (xM - offset + 8, skala * 2.0 - 4.0)) (text "Majorizing Function" # fc blue) + , let minKey = fromVertices $ map p2 [ (xM - offset - 10.0, skala * 2.0 - 4.0) + , (xM - offset, skala * 2.0 - 4.0) + ] + in strokeLocT minKey # lc green # lwL 0.1 === + moveTo (p2 (xM - offset + 8, skala * 2.0 - 6.0)) (text "Minorizing Function" # fc green) + , let tgtKey = fromVertices $ map p2 [ (xM - offset - 10.0, skala * 2.0 - 6.0) + , (xM - offset, skala * 2.0 - 6.0) + ] + in strokeLocT tgtKey # lc red # lwL 0.1 === + moveTo (p2 (xM - offset + 8, skala * 2.0 - 8.0)) (text "Target PDF" # fc red) + , moveTo (p2 (xM - offset - 3 * sd, skala * 2.0 - 10.0)) (text ("n = " ++ show n) # fc black) + , moveTo (p2 (xM - offset - 3 * sd, skala * 2.0 - 11.0)) (text ("p = " ++ show p) # fc black) + + -- Areas + , area1, area2, area3, area4 + , moveTo (p2 (xM - 0.5 - offset, 0.5 * skala * xMinM)) $ text "One" + , moveTo (p2 (xM - 0.5 - offset, skala * (0.5 * (xMajM - xMinM) + xMinM))) $ text "Two" + , moveTo (p2 (lb + (xL - lb) / 2 - offset, 0.5 * skala * majorizingFn n p (lb + (xL - lb) / 2))) $ text "Three" + , moveTo (p2 (ub + (xR - ub) / 2 - offset, 0.5 * skala * majorizingFn n p (ub + (xR - ub) / 2))) $ text "Four" + + , strokeLocT majorT # lc blue # lwL 0.01 # scaleY skala + , strokeLocT binPdfT # lc red # lwL 0.01 # scaleY skala + , strokeLocT minorT # lc green # lwL 0.01 # scaleY skala + , strokeLocT verticalLT # lc purple # lwL 0.01 + , strokeLocT verticalRT # lc purple # lwL 0.01 + + , moveTo (p2 (xL - 0.5 - offset, - 1.0)) $ text $ show (xL - 0.5) + , moveTo (p2 (xR - 0.5 - offset, - 1.0)) $ text $ show (xR - 0.5) + , moveTo (p2 (0.0, - 1.0)) $ text $ show $ (fromIntegral (round (lb * 100))) / 100 + , moveTo (p2 (ub - offset, - 1.0)) $ text $ show $ (fromIntegral (round (ub * 100))) / 100 + , let y = 1.5 in moveTo (p2 (-sd - yAxixOffset - 1.0, skala * y)) $ text $ show $ (fromIntegral (round (y * 100))) / 100 + , let y = 1.0 in moveTo (p2 (-sd - yAxixOffset - 1.0, skala * y)) $ text $ show $ (fromIntegral (round (y * 100))) / 100 + , let y = 0.5 in moveTo (p2 (-sd - yAxixOffset - 1.0, skala * y)) $ text $ show $ (fromIntegral (round (y * 100))) / 100 + ] + where + xAxisT :: Located (Trail V2 Double) + xAxisT = (fromVertices $ map p2 [(m1 - 4 * sd, 0), (m1 + 4 * sd, 0)]) `at` (-sd ^& 0) + + brokenXAxisT :: Located (Trail V2 Double) + brokenXAxisT = fromVertices $ map p2 [ (m1 - 4 * sd - offset - 0.0, 0.0) + , (m1 - 4 * sd - offset - 0.1, 0.3) + , (m1 - 4 * sd - offset - 0.2, 0.0) + , (m1 - 4 * sd - offset - 0.3, -0.3) + , (m1 - 4 * sd - offset - 0.4, 0.0) + , (m1 - 4 * sd - offset - 2.0, 0.0) + ] + + yAxixOffset = 2.0 + yAxisT :: Located (Trail V2 Double) + yAxisT = fromVertices $ map p2 [(-sd - yAxixOffset , 0.0), (-sd - yAxixOffset , 2.0)] + + binPdfT :: Located (Trail V2 Double) + binPdfT = fromVertices binPdfVs `at` (0 ^& (minimum binPdfs)) + + majorT :: Located (Trail V2 Double) + majorT = fromVertices majorVs `at` (0 ^& (minimum (map snd majors))) + + minorT :: Located (Trail V2 Double) + minorT = fromVertices minorVs `at` (0 ^& (minimum (map snd minors))) + + verticalLT :: Located (Trail V2 Double) + verticalLT = fromVertices $ map p2 [(xL - 0.5 - offset, 0.0) + , (xL - 0.5 - offset, skala * xMajL) + ] + + verticalRT :: Located (Trail V2 Double) + verticalRT = fromVertices $ map p2 [ (xR - 0.5 - offset, 0.0) + , (xR - 0.5 - offset, skala * xMajR) + ] + + pt1 = (xL - 0.5, 0.0) + pt2 = (xR - 0.5, 0.0) + pt3 = (xM - 0.5, skala * xMinM) + + area1 :: Diagram B + area1 = t # fc red # opacity 0.1 + where + t :: Diagram B + t = strokeLocT $ mapLoc Trail u + u :: Located (Trail' Loop V2 Double) + u = ((fromVertices $ map p2 [pt1, pt2, pt3]) # closeLine) `at` ((xL - offset -0.5) ^& 0) + + area2 :: Diagram B + area2 = t # fc blue # opacity 0.1 + where + t :: Diagram B + t = strokeLocT $ mapLoc Trail u + u :: Located (Trail' Loop V2 Double) + u = (fromVertices ([w1] ++ vs ++ [wn, w2]) # closeLine) `at` ((xL - offset - 0.5) ^& 0) + vs = fmap (scaleY 20.0) $ filter (\x -> fst (unp2 x) <= xR - 0.5) $ filter (\x -> fst (unp2 x) > xL - 0.5) majorVs + w1 = p2 (fst (unp2 (head vs)), 0.0) + w2 = p2 (xM - 0.5, skala * xMinM) + wn = p2 (fst (unp2 (last vs)), 0.0) + + area3 :: Diagram B + area3 = t # fc yellow # opacity 0.1 + where + t :: Diagram B + t = strokeLocT $ mapLoc Trail u + u :: Located (Trail' Loop V2 Double) + u = (fromVertices ([w1] ++ vs ++ [wn]) # closeLine) `at` (0 ^& 0) + vs = fmap (scaleY 20.0) $ filter (\x -> fst (unp2 x) <= xL - 0.5) majorVs + w1 = p2 (fst (unp2 (head vs)), 0.0) + wn = p2 (fst (unp2 (last vs)), 0.0) + + area4 :: Diagram B + area4 = t # fc yellow # opacity 0.1 + where + t :: Diagram B + t = strokeLocT $ mapLoc Trail u + u :: Located (Trail' Loop V2 Double) + u = (fromVertices ([w1] ++ vs ++ [wn]) # closeLine) `at` ((xR - offset - 0.5) ^& 0) + vs = fmap (scaleY 20.0) $ filter (\x -> fst (unp2 x) > xR - 0.5) majorVs + w1 = p2 (fst (unp2 (head vs)), 0.0) + wn = p2 (fst (unp2 (last vs)), 0.0) + + cs = map mkCircle $ map p2 $ [ (xL - 0.5 - offset, 0.0) + , (xL - 0.5 - offset, skala * xMajL) + , (xM - 0.5 - offset, skala * xMinM) + , (xM - 0.5 - offset, skala * xMajM) + , (xR - 0.5 - offset, 0.0) + , (xR - 0.5 - offset, skala * xMajR) + , (0.0, 0.0) + , (ub - offset, 0.0) + ] + + (xL, xR, xM, _, _, _, _, _, _) = xLRMc n p + + xMinM = minorizingFn n p (xM - 0.5) + xMajL = majorizingFn n p (xL - 0.5) + xMajR = majorizingFn n p (xR - 0.5) + xMajM = majorizingFn n p (xM - 0.5) + offset = minimum (map fst majors) + + mkCircle q = circle 0.1 # moveTo q diff --git a/docs/RecreateFigure.svg b/docs/RecreateFigure.svg new file mode 100644 index 0000000..75e5295 --- /dev/null +++ b/docs/RecreateFigure.svg @@ -0,0 +1,3 @@ + +0.51.01.568.6231.8859.540.5FourThreeTwoOnep = 0.25n = 200Target PDFMinorizing FunctionMajorizing FunctionProbability Density (Unnormalised)Number of Successes \ No newline at end of file From c4cbd2d7babb47c61c60b407e984e5d960bfde09 Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Sat, 15 Jun 2024 11:14:01 +0100 Subject: [PATCH 29/30] Extra documentation --- mwc-random.cabal | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mwc-random.cabal b/mwc-random.cabal index 7d7e326..75f3533 100644 --- a/mwc-random.cabal +++ b/mwc-random.cabal @@ -32,6 +32,9 @@ extra-source-files: changelog.md README.md +extra-doc-files: + docs/*.svg + tested-with: GHC ==8.0.2 || ==8.2.2 From 4b2bc36c07e3df7f19c42d4dedf518416c0374bd Mon Sep 17 00:00:00 2001 From: Dominic Steinitz Date: Sat, 15 Jun 2024 11:20:05 +0100 Subject: [PATCH 30/30] Changelog is documentation --- mwc-random.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mwc-random.cabal b/mwc-random.cabal index 75f3533..3ee8aed 100644 --- a/mwc-random.cabal +++ b/mwc-random.cabal @@ -29,11 +29,11 @@ description: extra-source-files: - changelog.md README.md extra-doc-files: docs/*.svg + changelog.md tested-with: GHC ==8.0.2