From 4b30bbc800e521534201a9ce67bbac02996141e4 Mon Sep 17 00:00:00 2001 From: meooow25 Date: Wed, 18 Dec 2024 21:59:22 +0530 Subject: [PATCH] Improve folds over bits for IntSet * Make foldr and foldl short-circuit instead of lazily accumulating thunks. * Switch to a non-empty style to avoid unnecessary comparisons. This also helps GHC with arity-analysis (somehow), which greatly improves performance of CPS-style foldr and foldl. * Change the bitwise operations used from bitmask = bm .&. -bm; bi = ctz bitmask; bm' = bm `xor` bitmask to bi = ctz bm; bm' = bm .&. (bm-1) which is slightly faster. --- containers-tests/benchmarks/Utils/Fold.hs | 2 +- containers-tests/tests/intset-properties.hs | 24 +++---- containers/src/Data/IntSet/Internal.hs | 80 +++++++++++---------- 3 files changed, 55 insertions(+), 51 deletions(-) diff --git a/containers-tests/benchmarks/Utils/Fold.hs b/containers-tests/benchmarks/Utils/Fold.hs index bb4baece5..61caaf8f5 100644 --- a/containers-tests/benchmarks/Utils/Fold.hs +++ b/containers-tests/benchmarks/Utils/Fold.hs @@ -37,7 +37,7 @@ foldBenchmarks foldr foldl foldr' foldl' foldMap xs = , bench "foldr_traverseSum" $ whnf foldr_traverseSum xs -- foldl - , bench "foldl_skip" $ whnf foldl_elem xs + , bench "foldl_elem" $ whnf foldl_elem xs , bench "foldl_cpsSum" $ whnf foldl_cpsSum xs , bench "foldl_cpsOneShotSum" $ whnf foldl_cpsOneShotSum xs , bench "foldl_traverseSum" $ whnf foldl_traverseSum xs diff --git a/containers-tests/tests/intset-properties.hs b/containers-tests/tests/intset-properties.hs index 9085fe224..531c90410 100644 --- a/containers-tests/tests/intset-properties.hs +++ b/containers-tests/tests/intset-properties.hs @@ -64,10 +64,10 @@ main = defaultMain $ testGroup "intset-properties" , testProperty "prop_findMin" prop_findMin , testProperty "prop_ord" prop_ord , testProperty "prop_readShow" prop_readShow - , testProperty "prop_foldR" prop_foldR - , testProperty "prop_foldR'" prop_foldR' - , testProperty "prop_foldL" prop_foldL - , testProperty "prop_foldL'" prop_foldL' + , testProperty "prop_foldr" prop_foldr + , testProperty "prop_foldr'" prop_foldr' + , testProperty "prop_foldl" prop_foldl + , testProperty "prop_foldl'" prop_foldl' , testProperty "prop_foldMap" prop_foldMap , testProperty "prop_map" prop_map , testProperty "prop_mapMonotonicId" prop_mapMonotonicId @@ -370,17 +370,17 @@ prop_ord s1 s2 = s1 `compare` s2 == toList s1 `compare` toList s2 prop_readShow :: IntSet -> Bool prop_readShow s = s == read (show s) -prop_foldR :: IntSet -> Bool -prop_foldR s = foldr (:) [] s == toList s +prop_foldr :: IntSet -> Property +prop_foldr s = foldr (:) [] s === toList s -prop_foldR' :: IntSet -> Bool -prop_foldR' s = foldr' (:) [] s == toList s +prop_foldr' :: IntSet -> Property +prop_foldr' s = foldr' (:) [] s === toList s -prop_foldL :: IntSet -> Bool -prop_foldL s = foldl (flip (:)) [] s == List.foldl (flip (:)) [] (toList s) +prop_foldl :: IntSet -> Property +prop_foldl s = foldl (flip (:)) [] s === toDescList s -prop_foldL' :: IntSet -> Bool -prop_foldL' s = foldl' (flip (:)) [] s == List.foldl' (flip (:)) [] (toList s) +prop_foldl' :: IntSet -> Property +prop_foldl' s = foldl' (flip (:)) [] s === toDescList s prop_foldMap :: IntSet -> Property prop_foldMap s = foldMap (:[]) s === toList s diff --git a/containers/src/Data/IntSet/Internal.hs b/containers/src/Data/IntSet/Internal.hs index 742571db8..feec8ee78 100644 --- a/containers/src/Data/IntSet/Internal.hs +++ b/containers/src/Data/IntSet/Internal.hs @@ -1699,10 +1699,6 @@ takeWhileAntitoneBits :: Int -> (Int -> Bool) -> Word -> Word #if defined(__GLASGOW_HASKELL__) -lowestBitMask :: Word -> Word -lowestBitMask x = x .&. negate x -{-# INLINE lowestBitMask #-} - lowestBitSet x = countTrailingZeros x highestBitSet x = WORD_SIZE_IN_BITS - 1 - countLeadingZeros x @@ -1724,45 +1720,53 @@ revWord x1 = case ((x1 `shiftRL` 1) .&. 0x5555555555555555) .|. ((x1 .&. 0x55555 x6 -> ( x6 `shiftRL` 32 ) .|. ( x6 `shiftLL` 32); #endif -foldlBits prefix f z bitmap = go bitmap z - where go 0 acc = acc - go bm acc = go (bm `xor` bitmask) ((f acc) $! (prefix+bi)) - where - !bitmask = lowestBitMask bm - !bi = countTrailingZeros bitmask - -foldl'Bits prefix f z bitmap = go bitmap z - where go 0 acc = acc - go bm !acc = go (bm `xor` bitmask) ((f acc) $! (prefix+bi)) - where !bitmask = lowestBitMask bm - !bi = countTrailingZeros bitmask - -foldrBits prefix f z bitmap = go (revWord bitmap) z - where go 0 acc = acc - go bm acc = go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc) - where !bitmask = lowestBitMask bm - !bi = countTrailingZeros bitmask - - -foldr'Bits prefix f z bitmap = go (revWord bitmap) z - where go 0 acc = acc - go bm !acc = go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc) - where !bitmask = lowestBitMask bm - !bi = countTrailingZeros bitmask - -foldMapBits prefix f bitmap = go (prefix + bi0) (bitmap `xor` bitmask0) +foldlBits prefix f z0 bitmap = go z0 (revWord bitmap) + where + -- Note: We pass the z as a static argument because it helps GHC with demand + -- analysis. See GHC #25578 for details. + go z !bm = f (if bm' == 0 then z else go z bm') (prefix .|. bi) + where + bi = WORD_SIZE_IN_BITS - 1 - countTrailingZeros bm + bm' = bm .&. (bm-1) + +foldl'Bits prefix f z0 bitmap = go z0 bitmap + where + go !z !bm = if bm' == 0 then z' else go z' bm' + where + bi = countTrailingZeros bm + !z' = f z (prefix .|. bi) + bm' = bm .&. (bm-1) + +foldrBits prefix f z0 bitmap = go bitmap z0 + where + -- Note: We pass the z as a static argument because it helps GHC with demand + -- analysis. See GHC #25578 for details. + go !bm z = f (prefix .|. bi) (if bm' == 0 then z else go bm' z) + where + bi = countTrailingZeros bm + bm' = bm .&. (bm-1) + +foldr'Bits prefix f z0 bitmap = go (revWord bitmap) z0 + where + go !bm !z = if bm' == 0 then z' else go bm' z' + where + bi = WORD_SIZE_IN_BITS - 1 - countTrailingZeros bm + !z' = f (prefix .|. bi) z + bm' = bm .&. (bm-1) + +foldMapBits prefix f bitmap = go bitmap where - bitmask0 = lowestBitMask bitmap - bi0 = countTrailingZeros bitmask0 - go !x 0 = f x + go !bm = if bm' == 0 + then f x #if MIN_VERSION_base(4,11,0) - go !x bm = f x <> go (prefix + bi) (bm `xor` bitmask) + else f x <> go bm' #else - go !x bm = f x `mappend` go (prefix + bi) (bm `xor` bitmask) + else f x `mappend` go bm' #endif where - bitmask = lowestBitMask bm - bi = countTrailingZeros bitmask + bi = countTrailingZeros bm + x = prefix .|. bi + bm' = bm .&. (bm-1) takeWhileAntitoneBits prefix predicate bitmap = -- Binary search for the first index where the predicate returns false, but skip a predicate