bench: Avoid lists and excessive inlining

This should make the benches faster and more reliable to run.
composewell · Jun 14, 2024 · 6efd326 · 6efd326
1 parent dfb502e
commit 6efd326
Show file tree

Hide file tree

Showing 11 changed files with 445 additions and 225 deletions.
diff --git a/unicode-data/bench/Unicode/Char/Bench.hs b/unicode-data/bench/Unicode/Char/Bench.hs
@@ -1,30 +1,65 @@
 {-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE CPP #-}
 
 module Unicode.Char.Bench
-    ( Bench(..)
+    ( -- Range benchmark
+      benchRange
+      -- Char benchmark
+    , Bench(..)
     , CharRange(..)
-    , bgroup'
-    , benchChars
-    , benchCharsNF
+    , bgroupWithValidCharRange
+    , bgroupWithValidCharRange'
+    , bgroupWithCharRange
+    , bgroupWithCharRange'
+    , bgroupWithChars
     ) where
 
-import Control.DeepSeq (NFData, deepseq, force)
-import Control.Exception (evaluate)
-import Test.Tasty.Bench (Benchmark, bgroup, bench, bcompare, env, nf)
-import Test.Tasty.Options
-    ( IsOption(defaultValue, optionHelp, optionName, parseValue) )
-
+import Control.DeepSeq (NFData (..), deepseq)
+import Control.Exception (evaluate, assert)
+import Data.Char (ord)
 import qualified Data.Char as Char
+import Foreign (Storable (..))
+import qualified GHC.Exts as Exts
+import GHC.IO (IO (..))
+import Test.Tasty.Bench (Benchmark, bcompare, bench, bgroup, env, nf)
+import Test.Tasty.Options (
+    IsOption (defaultValue, optionHelp, optionName, parseValue),
+ )
+#if MIN_VERSION_base(4,10,0) && !MIN_VERSION_base(4,15,0)
+import qualified GHC.Magic as Exts (noinline)
+#endif
+
 import qualified Unicode.Char.General as G
+import Data.Ix (Ix (..))
 
--- | A unit benchmark
-data Bench a = Bench
-  { _title :: !String  -- ^ Name
-  , _func :: Char -> a -- ^ Function to benchmark
-  }
+--------------------------------------------------------------------------------
+-- Range benchmark
+--------------------------------------------------------------------------------
 
+{-# INLINE benchRange #-}
+benchRange
+    :: forall a b. (Bounded a, Ix a, NFData b)
+    => String
+    -> (a -> b)
+    -> Benchmark
+benchRange t f = bench t (nf (fold_ f) (minBound, maxBound))
+
+{-# INLINE fold_ #-}
+fold_
+    :: forall a b. (Ix a, NFData b)
+    => (a -> b)
+    -> (a, a)
+    -> ()
+fold_ f = foldr (deepseq . f) () . range
+
+--------------------------------------------------------------------------------
+-- Char range
+--------------------------------------------------------------------------------
+
+-- | Characters range
 data CharRange = CharRange !Char !Char
 
+-- | Characters range configurable from CLI
 instance IsOption CharRange where
     defaultValue = CharRange minBound maxBound
     parseValue = \case
@@ -38,55 +73,213 @@ instance IsOption CharRange where
     optionName = pure "chars"
     optionHelp = pure "Range of chars to test"
 
-{-# INLINE bgroup' #-}
-bgroup' :: NFData a => String -> CharRange -> [Bench a] -> Benchmark
-bgroup' groupTitle charRange bs = bgroup groupTitle
-    [ benchChars' title f
-    | Bench title f <- bs
-    ]
+--------------------------------------------------------------------------------
+-- Characters benchmark
+--------------------------------------------------------------------------------
+
+-- | A unit benchmark
+data Bench a = Bench
+  { _title :: !String     -- ^ Name
+  , _func :: !(Char -> a) -- ^ Function to benchmark
+  }
+
+-- | Helper to compare benchmarks of function from this package to ones in base.
+{-# INLINE bgroupWithValidCharRange #-}
+bgroupWithValidCharRange ::
+    String ->
+    CharRange ->
+    (Char -> Bool) ->
+    (Chars -> [Benchmark]) ->
+    Benchmark
+bgroupWithValidCharRange groupTitle charRange isValid mkBenches =
+    -- Avoid side-effects with garbage collection (see tasty-bench doc for env).
+    -- We use pinned ByteArray# instead of lists to avoid that GC kicks in.
+    env
+        (initialize isValid charRange >>= evaluate)
+        (bgroup groupTitle . mkBenches)
+
+-- | Helper to compare benchmarks of function from this package to ones in base.
+-- Filter out Surrogates, Private Use Areas and unsassigned code points.
+{-# INLINE bgroupWithCharRange #-}
+bgroupWithCharRange ::
+    String ->
+    CharRange ->
+    (Chars -> [Benchmark]) ->
+    Benchmark
+bgroupWithCharRange title charRange =
+    bgroupWithValidCharRange title charRange isValid
     where
-    {-# INLINE benchChars' #-}
-    benchChars' title = case title of
-        "base" -> benchChars title charRange
-        _      -> bcompare' "base" . benchChars title charRange
+    isValid c = G.generalCategory c < G.Surrogate
+
+-- | Variant of 'bgroupWithValidCharRange'
+{-# INLINE bgroupWithValidCharRange' #-}
+bgroupWithValidCharRange' ::
+    (NFData a) =>
+    String ->
+    CharRange ->
+    (Char -> Bool) ->
+    [Bench a] ->
+    Benchmark
+bgroupWithValidCharRange' groupTitle charRange isValid bs =
+    bgroupWithValidCharRange groupTitle charRange isValid $ \chars ->
+        [ benchCharsRange groupTitle title chars f
+        | Bench title f <- bs
+        ]
 
+{-# INLINE benchCharsRange #-}
+benchCharsRange :: NFData a => String -> String -> Chars -> (Char -> a) -> Benchmark
+benchCharsRange groupTitle title chars = case title of
+    "base" -> benchCharsNF title chars
+    _      -> bcompare' "base" . benchCharsNF title chars
+    where
     {-# INLINE bcompare' #-}
     -- [NOTE] Works if groupTitle uniquely identifies the benchmark group.
     bcompare' ref = bcompare
         (mconcat ["$NF == \"", ref, "\" && $(NF-1) == \"", groupTitle, "\""])
 
-{-# INLINE benchChars #-}
-benchChars
-    :: (NFData a)
-    => String
-    -> CharRange
-    -> (Char -> a)
-    -> Benchmark
-benchChars t charRange = benchCharsNF t charRange isValid
+-- | Variant of 'bgroupWithCharRange'
+{-# INLINE bgroupWithCharRange' #-}
+bgroupWithCharRange' ::
+    (NFData a) =>
+    String ->
+    CharRange ->
+    [Bench a] ->
+    Benchmark
+bgroupWithCharRange' groupTitle charRange =
+    bgroupWithValidCharRange' groupTitle charRange isValid
     where
-    -- Filter out: Surrogates, Private Use Areas and unsassigned code points
     isValid c = G.generalCategory c < G.Surrogate
 
+-- | Helper to compare benchmarks of function from this package to ones in base.
+{-# INLINE bgroupWithChars #-}
+bgroupWithChars :: (NFData a) => String -> Chars -> [Bench a] -> Benchmark
+bgroupWithChars groupTitle chars bs = bgroup groupTitle
+    [ benchCharsRange groupTitle title chars f
+    | Bench title f <- bs
+    ]
+
+-- | Helper to bench a char function on a filtered char range
 {-# INLINE benchCharsNF #-}
 benchCharsNF
-    :: forall a. (NFData a)
+    :: (NFData a)
     => String
-    -> CharRange
-    -> (Char -> Bool)
+    -> Chars
     -> (Char -> a)
     -> Benchmark
-benchCharsNF t charRange isValid f =
-    -- Avoid side-effects with garbage collection (see tasty-bench doc)
-    env
-        (evaluate (force chars'))     -- initialize
-        (bench t . nf (foldString f)) -- benchmark
+benchCharsNF title chars f = bench title (nf (foldrChars f) chars)
+
+--------------------------------------------------------------------------------
+-- Chars byte array
+--------------------------------------------------------------------------------
+
+-- | Pinned array of characters
+data Chars = Chars !Exts.ByteArray# !Int
+
+instance NFData Chars where
+    rnf (Chars !_ !_) = ()
+
+-- | Fold over a chars byte array
+foldrChars :: NFData a => (Char -> a) -> Chars -> ()
+foldrChars f = go
+    where
+    -- Loop over the pinned char array. The loop itself does not allocate.
+    go (Chars cs len) = foldr
+        (\(Exts.I# k) ->
+            let c = Exts.indexWideCharArray# cs (k Exts.-# 1#)
+#if MIN_VERSION_base(4,10,0)
+            -- `inline` is necessary to avoid excessive inlining, resulting
+            -- in benchmarking empty loop iterations, i.e. not the function.
+            -- We could use `inline` with more care at call site, but then we
+            -- would have to test the functions one by one and everytime we
+            -- modify them. Using it here is a hammer but more secure and
+            -- maintainable.
+            -- Note that we may improve this by controling the inlining for each
+            -- phase.
+            in deepseq (Exts.noinline f (Exts.C# c)))
+#else
+            -- HACK: No `inline` for GHC < 8.2. Should we drop support?
+            in deepseq (f (Exts.C# c)))
+#endif
+        ()
+        [1..len]
+
+-- | Create a byte array of the chars to bench
+initialize :: (Char -> Bool) -> CharRange -> IO Chars
+initialize isValid charRange = IO $ \s1 ->
+    case Exts.newPinnedByteArray# initialLength s1 of { (# s2, ma #) ->
+    -- Write the filtered char range
+    case writeChars isValid ma 0# s2 start end of { (# s3, filteredCount #) ->
+    -- Duplicate to get enough chars to bench
+    case tile ma 0# finalLength filteredLength s3 of { s4 ->
+    case Exts.unsafeFreezeByteArray# ma s4 of { (# s5, a #) ->
+        (# s5, Chars a (Exts.I# (replications Exts.*# filteredCount)) #)
+    }}
+       where
+       -- Ensure to have enough chars
+       replications = case Exts.quotInt# targetCharsCount filteredCount of
+           0# -> 1#
+           r# -> r#
+       filteredLength = filteredCount Exts.*# wcharSize
+       finalLength = filteredLength Exts.*# replications
+   }}
+    where
+    targetCharsCount = 0x10FFFF#
+    !(CharRange start end) = assert
+        (ord end - ord start + 1 < Exts.I# targetCharsCount)
+        charRange
+    !initialLength = targetCharsCount Exts.*# wcharSize
+    !(Exts.I# wcharSize) = sizeOf 'x'
+
+-- | Write a range of chars that match the given predicate
+writeChars ::
+    (Char -> Bool) ->
+    Exts.MutableByteArray# d ->
+    Exts.Int# ->
+    Exts.State# d ->
+    Char ->
+    Char ->
+    (# Exts.State# d, Exts.Int# #)
+writeChars isValid ma = go
+    where
+    go i s c1@(Exts.C# c1#) !c2 = if c1 < c2
+        then go i' s' (succ c1) c2
+        else (# s', i' #)
+        where
+        !(# s', i' #) = if isValid c1
+            then (# Exts.writeWideCharArray# ma i c1# s, i Exts.+# 1# #)
+            else (# s, i #)
+
+-- | Duplicate a portion of an array
+--
+-- Adapted from Data.Text.Array.tile
+tile ::
+    -- | Mutable array
+    Exts.MutableByteArray# s ->
+    -- | Start of the portion to duplicate
+    Exts.Int# ->
+    -- | Total length of the duplicate
+    Exts.Int# ->
+    -- | Length of the portion to duplicate
+    Exts.Int# ->
+    Exts.State# s ->
+    Exts.State# s
+tile dest destOff totalLen = go
     where
-    CharRange l u = charRange
-    chars = filter isValid [l..u]
-    -- Ensure to have sufficiently chars
-    n = 0x10FFFF `div` length chars
-    chars' = mconcat (replicate n chars)
-
-{-# INLINE foldString #-}
-foldString :: forall a. (NFData a) => (Char -> a) -> String -> ()
-foldString f = foldr (deepseq . f) ()
+    go l s
+        | Exts.isTrue# ((2# Exts.*# l) Exts.># totalLen) =
+            Exts.copyMutableByteArray#
+                dest
+                destOff
+                dest
+                (destOff Exts.+# l)
+                (totalLen Exts.-# l)
+                s
+        | otherwise =
+            case Exts.copyMutableByteArray#
+                    dest
+                    destOff
+                    dest
+                    (destOff Exts.+# l)
+                    l
+                    s of
+                        s' -> go (2# Exts.*# l) s'
diff --git a/unicode-data/bench/Unicode/Char/Case/CompatBench.hs b/unicode-data/bench/Unicode/Char/Case/CompatBench.hs
@@ -2,32 +2,37 @@ module Unicode.Char.Case.CompatBench
     ( benchmarks
     ) where
 
-import Test.Tasty.Bench ( bgroup, Benchmark )
-
 import qualified Data.Char as Char
-import Unicode.Char.Bench (CharRange, Bench(..), bgroup')
+import Test.Tasty.Bench (Benchmark)
+
+import Unicode.Char.Bench (
+    Bench (..),
+    CharRange,
+    bgroupWithCharRange,
+    bgroupWithChars,
+ )
 import qualified Unicode.Char.Case.Compat as CC
 
 {-# NOINLINE benchmarks #-}
 benchmarks :: CharRange -> Benchmark
-benchmarks charRange = bgroup "Unicode.Char.Case.Compat"
-    [ bgroup' "isLower" charRange
+benchmarks r = bgroupWithCharRange "Unicode.Char.Case.Compat" r $ \chars ->
+    [ bgroupWithChars "isLower" chars
       [ Bench "base"         Char.isLower
       , Bench "unicode-data" CC.isLower
       ]
-    , bgroup' "isUpper" charRange
+    , bgroupWithChars "isUpper" chars
       [ Bench "base"         Char.isUpper
       , Bench "unicode-data" CC.isUpper
       ]
-    , bgroup' "toLower" charRange
+    , bgroupWithChars "toLower" chars
       [ Bench "base"         Char.toLower
       , Bench "unicode-data" CC.toLower
       ]
-    , bgroup' "toTitle" charRange
+    , bgroupWithChars "toTitle" chars
       [ Bench "base"         Char.toTitle
       , Bench "unicode-data" CC.toTitle
       ]
-    , bgroup' "toUpper" charRange
+    , bgroupWithChars "toUpper" chars
       [ Bench "base"         Char.toUpper
       , Bench "unicode-data" CC.toUpper
       ]