From 193daa9c81bb6f591cf35503ba4537e6c070c44f Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Thu, 13 Jun 2024 10:26:59 +0200 Subject: [PATCH] bench: Avoid lists and excessive inlining This should make the benches faster and more reliable to run. --- unicode-data/bench/Unicode/Char/Bench.hs | 166 +++++++++++++++++++---- unicode-data/unicode-data.cabal | 3 + 2 files changed, 143 insertions(+), 26 deletions(-) diff --git a/unicode-data/bench/Unicode/Char/Bench.hs b/unicode-data/bench/Unicode/Char/Bench.hs index 97b0947e..e4ce220d 100644 --- a/unicode-data/bench/Unicode/Char/Bench.hs +++ b/unicode-data/bench/Unicode/Char/Bench.hs @@ -1,4 +1,5 @@ {-# LANGUAGE LambdaCase #-} +{-# LANGUAGE CPP #-} module Unicode.Char.Bench ( Bench(..) @@ -8,23 +9,31 @@ module Unicode.Char.Bench , benchCharsNF ) where -import Control.DeepSeq (NFData, deepseq, force) -import Control.Exception (evaluate) -import Test.Tasty.Bench (Benchmark, bgroup, bench, bcompare, env, nf) -import Test.Tasty.Options - ( IsOption(defaultValue, optionHelp, optionName, parseValue) ) - +import Control.DeepSeq (NFData (..), deepseq) +import Control.Exception (evaluate, assert) +import Data.Char (ord) import qualified Data.Char as Char +import Foreign (Storable (..)) +import qualified GHC.Exts as Exts +import GHC.IO (IO (..)) +import Test.Tasty.Bench (Benchmark, bcompare, bench, bgroup, env, nf) +import Test.Tasty.Options ( + IsOption (defaultValue, optionHelp, optionName, parseValue), + ) +#if !MIN_VERSION_base(4,15,0) +import qualified GHC.Magic as Exts (noinline) +#endif + import qualified Unicode.Char.General as G --- | A unit benchmark -data Bench a = Bench - { _title :: !String -- ^ Name - , _func :: Char -> a -- ^ Function to benchmark - } +-------------------------------------------------------------------------------- +-- Char range +-------------------------------------------------------------------------------- +-- | Characters range data CharRange = CharRange !Char !Char +-- | Characters range 
configurable from CLI instance IsOption CharRange where defaultValue = CharRange minBound maxBound parseValue = \case @@ -38,8 +47,19 @@ instance IsOption CharRange where optionName = pure "chars" optionHelp = pure "Range of chars to test" +-------------------------------------------------------------------------------- +-- Characters benchmark +-------------------------------------------------------------------------------- + +-- | A unit benchmark +data Bench a = Bench + { _title :: !String -- ^ Name + , _func :: Char -> a -- ^ Function to benchmark + } + +-- | Helper to compare benchmarks of functions from this package to ones in base. {-# INLINE bgroup' #-} -bgroup' :: NFData a => String -> CharRange -> [Bench a] -> Benchmark +bgroup' :: (NFData a) => String -> CharRange -> [Bench a] -> Benchmark bgroup' groupTitle charRange bs = bgroup groupTitle [ benchChars' title f | Bench title f <- bs @@ -55,6 +75,7 @@ bgroup' groupTitle charRange bs = bgroup groupTitle bcompare' ref = bcompare (mconcat ["$NF == \"", ref, "\" && $(NF-1) == \"", groupTitle, "\""]) +-- | Helper to bench a char function on a filtered char range {-# INLINE benchChars #-} benchChars :: (NFData a) @@ -67,6 +88,13 @@ benchChars t charRange = benchCharsNF t charRange isValid -- Filter out: Surrogates, Private Use Areas and unsassigned code points isValid c = G.generalCategory c < G.Surrogate +-- | Pinned array of characters +data Chars = Chars !Exts.ByteArray# !Int + +instance NFData Chars where + rnf (Chars !_ !_) = () + +-- | Helper that handles the creation of the pinned chars array and the loop over it {-# INLINE benchCharsNF #-} benchCharsNF :: forall a. (NFData a) @@ -75,18 +103,104 @@ benchCharsNF -> (Char -> Bool) -> (Char -> a) -> Benchmark -benchCharsNF t charRange isValid f = - -- Avoid side-effects with garbage collection (see tasty-bench doc) - env - (evaluate (force chars')) -- initialize - (bench t .
nf (foldString f)) -- benchmark +benchCharsNF title charRange isValid f = + -- Avoid side-effects with garbage collection (see tasty-bench doc for env). + -- We use pinned ByteArray# instead of lists to keep the GC from kicking in. + env (initialize isValid charRange >>= evaluate) (bench title . nf go) + where + -- Loop over the pinned char array. The loop itself does not allocate. + go (Chars cs len) = foldr + (\(Exts.I# k) -> + let c = Exts.indexWideCharArray# cs (k Exts.-# 1#) + -- `noinline` is necessary to avoid excessive inlining, resulting + -- in benchmarking empty loop iterations, i.e. not the function. + -- We could use `noinline` with more care at call site, but then we + -- would have to test the functions one by one and every time we + -- modify them. Using it here is a hammer but more secure and + -- maintainable. + -- Note that we may improve this by controlling the inlining for each + -- phase. + in deepseq (Exts.noinline f (Exts.C# c))) + () + [1..len] + +-- | Create a byte array of the chars to bench +initialize :: (Char -> Bool) -> CharRange -> IO Chars +initialize isValid charRange = IO $ \s1 -> + case Exts.newPinnedByteArray# initialLength s1 of { (# s2, ma #) -> + -- Write the filtered char range + case writeChars isValid ma 0# s2 start end of { (# s3, filteredCount #) -> + -- Duplicate to get enough chars to bench + case tile ma 0# finalLength filteredLength s3 of { s4 -> + case Exts.unsafeFreezeByteArray# ma s4 of { (# s5, a #) -> + (# s5, Chars a (Exts.I# (replications Exts.*# filteredCount)) #) + }} + where + -- Ensure we have enough chars + replications = case Exts.quotInt# targetCharsCount filteredCount of + 0# -> 1# + r# -> r# + filteredLength = filteredCount Exts.*# wcharSize + finalLength = filteredLength Exts.*# replications + }} + where + targetCharsCount = 0x10FFFF# + !(CharRange start end) = assert + (ord end - ord start + 1 < Exts.I# targetCharsCount) + charRange + !initialLength = targetCharsCount Exts.*# wcharSize + !(Exts.I# wcharSize) =
sizeOf 'x' + +-- | Write a range of chars that match the given predicate +writeChars :: + (Char -> Bool) -> + Exts.MutableByteArray# d -> + Exts.Int# -> + Exts.State# d -> + Char -> + Char -> + (# Exts.State# d, Exts.Int# #) +writeChars isValid ma = go + where + go i s c1@(Exts.C# c1#) !c2 = if c1 < c2 + then go i' s' (succ c1) c2 + else (# s', i' #) + where + !(# s', i' #) = if isValid c1 + then (# Exts.writeWideCharArray# ma i c1# s, i Exts.+# 1# #) + else (# s, i #) + +-- | Duplicate a portion of an array +-- +-- Adapted from Data.Text.Array.tile +tile :: + -- | Mutable array + Exts.MutableByteArray# s -> + -- | Start of the portion to duplicate + Exts.Int# -> + -- | Total length of the duplicate + Exts.Int# -> + -- | Length of the portion to duplicate + Exts.Int# -> + Exts.State# s -> + Exts.State# s +tile dest destOff totalLen = go where - CharRange l u = charRange - chars = filter isValid [l..u] - -- Ensure to have sufficiently chars - n = 0x10FFFF `div` length chars - chars' = mconcat (replicate n chars) - -{-# INLINE foldString #-} -foldString :: forall a. (NFData a) => (Char -> a) -> String -> () -foldString f = foldr (deepseq . 
f) () + go l s + | Exts.isTrue# ((2# Exts.*# l) Exts.># totalLen) = + Exts.copyMutableByteArray# + dest + destOff + dest + (destOff Exts.+# l) + (totalLen Exts.-# l) + s + | otherwise = + case Exts.copyMutableByteArray# + dest + destOff + dest + (destOff Exts.+# l) + l + s of + s' -> go (2# Exts.*# l) s' diff --git a/unicode-data/unicode-data.cabal b/unicode-data/unicode-data.cabal index 348ea65d..7d5aa957 100644 --- a/unicode-data/unicode-data.cabal +++ b/unicode-data/unicode-data.cabal @@ -148,6 +148,9 @@ benchmark bench tasty-bench >= 0.2.5 && < 0.4, tasty >= 1.4.1 && < 1.6, unicode-data + if impl(ghc < 9.0) + -- Required for noinline + build-depends: ghc-prim -- [NOTE] Recommendation of tasty-bench to reduce garbage collection noisiness ghc-options: -O2 -fdicts-strict -rtsopts -with-rtsopts=-A32m -- [NOTE] Recommendation of tasty-bench for comparison against baseline