Skip to content

Commit

Permalink
bench: Avoid lists and excessive inlining
Browse files Browse the repository at this point in the history
This should make the benches faster and more reliable to run.
  • Loading branch information
wismill committed Jun 13, 2024
1 parent dfb502e commit 193daa9
Show file tree
Hide file tree
Showing 2 changed files with 143 additions and 26 deletions.
166 changes: 140 additions & 26 deletions unicode-data/bench/Unicode/Char/Bench.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE CPP #-}

module Unicode.Char.Bench
( Bench(..)
Expand All @@ -8,23 +9,31 @@ module Unicode.Char.Bench
, benchCharsNF
) where

import Control.DeepSeq (NFData, deepseq, force)
import Control.Exception (evaluate)
import Test.Tasty.Bench (Benchmark, bgroup, bench, bcompare, env, nf)
import Test.Tasty.Options
( IsOption(defaultValue, optionHelp, optionName, parseValue) )

import Control.DeepSeq (NFData (..), deepseq)
import Control.Exception (evaluate, assert)
import Data.Char (ord)
import qualified Data.Char as Char
import Foreign (Storable (..))
import qualified GHC.Exts as Exts
import GHC.IO (IO (..))
import Test.Tasty.Bench (Benchmark, bcompare, bench, bgroup, env, nf)
import Test.Tasty.Options (
IsOption (defaultValue, optionHelp, optionName, parseValue),
)
#if !MIN_VERSION_base(4,15,0)
import qualified GHC.Magic as Exts (noinline)
#endif

import qualified Unicode.Char.General as G

-- | A unit benchmark
data Bench a = Bench
{ _title :: !String -- ^ Name
, _func :: Char -> a -- ^ Function to benchmark
}
--------------------------------------------------------------------------------
-- Char range
--------------------------------------------------------------------------------

-- | Characters range
data CharRange = CharRange !Char !Char

-- | Characters range configurable from CLI
instance IsOption CharRange where
defaultValue = CharRange minBound maxBound
parseValue = \case
Expand All @@ -38,8 +47,19 @@ instance IsOption CharRange where
optionName = pure "chars"
optionHelp = pure "Range of chars to test"

--------------------------------------------------------------------------------
-- Characters benchmark
--------------------------------------------------------------------------------

-- | A unit benchmark
data Bench a = Bench
{ _title :: !String -- ^ Name
, _func :: Char -> a -- ^ Function to benchmark
}

-- | Helper to compare benchmarks of function from this package to ones in base.
{-# INLINE bgroup' #-}
bgroup' :: NFData a => String -> CharRange -> [Bench a] -> Benchmark
bgroup' :: (NFData a) => String -> CharRange -> [Bench a] -> Benchmark
bgroup' groupTitle charRange bs = bgroup groupTitle
[ benchChars' title f
| Bench title f <- bs
Expand All @@ -55,6 +75,7 @@ bgroup' groupTitle charRange bs = bgroup groupTitle
bcompare' ref = bcompare
(mconcat ["$NF == \"", ref, "\" && $(NF-1) == \"", groupTitle, "\""])

-- | Helper to bench a char function on a filtered char range
{-# INLINE benchChars #-}
benchChars
:: (NFData a)
Expand All @@ -67,6 +88,13 @@ benchChars t charRange = benchCharsNF t charRange isValid
-- Filter out: Surrogates, Private Use Areas and unsassigned code points
isValid c = G.generalCategory c < G.Surrogate

-- | Pinned array of characters
data Chars = Chars !Exts.ByteArray# !Int

instance NFData Chars where
rnf (Chars !_ !_) = ()

-- | Helper that handle the creation of the pinned chars array and the loop over it
{-# INLINE benchCharsNF #-}
benchCharsNF
:: forall a. (NFData a)
Expand All @@ -75,18 +103,104 @@ benchCharsNF
-> (Char -> Bool)
-> (Char -> a)
-> Benchmark
benchCharsNF t charRange isValid f =
-- Avoid side-effects with garbage collection (see tasty-bench doc)
env
(evaluate (force chars')) -- initialize
(bench t . nf (foldString f)) -- benchmark
benchCharsNF title charRange isValid f =
-- Avoid side-effects with garbage collection (see tasty-bench doc for env).
-- We use pinned ByteArray# instead of lists to avoid that GC kicks in.
env (initialize isValid charRange >>= evaluate) (bench title . nf go)
where
-- Loop over the pinned char array. The loop itself does not allocate.
go (Chars cs len) = foldr
(\(Exts.I# k) ->
let c = Exts.indexWideCharArray# cs (k Exts.-# 1#)
-- `inline` is necessary to avoid excessive inlining, resulting
-- in benchmarking empty loop iterations, i.e. not the function.
-- We could use `inline` with more care at call site, but then we
-- would have to test the functions one by one and everytime we
-- modify them. Using it here is a hammer but more secure and
-- maintainable.
-- Note that we may improve this by controling the inlining for each
-- phase.
in deepseq (Exts.noinline f (Exts.C# c)))
()
[1..len]

-- | Create a byte array of the chars to bench
initialize :: (Char -> Bool) -> CharRange -> IO Chars
initialize isValid charRange = IO $ \s1 ->
case Exts.newPinnedByteArray# initialLength s1 of { (# s2, ma #) ->
-- Write the filtered char range
case writeChars isValid ma 0# s2 start end of { (# s3, filteredCount #) ->
-- Duplicate to get enough chars to bench
case tile ma 0# finalLength filteredLength s3 of { s4 ->
case Exts.unsafeFreezeByteArray# ma s4 of { (# s5, a #) ->
(# s5, Chars a (Exts.I# (replications Exts.*# filteredCount)) #)
}}
where
-- Ensure to have enough chars
replications = case Exts.quotInt# targetCharsCount filteredCount of
0# -> 1#
r# -> r#
filteredLength = filteredCount Exts.*# wcharSize
finalLength = filteredLength Exts.*# replications
}}
where
targetCharsCount = 0x10FFFF#
!(CharRange start end) = assert
(ord end - ord start + 1 < Exts.I# targetCharsCount)
charRange
!initialLength = targetCharsCount Exts.*# wcharSize
!(Exts.I# wcharSize) = sizeOf 'x'

-- | Write a range of chars that match the given predicate
writeChars ::
(Char -> Bool) ->
Exts.MutableByteArray# d ->
Exts.Int# ->
Exts.State# d ->
Char ->
Char ->
(# Exts.State# d, Exts.Int# #)
writeChars isValid ma = go
where
go i s c1@(Exts.C# c1#) !c2 = if c1 < c2
then go i' s' (succ c1) c2
else (# s', i' #)
where
!(# s', i' #) = if isValid c1
then (# Exts.writeWideCharArray# ma i c1# s, i Exts.+# 1# #)
else (# s, i #)

-- | Duplicate a portion of an array
--
-- Adapted from Data.Text.Array.tile
tile ::
-- | Mutable array
Exts.MutableByteArray# s ->
-- | Start of the portion to duplicate
Exts.Int# ->
-- | Total length of the duplicate
Exts.Int# ->
-- | Length of the portion to duplicate
Exts.Int# ->
Exts.State# s ->
Exts.State# s
tile dest destOff totalLen = go
where
CharRange l u = charRange
chars = filter isValid [l..u]
-- Ensure to have sufficiently chars
n = 0x10FFFF `div` length chars
chars' = mconcat (replicate n chars)

{-# INLINE foldString #-}
foldString :: forall a. (NFData a) => (Char -> a) -> String -> ()
foldString f = foldr (deepseq . f) ()
go l s
| Exts.isTrue# ((2# Exts.*# l) Exts.># totalLen) =
Exts.copyMutableByteArray#
dest
destOff
dest
(destOff Exts.+# l)
(totalLen Exts.-# l)
s
| otherwise =
case Exts.copyMutableByteArray#
dest
destOff
dest
(destOff Exts.+# l)
l
s of
s' -> go (2# Exts.*# l) s'
3 changes: 3 additions & 0 deletions unicode-data/unicode-data.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ benchmark bench
tasty-bench >= 0.2.5 && < 0.4,
tasty >= 1.4.1 && < 1.6,
unicode-data
if impl(ghc < 9.0)
-- Required for noinline
build-depends: ghc-prim
-- [NOTE] Recommendation of tasty-bench to reduce garbage collection noisiness
ghc-options: -O2 -fdicts-strict -rtsopts -with-rtsopts=-A32m
-- [NOTE] Recommendation of tasty-bench for comparison against baseline
Expand Down

0 comments on commit 193daa9

Please sign in to comment.