From c78033e966d1ca0d67e6fde8c5ade3d173c80781 Mon Sep 17 00:00:00 2001 From: Adithya Kumar Date: Thu, 21 Sep 2023 19:53:21 +0530 Subject: [PATCH] Add the initial bench-suite benchmarking Int list & Text --- benchmark/Main.hs | 194 +++++++++++++++++++++++++++++ streamly-serialize-instances.cabal | 46 ++++++- 2 files changed, 238 insertions(+), 2 deletions(-) create mode 100644 benchmark/Main.hs diff --git a/benchmark/Main.hs b/benchmark/Main.hs new file mode 100644 index 0000000..c5b1523 --- /dev/null +++ b/benchmark/Main.hs @@ -0,0 +1,194 @@ +-- | +-- Copyright : (c) 2022 Composewell technologies +-- License : Apache-2.0 +-- Maintainer : streamly@composewell.com +-- Stability : experimental +-- Portability : GHC + +module Main (main) where + +------------------------------------------------------------------------------- +-- Imports +------------------------------------------------------------------------------- + +import Control.Monad (when) +import Data.Function ((&)) +import Control.DeepSeq (NFData(..), deepseq) +import System.Random (randomRIO) +import Test.QuickCheck (arbitrary) +import Streamly.Data.Serialize.Instances () +import Control.DeepSeq (force) +import Test.QuickCheck (Gen, generate) +import Streamly.Internal.Data.Unbox (newBytes, MutableByteArray) +import Streamly.Internal.Data.Serialize hiding (encode) + +import qualified Streamly.Data.Stream as Stream +import qualified Data.Text as TextS +import qualified Data.Text.Lazy as TextL + +import Test.Tasty.Bench + +------------------------------------------------------------------------------- +-- Size helpers +------------------------------------------------------------------------------- + +{-# INLINE getSize #-} +getSize :: forall a. Serialize a => a -> Int +getSize = size 0 + +------------------------------------------------------------------------------- +-- Common helpers +------------------------------------------------------------------------------- + +-- Parts of "f" that are dependent on val will not be optimized out. +{-# INLINE loop #-} +loop :: Int -> (a -> IO b) -> a -> IO () +loop count f val = go count val + where + + go n x = do + if n > 0 + then f x >> go (n-1) x + else return () + +-- The first arg of "f" is the environment which is not threaded around in the +-- loop. +{-# INLINE loopWith #-} +loopWith :: Int -> (env -> a -> IO b) -> env -> a -> IO () +loopWith count f e val = go count val + where + + go n x = do + if n > 0 + then f e x >> go (n-1) x + else return () + +benchSink :: NFData b => String -> Int -> (Int -> IO b) -> Benchmark +benchSink name times f = bench name (nfIO (randomRIO (times, times) >>= f)) + +------------------------------------------------------------------------------- +-- Types +------------------------------------------------------------------------------- + +{-# INLINE poke #-} +poke :: Serialize a => MutableByteArray -> a -> IO () +poke arr val = serialize 0 arr val >> return () + +{-# INLINE pokeTimes #-} +pokeTimes :: Serialize a => a -> Int -> IO () +pokeTimes val times = do + let n = getSize val + arr <- newBytes n + loopWith times poke arr val + +{-# INLINE encode #-} +encode :: Serialize a => a -> IO () +encode val = do + let n = getSize val + arr <- newBytes n + serialize 0 arr val >> return () + +{-# INLINE encodeTimes #-} +encodeTimes :: Serialize a => a -> Int -> IO () +encodeTimes val times = loop times encode val + +{-# INLINE peek #-} +peek :: forall a. (NFData a, Serialize a) => + (a, Int) -> MutableByteArray -> IO () +peek (_val, n) arr = do + (_, val1 :: a) <- deserialize 0 arr n + -- If the datatype is not deeply strict or deepseq is not used then use + -- Equality. + -- Ensure that we are actually constructing the type and using it. This + -- is important, otherwise the structure is created and discarded, the + -- cost of creation of the structure is not accounted. Otherwise we may + -- just read the values and discard them. The comparison adds to the + -- cost though. We could use deepseq but then we need to write + -- instances of NFData and ensure that they are correct and perform + -- well. Equality check also ensures correctness. + {- + if (val1 /= val) + then error "peek: no match" + else return () + -} + val1 `deepseq` return () + +{-# INLINE peekTimes #-} +peekTimes :: (NFData a, Serialize a) => Int -> a -> Int -> IO () +peekTimes n val times = do + arr <- newBytes n + _ <- serialize 0 arr val + loopWith times peek (val, n) arr + +{-# INLINE roundtrip #-} +roundtrip :: forall a. (NFData a, Serialize a) => a -> IO () +roundtrip val = do + let n = getSize val + arr <- newBytes n + _ <- serialize 0 arr val + (_, val1 :: a) <- deserialize 0 arr n + -- Do not remove this or use deepseq, see the comments in peek. + {- + if (val1 /= val) + then error "roundtrip: no match" + else return () + -} + -- Note: deepseq is not needed if the datatype is strict + val1 `deepseq` return () + +{-# INLINE roundtripTimes #-} +roundtripTimes :: (NFData a, Serialize a) => a -> Int -> IO () +roundtripTimes val times = loop times roundtrip val + +-------------------------------------------------------------------------------- +-- Benchmarks helpers +-------------------------------------------------------------------------------- + +bencher + :: (NFData b, Serialize b) + => String + -> b + -> Int + -> Benchmark +bencher gname val times = + bgroup gname + [ benchSink "poke" times (pokeTimes val) + , benchSink "encode" times (encodeTimes val) + , let !n = getSize val + in benchSink "peek" times (peekTimes n val) + , benchSink "roundtrip" times (roundtripTimes val) + ] + +-------------------------------------------------------------------------------- +-- Main +-------------------------------------------------------------------------------- + +main :: IO () +main = do + -- Environment + let !intList = force ([1 .. 1000] :: [Int]) + !strictText <- genStrictText 1000 + !lazyText <- do + testSList <- Stream.replicateM 20 (genStrictText 50) & Stream.toList + pure $ force $ TextL.fromChunks testSList + + -- Asserts + when (not (TextS.length strictText == 1000)) + (error "TextS.length strictText == 1000") + when (not (TextL.length lazyText == 1000)) + (error "TextL.length lazyText == 1000") + + -- Benchmarks + defaultMain + [ bencher "[Int]" intList 100 + , bencher "Strict.Text" strictText 100 + , bencher "Lazy.Text" lazyText 100 + ] + + where + + genStrictText n = do + let genChar = generate (arbitrary :: Gen Char) + Stream.replicateM n genChar + & Stream.toList + & fmap (force . TextS.pack) diff --git a/streamly-serialize-instances.cabal b/streamly-serialize-instances.cabal index 3bdd048..124b5d5 100644 --- a/streamly-serialize-instances.cabal +++ b/streamly-serialize-instances.cabal @@ -59,6 +59,12 @@ common warnings -- orphan instances. ghc-options: -Wall -Werror -fno-warn-orphans +common optimization-options + ghc-options: -O2 + -fdicts-strict + -fspec-constr-recursive=16 + -fmax-worker-args=16 + common default-extensions default-extensions: BangPatterns @@ -123,7 +129,7 @@ library -- Base language which the package is written in. default-language: Haskell2010 -test-suite main +test-suite serialization-tests -- Import common warning flags. import: warnings , default-extensions @@ -156,4 +162,40 @@ test-suite main , hspec , QuickCheck , time - , text \ No newline at end of file + , text + +benchmark serialization-benchmarks + -- Import common warning flags. + import: warnings + , default-extensions + , optimization-options + + -- Base language which the package is written in. + default-language: Haskell2010 + + -- Modules included in this executable, other than Main. + -- other-modules: + + -- LANGUAGE extensions used by modules in this package. + -- other-extensions: + + -- The interface type and version of the test suite. + type: exitcode-stdio-1.0 + + -- Directories containing source files. + hs-source-dirs: benchmark + + -- The entrypoint to the test suite. + main-is: Main.hs + + -- Test dependencies. + build-depends: + base + , streamly-core + , streamly-serialize-instances + , random + , deepseq >= 1.4.1 && < 1.5 + , tasty-bench >= 0.3 && < 0.4 + , tasty >= 1.4.1 && < 1.5 + , text + , QuickCheck