From b2f6356ac4ed3af97d6faa7d5cbe1d44a56375ae Mon Sep 17 00:00:00 2001 From: Adithya Kumar Date: Tue, 13 Jun 2023 14:43:44 +0530 Subject: [PATCH] Add a typeclass for serializing variable length types --- core/src/Streamly/Internal/Data/Serialize.hs | 152 +++++++++++++++++++ core/streamly-core.cabal | 2 + test/Streamly/Test/Data/Unbox.hs | 30 ++++ 3 files changed, 184 insertions(+) create mode 100644 core/src/Streamly/Internal/Data/Serialize.hs diff --git a/core/src/Streamly/Internal/Data/Serialize.hs b/core/src/Streamly/Internal/Data/Serialize.hs new file mode 100644 index 0000000000..df8cf138ec --- /dev/null +++ b/core/src/Streamly/Internal/Data/Serialize.hs @@ -0,0 +1,152 @@ +module Streamly.Internal.Data.Serialize + ( Size(..) + , Serialize(..) + ) where + +-------------------------------------------------------------------------------- +-- Imports +-------------------------------------------------------------------------------- + +import Control.Monad (void) +import Data.List (foldl') +import Data.Proxy (Proxy (..)) +import Streamly.Internal.Data.Unboxed (MutableByteArray(..)) + +import GHC.Int (Int16(..), Int32(..), Int64(..), Int8(..)) +import GHC.Word (Word16(..), Word32(..), Word64(..), Word8(..)) +import GHC.Stable (StablePtr(..)) + +import qualified Streamly.Internal.Data.Unboxed as Unbox + +import GHC.Exts + +-------------------------------------------------------------------------------- +-- Types +-------------------------------------------------------------------------------- + +-- | Info about the length of a serializable type. Length can depend on the +-- value or can be independent. +data Size a + = VarSize (a -> Int) + | ConstSize !Int + +-- | A type implementing the 'Serialize' interface supplies operations for +-- reading and writing the type from and to a mutable byte array (an unboxed +-- representation of the type) in memory. The read operation 'deserialize' +-- deserializes the boxed type from the mutable byte array. The write operation +-- 'serialize' serializes the boxed type to the mutable byte array. +-- +-- 'Serialize' contains enough information to serialize and deserialize variable +-- length types. +-- +-- >>> import Streamly.Internal.Data.Serialize (Serialize(..), Size(..)) +-- +-- >>> :{ +-- data Object = Object +-- { _varLen :: [Int] +-- , _constLen :: Int +-- } +-- :} +-- +-- >>> :{ +-- instance Serialize Object where +-- size = +-- case (size :: Size [Int], size :: Size Int) of +-- (VarSize f, ConstSize g) -> +-- VarSize $ \obj -> +-- f (_varLen obj) + g +-- _ -> error "size is not defined properly" +-- deserialize i arr = do +-- (i1, x0) <- deserialize i arr +-- (i2, x1) <- deserialize i1 arr +-- pure (i2, Object x0 x1) +-- serialize i arr (Object x0 x1) = do +-- i1 <- serialize i arr x0 +-- i2 <- serialize i1 arr x1 +-- pure i2 +-- :} +-- +class Serialize a where + -- | Get the 'Size', in bytes, reqired to store the serialized + -- representation of the type. Size cannot be zero. + size :: Size a + + -- We can implement the following functions without returning the `Int` + -- offset but that may require traversing the Haskell structure again to get + -- the size. Therefore, this is a performance optimization. + + -- | Deserialize a value from the given byte-index in the array. Returns a + -- tuple of the next byte-index and the deserialized value. + deserialize :: Int -> MutableByteArray -> IO (Int, a) + + -- | Write the serialized representation of the value in the array at the + -- given byte-index. Returns the next byte-index. + serialize :: Int -> MutableByteArray -> a -> IO Int + +-------------------------------------------------------------------------------- +-- Instances +-------------------------------------------------------------------------------- + +#define DERIVE_SERIALIZE_FROM_UNBOX(_type) \ +instance Serialize _type where \ +; size = ConstSize $ Unbox.sizeOf (Proxy :: Proxy _type) \ +; deserialize off arr = \ + Unbox.peekByteIndex off arr >>= \ + \val -> let sz = Unbox.sizeOf (Proxy :: Proxy _type) \ + in pure (off + sz, val) \ +; serialize off arr val = \ + Unbox.pokeByteIndex off arr val \ + >> let sz = Unbox.sizeOf (Proxy :: Proxy _type) \ + in pure (off + sz) + +DERIVE_SERIALIZE_FROM_UNBOX(Char) +DERIVE_SERIALIZE_FROM_UNBOX(Int8) +DERIVE_SERIALIZE_FROM_UNBOX(Int16) +DERIVE_SERIALIZE_FROM_UNBOX(Int32) +DERIVE_SERIALIZE_FROM_UNBOX(Int) +DERIVE_SERIALIZE_FROM_UNBOX(Int64) +DERIVE_SERIALIZE_FROM_UNBOX(Word) +DERIVE_SERIALIZE_FROM_UNBOX(Word8) +DERIVE_SERIALIZE_FROM_UNBOX(Word16) +DERIVE_SERIALIZE_FROM_UNBOX(Word32) +DERIVE_SERIALIZE_FROM_UNBOX(Word64) +DERIVE_SERIALIZE_FROM_UNBOX(Double) +DERIVE_SERIALIZE_FROM_UNBOX(Float) +DERIVE_SERIALIZE_FROM_UNBOX((StablePtr a)) +DERIVE_SERIALIZE_FROM_UNBOX((Ptr a)) +DERIVE_SERIALIZE_FROM_UNBOX((FunPtr a)) + +instance forall a. Serialize a => Serialize [a] where + + {-# INLINE size #-} + size = VarSize $ \lst -> + case size :: Size a of + VarSize f -> + foldl' + (\acc x -> acc + f x) + (Unbox.sizeOf (Proxy :: Proxy Int)) + lst + ConstSize sz -> + length lst + * sz + + Unbox.sizeOf (Proxy :: Proxy Int) + + {-# INLINE deserialize #-} + deserialize off arr = do + len <- Unbox.peekByteIndex off arr :: IO Int + let off1 = off + Unbox.sizeOf (Proxy :: Proxy Int) + let peekList buf o 0 = pure (o, reverse buf) + peekList buf o i = do + (o1, x) <- deserialize o arr + peekList (x:buf) o1 (i - 1) + peekList [] off1 len + + {-# INLINE serialize #-} + serialize off arr val = do + void $ serialize off arr (length val) + let off1 = off + Unbox.sizeOf (Proxy :: Proxy Int) + let pokeList o [] = pure o + pokeList o (x:xs) = do + o1 <- serialize o arr x + pokeList o1 xs + pokeList off1 val diff --git a/core/streamly-core.cabal b/core/streamly-core.cabal index 0d442a7ecc..61b906ea30 100644 --- a/core/streamly-core.cabal +++ b/core/streamly-core.cabal @@ -302,6 +302,8 @@ library -- streamly-core-array-types , Streamly.Internal.Data.Unbox + , Streamly.Internal.Data.Serialize + -- Unboxed IORef , Streamly.Internal.Data.IORef.Unboxed -- May depend on streamly-core-stream diff --git a/test/Streamly/Test/Data/Unbox.hs b/test/Streamly/Test/Data/Unbox.hs index 507312780d..b43187a8af 100644 --- a/test/Streamly/Test/Data/Unbox.hs +++ b/test/Streamly/Test/Data/Unbox.hs @@ -40,6 +40,8 @@ import Streamly.Internal.Data.Unbox import Test.Hspec as H +import qualified Streamly.Internal.Data.Serialize as Serialize + -------------------------------------------------------------------------------- -- Types -------------------------------------------------------------------------------- @@ -146,6 +148,27 @@ testGenericConsistency val = do checkSizeOf :: forall a. Unbox a => Proxy a -> Int -> IO () checkSizeOf _ size = sizeOf (Proxy :: Proxy a) `shouldBe` size +testSerializeList + :: forall a. (Eq a, Show a, Serialize.Serialize a) + => Int + -> a + -> IO () +testSerializeList sizeOfA val = do + + let sz = + case Serialize.size :: Serialize.Size a of + Serialize.VarSize f -> f val + Serialize.ConstSize csz -> csz + + sz `shouldBe` sizeOfA + + arr <- newUnpinnedBytes sz + + off1 <- Serialize.serialize 0 arr val + (off2, val2) <- Serialize.deserialize 0 arr + val2 `shouldBe` val + off2 `shouldBe` off1 + -------------------------------------------------------------------------------- -- CPP helpers -------------------------------------------------------------------------------- @@ -203,6 +226,13 @@ testCases = do it "GenericConsistency (Identity Int)" $ testGenericConsistency (Identity 56760 :: Identity Int) + it "Serialize [Int]" + $ testSerializeList (8 + 4 * 8) ([1, 2, 3, 4] :: [Int]) + it "Serialize [[Int]]" + $ testSerializeList + (8 + 3 * 8 + 6 * 8) + ([[1], [1, 2], [1, 2, 3]] :: [[Int]]) + -- Fingerprint does not work for GHC 8.6.5 -- it "Fingerprint" $ testSerialization (Fingerprint 123456 876588) -- it "GenericConsistency Fingerprint"