Skip to content

Commit

Permalink
ucd2haskell: Better imports
Browse files Browse the repository at this point in the history
  • Loading branch information
wismill committed Jun 21, 2024
1 parent 899c222 commit be9ce9c
Show file tree
Hide file tree
Showing 21 changed files with 438 additions and 299 deletions.
400 changes: 275 additions & 125 deletions ucd2haskell/exe/UCD2Haskell/Generator.hs

Large diffs are not rendered by default.

23 changes: 10 additions & 13 deletions ucd2haskell/exe/UCD2Haskell/Modules/Properties.hs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@ import qualified Unicode.CharacterDatabase.Parser.Properties.Multiple as Props
import UCD2Haskell.Common (Fold (..))
import UCD2Haskell.Generator (
FileRecipe (..),
ShamochuCode (..),
apacheLicense,
genBitmapShamochu,
mkImports,
unlinesBB,
)

Expand Down Expand Up @@ -78,20 +80,21 @@ genCorePropertiesModule moduleName isProp = Fold step initial done
Nothing -> Just xs
Just ys -> Just (xs <> ys)

done Acc{..} = unlinesBB (header properties <> genBitmaps values properties)
done Acc{..} = header imports properties <> code
where
ShamochuCode{..} = genBitmaps values properties

genBitmaps values = foldr addBitMap mempty
genBitmaps values = foldMap addBitMap
where
addBitMap property =
(:)
(genBitmapShamochu
genBitmapShamochu
(prop2FuncNameStr property)
(5 NE.:| [6, 7])
-- [2,3,4,5,6]
[]
(IntSet.toAscList (values Map.! property)))
(IntSet.toAscList (values Map.! property))

header exports =
header imports exports = unlinesBB
[ apacheLicense 2020 moduleName
, "{-# OPTIONS_HADDOCK hide #-}"
, "{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}"
Expand All @@ -102,11 +105,5 @@ genCorePropertiesModule moduleName isProp = Fold step initial done
<> mconcat (L.intersperse "\n , " (map prop2FuncName exports))
, " ) where"
, ""
, "import Data.Bits (Bits(..))"
, "import Data.Char (ord)"
, "import Data.Int (Int8)"
, "import Data.Word (Word8, Word16)"
, "import GHC.Exts (Ptr(..))"
, "import Unicode.Internal.Bits (lookupBit, lookupWord16AsInt, lookupWord8AsInt)"
, ""
, mkImports imports
]
72 changes: 39 additions & 33 deletions ucd2haskell/exe/UCD2Haskell/Modules/Scripts.hs
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,14 @@ import UCD2Haskell.Common (
import UCD2Haskell.Generator (
BitmapType (..),
FileRecipe (..),
ShamochuCode (..),
apacheLicense,
generateShamochuBitmaps,
mkImports',
toLookupBitMapName,
unlinesBB,
word32ToWord8s,
(<+>),
)

recipe :: PropertyValuesAliases -> FileRecipe Prop.Entry
Expand All @@ -56,6 +59,13 @@ genScriptsModule moduleName aliases = Fold step mempty done
done ranges =
let scripts = Set.toList
(foldr addScript (Set.singleton Defaults.defaultScript) ranges)
ShamochuCode{..} = if length scripts <= 0xff
then mkCharScripts scripts ranges
else error "Cannot encode scripts"
imports' = imports <+> Map.fromList
[ ( "GHC.Exts"
, Set.fromList ["Addr#", "Int(..)", "nullAddr#"] )
, ( "Data.Ix", Set.singleton "Ix" )]
in unlinesBB
[ "{-# LANGUAGE PatternSynonyms #-}"
, "{-# OPTIONS_HADDOCK hide #-}"
Expand All @@ -71,13 +81,7 @@ genScriptsModule moduleName aliases = Fold step mempty done
, " , pattern ScriptCharMaskComplement )"
, "where"
, ""
, "import Data.Char (ord)"
, "import Data.Int (Int8)"
, "import Data.Ix (Ix)"
, "import Data.Word (Word16)"
, "import GHC.Exts (Addr#, Int#, Int(..), Ptr(..), nullAddr#, andI#, iShiftL#, iShiftRL#, (+#), (-#))"
, "import Unicode.Internal.Bits.Scripts (lookupWord8AsInt#, lookupWord16AsInt#)"
, ""
, mkImports' "Scripts" imports'
, "-- | Unicode [script](https://www.unicode.org/reports/tr24/)."
, "--"
, "-- The constructors descriptions are the original Unicode values"
Expand Down Expand Up @@ -127,9 +131,7 @@ genScriptsModule moduleName aliases = Fold step mempty done
, "-- | Script of a character."
, "--"
, "-- @since 0.1.0"
, if length scripts <= 0xff
then mkCharScripts scripts ranges
else error "Cannot encode scripts"
, code
, ""
]

Expand Down Expand Up @@ -297,7 +299,7 @@ genScriptsModule moduleName aliases = Fold step mempty done
encodeBytes = foldr addByte "" . word32ToWord8s
addByte n acc = BB.char7 '\\' <> BB.word8Dec n <> acc

mkCharScripts :: [BS.ShortByteString] -> [Prop.Entry] -> BB.Builder
mkCharScripts :: [BS.ShortByteString] -> [Prop.Entry] -> ShamochuCode
mkCharScripts scripts scriptsRanges =
let charScripts = L.sort (foldMap (rangeToCharScripts getScript) scriptsRanges)
charScripts' = reverse (fst (foldl' addMissing (mempty, '\0') charScripts))
Expand Down Expand Up @@ -325,28 +327,32 @@ genScriptsModule moduleName aliases = Fold step mempty done
assert (fromEnum (length scripts) < 0xff)
(fromIntegral . fromEnum)
bitmap0To1 = "scriptPlanes0To1"
in mconcat
[ "{-# INLINE script #-}\n"
, "script :: Char -> Int#\n"
, "script c\n"
, " -- Planes 0-1\n"
, " | cp < 0x", showPaddedHeXB boundPlanes0To1
, " = ", toLookupBitMapName bitmap0To1, " cp#\n"
, mkScriptsBounds def (scripts !!) otherPlanes
, " -- Default: ", BB.shortByteString Defaults.defaultScript, "\n"
, " | otherwise = ", BB.intDec def, "#\n"
, " where\n"
, " !cp@(I# cp#) = ord c\n"
, "\n"
, generateShamochuBitmaps
bitmap0To1
True
ByteMap
(NE.singleton 3)
[5]
toWord8
planes0To1
]
ShamochuCode{..} = generateShamochuBitmaps
bitmap0To1
True
ByteMap
(NE.singleton 3)
[5]
toWord8
planes0To1
in ShamochuCode
{ code = mconcat
[ "{-# INLINE script #-}\n"
, "script :: Char -> Int#\n"
, "script c\n"
, " -- Planes 0-1\n"
, " | cp < 0x", showPaddedHeXB boundPlanes0To1
, " = ", toLookupBitMapName bitmap0To1, " cp#\n"
, mkScriptsBounds def (scripts !!) otherPlanes
, " -- Default: ", BB.shortByteString Defaults.defaultScript, "\n"
, " | otherwise = ", BB.intDec def, "#\n"
, " where\n"
, " !cp@(I# cp#) = ord c\n"
, "\n"
, code
]
, imports = imports }


mkScriptsBounds :: Int -> (Int -> BS.ShortByteString) -> [(Int,Char)] -> BB.Builder
mkScriptsBounds def getScriptName
Expand Down
36 changes: 18 additions & 18 deletions ucd2haskell/exe/UCD2Haskell/Modules/ScriptsExtensions.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,14 @@ import qualified Unicode.CharacterDatabase.Parser.Properties.Single as Prop
import UCD2Haskell.Common (Fold (..), mkHaskellConstructor)
import UCD2Haskell.Generator (
FileRecipe (..),
ShamochuCode (..),
apacheLicense,
enumMapToAddrLiteral,
genEnumBitmapShamochu,
mkImports',
splitPlanes,
unlinesBB,
(<+>),
)

recipe :: PropertyValuesAliases -> ScriptExtensions -> FileRecipe Prop.Entry
Expand Down Expand Up @@ -98,14 +101,7 @@ genScriptExtensionsModule moduleName aliases extensions = Fold step initial done
, "(scriptExtensions)"
, "where"
, ""
, "import Data.Char (ord)"
, "import Data.Word (Word8, Word16)"
, "import Data.Int (Int8)"
, "import GHC.Exts"
, " ( Addr#, Int#, Int(..), Ptr(..), nullAddr#"
, " , negateInt#, andI#, iShiftL#, iShiftRL#, (+#), (-#) )"
, "import Unicode.Internal.Bits.Scripts (lookupWord8AsInt#, lookupWord16AsInt#)"
, ""
, mkImports' "Scripts" imports'
, "-- | Script extensions of a character."
, "--"
, "-- Returns a pair:"
Expand All @@ -123,16 +119,7 @@ genScriptExtensionsModule moduleName aliases extensions = Fold step initial done
(usedExts Set.\\ singleScriptExtensionsSet)
, " s -> (# negateInt# s, nullAddr# #)"
, ""
, genEnumBitmapShamochu
"encodedScriptExtensions"
True
(NE.singleton 3)
[5]
toWord8
(def, BB.intDec (fromEnum def))
(def, BB.intDec (fromEnum def))
planes0To3
plane14
, code
]
where
-- List ordered by Haskell constructors
Expand Down Expand Up @@ -174,6 +161,19 @@ genScriptExtensionsModule moduleName aliases extensions = Fold step initial done
"Cannot generate: genScriptExtensionsModule"
(== def)
scriptExtensions
ShamochuCode{..} = genEnumBitmapShamochu
"encodedScriptExtensions"
True
(NE.singleton 3)
[5]
toWord8
(def, BB.intDec (fromEnum def))
(def, BB.intDec (fromEnum def))
planes0To3
plane14
imports' = imports <+> Map.singleton
"GHC.Exts"
(Set.fromList ["Addr#", "Int(..)", "nullAddr#", "negateInt#"])

mkDecodeScriptExtensions
:: (NE.NonEmpty BS.ShortByteString -> Word8)
Expand Down
20 changes: 10 additions & 10 deletions ucd2haskell/exe/UCD2Haskell/Modules/Security/IdentifierStatus.hs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ import qualified Unicode.CharacterDatabase.Parser.Properties.Single as Prop
import UCD2Haskell.Common (Fold (..))
import UCD2Haskell.Generator (
FileRecipe (..),
ShamochuCode (..),
apacheLicense,
genBitmapShamochu,
mkImports,
unlinesBB,
)

Expand Down Expand Up @@ -46,14 +48,12 @@ genIdentifierStatusModule moduleName = Fold step mempty done
, "(isAllowedInIdentifier)"
, "where"
, ""
, "import Data.Char (ord)"
, "import Data.Word (Word8)"
, "import GHC.Exts (Ptr(..))"
, "import Unicode.Internal.Bits (lookupBit)"
, ""
, genBitmapShamochu
"isAllowedInIdentifier"
(NE.singleton 6)
[2,3,4,5,6]
(reverse values)
, mkImports imports
, code
]
where
ShamochuCode{..} = genBitmapShamochu
"isAllowedInIdentifier"
(NE.singleton 6)
[2,3,4,5,6]
(reverse values)
33 changes: 15 additions & 18 deletions ucd2haskell/exe/UCD2Haskell/Modules/Security/IdentifierType.hs
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@ import qualified Unicode.CharacterDatabase.Parser.Properties.Single as Prop
import UCD2Haskell.Common (Fold (..))
import UCD2Haskell.Generator (
FileRecipe (..),
ShamochuCode (..),
apacheLicense,
genEnumBitmapShamochu,
mkImports,
splitPlanes,
unlinesBB,
(<+>),
)

recipe :: FileRecipe Prop.Entry
Expand Down Expand Up @@ -152,14 +155,7 @@ genIdentifierTypeModule moduleName = Fold step mempty done
, "(IdentifierType(..), identifierTypes, decodeIdentifierTypes)"
, "where"
, ""
, "import Data.Bits (Bits(..))"
, "import Data.Char (ord)"
, "import Data.Int (Int8)"
, "import Data.List.NonEmpty (NonEmpty)"
, "import Data.Word (Word8, Word16)"
, "import GHC.Exts (Ptr(..))"
, "import Unicode.Internal.Bits (lookupWord8AsInt, lookupWord16AsInt)"
, ""
, mkImports (imports <+> Map.singleton "Data.List.NonEmpty" (Set.singleton "NonEmpty"))
, "-- | Identifier type"
, "--"
, "-- @since 0.1.0"
Expand Down Expand Up @@ -203,16 +199,7 @@ genIdentifierTypeModule moduleName = Fold step mempty done
, " _ -> " <> mkHaskellConstructorsList def
, ""
, "-- | Returns the 'IdentifierType's corresponding to a character."
, genEnumBitmapShamochu
"identifierTypes"
False
(NE.singleton 3)
[5]
toWord8
(defIdx, BB.intDec (fromEnum defIdx))
(defIdx, BB.intDec (fromEnum defIdx))
planes0To3
plane14
, code
]
where
toWord8 =
Expand All @@ -223,3 +210,13 @@ genIdentifierTypeModule moduleName = Fold step mempty done
(== defIdx)
(reverse identifiersTypes)
(encoding, identifiersTypes, defIdx) = mkIdentifiersTypes acc
ShamochuCode{..} = genEnumBitmapShamochu
"identifierTypes"
False
(NE.singleton 3)
[5]
toWord8
(defIdx, BB.intDec (fromEnum defIdx))
(defIdx, BB.intDec (fromEnum defIdx))
planes0To3
plane14
22 changes: 10 additions & 12 deletions ucd2haskell/exe/UCD2Haskell/Modules/UnicodeData/CombiningClass.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ import qualified Unicode.CharacterDatabase.Parser.UnicodeData as UD
import UCD2Haskell.Common (Fold (..), showB)
import UCD2Haskell.Generator (
FileRecipe (..),
ShamochuCode (..),
apacheLicense,
genBitmapShamochu,
mkImports,
unlinesBB,
)

Expand Down Expand Up @@ -60,24 +62,20 @@ genCombiningClassModule moduleName = Fold step initial done
, "(combiningClass, isCombining)"
, "where"
, ""
, "import Data.Bits (Bits(..))"
, "import Data.Char (ord)"
, "import Data.Int (Int8)"
, "import Data.Word (Word8, Word16)"
, "import GHC.Exts (Ptr(..))"
, "import Unicode.Internal.Bits (lookupBit, lookupWord8AsInt, lookupWord16AsInt)"
, ""
, mkImports imports
, "combiningClass :: Char -> Int"
, "combiningClass = \\case"
, unlinesBB (reverse combiningClasses)
, " _ -> 0\n"
, ""
, genBitmapShamochu
"isCombining"
(NE.singleton 6)
[2,3,4,5,6]
(reverse combiningCodePoints)
, code
]
where
ShamochuCode{..} = genBitmapShamochu
"isCombining"
(NE.singleton 6)
[2,3,4,5,6]
(reverse combiningCodePoints)

genCombiningClassDef c cc = mconcat
[ " "
Expand Down
Loading

0 comments on commit be9ce9c

Please sign in to comment.