Skip to content

Commit

Permalink
core: Move isAlphaNum to Compat module
Browse files Browse the repository at this point in the history
  • Loading branch information
wismill committed Jul 3, 2024
1 parent 2089651 commit da4c987
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 17 deletions.
5 changes: 5 additions & 0 deletions unicode-data/Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
- Removed deprecated `Unicode.Char.Numeric.isNumber`.
Use `Unicode.Char.Numeric.Compat.isNumber` instead.

### Deprecations

- `Unicode.Char.General.isAlphaNum`.
Use `Unicode.Char.General.Compat.isAlphaNum` instead.

## 0.5.0 (July 2024)

- Fix the inlining of `Addr#` literals and reduce their size. This results in
Expand Down
4 changes: 4 additions & 0 deletions unicode-data/bench/Unicode/Char/General/CompatBench.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ benchmarks r = bgroupWithCharRange "Unicode.Char.General.Compat" r $ \chars ->
[ Bench "base" Char.isAlpha
, Bench "unicode-data" GC.isAlpha
]
, bgroupWithChars "isAlphaNum" chars
[ Bench "base" Char.isAlphaNum
, Bench "unicode-data" GC.isAlphaNum
]
, bgroupWithChars "isLetter" chars
[ Bench "base" Char.isLetter
, Bench "unicode-data" GC.isLetter
Expand Down
4 changes: 0 additions & 4 deletions unicode-data/bench/Unicode/Char/GeneralBench.hs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ benchmarks r = bgroupWithCharRange "Unicode.Char.General" r $ \chars ->
, bgroupWithChars "isAlphabetic" chars
[ Bench "unicode-data" G.isAlphabetic
]
, bgroupWithChars "isAlphaNum" chars
[ Bench "base" Char.isAlphaNum
, Bench "unicode-data" G.isAlphaNum
]
, bgroupWithChars "isControl" chars
[ Bench "base" Char.isControl
, Bench "unicode-data" G.isControl
Expand Down
2 changes: 1 addition & 1 deletion unicode-data/lib/Unicode/Char.hs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ where
import Data.Char (chr, ord)
import Unicode.Char.Case hiding (Unfold(..), Step(..))
import Unicode.Char.Case.Compat
import Unicode.Char.General
import Unicode.Char.General hiding (isAlphaNum)
import Unicode.Char.General.Compat
import Unicode.Char.Identifiers
import Unicode.Char.Numeric
Expand Down
24 changes: 12 additions & 12 deletions unicode-data/lib/Unicode/Char/General.hs
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,12 @@ import Control.Exception (assert)
import Data.Bits ((.&.))
import Data.Char (isAscii, isLatin1, isAsciiUpper, isAsciiLower, ord)
import Data.Ix (Ix)
import Unicode.Internal.Division (quotRem28)

import qualified Unicode.Char.General.Compat as Compat
import qualified Unicode.Internal.Char.DerivedCoreProperties as P
import qualified Unicode.Internal.Char.PropList as P
import qualified Unicode.Internal.Char.UnicodeData.GeneralCategory as UC
import Unicode.Internal.Division (quotRem28)

--------------------------------------------------------------------------------
-- General Category
Expand Down Expand Up @@ -381,20 +382,19 @@ following 'GeneralCategory's, or 'False' otherwise:
prop> isAlphaNum c == Data.Char.isAlphaNum c
__Note:__ this function is incompatible with 'isAlphabetic':
>>> isAlphabetic '\x345'
True
>>> isAlphaNum '\x345'
False
@since 0.3.0
-}
{-# INLINE isAlphaNum #-}
{-# DEPRECATED isAlphaNum "Use Unicode.Char.General.Compat.isAlphaNum instead." #-}
isAlphaNum :: Char -> Bool
isAlphaNum c =
let !cp = ord c
-- NOTE: The guard constant is updated at each Unicode revision.
-- It must be < 0x40000 to be accepted by generalCategoryPlanes0To3.
in cp <= UC.MaxIsAlphaNum &&
let !gc = UC.generalCategoryPlanes0To3 cp
in gc <= UC.OtherLetter ||
(UC.DecimalNumber <= gc && gc <= UC.OtherNumber)
-- Use the following in case the previous code is not valid anymore:
-- gc <= UC.OtherLetter || (UC.DecimalNumber <= gc && gc <= UC.OtherNumber)
-- where !gc = UC.generalCategory c
isAlphaNum = Compat.isAlphaNum

{-| Selects control characters, which are the non-printing characters
of the Latin-1 subset of Unicode.
Expand Down
44 changes: 44 additions & 0 deletions unicode-data/lib/Unicode/Char/General/Compat.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,64 @@
--
module Unicode.Char.General.Compat
( isAlpha
, isAlphaNum
, isLetter
, isSpace
) where

import Data.Char (ord)
import qualified Unicode.Internal.Char.UnicodeData.GeneralCategory as UC

-- $setup
-- >>> import qualified Unicode.Char.General

-- | Same as 'isLetter'.
--
-- Defined as a plain alias, so it accepts exactly the characters that
-- 'isLetter' accepts.
--
-- @since 0.3.0
{-# INLINE isAlpha #-}
isAlpha :: Char -> Bool
isAlpha = isLetter

{-| Selects alphabetic or numeric Unicode characters.

This function returns 'True' if its argument has one of the
following 'GeneralCategory's, or 'False' otherwise:

* 'UppercaseLetter'
* 'LowercaseLetter'
* 'TitlecaseLetter'
* 'ModifierLetter'
* 'OtherLetter'
* 'DecimalNumber'
* 'LetterNumber'
* 'OtherNumber'

prop> isAlphaNum c == Data.Char.isAlphaNum c

__Note:__ this function is incompatible with 'Unicode.Char.General.isAlphabetic':

>>> Unicode.Char.General.isAlphabetic '\x345'
True
>>> isAlphaNum '\x345'
False

@since 0.6.0 moved to Compat module

@since 0.3.0
-}
-- Keep the INLINE pragma this definition carried before it was moved here, so
-- that callers (including the deprecated alias in "Unicode.Char.General")
-- still get the lookup inlined, consistently with 'isAlpha'.
{-# INLINE isAlphaNum #-}
isAlphaNum :: Char -> Bool
isAlphaNum c =
    let !cp = ord c
    -- NOTE: The guard constant is updated at each Unicode revision.
    --       It must be < 0x40000 to be accepted by generalCategoryPlanes0To3.
    -- Code points above the guard short-circuit to 'False' without any lookup.
    in cp <= UC.MaxIsAlphaNum &&
        let !gc = UC.generalCategoryPlanes0To3 cp
        -- Two range checks on the category code: everything up to
        -- 'OtherLetter', or anything from 'DecimalNumber' to 'OtherNumber'.
        in gc <= UC.OtherLetter ||
            (UC.DecimalNumber <= gc && gc <= UC.OtherNumber)
    -- Use the following in case the previous code is not valid anymore:
    -- gc <= UC.OtherLetter || (UC.DecimalNumber <= gc && gc <= UC.OtherNumber)
    -- where !gc = UC.generalCategory c

{-| Selects alphabetic Unicode characters (lower-case, upper-case and title-case
letters, plus letters of caseless scripts and modifiers letters).
Expand Down

0 comments on commit da4c987

Please sign in to comment.