Skip to content

Commit

Permalink
core: Improve Case & Unfold
Browse files Browse the repository at this point in the history
  • Loading branch information
wismill committed Jun 18, 2024
1 parent defa062 commit e324a09
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 12 deletions.
5 changes: 4 additions & 1 deletion unicode-data/lib/Unicode/Char/Case.hs
Original file line number Diff line number Diff line change
Expand Up @@ -249,4 +249,7 @@ step = \case
where
-- Mask for a single Unicode code point: (1 << 21) - 1
mask = 0x1fffff
cp = fromIntegral (s .&. mask)
-- [NOTE] As of GHC 9.4, Int64 is represented internally by Int64#,
-- so the previous code `fromIntegral s .&. mask` leads to
-- unefficient generated code.
cp = fromIntegral s .&. mask
11 changes: 6 additions & 5 deletions unicode-data/lib/Unicode/Char/Case/Compat.hs
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ import qualified Unicode.Internal.Char.UnicodeData.SimpleUpperCaseMapping as C
-- Title case is used by a small number of letter ligatures like the
-- single-character form of /Lj/.
--
-- It matches characters with general category 'UppercaseLetter' and
-- 'TitlecaseLetter'.
-- It matches characters with general category
-- 'Unicode.Char.General.UppercaseLetter' and
-- 'Unicode.Char.General.TitlecaseLetter'.
--
-- See: 'Unicode.Char.Case.isUpperCase' for the /full upper/ case predicate.
--
Expand All @@ -48,11 +49,11 @@ isUpper c =
UC.UppercaseLetter -> True
UC.TitlecaseLetter -> True
_ -> False
where cp = ord c
where !cp = ord c

-- | Selects lower-case alphabetic Unicode characters (letters).
--
-- It matches characters with general category 'LowercaseLetter'.
-- It matches characters with general category 'Unicode.Char.Case.LowercaseLetter'.
--
-- See: 'Unicode.Char.Case.isLowerCase' for the /full/ lower case predicate.
--
Expand All @@ -67,7 +68,7 @@ isLower c =
case UC.generalCategoryPlanes0To3 cp of
UC.LowercaseLetter -> True
_ -> False
where cp = ord c
where !cp = ord c

-- | Convert a letter to the corresponding upper-case letter, if any.
-- Any other character is returned unchanged.
Expand Down
15 changes: 9 additions & 6 deletions unicode-data/lib/Unicode/Internal/Unfold.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE LambdaCase #-}

-- |
-- Module : Unicode.Internal.Unfold
Expand All @@ -19,6 +18,8 @@ module Unicode.Internal.Unfold
, toList
) where

import GHC.Base (build)

-- | An @Unfold a b@ is a generator of a stream of values of type @b@ from a
-- seed of type @a@.
--
Expand Down Expand Up @@ -57,8 +58,10 @@ toList :: Unfold a a -> a -> [a]
toList (Unfold step inject) input =
case inject input of
Stop -> [input]
Yield b s -> b : go (step s)
where
go = \case
Yield b s -> let !s' = step s in b : go s'
Stop -> []
Yield b s0 -> b : build
( \c n ->
let go s = case step s of
Yield a s' -> a `c` go s'
Stop -> n
in go s0
)

0 comments on commit e324a09

Please sign in to comment.