diff --git a/unicode-data/bench/Unicode/Char/Bench.hs b/unicode-data/bench/Unicode/Char/Bench.hs index 97b0947e..6b823045 100644 --- a/unicode-data/bench/Unicode/Char/Bench.hs +++ b/unicode-data/bench/Unicode/Char/Bench.hs @@ -1,30 +1,65 @@ {-# LANGUAGE LambdaCase #-} +{-# LANGUAGE CPP #-} module Unicode.Char.Bench - ( Bench(..) + ( -- Range benchmark + benchRange + -- Char benchmark + , Bench(..) , CharRange(..) - , bgroup' - , benchChars - , benchCharsNF + , bgroupWithValidCharRange + , bgroupWithValidCharRange' + , bgroupWithCharRange + , bgroupWithCharRange' + , bgroupWithChars ) where -import Control.DeepSeq (NFData, deepseq, force) -import Control.Exception (evaluate) -import Test.Tasty.Bench (Benchmark, bgroup, bench, bcompare, env, nf) -import Test.Tasty.Options - ( IsOption(defaultValue, optionHelp, optionName, parseValue) ) - +import Control.DeepSeq (NFData (..), deepseq) +import Control.Exception (evaluate, assert) +import Data.Char (ord) import qualified Data.Char as Char +import Foreign (Storable (..)) +import qualified GHC.Exts as Exts +import GHC.IO (IO (..)) +import Test.Tasty.Bench (Benchmark, bcompare, bench, bgroup, env, nf) +import Test.Tasty.Options ( + IsOption (defaultValue, optionHelp, optionName, parseValue), + ) +#if MIN_VERSION_base(4,10,0) && !MIN_VERSION_base(4,15,0) +import qualified GHC.Magic as Exts (noinline) +#endif + import qualified Unicode.Char.General as G +import Data.Ix (Ix (..)) --- | A unit benchmark -data Bench a = Bench - { _title :: !String -- ^ Name - , _func :: Char -> a -- ^ Function to benchmark - } +-------------------------------------------------------------------------------- +-- Range benchmark +-------------------------------------------------------------------------------- +{-# INLINE benchRange #-} +benchRange + :: forall a b. (Bounded a, Ix a, NFData b) + => String + -> (a -> b) + -> Benchmark +benchRange t f = bench t (nf (fold_ f) (minBound, maxBound)) + +{-# INLINE fold_ #-} +fold_ + :: forall a b. (Ix a, NFData b) + => (a -> b) + -> (a, a) + -> () +fold_ f = foldr (deepseq . f) () . range + +-------------------------------------------------------------------------------- +-- Char range +-------------------------------------------------------------------------------- + +-- | Characters range data CharRange = CharRange !Char !Char +-- | Characters range configurable from CLI instance IsOption CharRange where defaultValue = CharRange minBound maxBound parseValue = \case @@ -38,55 +73,213 @@ instance IsOption CharRange where optionName = pure "chars" optionHelp = pure "Range of chars to test" -{-# INLINE bgroup' #-} -bgroup' :: NFData a => String -> CharRange -> [Bench a] -> Benchmark -bgroup' groupTitle charRange bs = bgroup groupTitle - [ benchChars' title f - | Bench title f <- bs - ] +-------------------------------------------------------------------------------- +-- Characters benchmark +-------------------------------------------------------------------------------- + +-- | A unit benchmark +data Bench a = Bench + { _title :: !String -- ^ Name + , _func :: !(Char -> a) -- ^ Function to benchmark + } + +-- | Helper to compare benchmarks of function from this package to ones in base. +{-# INLINE bgroupWithValidCharRange #-} +bgroupWithValidCharRange :: + String -> + CharRange -> + (Char -> Bool) -> + (Chars -> [Benchmark]) -> + Benchmark +bgroupWithValidCharRange groupTitle charRange isValid mkBenches = + -- Avoid side-effects with garbage collection (see tasty-bench doc for env). + -- We use pinned ByteArray# instead of lists to avoid that GC kicks in. + env + (initialize isValid charRange >>= evaluate) + (bgroup groupTitle . mkBenches) + +-- | Helper to compare benchmarks of function from this package to ones in base. +-- Filter out Surrogates, Private Use Areas and unsassigned code points. +{-# INLINE bgroupWithCharRange #-} +bgroupWithCharRange :: + String -> + CharRange -> + (Chars -> [Benchmark]) -> + Benchmark +bgroupWithCharRange title charRange = + bgroupWithValidCharRange title charRange isValid where - {-# INLINE benchChars' #-} - benchChars' title = case title of - "base" -> benchChars title charRange - _ -> bcompare' "base" . benchChars title charRange + isValid c = G.generalCategory c < G.Surrogate + +-- | Variant of 'bgroupWithValidCharRange' +{-# INLINE bgroupWithValidCharRange' #-} +bgroupWithValidCharRange' :: + (NFData a) => + String -> + CharRange -> + (Char -> Bool) -> + [Bench a] -> + Benchmark +bgroupWithValidCharRange' groupTitle charRange isValid bs = + bgroupWithValidCharRange groupTitle charRange isValid $ \chars -> + [ benchCharsRange groupTitle title chars f + | Bench title f <- bs + ] +{-# INLINE benchCharsRange #-} +benchCharsRange :: NFData a => String -> String -> Chars -> (Char -> a) -> Benchmark +benchCharsRange groupTitle title chars = case title of + "base" -> benchCharsNF title chars + _ -> bcompare' "base" . benchCharsNF title chars + where {-# INLINE bcompare' #-} -- [NOTE] Works if groupTitle uniquely identifies the benchmark group. bcompare' ref = bcompare (mconcat ["$NF == \"", ref, "\" && $(NF-1) == \"", groupTitle, "\""]) -{-# INLINE benchChars #-} -benchChars - :: (NFData a) - => String - -> CharRange - -> (Char -> a) - -> Benchmark -benchChars t charRange = benchCharsNF t charRange isValid +-- | Variant of 'bgroupWithCharRange' +{-# INLINE bgroupWithCharRange' #-} +bgroupWithCharRange' :: + (NFData a) => + String -> + CharRange -> + [Bench a] -> + Benchmark +bgroupWithCharRange' groupTitle charRange = + bgroupWithValidCharRange' groupTitle charRange isValid where - -- Filter out: Surrogates, Private Use Areas and unsassigned code points isValid c = G.generalCategory c < G.Surrogate +-- | Helper to compare benchmarks of function from this package to ones in base. +{-# INLINE bgroupWithChars #-} +bgroupWithChars :: (NFData a) => String -> Chars -> [Bench a] -> Benchmark +bgroupWithChars groupTitle chars bs = bgroup groupTitle + [ benchCharsRange groupTitle title chars f + | Bench title f <- bs + ] + +-- | Helper to bench a char function on a filtered char range {-# INLINE benchCharsNF #-} benchCharsNF - :: forall a. (NFData a) + :: (NFData a) => String - -> CharRange - -> (Char -> Bool) + -> Chars -> (Char -> a) -> Benchmark -benchCharsNF t charRange isValid f = - -- Avoid side-effects with garbage collection (see tasty-bench doc) - env - (evaluate (force chars')) -- initialize - (bench t . nf (foldString f)) -- benchmark +benchCharsNF title chars f = bench title (nf (foldrChars f) chars) + +-------------------------------------------------------------------------------- +-- Chars byte array +-------------------------------------------------------------------------------- + +-- | Pinned array of characters +data Chars = Chars !Exts.ByteArray# !Int + +instance NFData Chars where + rnf (Chars !_ !_) = () + +-- | Fold over a chars byte array +foldrChars :: NFData a => (Char -> a) -> Chars -> () +foldrChars f = go + where + -- Loop over the pinned char array. The loop itself does not allocate. + go (Chars cs len) = foldr + (\(Exts.I# k) -> + let c = Exts.indexWideCharArray# cs (k Exts.-# 1#) +#if MIN_VERSION_base(4,10,0) + -- `inline` is necessary to avoid excessive inlining, resulting + -- in benchmarking empty loop iterations, i.e. not the function. + -- We could use `inline` with more care at call site, but then we + -- would have to test the functions one by one and everytime we + -- modify them. Using it here is a hammer but more secure and + -- maintainable. + -- Note that we may improve this by controling the inlining for each + -- phase. + in deepseq (Exts.noinline f (Exts.C# c))) +#else + -- HACK: No `inline` for GHC < 8.2. Should we drop support? + in deepseq (f (Exts.C# c))) +#endif + () + [1..len] + +-- | Create a byte array of the chars to bench +initialize :: (Char -> Bool) -> CharRange -> IO Chars +initialize isValid charRange = IO $ \s1 -> + case Exts.newPinnedByteArray# initialLength s1 of { (# s2, ma #) -> + -- Write the filtered char range + case writeChars isValid ma 0# s2 start end of { (# s3, filteredCount #) -> + -- Duplicate to get enough chars to bench + case tile ma 0# finalLength filteredLength s3 of { s4 -> + case Exts.unsafeFreezeByteArray# ma s4 of { (# s5, a #) -> + (# s5, Chars a (Exts.I# (replications Exts.*# filteredCount)) #) + }} + where + -- Ensure to have enough chars + replications = case Exts.quotInt# targetCharsCount filteredCount of + 0# -> 1# + r# -> r# + filteredLength = filteredCount Exts.*# wcharSize + finalLength = filteredLength Exts.*# replications + }} + where + targetCharsCount = 0x10FFFF# + !(CharRange start end) = assert + (ord end - ord start + 1 < Exts.I# targetCharsCount) + charRange + !initialLength = targetCharsCount Exts.*# wcharSize + !(Exts.I# wcharSize) = sizeOf 'x' + +-- | Write a range of chars that match the given predicate +writeChars :: + (Char -> Bool) -> + Exts.MutableByteArray# d -> + Exts.Int# -> + Exts.State# d -> + Char -> + Char -> + (# Exts.State# d, Exts.Int# #) +writeChars isValid ma = go + where + go i s c1@(Exts.C# c1#) !c2 = if c1 < c2 + then go i' s' (succ c1) c2 + else (# s', i' #) + where + !(# s', i' #) = if isValid c1 + then (# Exts.writeWideCharArray# ma i c1# s, i Exts.+# 1# #) + else (# s, i #) + +-- | Duplicate a portion of an array +-- +-- Adapted from Data.Text.Array.tile +tile :: + -- | Mutable array + Exts.MutableByteArray# s -> + -- | Start of the portion to duplicate + Exts.Int# -> + -- | Total length of the duplicate + Exts.Int# -> + -- | Length of the portion to duplicate + Exts.Int# -> + Exts.State# s -> + Exts.State# s +tile dest destOff totalLen = go where - CharRange l u = charRange - chars = filter isValid [l..u] - -- Ensure to have sufficiently chars - n = 0x10FFFF `div` length chars - chars' = mconcat (replicate n chars) - -{-# INLINE foldString #-} -foldString :: forall a. (NFData a) => (Char -> a) -> String -> () -foldString f = foldr (deepseq . f) () + go l s + | Exts.isTrue# ((2# Exts.*# l) Exts.># totalLen) = + Exts.copyMutableByteArray# + dest + destOff + dest + (destOff Exts.+# l) + (totalLen Exts.-# l) + s + | otherwise = + case Exts.copyMutableByteArray# + dest + destOff + dest + (destOff Exts.+# l) + l + s of + s' -> go (2# Exts.*# l) s' diff --git a/unicode-data/bench/Unicode/Char/Case/CompatBench.hs b/unicode-data/bench/Unicode/Char/Case/CompatBench.hs index 4f299281..2b447634 100644 --- a/unicode-data/bench/Unicode/Char/Case/CompatBench.hs +++ b/unicode-data/bench/Unicode/Char/Case/CompatBench.hs @@ -2,32 +2,37 @@ module Unicode.Char.Case.CompatBench ( benchmarks ) where -import Test.Tasty.Bench ( bgroup, Benchmark ) - import qualified Data.Char as Char -import Unicode.Char.Bench (CharRange, Bench(..), bgroup') +import Test.Tasty.Bench (Benchmark) + +import Unicode.Char.Bench ( + Bench (..), + CharRange, + bgroupWithCharRange, + bgroupWithChars, + ) import qualified Unicode.Char.Case.Compat as CC {-# NOINLINE benchmarks #-} benchmarks :: CharRange -> Benchmark -benchmarks charRange = bgroup "Unicode.Char.Case.Compat" - [ bgroup' "isLower" charRange +benchmarks r = bgroupWithCharRange "Unicode.Char.Case.Compat" r $ \chars -> + [ bgroupWithChars "isLower" chars [ Bench "base" Char.isLower , Bench "unicode-data" CC.isLower ] - , bgroup' "isUpper" charRange + , bgroupWithChars "isUpper" chars [ Bench "base" Char.isUpper , Bench "unicode-data" CC.isUpper ] - , bgroup' "toLower" charRange + , bgroupWithChars "toLower" chars [ Bench "base" Char.toLower , Bench "unicode-data" CC.toLower ] - , bgroup' "toTitle" charRange + , bgroupWithChars "toTitle" chars [ Bench "base" Char.toTitle , Bench "unicode-data" CC.toTitle ] - , bgroup' "toUpper" charRange + , bgroupWithChars "toUpper" chars [ Bench "base" Char.toUpper , Bench "unicode-data" CC.toUpper ] diff --git a/unicode-data/bench/Unicode/Char/CaseBench.hs b/unicode-data/bench/Unicode/Char/CaseBench.hs index 0fc613fd..b36e19af 100644 --- a/unicode-data/bench/Unicode/Char/CaseBench.hs +++ b/unicode-data/bench/Unicode/Char/CaseBench.hs @@ -4,47 +4,51 @@ module Unicode.Char.CaseBench ( benchmarks ) where -import Test.Tasty.Bench ( bgroup, Benchmark ) +import Test.Tasty.Bench (Benchmark) -import Unicode.Char.Bench (benchChars, CharRange) +import Unicode.Char.Bench ( + Bench (..), + CharRange, + bgroupWithCharRange, + bgroupWithChars, + ) import qualified Unicode.Char.Case as C #if MIN_VERSION_base(4,18,0) import qualified Data.Char as Char -import Unicode.Char.Bench (Bench(..), bgroup') #endif {-# NOINLINE benchmarks #-} benchmarks :: CharRange -> Benchmark -benchmarks charRange = bgroup "Unicode.Char.Case" +benchmarks r = bgroupWithCharRange "Unicode.Char.Case" r $ \chars -> [ #if MIN_VERSION_base(4,18,0) - bgroup' "isLowerCase" charRange + bgroupWithChars "isLowerCase" chars [ Bench "base" Char.isLowerCase , Bench "unicode-data" C.isLowerCase ] - , bgroup' "isUpperCase" charRange + , bgroupWithChars "isUpperCase" chars [ Bench "base" Char.isUpperCase , Bench "unicode-data" C.isUpperCase ] #else - bgroup "isLowerCase" - [ benchChars "unicode-data" charRange C.isLowerCase + bgroupWithChars "isLowerCase" chars + [ Bench "unicode-data" C.isLowerCase ] - , bgroup "isUpperCase" - [ benchChars "unicode-data" charRange C.isUpperCase + , bgroupWithChars "isUpperCase" chars + [ Bench "unicode-data" C.isUpperCase ] #endif - , bgroup "toCaseFoldString" - [ benchChars "unicode-data" charRange C.toCaseFoldString + , bgroupWithChars "toCaseFoldString" chars + [ Bench "unicode-data" C.toCaseFoldString ] - , bgroup "toLowerString" - [ benchChars "unicode-data" charRange C.toLowerString + , bgroupWithChars "toLowerString" chars + [ Bench "unicode-data" C.toLowerString ] - , bgroup "toTitleString" - [ benchChars "unicode-data" charRange C.toTitleString + , bgroupWithChars "toTitleString" chars + [ Bench "unicode-data" C.toTitleString ] - , bgroup "toUpperString" - [ benchChars "unicode-data" charRange C.toUpperString + , bgroupWithChars "toUpperString" chars + [ Bench "unicode-data" C.toUpperString ] ] diff --git a/unicode-data/bench/Unicode/Char/General/BlocksBench.hs b/unicode-data/bench/Unicode/Char/General/BlocksBench.hs index a811043c..b0004ab4 100644 --- a/unicode-data/bench/Unicode/Char/General/BlocksBench.hs +++ b/unicode-data/bench/Unicode/Char/General/BlocksBench.hs @@ -1,38 +1,32 @@ +{-# LANGUAGE StandaloneDeriving #-} +{-# OPTIONS_GHC -Wno-orphans #-} module Unicode.Char.General.BlocksBench ( benchmarks ) where -import Test.Tasty.Bench ( bgroup, Benchmark, bench, nf ) +import Control.DeepSeq (NFData (..)) +import Test.Tasty.Bench (Benchmark, bgroup) -import Unicode.Char.Bench (benchChars, CharRange) +import Unicode.Char.Bench ( + Bench (..), + CharRange (..), + benchRange, + bgroupWithCharRange', + ) import qualified Unicode.Char.General.Blocks as B -import Control.DeepSeq (NFData, deepseq) -import Data.Ix (Ix(..)) +import GHC.Generics (Generic) + +-- FIXME derive Generic at datatype definition +deriving instance Generic B.BlockDefinition +instance NFData B.BlockDefinition {-# NOINLINE benchmarks #-} benchmarks :: CharRange -> Benchmark -benchmarks charRange = bgroup "Unicode.Char.General.Blocks" - [ bgroup "block" - [ benchChars "unicode-data" charRange (fmap fromEnum . B.block) +benchmarks r = bgroup "Unicode.Char.General.Blocks" + [ bgroupWithCharRange' "block" r + [ Bench "unicode-data" (fmap fromEnum . B.block) ] , bgroup "blockDefinition" - -- [FIXME] We should addd NFData instance for BlockDefinition - [ benchRangeNF "unicode-data" (show . B.blockDefinition) + [ benchRange "unicode-data" B.blockDefinition ] ] - -{-# INLINE benchRangeNF #-} -benchRangeNF - :: forall a b. (Bounded a, Ix a, NFData b) - => String - -> (a -> b) - -> Benchmark -benchRangeNF t f = bench t (nf (fold_ f) (minBound, maxBound)) - -{-# INLINE fold_ #-} -fold_ - :: forall a b. (Ix a, NFData b) - => (a -> b) - -> (a, a) - -> () -fold_ f = foldr (deepseq . f) () . range diff --git a/unicode-data/bench/Unicode/Char/General/CompatBench.hs b/unicode-data/bench/Unicode/Char/General/CompatBench.hs index 757aefaf..f0a5db06 100644 --- a/unicode-data/bench/Unicode/Char/General/CompatBench.hs +++ b/unicode-data/bench/Unicode/Char/General/CompatBench.hs @@ -2,25 +2,30 @@ module Unicode.Char.General.CompatBench ( benchmarks ) where -import Test.Tasty.Bench ( bgroup, Benchmark ) - +import Test.Tasty.Bench ( Benchmark ) import qualified Data.Char as Char -import Unicode.Char.Bench (CharRange, Bench(..), bgroup') + +import Unicode.Char.Bench ( + Bench (..), + CharRange, + bgroupWithCharRange, + bgroupWithChars, + ) import qualified Unicode.Char.General.Compat as GC {-# NOINLINE benchmarks #-} benchmarks :: CharRange -> Benchmark -benchmarks charRange = bgroup "Unicode.Char.General.Compat" - [ bgroup' "isAlpha" charRange - [ Bench "base" Char.isAlpha - , Bench "unicode-data" GC.isAlpha +benchmarks r = bgroupWithCharRange "Unicode.Char.General.Compat" r $ \chars -> + [ bgroupWithChars "isAlpha" chars + [ Bench "base" Char.isAlpha + , Bench "unicode-data" GC.isAlpha ] - , bgroup' "isLetter" charRange - [ Bench "base" Char.isLetter - , Bench "unicode-data" GC.isLetter + , bgroupWithChars "isLetter" chars + [ Bench "base" Char.isLetter + , Bench "unicode-data" GC.isLetter ] - , bgroup' "isSpace" charRange - [ Bench "base" Char.isSpace - , Bench "unicode-data" GC.isSpace + , bgroupWithChars "isSpace" chars + [ Bench "base" Char.isSpace + , Bench "unicode-data" GC.isSpace ] ] diff --git a/unicode-data/bench/Unicode/Char/GeneralBench.hs b/unicode-data/bench/Unicode/Char/GeneralBench.hs index f5d24165..47030661 100644 --- a/unicode-data/bench/Unicode/Char/GeneralBench.hs +++ b/unicode-data/bench/Unicode/Char/GeneralBench.hs @@ -2,71 +2,77 @@ module Unicode.Char.GeneralBench ( benchmarks ) where -import Test.Tasty.Bench ( bgroup, Benchmark ) - -import Unicode.Char.Bench (benchChars, bgroup', Bench(..), CharRange) import qualified Data.Char as Char +import Test.Tasty.Bench (Benchmark) + +import Unicode.Char.Bench ( + Bench (..), + CharRange, + bgroupWithCharRange, + bgroupWithChars, + ) import qualified Unicode.Char.General as G {-# NOINLINE benchmarks #-} benchmarks :: CharRange -> Benchmark -benchmarks charRange = bgroup "Unicode.Char.General" +benchmarks r = bgroupWithCharRange "Unicode.Char.General" r $ \chars -> -- Character classification - [ bgroup' "generalCategory" charRange - [ Bench "base" (fromEnum . Char.generalCategory) - , Bench "unicode-data" (fromEnum . G.generalCategory) + [ bgroupWithChars "generalCategory" chars + -- We use `fromEnum` because of incompatible GeneralCategory types + [ Bench "base" (fromEnum . Char.generalCategory) + , Bench "unicode-data" (fromEnum . G.generalCategory) ] - , bgroup "isAlphabetic" - [ benchChars "unicode-data" charRange G.isAlphabetic + , bgroupWithChars "isAlphabetic" chars + [ Bench "unicode-data" G.isAlphabetic ] - , bgroup' "isAlphaNum" charRange - [ Bench "base" Char.isAlphaNum - , Bench "unicode-data" G.isAlphaNum + , bgroupWithChars "isAlphaNum" chars + [ Bench "base" Char.isAlphaNum + , Bench "unicode-data" G.isAlphaNum ] - , bgroup' "isControl" charRange - [ Bench "base" Char.isControl - , Bench "unicode-data" G.isControl + , bgroupWithChars "isControl" chars + [ Bench "base" Char.isControl + , Bench "unicode-data" G.isControl ] - , bgroup' "isMark" charRange - [ Bench "base" Char.isMark - , Bench "unicode-data" G.isMark + , bgroupWithChars "isMark" chars + [ Bench "base" Char.isMark + , Bench "unicode-data" G.isMark ] - , bgroup' "isPrint" charRange - [ Bench "base" Char.isPrint - , Bench "unicode-data" G.isPrint + , bgroupWithChars "isPrint" chars + [ Bench "base" Char.isPrint + , Bench "unicode-data" G.isPrint ] - , bgroup' "isPunctuation" charRange - [ Bench "base" Char.isPunctuation - , Bench "unicode-data" G.isPunctuation + , bgroupWithChars "isPunctuation" chars + [ Bench "base" Char.isPunctuation + , Bench "unicode-data" G.isPunctuation ] - , bgroup' "isSeparator" charRange - [ Bench "base" Char.isSeparator - , Bench "unicode-data" G.isSeparator + , bgroupWithChars "isSeparator" chars + [ Bench "base" Char.isSeparator + , Bench "unicode-data" G.isSeparator ] - , bgroup' "isSymbol" charRange - [ Bench "base" Char.isSymbol - , Bench "unicode-data" G.isSymbol + , bgroupWithChars "isSymbol" chars + [ Bench "base" Char.isSymbol + , Bench "unicode-data" G.isSymbol ] - , bgroup "isWhiteSpace" - [ benchChars "unicode-data" charRange G.isWhiteSpace + , bgroupWithChars "isWhiteSpace" chars + [ Bench "unicode-data" G.isWhiteSpace ] -- Korean Hangul Characters - , bgroup "isHangul" - [ benchChars "unicode-data" charRange G.isHangul + , bgroupWithChars "isHangul" chars + [ Bench "unicode-data" G.isHangul ] - , bgroup "isHangulLV" - [ benchChars "unicode-data" charRange G.isHangul + , bgroupWithChars "isHangulLV" chars + [ Bench "unicode-data" G.isHangul ] - , bgroup "isJamo" - [ benchChars "unicode-data" charRange G.isJamo + , bgroupWithChars "isJamo" chars + [ Bench "unicode-data" G.isJamo ] - , bgroup "jamoLIndex" - [ benchChars "unicode-data" charRange G.jamoLIndex + , bgroupWithChars "jamoLIndex" chars + [ Bench "unicode-data" G.jamoLIndex ] - , bgroup "jamoVIndex" - [ benchChars "unicode-data" charRange G.jamoVIndex + , bgroupWithChars "jamoVIndex" chars + [ Bench "unicode-data" G.jamoVIndex ] - , bgroup "jamoTIndex" - [ benchChars "unicode-data" charRange G.jamoTIndex + , bgroupWithChars "jamoTIndex" chars + [ Bench "unicode-data" G.jamoTIndex ] ] diff --git a/unicode-data/bench/Unicode/Char/IdentifiersBench.hs b/unicode-data/bench/Unicode/Char/IdentifiersBench.hs index c2030417..8a02f966 100644 --- a/unicode-data/bench/Unicode/Char/IdentifiersBench.hs +++ b/unicode-data/bench/Unicode/Char/IdentifiersBench.hs @@ -2,30 +2,35 @@ module Unicode.Char.IdentifiersBench ( benchmarks ) where -import Test.Tasty.Bench ( bgroup, Benchmark ) +import Test.Tasty.Bench (Benchmark) -import Unicode.Char.Bench (CharRange, benchChars) +import Unicode.Char.Bench ( + Bench (..), + CharRange, + bgroupWithCharRange, + bgroupWithChars, + ) import qualified Unicode.Char.Identifiers as I {-# NOINLINE benchmarks #-} benchmarks :: CharRange -> Benchmark -benchmarks charRange = bgroup "Unicode.Char.Identifiers" - [ bgroup "isIDContinue" - [ benchChars "unicode-data" charRange I.isIDContinue +benchmarks r = bgroupWithCharRange "Unicode.Char.Identifiers" r $ \chars -> + [ bgroupWithChars "isIDContinue" chars + [ Bench "unicode-data" I.isIDContinue ] - , bgroup "isIDStart" - [ benchChars "unicode-data" charRange I.isIDStart + , bgroupWithChars "isIDStart" chars + [ Bench "unicode-data" I.isIDStart ] - , bgroup "isXIDContinue" - [ benchChars "unicode-data" charRange I.isXIDContinue + , bgroupWithChars "isXIDContinue" chars + [ Bench "unicode-data" I.isXIDContinue ] - , bgroup "isXIDStart" - [ benchChars "unicode-data" charRange I.isXIDStart + , bgroupWithChars "isXIDStart" chars + [ Bench "unicode-data" I.isXIDStart ] - , bgroup "isPatternSyntax" - [ benchChars "unicode-data" charRange I.isPatternSyntax + , bgroupWithChars "isPatternSyntax" chars + [ Bench "unicode-data" I.isPatternSyntax ] - , bgroup "isPatternWhitespace" - [ benchChars "unicode-data" charRange I.isPatternWhitespace + , bgroupWithChars "isPatternWhitespace" chars + [ Bench "unicode-data" I.isPatternWhitespace ] ] diff --git a/unicode-data/bench/Unicode/Char/NormalizationBench.hs b/unicode-data/bench/Unicode/Char/NormalizationBench.hs index a55c2f14..1b0f567b 100644 --- a/unicode-data/bench/Unicode/Char/NormalizationBench.hs +++ b/unicode-data/bench/Unicode/Char/NormalizationBench.hs @@ -2,58 +2,53 @@ module Unicode.Char.NormalizationBench ( benchmarks ) where -import Control.DeepSeq (NFData) import Test.Tasty.Bench ( bgroup, Benchmark ) -import Unicode.Char.Bench (benchChars, CharRange, benchCharsNF) +import Unicode.Char.Bench ( + Bench (..), + CharRange, + bgroupWithCharRange, + bgroupWithChars, + bgroupWithValidCharRange', + ) import qualified Unicode.Char.General as G import qualified Unicode.Char.Normalization as N {-# NOINLINE benchmarks #-} benchmarks :: CharRange -> Benchmark -benchmarks charRange = bgroup "Unicode.Char.Normalization" - [ bgroup "isCombining" - [ benchChars "unicode-data" charRange N.isCombining +benchmarks r = bgroupWithCharRange "Unicode.Char.Normalization" r $ \chars -> + [ bgroupWithChars "isCombining" chars + [ Bench "unicode-data" N.isCombining ] - , bgroup "combiningClass" - [ benchChars "unicode-data" charRange N.combiningClass + , bgroupWithChars "combiningClass" chars + [ Bench "unicode-data" N.combiningClass ] - , bgroup "isCombiningStarter" - [ benchChars "unicode-data" charRange N.isCombiningStarter + , bgroupWithChars "isCombiningStarter" chars + [ Bench "unicode-data" N.isCombiningStarter ] -- [TODO] compose, composeStarters , bgroup "isDecomposable" - [ bgroup "Canonical" - [ benchChars "unicode-data" charRange (N.isDecomposable N.Canonical) + [ bgroupWithChars "Canonical" chars + [ Bench "unicode-data" (N.isDecomposable N.Canonical) ] - , bgroup "Kompat" - [ benchChars "unicode-data" charRange (N.isDecomposable N.Kompat) + , bgroupWithChars "Kompat" chars + [ Bench "unicode-data" (N.isDecomposable N.Kompat) ] ] , bgroup "decompose" - [ bgroup "Canonical" - [ benchDecomposableChars "unicode-data" charRange N.Canonical N.decompose + [ bgroupWithValidCharRange' "Canonical" r (isValid N.Canonical) + [ Bench "unicode-data" (N.decompose N.Canonical) ] - , bgroup "Kompat" - [ benchDecomposableChars "unicode-data" charRange N.Kompat N.decompose + , bgroupWithValidCharRange' "Kompat" r (isValid N.Kompat) + [ Bench "unicode-data" (N.decompose N.Kompat) ] ] - , bgroup "decomposeHangul" - [ benchChars "unicode-data" charRange N.decomposeHangul + , bgroupWithChars "decomposeHangul" chars + [ Bench "unicode-data" N.decomposeHangul ] ] -{-# INLINE benchDecomposableChars #-} -benchDecomposableChars - :: forall a. (NFData a) - => String - -> CharRange - -> N.DecomposeMode - -> (N.DecomposeMode -> Char -> a) - -> Benchmark -benchDecomposableChars t charRange mode f = - benchCharsNF t charRange isValid (f mode) - where - -- Filter out: Surrogates, Private Use Areas and unsassigned code points - -- and non-decomposable characters - isValid c = G.generalCategory c < G.Surrogate && N.isDecomposable mode c +-- Filter out: Surrogates, Private Use Areas and unsassigned code points +-- and non-decomposable characters +isValid :: N.DecomposeMode -> Char -> Bool +isValid mode c = G.generalCategory c < G.Surrogate && N.isDecomposable mode c diff --git a/unicode-data/bench/Unicode/Char/Numeric/CompatBench.hs b/unicode-data/bench/Unicode/Char/Numeric/CompatBench.hs index 8808804b..1f055bed 100644 --- a/unicode-data/bench/Unicode/Char/Numeric/CompatBench.hs +++ b/unicode-data/bench/Unicode/Char/Numeric/CompatBench.hs @@ -2,17 +2,22 @@ module Unicode.Char.Numeric.CompatBench ( benchmarks ) where -import Test.Tasty.Bench ( bgroup, Benchmark ) - +import Test.Tasty.Bench ( Benchmark ) import qualified Data.Char as Char -import Unicode.Char.Bench (Bench (..), CharRange, bgroup') + +import Unicode.Char.Bench ( + Bench (..), + CharRange, + bgroupWithCharRange, + bgroupWithChars, + ) import qualified Unicode.Char.Numeric.Compat as NumCompat {-# NOINLINE benchmarks #-} benchmarks :: CharRange -> Benchmark -benchmarks charRange = bgroup "Unicode.Char.Numeric.Compat" - [ bgroup' "isNumber" charRange - [ Bench "base" Char.isNumber - , Bench "unicode-data" NumCompat.isNumber - ] +benchmarks r = bgroupWithCharRange "Unicode.Char.Numeric.Compat" r $ \chars -> + [ bgroupWithChars "isNumber" chars + [ Bench "base" Char.isNumber + , Bench "unicode-data" NumCompat.isNumber + ] ] diff --git a/unicode-data/bench/Unicode/Char/NumericBench.hs b/unicode-data/bench/Unicode/Char/NumericBench.hs index 25c62d40..c6df39e7 100644 --- a/unicode-data/bench/Unicode/Char/NumericBench.hs +++ b/unicode-data/bench/Unicode/Char/NumericBench.hs @@ -2,22 +2,27 @@ module Unicode.Char.NumericBench ( benchmarks ) where -import Test.Tasty.Bench ( bgroup, Benchmark ) +import Test.Tasty.Bench (Benchmark) -import Unicode.Char.Bench (benchChars, CharRange) +import Unicode.Char.Bench ( + Bench (..), + CharRange, + bgroupWithCharRange, + bgroupWithChars, + ) import qualified Unicode.Char.Numeric as Num {-# NOINLINE benchmarks #-} benchmarks :: CharRange -> Benchmark -benchmarks charRange = bgroup "Unicode.Char.Numeric" +benchmarks r = bgroupWithCharRange "Unicode.Char.Numeric" r $ \chars -> -- [TODO] Replace with 'isNumber' once the migration is done. - [ bgroup "isNumeric" - [ benchChars "unicode-data" charRange Num.isNumeric + [ bgroupWithChars "isNumeric" chars + [ Bench "unicode-data" Num.isNumeric ] - , bgroup "numericValue" - [ benchChars "unicode-data" charRange Num.numericValue + , bgroupWithChars "numericValue" chars + [ Bench "unicode-data" Num.numericValue ] - , bgroup "integerValue" - [ benchChars "unicode-data" charRange Num.integerValue + , bgroupWithChars "integerValue" chars + [ Bench "unicode-data" Num.integerValue ] ] diff --git a/unicode-data/unicode-data.cabal b/unicode-data/unicode-data.cabal index 348ea65d..7d5aa957 100644 --- a/unicode-data/unicode-data.cabal +++ b/unicode-data/unicode-data.cabal @@ -148,6 +148,9 @@ benchmark bench tasty-bench >= 0.2.5 && < 0.4, tasty >= 1.4.1 && < 1.6, unicode-data + if impl(ghc < 9.0) + -- Required for noinline + build-depends: ghc-prim -- [NOTE] Recommendation of tasty-bench to reduce garbage collection noisiness ghc-options: -O2 -fdicts-strict -rtsopts -with-rtsopts=-A32m -- [NOTE] Recommendation of tasty-bench for comparison against baseline