diff --git a/unicode-data/test/Unicode/CharSpec.hs b/unicode-data/test/Unicode/CharSpec.hs index 91461779..6c6bd895 100644 --- a/unicode-data/test/Unicode/CharSpec.hs +++ b/unicode-data/test/Unicode/CharSpec.hs @@ -6,7 +6,9 @@ module Unicode.CharSpec ( spec ) where +import Data.Bits (Bits(..)) import qualified Data.Char as Char +import Data.Foldable (traverse_) import Data.Ix (Ix(..)) import Data.Maybe (isJust) import qualified Unicode.Char as UChar @@ -20,8 +22,27 @@ import qualified Unicode.Char.Case.Compat as UCharCompat import qualified Unicode.Char.Numeric as UNumeric import qualified Unicode.Char.Numeric.Compat as UNumericCompat import qualified Unicode.Internal.Char.UnicodeData.GeneralCategory as UC -import Data.Foldable (traverse_) import Test.Hspec +-- Use to display warnings. See note in `shouldBeEqualToV`. +import Debug.Trace (traceM) + +#if MIN_VERSION_base(4,15,0) +import Data.Version (showVersion) +import GHC.Unicode (unicodeVersion) + +hasGhcUnicodeVersion :: Bool +hasGhcUnicodeVersion = True + +#else +import Data.Version (Version, makeVersion, showVersion) + +-- | Dummy Unicode version. +unicodeVersion :: Version +unicodeVersion = makeVersion [0, 0, 0] + +hasGhcUnicodeVersion :: Bool +hasGhcUnicodeVersion = False +#endif {- [NOTE] These tests may fail if the compiler’s Unicode version @@ -36,22 +57,15 @@ does not match the version of this package. | 9.0.[1-2] | 4.15.0 | 12.1 | | 9.2.[1-6] | 4.16.0 | 14.0 | | 9.4.[1-4] | 4.17.0 | 14.0 | -| 9.6.1 | 4.18.0 | 15.0 | +| 9.6.[1-3] | 4.18.{0,1} | 15.0 | +| 9.6.4-5 | 4.18.2+ | 15.1 | | 9.8.1 | 4.19.0 | 15.1 | +| 9.10.1 | 4.20.0 | 15.1 | +-------------+----------------+-----------------+ -} spec :: Spec spec = do -#ifdef COMPATIBLE_GHC_UNICODE - let describe' = describe - let it' = it -#else - let describe' t = before_ (pendingWith "Incompatible GHC Unicode version") - . describe t - let it' t = before_ (pendingWith "Incompatible GHC Unicode version") - . it t -#endif describe "Unicode blocks" do it "Characters not in any block are unassigned" let { check c = case UBlocks.block c of @@ -73,20 +87,20 @@ spec = do it "Characters in a block definition have the corresponding block" let { check b = let r = UBlocks.blockRange (UBlocks.blockDefinition b) - in traverse_ (checkChar b) (UChar.chr <$> range r); + in traverse_ (checkChar b. UChar.chr) (range r); checkChar b c = let b' = UBlocks.block c in if b' == Just b then pure () else expectationFailure $ mconcat [ "Block is different for “", show c, "”. Expected: “Just " , show b, "” but got: “", show b', "”." ] } in traverse_ check [minBound..maxBound] - describe' "Unicode general categories" do + describe "Unicode general categories" do it "generalCategory" do -- [NOTE] We cannot compare the categories directly, so use 'show'. - (show . UChar.generalCategory) `shouldBeEqualTo` (show . Char.generalCategory) - describe' "Character classification" do + (show . UChar.generalCategory) `shouldBeEqualToV` (show . Char.generalCategory) + describe "Character classification" do it "isAlpha" do - UChar.isAlpha `shouldBeEqualTo` Char.isAlpha + UChar.isAlpha `shouldBeEqualToV` Char.isAlpha describe "isAlphaNum" do let isAlphaNumRef = \case UChar.UppercaseLetter -> True @@ -102,9 +116,9 @@ spec = do Char.chr UC.MaxIsAlphaNum `shouldBe` maxCodePointBy isAlphaNumRef UC.MaxIsAlphaNum `shouldSatisfy` isPlane0To3 it "Compare to base" do - UChar.isAlphaNum `shouldBeEqualTo` Char.isAlphaNum + UChar.isAlphaNum `shouldBeEqualToV` Char.isAlphaNum it "isControl" do - UChar.isControl `shouldBeEqualTo` Char.isControl + UChar.isControl `shouldBeEqualToV` Char.isControl describe "isLetter" do let isLetterRef = \case UChar.UppercaseLetter -> True @@ -117,13 +131,13 @@ spec = do Char.chr UC.MaxIsLetter `shouldBe` maxCodePointBy isLetterRef UC.MaxIsLetter `shouldSatisfy` isPlane0To3 it "Compare to base" do - UCharCompat.isLetter `shouldBeEqualTo` Char.isLetter + UCharCompat.isLetter `shouldBeEqualToV` Char.isLetter it "isMark" do - UChar.isMark `shouldBeEqualTo` Char.isMark + UChar.isMark `shouldBeEqualToV` Char.isMark it "isPrint" do - UChar.isPrint `shouldBeEqualTo` Char.isPrint + UChar.isPrint `shouldBeEqualToV` Char.isPrint it "isPunctuation" do - UChar.isPunctuation `shouldBeEqualTo` Char.isPunctuation + UChar.isPunctuation `shouldBeEqualToV` Char.isPunctuation describe "isSeparator" do it "Check max codepoint for isSeparator" do let isSeparatorRef = \case @@ -134,27 +148,27 @@ spec = do Char.chr UC.MaxIsSeparator `shouldBe` maxCodePointBy isSeparatorRef UC.MaxIsSeparator `shouldSatisfy` isPlane0To3 it "Compare to base" do - UChar.isSeparator `shouldBeEqualTo` Char.isSeparator + UChar.isSeparator `shouldBeEqualToV` Char.isSeparator describe "isSpace" do it "Check max codepoint for Space" do let isSpaceRef = (== UChar.Space) Char.chr UC.MaxIsSpace `shouldBe` maxCodePointBy isSpaceRef UC.MaxIsSpace `shouldSatisfy` isPlane0To3 it "Compare to base" do - UCharCompat.isSpace `shouldBeEqualTo` Char.isSpace + UCharCompat.isSpace `shouldBeEqualToV` Char.isSpace it "isSymbol" do - UChar.isSymbol `shouldBeEqualTo` Char.isSymbol + UChar.isSymbol `shouldBeEqualToV` Char.isSymbol describe "Case" do describe "isLower" do it "Check max codepoint for lower" do let isLowerRef = (== UChar.LowercaseLetter) Char.chr UC.MaxIsLower `shouldBe` maxCodePointBy isLowerRef UC.MaxIsLower `shouldSatisfy` isPlane0To3 - it' "Compare to base" do - UCharCompat.isLower `shouldBeEqualTo` Char.isLower + it "Compare to base" do + UCharCompat.isLower `shouldBeEqualToV` Char.isLower #if MIN_VERSION_base(4,18,0) - it' "isLowerCase" do - UChar.isLowerCase `shouldBeEqualTo` Char.isLowerCase + it "isLowerCase" do + UChar.isLowerCase `shouldBeEqualToV` Char.isLowerCase #endif describe "isUpper" do it "Check max codepoint for upper" do @@ -164,14 +178,14 @@ spec = do _ -> False Char.chr UC.MaxIsUpper `shouldBe` maxCodePointBy isUpperRef UC.MaxIsUpper `shouldSatisfy` isPlane0To3 - it' "Compare to base" do - UCharCompat.isUpper `shouldBeEqualTo` Char.isUpper + it "Compare to base" do + UCharCompat.isUpper `shouldBeEqualToV` Char.isUpper #if MIN_VERSION_base(4,18,0) - it' "isUpperCase" do - UChar.isUpperCase `shouldBeEqualTo` Char.isUpperCase + it "isUpperCase" do + UChar.isUpperCase `shouldBeEqualToV` Char.isUpperCase #endif - it' "toLower" do - UChar.toLower `shouldBeEqualTo` Char.toLower + it "toLower" do + UChar.toLower `shouldBeEqualToV` Char.toLower let caseCheck f (c, cs) = c `shouldSatisfy` (== cs) . f describe "toLowerString" do it "Examples" do @@ -181,7 +195,7 @@ spec = do , ('1', "1") , ('\x130', "i\x307") ] traverse_ (caseCheck UChar.toLowerString) examples - it' "Common mapping should match simple one" do + it "Common mapping should match simple one" do let check c = case UChar.toLowerString c of [c'] -> c `shouldSatisfy` ((== c') . UChar.toLower) _ -> pure () @@ -191,8 +205,8 @@ spec = do let cf = UChar.toLowerString c' in cf == foldMap UChar.toLowerString cf traverse_ check [minBound..maxBound] - it' "toUpper" do - UChar.toUpper `shouldBeEqualTo` Char.toUpper + it "toUpper" do + UChar.toUpper `shouldBeEqualToV` Char.toUpper describe "toUpperString" do it "Examples" do let examples = [ ('\0', "\0") @@ -202,7 +216,7 @@ spec = do , ('\xdf', "SS") , ('\x1F52', "\x03A5\x0313\x0300") ] traverse_ (caseCheck UChar.toUpperString) examples - it' "Common mapping should match simple one" do + it "Common mapping should match simple one" do let check c = case UChar.toUpperString c of [c'] -> c `shouldSatisfy` ((== c') . UChar.toUpper) _ -> pure () @@ -212,8 +226,8 @@ spec = do let cf = UChar.toUpperString c' in cf == foldMap UChar.toUpperString cf traverse_ check [minBound..maxBound] - it' "toTitle" do - UChar.toTitle `shouldBeEqualTo` Char.toTitle + it "toTitle" do + UChar.toTitle `shouldBeEqualToV` Char.toTitle describe "toTitleString" do it "Examples" do let examples = [ ('\0', "\0") @@ -224,7 +238,7 @@ spec = do , ('\xfb02', "Fl") , ('\x1F52', "\x03A5\x0313\x0300") ] traverse_ (caseCheck UChar.toTitleString) examples - it' "Common mapping should match simple one" do + it "Common mapping should match simple one" do let check c = case UChar.toTitleString c of [c'] -> c `shouldSatisfy` ((== c') . UChar.toTitle) _ -> pure () @@ -253,8 +267,8 @@ spec = do _ -> False Char.chr UC.MaxIsNumber `shouldBe` maxCodePointBy isNumber UC.MaxIsNumber `shouldSatisfy` isPlane0To3 - it' "Compare to base" do - UNumericCompat.isNumber `shouldBeEqualTo` Char.isNumber + it "Compare to base" do + UNumericCompat.isNumber `shouldBeEqualToV` Char.isNumber it "isNumber implies a numeric value" do -- [NOTE] the following does not hold with the current predicate `isNumber`. -- As of Unicode 15.0.0, there are 81 such characters (all CJK). @@ -262,14 +276,71 @@ spec = do let check c = not (UNumericCompat.isNumber c) || isJust (UNumeric.numericValue c) traverse_ (`shouldSatisfy` check) [minBound..maxBound] where - shouldBeEqualTo - :: forall a b. (Bounded a, Enum a, Show a, Eq b, Show b) - => (a -> b) - -> (a -> b) + -- [NOTE] Unused for now + -- shouldBeEqualTo + -- :: forall a b. (Bounded a, Enum a, Show a, Eq b, Show b) + -- => (a -> b) + -- -> (a -> b) + -- -> IO () + -- shouldBeEqualTo f g = + -- let same x = f x == g x + -- in traverse_ (`shouldSatisfy` same) [minBound..maxBound] + + -- There is no feature to display warnings other than `trace`. + -- If we use `pendingWith` then the whole test is pending, not just + -- the assertion. + shouldBeEqualToV + :: forall b. (HasCallStack) => (Eq b, Show b) + => (Char -> b) + -> (Char -> b) -> IO () - shouldBeEqualTo f g = + shouldBeEqualToV f g = let same x = f x == g x - in traverse_ (`shouldSatisfy` same) [minBound..maxBound] + in traverse_ (`shouldSatisfyV` same) [minBound..maxBound] + + shouldSatisfyV :: (HasCallStack) => Char -> (Char -> Bool) -> IO () + shouldSatisfyV c h + | hasSameUnicodeVersion = shouldSatisfy c h + | h c = pure () + | not hasGhcUnicodeVersion = traceM . mconcat $ + [ "[WARNING] Cannot test ", show c + , ": incompatible Unicode version (too old). Expected " + , showVersion UChar.unicodeVersion ] + | isUnassigned c = traceM . mconcat $ + [ "[WARNING] Cannot test ", show c + , ": incompatible Unicode version (unassigned char). Expected " + , showVersion UChar.unicodeVersion + , ", but got: " + , showVersion unicodeVersion ] + | hasDifferentCategory c = traceM . mconcat $ + [ "[WARNING] Cannot test ", show c + , ": incompatible Unicode version (different general category)." + , " Expected " + , showVersion UChar.unicodeVersion + , ", but got: " + , showVersion unicodeVersion ] + | otherwise = shouldSatisfy c h + -- Check if the character is not assigned in exactly one Unicode version. + isUnassigned c = (UChar.generalCategory c == UChar.NotAssigned) + `xor` (Char.generalCategory c == Char.NotAssigned) + -- Check if the character has changed its general category + hasDifferentCategory c = fromEnum (UChar.generalCategory c) + /= fromEnum (Char.generalCategory c) + hasSameUnicodeVersion = unicodeVersion == UChar.unicodeVersion + + -- [NOTE] Unused for now + -- shouldBe' x y + -- | x == y = pure () + -- | unicodeVersion /= UChar.unicodeVersion = traceM msg + -- | otherwise = shouldBe x y + -- where + -- msg = mconcat + -- [ "[WARNING] Cannot test ", show x + -- , ": incompatible Unicode version. Expected " + -- , showVersion UChar.unicodeVersion + -- , ", but got: " + -- , unicodeVersionString ] + isPlane0To3 = (< 0x40000) maxCodePointBy p = foldr (\c -> if p (UChar.generalCategory c) then max c else id) diff --git a/unicode-data/unicode-data.cabal b/unicode-data/unicode-data.cabal index c044e63c..243cdf85 100644 --- a/unicode-data/unicode-data.cabal +++ b/unicode-data/unicode-data.cabal @@ -124,10 +124,6 @@ test-suite test base >= 4.7 && < 4.21 , hspec >= 2.0 && < 2.12 , unicode-data - -- We need to match a GHC version with the same Unicode version. - -- See: test/Unicode/CharSpec.hs for compatibility table. - if impl(ghc >= 9.6.0) && impl(ghc < 9.8.0) - cpp-options: -DCOMPATIBLE_GHC_UNICODE benchmark bench import: default-extensions, compile-options