Skip to content

Commit

Permalink
Allow different GHC Unicode version
Browse files Browse the repository at this point in the history
Just print characters that cannot be tested
  • Loading branch information
wismill committed Jun 7, 2024
1 parent 35e0c50 commit 49a2122
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 54 deletions.
171 changes: 121 additions & 50 deletions unicode-data/test/Unicode/CharSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ module Unicode.CharSpec
( spec
) where

import Data.Bits (Bits(..))
import qualified Data.Char as Char
import Data.Foldable (traverse_)
import Data.Ix (Ix(..))
import Data.Maybe (isJust)
import qualified Unicode.Char as UChar
Expand All @@ -20,8 +22,27 @@ import qualified Unicode.Char.Case.Compat as UCharCompat
import qualified Unicode.Char.Numeric as UNumeric
import qualified Unicode.Char.Numeric.Compat as UNumericCompat
import qualified Unicode.Internal.Char.UnicodeData.GeneralCategory as UC
import Data.Foldable (traverse_)
import Test.Hspec
-- Use to display warnings. See note in `shouldBeEqualToV`.
import Debug.Trace (traceM)

#if MIN_VERSION_base(4,15,0)
import Data.Version (showVersion)
import GHC.Unicode (unicodeVersion)

hasGhcUnicodeVersion :: Bool
hasGhcUnicodeVersion = True

#else
import Data.Version (Version, makeVersion, showVersion)

-- | Dummy Unicode version.
unicodeVersion :: Version
unicodeVersion = makeVersion [0, 0, 0]

hasGhcUnicodeVersion :: Bool
hasGhcUnicodeVersion = False
#endif

{- [NOTE]
These tests may fail if the compiler’s Unicode version
Expand All @@ -36,22 +57,15 @@ does not match the version of this package.
| 9.0.[1-2] | 4.15.0 | 12.1 |
| 9.2.[1-6] | 4.16.0 | 14.0 |
| 9.4.[1-4] | 4.17.0 | 14.0 |
| 9.6.1 | 4.18.0 | 15.0 |
| 9.6.[1-3] | 4.18.{0,1} | 15.0 |
| 9.6.4-5 | 4.18.2+ | 15.1 |
| 9.8.1 | 4.19.0 | 15.1 |
| 9.10.1 | 4.20.0 | 15.1 |
+-------------+----------------+-----------------+
-}

spec :: Spec
spec = do
#ifdef COMPATIBLE_GHC_UNICODE
let describe' = describe
let it' = it
#else
let describe' t = before_ (pendingWith "Incompatible GHC Unicode version")
. describe t
let it' t = before_ (pendingWith "Incompatible GHC Unicode version")
. it t
#endif
describe "Unicode blocks" do
it "Characters not in any block are unassigned"
let { check c = case UBlocks.block c of
Expand All @@ -73,20 +87,20 @@ spec = do
it "Characters in a block definition have the corresponding block"
let {
check b = let r = UBlocks.blockRange (UBlocks.blockDefinition b)
in traverse_ (checkChar b) (UChar.chr <$> range r);
in traverse_ (checkChar b. UChar.chr) (range r);
checkChar b c = let b' = UBlocks.block c in if b' == Just b
then pure ()
else expectationFailure $ mconcat
[ "Block is different for “", show c, "”. Expected: “Just "
, show b, "” but got: “", show b', "”." ]
} in traverse_ check [minBound..maxBound]
describe' "Unicode general categories" do
describe "Unicode general categories" do
it "generalCategory" do
-- [NOTE] We cannot compare the categories directly, so use 'show'.
(show . UChar.generalCategory) `shouldBeEqualTo` (show . Char.generalCategory)
describe' "Character classification" do
(show . UChar.generalCategory) `shouldBeEqualToV` (show . Char.generalCategory)
describe "Character classification" do
it "isAlpha" do
UChar.isAlpha `shouldBeEqualTo` Char.isAlpha
UChar.isAlpha `shouldBeEqualToV` Char.isAlpha
describe "isAlphaNum" do
let isAlphaNumRef = \case
UChar.UppercaseLetter -> True
Expand All @@ -102,9 +116,9 @@ spec = do
Char.chr UC.MaxIsAlphaNum `shouldBe` maxCodePointBy isAlphaNumRef
UC.MaxIsAlphaNum `shouldSatisfy` isPlane0To3
it "Compare to base" do
UChar.isAlphaNum `shouldBeEqualTo` Char.isAlphaNum
UChar.isAlphaNum `shouldBeEqualToV` Char.isAlphaNum
it "isControl" do
UChar.isControl `shouldBeEqualTo` Char.isControl
UChar.isControl `shouldBeEqualToV` Char.isControl
describe "isLetter" do
let isLetterRef = \case
UChar.UppercaseLetter -> True
Expand All @@ -117,13 +131,13 @@ spec = do
Char.chr UC.MaxIsLetter `shouldBe` maxCodePointBy isLetterRef
UC.MaxIsLetter `shouldSatisfy` isPlane0To3
it "Compare to base" do
UCharCompat.isLetter `shouldBeEqualTo` Char.isLetter
UCharCompat.isLetter `shouldBeEqualToV` Char.isLetter
it "isMark" do
UChar.isMark `shouldBeEqualTo` Char.isMark
UChar.isMark `shouldBeEqualToV` Char.isMark
it "isPrint" do
UChar.isPrint `shouldBeEqualTo` Char.isPrint
UChar.isPrint `shouldBeEqualToV` Char.isPrint
it "isPunctuation" do
UChar.isPunctuation `shouldBeEqualTo` Char.isPunctuation
UChar.isPunctuation `shouldBeEqualToV` Char.isPunctuation
describe "isSeparator" do
it "Check max codepoint for isSeparator" do
let isSeparatorRef = \case
Expand All @@ -134,27 +148,27 @@ spec = do
Char.chr UC.MaxIsSeparator `shouldBe` maxCodePointBy isSeparatorRef
UC.MaxIsSeparator `shouldSatisfy` isPlane0To3
it "Compare to base" do
UChar.isSeparator `shouldBeEqualTo` Char.isSeparator
UChar.isSeparator `shouldBeEqualToV` Char.isSeparator
describe "isSpace" do
it "Check max codepoint for Space" do
let isSpaceRef = (== UChar.Space)
Char.chr UC.MaxIsSpace `shouldBe` maxCodePointBy isSpaceRef
UC.MaxIsSpace `shouldSatisfy` isPlane0To3
it "Compare to base" do
UCharCompat.isSpace `shouldBeEqualTo` Char.isSpace
UCharCompat.isSpace `shouldBeEqualToV` Char.isSpace
it "isSymbol" do
UChar.isSymbol `shouldBeEqualTo` Char.isSymbol
UChar.isSymbol `shouldBeEqualToV` Char.isSymbol
describe "Case" do
describe "isLower" do
it "Check max codepoint for lower" do
let isLowerRef = (== UChar.LowercaseLetter)
Char.chr UC.MaxIsLower `shouldBe` maxCodePointBy isLowerRef
UC.MaxIsLower `shouldSatisfy` isPlane0To3
it' "Compare to base" do
UCharCompat.isLower `shouldBeEqualTo` Char.isLower
it "Compare to base" do
UCharCompat.isLower `shouldBeEqualToV` Char.isLower
#if MIN_VERSION_base(4,18,0)
it' "isLowerCase" do
UChar.isLowerCase `shouldBeEqualTo` Char.isLowerCase
it "isLowerCase" do
UChar.isLowerCase `shouldBeEqualToV` Char.isLowerCase
#endif
describe "isUpper" do
it "Check max codepoint for upper" do
Expand All @@ -164,14 +178,14 @@ spec = do
_ -> False
Char.chr UC.MaxIsUpper `shouldBe` maxCodePointBy isUpperRef
UC.MaxIsUpper `shouldSatisfy` isPlane0To3
it' "Compare to base" do
UCharCompat.isUpper `shouldBeEqualTo` Char.isUpper
it "Compare to base" do
UCharCompat.isUpper `shouldBeEqualToV` Char.isUpper
#if MIN_VERSION_base(4,18,0)
it' "isUpperCase" do
UChar.isUpperCase `shouldBeEqualTo` Char.isUpperCase
it "isUpperCase" do
UChar.isUpperCase `shouldBeEqualToV` Char.isUpperCase
#endif
it' "toLower" do
UChar.toLower `shouldBeEqualTo` Char.toLower
it "toLower" do
UChar.toLower `shouldBeEqualToV` Char.toLower
let caseCheck f (c, cs) = c `shouldSatisfy` (== cs) . f
describe "toLowerString" do
it "Examples" do
Expand All @@ -181,7 +195,7 @@ spec = do
, ('1', "1")
, ('\x130', "i\x307") ]
traverse_ (caseCheck UChar.toLowerString) examples
it' "Common mapping should match simple one" do
it "Common mapping should match simple one" do
let check c = case UChar.toLowerString c of
[c'] -> c `shouldSatisfy` ((== c') . UChar.toLower)
_ -> pure ()
Expand All @@ -191,8 +205,8 @@ spec = do
let cf = UChar.toLowerString c'
in cf == foldMap UChar.toLowerString cf
traverse_ check [minBound..maxBound]
it' "toUpper" do
UChar.toUpper `shouldBeEqualTo` Char.toUpper
it "toUpper" do
UChar.toUpper `shouldBeEqualToV` Char.toUpper
describe "toUpperString" do
it "Examples" do
let examples = [ ('\0', "\0")
Expand All @@ -202,7 +216,7 @@ spec = do
, ('\xdf', "SS")
, ('\x1F52', "\x03A5\x0313\x0300") ]
traverse_ (caseCheck UChar.toUpperString) examples
it' "Common mapping should match simple one" do
it "Common mapping should match simple one" do
let check c = case UChar.toUpperString c of
[c'] -> c `shouldSatisfy` ((== c') . UChar.toUpper)
_ -> pure ()
Expand All @@ -212,8 +226,8 @@ spec = do
let cf = UChar.toUpperString c'
in cf == foldMap UChar.toUpperString cf
traverse_ check [minBound..maxBound]
it' "toTitle" do
UChar.toTitle `shouldBeEqualTo` Char.toTitle
it "toTitle" do
UChar.toTitle `shouldBeEqualToV` Char.toTitle
describe "toTitleString" do
it "Examples" do
let examples = [ ('\0', "\0")
Expand All @@ -224,7 +238,7 @@ spec = do
, ('\xfb02', "Fl")
, ('\x1F52', "\x03A5\x0313\x0300") ]
traverse_ (caseCheck UChar.toTitleString) examples
it' "Common mapping should match simple one" do
it "Common mapping should match simple one" do
let check c = case UChar.toTitleString c of
[c'] -> c `shouldSatisfy` ((== c') . UChar.toTitle)
_ -> pure ()
Expand Down Expand Up @@ -253,23 +267,80 @@ spec = do
_ -> False
Char.chr UC.MaxIsNumber `shouldBe` maxCodePointBy isNumber
UC.MaxIsNumber `shouldSatisfy` isPlane0To3
it' "Compare to base" do
UNumericCompat.isNumber `shouldBeEqualTo` Char.isNumber
it "Compare to base" do
UNumericCompat.isNumber `shouldBeEqualToV` Char.isNumber
it "isNumber implies a numeric value" do
-- [NOTE] the following does not hold with the current predicate `isNumber`.
-- As of Unicode 15.0.0, there are 81 such characters (all CJK).
-- let check c = (UNumeric.isNumber c `xor` isNothing (UNumeric.numericValue c))
let check c = not (UNumericCompat.isNumber c) || isJust (UNumeric.numericValue c)
traverse_ (`shouldSatisfy` check) [minBound..maxBound]
where
shouldBeEqualTo
:: forall a b. (Bounded a, Enum a, Show a, Eq b, Show b)
=> (a -> b)
-> (a -> b)
-- [NOTE] Unused for now
-- shouldBeEqualTo
-- :: forall a b. (Bounded a, Enum a, Show a, Eq b, Show b)
-- => (a -> b)
-- -> (a -> b)
-- -> IO ()
-- shouldBeEqualTo f g =
-- let same x = f x == g x
-- in traverse_ (`shouldSatisfy` same) [minBound..maxBound]

-- There is no feature to display warnings other than `trace`.
-- If we use `pendingWith` then the whole test is pending, not just
-- the assertion.
shouldBeEqualToV
:: forall b. (HasCallStack) => (Eq b, Show b)
=> (Char -> b)
-> (Char -> b)
-> IO ()
shouldBeEqualTo f g =
shouldBeEqualToV f g =
let same x = f x == g x
in traverse_ (`shouldSatisfy` same) [minBound..maxBound]
in traverse_ (`shouldSatisfyV` same) [minBound..maxBound]

shouldSatisfyV :: (HasCallStack) => Char -> (Char -> Bool) -> IO ()
shouldSatisfyV c h
| hasSameUnicodeVersion = shouldSatisfy c h
| h c = pure ()
| not hasGhcUnicodeVersion = traceM . mconcat $
[ "[WARNING] Cannot test ", show c
, ": incompatible Unicode version (too old). Expected "
, showVersion UChar.unicodeVersion ]
| isUnassigned c = traceM . mconcat $
[ "[WARNING] Cannot test ", show c
, ": incompatible Unicode version (unassigned char). Expected "
, showVersion UChar.unicodeVersion
, ", but got: "
, showVersion unicodeVersion ]
| hasDifferentCategory c = traceM . mconcat $
[ "[WARNING] Cannot test ", show c
, ": incompatible Unicode version (different general category)."
, " Expected "
, showVersion UChar.unicodeVersion
, ", but got: "
, showVersion unicodeVersion ]
| otherwise = shouldSatisfy c h
-- Check if the character is not assigned in exactly one Unicode version.
isUnassigned c = (UChar.generalCategory c == UChar.NotAssigned)
`xor` (Char.generalCategory c == Char.NotAssigned)
-- Check if the character has changed its general category
hasDifferentCategory c = fromEnum (UChar.generalCategory c)
/= fromEnum (Char.generalCategory c)
hasSameUnicodeVersion = unicodeVersion == UChar.unicodeVersion

-- [NOTE] Unused for now
-- shouldBe' x y
-- | x == y = pure ()
-- | unicodeVersion /= UChar.unicodeVersion = traceM msg
-- | otherwise = shouldBe x y
-- where
-- msg = mconcat
-- [ "[WARNING] Cannot test ", show x
-- , ": incompatible Unicode version. Expected "
-- , showVersion UChar.unicodeVersion
-- , ", but got: "
-- , unicodeVersionString ]

isPlane0To3 = (< 0x40000)
maxCodePointBy p = foldr
(\c -> if p (UChar.generalCategory c) then max c else id)
Expand Down
4 changes: 0 additions & 4 deletions unicode-data/unicode-data.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,6 @@ test-suite test
base >= 4.7 && < 4.21
, hspec >= 2.0 && < 2.12
, unicode-data
-- We need to match a GHC version with the same Unicode version.
-- See: test/Unicode/CharSpec.hs for compatibility table.
if impl(ghc >= 9.6.0) && impl(ghc < 9.8.0)
cpp-options: -DCOMPATIBLE_GHC_UNICODE

benchmark bench
import: default-extensions, compile-options
Expand Down

0 comments on commit 49a2122

Please sign in to comment.