diff --git a/cabal.project b/cabal.project index 3a84e6ca..08e42657 100644 --- a/cabal.project +++ b/cabal.project @@ -1,6 +1,7 @@ packages: */*.cabal, experimental/*/*.cabal + unicode-data-names/icu -- Allow any newer non-upgradable package allow-newer: diff --git a/unicode-data-names/bench/Main.hs b/unicode-data-names/bench/Main.hs index eddb76f2..9e66b4de 100644 --- a/unicode-data-names/bench/Main.hs +++ b/unicode-data-names/bench/Main.hs @@ -1,13 +1,10 @@ -{-# LANGUAGE CPP, ExistentialQuantification #-} +{-# LANGUAGE ExistentialQuantification #-} import Control.DeepSeq (NFData, deepseq) import Data.Ix (Ix(..)) import Test.Tasty.Bench (Benchmark, bgroup, bcompare, bench, nf, defaultMain) import qualified Unicode.Char.General.Names as Names -#ifdef HAS_ICU -import qualified ICU.Names as ICU -#endif -- | A unit benchmark data Bench = forall a. (NFData a) => Bench @@ -21,15 +18,9 @@ main = defaultMain [ bgroup "Unicode.Char.General.Names" [ bgroup' "name" [ Bench "unicode-data" Names.name -#ifdef HAS_ICU - , Bench "icu" ICU.name -#endif ] , bgroup' "correctedName" [ Bench "unicode-data" Names.correctedName -#ifdef HAS_ICU - , Bench "icu" ICU.correctedName -#endif ] , bgroup' "nameOrAlias" [ Bench "unicode-data" Names.name @@ -57,7 +48,7 @@ main = defaultMain "unicode-data" -> benchNF title _ -> bcompare ( "$NF == \"unicode-data\" && $(NF-1) == \"" ++ - groupTitle ++ "\"") + groupTitle ++ "\"" ) . benchNF title benchNF :: forall a. (NFData a) => String -> (Char -> a) -> Benchmark diff --git a/unicode-data-names/icu/LICENSE b/unicode-data-names/icu/LICENSE new file mode 100644 index 00000000..2fab32ff --- /dev/null +++ b/unicode-data-names/icu/LICENSE @@ -0,0 +1,249 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work. + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + +------------------------------------------------------------------------------- +This distribution includes portions of code from the "unicode-transforms" +package (https://github.com/composewell/unicode-transforms/) which is +available under BSD-3-Clause license as described below. +------------------------------------------------------------------------------- + +Copyright (c) 2016, Harendra Kumar +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +------------------------------------------------------------------------------- +This distribution includes portions of code from the "unicode-transforms" +package (https://github.com/composewell/unicode-transforms/) +which included portions of code from the "prose" +(https://github.com/llelf/prose) package available under BSD-3-Clause +license as described below. +------------------------------------------------------------------------------- + +Copyright (c) 2014–2015, Antonio Nikishaev + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of Antonio Nikishaev nor the names of other + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/unicode-data-names/icu/bench/ICU.hs b/unicode-data-names/icu/bench/ICU.hs new file mode 100644 index 00000000..9155760d --- /dev/null +++ b/unicode-data-names/icu/bench/ICU.hs @@ -0,0 +1,48 @@ +{-# LANGUAGE ExistentialQuantification #-} + +import Control.DeepSeq (NFData, deepseq) +import Data.Ix (Ix(..)) +import Test.Tasty.Bench (Benchmark, bgroup, bcompare, bench, nf, defaultMain) + +import qualified Unicode.Char.General.Names as Names +import qualified ICU.Names as ICU + +-- | A unit benchmark +data Bench = forall a. (NFData a) => Bench + { -- | Name + _title :: !String + -- | Function to benchmark + , _func :: Char -> a } + +main :: IO () +main = defaultMain + [ bgroup "Unicode.Char.General.Names" + [ bgroup' "name" + [ Bench "unicode-data" Names.name + , Bench "icu" ICU.name + ] + , bgroup' "correctedName" + [ Bench "unicode-data" Names.correctedName + , Bench "icu" ICU.correctedName + ] + ] + ] + where + bgroup' groupTitle bs = bgroup groupTitle + [ benchNF' groupTitle title f + | Bench title f <- bs + ] + + -- [NOTE] Works if groupTitle uniquely identifies the benchmark group. + benchNF' groupTitle title = case title of + "unicode-data" -> benchNF title + _ -> + bcompare ( "$NF == \"unicode-data\" && $(NF-1) == \"" ++ + groupTitle ++ "\"" ) + . benchNF title + + benchNF :: forall a. (NFData a) => String -> (Char -> a) -> Benchmark + benchNF t f = bench t $ nf (fold_ f) (minBound, maxBound) + + fold_ :: forall a. (NFData a) => (Char -> a) -> (Char, Char) -> () + fold_ f = foldr (deepseq . f) () . 
range diff --git a/unicode-data-names/cbits/icu.c b/unicode-data-names/icu/cbits/icu.c similarity index 100% rename from unicode-data-names/cbits/icu.c rename to unicode-data-names/icu/cbits/icu.c diff --git a/unicode-data-names/cbits/icu.h b/unicode-data-names/icu/cbits/icu.h similarity index 100% rename from unicode-data-names/cbits/icu.h rename to unicode-data-names/icu/cbits/icu.h diff --git a/unicode-data-names/icu/icu-names.cabal b/unicode-data-names/icu/icu-names.cabal new file mode 100644 index 00000000..24725a95 --- /dev/null +++ b/unicode-data-names/icu/icu-names.cabal @@ -0,0 +1,109 @@ +cabal-version: 2.2 +name: icu-names +version: 0.3.0 +synopsis: ICU binding +description: + Binding to ICU for character names and aliases (internal library). +homepage: https://github.com/composewell/unicode-data +bug-reports: https://github.com/composewell/unicode-data/issues +license: Apache-2.0 +license-file: LICENSE +author: Composewell Technologies and Contributors +maintainer: dev@wismill.eu +copyright: 2023 Composewell Technologies and Contributors +category: Data,Text,Unicode +stability: Experimental +build-type: Simple + +source-repository head + type: git + location: https://github.com/composewell/unicode-data + +flag has-icu + description: Use ICU for test and benchmark + manual: True + default: False + +common default-extensions + default-extensions: + BangPatterns + DeriveGeneric + MagicHash + RecordWildCards + ScopedTypeVariables + TupleSections + FlexibleContexts + LambdaCase + + -- Experimental, may lead to issues + UnboxedTuples + +common compile-options + ghc-options: -Wall + -fwarn-identities + -fwarn-incomplete-record-updates + -fwarn-incomplete-uni-patterns + -fwarn-tabs + default-language: Haskell2010 + +extra-source-files: + cbits/icu.h + +library + import: default-extensions, compile-options + if flag(has-icu) + buildable: True + else + buildable: False + exposed-modules: + ICU.Names + hs-source-dirs: lib + build-depends: + base >= 4.7 && < 
4.19 + include-dirs: cbits + c-sources: cbits/icu.c + cc-options: -Wall -Wextra -pedantic + extra-libraries: icuuc + pkgconfig-depends: + icu-uc >= 72.1 + +test-suite test + import: default-extensions, compile-options + if flag(has-icu) + buildable: True + else + buildable: False + type: exitcode-stdio-1.0 + main-is: ICU.hs + hs-source-dirs: + test + other-modules: + ICU.NamesSpec + build-depends: + base >= 4.7 && < 4.19 + , hspec >= 2.0 && < 2.11 + , icu-names + , unicode-data >= 0.4 && < 0.5 + , unicode-data-names + +benchmark bench + import: default-extensions, compile-options + if flag(has-icu) + buildable: True + else + buildable: False + type: exitcode-stdio-1.0 + hs-source-dirs: bench + main-is: ICU.hs + build-depends: + base >= 4.7 && < 4.19, + deepseq >= 1.1 && < 1.5, + icu-names, + tasty-bench >= 0.2.5 && < 0.4, + tasty >= 1.4.1 && < 1.5, + unicode-data-names + -- [NOTE] Recommendation of tasty-bench to reduce garbage collection noisiness + ghc-options: -O2 -fdicts-strict -rtsopts -with-rtsopts=-A32m + -- [NOTE] Recommendation of tasty-bench for comparison against baseline + if impl(ghc >= 8.6) + ghc-options: -fproc-alignment=64 diff --git a/unicode-data-names/lib/ICU/Names.hsc b/unicode-data-names/icu/lib/ICU/Names.hsc similarity index 98% rename from unicode-data-names/lib/ICU/Names.hsc rename to unicode-data-names/icu/lib/ICU/Names.hsc index 8d217c96..4be84f25 100644 --- a/unicode-data-names/lib/ICU/Names.hsc +++ b/unicode-data-names/icu/lib/ICU/Names.hsc @@ -49,7 +49,7 @@ unicodeVersion charName :: Int -> Char -> Maybe String charName ty c = unsafePerformIO - $ allocaArray bufferLength $ + . 
allocaArray bufferLength $ \ptr -> u_charName cp ty ptr (fromIntegral bufferLength) >>= \case 0 -> pure Nothing diff --git a/unicode-data-names/icu/test/ICU.hs b/unicode-data-names/icu/test/ICU.hs new file mode 100644 index 00000000..51f8b08e --- /dev/null +++ b/unicode-data-names/icu/test/ICU.hs @@ -0,0 +1,10 @@ +module Main where + +import Test.Hspec +import qualified ICU.NamesSpec as ICU + +main :: IO () +main = hspec spec + +spec :: Spec +spec = describe "ICU.Names" ICU.spec diff --git a/unicode-data-names/test/Unicode/Char/General/Names/ICUSpec.hs b/unicode-data-names/icu/test/ICU/NamesSpec.hs similarity index 97% rename from unicode-data-names/test/Unicode/Char/General/Names/ICUSpec.hs rename to unicode-data-names/icu/test/ICU/NamesSpec.hs index 7b36e36f..4c825136 100644 --- a/unicode-data-names/test/Unicode/Char/General/Names/ICUSpec.hs +++ b/unicode-data-names/icu/test/ICU/NamesSpec.hs @@ -1,6 +1,6 @@ {-# LANGUAGE BlockArguments #-} -module Unicode.Char.General.Names.ICUSpec +module ICU.NamesSpec ( spec ) where diff --git a/unicode-data-names/test/Main.hs b/unicode-data-names/test/Main.hs index b3c5033c..9a83f57f 100644 --- a/unicode-data-names/test/Main.hs +++ b/unicode-data-names/test/Main.hs @@ -1,19 +1,10 @@ -{-# LANGUAGE CPP #-} - module Main where -import Test.Hspec +import Test.Hspec ( Spec, hspec, describe ) import qualified Unicode.Char.General.NamesSpec as Names -#ifdef HAS_ICU -import qualified Unicode.Char.General.Names.ICUSpec as ICU -#endif main :: IO () main = hspec spec spec :: Spec -spec = do - describe "Unicode.Char.General.Names" Names.spec -#ifdef HAS_ICU - describe "Unicode.Char.General.Names.ICU" ICU.spec -#endif +spec = describe "Unicode.Char.General.Names" Names.spec diff --git a/unicode-data-names/unicode-data-names.cabal b/unicode-data-names/unicode-data-names.cabal index 129fdf26..6262b802 100644 --- a/unicode-data-names/unicode-data-names.cabal +++ b/unicode-data-names/unicode-data-names.cabal @@ -66,11 +66,6 @@ common 
compile-options -fwarn-tabs default-language: Haskell2010 -flag has-icu - description: Use ICU for test and benchmark - manual: True - default: False - library import: default-extensions, compile-options exposed-modules: @@ -95,32 +90,6 @@ library build-depends: ghc-prim >= 0.3.1 && < 1.0 --- Internal library to enable comparison to ICU -library icu-names - import: default-extensions, compile-options - if flag(has-icu) - buildable: True - else - buildable: False - exposed-modules: - ICU.Names - hs-source-dirs: lib - build-depends: - base >= 4.7 && < 4.19 - include-dirs: cbits - c-sources: cbits/icu.c - cc-options: -Wall -Wextra -pedantic - if os(darwin) - extra-lib-dirs: - /usr/local/opt/icu4c/lib - /opt/homebrew/opt/icu4c/lib - include-dirs: - /usr/local/opt/icu4c/include - /opt/homebrew/opt/icu4c/include - extra-libraries: icuuc - pkgconfig-depends: - icu-uc >= 72.1 - test-suite test import: default-extensions, compile-options type: exitcode-stdio-1.0 @@ -134,12 +103,6 @@ test-suite test , hspec >= 2.0 && < 2.11 , unicode-data >= 0.4 && < 0.5 , unicode-data-names - if flag(has-icu) - other-modules: - Unicode.Char.General.Names.ICUSpec - build-depends: - icu-names - cpp-options: -DHAS_ICU executable export-all-chars import: default-extensions, compile-options @@ -171,8 +134,3 @@ benchmark bench -- [NOTE] Recommendation of tasty-bench for comparison against baseline if impl(ghc >= 8.6) ghc-options: -fproc-alignment=64 - if flag(has-icu) - build-depends: - icu-names - cpp-options: -DHAS_ICU -