summary refs log tree commit diff
diff options
context:
space:
mode:
author Ben Gamari <ben@smart-cactus.org> 2020-02-06 18:23:30 -0500
committer Marge Bot <ben+marge-bot@smart-cactus.org> 2020-02-12 17:22:37 -0500
commit f5ffd8d9ec776db708e690c4fdbf671afa8df48f (patch)
tree 7092307f960d4025da0d9cf0094bf5179fb8aea2
parent 059c3c9d7c84fc37c69e9f414ff736d47081e72c (diff)
download haskell-f5ffd8d9ec776db708e690c4fdbf671afa8df48f.tar.gz
base: Expose GHC.Unicode.unicodeVersion
This exposes a Data.Version.Version representing the version of the Unicode database used by `base`. This should clear up some confusion I have seen in tickets regarding which Unicode versions a given GHC can be expected to work with. While in town I also regenerated the Unicode tables from the 12.0.0 database (the Unicode version itself was not updated). Strangely, the file cited in the README no longer existed. Consequently, I used https://www.unicode.org/Public/12.0.0/ucd/UnicodeData.txt instead and was slightly surprised to find that this produced a few changes.
-rw-r--r-- libraries/base/Data/Version.hs-boot 12
-rw-r--r-- libraries/base/GHC/Unicode.hs 7
-rw-r--r-- libraries/base/cbits/README.Unicode 8
-rw-r--r-- libraries/base/cbits/WCsubst.c 23
-rwxr-xr-x [-rw-r--r--] libraries/base/cbits/ubconfc 20
-rw-r--r-- libraries/base/include/UnicodeVersion.h 7
6 files changed, 65 insertions, 12 deletions
diff --git a/libraries/base/Data/Version.hs-boot b/libraries/base/Data/Version.hs-boot
new file mode 100644
index 0000000000..63726cf6af
--- /dev/null
+++ b/libraries/base/Data/Version.hs-boot
@@ -0,0 +1,12 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+-- Boot interface for Data.Version; GHC.Unicode imports it with a SOURCE pragma.
+module Data.Version
+ ( Version
+ , makeVersion
+ ) where
+
+import GHC.Base
+-- Declared abstractly here: only the type name, no constructors.
+data Version
+-- Applied to UNICODE_VERSION_NUMS by GHC.Unicode.unicodeVersion.
+makeVersion :: [Int] -> Version
diff --git a/libraries/base/GHC/Unicode.hs b/libraries/base/GHC/Unicode.hs
index 9d11b37d0c..6fba91f0e2 100644
--- a/libraries/base/GHC/Unicode.hs
+++ b/libraries/base/GHC/Unicode.hs
@@ -19,6 +19,7 @@
-----------------------------------------------------------------------------
module GHC.Unicode (
+ unicodeVersion,
GeneralCategory (..), generalCategory,
isAscii, isLatin1, isControl,
isAsciiUpper, isAsciiLower,
@@ -36,12 +37,18 @@ import GHC.Real
import GHC.Enum ( Enum (..), Bounded (..) )
import GHC.Ix ( Ix (..) )
import GHC.Num
+import {-# SOURCE #-} Data.Version
-- Data.Char.chr already imports this and we need to define a Show instance
-- for GeneralCategory
import GHC.Show ( Show )
#include "HsBaseConfig.h"
+#include "UnicodeVersion.h"
+
+-- | Version of the Unicode standard used by @base@ (from @UnicodeVersion.h@).
+unicodeVersion :: Version
+unicodeVersion = makeVersion UNICODE_VERSION_NUMS
-- | Unicode General Categories (column 2 of the UnicodeData table) in
-- the order they are listed in the Unicode standard (the Unicode
diff --git a/libraries/base/cbits/README.Unicode b/libraries/base/cbits/README.Unicode
index 6cc18464cd..1eef278c96 100644
--- a/libraries/base/cbits/README.Unicode
+++ b/libraries/base/cbits/README.Unicode
@@ -1,8 +1,12 @@
+Generating GHC's Unicode table
+==============================
WCsubst.c is generated with:
- sh ubconfc < UnicodeData.txt > WCsubst.c
+ sh ubconfc 12.0.0 < UnicodeData.txt > WCsubst.c
where UnicodeData.txt came from
- https://www.unicode.org/Public/12.0.0/ucd/UnicodeData-12.0.0d4.txt
+ https://www.unicode.org/Public/12.0.0/ucd/UnicodeData.txt
+
+Don't forget to mention the update in the User's Guide.
diff --git a/libraries/base/cbits/WCsubst.c b/libraries/base/cbits/WCsubst.c
index 9940405a8e..aa58dc244c 100644
--- a/libraries/base/cbits/WCsubst.c
+++ b/libraries/base/cbits/WCsubst.c
@@ -1,6 +1,6 @@
/*-------------------------------------------------------------------------
This is an automatically generated file: do not edit
-Generated by ubconfc at Tue Aug 14 10:04:18 UTC 2018
+Generated by ubconfc at Mon Feb 10 11:42:08 EST 2020
@generated
-------------------------------------------------------------------------*/
@@ -90,7 +90,7 @@ struct _charblock_
#define GENCAT_MN 2097152
#define GENCAT_LO 16384
#define MAX_UNI_CHAR 1114109
-#define NUM_BLOCKS 3349
+#define NUM_BLOCKS 3352
#define NUM_CONVBLOCKS 1326
#define NUM_SPACEBLOCKS 7
#define NUM_LAT1BLOCKS 63
@@ -1485,7 +1485,8 @@ static const struct _charblock_ allchars[]={
{5112, 6, &rule110},
{5120, 1, &rule7},
{5121, 620, &rule14},
- {5741, 2, &rule2},
+ {5741, 1, &rule13},
+ {5742, 1, &rule2},
{5743, 17, &rule14},
{5760, 1, &rule1},
{5761, 26, &rule14},
@@ -2799,8 +2800,8 @@ static const struct _charblock_ allchars[]={
{43444, 2, &rule124},
{43446, 4, &rule92},
{43450, 2, &rule124},
- {43452, 1, &rule92},
- {43453, 4, &rule124},
+ {43452, 2, &rule92},
+ {43454, 3, &rule124},
{43457, 13, &rule2},
{43471, 1, &rule91},
{43472, 10, &rule8},
@@ -3302,14 +3303,14 @@ static const struct _charblock_ allchars[]={
{71935, 1, &rule14},
{72096, 8, &rule14},
{72106, 39, &rule14},
- {72145, 1, &rule124},
- {72146, 1, &rule92},
- {72147, 1, &rule124},
+ {72145, 3, &rule124},
{72148, 4, &rule92},
{72154, 2, &rule92},
{72156, 4, &rule124},
{72160, 1, &rule92},
- {72161, 3, &rule14},
+ {72161, 1, &rule14},
+ {72162, 1, &rule2},
+ {72163, 1, &rule14},
{72164, 1, &rule124},
{72192, 1, &rule14},
{72193, 10, &rule92},
@@ -3545,7 +3546,8 @@ static const struct _charblock_ allchars[]={
{123184, 7, &rule92},
{123191, 7, &rule91},
{123200, 10, &rule8},
- {123214, 2, &rule14},
+ {123214, 1, &rule14},
+ {123215, 1, &rule13},
{123584, 44, &rule14},
{123628, 4, &rule92},
{123632, 10, &rule8},
@@ -3556,6 +3558,7 @@ static const struct _charblock_ allchars[]={
{125184, 34, &rule203},
{125218, 34, &rule204},
{125252, 7, &rule92},
+ {125259, 1, &rule91},
{125264, 10, &rule8},
{125278, 2, &rule2},
{126065, 59, &rule17},
diff --git a/libraries/base/cbits/ubconfc b/libraries/base/cbits/ubconfc
index 4d325866bb..cd29641c58 100644..100755
--- a/libraries/base/cbits/ubconfc
+++ b/libraries/base/cbits/ubconfc
@@ -17,6 +17,26 @@
# Output the file header
+VERSION="$1"  # Unicode version of the UnicodeData.txt on stdin, e.g. 12.0.0
+if [ -z "$VERSION" ]; then
+ echo "Usage: $0 <unicode version>" >&2
+ exit 1
+fi
+
+# Write ../include/UnicodeVersion.h, which GHC.Unicode #includes to define
+# GHC.Unicode.unicodeVersion (dots become commas: 12.0.0 -> [12,0,0]).
+cat > "$(dirname "$0")/../include/UnicodeVersion.h" <<EOF
+#if 0
+This is an automatically generated file: do not edit
+Generated by `basename "$0"` at `date`
+@generated
+#endif
+
+#define UNICODE_VERSION_NUMS [$(echo "$VERSION" | sed 's/\./,/g')]
+EOF
+# From here on, stdout is written to WCsubst.c next to this script.
+exec > "$(dirname "$0")/WCsubst.c"
+
echo "/*-------------------------------------------------------------------------"
echo "This is an automatically generated file: do not edit"
echo "Generated by `basename $0` at `date`"
diff --git a/libraries/base/include/UnicodeVersion.h b/libraries/base/include/UnicodeVersion.h
new file mode 100644
index 0000000000..14852682ac
--- /dev/null
+++ b/libraries/base/include/UnicodeVersion.h
@@ -0,0 +1,7 @@
+#if 0
+This is an automatically generated file: do not edit
+Generated by ubconfc at Mon Feb 10 11:42:08 EST 2020
+@generated
+#endif
+
+#define UNICODE_VERSION_NUMS [12,0,0]