summaryrefslogtreecommitdiff
path: root/tools/update-unicode-data.sh
diff options
context:
space:
mode:
Diffstat (limited to 'tools/update-unicode-data.sh')
-rwxr-xr-xtools/update-unicode-data.sh38
1 files changed, 38 insertions, 0 deletions
diff --git a/tools/update-unicode-data.sh b/tools/update-unicode-data.sh
new file mode 100755
index 000000000..2d751db15
--- /dev/null
+++ b/tools/update-unicode-data.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+if [ ! -d "$1" ]; then
+ echo "Usage $(basename "$0") UCD-directory [version]"
+ exit 1
+fi
+
+ucd=$(realpath "$1")
+version=$2
+glib_dir=$(git -C "$(dirname "$0")" rev-parse --show-toplevel)
+
+# shellcheck disable=SC2144 # we only want to match a file like this
+if ! [ -f "$ucd"/UnicodeData*.txt ] || ! [ -f "$ucd"/CaseFolding.*txt ]; then
+ echo "'$ucd' does not look like an Unicode Database directory";
+fi
+
+if [ -z "$version" ]; then
+ readme=("$ucd"/ReadMe*.txt)
+ version=$(sed -n "s,.*Version \([0-9.]\+\) of the Unicode Standard.*,\1,p" \
+ "${readme[@]}")
+
+ if [ -z "$version" ]; then
+ echo "Invalid version found"
+ exit 1
+ fi
+fi
+
+cd "$glib_dir" || exit 1
+
+echo "Updating generated code to Unicode version $version"
+set -xe
+
+(cd glib && ./gen-unicode-tables.pl -both "$version" "$ucd")
+glib/tests/gen-casefold-txt.py "$version" \
+ "$ucd"/CaseFolding*.txt > glib/tests/casefold.txt
+glib/tests/gen-casemap-txt.py "$version" \
+ "$ucd"/UnicodeData*.txt \
+ "$ucd"/SpecialCasing*.txt > glib/tests/casemap.txt