summaryrefslogtreecommitdiff
path: root/tools/update-unicode-data.sh
blob: 2d751db151274dab825edd03bbcd74cd8bca31f6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env bash

# The first argument must be an existing directory (the UCD directory);
# anything else gets the usage text. The usage text is a diagnostic on a
# failure path, so it is written to stderr rather than stdout.
if [ ! -d "$1" ]; then
    echo "Usage $(basename "$0") UCD-directory [version]" >&2
    exit 1
fi

# Absolute path of the UCD directory. The script changes directory before
# running the generators, so a relative argument must be resolved first.
ucd=$(realpath "$1") || exit 1

# Optional second argument; when empty the version is detected further down.
version=$2

# Top-level directory of the git checkout that contains this script.
# Abort if it cannot be determined: an empty value would otherwise reach
# the later `cd`, which may succeed without changing directory and leave
# the generators running relative to the caller's working directory.
if ! glib_dir=$(git -C "$(dirname "$0")" rev-parse --show-toplevel) ||
    [ -z "$glib_dir" ]; then
    echo "Cannot find the git top-level directory for '$0'" >&2
    exit 1
fi

# Sanity-check that the directory holds the data files the generators read.
# The globs match the ones used when the generators are invoked, so
# version-suffixed file names are accepted here as well. A directory that
# fails the check is rejected instead of being processed anyway.
# shellcheck disable=SC2144 # we only want to match a file like this
if ! [ -f "$ucd"/UnicodeData*.txt ] || ! [ -f "$ucd"/CaseFolding*.txt ]; then
    echo "'$ucd' does not look like an Unicode Database directory" >&2
    exit 1
fi

# When no version was given on the command line, extract it from the UCD
# ReadMe file(s): the text between "Version " and " of the Unicode Standard",
# made of digits and dots.
if [ -z "$version" ]; then
    readme=("$ucd"/ReadMe*.txt)
    # "[0-9.][0-9.]*" is the portable basic-regex spelling of "one or more";
    # "\+" is a GNU sed extension and is not understood by every sed.
    version=$(sed -n \
        "s,.*Version \([0-9.][0-9.]*\) of the Unicode Standard.*,\1,p" \
        "${readme[@]}")

    # Nothing matched (or no ReadMe could be read): give up.
    if [ -z "$version" ]; then
        echo "Invalid version found" >&2
        exit 1
    fi
fi

# All paths below are relative to the top of the checkout.
if ! cd "$glib_dir"; then
    exit 1
fi

# Directory holding the test-data generators and their outputs.
tests_dir=glib/tests

echo "Updating generated code to Unicode version $version"

# From here on, trace every command and stop at the first failure.
set -e -x

# The table generator is run from inside glib/; the subshell keeps the
# directory change local to this step.
(
    cd glib &&
        ./gen-unicode-tables.pl -both "$version" "$ucd"
)

# Case-folding test data, generated from the CaseFolding file.
"$tests_dir"/gen-casefold-txt.py "$version" "$ucd"/CaseFolding*.txt \
    > "$tests_dir"/casefold.txt

# Case-mapping test data, generated from UnicodeData and SpecialCasing.
"$tests_dir"/gen-casemap-txt.py "$version" \
    "$ucd"/UnicodeData*.txt "$ucd"/SpecialCasing*.txt \
    > "$tests_dir"/casemap.txt