diff options
Diffstat (limited to 'tools/gen-script-table.pl')
-rwxr-xr-x | tools/gen-script-table.pl | 70 |
1 files changed, 70 insertions, 0 deletions
#!/usr/bin/perl -w
#
# Script to convert http://www.unicode.org/Public/UNIDATA/Scripts.txt
# into a machine-readable table.
#
# Usage: gen-script-table.pl Scripts.txt > pango-script-table.h
#
# The input is read line by line; every "START[..END] ; SCRIPT" record
# becomes a code point range.  Ranges are sorted by start, adjacent ranges
# with the same script are merged, and the result is printed as a C table.
#
######################################################################

use strict;
use warnings;

# The emitted table stores each run length in a guint16 "chars" field,
# so a single run can cover at most this many code points.
use constant MAX_RUN_CHARS => 0xFFFF;

# Read Scripts.txt records from the filehandle $in.
#
# Returns a two-element list: a reference to an array of
# [ start, end, script ] triples (start/end are numeric code points, in
# file order), and the "Scripts-*.txt" name found in the header comment
# (undef when the file carries no such header).
#
# Dies with "Cannot parse line: ..." on a non-blank, non-comment line that
# is not a valid record.
sub parse_scripts {
    my ($in) = @_;

    my @ranges;
    my $source;

    while (my $line = <$in>) {
        # The versioned file name only appears inside a comment, so look
        # for it before comments are stripped.
        if ($line =~ /^\#\s+(Scripts-.*\.txt)/) {
            $source = $1;
        }

        $line =~ s/#.*//;
        next if $line =~ /^\s*$/;

        # Trailing whitespace is optional so that a final record without a
        # newline or trailing comment is still accepted.
        my ($first, $last, $script) =
            $line =~ /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+([A-Z_]+)\s*$/
            or die "Cannot parse line: $line\n";

        # A record without "..END" describes a single code point.
        $last = $first unless defined $last;

        push @ranges, [ hex $first, hex $last, $script ];
    }

    return (\@ranges, $source);
}

# Sort the [ start, end, script ] triples in @$ranges by start and merge
# each range into its predecessor when it begins exactly one past the
# predecessor's end and names the same script.
#
# Returns the merged list of [ start, end, script ] triples; the input
# array and its elements are not modified.
sub merge_ranges {
    my ($ranges) = @_;

    my @runs;
    for my $range (sort { $a->[0] <=> $b->[0] } @$ranges) {
        my ($start, $end, $script) = @$range;
        my $prev = @runs ? $runs[-1] : undef;

        if (defined $prev
            && $start == $prev->[1] + 1
            && $script eq $prev->[2]) {
            $prev->[1] = $end;
        }
        else {
            push @runs, [ $start, $end, $script ];
        }
    }

    return @runs;
}

# Turn merged [ start, end, script ] triples into table initializer lines.
#
# Returns one newline-terminated string per run.  Dies if a run covers
# more code points than the guint16 "chars" field can represent, rather
# than emitting a value that would be truncated by the C compiler.
sub format_rows {
    my @runs = @_;

    my @rows;
    for my $run (@runs) {
        my ($start, $end, $script) = @$run;
        my $chars = $end - $start + 1;

        if ($chars > MAX_RUN_CHARS) {
            die sprintf("Range %04X..%04X (%s) covers %d code points; "
                        . "the guint16 chars field holds at most %d\n",
                        $start, $end, $script, $chars, MAX_RUN_CHARS);
        }

        push @rows, sprintf(" { %#06x, %5d, PANGO_SCRIPT_%s },\n",
                            $start, $chars, $script);
    }

    return @rows;
}

# Return the generated-file banner and the opening of the table definition.
# $date and $source are interpolated into the banner comment.
sub table_header {
    my ($date, $source) = @_;

    return <<"EOT";
/* pango-script-table.h: Generated by gen-script-table.pl
 *
 * Date: $date
 * Source: $source
 *
 * Do not edit.
 */
static const struct {
    gunichar start;
    guint16 chars;
    guint16 script; /* PangoScript */
} pango_script_table[] = {
EOT
}

# Entry point: expects exactly one argument, the path to Scripts.txt, and
# writes the generated header to standard output.
sub main {
    my @args = @_;

    if (@args != 1) {
        die "Usage: gen-script-table.pl Scripts.txt > pango-script-table.h\n";
    }
    my $path = $args[0];

    # "or" (not "||") so that the die applies to open() itself rather than
    # to the file name operand.
    open my $in, '<', $path or die "Cannot open $path: $!\n";
    my ($ranges, $source) = parse_scripts($in);
    close $in;

    # Fall back to the input path when the file has no "Scripts-*.txt"
    # header, so the banner never interpolates an undefined value.
    $source = $path unless defined $source;

    # Build every row before printing anything, so a failure does not
    # leave a half-written header on standard output.
    my @rows = format_rows(merge_ranges($ranges));

    my $date = gmtime;

    print table_header($date, $source);
    print @rows;
    print "};\n";

    return;
}

# Run only when executed as a script; loading the file with require/do
# just defines the helpers above.
main(@ARGV) unless caller;

1;