#!/usr/bin/perl -w
#
# Script to convert http://www.unicode.org/Public/UNIDATA/Scripts.txt
# into a machine-readable table.
#
# Usage: gen-script-table.pl Scripts.txt > gscripttable.h
#
# Output (written to STDOUT) is a C header containing:
#   * g_script_easy_table[] - one script value per code point for
#     every code point below G_EASY_SCRIPTS_RANGE;
#   * g_script_table[]      - { start, length, script } entries for
#     the remaining ranges, with adjacent same-script ranges merged.
#
######################################################################

use strict;
use warnings;

if (@ARGV != 1) {
    die "Usage: gen-script-table.pl Scripts.txt > gscripttable.h\n";
}

my $input_path = $ARGV[0];

# Three-argument open with low-precedence "or": with "||" the die would
# bind to the file name and an open failure would go unnoticed.
open(my $in, '<', $input_path)
    or die "Cannot open $input_path: $!\n";

# Each element is [ first code point, last code point, SCRIPT_NAME ].
my @ranges;

# Name of the data file as announced in its "# Scripts-x.y.z.txt" header
# comment; a placeholder is used if no such header is found.
my $file = 'unknown';

while (my $line = <$in>) {
    if ($line =~ /^\#\s+(Scripts-.*\.txt)/) {
        $file = $1;
    }

    # Drop comments, then skip lines that are empty as a result.
    $line =~ s/#.*//;
    next if $line =~ /^\s*$/;

    # Accept "XXXX ; Script" and "XXXX..YYYY ; Script".
    if ($line !~ /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([A-Za-z_]+)\s*$/) {
        die "Cannot parse line: '$line'\n";
    }

    my $first = hex($1);
    my $last  = defined $2 ? hex($2) : $first;
    push @ranges, [ $first, $last, uc $3 ];
}

close $in;

@ranges = sort { $a->[0] <=> $b->[0] } @ranges;

my $date = gmtime();

print <<"EOT";
/* gscripttable.h: Generated by gen-script-table.pl
 *
 * Date: $date
 * Source: $file
 *
 * Do not edit.
 */

EOT

# Code points below this limit get a direct per-code-point lookup table.
my $easy_range = 0x2000;

print <<"EOT";
#define G_EASY_SCRIPTS_RANGE $easy_range

static const guchar g_script_easy_table[$easy_range] = {
EOT

my $i      = 0;     # index of the next range not yet consumed
my $start  = 0;     # bounds and script of the range currently in use;
my $end    = -1;    # $end = -1 forces the first range to be loaded
my $script;

for my $c (0 .. $easy_range - 1) {
    # Three entries per output line.
    print "\n " if $c % 3 == 0;

    # Move on to the next range once the current one is exhausted, but
    # never read past the end of @ranges.
    if ($c > $end && $i <= $#ranges) {
        ($start, $end, $script) = @{ $ranges[$i] };
        $i++;
    }

    # Code points in a gap between ranges, or beyond the last range,
    # have no assigned script.
    if ($c < $start || $c > $end) {
        print " G_UNICODE_SCRIPT_UNKNOWN,";
    }
    else {
        printf " G_UNICODE_SCRIPT_%s,", $script;
    }
}

# The last range loaded reaches past the easy table, so it must also
# appear in the range table.  Step back to it, and if it straddles the
# boundary trim the part that the easy table already covers.  A range
# that starts at or above the boundary is left untouched.
if ($end >= $easy_range) {
    $i--;
    if ($ranges[$i]->[0] < $easy_range) {
        $ranges[$i]->[0] = $easy_range;
    }
}

print <<"EOT";

};

static const struct {
    gunichar start;
    guint16 chars;
    guint16 script;
} g_script_table[] = {
EOT

for (; $i <= $#ranges; $i++) {
    ($start, $end, $script) = @{ $ranges[$i] };

    # Coalesce directly adjacent ranges that share the same script.
    while ($i < $#ranges
           && $ranges[$i + 1]->[0] == $end + 1
           && $ranges[$i + 1]->[2] eq $script) {
        $i++;
        $end = $ranges[$i]->[1];
    }

    printf " { %#06x, %5d, G_UNICODE_SCRIPT_%s },\n",
        $start, $end - $start + 1, $script;
}

print "};\n";