diff options
Diffstat (limited to 'admin/unidata')
-rw-r--r-- | admin/unidata/BidiBrackets.txt | 176 | ||||
-rw-r--r-- | admin/unidata/Makefile.in | 62 | ||||
-rw-r--r-- | admin/unidata/unidata-gen.el | 77 | ||||
-rw-r--r-- | admin/unidata/uvs.el | 4 |
4 files changed, 278 insertions, 41 deletions
diff --git a/admin/unidata/BidiBrackets.txt b/admin/unidata/BidiBrackets.txt new file mode 100644 index 00000000000..2a0cc0c7a69 --- /dev/null +++ b/admin/unidata/BidiBrackets.txt @@ -0,0 +1,176 @@ +# BidiBrackets-7.0.0.txt +# Date: 2014-01-21, 02:30:00 GMT [AG, LI, KW] +# +# Bidi_Paired_Bracket and Bidi_Paired_Bracket_Type Properties +# +# This file is a normative contributory data file in the Unicode +# Character Database. +# +# Copyright (c) 1991-2014 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Bidi_Paired_Bracket is a normative property of type Miscellaneous, +# which establishes a mapping between characters that are treated as +# bracket pairs by the Unicode Bidirectional Algorithm. +# +# Bidi_Paired_Bracket_Type is a normative property of type Enumeration, +# which classifies characters into opening and closing paired brackets +# for the purposes of the Unicode Bidirectional Algorithm. +# +# This file lists the set of code points with Bidi_Paired_Bracket_Type +# property values Open and Close. The set is derived from the character +# properties General_Category (gc), Bidi_Class (bc), Bidi_Mirrored (Bidi_M), +# and Bidi_Mirroring_Glyph (bmg), as follows: two characters, A and B, +# form a bracket pair if A has gc=Ps and B has gc=Pe, both have bc=ON and +# Bidi_M=Y, and bmg of A is B. Bidi_Paired_Bracket (bpb) maps A to B and +# vice versa, and their Bidi_Paired_Bracket_Type (bpt) property values are +# Open (o) and Close (c), respectively. +# +# For legacy reasons, the characters U+FD3E ORNATE LEFT PARENTHESIS and +# U+FD3F ORNATE RIGHT PARENTHESIS do not mirror in bidirectional display +# and therefore do not form a bracket pair. +# +# The Unicode property value stability policy guarantees that characters +# which have bpt=o or bpt=c also have bc=ON and Bidi_M=Y. As a result, an +# implementation can optimize the lookup of the Bidi_Paired_Bracket_Type +# property values Open and Close by restricting the processing to characters +# with bc=ON. +# +# The format of the file is three fields separated by a semicolon. +# Field 0: Unicode code point value, represented as a hexadecimal value +# Field 1: Bidi_Paired_Bracket property value, a code point value or <none> +# Field 2: Bidi_Paired_Bracket_Type property value, one of the following: +# o Open +# c Close +# n None +# The names of the characters in field 0 are given in comments at the end +# of each line. +# +# For information on bidirectional paired brackets, see UAX #9: Unicode +# Bidirectional Algorithm, at http://www.unicode.org/unicode/reports/tr9/ +# +# This file was originally created by Andrew Glass and Laurentiu Iancu +# for Unicode 6.3. + +0028; 0029; o # LEFT PARENTHESIS +0029; 0028; c # RIGHT PARENTHESIS +005B; 005D; o # LEFT SQUARE BRACKET +005D; 005B; c # RIGHT SQUARE BRACKET +007B; 007D; o # LEFT CURLY BRACKET +007D; 007B; c # RIGHT CURLY BRACKET +0F3A; 0F3B; o # TIBETAN MARK GUG RTAGS GYON +0F3B; 0F3A; c # TIBETAN MARK GUG RTAGS GYAS +0F3C; 0F3D; o # TIBETAN MARK ANG KHANG GYON +0F3D; 0F3C; c # TIBETAN MARK ANG KHANG GYAS +169B; 169C; o # OGHAM FEATHER MARK +169C; 169B; c # OGHAM REVERSED FEATHER MARK +2045; 2046; o # LEFT SQUARE BRACKET WITH QUILL +2046; 2045; c # RIGHT SQUARE BRACKET WITH QUILL +207D; 207E; o # SUPERSCRIPT LEFT PARENTHESIS +207E; 207D; c # SUPERSCRIPT RIGHT PARENTHESIS +208D; 208E; o # SUBSCRIPT LEFT PARENTHESIS +208E; 208D; c # SUBSCRIPT RIGHT PARENTHESIS +2308; 2309; o # LEFT CEILING +2309; 2308; c # RIGHT CEILING +230A; 230B; o # LEFT FLOOR +230B; 230A; c # RIGHT FLOOR +2329; 232A; o # LEFT-POINTING ANGLE BRACKET +232A; 2329; c # RIGHT-POINTING ANGLE BRACKET +2768; 2769; o # MEDIUM LEFT PARENTHESIS ORNAMENT +2769; 2768; c # MEDIUM RIGHT PARENTHESIS ORNAMENT +276A; 276B; o # MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B; 276A; c # MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C; 276D; o # MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D; 276C; c # MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E; 276F; o # HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F; 276E; c # HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770; 2771; o # HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771; 2770; c # HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772; 2773; o # LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773; 2772; c # LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774; 2775; o # MEDIUM LEFT CURLY BRACKET ORNAMENT +2775; 2774; c # MEDIUM RIGHT CURLY BRACKET ORNAMENT +27C5; 27C6; o # LEFT S-SHAPED BAG DELIMITER +27C6; 27C5; c # RIGHT S-SHAPED BAG DELIMITER +27E6; 27E7; o # MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7; 27E6; c # MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8; 27E9; o # MATHEMATICAL LEFT ANGLE BRACKET +27E9; 27E8; c # MATHEMATICAL RIGHT ANGLE BRACKET +27EA; 27EB; o # MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB; 27EA; c # MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC; 27ED; o # MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED; 27EC; c # MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE; 27EF; o # MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF; 27EE; c # MATHEMATICAL RIGHT FLATTENED PARENTHESIS +2983; 2984; o # LEFT WHITE CURLY BRACKET +2984; 2983; c # RIGHT WHITE CURLY BRACKET +2985; 2986; o # LEFT WHITE PARENTHESIS +2986; 2985; c # RIGHT WHITE PARENTHESIS +2987; 2988; o # Z NOTATION LEFT IMAGE BRACKET +2988; 2987; c # Z NOTATION RIGHT IMAGE BRACKET +2989; 298A; o # Z NOTATION LEFT BINDING BRACKET +298A; 2989; c # Z NOTATION RIGHT BINDING BRACKET +298B; 298C; o # LEFT SQUARE BRACKET WITH UNDERBAR +298C; 298B; c # RIGHT SQUARE BRACKET WITH UNDERBAR +298D; 2990; o # LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E; 298F; c # RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F; 298E; o # LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990; 298D; c # RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991; 2992; o # LEFT ANGLE BRACKET WITH DOT +2992; 2991; c # RIGHT ANGLE BRACKET WITH DOT +2993; 2994; o # LEFT ARC LESS-THAN BRACKET +2994; 2993; c # RIGHT ARC GREATER-THAN BRACKET +2995; 2996; o # DOUBLE LEFT ARC GREATER-THAN BRACKET +2996; 2995; c # DOUBLE RIGHT ARC LESS-THAN BRACKET +2997; 2998; o # LEFT BLACK TORTOISE SHELL BRACKET +2998; 2997; c # RIGHT BLACK TORTOISE SHELL BRACKET +29D8; 29D9; o # LEFT WIGGLY FENCE +29D9; 29D8; c # RIGHT WIGGLY FENCE +29DA; 29DB; o # LEFT DOUBLE WIGGLY FENCE +29DB; 29DA; c # RIGHT DOUBLE WIGGLY FENCE +29FC; 29FD; o # LEFT-POINTING CURVED ANGLE BRACKET +29FD; 29FC; c # RIGHT-POINTING CURVED ANGLE BRACKET +2E22; 2E23; o # TOP LEFT HALF BRACKET +2E23; 2E22; c # TOP RIGHT HALF BRACKET +2E24; 2E25; o # BOTTOM LEFT HALF BRACKET +2E25; 2E24; c # BOTTOM RIGHT HALF BRACKET +2E26; 2E27; o # LEFT SIDEWAYS U BRACKET +2E27; 2E26; c # RIGHT SIDEWAYS U BRACKET +2E28; 2E29; o # LEFT DOUBLE PARENTHESIS +2E29; 2E28; c # RIGHT DOUBLE PARENTHESIS +3008; 3009; o # LEFT ANGLE BRACKET +3009; 3008; c # RIGHT ANGLE BRACKET +300A; 300B; o # LEFT DOUBLE ANGLE BRACKET +300B; 300A; c # RIGHT DOUBLE ANGLE BRACKET +300C; 300D; o # LEFT CORNER BRACKET +300D; 300C; c # RIGHT CORNER BRACKET +300E; 300F; o # LEFT WHITE CORNER BRACKET +300F; 300E; c # RIGHT WHITE CORNER BRACKET +3010; 3011; o # LEFT BLACK LENTICULAR BRACKET +3011; 3010; c # RIGHT BLACK LENTICULAR BRACKET +3014; 3015; o # LEFT TORTOISE SHELL BRACKET +3015; 3014; c # RIGHT TORTOISE SHELL BRACKET +3016; 3017; o # LEFT WHITE LENTICULAR BRACKET +3017; 3016; c # RIGHT WHITE LENTICULAR BRACKET +3018; 3019; o # LEFT WHITE TORTOISE SHELL BRACKET +3019; 3018; c # RIGHT WHITE TORTOISE SHELL BRACKET +301A; 301B; o # LEFT WHITE SQUARE BRACKET +301B; 301A; c # RIGHT WHITE SQUARE BRACKET +FE59; FE5A; o # SMALL LEFT PARENTHESIS +FE5A; FE59; c # SMALL RIGHT PARENTHESIS +FE5B; FE5C; o # SMALL LEFT CURLY BRACKET +FE5C; FE5B; c # SMALL RIGHT CURLY BRACKET +FE5D; FE5E; o # SMALL LEFT TORTOISE SHELL BRACKET +FE5E; FE5D; c # SMALL RIGHT TORTOISE SHELL BRACKET +FF08; FF09; o # FULLWIDTH LEFT PARENTHESIS +FF09; FF08; c # FULLWIDTH RIGHT PARENTHESIS +FF3B; FF3D; o # FULLWIDTH LEFT SQUARE BRACKET +FF3D; FF3B; c # FULLWIDTH RIGHT SQUARE BRACKET +FF5B; FF5D; o # FULLWIDTH LEFT CURLY BRACKET +FF5D; FF5B; c # FULLWIDTH RIGHT CURLY BRACKET +FF5F; FF60; o # FULLWIDTH LEFT WHITE PARENTHESIS +FF60; FF5F; c # FULLWIDTH RIGHT WHITE PARENTHESIS +FF62; FF63; o # HALFWIDTH LEFT CORNER BRACKET +FF63; FF62; c # HALFWIDTH RIGHT CORNER BRACKET + +# EOF diff --git a/admin/unidata/Makefile.in b/admin/unidata/Makefile.in index 26c91bd3dcf..94504818d12 100644 --- a/admin/unidata/Makefile.in +++ b/admin/unidata/Makefile.in @@ -28,53 +28,55 @@ top_srcdir = @top_srcdir@ top_builddir = @top_builddir@ EMACS = ${top_builddir}/src/emacs -DSTDIR = ${top_srcdir}/lisp/international +unidir = ${top_srcdir}/lisp/international emacs = "${EMACS}" -batch --no-site-file --no-site-lisp -.PHONY: all compile install +.PHONY: all unifiles -all: ${top_srcdir}/src/macuvs.h ${DSTDIR}/charprop.el +all: ${top_srcdir}/src/macuvs.h unifiles -${top_srcdir}/src/macuvs.h: ${srcdir}/uvs.el ${srcdir}/IVD_Sequences.txt - ${EMACS} -batch -l "${srcdir}/uvs.el" \ +## Specify .elc as an order-only prereq so as to not needlessly rebuild +## target just because the .elc is missing. +## Same with charprop.el below. +${top_srcdir}/src/macuvs.h: ${srcdir}/uvs.el ${srcdir}/IVD_Sequences.txt | \ + ${srcdir}/uvs.elc + ${emacs} -L ${srcdir} -l uvs \ --eval '(uvs-print-table-ivd (unmsys--file-name "${srcdir}/IVD_Sequences.txt") "Adobe-Japan1")' \ > $@ -.el.elc: +%.elc: %.el ${emacs} -f batch-byte-compile $< unidata.txt: ${srcdir}/UnicodeData.txt - sed -e 's/\([^;]*\);\(.*\)/(#x\1 "\2")/' -e 's/;/" "/g' < ${srcdir}/UnicodeData.txt > $@ + sed -e 's/\([^;]*\);\(.*\)/(#x\1 "\2")/' -e 's/;/" "/g' < $< > $@ -compile: ${srcdir}/unidata-gen.elc +FORCE = +FORCE: +.PHONY: FORCE -## Depend on .el rather than .elc so as not to needlessly rebuild -## uni-*.el files just because .elc is missing. -## Same for UnicodeData.txt v unidata.txt. -${DSTDIR}/charprop.el: ${srcdir}/unidata-gen.el ${srcdir}/UnicodeData.txt - ${MAKE} ${MFLAGS} compile unidata.txt EMACS="${EMACS}" +${unidir}/charprop.el: ${FORCE} ${srcdir}/unidata-gen.el \ + ${srcdir}/UnicodeData.txt ${srcdir}/BidiMirroring.txt \ + ${srcdir}/BidiBrackets.txt | \ + ${srcdir}/unidata-gen.elc unidata.txt -if [ -f "$@" ]; then \ - cd ${DSTDIR} && chmod +w charprop.el `sed -n 's/^;; FILE: //p' < charprop.el`; \ + cd ${unidir} && chmod +w charprop.el `sed -n 's/^;; FILE: //p' < charprop.el`; \ fi ${emacs} -L ${srcdir} -l unidata-gen -f unidata-gen-files \ - ${srcdir} "${DSTDIR}" + ${srcdir} "${unidir}" -## Like the above, but generate in PWD rather than lisp/international. -charprop.el: ${srcdir}/unidata-gen.elc unidata.txt - ${emacs} -L ${srcdir} -l unidata-gen -f unidata-gen-files \ - ${srcdir} +## Check for deleted uni- files, and if any such, force a rebuild. +## Perhaps a more elegant way would be for the previous rule +## to generate a Makefile fragment explicitly listing the uni- files, +## which this file could include. If no fragment, rebuild everything. +unifiles: ${unidir}/charprop.el + for f in `sed -n 's/^;; FILE: //p' < $<`; do \ + [ -f ${unidir}/$$f ] || { ${MAKE} $< FORCE=FORCE || exit 1; break; };\ + done .PHONY: clean bootstrap-clean distclean maintainer-clean extraclean -install: charprop.el - cp charprop.el ${DSTDIR} - cp `sed -n 's/^;; FILE: //p' < charprop.el` ${DSTDIR} - clean: - if test -f charprop.el; then \ - rm -f `sed -n 's/^;; FILE: //p' < charprop.el`; \ - fi - rm -f charprop.el ${srcdir}/*.elc unidata.txt + rm -f ${srcdir}/*.elc unidata.txt bootstrap-clean: clean @@ -88,7 +90,7 @@ maintainer-clean: distclean ## Cf leim/ja-dic (which is much slower). extraclean: rm -f ${top_srcdir}/src/macuvs.h - if test -f ${DSTDIR}/charprop.el; then \ - (cd ${DSTDIR} && rm -f `sed -n 's/^;; FILE: //p' < charprop.el`); \ - rm -f ${DSTDIR}/charprop.el; \ + if test -f ${unidir}/charprop.el; then \ + (cd ${unidir} && rm -f `sed -n 's/^;; FILE: //p' < charprop.el`); \ + rm -f ${unidir}/charprop.el; \ fi diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el index fb9b6dccc72..d10b260b470 100644 --- a/admin/unidata/unidata-gen.el +++ b/admin/unidata/unidata-gen.el @@ -88,6 +88,8 @@ ;; CHAR-or-RANGE: a character code or a cons of character codes ;; PROPn: string representing the nth property value +(eval-when-compile (require 'cl-lib)) + (defvar unidata-list nil) ;; Name of the directory containing files of Unicode Character Database. @@ -152,7 +154,8 @@ ;; PROP: character property ;; INDEX: index to each element of unidata-list for PROP. ;; It may be a function that generates an alist of character codes -;; vs. the corresponding property values. +;; vs. the corresponding property values. Currently, only character +;; codepoints or symbol values are supported in this case. ;; GENERATOR: function to generate a char-table ;; FILENAME: filename to store the char-table ;; DOCSTRING: docstring for the property @@ -271,7 +274,23 @@ is the character itself." "Unicode bidi-mirroring characters. Property value is a character that has the corresponding mirroring image or nil. The value nil means that the actual property value of a character -is the character itself."))) +is the character itself.") + (paired-bracket + unidata-gen-brackets-list unidata-gen-table-character "uni-brackets.el" + "Unicode bidi paired-bracket characters. +Property value is the paired bracket character, or nil. +The value nil means that the character is neither an opening nor +a closing paired bracket." + string) + (bracket-type + unidata-gen-bracket-type-list unidata-gen-table-symbol "uni-brackets.el" + "Unicode bidi paired-bracket type. +Property value is a symbol `o' (Open), `c' (Close), or `n' (None)." + unidata-describe-bidi-bracket-type + n + ;; The order of elements must be in sync with bidi_bracket_type_t + ;; in src/dispextern.h. + (n o c)))) ;; Functions to access the above data. (defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist))) @@ -449,7 +468,10 @@ is the character itself."))) (unidata-encode-val val-list (nth 2 elm))) (set-char-table-range table (cons (car elm) (nth 1 elm)) (nth 2 elm))) - (setq tail unidata-list) + (if (functionp prop-idx) + (setq tail (funcall prop-idx) + prop-idx 1) + (setq tail unidata-list)) (while tail (setq elt (car tail) tail (cdr tail)) (setq range (car elt) @@ -923,11 +945,7 @@ is the character itself."))) (dotimes (i (length vec)) (dolist (elt (aref vec i)) (if (symbolp elt) - (let ((slot (assq elt word-list))) - (if slot - (setcdr slot (1+ (cdr slot))) - (setcdr word-list - (cons (cons elt 1) (cdr word-list)))))))) + (cl-incf (alist-get elt (cdr word-list) 0))))) (set-char-table-range table (cons start limit) vec)))))) (setq word-list (sort (cdr word-list) #'(lambda (x y) (> (cdr x) (cdr y))))) @@ -1159,6 +1177,12 @@ is the character itself."))) (string ?')))) val " ")) +(defun unidata-describe-bidi-bracket-type (val) + (cdr (assq val + '((n . "Not a paired bracket character.") + (o . "Opening paired bracket character.") + (c . "Closing paired bracket character."))))) + (defun unidata-gen-mirroring-list () (let ((head (list nil)) tail) @@ -1172,6 +1196,36 @@ is the character itself."))) (setq tail (setcdr tail (list (list char mirror))))))) (cdr head))) +(defun unidata-gen-brackets-list () + (let ((head (list nil)) + tail) + (with-temp-buffer + (insert-file-contents (expand-file-name "BidiBrackets.txt" unidata-dir)) + (goto-char (point-min)) + (setq tail head) + (while (re-search-forward + "^\\([0-9A-F]+\\);\\s +\\([0-9A-F]+\\);\\s +\\([oc]\\)" + nil t) + (let ((char (string-to-number (match-string 1) 16)) + (paired (match-string 2))) + (setq tail (setcdr tail (list (list char paired))))))) + (cdr head))) + +(defun unidata-gen-bracket-type-list () + (let ((head (list nil)) + tail) + (with-temp-buffer + (insert-file-contents (expand-file-name "BidiBrackets.txt" unidata-dir)) + (goto-char (point-min)) + (setq tail head) + (while (re-search-forward + "^\\([0-9A-F]+\\);\\s +\\([0-9A-F]+\\);\\s +\\([oc]\\)" + nil t) + (let ((char (string-to-number (match-string 1) 16)) + (type (match-string 3))) + (setq tail (setcdr tail (list (list char type))))))) + (cdr head))) + ;; Verify if we can retrieve correct values from the generated ;; char-tables. ;; @@ -1220,7 +1274,9 @@ is the character itself."))) ((eq generator 'unidata-gen-table-decomposition) (setq val1 (unidata-split-decomposition val1)))) (cond ((eq prop 'decomposition) - (setq val1 (list char))))) + (setq val1 (list char))) + ((eq prop 'bracket-type) + (setq val1 'n)))) (when (>= char check) (message "%S %04X" prop check) (setq check (+ check #x400))) @@ -1263,6 +1319,9 @@ is the character itself."))) (describer (unidata-prop-describer prop)) (default-value (unidata-prop-default prop)) (val-list (unidata-prop-val-list prop)) + ;; Avoid creating backup files for those uni-*.el files + ;; that hold more than one table. + (backup-inhibited t) table) ;; Filename in this comment line is extracted by sed in ;; Makefile. diff --git a/admin/unidata/uvs.el b/admin/unidata/uvs.el index 7559a566974..4a17c726712 100644 --- a/admin/unidata/uvs.el +++ b/admin/unidata/uvs.el @@ -198,8 +198,8 @@ corresponding number." (let ((uvs-alist (with-temp-buffer (insert-file-contents filename) - (setq uvs-alist (uvs-alist-from-ivd collection-id - sequence-id-to-glyph-func))))) + (uvs-alist-from-ivd collection-id + sequence-id-to-glyph-func)))) (princ "/* Automatically generated by uvs.el. */\n") (princ (format "static const unsigned char mac_uvs_table_%s_bytes[] =\n {\n" |