summaryrefslogtreecommitdiff
path: root/lisp/international
diff options
context:
space:
mode:
authorMichal Nazarewicz <mina86@mina86.com>2017-02-17 16:36:44 +0100
committerMichal Nazarewicz <mina86@mina86.com>2017-02-17 16:36:44 +0100
commit9f9863e50298a3506165cc1f056ab3238f37cb9f (patch)
tree50939d8bb4ff46baf97fc2dba7fe3455213a0ec0 /lisp/international
parent630e2d2e6aeba60f178c6ef2b283622070b873b3 (diff)
downloademacs-9f9863e50298a3506165cc1f056ab3238f37cb9f.tar.gz
Fix build failure caused by ‘Generate upcase and downcase tables from Unicode’
The [5ec3a584: Generate upcase and downcase tables from Unicode data] commit broke bootstrap from a truly clean tree (e.g. a fresh clone or one created with ‘make extraclean’), see <http://hydra.nixos.org/build/48774928>. The failure was caused by characters.el trying to read Unicode property tables which aren’t available so early in the build process. Wrap the part that requires Unicode property tables in a condition checking if those are available. If they aren’t they case and syntax tables won’t be fully set but later on, the characters.el file will be evaluated again and this time with Unicode properties available so final Emacs ends up with the exact same case and syntax tables.
Diffstat (limited to 'lisp/international')
-rw-r--r--lisp/international/characters.el132
1 files changed, 68 insertions, 64 deletions
diff --git a/lisp/international/characters.el b/lisp/international/characters.el
index 9e993e7060a..3eb287fd963 100644
--- a/lisp/international/characters.el
+++ b/lisp/international/characters.el
@@ -665,70 +665,74 @@ with L, LRE, or LRO Unicode bidi character type.")
;; Combining marks
(modify-category-entry '(#x20d0 . #x20ff) ?^)
- ;; Set all Letter, uppercase; Letter, lowercase and Letter, titlecase syntax
- ;; to word.
- (let ((syn-tab (standard-syntax-table)))
- (map-char-table
- (lambda (ch cat)
- (when (memq cat '(Lu Ll Lt))
- (modify-syntax-entry ch "w " syn-tab)))
- (unicode-property-table-internal 'general-category))
-
- ;; Ⅰ through Ⅻ had word syntax in the past so set it here as well.
- ;; The general category of those characters is Number, Letter.
- (modify-syntax-entry '(#x2160 . #x216b) "w " syn-tab)
-
- ;; ⓐ through ⓩ are symbols, other according to Unicode but Emacs set
- ;; their syntax to word in the past so keep backwards compatibility.
- (modify-syntax-entry '(#x24D0 . #x24E9) "w " syn-tab))
-
- ;; Set downcase and upcase from Unicode properties
-
- ;; In some languages, such as Turkish, U+0049 LATIN CAPITAL LETTER I and
- ;; U+0131 LATIN SMALL LETTER DOTLESS I make a case pair, and so do U+0130
- ;; LATIN CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
-
- ;; We used to set up half of those correspondence unconditionally, but that
- ;; makes searches slow. So now we don't set up either half of these
- ;; correspondences by default.
-
- ;; (set-downcase-syntax ?İ ?i tbl)
- ;; (set-upcase-syntax ?I ?ı tbl)
-
- (let ((map-unicode-property
- (lambda (property func)
- (map-char-table
- (lambda (ch cased)
- ;; ASCII characters skipped due to reasons outlined above. As of
- ;; Unicode 9.0, this exception affects the following:
- ;; lc(U+0130 İ) = i
- ;; uc(U+0131 ı) = I
- ;; uc(U+017F ſ) = S
- ;; uc(U+212A K) = k
- (when (> cased 127)
- (let ((end (if (consp ch) (cdr ch) ch)))
- (setq ch (max 128 (if (consp ch) (car ch) ch)))
- (while (<= ch end)
- (funcall func ch cased)
- (setq ch (1+ ch))))))
- (unicode-property-table-internal property))))
- (down tbl)
- (up (case-table-get-table tbl 'up)))
-
- ;; This works on an assumption that if toUpper(x) != x then toLower(x) ==
- ;; x (and the opposite for toLower/toUpper). This doesn’t hold for title
- ;; case characters but those incorrect mappings will be overwritten later.
- (funcall map-unicode-property 'uppercase
- (lambda (lc uc) (aset down lc lc) (aset up uc uc)))
- (funcall map-unicode-property 'lowercase
- (lambda (uc lc) (aset down lc lc) (aset up uc uc)))
-
- ;; Now deal with the actual mapping. This will correctly assign casing for
- ;; title-case characters.
- (funcall map-unicode-property 'uppercase
- (lambda (lc uc) (aset up lc uc) (aset up uc uc)))
- (funcall map-unicode-property 'lowercase
- (lambda (uc lc) (aset down uc lc) (aset down lc lc))))
+ (let ((gc (unicode-property-table-internal 'general-category))
+ (syn-table (standard-syntax-table)))
+ ;; In early bootstrapping Unicode tables are not available so we need to
+ ;; skip this step in those cases.
+ (when gc
+ ;; Set all Letter, uppercase; Letter, lowercase and Letter,
+ ;; titlecase syntax to word.
+ (map-char-table
+ (lambda (ch cat)
+ (when (memq cat '(Lu Ll Lt))
+ (modify-syntax-entry ch "w " syn-table)))
+ gc)
+ ;; Ⅰ through Ⅻ had word syntax in the past so set it here as well.
+ ;; The general category of those characters is Number, Letter.
+ (modify-syntax-entry '(#x2160 . #x216b) "w " syn-table)
+
+ ;; ⓐ through ⓩ are symbols, other according to Unicode but Emacs set
+ ;; their syntax to word in the past so keep backwards compatibility.
+ (modify-syntax-entry '(#x24D0 . #x24E9) "w " syn-table)
+
+ ;; Set downcase and upcase from Unicode properties
+
+ ;; In some languages, such as Turkish, U+0049 LATIN CAPITAL LETTER I and
+ ;; U+0131 LATIN SMALL LETTER DOTLESS I make a case pair, and so do U+0130
+ ;; LATIN CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
+
+ ;; We used to set up half of those correspondence unconditionally, but
+ ;; that makes searches slow. So now we don't set up either half of these
+ ;; correspondences by default.
+
+ ;; (set-downcase-syntax ?İ ?i tbl)
+ ;; (set-upcase-syntax ?I ?ı tbl)
+
+ (let ((map-unicode-property
+ (lambda (property func)
+ (map-char-table
+ (lambda (ch cased)
+ ;; ASCII characters skipped due to reasons outlined above. As
+ ;; of Unicode 9.0, this exception affects the following:
+ ;; lc(U+0130 İ) = i
+ ;; uc(U+0131 ı) = I
+ ;; uc(U+017F ſ) = S
+ ;; uc(U+212A K) = k
+ (when (> cased 127)
+ (let ((end (if (consp ch) (cdr ch) ch)))
+ (setq ch (max 128 (if (consp ch) (car ch) ch)))
+ (while (<= ch end)
+ (funcall func ch cased)
+ (setq ch (1+ ch))))))
+ (unicode-property-table-internal property))))
+ (down tbl)
+ (up (case-table-get-table tbl 'up)))
+
+ ;; This works on an assumption that if toUpper(x) != x then toLower(x)
+ ;; == x (and the opposite for toLower/toUpper). This doesn’t hold for
+ ;; title case characters but those incorrect mappings will be
+ ;; overwritten later.
+ (funcall map-unicode-property 'uppercase
+ (lambda (lc uc) (aset down lc lc) (aset up uc uc)))
+ (funcall map-unicode-property 'lowercase
+ (lambda (uc lc) (aset down lc lc) (aset up uc uc)))
+
+ ;; Now deal with the actual mapping. This will correctly assign casing
+ ;; for title-case characters.
+ (funcall map-unicode-property 'uppercase
+ (lambda (lc uc) (aset up lc uc) (aset up uc uc)))
+ (funcall map-unicode-property 'lowercase
+ (lambda (uc lc) (aset down uc lc) (aset down lc lc))))))
;; Clear out the extra slots so that they will be recomputed from the main
;; (downcase) table and upcase table. Since we’re side-stepping the usual