diff options
Diffstat (limited to 'lisp/international/characters.el')
-rw-r--r-- | lisp/international/characters.el | 116 |
1 files changed, 74 insertions, 42 deletions
diff --git a/lisp/international/characters.el b/lisp/international/characters.el index 97bf31acfc3..5aefda23283 100644 --- a/lisp/international/characters.el +++ b/lisp/international/characters.el @@ -116,11 +116,11 @@ Base characters (Unicode General Category L,N,P,S,Zs)") Combining diacritic or mark (Unicode General Category M)") ;; bidi types -(define-category ?R "Right-to-left (strong) +(define-category ?R "Strong R2L Characters with \"strong\" right-to-left directionality, i.e. with R, AL, RLE, or RLO Unicode bidi character type.") -(define-category ?L "Left-to-right (strong) +(define-category ?L "Strong L2R Characters with \"strong\" left-to-right directionality, i.e. with L, LRE, or LRO Unicode bidi character type.") @@ -214,6 +214,9 @@ with L, LRE, or LRO Unicode bidi character type.") (modify-category-entry '(#x31F0 . #x31FF) ?K) (modify-category-entry '(#x30A0 . #x30FA) ?\|) (modify-category-entry #x30FF ?\|) +(modify-category-entry '(#x1AFF0 . #x1B000) ?K) +(modify-category-entry '(#x1B120 . #x1B122) ?K) +(modify-category-entry '(#x1B164 . #x1B167) ?K) ;; Hiragana block (modify-category-entry '(#x3040 . #x309F) ?H) @@ -221,8 +224,12 @@ with L, LRE, or LRO Unicode bidi character type.") (modify-category-entry #x309F ?\|) (modify-category-entry #x30A0 ?H) (modify-category-entry #x30FC ?H) +(modify-category-entry #x1B001 ?H) +(modify-category-entry #x1B11F ?H) +(modify-category-entry '(#x1B150 . #x1B152) ?H) +(modify-category-entry '(#x1B002 . #x1B11E) ?H) ; Hentiagana -(modify-category-entry '(#x1B000 . #x1B1FF) ?j) +(modify-category-entry '(#x1AFF0 . #x1B1FF) ?j) ;; JISX0208 @@ -295,7 +302,7 @@ with L, LRE, or LRO Unicode bidi character type.") (map-charset-chars #'modify-category-entry (car charsets) ?b) (setq charsets (cdr charsets)))) (modify-category-entry '(#x600 . #x6ff) ?b) -(modify-category-entry '(#x8a0 . #x8ff) ?b) +(modify-category-entry '(#x870 . #x8ff) ?b) (modify-category-entry '(#xfb50 . #xfdff) ?b) (modify-category-entry '(#xfe70 . #xfefe) ?b) @@ -306,7 +313,9 @@ with L, LRE, or LRO Unicode bidi character type.") ;; Ethiopic character set (modify-category-entry '(#x1200 . #x1399) ?e) -(modify-category-entry '(#x2d80 . #x2dde) ?e) +(modify-category-entry '(#X2D80 . #X2DDE) ?e) +(modify-category-entry '(#xAB01 . #xAB2E) ?e) +(modify-category-entry '(#x1E7E0 . #x1E7FE) ?e) (let ((chars '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨))) (while chars (modify-syntax-entry (car chars) ".") @@ -580,6 +589,12 @@ with L, LRE, or LRO Unicode bidi character type.") (modify-category-entry c ?l) (setq c (1+ c))) + ;; Latin Extended-G + (setq c #x1DF00) + (while (<= c #x1DFFF) + (modify-category-entry c ?l) + (setq c (1+ c))) + ;; Greek (modify-category-entry '(#x0370 . #x03FF) ?g) @@ -1016,7 +1031,7 @@ with L, LRE, or LRO Unicode bidi character type.") (#x0D41 . #x0D44) (#x0D4D . #x0D4D) (#x0D62 . #x0D63) - (#x0D81 . #x0D81) + (#x0D81 . #x0D81) (#x0DCA . #x0DCA) (#x0DD2 . #x0DD6) (#x0E31 . #x0E31) @@ -1045,7 +1060,7 @@ with L, LRE, or LRO Unicode bidi character type.") (#x1085 . #x1086) (#x108D . #x108D) (#x109D . #x109D) - (#x1160 . #x11FF) + (#x1160 . #x11FF) (#x135D . #x135F) (#x1712 . #x1714) (#x1732 . #x1734) @@ -1111,7 +1126,7 @@ with L, LRE, or LRO Unicode bidi character type.") (#xA806 . #xA806) (#xA80B . #xA80B) (#xA825 . #xA826) - (#xA82C . #xA82C) + (#xA82C . #xA82C) (#xA8C4 . #xA8C5) (#xA8E0 . #xA8F1) (#xA926 . #xA92D) @@ -1136,7 +1151,7 @@ with L, LRE, or LRO Unicode bidi character type.") (#xABE5 . #xABE5) (#xABE8 . #xABE8) (#xABED . #xABED) - (#xD7B0 . #xD7FB) + (#xD7B0 . #xD7FB) (#xFB1E . #xFB1E) (#xFE00 . #xFE0F) (#xFE20 . #xFE2F) @@ -1148,7 +1163,7 @@ with L, LRE, or LRO Unicode bidi character type.") (#x10A01 . #x10A0F) (#x10A38 . #x10A3F) (#x10AE5 . #x10AE6) - (#x10EAB . #x10EAC) + (#x10EAB . #x10EAC) (#x11001 . #x11001) (#x11038 . #x11046) (#x1107F . #x11081) @@ -1162,7 +1177,7 @@ with L, LRE, or LRO Unicode bidi character type.") (#x11180 . #x11181) (#x111B6 . #x111BE) (#x111CA . #x111CC) - (#x111CF . #x111CF) + (#x111CF . #x111CF) (#x1122F . #x11231) (#x11234 . #x11234) (#x11236 . #x11237) @@ -1194,9 +1209,9 @@ with L, LRE, or LRO Unicode bidi character type.") (#x1171D . #x1171F) (#x11722 . #x11725) (#x11727 . #x1172B) - (#x1193B . #x1193C) - (#x1193E . #x1193E) - (#x11943 . #x11943) + (#x1193B . #x1193C) + (#x1193E . #x1193E) + (#x11943 . #x11943) (#x11C30 . #x11C36) (#x11C38 . #x11C3D) (#x11C92 . #x11CA7) @@ -1206,7 +1221,7 @@ with L, LRE, or LRO Unicode bidi character type.") (#x16AF0 . #x16AF4) (#x16B30 . #x16B36) (#x16F8F . #x16F92) - (#x16FE4 . #x16FE4) + (#x16FE4 . #x16FE4) (#x1BC9D . #x1BC9E) (#x1BCA0 . #x1BCA3) (#x1D167 . #x1D169) @@ -1280,18 +1295,19 @@ with L, LRE, or LRO Unicode bidi character type.") (#xFF01 . #xFF60) (#xFFE0 . #xFFE6) (#x16FE0 . #x16FE4) - (#x16FF0 . #x16FF1) + (#x16FF0 . #x16FF1) (#x17000 . #x187F7) (#x18800 . #x18AFF) - (#x18B00 . #x18CD5) + (#x18B00 . #x18CD5) + (#x1AFF0 . #x1AFFF) (#x1B000 . #x1B152) - (#x1B164 . #x1B167) - (#x1B170 . #x1B2FB) + (#x1B164 . #x1B167) + (#x1B170 . #x1B2FB) (#x1F004 . #x1F004) (#x1F0CF . #x1F0CF) (#x1F18E . #x1F18E) (#x1F191 . #x1F19A) - (#x1F1AD . #x1F1AD) + (#x1F1AD . #x1F1AD) (#x1F200 . #x1F320) (#x1F32D . #x1F335) (#x1F337 . #x1F37C) @@ -1316,27 +1332,26 @@ with L, LRE, or LRO Unicode bidi character type.") (#x1F680 . #x1F6C5) (#x1F6CC . #x1F6CC) (#x1F6D0 . #x1F6D2) - (#x1F6D5 . #x1F6D7) + (#x1F6D5 . #x1F6D7) + (#x1F6DD . #x1F6DF) (#x1F6EB . #x1F6EC) (#x1F6F4 . #x1F6FC) - (#x1F7E0 . #x1F7EB) + (#x1F7E0 . #x1F7F0) (#x1F90C . #x1F93A) - (#x1F93C . #x1F945) - (#x1F947 . #x1F978) - (#x1F97A . #x1F9CB) - (#x1F9A5 . #x1F9AA) - (#x1F9AE . #x1F9CA) - (#x1F9CD . #x1F9FF) - (#x1FA00 . #x1FA53) - (#x1FA60 . #x1FA6D) - (#x1FA70 . #x1FA74) - (#x1FA78 . #x1FA7A) - (#x1FA80 . #x1FA86) - (#x1FA90 . #x1FAA8) - (#x1FAB0 . #x1FAB6) - (#x1FAC0 . #x1FAC2) - (#x1FAD0 . #x1FAD6) - (#x1FB00 . #x1FB92) + (#x1F93C . #x1F945) + (#x1F947 . #x1F9FF) + (#x1FA00 . #x1FA53) + (#x1FA60 . #x1FA6D) + (#x1FA70 . #x1FA74) + (#x1FA78 . #x1FA7C) + (#x1FA80 . #x1FA86) + (#x1FA90 . #x1FAAC) + (#x1FAB0 . #x1FABA) + (#x1FAC0 . #x1FAC5) + (#x1FAD0 . #x1FAD9) + (#x1FAE0 . #x1FAE7) + (#x1FAF0 . #x1FAF6) + (#x1FB00 . #x1FB92) (#x20000 . #x2FFFF) (#x30000 . #x3FFFF)))) (dolist (elt l) @@ -1401,7 +1416,7 @@ with L, LRE, or LRO Unicode bidi character type.") (defun use-default-char-width-table () "Internal use only. -Setup char-width-table appropriate for non-CJK language environment." +Setup `char-width-table' appropriate for non-CJK language environment." (while (char-table-parent char-width-table) (setq char-width-table (char-table-parent char-width-table)))) @@ -1413,8 +1428,12 @@ Setup char-width-table appropriate for non-CJK language environment." (if dump-mode ;; While dumping, we can't use require, and international is not ;; in load-path. - (load "international/charscript") - (require 'charscript)) + (progn + (load "international/charscript") + (load "international/emoji-zwj")) + (progn + (require 'charscript) + (require 'emoji-zwj))) (map-charset-chars (lambda (range _ignore) @@ -1521,6 +1540,9 @@ option `glyphless-char-display'." ((eq target 'c1-control) (glyphless-set-char-table-range glyphless-char-display #x80 #x9F method)) + ((eq target 'variation-selectors) + (glyphless-set-char-table-range glyphless-char-display + #xFE00 #xFE0F method)) ((eq target 'format-control) (when unicode-category-table (map-char-table @@ -1556,6 +1578,7 @@ option `glyphless-char-display'." ;;; Control of displaying glyphless characters. (defcustom glyphless-char-display-control '((format-control . thin-space) + (variation-selectors . thin-space) (no-font . hex-code)) "List of directives to control display of glyphless characters. @@ -1571,6 +1594,9 @@ GROUP must be one of these symbols: such as U+200C (ZWNJ), U+200E (LRM), but excluding characters that have graphic images, such as U+00AD (SHY). + `variation-selectors': U+FE00..U+FE0F, used for choosing between + glyph variations (e.g. Emoji vs Text + presentation). `no-font': characters for which no suitable font is found. For character terminals, characters that cannot be encoded by `terminal-coding-system'. @@ -1588,7 +1614,7 @@ Do not set its value directly from Lisp; the value takes effect only via a custom `:set' function (`update-glyphless-char-display'), which updates `glyphless-char-display'." - :version "24.1" + :version "28.1" :type '(alist :key-type (symbol :tag "Character Group") :value-type (symbol :tag "Display Method")) :options '((c0-control @@ -1609,6 +1635,12 @@ function (`update-glyphless-char-display'), which updates (const :tag "Display as empty box" empty-box) (const :tag "Display acronym" acronym) (const :tag "Display hex code in a box" hex-code))) + (variation-selectors + (choice (const :tag "Don't display" zero-width) + (const :tag "Display as thin space" thin-space) + (const :tag "Display as empty box" empty-box) + (const :tag "Display acronym" acronym) + (const :tag "Display hex code in a box" hex-code))) (no-font (choice (const :tag "Don't display" zero-width) (const :tag "Display as thin space" thin-space) |