summary | refs | log | tree | commit | diff
path: root/lisp/international/characters.el
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/international/characters.el')
-rw-r--r-- lisp/international/characters.el 116
1 files changed, 74 insertions, 42 deletions
diff --git a/lisp/international/characters.el b/lisp/international/characters.el
index 97bf31acfc3..5aefda23283 100644
--- a/lisp/international/characters.el
+++ b/lisp/international/characters.el
@@ -116,11 +116,11 @@ Base characters (Unicode General Category L,N,P,S,Zs)")
Combining diacritic or mark (Unicode General Category M)")
;; bidi types
-(define-category ?R "Right-to-left (strong)
+(define-category ?R "Strong R2L
Characters with \"strong\" right-to-left directionality, i.e.
with R, AL, RLE, or RLO Unicode bidi character type.")
-(define-category ?L "Left-to-right (strong)
+(define-category ?L "Strong L2R
Characters with \"strong\" left-to-right directionality, i.e.
with L, LRE, or LRO Unicode bidi character type.")
@@ -214,6 +214,9 @@ with L, LRE, or LRO Unicode bidi character type.")
(modify-category-entry '(#x31F0 . #x31FF) ?K)
(modify-category-entry '(#x30A0 . #x30FA) ?\|)
(modify-category-entry #x30FF ?\|)
+(modify-category-entry '(#x1AFF0 . #x1B000) ?K)
+(modify-category-entry '(#x1B120 . #x1B122) ?K)
+(modify-category-entry '(#x1B164 . #x1B167) ?K)
;; Hiragana block
(modify-category-entry '(#x3040 . #x309F) ?H)
@@ -221,8 +224,12 @@ with L, LRE, or LRO Unicode bidi character type.")
(modify-category-entry #x309F ?\|)
(modify-category-entry #x30A0 ?H)
(modify-category-entry #x30FC ?H)
+(modify-category-entry #x1B001 ?H)
+(modify-category-entry #x1B11F ?H)
+(modify-category-entry '(#x1B150 . #x1B152) ?H)
+(modify-category-entry '(#x1B002 . #x1B11E) ?H) ; Hentaigana
-(modify-category-entry '(#x1B000 . #x1B1FF) ?j)
+(modify-category-entry '(#x1AFF0 . #x1B1FF) ?j)
;; JISX0208
@@ -295,7 +302,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(map-charset-chars #'modify-category-entry (car charsets) ?b)
(setq charsets (cdr charsets))))
(modify-category-entry '(#x600 . #x6ff) ?b)
-(modify-category-entry '(#x8a0 . #x8ff) ?b)
+(modify-category-entry '(#x870 . #x8ff) ?b)
(modify-category-entry '(#xfb50 . #xfdff) ?b)
(modify-category-entry '(#xfe70 . #xfefe) ?b)
@@ -306,7 +313,9 @@ with L, LRE, or LRO Unicode bidi character type.")
;; Ethiopic character set
(modify-category-entry '(#x1200 . #x1399) ?e)
-(modify-category-entry '(#x2d80 . #x2dde) ?e)
+(modify-category-entry '(#X2D80 . #X2DDE) ?e)
+(modify-category-entry '(#xAB01 . #xAB2E) ?e)
+(modify-category-entry '(#x1E7E0 . #x1E7FE) ?e)
(let ((chars '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨)))
(while chars
(modify-syntax-entry (car chars) ".")
@@ -580,6 +589,12 @@ with L, LRE, or LRO Unicode bidi character type.")
(modify-category-entry c ?l)
(setq c (1+ c)))
+ ;; Latin Extended-G
+ (setq c #x1DF00)
+ (while (<= c #x1DFFF)
+ (modify-category-entry c ?l)
+ (setq c (1+ c)))
+
;; Greek
(modify-category-entry '(#x0370 . #x03FF) ?g)
@@ -1016,7 +1031,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x0D41 . #x0D44)
(#x0D4D . #x0D4D)
(#x0D62 . #x0D63)
- (#x0D81 . #x0D81)
+ (#x0D81 . #x0D81)
(#x0DCA . #x0DCA)
(#x0DD2 . #x0DD6)
(#x0E31 . #x0E31)
@@ -1045,7 +1060,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x1085 . #x1086)
(#x108D . #x108D)
(#x109D . #x109D)
- (#x1160 . #x11FF)
+ (#x1160 . #x11FF)
(#x135D . #x135F)
(#x1712 . #x1714)
(#x1732 . #x1734)
@@ -1111,7 +1126,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#xA806 . #xA806)
(#xA80B . #xA80B)
(#xA825 . #xA826)
- (#xA82C . #xA82C)
+ (#xA82C . #xA82C)
(#xA8C4 . #xA8C5)
(#xA8E0 . #xA8F1)
(#xA926 . #xA92D)
@@ -1136,7 +1151,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#xABE5 . #xABE5)
(#xABE8 . #xABE8)
(#xABED . #xABED)
- (#xD7B0 . #xD7FB)
+ (#xD7B0 . #xD7FB)
(#xFB1E . #xFB1E)
(#xFE00 . #xFE0F)
(#xFE20 . #xFE2F)
@@ -1148,7 +1163,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x10A01 . #x10A0F)
(#x10A38 . #x10A3F)
(#x10AE5 . #x10AE6)
- (#x10EAB . #x10EAC)
+ (#x10EAB . #x10EAC)
(#x11001 . #x11001)
(#x11038 . #x11046)
(#x1107F . #x11081)
@@ -1162,7 +1177,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x11180 . #x11181)
(#x111B6 . #x111BE)
(#x111CA . #x111CC)
- (#x111CF . #x111CF)
+ (#x111CF . #x111CF)
(#x1122F . #x11231)
(#x11234 . #x11234)
(#x11236 . #x11237)
@@ -1194,9 +1209,9 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x1171D . #x1171F)
(#x11722 . #x11725)
(#x11727 . #x1172B)
- (#x1193B . #x1193C)
- (#x1193E . #x1193E)
- (#x11943 . #x11943)
+ (#x1193B . #x1193C)
+ (#x1193E . #x1193E)
+ (#x11943 . #x11943)
(#x11C30 . #x11C36)
(#x11C38 . #x11C3D)
(#x11C92 . #x11CA7)
@@ -1206,7 +1221,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x16AF0 . #x16AF4)
(#x16B30 . #x16B36)
(#x16F8F . #x16F92)
- (#x16FE4 . #x16FE4)
+ (#x16FE4 . #x16FE4)
(#x1BC9D . #x1BC9E)
(#x1BCA0 . #x1BCA3)
(#x1D167 . #x1D169)
@@ -1280,18 +1295,19 @@ with L, LRE, or LRO Unicode bidi character type.")
(#xFF01 . #xFF60)
(#xFFE0 . #xFFE6)
(#x16FE0 . #x16FE4)
- (#x16FF0 . #x16FF1)
+ (#x16FF0 . #x16FF1)
(#x17000 . #x187F7)
(#x18800 . #x18AFF)
- (#x18B00 . #x18CD5)
+ (#x18B00 . #x18CD5)
+ (#x1AFF0 . #x1AFFF)
(#x1B000 . #x1B152)
- (#x1B164 . #x1B167)
- (#x1B170 . #x1B2FB)
+ (#x1B164 . #x1B167)
+ (#x1B170 . #x1B2FB)
(#x1F004 . #x1F004)
(#x1F0CF . #x1F0CF)
(#x1F18E . #x1F18E)
(#x1F191 . #x1F19A)
- (#x1F1AD . #x1F1AD)
+ (#x1F1AD . #x1F1AD)
(#x1F200 . #x1F320)
(#x1F32D . #x1F335)
(#x1F337 . #x1F37C)
@@ -1316,27 +1332,26 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x1F680 . #x1F6C5)
(#x1F6CC . #x1F6CC)
(#x1F6D0 . #x1F6D2)
- (#x1F6D5 . #x1F6D7)
+ (#x1F6D5 . #x1F6D7)
+ (#x1F6DD . #x1F6DF)
(#x1F6EB . #x1F6EC)
(#x1F6F4 . #x1F6FC)
- (#x1F7E0 . #x1F7EB)
+ (#x1F7E0 . #x1F7F0)
(#x1F90C . #x1F93A)
- (#x1F93C . #x1F945)
- (#x1F947 . #x1F978)
- (#x1F97A . #x1F9CB)
- (#x1F9A5 . #x1F9AA)
- (#x1F9AE . #x1F9CA)
- (#x1F9CD . #x1F9FF)
- (#x1FA00 . #x1FA53)
- (#x1FA60 . #x1FA6D)
- (#x1FA70 . #x1FA74)
- (#x1FA78 . #x1FA7A)
- (#x1FA80 . #x1FA86)
- (#x1FA90 . #x1FAA8)
- (#x1FAB0 . #x1FAB6)
- (#x1FAC0 . #x1FAC2)
- (#x1FAD0 . #x1FAD6)
- (#x1FB00 . #x1FB92)
+ (#x1F93C . #x1F945)
+ (#x1F947 . #x1F9FF)
+ (#x1FA00 . #x1FA53)
+ (#x1FA60 . #x1FA6D)
+ (#x1FA70 . #x1FA74)
+ (#x1FA78 . #x1FA7C)
+ (#x1FA80 . #x1FA86)
+ (#x1FA90 . #x1FAAC)
+ (#x1FAB0 . #x1FABA)
+ (#x1FAC0 . #x1FAC5)
+ (#x1FAD0 . #x1FAD9)
+ (#x1FAE0 . #x1FAE7)
+ (#x1FAF0 . #x1FAF6)
+ (#x1FB00 . #x1FB92)
(#x20000 . #x2FFFF)
(#x30000 . #x3FFFF))))
(dolist (elt l)
@@ -1401,7 +1416,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(defun use-default-char-width-table ()
"Internal use only.
-Setup char-width-table appropriate for non-CJK language environment."
+Setup `char-width-table' appropriate for non-CJK language environment."
(while (char-table-parent char-width-table)
(setq char-width-table (char-table-parent char-width-table))))
@@ -1413,8 +1428,12 @@ Setup char-width-table appropriate for non-CJK language environment."
(if dump-mode
;; While dumping, we can't use require, and international is not
;; in load-path.
- (load "international/charscript")
- (require 'charscript))
+ (progn
+ (load "international/charscript")
+ (load "international/emoji-zwj"))
+ (progn
+ (require 'charscript)
+ (require 'emoji-zwj)))
(map-charset-chars
(lambda (range _ignore)
@@ -1521,6 +1540,9 @@ option `glyphless-char-display'."
((eq target 'c1-control)
(glyphless-set-char-table-range glyphless-char-display
#x80 #x9F method))
+ ((eq target 'variation-selectors)
+ (glyphless-set-char-table-range glyphless-char-display
+ #xFE00 #xFE0F method))
((eq target 'format-control)
(when unicode-category-table
(map-char-table
@@ -1556,6 +1578,7 @@ option `glyphless-char-display'."
;;; Control of displaying glyphless characters.
(defcustom glyphless-char-display-control
'((format-control . thin-space)
+ (variation-selectors . thin-space)
(no-font . hex-code))
"List of directives to control display of glyphless characters.
@@ -1571,6 +1594,9 @@ GROUP must be one of these symbols:
such as U+200C (ZWNJ), U+200E (LRM), but
excluding characters that have graphic images,
such as U+00AD (SHY).
+ `variation-selectors': U+FE00..U+FE0F, used for choosing between
+ glyph variations (e.g. Emoji vs Text
+ presentation).
`no-font': characters for which no suitable font is found.
For character terminals, characters that cannot
be encoded by `terminal-coding-system'.
@@ -1588,7 +1614,7 @@ Do not set its value directly from Lisp; the value takes effect
only via a custom `:set'
function (`update-glyphless-char-display'), which updates
`glyphless-char-display'."
- :version "24.1"
+ :version "28.1"
:type '(alist :key-type (symbol :tag "Character Group")
:value-type (symbol :tag "Display Method"))
:options '((c0-control
@@ -1609,6 +1635,12 @@ function (`update-glyphless-char-display'), which updates
(const :tag "Display as empty box" empty-box)
(const :tag "Display acronym" acronym)
(const :tag "Display hex code in a box" hex-code)))
+ (variation-selectors
+ (choice (const :tag "Don't display" zero-width)
+ (const :tag "Display as thin space" thin-space)
+ (const :tag "Display as empty box" empty-box)
+ (const :tag "Display acronym" acronym)
+ (const :tag "Display hex code in a box" hex-code)))
(no-font
(choice (const :tag "Don't display" zero-width)
(const :tag "Display as thin space" thin-space)