summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEli Zaretskii <eliz@gnu.org>2011-08-23 17:45:14 +0300
committerEli Zaretskii <eliz@gnu.org>2011-08-23 17:45:14 +0300
commitbca633fb296b17c0e86d589c50fb3414b361e0b3 (patch)
tree1b1e93f6017f7614f6aa950fa78ced1249a99b99
parent4a5885a74a3310ed4f4ba86eee3c406019b2c334 (diff)
downloademacs-bca633fb296b17c0e86d589c50fb3414b361e0b3.tar.gz
Followup for character properties in 2011-08-23T11:48:07Z!handa@m17n.org.
src/bidi.c (bidi_get_type): Abort if we get zero as the bidi type of a character. admin/unidata/unidata-gen.el (unidata-prop-alist): Update the default values of bidi-class according to DerivedBidiClass.txt from the latest UCD. lisp/international/uni-bidi.el: Regenerated. doc/lispref/nonascii.texi (Character Properties): Document the values for unassigned codepoints.
-rw-r--r--admin/ChangeLog6
-rw-r--r--admin/unidata/unidata-gen.el6
-rw-r--r--doc/lispref/ChangeLog5
-rw-r--r--doc/lispref/nonascii.texi53
-rw-r--r--lisp/ChangeLog4
-rw-r--r--lisp/international/uni-bidi.el2
-rw-r--r--src/ChangeLog5
-rw-r--r--src/bidi.c8
8 files changed, 70 insertions, 19 deletions
diff --git a/admin/ChangeLog b/admin/ChangeLog
index f8eb071d96d..17cbcbb3bdf 100644
--- a/admin/ChangeLog
+++ b/admin/ChangeLog
@@ -1,3 +1,9 @@
+2011-08-23 Eli Zaretskii <eliz@gnu.org>
+
+ * unidata/unidata-gen.el (unidata-prop-alist): Update the default
+ values of bidi-class according to DerivedBidiClass.txt from the
+ latest UCD.
+
2011-08-23 Kenichi Handa <handa@m17n.org>
* unidata/unidata-gen.el (unidata-prop-alist): Provide default
diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el
index 03399eae213..1002bb003af 100644
--- a/admin/unidata/unidata-gen.el
+++ b/admin/unidata/unidata-gen.el
@@ -194,7 +194,11 @@ Property value is one of the following symbols:
L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET,
AN, CS, NSM, BN, B, S, WS, ON"
unidata-describe-bidi-class
- (L (#x0600 #x06FF AL) (#x0590 #x05FF R) (#x07C0 #x08FF R)
+ ;; The assignment of default values to blocks of code points
+ ;; follows the file DerivedBidiClass.txt from the Unicode
+ ;; Character Database (UCD).
+ (L (#x0600 #x06FF AL) (#xFB50 #xFDFF AL) (#xFE70 #xFEFF AL)
+ (#x0590 #x05FF R) (#x07C0 #x08FF R)
(#xFB1D #xFB4F R) (#x10800 #x10FFF R) (#x1E800 #x1EFFF R))
;; The order of elements must be in sync with bidi_type_t in
;; src/dispextern.h.
diff --git a/doc/lispref/ChangeLog b/doc/lispref/ChangeLog
index 4cb4d0a6f50..43add469ec0 100644
--- a/doc/lispref/ChangeLog
+++ b/doc/lispref/ChangeLog
@@ -1,3 +1,8 @@
+2011-08-23 Eli Zaretskii <eliz@gnu.org>
+
+ * nonascii.texi (Character Properties): Document the values for
+ unassigned codepoints.
+
2011-08-18 Eli Zaretskii <eliz@gnu.org>
* nonascii.texi (Character Properties): Document use of
diff --git a/doc/lispref/nonascii.texi b/doc/lispref/nonascii.texi
index 7b6d665b2ac..298c7c3d1a8 100644
--- a/doc/lispref/nonascii.texi
+++ b/doc/lispref/nonascii.texi
@@ -369,6 +369,12 @@ replacing each @samp{_} character with a dash @samp{-}. For example,
@code{canonical-combining-class}. However, sometimes we shorten the
names to make their use easier.
+@cindex unassigned character codepoints
+ Some codepoints are left @dfn{unassigned} by the
+@acronym{UCD}---they don't correspond to any character. The Unicode
+Standard defines default values of properties for such codepoints;
+they are mentioned below for each property.
+
Here is the full list of value types for all the character
properties that Emacs knows about:
@@ -376,24 +382,31 @@ properties that Emacs knows about:
@item name
Corresponds to the @code{Name} Unicode property. The value is a
string consisting of upper-case Latin letters A to Z, digits, spaces,
-and hyphen @samp{-} characters.
+and hyphen @samp{-} characters. For unassigned codepoints, the value
+is an empty string.
@cindex unicode general category
@item general-category
Corresponds to the @code{General_Category} Unicode property. The
value is a symbol whose name is a 2-letter abbreviation of the
-character's classification.
+character's classification. For unassigned codepoints, the value
+is @code{Cn}.
@item canonical-combining-class
Corresponds to the @code{Canonical_Combining_Class} Unicode property.
-The value is an integer number.
+The value is an integer number. For unassigned codepoints, the value
+is zero.
@cindex bidirectional class of characters
@item bidi-class
Corresponds to the Unicode @code{Bidi_Class} property. The value is a
symbol whose name is the Unicode @dfn{directional type} of the
character. Emacs uses this property when it reorders bidirectional
-text for display (@pxref{Bidirectional Display}).
+text for display (@pxref{Bidirectional Display}). For unassigned
+codepoints, the value depends on the code blocks to which the
+codepoint belongs: most unassigned codepoints get the value of
+@code{L} (strong L), but some get values of @code{AL} (Arabic letter)
+or @code{R} (strong R).
@item decomposition
Corresponds to the Unicode @code{Decomposition_Type} and
@@ -405,19 +418,22 @@ Note that the Unicode spec writes these tag names inside
brackets; e.g., Unicode specifies @samp{<small>} where Emacs uses
@samp{small}.
}; the other elements are characters that give the compatibility
-decomposition sequence of this character.
+decomposition sequence of this character. For unassigned codepoints,
+the value is the character itself.
@item decimal-digit-value
Corresponds to the Unicode @code{Numeric_Value} property for
characters whose @code{Numeric_Type} is @samp{Digit}. The value is an
-integer number.
+integer number. For unassigned codepoints, the value is @code{nil},
+which means @acronym{NaN}, or ``not-a-number''.
@item digit-value
Corresponds to the Unicode @code{Numeric_Value} property for
characters whose @code{Numeric_Type} is @samp{Decimal}. The value is
an integer number. Examples of such characters include compatibility
subscript and superscript digits, for which the value is the
-corresponding number.
+corresponding number. For unassigned codepoints, the value is
+@code{nil}, which means @acronym{NaN}.
@item numeric-value
Corresponds to the Unicode @code{Numeric_Value} property for
@@ -426,12 +442,15 @@ this property is an integer or a floating-point number. Examples of
characters that have this property include fractions, subscripts,
superscripts, Roman numerals, currency numerators, and encircled
numbers. For example, the value of this property for the character
-@code{U+2155} (@sc{vulgar fraction one fifth}) is @code{0.2}.
+@code{U+2155} (@sc{vulgar fraction one fifth}) is @code{0.2}. For
+unassigned codepoints, the value is @code{nil}, which means
+@acronym{NaN}.
@cindex mirroring of characters
@item mirrored
Corresponds to the Unicode @code{Bidi_Mirrored} property. The value
-of this property is a symbol, either @code{Y} or @code{N}.
+of this property is a symbol, either @code{Y} or @code{N}. For
+unassigned codepoints, the value is @code{N}.
@item mirroring
Corresponds to the Unicode @code{Bidi_Mirroring_Glyph} property. The
@@ -443,29 +462,33 @@ property; however, some characters whose @code{mirrored} property is
@code{Y} also have @code{nil} for @code{mirroring}, because no
appropriate characters exist with mirrored glyphs. Emacs uses this
property to display mirror images of characters when appropriate
-(@pxref{Bidirectional Display}).
+(@pxref{Bidirectional Display}). For unassigned codepoints, the value
+is @code{nil}.
@item old-name
Corresponds to the Unicode @code{Unicode_1_Name} property. The value
-is a string.
+is a string. For unassigned codepoints, the value is an empty string.
@item iso-10646-comment
Corresponds to the Unicode @code{ISO_Comment} property. The value is
-a string.
+a string. For unassigned codepoints, the value is an empty string.
@item uppercase
Corresponds to the Unicode @code{Simple_Uppercase_Mapping} property.
-The value of this property is a single character.
+The value of this property is a single character. For unassigned
+codepoints, the value is @code{nil}, which means the character itself.
@item lowercase
Corresponds to the Unicode @code{Simple_Lowercase_Mapping} property.
-The value of this property is a single character.
+The value of this property is a single character. For unassigned
+codepoints, the value is @code{nil}, which means the character itself.
@item titlecase
Corresponds to the Unicode @code{Simple_Titlecase_Mapping} property.
@dfn{Title case} is a special form of a character used when the first
character of a word needs to be capitalized. The value of this
-property is a single character.
+property is a single character. For unassigned codepoints, the value
+is @code{nil}, which means the character itself.
@end table
@defun get-char-code-property char propname
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index 4df4d1445b1..f8f1f98bf93 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,7 @@
+2011-08-23 Eli Zaretskii <eliz@gnu.org>
+
+ * international/uni-bidi.el: Regenerated.
+
2011-08-23 Kenichi Handa <handa@m17n.org>
* international/charprop.el:
diff --git a/lisp/international/uni-bidi.el b/lisp/international/uni-bidi.el
index 9fa7b53b508..4d86fc821fa 100644
--- a/lisp/international/uni-bidi.el
+++ b/lisp/international/uni-bidi.el
@@ -6,7 +6,7 @@
#^^[3 0 5 5 5 5 5 5 5 5 5 17 6 17 18 6 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 17 18 19 19 14 14 14 19 19 19 19 19 13 15 13 15 15 3 3 3 3 3 3 3 3 3 3 15 19 19 19 19 19 19 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 19 19 19 19 19 19 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 19 19 19 19 5] #^^[1 0 #^^[2 0
#^^[3 0 5 5 5 5 5 5 5 5 5 17 6 17 18 6 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 17 18 19 19 14 14 14 19 19 19 19 19 13 15 13 15 15 3 3 3 3 3 3 3 3 3 3 15 19 19 19 19 19 19 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 19 19 19 19 19 19 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 19 19 19 19 5] "…š„„ƒ…—Ÿˆ" 1 1 1 "¹‡ŽŽ…‰‘" "ð„ˆ" "„î‰" 1 "ƒ‡ö" 1 "Š…­¸" "„‹°•Šƒ" "Ö‡†„Š†" "Žž›³" "¦‹Ž«‰„†" "–„‰ƒ…«ƒ¤" 2 "ƒ·„ˆ„ƒ‡Šœ" "º„„ˆ”Ž‡„" "¹„„ƒƒžƒŠ" "¹„…„”Ž" "º„ˆˆ‹œ" "½Œ¥†…" "¾ƒ…ƒ„‡‹”‡" "¼”œ" "Á„ˆ”œ" "ʇƒ©" "±‡„‡ˆ±" "±†‹†²" "˜›„³Ž" "……‹¤‰¹"] #^^[2 4096 "­„†™„ƒ„‹" "†â" 1 1 1 1 "݃ " "Šæ" "ÿ" 1 1 1 1 "šã" "’ƒƒžŒ" "·‡ˆ‹‡’Š†" "‹ƒñ" "©Ö" " ƒ„‰†ƒ„ƒº" "Þ¢" "—½‡ˆ†Š" 1 "„°……¨‰Œ" " „¼ƒƒŽ" "¬ˆÈ" "Ѓ‡„’" 1 "À§•„" 1 1 1 "½ƒ‹ƒƒƒ"] #^^[2 8192 "‹ƒ˜
…š……†ƒ† ƒ" "Š ƒ‘š–¡" "„Šƒ…†„‹„……„ " "‰†ð" "’ ì" 19 "¶Å…" "•ÞŒ" "§™‹• " "ˆ”Ζ" 19 19 19 "¬Ó" "ÿ" "˲" 1 1 19 19 19 19 "̓Š¦" 1 1 "冄ƒ‡‡" "ÿ" "à " "²Î" "šÙŒ" 19 "ÖšŒ„"] #^^[2 12288 "„ƒ™‰†……ƒÀ" "™ƒÚ„" 1 "À¤œ" "±œƒ" "±Œ„°" "÷„…" "ÞŸ" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] #^^[2 16384 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 "ÀÀ" 1 1 1 1] 1 1 1 1 #^^[2 36864 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
-#^^[3 40832 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]] #^^[2 40960 1 1 1 1 1 1 1 1 1 "·¹" 1 1 "ƒß„ˆ" "ðŽ" "¢Þ" "ˆ÷" "ƒ„™„Œº„ˆ" "Ä›’Ž" "¦ˆ™‹®" "ƒ°„Ã" "©†Œˆ³" "°ƒ…¾" 1 "å„’" 1 1 1 1 1 1 1 1] 1 1 #^^[2 53248 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 1 #^^[2 61440 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 "Š ¦°" "‘­" 7 7 "¾°" "¶¨" "Š†‡‰ ‰ ƒ„…Š" "ý" "ƒ…  Š†š†š‹š" "àƒ‡Š…"]] #^^[1 65536 #^^[2 65536 1 1 "¾À" "‹…Œá" 1 1 1 1 1 1 1 1 1 1 1 1 2 2 "Ÿà" 2 "ƒ…„¨ƒ„À" 2 "¹‡À" 2 2 2 2 2 "àŸ" 2 2 2] #^^[2 69632 "¶‹”š" "±„Å" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] #^^[2 73728 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] #^^[2 77824 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 1 1 #^^[2 90112 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 1 1 1 1 #^^[2 110592 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 1 #^^[2 118784 1 1 "烉ˆ…" "ƒ‡ž„Ò" "ƒº" 1 "ש" 1 1 1 1 1 1 "Û¤" "•¹°" "‰¹Š²" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] #^^[2 122880 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] #^^[2 126976 "¬„Ð" "”ŒŽ " "‹õ" 1 1 1 "¡†Æƒ" "”Œ¥…•‘" "¿¾" "Œë„ƒ" "¤™’˜˜" "û…" "ƒƒ†„„Œ„‹°" "ƺ" "ôŒ" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]] #^^[1 131072 1 1 1 1 1 1 1 1 1 1 #^^[2 172032 1 1 1 1 1 1 1 1 1 1 1 1 1
+#^^[3 40832 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]] #^^[2 40960 1 1 1 1 1 1 1 1 1 "·¹" 1 1 "ƒß„ˆ" "ðŽ" "¢Þ" "ˆ÷" "ƒ„™„Œº„ˆ" "Ä›’Ž" "¦ˆ™‹®" "ƒ°„Ã" "©†Œˆ³" "°ƒ…¾" 1 "å„’" 1 1 1 1 1 1 1 1] 1 1 #^^[2 53248 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 1 #^^[2 61440 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 "Š ¦°" 7 7 7 "¾À" "ý" "Š†‡‰ ‰ ƒ„" "ÿ" "ƒ…  Š†š†š‹š" "àƒ‡Š…"]] #^^[1 65536 #^^[2 65536 1 1 "¾À" "‹…Œá" 1 1 1 1 1 1 1 1 1 1 1 1 2 2 "Ÿà" 2 "ƒ…„¨ƒ„À" 2 "¹‡À" 2 2 2 2 2 "àŸ" 2 2 2] #^^[2 69632 "¶‹”š" "±„Å" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] #^^[2 73728 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] #^^[2 77824 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 1 1 #^^[2 90112 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 1 1 1 1 #^^[2 110592 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 1 #^^[2 118784 1 1 "烉ˆ…" "ƒ‡ž„Ò" "ƒº" 1 "ש" 1 1 1 1 1 1 "Û¤" "•¹°" "‰¹Š²" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] #^^[2 122880 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] #^^[2 126976 "¬„Ð" "”ŒŽ " "‹õ" 1 1 1 "¡†Æƒ" "”Œ¥…•‘" "¿¾" "Œë„ƒ" "¤™’˜˜" "û…" "ƒƒ†„„Œ„‹°" "ƺ" "ôŒ" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]] #^^[1 131072 1 1 1 1 1 1 1 1 1 1 #^^[2 172032 1 1 1 1 1 1 1 1 1 1 1 1 1
#^^[3 173696 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] #^^[2 176128 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#^^[3 177920 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 1
#^^[3 178176 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 1 1 1 #^^[2 192512 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]] 1 1 1 1 1 1 1 1 1 1 1 #^^[1 917504 #^^[2 917504 "žà" 1 16 "ð" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] #^^[1 983040 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 #^^[2 1044480 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
diff --git a/src/ChangeLog b/src/ChangeLog
index e7d426f23fb..c629598fcf7 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,8 @@
+2011-08-23 Eli Zaretskii <eliz@gnu.org>
+
+ * bidi.c (bidi_get_type): Abort if we get zero as the bidi type of
+ a character.
+
2011-08-23 Chong Yidong <cyd@stupidchicken.com>
* nsfont.m (ns_otf_to_script): Fix typo.
diff --git a/src/bidi.c b/src/bidi.c
index 7517eca5aed..00aa31bf48d 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -108,8 +108,12 @@ bidi_get_type (int ch, bidi_dir_t override)
abort ();
default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
- if (default_type == 0)
- default_type = STRONG_L;
+ /* Every valid character code, even those that are unassigned by the
+ UCD, have some bidi-class property, according to
+ DerivedBidiClass.txt file. Therefore, if we ever get UNKNOWN_BT
+ (= zero) code from CHAR_TABLE_REF, that's a bug. */
+ if (default_type == UNKNOWN_BT)
+ abort ();
if (override == NEUTRAL_DIR)
return default_type;