diff options
| author | Eli Zaretskii <eliz@gnu.org> | 2014-10-18 15:47:57 +0300 |
|---|---|---|
| committer | Eli Zaretskii <eliz@gnu.org> | 2014-10-18 15:47:57 +0300 |
| commit | 6b247d287327777dfd29e20eac177c2005e99b45 (patch) | |
| tree | f7581b4e96b9ba0f2f475f15c888b718d0050a6d /src | |
| parent | 605cfb8b7a4ef8f73ddc8f2de5c086f3a7455971 (diff) | |
| parent | b5e71861a3b15de7651be4524f38337aa451bfd7 (diff) | |
| download | emacs-6b247d287327777dfd29e20eac177c2005e99b45.tar.gz | |
Fix bidi reordering of bracket characters in isolates.
src/bidi.c (bidi_cache_find): Rename the argument NEUTRALS_OK to
RESOLVED_ONLY; when non-zero, return from the cache only fully
resolved states. All callers changed.
(CANONICAL_EQU): New macro.
(PUSH_BPA_STACK): Use it to push onto the BPA stack the canonical
equivalent of the paired closing bracket character.
(bidi_find_bracket_pairs): Set the bracket_pairing_pos member to
the default non-negative value, to be checked later in
bidi_resolve_brackets. Use CANONICAL_EQU to test candidate
characters against those pushed onto the BPA stack.
(bidi_record_type_for_neutral): New function.
(bidi_resolve_brackets): Record next_for_neutral and
prev_for_neutral when embedding level gets pushed. Force
resolution of bracket pairs when entering a level run that was not
yet BPA-resolved.
(bidi_resolve_neutral): Add assertions before calling
bidi_resolve_neutral_1.
(bidi_level_of_next_char): Remove the code that attempted to
resolve unresolved neutrals; that is now done by
bidi_resolve_neutral.
Diffstat (limited to 'src')
| -rw-r--r-- | src/ChangeLog | 22 | ||||
| -rw-r--r-- | src/bidi.c | 262 |
2 files changed, 176 insertions, 108 deletions
diff --git a/src/ChangeLog b/src/ChangeLog index 026ae46299d..66306bd5fb6 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,5 +1,27 @@ 2014-10-18 Eli Zaretskii <eliz@gnu.org> + Fix reordering of bracket characters in isolates. + * bidi.c (bidi_cache_find): Rename the argument NEUTRALS_OK to + RESOLVED_ONLY; when non-zero, return from the cache only fully + resolved states. All callers changed. + (CANONICAL_EQU): New macro. + (PUSH_BPA_STACK): Use it to push onto the BPA stack the canonical + equivalent of the paired closing bracket character. + (bidi_find_bracket_pairs): Set the bracket_pairing_pos member to + the default non-negative value, to be checked later in + bidi_resolve_brackets. Use CANONICAL_EQU to test candidate + characters against those pushed onto the BPA stack. + (bidi_record_type_for_neutral): New function. + (bidi_resolve_brackets): Record next_for_neutral and + prev_for_neutral when embedding level gets pushed. Force + resolution of bracket pairs when entering a level run that was not + yet BPA-resolved. + (bidi_resolve_neutral): Add assertions before calling + bidi_resolve_neutral_1. + (bidi_level_of_next_char): Remove the code that attempted to + resolve unresolved neutrals; that is now done by + bidi_resolve_neutral. + * w32select.c (owner_callback): Mark with ALIGN_STACK attribute. 2014-10-17 Eli Zaretskii <eliz@gnu.org> diff --git a/src/bidi.c b/src/bidi.c index bbafc785e7b..59fade3f785 100644 --- a/src/bidi.c +++ b/src/bidi.c @@ -800,26 +800,22 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved, /* Look for a cached iterator state that corresponds to CHARPOS. If found, copy the cached state into BIDI_IT and return the type of - the cached entry. If not found, return UNKNOWN_BT. NEUTRALS_OK - non-zero means it is OK to return cached state for neutral - characters that have no valid next_for_neutral member, and - therefore cannot be resolved. This can happen if the state was - cached before it was resolved in bidi_resolve_neutral. */ + the cached entry. If not found, return UNKNOWN_BT. RESOLVED_ONLY + zero means it is OK to return cached states tyhat were not fully + resolved yet. This can happen if the state was cached before it + was resolved in bidi_resolve_neutral. */ static bidi_type_t -bidi_cache_find (ptrdiff_t charpos, bool neutrals_ok, struct bidi_it *bidi_it) +bidi_cache_find (ptrdiff_t charpos, bool resolved_only, struct bidi_it *bidi_it) { ptrdiff_t i = bidi_cache_search (charpos, -1, bidi_it->scan_dir); if (i >= bidi_cache_start - && (neutrals_ok - /* Callers that don't want to resolve neutrals (and set - neutrals_ok = false) need to be sure that there's enough - info in the cached state to resolve the neutrals and - isolates, and if not, they don't want the cached state. */ - || !(bidi_cache[i].resolved_level == -1 - && (bidi_get_category (bidi_cache[i].type) == NEUTRAL - || bidi_isolate_fmt_char (bidi_cache[i].type)) - && bidi_cache[i].next_for_neutral.type == UNKNOWN_BT))) + && (!resolved_only + /* Callers that want only fully resolved states (and set + resolved_only = true) need to be sure that there's enough + info in the cached state to return the state as final, + and if not, they don't want the cached state. */ + || bidi_cache[i].resolved_level >= 0)) { bidi_dir_t current_scan_dir = bidi_it->scan_dir; @@ -2342,6 +2338,41 @@ typedef struct bpa_stack_entry { BPA stack, which should be more than enough for actual bidi text. */ #define MAX_BPA_STACK (max (MAX_ALLOCA / sizeof (bpa_stack_entry), 1)) +/* UAX#9 says to match opening brackets with the matching closing + brackets or their canonical equivalents. As of Unicode 7.0, there + are only 2 bracket characters that have canonical equivalence + decompositions: u+2329 and u+232A. So instead of accessing the + table in uni-decomposition.el, we just handle these 2 characters + with this simple macro. Note that ASCII characters don't have + canonical equivalents by definition. */ + +/* To find all the characters that need to be processed by + CANONICAL_EQU, first find all the characters which have + decompositions in UnicodeData.txt, with this Awk script: + + awk -F ";" " {if ($6 != \"\") print $1, $6}" UnicodeData.txt + + Then produce a list of all the bracket characters in BidiBrackets.txt: + + awk -F "[ ;]" " {if ($1 != \"#\" && $1 != \"\") print $1}" BidiBrackets.txt + + And finally, cross-reference these two: + + fgrep -w -f brackets.txt decompositions.txt + + where "decompositions.txt" was produced by the 1st script, and + "brackets.txt" by the 2nd script. In the output of fgrep, look + only for decompositions that don't begin with some compatibility + formatting tag, such as "<compat>". Only decompositions that + consist solely of character codepoints are relevant to bidi + brackets processing. */ + +#define CANONICAL_EQU(c) \ + ( ASCII_CHAR_P (c) ? c \ + : (c) == 0x2329 ? 0x3008 \ + : (c) == 0x232a ? 0x3009 \ + : c ) + #ifdef ENABLE_CHECKING # define STORE_BRACKET_CHARPOS \ bpa_stack[bpa_sp].open_bracket_pos = bidi_it->charpos @@ -2351,16 +2382,18 @@ typedef struct bpa_stack_entry { #define PUSH_BPA_STACK \ do { \ - bpa_sp++; \ - if (bpa_sp >= MAX_BPA_STACK) \ - { \ - bpa_sp = MAX_BPA_STACK - 1; \ - goto bpa_give_up; \ - } \ - bpa_stack[bpa_sp].close_bracket_char = bidi_mirror_char (bidi_it->ch); \ - bpa_stack[bpa_sp].open_bracket_idx = bidi_cache_last_idx; \ - bpa_stack[bpa_sp].flags = 0; \ - STORE_BRACKET_CHARPOS; \ + int ch; \ + bpa_sp++; \ + if (bpa_sp >= MAX_BPA_STACK) \ + { \ + bpa_sp = MAX_BPA_STACK - 1; \ + goto bpa_give_up; \ + } \ + ch = CANONICAL_EQU (bidi_it->ch); \ + bpa_stack[bpa_sp].close_bracket_char = bidi_mirror_char (ch); \ + bpa_stack[bpa_sp].open_bracket_idx = bidi_cache_last_idx; \ + bpa_stack[bpa_sp].flags = 0; \ + STORE_BRACKET_CHARPOS; \ } while (0) @@ -2405,13 +2438,22 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it) int old_sidx, new_sidx; int current_level = bidi_it->level_stack[bidi_it->stack_idx].level; + /* Mark every opening bracket character we've traversed by + putting its own position into bracket_pairing_pos. This + is examined in bidi_resolve_brackets to distinguish + brackets that were already resolved to stay NEUTRAL_ON, + and those that were not yet processed by this function + (because they were skipped when we skip higher embedding + levels below). */ + if (btype == BIDI_BRACKET_OPEN && bidi_it->bracket_pairing_pos == -1) + bidi_it->bracket_pairing_pos = bidi_it->charpos; bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0); if (btype == BIDI_BRACKET_OPEN) PUSH_BPA_STACK; else if (btype == BIDI_BRACKET_CLOSE) { int sp = bpa_sp; - int curchar = bidi_it->ch; + int curchar = CANONICAL_EQU (bidi_it->ch); eassert (sp >= 0); while (sp >= 0 && bpa_stack[sp].close_bracket_char != curchar) @@ -2513,13 +2555,35 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it) /* Restore bidi_it from the cache, which should have the bracket resolution members set as determined by the above loop. */ - type = bidi_cache_find (saved_it.charpos, 1, bidi_it); + type = bidi_cache_find (saved_it.charpos, 0, bidi_it); eassert (type == NEUTRAL_ON); } return retval; } +static void +bidi_record_type_for_neutral (struct bidi_saved_info *info, int level, + bool nextp) +{ + int idx; + + for (idx = bidi_cache_last_idx + 1; idx < bidi_cache_idx; idx++) + { + int lev = bidi_cache[idx].level_stack[bidi_cache[idx].stack_idx].level; + + if (lev <= level) + { + eassert (lev == level); + if (nextp) + bidi_cache[idx].next_for_neutral = *info; + else + bidi_cache[idx].prev_for_neutral = *info; + break; + } + } +} + static bidi_type_t bidi_resolve_brackets (struct bidi_it *bidi_it) { @@ -2527,12 +2591,24 @@ bidi_resolve_brackets (struct bidi_it *bidi_it) bool resolve_bracket = false; bidi_type_t type = UNKNOWN_BT; int ch; - struct bidi_saved_info tem_info; - - bidi_remember_char (&tem_info, bidi_it, 1); + struct bidi_saved_info prev_for_neutral, next_for_neutral; + + /* Record the prev_for_neutral type either from the previous + character, if it was a strong or AN/EN, or from the + prev_for_neutral information recorded previously. */ + if (bidi_it->type == STRONG_L || bidi_it->type == STRONG_R + || bidi_it->type == WEAK_AN || bidi_it->type == WEAK_EN) + bidi_remember_char (&prev_for_neutral, bidi_it, 1); + else + prev_for_neutral = bidi_it->prev_for_neutral; + /* Record the next_for_neutral type information. */ + if (bidi_it->next_for_neutral.charpos > bidi_it->charpos) + next_for_neutral = bidi_it->next_for_neutral; + else + next_for_neutral.charpos = -1; if (!bidi_it->first_elt) { - type = bidi_cache_find (bidi_it->charpos + bidi_it->nchars, 1, bidi_it); + type = bidi_cache_find (bidi_it->charpos + bidi_it->nchars, 0, bidi_it); ch = bidi_it->ch; } if (type == UNKNOWN_BT) @@ -2543,36 +2619,45 @@ bidi_resolve_brackets (struct bidi_it *bidi_it) } else { + eassert (bidi_it->resolved_level == -1); + /* If the cached state shows an increase of embedding level due + to an isolate initiator, we need to update the 1st cached + state of the next run of the current isolating sequence with + the prev_for_neutral and next_for_neutral information, so + that it will be picked up when we advance to that next run. */ + if (bidi_it->level_stack[bidi_it->stack_idx].level > prev_level + && bidi_it->level_stack[bidi_it->stack_idx].isolate_status) + { + bidi_record_type_for_neutral (&prev_for_neutral, prev_level, 0); + bidi_record_type_for_neutral (&next_for_neutral, prev_level, 1); + } if (type == NEUTRAL_ON && bidi_paired_bracket_type (ch) == BIDI_BRACKET_OPEN) { - if (bidi_it->level_stack[bidi_it->stack_idx].level == prev_level) + if (bidi_it->bracket_pairing_pos > bidi_it->charpos) { - if (bidi_it->bracket_pairing_pos > 0) - { - /* A cached opening bracket that wasn't completely - resolved yet. */ - resolve_bracket = true; - } + /* A cached opening bracket that wasn't completely + resolved yet. */ + resolve_bracket = true; } - else + else if (bidi_it->bracket_pairing_pos == -1) { /* Higher levels were not BPA-resolved yet, even if - cached by bidi_find_bracket_pairs. Lower levels were - probably processed by bidi_find_bracket_pairs, but we - have no easy way of retaining the prev_for_neutral - from the previous level run of the isolating - sequence. Force application of BPA now. */ + cached by bidi_find_bracket_pairs. Force application + of BPA to the new level now. */ if (bidi_find_bracket_pairs (bidi_it)) resolve_bracket = true; } } - /* Keep track of the prev_for_neutral type, needed for resolving - brackets below and for resolving neutrals in bidi_resolve_neutral. */ - if (bidi_it->level_stack[bidi_it->stack_idx].level == prev_level - && (tem_info.type == STRONG_L || tem_info.type == STRONG_R - || tem_info.type == WEAK_AN || tem_info.type == WEAK_EN)) - bidi_it->prev_for_neutral = tem_info; + /* Keep track of the prev_for_neutral and next_for_neutral + types, needed for resolving brackets below and for resolving + neutrals in bidi_resolve_neutral. */ + if (bidi_it->level_stack[bidi_it->stack_idx].level == prev_level) + { + bidi_it->prev_for_neutral = prev_for_neutral; + if (next_for_neutral.charpos > 0) + bidi_it->next_for_neutral = next_for_neutral; + } } /* If needed, resolve the bracket type according to N0. */ @@ -2657,9 +2742,18 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) || (type == WEAK_BN && bidi_explicit_dir_char (bidi_it->ch))) { if (bidi_it->next_for_neutral.type != UNKNOWN_BT) - type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, - bidi_it->next_for_neutral.type, - current_level); + { + /* Make sure the data for resolving neutrals we are + about to use is valid. */ + eassert (bidi_it->next_for_neutral.charpos > bidi_it->charpos + /* PDI defines an eos, so it's OK for it to + serve as its own next_for_neutral. */ + || (bidi_it->next_for_neutral.charpos == bidi_it->charpos + && bidi_it->type == PDI)); + type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, + bidi_it->next_for_neutral.type, + current_level); + } /* The next two "else if" clauses are shortcuts for the important special case when we have a long sequence of neutral or WEAK_BN characters, such as whitespace or nulls or @@ -2855,16 +2949,13 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) } } - /* Perhaps the character we want is already cached. If it is, the - call to bidi_cache_find below will return a type other than - UNKNOWN_BT. */ + /* Perhaps the character we want is already cached s fully resolved. + If it is, the call to bidi_cache_find below will return a type + other than UNKNOWN_BT. */ if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt) { int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring)) ? 0 : 1); - bidi_type_t prev_type = bidi_it->type; - bidi_type_t type_for_neutral = bidi_it->next_for_neutral.type; - ptrdiff_t pos_for_neutral = bidi_it->next_for_neutral.charpos; if (bidi_it->scan_dir > 0) { @@ -2879,57 +2970,12 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) cached at the beginning of the iteration. */ next_char_pos = bidi_it->charpos - 1; if (next_char_pos >= bob - 1) - type = bidi_cache_find (next_char_pos, 0, bidi_it); - - /* For a sequence of BN and NI, copy the type from the previous - character. This is because the loop in bidi_resolve_neutral - that handles such sequences caches the characters it - traverses, but does not (and cannot) store the - next_for_neutral member for them, because it is only known - when the loop ends. So when we find them in the cache, their - type needs to be updated, but we don't have next_for_neutral - to do that. However, whatever type is resolved as result of - that loop, it will be the same for all the traversed - characters, by virtue of N1 and N2. */ - if (type == WEAK_BN && bidi_it->scan_dir > 0 - && bidi_explicit_dir_char (bidi_it->ch) - && type_for_neutral != UNKNOWN_BT - && bidi_it->charpos < pos_for_neutral) - { - type = prev_type; - eassert (type != UNKNOWN_BT); - } + type = bidi_cache_find (next_char_pos, 1, bidi_it); if (type != UNKNOWN_BT) { - /* If resolved_level is -1, it means this state was cached - before it was completely resolved, so we cannot return - it. */ - if (bidi_it->resolved_level != -1) - { - eassert (bidi_it->resolved_level >= 0); - return bidi_it->resolved_level; - } - else - { - level = bidi_it->level_stack[bidi_it->stack_idx].level; - if (bidi_get_category (type) == NEUTRAL - || bidi_isolate_fmt_char (type)) - { - /* Make sure the data for resolving neutrals we are - about to use is valid. */ - if (bidi_it->next_for_neutral.charpos < bidi_it->charpos - /* PDI defines an eos, so it's OK for it to - serve as its own next_for_neutral. */ - || (bidi_it->next_for_neutral.charpos == bidi_it->charpos - && bidi_it->type != PDI) - || bidi_it->next_for_neutral.type == UNKNOWN_BT) - emacs_abort (); - - type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, - bidi_it->next_for_neutral.type, - level); - } - } + /* We asked the cache for fully resolved states. */ + eassert (bidi_it->resolved_level >= 0); + return bidi_it->resolved_level; } } |
