diff options
author | Matthias Clasen <mclasen@redhat.com> | 2021-11-21 16:17:37 -0500 |
---|---|---|
committer | Matthias Clasen <mclasen@redhat.com> | 2021-11-22 23:32:35 -0500 |
commit | 6655ceabff26f7830335833ace8e59e78251c296 (patch) | |
tree | 0355dcc3c767e4ad1de1a34de19c266cf844f7d8 | |
parent | 9d114095bd4c4fb5ab64fff0ed5c1f6680ed2609 (diff) | |
download | pango-6655ceabff26f7830335833ace8e59e78251c296.tar.gz |
break: Fix hyphen condition
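When looking at scripts to decide whether a break
inserts a hyphen, we want to look at the script of
the *previous* character, not the current one.
And then we need to except SHY (the soft hyphen,
U+00AD) from the Common script case, so that Common
characters insert a hyphen only when the previous
character is a soft hyphen.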
-rw-r--r-- | pango/break.c | 9 | ||||
-rw-r--r-- | tests/breaks/eight.expected | 2 | ||||
-rw-r--r-- | tests/breaks/eleven.expected | 14 | ||||
-rw-r--r-- | tests/breaks/fifteen.expected | 4 | ||||
-rw-r--r-- | tests/breaks/one.expected | 4 | ||||
-rw-r--r-- | tests/breaks/seventeen.expected | 2 | ||||
-rw-r--r-- | tests/breaks/sixteen.expected | 4 | ||||
-rw-r--r-- | tests/breaks/thirteen.expected | 4 | ||||
-rw-r--r-- | tests/breaks/two.break | 2 | ||||
-rw-r--r-- | tests/breaks/two.expected | 14 |
10 files changed, 32 insertions, 27 deletions
diff --git a/pango/break.c b/pango/break.c index 3af083ce..5622ca21 100644 --- a/pango/break.c +++ b/pango/break.c @@ -170,6 +170,8 @@ default_break (const char *text, GUnicodeBreakType prev_break_type; GUnicodeBreakType prev_prev_break_type; + PangoScript prev_script; + /* See Grapheme_Cluster_Break Property Values table of UAX#29 */ typedef enum { @@ -262,6 +264,7 @@ default_break (const char *text, prev_break_type = G_UNICODE_BREAK_UNKNOWN; prev_prev_break_type = G_UNICODE_BREAK_UNKNOWN; prev_wc = 0; + prev_script = PANGO_SCRIPT_COMMON; prev_jamo = NO_JAMO; prev_space_or_hyphen = FALSE; @@ -539,7 +542,6 @@ default_break (const char *text, } script = (PangoScript)g_unichar_get_script (wc); - /* ---- UAX#29 Word Boundaries ---- */ { is_word_boundary = FALSE; @@ -1571,9 +1573,11 @@ default_break (const char *text, attrs[i].break_inserts_hyphen = FALSE; attrs[i].break_removes_preceding = FALSE; - switch ((int)script) + switch ((int)prev_script) { case PANGO_SCRIPT_COMMON: + insert_hyphens = prev_wc == 0x00ad; + break; case PANGO_SCRIPT_HAN: case PANGO_SCRIPT_HANGUL: case PANGO_SCRIPT_HIRAGANA: @@ -1634,6 +1638,7 @@ default_break (const char *text, } prev_wc = wc; + prev_script = script; /* wc might not be a valid Unicode base character, but really all we * need to know is the last non-combining character */ diff --git a/tests/breaks/eight.expected b/tests/breaks/eight.expected index 39794d22..d71bb02f 100644 --- a/tests/breaks/eight.expected +++ b/tests/breaks/eight.expected @@ -4,4 +4,4 @@ Whitespace: x x x x Sentences: bs e b Words: bs be bs e s be bs be b bs be bs be bs be bs be bs be b Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b -Hyphens: i i i i i i i i i i i i i i i i +Hyphens: i i i i i i i i i i i i i i i i diff --git a/tests/breaks/eleven.expected b/tests/breaks/eleven.expected index 8df89869..b4a33897 100644 --- a/tests/breaks/eleven.expected +++ b/tests/breaks/eleven.expected @@ -1,7 +1,7 @@ 
-Text: ❤ ️ ︎ ︎ 👨 [0x200d]🦰 👨🏿 [0x200d]🦱 0 ️ ⃣ 🏴[0xe0075][0xe0073][0xe0063][0xe0061][0xe007f] 🇩🇪 ️ [0x0a] -Breaks: c lc lc lc lc lc c lc -Whitespace: w w -Sentences: bs e b -Words: b b b bs be b b b -Graphemes: b b b b b b b b -Hyphens: i i i i i i i i +Text: ❤️ ︎ ︎ 👨[0x200d] 🦰 👨🏿[0x200d] 🦱 0️ ⃣ 🏴[0xe0075][0xe0073][0xe0063][0xe0061][0xe007f] 🇩🇪️ [0x0a] +Breaks: c lc lc lc lc lc c Lc +Whitespace: w w +Sentences: bs e b +Words: b b b bs be b b b +Graphemes: b b b b b b b b +Hyphens: i i i i i diff --git a/tests/breaks/fifteen.expected b/tests/breaks/fifteen.expected index 93b37c39..3521a70b 100644 --- a/tests/breaks/fifteen.expected +++ b/tests/breaks/fifteen.expected @@ -1,7 +1,7 @@ Text: o n e [ ] t w o - t h r e e [ ] f o [0xad] u r [0x0a] -Breaks: c c c c lc c c c lc c c c c c lc c c lc c c lc +Breaks: c c c c lc c c c lc c c c c c lc c c lc c c Lc Whitespace: x x w w Sentences: bs e b Words: bs be bs be bs be bs be b Graphemes: b b b b b b b b b b b b b b b b b b b b b -Hyphens: i i i i i i i i i +Hyphens: i i i i i i i i i i diff --git a/tests/breaks/one.expected b/tests/breaks/one.expected index 44fee3ef..6c811256 100644 --- a/tests/breaks/one.expected +++ b/tests/breaks/one.expected @@ -1,7 +1,7 @@ Text: a b c / d e f [ ] g h i [0xad] j k l . 
[ ] B l a [0x0a] -Breaks: c c c c lc c c c lc c c c lc c c c c lc c c c lc +Breaks: c c c c lc c c c lc c c c lc c c c c lc c c c Lc Whitespace: x x w w Sentences: bs e bs e b Words: bs be bs be bs be b bs be b Graphemes: b b b b b b b b b b b b b b b b b b b b b b -Hyphens: i i i i i i i i i i i +Hyphens: i i i i i i i i i i i i diff --git a/tests/breaks/seventeen.expected b/tests/breaks/seventeen.expected index 8f5f2749..35fb5120 100644 --- a/tests/breaks/seventeen.expected +++ b/tests/breaks/seventeen.expected @@ -4,4 +4,4 @@ Whitespace: x x x w Sentences: bs e bs e bs e b Words: bs be bs be bs be bs e s be bs be bs be bs be bs e s be bs be bs be bs be bs e s e s be b Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b -Hyphens: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i +Hyphens: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i diff --git a/tests/breaks/sixteen.expected b/tests/breaks/sixteen.expected index 0fd06fb2..2868b8e4 100644 --- a/tests/breaks/sixteen.expected +++ b/tests/breaks/sixteen.expected @@ -1,7 +1,7 @@ Text: h y ‧ p h e n | a t i o n [ ] o v e r [0xad] l o a d [0x0a] -Breaks: c c c lc c c c c lc c c c c c lc c c c c lc c c c c lc +Breaks: c c c lc c c c c lc c c c c c lc c c c c lc c c c c Lc Whitespace: x w w Sentences: bs e b Words: bs e s be bs be bs be b Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b -Hyphens: i ri i i i i i i i i i i i i i i +Hyphens: i ri i i i i i i i i i i i i i i i diff --git a/tests/breaks/thirteen.expected b/tests/breaks/thirteen.expected index e6b1787d..25a38150 100644 --- a/tests/breaks/thirteen.expected +++ b/tests/breaks/thirteen.expected @@ -1,7 +1,7 @@ Text: a [ ] a b [0x200b] s p [0x200b] [ ] [ ] d e [0xad] f g [ ] b [0x0a] -Breaks: c c lc c c lc c c c c lc c 
c lc c c lc c lc +Breaks: c c lc c c lc c c c c lc c c lc c c lc c Lc Whitespace: x x x x w w Sentences: bs e b Words: bs be bs be bs be b Graphemes: b b b b b b b b b b b b b b b b b b b -Hyphens: i i i i i i +Hyphens: i i i i i i i i diff --git a/tests/breaks/two.break b/tests/breaks/two.break index 6ff0a36e..53c39c5c 100644 --- a/tests/breaks/two.break +++ b/tests/breaks/two.break @@ -1,3 +1,3 @@ # Example from https://gitlab.gnome.org/GNOME/pango/issues/218 # This shows difference between word start/end and boundary -goril·les +goril‧les diff --git a/tests/breaks/two.expected b/tests/breaks/two.expected index 2921d224..58d15186 100644 --- a/tests/breaks/two.expected +++ b/tests/breaks/two.expected @@ -1,7 +1,7 @@ -Text: g o r i l · l e s [0x0a] -Breaks: c c c c c c c c c c lc -Whitespace: w w -Sentences: bs e b -Words: bs e s be b -Graphemes: b b b b b b b b b b b -Hyphens: i i i i i i i +Text: g o r i l ‧ l e s [0x0a] +Breaks: c c c c c c lc c c c Lc +Whitespace: w w +Sentences: bs e b +Words: bs e s be b +Graphemes: b b b b b b b b b b b +Hyphens: i i i i ri i i |