diff options
author | Matthias Clasen <mclasen@redhat.com> | 2018-11-27 17:46:59 +0000 |
---|---|---|
committer | Matthias Clasen <mclasen@redhat.com> | 2018-11-27 17:46:59 +0000 |
commit | 169044900b20cba827be72510396a52ecde9ed51 (patch) | |
tree | 4e72c38cb363fea4fe7ad7f36264f4faf4f090f6 /pango | |
parent | 1dd9c9aec9fd593d033540900fb91b597a085cf5 (diff) | |
parent | ad1dc6019e67556b5b3b4fb164c15a2c3289bbc9 (diff) | |
download | pango-169044900b20cba827be72510396a52ecde9ed51.tar.gz |
Merge branch 'unicode11' into 'master'
Update Unicode Text Segmentation to Unicode 11
See merge request GNOME/pango!15
Diffstat (limited to 'pango')
-rw-r--r-- | pango/break.c | 152 | ||||
-rw-r--r-- | pango/pango-emoji-private.h | 3 | ||||
-rw-r--r-- | pango/pango-emoji-table.h | 240 | ||||
-rw-r--r-- | pango/pango-emoji.c | 6 |
4 files changed, 299 insertions, 102 deletions
diff --git a/pango/break.c b/pango/break.c index 2e66972e..558df7e6 100644 --- a/pango/break.c +++ b/pango/break.c @@ -24,6 +24,7 @@ #include "pango-break.h" #include "pango-engine-private.h" #include "pango-script-private.h" +#include "pango-emoji-private.h" #include "pango-impl-utils.h" #include <string.h> @@ -194,16 +195,12 @@ pango_default_break (const gchar *text, GB_SpacingMark, GB_InHangulSyllable, /* Handles all of L, V, T, LV, LVT rules */ /* Use state machine to handle emoji sequence */ - /* Rule GB10 and GB11 */ - GB_E_Base, - GB_E_Modifier, - GB_Glue_After_Zwj, - GB_E_Base_GAZ, /* Rule GB12 and GB13 */ GB_RI_Odd, /* Meets odd number of RI */ GB_RI_Even, /* Meets even number of RI */ } GraphemeBreakType; GraphemeBreakType prev_GB_type = GB_Other; + gboolean met_Extended_Pictographic = FALSE; /* See Word_Break Property Values table of UAX#29 */ typedef enum @@ -221,6 +218,7 @@ pango_default_break (const gchar *text, WB_ExtendNumLet, WB_RI_Odd, WB_RI_Even, + WB_WSegSpace, } WordBreakType; WordBreakType prev_prev_WB_type = WB_Other, prev_WB_type = WB_Other; gint prev_WB_i = -1; @@ -371,10 +369,14 @@ pango_default_break (const gchar *text, /* Just few spaces have variable width. So explicitly mark them. */ attrs[i].is_expandable_space = (0x0020 == wc || 0x00A0 == wc); + gboolean is_Extended_Pictographic = + _pango_Is_Emoji_Extended_Pictographic (wc); + /* ---- UAX#29 Grapheme Boundaries ---- */ { GraphemeBreakType GB_type; + /* Find the GraphemeBreakType of wc */ GB_type = GB_Other; switch ((int) type) @@ -447,70 +449,6 @@ pango_default_break (const gchar *text, break; case G_UNICODE_OTHER_SYMBOL: - if (G_UNLIKELY(wc == 0x261D || - wc == 0x26F9 || - (wc >= 0x270A && wc <= 0x270D) || - wc == 0x1F385 || - (wc >= 0x1F3C2 && wc <= 0x1F3C4) || - wc == 0x1F3C7 || - (wc >= 0x1F3CA && wc <= 0x1F3CC) || - (wc >= 0x1F442 && wc <= 0x1F443) || - (wc >= 0x1F446 && wc <= 0x1F450) || - wc == 0x1F46E || - (wc >= 0x1F470 && wc <= 0x1F478) || - wc == 0x1F47C || - (wc >= 0x1F481 && wc <= 0x1F483) || - (wc >= 0x1F485 && wc <= 0x1F487) || - wc == 0x1F4AA || - (wc >= 0x1F574 && wc <= 0x1F575) || - wc == 0x1F57A || - wc == 0x1F590 || - (wc >= 0x1F595 && wc <= 0x1F596) || - (wc >= 0x1F645 && wc <= 0x1F647) || - (wc >= 0x1F64B && wc <= 0x1F64F) || - wc == 0x1F6A3 || - (wc >= 0x1F6B4 && wc <= 0x1F6B6) || - wc == 0x1F6C0 || - wc == 0x1F6CC || - (wc >= 0x1F918 && wc <= 0x1F91C) || - (wc >= 0x1F91E && wc <= 0x1F91F) || - wc == 0x1F926 || - (wc >= 0x1F930 && wc <= 0x1F939) || - (wc >= 0x1F93D && wc <= 0x1F93E) || - (wc >= 0x1F9D1 && wc <= 0x1F9DD))) - { - GB_type = GB_E_Base; - break; - } - if (G_UNLIKELY(wc == 0x2640 || - wc == 0x2642 || - (wc >= 0x2695 && wc <= 0x2696) || - wc == 0x2708 || - wc == 0x2764 || - wc == 0x1F308 || - wc == 0x1F33E || - wc == 0x1F373 || - wc == 0x1F393 || - wc == 0x1F3A4 || - wc == 0x1F3A8 || - wc == 0x1F3EB || - wc == 0x1F3ED || - wc == 0x1F48B || - (wc >= 0x1F4BB && wc <= 0x1F4BC) || - wc == 0x1F527 || - wc == 0x1F52C || - wc == 0x1F5E8 || - wc == 0x1F680 || - wc == 0x1F692)) - { - GB_type = GB_Glue_After_Zwj; - break; - } - if (G_UNLIKELY(wc >= 0x1F466 && wc <= 0x1F469)) - { - GB_type = GB_E_Base_GAZ; - break; - } if (G_UNLIKELY(wc >=0x1F1E6 && wc <=0x1F1FF)) { if (prev_GB_type == GB_RI_Odd) @@ -525,11 +463,28 @@ pango_default_break (const gchar *text, case G_UNICODE_MODIFIER_SYMBOL: if (wc >= 0x1F3FB && wc <= 0x1F3FF) - GB_type = GB_E_Modifier; + GB_type = GB_Extend; break; } + /* Rule GB11 */ + if (met_Extended_Pictographic) + { + if (GB_type == GB_Extend) + met_Extended_Pictographic = TRUE; + else if (_pango_Is_Emoji_Extended_Pictographic (prev_wc) && + GB_type == GB_ZWJ) + met_Extended_Pictographic = TRUE; + else if (prev_GB_type == GB_Extend && GB_type == GB_ZWJ) + met_Extended_Pictographic = TRUE; + else if (prev_GB_type == GB_ZWJ && is_Extended_Pictographic) + met_Extended_Pictographic = TRUE; + else + met_Extended_Pictographic = FALSE; + } + /* Grapheme Cluster Boundary Rules */ + is_grapheme_boundary = TRUE; /* Rule GB999 */ /* We apply Rules GB1 and GB2 at the end of the function */ if (wc == '\n' && prev_wc == '\r') @@ -540,9 +495,6 @@ pango_default_break (const gchar *text, is_grapheme_boundary = FALSE; /* Rules GB6, GB7, GB8 */ else if (GB_type == GB_Extend) { - /* Rule GB10 */ - if (prev_GB_type == GB_E_Base || prev_GB_type == GB_E_Base_GAZ) - GB_type = prev_GB_type; is_grapheme_boundary = FALSE; /* Rule GB9 */ } else if (GB_type == GB_ZWJ) @@ -551,37 +503,23 @@ pango_default_break (const gchar *text, is_grapheme_boundary = FALSE; /* Rule GB9a */ else if (prev_GB_type == GB_Prepend) is_grapheme_boundary = FALSE; /* Rule GB9b */ - /* Rule GB10 */ - else if (prev_GB_type == GB_E_Base || prev_GB_type == GB_E_Base_GAZ) - { - if (GB_type == GB_E_Modifier) - is_grapheme_boundary = FALSE; - else - is_grapheme_boundary = TRUE; - } - else if (prev_GB_type == GB_ZWJ && - (GB_type == GB_Glue_After_Zwj || GB_type == GB_E_Base_GAZ)) - is_grapheme_boundary = FALSE; /* Rule GB11 */ + else if (is_Extended_Pictographic) + { /* Rule GB11 */ + if (prev_GB_type == GB_ZWJ && met_Extended_Pictographic) + is_grapheme_boundary = FALSE; + } else if (prev_GB_type == GB_RI_Odd && GB_type == GB_RI_Even) is_grapheme_boundary = FALSE; /* Rule GB12 and GB13 */ - else - is_grapheme_boundary = TRUE; /* Rule GB999 */ + + if (is_Extended_Pictographic) + met_Extended_Pictographic = TRUE; attrs[i].is_cursor_position = is_grapheme_boundary; /* If this is a grapheme boundary, we have to decide if backspace * deletes a character or the whole grapheme cluster */ if (is_grapheme_boundary) { - if (prev_GB_type == GB_E_Base || - prev_GB_type == GB_E_Base_GAZ || - prev_GB_type == GB_Glue_After_Zwj || - prev_GB_type == GB_Extend || - prev_GB_type == GB_E_Modifier || - prev_GB_type == GB_RI_Odd || - prev_GB_type == GB_RI_Even) - attrs[i].backspace_deletes_character = FALSE; - else - attrs[i].backspace_deletes_character = BACKSPACE_DELETES_CHARACTER (base_character); + attrs[i].backspace_deletes_character = BACKSPACE_DELETES_CHARACTER (base_character); } else attrs[i].backspace_deletes_character = FALSE; @@ -723,7 +661,14 @@ pango_default_break (const gchar *text, break; } - /* Grapheme Cluster Boundary Rules */ + if (WB_type == WB_Other) + { + if (type == G_UNICODE_SPACE_SEPARATOR && + break_type != G_UNICODE_BREAK_NON_BREAKING_GLUE) + WB_type = WB_WSegSpace; + } + + /* Word Cluster Boundary Rules */ /* We apply Rules WB1 and WB2 at the end of the function */ @@ -739,6 +684,11 @@ pango_default_break (const gchar *text, } else if (WB_type == WB_NewlineCRLF) is_word_boundary = TRUE; /* Rule WB3b */ + else if (prev_wc == 0x200D && is_Extended_Pictographic) + is_word_boundary = FALSE; /* Rule WB3c */ + else if (prev_WB_type == WB_WSegSpace && + WB_type == WB_WSegSpace && prev_WB_i + 1 == i) + is_word_boundary = FALSE; /* Rule WB3d */ else if (WB_type == WB_ExtendFormat) is_word_boundary = FALSE; /* Rules WB4? */ else if ((prev_WB_type == WB_ALetter || @@ -1089,7 +1039,7 @@ pango_default_break (const gchar *text, /* Rule LB1: assign a line breaking class to each code point of the input. */ - switch ((int) break_type) + switch (break_type) { case G_UNICODE_BREAK_AMBIGUOUS: case G_UNICODE_BREAK_SURROGATE: @@ -1122,6 +1072,7 @@ pango_default_break (const gchar *text, break_type == G_UNICODE_BREAK_HANGUL_T_JAMO || break_type == G_UNICODE_BREAK_HANGUL_LV_SYLLABLE || break_type == G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE || + break_type == G_UNICODE_BREAK_EMOJI_MODIFIER || break_type == G_UNICODE_BREAK_REGIONAL_INDICATOR) { LineBreakType LB_type; @@ -1425,10 +1376,7 @@ pango_default_break (const gchar *text, if (row_break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE) break_op = BREAK_ALLOWED; /* Rule LB8 */ - if (prev_wc == 0x200D && - (break_type == G_UNICODE_BREAK_IDEOGRAPHIC || - break_type == G_UNICODE_BREAK_EMOJI_BASE || - break_type == G_UNICODE_BREAK_EMOJI_MODIFIER)) + if (prev_wc == 0x200D) break_op = BREAK_PROHIBITED; /* Rule LB8a */ if (break_type == G_UNICODE_BREAK_SPACE || diff --git a/pango/pango-emoji-private.h b/pango/pango-emoji-private.h index a360b37a..ed4b7213 100644 --- a/pango/pango-emoji-private.h +++ b/pango/pango-emoji-private.h @@ -24,6 +24,9 @@ #include <glib.h> +gboolean +_pango_Is_Emoji_Extended_Pictographic (gunichar ch); + typedef struct _PangoEmojiIter PangoEmojiIter; struct _PangoEmojiIter diff --git a/pango/pango-emoji-table.h b/pango/pango-emoji-table.h index 138de885..87325f30 100644 --- a/pango/pango-emoji-table.h +++ b/pango/pango-emoji-table.h @@ -302,6 +302,246 @@ static const struct Interval _pango_Emoji_Modifier_Base_table[] = {0x1F9D1, 0x1F9DD}, }; +static const struct Interval _pango_Extended_Pictographic_table[] = +{ + {0x00A9, 0x00A9}, + {0x00AE, 0x00AE}, + {0x203C, 0x203C}, + {0x2049, 0x2049}, + {0x2122, 0x2122}, + {0x2139, 0x2139}, + {0x2194, 0x2199}, + {0x21A9, 0x21AA}, + {0x231A, 0x231B}, + {0x2328, 0x2328}, + {0x2388, 0x2388}, + {0x23CF, 0x23CF}, + {0x23E9, 0x23F3}, + {0x23F8, 0x23FA}, + {0x24C2, 0x24C2}, + {0x25AA, 0x25AB}, + {0x25B6, 0x25B6}, + {0x25C0, 0x25C0}, + {0x25FB, 0x25FE}, + {0x2600, 0x2605}, + {0x2607, 0x2612}, + {0x2614, 0x2615}, + {0x2616, 0x2617}, + {0x2618, 0x2618}, + {0x2619, 0x2619}, + {0x261A, 0x266F}, + {0x2670, 0x2671}, + {0x2672, 0x267D}, + {0x267E, 0x267F}, + {0x2680, 0x2685}, + {0x2690, 0x2691}, + {0x2692, 0x269C}, + {0x269D, 0x269D}, + {0x269E, 0x269F}, + {0x26A0, 0x26A1}, + {0x26A2, 0x26B1}, + {0x26B2, 0x26B2}, + {0x26B3, 0x26BC}, + {0x26BD, 0x26BF}, + {0x26C0, 0x26C3}, + {0x26C4, 0x26CD}, + {0x26CE, 0x26CE}, + {0x26CF, 0x26E1}, + {0x26E2, 0x26E2}, + {0x26E3, 0x26E3}, + {0x26E4, 0x26E7}, + {0x26E8, 0x26FF}, + {0x2700, 0x2700}, + {0x2701, 0x2704}, + {0x2705, 0x2705}, + {0x2708, 0x2709}, + {0x270A, 0x270B}, + {0x270C, 0x2712}, + {0x2714, 0x2714}, + {0x2716, 0x2716}, + {0x271D, 0x271D}, + {0x2721, 0x2721}, + {0x2728, 0x2728}, + {0x2733, 0x2734}, + {0x2744, 0x2744}, + {0x2747, 0x2747}, + {0x274C, 0x274C}, + {0x274E, 0x274E}, + {0x2753, 0x2755}, + {0x2757, 0x2757}, + {0x2763, 0x2767}, + {0x2795, 0x2797}, + {0x27A1, 0x27A1}, + {0x27B0, 0x27B0}, + {0x27BF, 0x27BF}, + {0x2934, 0x2935}, + {0x2B05, 0x2B07}, + {0x2B1B, 0x2B1C}, + {0x2B50, 0x2B50}, + {0x2B55, 0x2B55}, + {0x3030, 0x3030}, + {0x303D, 0x303D}, + {0x3297, 0x3297}, + {0x3299, 0x3299}, + {0x1F000, 0x1F02B}, + {0x1F02C, 0x1F02F}, + {0x1F030, 0x1F093}, + {0x1F094, 0x1F09F}, + {0x1F0A0, 0x1F0AE}, + {0x1F0AF, 0x1F0B0}, + {0x1F0B1, 0x1F0BE}, + {0x1F0BF, 0x1F0BF}, + {0x1F0C0, 0x1F0C0}, + {0x1F0C1, 0x1F0CF}, + {0x1F0D0, 0x1F0D0}, + {0x1F0D1, 0x1F0DF}, + {0x1F0E0, 0x1F0F5}, + {0x1F0F6, 0x1F0FF}, + {0x1F10D, 0x1F10F}, + {0x1F12F, 0x1F12F}, + {0x1F16C, 0x1F16F}, + {0x1F170, 0x1F171}, + {0x1F17E, 0x1F17E}, + {0x1F17F, 0x1F17F}, + {0x1F18E, 0x1F18E}, + {0x1F191, 0x1F19A}, + {0x1F1AD, 0x1F1E5}, + {0x1F201, 0x1F202}, + {0x1F203, 0x1F20F}, + {0x1F21A, 0x1F21A}, + {0x1F22F, 0x1F22F}, + {0x1F232, 0x1F23A}, + {0x1F23C, 0x1F23F}, + {0x1F249, 0x1F24F}, + {0x1F250, 0x1F251}, + {0x1F252, 0x1F25F}, + {0x1F260, 0x1F265}, + {0x1F266, 0x1F2FF}, + {0x1F300, 0x1F320}, + {0x1F321, 0x1F32C}, + {0x1F32D, 0x1F32F}, + {0x1F330, 0x1F335}, + {0x1F336, 0x1F336}, + {0x1F337, 0x1F37C}, + {0x1F37D, 0x1F37D}, + {0x1F37E, 0x1F37F}, + {0x1F380, 0x1F393}, + {0x1F394, 0x1F39F}, + {0x1F3A0, 0x1F3C4}, + {0x1F3C5, 0x1F3C5}, + {0x1F3C6, 0x1F3CA}, + {0x1F3CB, 0x1F3CE}, + {0x1F3CF, 0x1F3D3}, + {0x1F3D4, 0x1F3DF}, + {0x1F3E0, 0x1F3F0}, + {0x1F3F1, 0x1F3F7}, + {0x1F3F8, 0x1F3FA}, + {0x1F400, 0x1F43E}, + {0x1F43F, 0x1F43F}, + {0x1F440, 0x1F440}, + {0x1F441, 0x1F441}, + {0x1F442, 0x1F4F7}, + {0x1F4F8, 0x1F4F8}, + {0x1F4F9, 0x1F4FC}, + {0x1F4FD, 0x1F4FE}, + {0x1F4FF, 0x1F4FF}, + {0x1F500, 0x1F53D}, + {0x1F546, 0x1F54A}, + {0x1F54B, 0x1F54F}, + {0x1F550, 0x1F567}, + {0x1F568, 0x1F579}, + {0x1F57A, 0x1F57A}, + {0x1F57B, 0x1F5A3}, + {0x1F5A4, 0x1F5A4}, + {0x1F5A5, 0x1F5FA}, + {0x1F5FB, 0x1F5FF}, + {0x1F600, 0x1F600}, + {0x1F601, 0x1F610}, + {0x1F611, 0x1F611}, + {0x1F612, 0x1F614}, + {0x1F615, 0x1F615}, + {0x1F616, 0x1F616}, + {0x1F617, 0x1F617}, + {0x1F618, 0x1F618}, + {0x1F619, 0x1F619}, + {0x1F61A, 0x1F61A}, + {0x1F61B, 0x1F61B}, + {0x1F61C, 0x1F61E}, + {0x1F61F, 0x1F61F}, + {0x1F620, 0x1F625}, + {0x1F626, 0x1F627}, + {0x1F628, 0x1F62B}, + {0x1F62C, 0x1F62C}, + {0x1F62D, 0x1F62D}, + {0x1F62E, 0x1F62F}, + {0x1F630, 0x1F633}, + {0x1F634, 0x1F634}, + {0x1F635, 0x1F640}, + {0x1F641, 0x1F642}, + {0x1F643, 0x1F644}, + {0x1F645, 0x1F64F}, + {0x1F680, 0x1F6C5}, + {0x1F6C6, 0x1F6CF}, + {0x1F6D0, 0x1F6D0}, + {0x1F6D1, 0x1F6D2}, + {0x1F6D3, 0x1F6D4}, + {0x1F6D5, 0x1F6DF}, + {0x1F6E0, 0x1F6EC}, + {0x1F6ED, 0x1F6EF}, + {0x1F6F0, 0x1F6F3}, + {0x1F6F4, 0x1F6F6}, + {0x1F6F7, 0x1F6F8}, + {0x1F6F9, 0x1F6F9}, + {0x1F6FA, 0x1F6FF}, + {0x1F774, 0x1F77F}, + {0x1F7D5, 0x1F7D8}, + {0x1F7D9, 0x1F7FF}, + {0x1F80C, 0x1F80F}, + {0x1F848, 0x1F84F}, + {0x1F85A, 0x1F85F}, + {0x1F888, 0x1F88F}, + {0x1F8AE, 0x1F8FF}, + {0x1F90C, 0x1F90F}, + {0x1F910, 0x1F918}, + {0x1F919, 0x1F91E}, + {0x1F91F, 0x1F91F}, + {0x1F920, 0x1F927}, + {0x1F928, 0x1F92F}, + {0x1F930, 0x1F930}, + {0x1F931, 0x1F932}, + {0x1F933, 0x1F93A}, + {0x1F93C, 0x1F93E}, + {0x1F93F, 0x1F93F}, + {0x1F940, 0x1F945}, + {0x1F947, 0x1F94B}, + {0x1F94C, 0x1F94C}, + {0x1F94D, 0x1F94F}, + {0x1F950, 0x1F95E}, + {0x1F95F, 0x1F96B}, + {0x1F96C, 0x1F970}, + {0x1F971, 0x1F972}, + {0x1F973, 0x1F976}, + {0x1F977, 0x1F979}, + {0x1F97A, 0x1F97A}, + {0x1F97B, 0x1F97B}, + {0x1F97C, 0x1F97F}, + {0x1F980, 0x1F984}, + {0x1F985, 0x1F991}, + {0x1F992, 0x1F997}, + {0x1F998, 0x1F9A2}, + {0x1F9A3, 0x1F9AF}, + {0x1F9B0, 0x1F9B9}, + {0x1F9BA, 0x1F9BF}, + {0x1F9C0, 0x1F9C0}, + {0x1F9C1, 0x1F9C2}, + {0x1F9C3, 0x1F9CF}, + {0x1F9D0, 0x1F9E6}, + {0x1F9E7, 0x1F9FF}, + {0x1FA00, 0x1FA5F}, + {0x1FA60, 0x1FA6D}, + {0x1FA6E, 0x1FFFD}, +}; + #endif /* PANGO_EMOJI_TABLE_H */ /* == End of generated table == */ diff --git a/pango/pango-emoji.c b/pango/pango-emoji.c index 46ab5b3f..158daa5b 100644 --- a/pango/pango-emoji.c +++ b/pango/pango-emoji.c @@ -92,7 +92,13 @@ DEFINE_pango_Is_(Emoji) DEFINE_pango_Is_(Emoji_Presentation) DEFINE_pango_Is_(Emoji_Modifier) DEFINE_pango_Is_(Emoji_Modifier_Base) +DEFINE_pango_Is_(Extended_Pictographic) +gboolean +_pango_Is_Emoji_Extended_Pictographic (gunichar ch) +{ + return _pango_Is_Extended_Pictographic (ch); +} static gboolean _pango_Is_Emoji_Text_Default (gunichar ch) |