diff options
25 files changed, 344 insertions, 986 deletions
diff --git a/src/lib/evas/canvas/evas_object_textblock.c b/src/lib/evas/canvas/evas_object_textblock.c index aacc251b1b..45fc1a0028 100644 --- a/src/lib/evas/canvas/evas_object_textblock.c +++ b/src/lib/evas/canvas/evas_object_textblock.c @@ -10291,7 +10291,7 @@ evas_textblock_cursor_word_start(Efl_Text_Cursor_Handle *cur) if ((cur->pos > 0) && (cur->pos == len)) cur->pos--; - for (i = cur->pos ; _is_white(text[i]) ; i--) + for (i = cur->pos ; _is_white(text[i]) && BREAK_AFTER(i) ; i--) { if (i == 0) { @@ -10358,7 +10358,7 @@ evas_textblock_cursor_word_end(Efl_Text_Cursor_Handle *cur) set_wordbreaks_utf32((const utf32_t *) text, len, lang, breaks); } - for (i = cur->pos; text[i] && _is_white(text[i]) ; i++); + for (i = cur->pos; text[i] && _is_white(text[i]) && (BREAK_AFTER(i)) ; i++); if (i == len) { Evas_Object_Textblock_Node_Text *nnode; diff --git a/src/static_libs/libunibreak/LICENCE b/src/static_libs/libunibreak/LICENCE index 6b4137ca21..3fba16ad53 100644 --- a/src/static_libs/libunibreak/LICENCE +++ b/src/static_libs/libunibreak/LICENCE @@ -1,7 +1,7 @@ -Copyright (C) Wu Yongwei <wuyongwei at gmail dot com> -Copyright (C) Tom Hacohen <tom at stosb dot com> -Copyright (C) Petr Filipsky <philodej at gmail dot com> -Copyright (C) Andreas Röver <roever at users dot sf dot net> +Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com> +Copyright (C) 2012-2016 Tom Hacohen <tom at stosb dot com> +Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com> +Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net> This software is provided 'as-is', without any express or implied warranty. In no event will the author be held liable for any damages diff --git a/src/static_libs/libunibreak/NEWS b/src/static_libs/libunibreak/NEWS index a2b9e0302e..d217628da8 100644 --- a/src/static_libs/libunibreak/NEWS +++ b/src/static_libs/libunibreak/NEWS @@ -1,14 +1,3 @@ -New in libunibreak 4.2 - -- Update the data to conform to Unicode 12 - -New in libunibreak 4.1 - -- Update the code and data to conform to Unicode 11.0.0, especially - adding support for extended pictographs in word and grapheme breaking -- ZWJ support has been much improved (it was broken) -- Make minor tweaks to the project files - New in libunibreak 4.0 - Update the code and data to conform to Unicode 9.0.0 @@ -33,8 +22,7 @@ New in libunibreak 1.1 New in libunibreak 1.0 - Add word breaking support -- Change the library name to "libunibreak", while keeping maximum - compatibility +- Change the library name to "libunibreak", while keeping maximum compatibility - Add pkg-config support New in liblinebreak 2.1 diff --git a/src/static_libs/libunibreak/README.md b/src/static_libs/libunibreak/README.md index 4e65059586..f37fd902aa 100644 --- a/src/static_libs/libunibreak/README.md +++ b/src/static_libs/libunibreak/README.md @@ -6,11 +6,11 @@ Overview This is the README file for libunibreak, an implementation of the line breaking and word breaking algorithms as described in [Unicode Standard -Annex 14][1] and [Unicode Standard Annex 29][2]. Check the project's -[home page][3] for up-to-date information. +Annex 14] [1] and [Unicode Standard Annex 29] [2]. Check the project's +[home page] [3] for up-to-date information. - [1]: http://www.unicode.org/reports/tr14/ - [2]: http://www.unicode.org/reports/tr29/ + [1]: http://www.unicode.org/reports/tr14/tr14-37.html + [2]: http://www.unicode.org/reports/tr29/tr29-29.html [3]: https://github.com/adah1972/libunibreak @@ -21,7 +21,7 @@ This library is released under an open-source licence, the zlib/libpng licence. Please check the file *LICENCE* for details. Apart from using the algorithm, part of the code is derived from the -[Unicode Public Data][4], and the [Unicode Terms of Use][5] may apply. +[Unicode Public Data] [4], and the [Unicode Terms of Use] [5] may apply. [4]: http://www.unicode.org/Public/ [5]: http://www.unicode.org/copyright.html @@ -48,8 +48,6 @@ There are three ways to build the library: *WordBreakProperty.txt*. - type `make graphemebreakdata` to regenerate *graphemebreakdata.c* from *GraphemeBreakProperty.txt*. - - type `make emojidata` to regenerate *emojidata.c* from - *emoji-data.txt*. 2. On systems where GCC and Binutils are supported, one can type @@ -67,8 +65,6 @@ There are three ways to build the library: *WordBreakProperty.txt*. - type `make graphemebreakdata` to regenerate *graphemebreakdata.c* from *GraphemeBreakProperty.txt*. - - type `make emojidata` to regenerate *emojidata.c* from - *emoji-data.txt*. 3. On Windows, apart from using method 1 (Cygwin/MSYS) and method 2 (MinGW), MSVC can also be used. Type @@ -76,7 +72,7 @@ There are three ways to build the library: cd src nmake -f Makefile.msvc - to build the static library. By default the debug version is built. + to build the static library. By default the debug release is built. To build the release version nmake -f Makefile.msvc CFG="libunibreak - Win32 Release" diff --git a/src/static_libs/libunibreak/emojidata.c b/src/static_libs/libunibreak/emojidata.c deleted file mode 100644 index a78f2678e7..0000000000 --- a/src/static_libs/libunibreak/emojidata.c +++ /dev/null @@ -1,264 +0,0 @@ -/* The content of this file is generated from: -# emoji-data.txt -# Date: 2019-01-15, 12:10:05 GMT -*/ - -static const struct ExtendedPictograpic ep_prop[] = { - {0x00A9, 0x00A9}, - {0x00AE, 0x00AE}, - {0x203C, 0x203C}, - {0x2049, 0x2049}, - {0x2122, 0x2122}, - {0x2139, 0x2139}, - {0x2194, 0x2199}, - {0x21A9, 0x21AA}, - {0x231A, 0x231B}, - {0x2328, 0x2328}, - {0x2388, 0x2388}, - {0x23CF, 0x23CF}, - {0x23E9, 0x23F3}, - {0x23F8, 0x23FA}, - {0x24C2, 0x24C2}, - {0x25AA, 0x25AB}, - {0x25B6, 0x25B6}, - {0x25C0, 0x25C0}, - {0x25FB, 0x25FE}, - {0x2600, 0x2605}, - {0x2607, 0x2612}, - {0x2614, 0x2615}, - {0x2616, 0x2617}, - {0x2618, 0x2618}, - {0x2619, 0x2619}, - {0x261A, 0x266F}, - {0x2670, 0x2671}, - {0x2672, 0x267D}, - {0x267E, 0x267F}, - {0x2680, 0x2685}, - {0x2690, 0x2691}, - {0x2692, 0x269C}, - {0x269D, 0x269D}, - {0x269E, 0x269F}, - {0x26A0, 0x26A1}, - {0x26A2, 0x26B1}, - {0x26B2, 0x26B2}, - {0x26B3, 0x26BC}, - {0x26BD, 0x26BF}, - {0x26C0, 0x26C3}, - {0x26C4, 0x26CD}, - {0x26CE, 0x26CE}, - {0x26CF, 0x26E1}, - {0x26E2, 0x26E2}, - {0x26E3, 0x26E3}, - {0x26E4, 0x26E7}, - {0x26E8, 0x26FF}, - {0x2700, 0x2700}, - {0x2701, 0x2704}, - {0x2705, 0x2705}, - {0x2708, 0x2709}, - {0x270A, 0x270B}, - {0x270C, 0x2712}, - {0x2714, 0x2714}, - {0x2716, 0x2716}, - {0x271D, 0x271D}, - {0x2721, 0x2721}, - {0x2728, 0x2728}, - {0x2733, 0x2734}, - {0x2744, 0x2744}, - {0x2747, 0x2747}, - {0x274C, 0x274C}, - {0x274E, 0x274E}, - {0x2753, 0x2755}, - {0x2757, 0x2757}, - {0x2763, 0x2767}, - {0x2795, 0x2797}, - {0x27A1, 0x27A1}, - {0x27B0, 0x27B0}, - {0x27BF, 0x27BF}, - {0x2934, 0x2935}, - {0x2B05, 0x2B07}, - {0x2B1B, 0x2B1C}, - {0x2B50, 0x2B50}, - {0x2B55, 0x2B55}, - {0x3030, 0x3030}, - {0x303D, 0x303D}, - {0x3297, 0x3297}, - {0x3299, 0x3299}, - {0x1F000, 0x1F02B}, - {0x1F02C, 0x1F02F}, - {0x1F030, 0x1F093}, - {0x1F094, 0x1F09F}, - {0x1F0A0, 0x1F0AE}, - {0x1F0AF, 0x1F0B0}, - {0x1F0B1, 0x1F0BE}, - {0x1F0BF, 0x1F0BF}, - {0x1F0C0, 0x1F0C0}, - {0x1F0C1, 0x1F0CF}, - {0x1F0D0, 0x1F0D0}, - {0x1F0D1, 0x1F0DF}, - {0x1F0E0, 0x1F0F5}, - {0x1F0F6, 0x1F0FF}, - {0x1F10D, 0x1F10F}, - {0x1F12F, 0x1F12F}, - {0x1F16C, 0x1F16C}, - {0x1F16D, 0x1F16F}, - {0x1F170, 0x1F171}, - {0x1F17E, 0x1F17E}, - {0x1F17F, 0x1F17F}, - {0x1F18E, 0x1F18E}, - {0x1F191, 0x1F19A}, - {0x1F1AD, 0x1F1E5}, - {0x1F201, 0x1F202}, - {0x1F203, 0x1F20F}, - {0x1F21A, 0x1F21A}, - {0x1F22F, 0x1F22F}, - {0x1F232, 0x1F23A}, - {0x1F23C, 0x1F23F}, - {0x1F249, 0x1F24F}, - {0x1F250, 0x1F251}, - {0x1F252, 0x1F25F}, - {0x1F260, 0x1F265}, - {0x1F266, 0x1F2FF}, - {0x1F300, 0x1F320}, - {0x1F321, 0x1F32C}, - {0x1F32D, 0x1F32F}, - {0x1F330, 0x1F335}, - {0x1F336, 0x1F336}, - {0x1F337, 0x1F37C}, - {0x1F37D, 0x1F37D}, - {0x1F37E, 0x1F37F}, - {0x1F380, 0x1F393}, - {0x1F394, 0x1F39F}, - {0x1F3A0, 0x1F3C4}, - {0x1F3C5, 0x1F3C5}, - {0x1F3C6, 0x1F3CA}, - {0x1F3CB, 0x1F3CE}, - {0x1F3CF, 0x1F3D3}, - {0x1F3D4, 0x1F3DF}, - {0x1F3E0, 0x1F3F0}, - {0x1F3F1, 0x1F3F7}, - {0x1F3F8, 0x1F3FA}, - {0x1F400, 0x1F43E}, - {0x1F43F, 0x1F43F}, - {0x1F440, 0x1F440}, - {0x1F441, 0x1F441}, - {0x1F442, 0x1F4F7}, - {0x1F4F8, 0x1F4F8}, - {0x1F4F9, 0x1F4FC}, - {0x1F4FD, 0x1F4FE}, - {0x1F4FF, 0x1F4FF}, - {0x1F500, 0x1F53D}, - {0x1F546, 0x1F54A}, - {0x1F54B, 0x1F54F}, - {0x1F550, 0x1F567}, - {0x1F568, 0x1F579}, - {0x1F57A, 0x1F57A}, - {0x1F57B, 0x1F5A3}, - {0x1F5A4, 0x1F5A4}, - {0x1F5A5, 0x1F5FA}, - {0x1F5FB, 0x1F5FF}, - {0x1F600, 0x1F600}, - {0x1F601, 0x1F610}, - {0x1F611, 0x1F611}, - {0x1F612, 0x1F614}, - {0x1F615, 0x1F615}, - {0x1F616, 0x1F616}, - {0x1F617, 0x1F617}, - {0x1F618, 0x1F618}, - {0x1F619, 0x1F619}, - {0x1F61A, 0x1F61A}, - {0x1F61B, 0x1F61B}, - {0x1F61C, 0x1F61E}, - {0x1F61F, 0x1F61F}, - {0x1F620, 0x1F625}, - {0x1F626, 0x1F627}, - {0x1F628, 0x1F62B}, - {0x1F62C, 0x1F62C}, - {0x1F62D, 0x1F62D}, - {0x1F62E, 0x1F62F}, - {0x1F630, 0x1F633}, - {0x1F634, 0x1F634}, - {0x1F635, 0x1F640}, - {0x1F641, 0x1F642}, - {0x1F643, 0x1F644}, - {0x1F645, 0x1F64F}, - {0x1F680, 0x1F6C5}, - {0x1F6C6, 0x1F6CF}, - {0x1F6D0, 0x1F6D0}, - {0x1F6D1, 0x1F6D2}, - {0x1F6D3, 0x1F6D4}, - {0x1F6D5, 0x1F6D5}, - {0x1F6D6, 0x1F6DF}, - {0x1F6E0, 0x1F6EC}, - {0x1F6ED, 0x1F6EF}, - {0x1F6F0, 0x1F6F3}, - {0x1F6F4, 0x1F6F6}, - {0x1F6F7, 0x1F6F8}, - {0x1F6F9, 0x1F6F9}, - {0x1F6FA, 0x1F6FA}, - {0x1F6FB, 0x1F6FF}, - {0x1F774, 0x1F77F}, - {0x1F7D5, 0x1F7D8}, - {0x1F7D9, 0x1F7DF}, - {0x1F7E0, 0x1F7EB}, - {0x1F7EC, 0x1F7FF}, - {0x1F80C, 0x1F80F}, - {0x1F848, 0x1F84F}, - {0x1F85A, 0x1F85F}, - {0x1F888, 0x1F88F}, - {0x1F8AE, 0x1F8FF}, - {0x1F90C, 0x1F90C}, - {0x1F90D, 0x1F90F}, - {0x1F910, 0x1F918}, - {0x1F919, 0x1F91E}, - {0x1F91F, 0x1F91F}, - {0x1F920, 0x1F927}, - {0x1F928, 0x1F92F}, - {0x1F930, 0x1F930}, - {0x1F931, 0x1F932}, - {0x1F933, 0x1F93A}, - {0x1F93C, 0x1F93E}, - {0x1F93F, 0x1F93F}, - {0x1F940, 0x1F945}, - {0x1F947, 0x1F94B}, - {0x1F94C, 0x1F94C}, - {0x1F94D, 0x1F94F}, - {0x1F950, 0x1F95E}, - {0x1F95F, 0x1F96B}, - {0x1F96C, 0x1F970}, - {0x1F971, 0x1F971}, - {0x1F972, 0x1F972}, - {0x1F973, 0x1F976}, - {0x1F977, 0x1F979}, - {0x1F97A, 0x1F97A}, - {0x1F97B, 0x1F97B}, - {0x1F97C, 0x1F97F}, - {0x1F980, 0x1F984}, - {0x1F985, 0x1F991}, - {0x1F992, 0x1F997}, - {0x1F998, 0x1F9A2}, - {0x1F9A3, 0x1F9A4}, - {0x1F9A5, 0x1F9AA}, - {0x1F9AB, 0x1F9AD}, - {0x1F9AE, 0x1F9AF}, - {0x1F9B0, 0x1F9B9}, - {0x1F9BA, 0x1F9BF}, - {0x1F9C0, 0x1F9C0}, - {0x1F9C1, 0x1F9C2}, - {0x1F9C3, 0x1F9CA}, - {0x1F9CB, 0x1F9CC}, - {0x1F9CD, 0x1F9CF}, - {0x1F9D0, 0x1F9E6}, - {0x1F9E7, 0x1F9FF}, - {0x1FA00, 0x1FA53}, - {0x1FA54, 0x1FA5F}, - {0x1FA60, 0x1FA6D}, - {0x1FA6E, 0x1FA6F}, - {0x1FA70, 0x1FA73}, - {0x1FA74, 0x1FA77}, - {0x1FA78, 0x1FA7A}, - {0x1FA7B, 0x1FA7F}, - {0x1FA80, 0x1FA82}, - {0x1FA83, 0x1FA8F}, - {0x1FA90, 0x1FA95}, - {0x1FA96, 0x1FFFD}, -}; diff --git a/src/static_libs/libunibreak/emojidef.c b/src/static_libs/libunibreak/emojidef.c deleted file mode 100644 index 43a2ed3db0..0000000000 --- a/src/static_libs/libunibreak/emojidef.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Emoji-related routine and data. - * - * Copyright (C) 2018 Andreas Röver <roever at users dot sf dot net> - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the author be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute - * it freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must - * not claim that you wrote the original software. If you use this - * software in a product, an acknowledgement in the product - * documentation would be appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must - * not be misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source - * distribution. - */ - -/** - * @file emojidef.c - * - * Emoji-related routine and data that are used internally. - * - * @author Andreas Röver - */ - -#include "emojidef.h" -#include "emojidata.c" - -/** - * Finds out if a codepoint is extended pictographic. - * - * @param[in] ch character to check - * @return \c true if the codepoint is extended pictographic; - * \c false otherwise - */ -bool ub_is_extended_pictographic(utf32_t ch) -{ - int min = 0; - int max = ARRAY_LEN(ep_prop) - 1; - int mid; - - do - { - mid = (min + max) / 2; - - if (ch < ep_prop[mid].start) - max = mid - 1; - else if (ch > ep_prop[mid].end) - min = mid + 1; - else - return true; - } while (min <= max); - - return false; -} diff --git a/src/static_libs/libunibreak/emojidef.h b/src/static_libs/libunibreak/emojidef.h deleted file mode 100644 index b9055fd261..0000000000 --- a/src/static_libs/libunibreak/emojidef.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Emoji-related routine and data. - * - * Copyright (C) 2018 Andreas Röver <roever at users dot sf dot net> - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the author be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute - * it freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must - * not claim that you wrote the original software. If you use this - * software in a product, an acknowledgement in the product - * documentation would be appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must - * not be misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source - * distribution. - */ - -/** - * @file emojidef.h - * - * Definitions of internal data structure and function for extended - * pictographs. - * - * @author Andreas Röver - */ - -#include "unibreakdef.h" - -/** - * Struct for entries of extended pictographic properties. The array of - * the entries \e must be sorted. All codepoints within this list have - * the property of being extended pictographic. - */ -struct ExtendedPictograpic -{ - utf32_t start; /**< Start codepoint */ - utf32_t end; /**< End codepoint, inclusive */ -}; - -bool ub_is_extended_pictographic(utf32_t ch); diff --git a/src/static_libs/libunibreak/graphemebreak.c b/src/static_libs/libunibreak/graphemebreak.c index 401522f12d..77c3d5f55c 100644 --- a/src/static_libs/libunibreak/graphemebreak.c +++ b/src/static_libs/libunibreak/graphemebreak.c @@ -2,7 +2,7 @@ * Grapheme breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2016-2019 Andreas Röver <roever at users dot sf dot net> + * Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net> * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -28,10 +28,6 @@ * Unicode 9.0.0: * <URL:http://www.unicode.org/reports/tr29/tr29-29.html> * - * This library has been updated according to Revision 35, for - * Unicode 12.0.0: - * <URL:http://www.unicode.org/reports/tr29/tr29-35.html> - * * The Unicode Terms of Use are available at * <URL:http://www.unicode.org/copyright.html> */ @@ -42,14 +38,23 @@ * Implementation of the grapheme breaking algorithm as described in Unicode * Standard Annex 29. * - * @author Andreas Röver + * @author Andreas Roever */ +#if defined(_MSC_VER) && _MSC_VER < 1800 +typedef int bool; +#define false 0 +#define true 1 +#else +#include <stdbool.h> +#endif + #include <string.h> #include "graphemebreak.h" #include "graphemebreakdata.c" #include "unibreakdef.h" -#include "emojidef.h" + +#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0])) /** * Initializes the wordbreak internals. It currently does nothing, but @@ -62,8 +67,8 @@ void init_graphemebreak(void) /** * Gets the grapheme breaking class of a character. * - * @param[in] ch character to check - * @return the grapheme breaking class if found; \c GBP_Other otherwise + * @param ch character to check + * @return the grapheme breaking class if found; \c GBP_Other otherwise */ static enum GraphemeBreakClass get_char_gb_class(utf32_t ch) { @@ -88,7 +93,6 @@ static enum GraphemeBreakClass get_char_gb_class(utf32_t ch) /** * Sets the grapheme breaking information for a generic input string. - * It uses the extended grapheme cluster ruleset. * * @param[in] s input string * @param[in] len length of the input @@ -100,7 +104,7 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks, get_next_char_t get_next_char) { size_t posNext = 0; - int rule11Detector = 0; + bool rule10Left = false; // is the left side of rule 10 fulfilled? bool evenRegionalIndicators = true; // is the number of preceeding // GBP_RegionalIndicator characters // even @@ -113,47 +117,6 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks, while (true) { - - // this state-machine recognizes the following pattern: - // extended_pictograph Extended* ZWJ - // when that pattern has been detected rule11Detector will be - // 3 and rule 11 can be applied below - switch (current_class) - { - case GBP_ZWJ: - if (rule11Detector == 1 || rule11Detector == 2) - { - rule11Detector = 3; - } - else - { - rule11Detector = 0; - } - break; - - case GBP_Extend: - if (rule11Detector == 1 || rule11Detector == 2) - { - rule11Detector = 2; - } - else - { - rule11Detector = 0; - } - break; - - default: - if (ub_is_extended_pictographic(ch)) - { - rule11Detector = 1; - } - else - { - rule11Detector = 0; - } - break; - } - enum GraphemeBreakClass prev_class = current_class; // safe position if current character so that we can store the @@ -174,6 +137,16 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks, // get class of current character current_class = get_char_gb_class(ch); + // update some helper variables + if ((prev_class == GBP_E_Base) || (prev_class == GBP_E_Base_GAZ)) + { + rule10Left = true; + } + else if (prev_class != GBP_Extend) + { + rule10Left = false; + } + if (prev_class == GBP_Regional_Indicator) { evenRegionalIndicators = !evenRegionalIndicators; @@ -212,8 +185,7 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks, brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB8 } else if ((current_class == GBP_Extend) || - (current_class == GBP_ZWJ) || - (current_class == GBP_Virama)) + (current_class == GBP_ZWJ)) { brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB9 } @@ -225,7 +197,13 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks, { brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB9b } - else if ((rule11Detector == 3) && ub_is_extended_pictographic(ch)) + else if (rule10Left && (current_class == GBP_E_Modifier)) + { + brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB10 + } + else if ((prev_class == GBP_ZWJ) && + ((current_class == GBP_Glue_After_Zwj) || + (current_class == GBP_E_Base_GAZ))) { brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB11 } diff --git a/src/static_libs/libunibreak/graphemebreak.h b/src/static_libs/libunibreak/graphemebreak.h index e5259b5ccd..c01768233a 100644 --- a/src/static_libs/libunibreak/graphemebreak.h +++ b/src/static_libs/libunibreak/graphemebreak.h @@ -2,7 +2,7 @@ * Grapheme breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2016-2019 Andreas Röver <roever at users dot sf dot net> + * Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net> * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -28,10 +28,6 @@ * Unicode 9.0.0: * <URL:http://www.unicode.org/reports/tr29/tr29-29.html> * - * This library has been updated according to Revision 35, for - * Unicode 12.0.0: - * <URL:http://www.unicode.org/reports/tr29/tr29-35.html> - * * The Unicode Terms of Use are available at * <URL:http://www.unicode.org/copyright.html> */ @@ -70,4 +66,4 @@ void set_graphemebreaks_utf32(const utf32_t *s, size_t len, } #endif -#endif /* GRAPHEMEBREAK_H */ +#endif diff --git a/src/static_libs/libunibreak/graphemebreakdata.c b/src/static_libs/libunibreak/graphemebreakdata.c index bc1af932cf..cab9bebd80 100644 --- a/src/static_libs/libunibreak/graphemebreakdata.c +++ b/src/static_libs/libunibreak/graphemebreakdata.c @@ -1,6 +1,6 @@ /* The content of this file is generated from: -# GraphemeBreakProperty-12.1.0.txt -# Date: 2019-03-10, 10:53:12 GMT +# GraphemeBreakProperty-9.0.0.txt +# Date: 2016-06-03, 22:23:55 GMT */ #include "graphemebreakdef.h" @@ -36,13 +36,12 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x0730, 0x074A, GBP_Extend}, {0x07A6, 0x07B0, GBP_Extend}, {0x07EB, 0x07F3, GBP_Extend}, - {0x07FD, 0x07FD, GBP_Extend}, {0x0816, 0x0819, GBP_Extend}, {0x081B, 0x0823, GBP_Extend}, {0x0825, 0x0827, GBP_Extend}, {0x0829, 0x082D, GBP_Extend}, {0x0859, 0x085B, GBP_Extend}, - {0x08D3, 0x08E1, GBP_Extend}, + {0x08D4, 0x08E1, GBP_Extend}, {0x08E2, 0x08E2, GBP_Prepend}, {0x08E3, 0x0902, GBP_Extend}, {0x0903, 0x0903, GBP_SpacingMark}, @@ -67,7 +66,6 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x09CD, 0x09CD, GBP_Extend}, {0x09D7, 0x09D7, GBP_Extend}, {0x09E2, 0x09E3, GBP_Extend}, - {0x09FE, 0x09FE, GBP_Extend}, {0x0A01, 0x0A02, GBP_Extend}, {0x0A03, 0x0A03, GBP_SpacingMark}, {0x0A3C, 0x0A3C, GBP_Extend}, @@ -88,7 +86,6 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x0ACB, 0x0ACC, GBP_SpacingMark}, {0x0ACD, 0x0ACD, GBP_Extend}, {0x0AE2, 0x0AE3, GBP_Extend}, - {0x0AFA, 0x0AFF, GBP_Extend}, {0x0B01, 0x0B01, GBP_Extend}, {0x0B02, 0x0B03, GBP_SpacingMark}, {0x0B3C, 0x0B3C, GBP_Extend}, @@ -113,7 +110,6 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x0BD7, 0x0BD7, GBP_Extend}, {0x0C00, 0x0C00, GBP_Extend}, {0x0C01, 0x0C03, GBP_SpacingMark}, - {0x0C04, 0x0C04, GBP_Extend}, {0x0C3E, 0x0C40, GBP_Extend}, {0x0C41, 0x0C44, GBP_SpacingMark}, {0x0C46, 0x0C48, GBP_Extend}, @@ -134,9 +130,8 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x0CCC, 0x0CCD, GBP_Extend}, {0x0CD5, 0x0CD6, GBP_Extend}, {0x0CE2, 0x0CE3, GBP_Extend}, - {0x0D00, 0x0D01, GBP_Extend}, + {0x0D01, 0x0D01, GBP_Extend}, {0x0D02, 0x0D03, GBP_SpacingMark}, - {0x0D3B, 0x0D3C, GBP_Extend}, {0x0D3E, 0x0D3E, GBP_Extend}, {0x0D3F, 0x0D40, GBP_SpacingMark}, {0x0D41, 0x0D44, GBP_Extend}, @@ -161,7 +156,8 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x0E47, 0x0E4E, GBP_Extend}, {0x0EB1, 0x0EB1, GBP_Extend}, {0x0EB3, 0x0EB3, GBP_SpacingMark}, - {0x0EB4, 0x0EBC, GBP_Extend}, + {0x0EB4, 0x0EB9, GBP_Extend}, + {0x0EBB, 0x0EBC, GBP_Extend}, {0x0EC8, 0x0ECD, GBP_Extend}, {0x0F18, 0x0F19, GBP_Extend}, {0x0F35, 0x0F35, GBP_Extend}, @@ -236,7 +232,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x1B00, 0x1B03, GBP_Extend}, {0x1B04, 0x1B04, GBP_SpacingMark}, {0x1B34, 0x1B34, GBP_Extend}, - {0x1B35, 0x1B35, GBP_Extend}, + {0x1B35, 0x1B35, GBP_SpacingMark}, {0x1B36, 0x1B3A, GBP_Extend}, {0x1B3B, 0x1B3B, GBP_SpacingMark}, {0x1B3C, 0x1B3C, GBP_Extend}, @@ -269,10 +265,10 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x1CE1, 0x1CE1, GBP_SpacingMark}, {0x1CE2, 0x1CE8, GBP_Extend}, {0x1CED, 0x1CED, GBP_Extend}, + {0x1CF2, 0x1CF3, GBP_SpacingMark}, {0x1CF4, 0x1CF4, GBP_Extend}, - {0x1CF7, 0x1CF7, GBP_SpacingMark}, {0x1CF8, 0x1CF9, GBP_Extend}, - {0x1DC0, 0x1DF9, GBP_Extend}, + {0x1DC0, 0x1DF5, GBP_Extend}, {0x1DFB, 0x1DFF, GBP_Extend}, {0x200B, 0x200B, GBP_Control}, {0x200C, 0x200C, GBP_Extend}, @@ -289,6 +285,10 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x20E1, 0x20E1, GBP_Extend}, {0x20E2, 0x20E4, GBP_Extend}, {0x20E5, 0x20F0, GBP_Extend}, + {0x261D, 0x261D, GBP_E_Base}, + {0x26F9, 0x26F9, GBP_E_Base}, + {0x270A, 0x270D, GBP_E_Base}, + {0x2764, 0x2764, GBP_Glue_After_Zwj}, {0x2CEF, 0x2CF1, GBP_Extend}, {0x2D7F, 0x2D7F, GBP_Extend}, {0x2DE0, 0x2DFF, GBP_Extend}, @@ -310,7 +310,6 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0xA8B4, 0xA8C3, GBP_SpacingMark}, {0xA8C4, 0xA8C5, GBP_Extend}, {0xA8E0, 0xA8F1, GBP_Extend}, - {0xA8FF, 0xA8FF, GBP_Extend}, {0xA926, 0xA92D, GBP_Extend}, {0xA947, 0xA951, GBP_Extend}, {0xA952, 0xA953, GBP_SpacingMark}, @@ -321,8 +320,8 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0xA9B4, 0xA9B5, GBP_SpacingMark}, {0xA9B6, 0xA9B9, GBP_Extend}, {0xA9BA, 0xA9BB, GBP_SpacingMark}, - {0xA9BC, 0xA9BD, GBP_Extend}, - {0xA9BE, 0xA9C0, GBP_SpacingMark}, + {0xA9BC, 0xA9BC, GBP_Extend}, + {0xA9BD, 0xA9C0, GBP_SpacingMark}, {0xA9E5, 0xA9E5, GBP_Extend}, {0xAA29, 0xAA2E, GBP_Extend}, {0xAA2F, 0xAA30, GBP_SpacingMark}, @@ -1150,6 +1149,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0xD789, 0xD7A3, GBP_LVT}, {0xD7B0, 0xD7C6, GBP_V}, {0xD7CB, 0xD7FB, GBP_T}, + {0xD800, 0xDFFF, GBP_Control}, {0xFB1E, 0xFB1E, GBP_Extend}, {0xFE00, 0xFE0F, GBP_Extend}, {0xFE20, 0xFE2F, GBP_Extend}, @@ -1166,8 +1166,6 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x10A38, 0x10A3A, GBP_Extend}, {0x10A3F, 0x10A3F, GBP_Extend}, {0x10AE5, 0x10AE6, GBP_Extend}, - {0x10D24, 0x10D27, GBP_Extend}, - {0x10F46, 0x10F50, GBP_Extend}, {0x11000, 0x11000, GBP_SpacingMark}, {0x11001, 0x11001, GBP_Extend}, {0x11002, 0x11002, GBP_SpacingMark}, @@ -1179,12 +1177,10 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x110B7, 0x110B8, GBP_SpacingMark}, {0x110B9, 0x110BA, GBP_Extend}, {0x110BD, 0x110BD, GBP_Prepend}, - {0x110CD, 0x110CD, GBP_Prepend}, {0x11100, 0x11102, GBP_Extend}, {0x11127, 0x1112B, GBP_Extend}, {0x1112C, 0x1112C, GBP_SpacingMark}, {0x1112D, 0x11134, GBP_Extend}, - {0x11145, 0x11146, GBP_SpacingMark}, {0x11173, 0x11173, GBP_Extend}, {0x11180, 0x11181, GBP_Extend}, {0x11182, 0x11182, GBP_SpacingMark}, @@ -1192,7 +1188,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x111B6, 0x111BE, GBP_Extend}, {0x111BF, 0x111C0, GBP_SpacingMark}, {0x111C2, 0x111C3, GBP_Prepend}, - {0x111C9, 0x111CC, GBP_Extend}, + {0x111CA, 0x111CC, GBP_Extend}, {0x1122C, 0x1122E, GBP_SpacingMark}, {0x1122F, 0x11231, GBP_Extend}, {0x11232, 0x11233, GBP_SpacingMark}, @@ -1205,7 +1201,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x112E3, 0x112EA, GBP_Extend}, {0x11300, 0x11301, GBP_Extend}, {0x11302, 0x11303, GBP_SpacingMark}, - {0x1133B, 0x1133C, GBP_Extend}, + {0x1133C, 0x1133C, GBP_Extend}, {0x1133E, 0x1133E, GBP_Extend}, {0x1133F, 0x1133F, GBP_SpacingMark}, {0x11340, 0x11340, GBP_Extend}, @@ -1222,7 +1218,6 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x11442, 0x11444, GBP_Extend}, {0x11445, 0x11445, GBP_SpacingMark}, {0x11446, 0x11446, GBP_Extend}, - {0x1145E, 0x1145E, GBP_Extend}, {0x114B0, 0x114B0, GBP_Extend}, {0x114B1, 0x114B2, GBP_SpacingMark}, {0x114B3, 0x114B8, GBP_Extend}, @@ -1260,29 +1255,6 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x11722, 0x11725, GBP_Extend}, {0x11726, 0x11726, GBP_SpacingMark}, {0x11727, 0x1172B, GBP_Extend}, - {0x1182C, 0x1182E, GBP_SpacingMark}, - {0x1182F, 0x11837, GBP_Extend}, - {0x11838, 0x11838, GBP_SpacingMark}, - {0x11839, 0x1183A, GBP_Extend}, - {0x119D1, 0x119D3, GBP_SpacingMark}, - {0x119D4, 0x119D7, GBP_Extend}, - {0x119DA, 0x119DB, GBP_Extend}, - {0x119DC, 0x119DF, GBP_SpacingMark}, - {0x119E0, 0x119E0, GBP_Extend}, - {0x119E4, 0x119E4, GBP_SpacingMark}, - {0x11A01, 0x11A0A, GBP_Extend}, - {0x11A33, 0x11A38, GBP_Extend}, - {0x11A39, 0x11A39, GBP_SpacingMark}, - {0x11A3A, 0x11A3A, GBP_Prepend}, - {0x11A3B, 0x11A3E, GBP_Extend}, - {0x11A47, 0x11A47, GBP_Extend}, - {0x11A51, 0x11A56, GBP_Extend}, - {0x11A57, 0x11A58, GBP_SpacingMark}, - {0x11A59, 0x11A5B, GBP_Extend}, - {0x11A84, 0x11A89, GBP_Prepend}, - {0x11A8A, 0x11A96, GBP_Extend}, - {0x11A97, 0x11A97, GBP_SpacingMark}, - {0x11A98, 0x11A99, GBP_Extend}, {0x11C2F, 0x11C2F, GBP_SpacingMark}, {0x11C30, 0x11C36, GBP_Extend}, {0x11C38, 0x11C3D, GBP_Extend}, @@ -1295,25 +1267,9 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x11CB2, 0x11CB3, GBP_Extend}, {0x11CB4, 0x11CB4, GBP_SpacingMark}, {0x11CB5, 0x11CB6, GBP_Extend}, - {0x11D31, 0x11D36, GBP_Extend}, - {0x11D3A, 0x11D3A, GBP_Extend}, - {0x11D3C, 0x11D3D, GBP_Extend}, - {0x11D3F, 0x11D45, GBP_Extend}, - {0x11D46, 0x11D46, GBP_Prepend}, - {0x11D47, 0x11D47, GBP_Extend}, - {0x11D8A, 0x11D8E, GBP_SpacingMark}, - {0x11D90, 0x11D91, GBP_Extend}, - {0x11D93, 0x11D94, GBP_SpacingMark}, - {0x11D95, 0x11D95, GBP_Extend}, - {0x11D96, 0x11D96, GBP_SpacingMark}, - {0x11D97, 0x11D97, GBP_Extend}, - {0x11EF3, 0x11EF4, GBP_Extend}, - {0x11EF5, 0x11EF6, GBP_SpacingMark}, - {0x13430, 0x13438, GBP_Control}, {0x16AF0, 0x16AF4, GBP_Extend}, {0x16B30, 0x16B36, GBP_Extend}, - {0x16F4F, 0x16F4F, GBP_Extend}, - {0x16F51, 0x16F87, GBP_SpacingMark}, + {0x16F51, 0x16F7E, GBP_SpacingMark}, {0x16F8F, 0x16F92, GBP_Extend}, {0x1BC9D, 0x1BC9E, GBP_Extend}, {0x1BCA0, 0x1BCA3, GBP_Control}, @@ -1338,12 +1294,38 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x1E01B, 0x1E021, GBP_Extend}, {0x1E023, 0x1E024, GBP_Extend}, {0x1E026, 0x1E02A, GBP_Extend}, - {0x1E130, 0x1E136, GBP_Extend}, - {0x1E2EC, 0x1E2EF, GBP_Extend}, {0x1E8D0, 0x1E8D6, GBP_Extend}, {0x1E944, 0x1E94A, GBP_Extend}, {0x1F1E6, 0x1F1FF, GBP_Regional_Indicator}, - {0x1F3FB, 0x1F3FF, GBP_Extend}, + {0x1F385, 0x1F385, GBP_E_Base}, + {0x1F3C3, 0x1F3C4, GBP_E_Base}, + {0x1F3CA, 0x1F3CB, GBP_E_Base}, + {0x1F3FB, 0x1F3FF, GBP_E_Modifier}, + {0x1F442, 0x1F443, GBP_E_Base}, + {0x1F446, 0x1F450, GBP_E_Base}, + {0x1F466, 0x1F469, GBP_E_Base_GAZ}, + {0x1F46E, 0x1F46E, GBP_E_Base}, + {0x1F470, 0x1F478, GBP_E_Base}, + {0x1F47C, 0x1F47C, GBP_E_Base}, + {0x1F481, 0x1F483, GBP_E_Base}, + {0x1F485, 0x1F487, GBP_E_Base}, + {0x1F48B, 0x1F48B, GBP_Glue_After_Zwj}, + {0x1F4AA, 0x1F4AA, GBP_E_Base}, + {0x1F575, 0x1F575, GBP_E_Base}, + {0x1F57A, 0x1F57A, GBP_E_Base}, + {0x1F590, 0x1F590, GBP_E_Base}, + {0x1F595, 0x1F596, GBP_E_Base}, + {0x1F5E8, 0x1F5E8, GBP_Glue_After_Zwj}, + {0x1F645, 0x1F647, GBP_E_Base}, + {0x1F64B, 0x1F64F, GBP_E_Base}, + {0x1F6A3, 0x1F6A3, GBP_E_Base}, + {0x1F6B4, 0x1F6B6, GBP_E_Base}, + {0x1F6C0, 0x1F6C0, GBP_E_Base}, + {0x1F918, 0x1F91E, GBP_E_Base}, + {0x1F926, 0x1F926, GBP_E_Base}, + {0x1F930, 0x1F930, GBP_E_Base}, + {0x1F933, 0x1F939, GBP_E_Base}, + {0x1F93C, 0x1F93E, GBP_E_Base}, {0xE0000, 0xE0000, GBP_Control}, {0xE0001, 0xE0001, GBP_Control}, {0xE0002, 0xE001F, GBP_Control}, diff --git a/src/static_libs/libunibreak/graphemebreakdef.h b/src/static_libs/libunibreak/graphemebreakdef.h index 90ccfbd5f1..0de1f3d623 100644 --- a/src/static_libs/libunibreak/graphemebreakdef.h +++ b/src/static_libs/libunibreak/graphemebreakdef.h @@ -2,7 +2,7 @@ * Grapheme breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2016-2019 Andreas Röver <roever at users dot sf dot net> + * Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net> * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -28,10 +28,6 @@ * Unicode 9.0.0: * <URL:http://www.unicode.org/reports/tr29/tr29-29.html> * - * This library has been updated according to Revision 35, for - * Unicode 12.0.0: - * <URL:http://www.unicode.org/reports/tr29/tr29-35.html> - * * The Unicode Terms of Use are available at * <URL:http://www.unicode.org/copyright.html> */ @@ -49,15 +45,13 @@ /** * Word break classes. This is a direct mapping of Table 2 of Unicode - * Standard Annex 29. + * Standard Annex 29 */ enum GraphemeBreakClass { GBP_CR, GBP_LF, GBP_Control, - GBP_Virama, - GBP_LinkingConsonant, GBP_Extend, GBP_ZWJ, GBP_Regional_Indicator, @@ -68,6 +62,10 @@ enum GraphemeBreakClass GBP_T, GBP_LV, GBP_LVT, + GBP_E_Base, + GBP_E_Modifier, + GBP_Glue_After_Zwj, + GBP_E_Base_GAZ, GBP_Other, GBP_Undefined }; @@ -78,7 +76,7 @@ enum GraphemeBreakClass */ struct GraphemeBreakProperties { - utf32_t start; /**< Start codepoint */ - utf32_t end; /**< End codepoint, inclusive */ + utf32_t start; /**< Starting coding point */ + utf32_t end; /**< End coding point, including */ enum GraphemeBreakClass prop; /**< The grapheme breaking property */ }; diff --git a/src/static_libs/libunibreak/linebreak.c b/src/static_libs/libunibreak/linebreak.c index 98e2730314..41f23c1c08 100644 --- a/src/static_libs/libunibreak/linebreak.c +++ b/src/static_libs/libunibreak/linebreak.c @@ -4,7 +4,7 @@ * Line breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2008-2019 Wu Yongwei <wuyongwei at gmail dot com> + * Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com> * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com> * * This software is provided 'as-is', without any express or implied @@ -31,9 +31,9 @@ * Unicode 5.0.0: * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> * - * This library has been updated according to Revision 43, for - * Unicode 12.0.0: - * <URL:http://www.unicode.org/reports/tr14/tr14-43.html> + * This library has been updated according to Revision 37, for + * Unicode 9.0.0: + * <URL:http://www.unicode.org/reports/tr14/tr14-37.html> * * The Unicode Terms of Use are available at * <URL:http://www.unicode.org/copyright.html> @@ -80,9 +80,7 @@ enum BreakAction /** * Break action pair table. This is a direct mapping of Table 2 of - * Unicode Standard Annex 14, Revision 37, except for ZWJ (manually - * adjusted after special processing as per LB8a of Revision 41) and CB - * (manually added as per LB20). + * Unicode Standard Annex 14, Revision 37, except the "CB" part. */ static enum BreakAction baTable[LBP_CB][LBP_CB] = { { /* OP */ @@ -272,17 +270,17 @@ static enum BreakAction baTable[LBP_CB][LBP_CB] = { CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* ZWJ */ - IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, - PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, - DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, + PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + IND_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK }, { /* CB */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, }; /** @@ -290,9 +288,8 @@ static enum BreakAction baTable[LBP_CB][LBP_CB] = { */ struct LineBreakPropertiesIndex { - utf32_t end; /**< End codepoint */ - const struct LineBreakProperties *lbp; /**< Pointer to line breaking - properties */ + utf32_t end; /**< End coding point */ + const struct LineBreakProperties *lbp;/**< Pointer to line breaking properties */ }; /** @@ -338,7 +335,7 @@ static __inline int ends_with(const char *str, const char *suffix, * Initializes the second-level index to the line breaking properties. * If it is not called, the performance of #get_char_lb_class_lang (and * thus the main functionality) can be pretty bad, especially for big - * codepoints like those of Chinese. + * code points like those of Chinese. */ void init_linebreak(void) { @@ -615,18 +612,12 @@ static int get_lb_result_lookup( break; } - /* Special processing due to rule LB8a */ - if (lbpCtx->fLb8aZwj) - { - brk = LINEBREAK_NOBREAK; - } - /* Special processing due to rule LB21a */ if (lbpCtx->fLb21aHebrew && (lbpCtx->lbcCur == LBP_HY || lbpCtx->lbcCur == LBP_BA)) { brk = LINEBREAK_NOBREAK; - lbpCtx->fLb21aHebrew = false; + lbpCtx->fLb21aHebrew = 0; } else { @@ -672,21 +663,17 @@ void lb_init_break_context( lbpCtx->lbcCur = resolve_lb_class( get_char_lb_class_lang(ch, lbpCtx->lbpLang), lbpCtx->lang); - lbpCtx->fLb8aZwj = - (get_char_lb_class_lang(ch, lbpCtx->lbpLang) == LBP_ZWJ); - lbpCtx->fLb10LeadSpace = - (get_char_lb_class_lang(ch, lbpCtx->lbpLang) == LBP_SP); - lbpCtx->fLb21aHebrew = false; + lbpCtx->fLb21aHebrew = 0; lbpCtx->cLb30aRI = 0; treat_first_char(lbpCtx); } /** - * Updates LineBreakingContext for the next codepoint and returns + * Updates LineBreakingContext for the next code point and returns * the detected break. * * @param[in,out] lbpCtx pointer to the line breaking context - * @param[in] ch Unicode codepoint + * @param[in] ch Unicode code point * @return break result, one of #LINEBREAK_MUSTBREAK, * #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK * @post the line breaking context is updated @@ -713,25 +700,6 @@ int lb_process_next_char( default: break; } - - /* Special processing due to rule LB8a */ - if (lbpCtx->lbcNew == LBP_ZWJ) - { - lbpCtx->fLb8aZwj = true; - } - else - { - lbpCtx->fLb8aZwj = false; - } - - /* Special processing due to rule LB10 */ - if (lbpCtx->fLb10LeadSpace) - { - if (lbpCtx->lbcNew == LBP_CM || lbpCtx->lbcNew == LBP_ZWJ) - brk = LINEBREAK_ALLOWBREAK; - lbpCtx->fLb10LeadSpace = false; - } - return brk; } diff --git a/src/static_libs/libunibreak/linebreak.h b/src/static_libs/libunibreak/linebreak.h index fa88094b4b..fd7351191b 100644 --- a/src/static_libs/libunibreak/linebreak.h +++ b/src/static_libs/libunibreak/linebreak.h @@ -4,7 +4,7 @@ * Line breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2008-2019 Wu Yongwei <wuyongwei at gmail dot com> + * Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com> * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -30,9 +30,9 @@ * Unicode 5.0.0: * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> * - * This library has been updated according to Revision 43, for - * Unicode 12.0.0: - * <URL:http://www.unicode.org/reports/tr14/tr14-43.html> + * This library has been updated according to Revision 37, for + * Unicode 9.0.0: + * <URL:http://www.unicode.org/reports/tr14/tr14-37.html> * * The Unicode Terms of Use are available at * <URL:http://www.unicode.org/copyright.html> diff --git a/src/static_libs/libunibreak/linebreakdata.c b/src/static_libs/libunibreak/linebreakdata.c index 23d9072baa..c571f2da00 100644 --- a/src/static_libs/libunibreak/linebreakdata.c +++ b/src/static_libs/libunibreak/linebreakdata.c @@ -1,6 +1,6 @@ /* The content of this file is generated from: -# LineBreak-12.1.0.txt -# Date: 2019-03-31, 22:04:15 GMT [KW, LI] +# LineBreak-9.0.0.txt +# Date: 2016-05-26, 01:00:00 GMT [KW, LI] */ #include "linebreakdef.h" @@ -94,7 +94,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x037E, 0x037E, LBP_IS }, { 0x037F, 0x0482, LBP_AL }, { 0x0483, 0x0489, LBP_CM }, - { 0x048A, 0x0588, LBP_AL }, + { 0x048A, 0x0587, LBP_AL }, { 0x0589, 0x0589, LBP_IS }, { 0x058A, 0x058A, LBP_BA }, { 0x058D, 0x058E, LBP_AL }, @@ -149,10 +149,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x07F4, 0x07F7, LBP_AL }, { 0x07F8, 0x07F8, LBP_IS }, { 0x07F9, 0x07F9, LBP_EX }, - { 0x07FA, 0x07FA, LBP_AL }, - { 0x07FD, 0x07FD, LBP_CM }, - { 0x07FE, 0x07FF, LBP_PR }, - { 0x0800, 0x0815, LBP_AL }, + { 0x07FA, 0x0815, LBP_AL }, { 0x0816, 0x0819, LBP_CM }, { 0x081A, 0x081A, LBP_AL }, { 0x081B, 0x0823, LBP_CM }, @@ -163,7 +160,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x0830, 0x0858, LBP_AL }, { 0x0859, 0x085B, LBP_CM }, { 0x085E, 0x08BD, LBP_AL }, - { 0x08D3, 0x08E1, LBP_CM }, + { 0x08D4, 0x08E1, LBP_CM }, { 0x08E2, 0x08E2, LBP_AL }, { 0x08E3, 0x0903, LBP_CM }, { 0x0904, 0x0939, LBP_AL }, @@ -193,17 +190,14 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x09F9, 0x09F9, LBP_PO }, { 0x09FA, 0x09FA, LBP_AL }, { 0x09FB, 0x09FB, LBP_PR }, - { 0x09FC, 0x09FD, LBP_AL }, - { 0x09FE, 0x0A03, LBP_CM }, + { 0x0A01, 0x0A03, LBP_CM }, { 0x0A05, 0x0A39, LBP_AL }, { 0x0A3C, 0x0A51, LBP_CM }, { 0x0A59, 0x0A5E, LBP_AL }, { 0x0A66, 0x0A6F, LBP_NU }, { 0x0A70, 0x0A71, LBP_CM }, { 0x0A72, 0x0A74, LBP_AL }, - { 0x0A75, 0x0A75, LBP_CM }, - { 0x0A76, 0x0A76, LBP_AL }, - { 0x0A81, 0x0A83, LBP_CM }, + { 0x0A75, 0x0A83, LBP_CM }, { 0x0A85, 0x0AB9, LBP_AL }, { 0x0ABC, 0x0ABC, LBP_CM }, { 0x0ABD, 0x0ABD, LBP_AL }, @@ -214,7 +208,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x0AF0, 0x0AF0, LBP_AL }, { 0x0AF1, 0x0AF1, LBP_PR }, { 0x0AF9, 0x0AF9, LBP_AL }, - { 0x0AFA, 0x0B03, LBP_CM }, + { 0x0B01, 0x0B03, LBP_CM }, { 0x0B05, 0x0B39, LBP_AL }, { 0x0B3C, 0x0B3C, LBP_CM }, { 0x0B3D, 0x0B3D, LBP_AL }, @@ -232,16 +226,14 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x0BF0, 0x0BF8, LBP_AL }, { 0x0BF9, 0x0BF9, LBP_PR }, { 0x0BFA, 0x0BFA, LBP_AL }, - { 0x0C00, 0x0C04, LBP_CM }, + { 0x0C00, 0x0C03, LBP_CM }, { 0x0C05, 0x0C3D, LBP_AL }, { 0x0C3E, 0x0C56, LBP_CM }, { 0x0C58, 0x0C61, LBP_AL }, { 0x0C62, 0x0C63, LBP_CM }, { 0x0C66, 0x0C6F, LBP_NU }, - { 0x0C77, 0x0C77, LBP_BB }, { 0x0C78, 0x0C80, LBP_AL }, { 0x0C81, 0x0C83, LBP_CM }, - { 0x0C84, 0x0C84, LBP_BB }, { 0x0C85, 0x0CB9, LBP_AL }, { 0x0CBC, 0x0CBC, LBP_CM }, { 0x0CBD, 0x0CBD, LBP_AL }, @@ -250,10 +242,8 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x0CE2, 0x0CE3, LBP_CM }, { 0x0CE6, 0x0CEF, LBP_NU }, { 0x0CF1, 0x0CF2, LBP_AL }, - { 0x0D00, 0x0D03, LBP_CM }, - { 0x0D05, 0x0D3A, LBP_AL }, - { 0x0D3B, 0x0D3C, LBP_CM }, - { 0x0D3D, 0x0D3D, LBP_AL }, + { 0x0D01, 0x0D03, LBP_CM }, + { 0x0D05, 0x0D3D, LBP_AL }, { 0x0D3E, 0x0D4D, LBP_CM }, { 0x0D4E, 0x0D56, LBP_AL }, { 0x0D57, 0x0D57, LBP_CM }, @@ -427,11 +417,11 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1CD4, 0x1CE8, LBP_CM }, { 0x1CE9, 0x1CEC, LBP_AL }, { 0x1CED, 0x1CED, LBP_CM }, - { 0x1CEE, 0x1CF3, LBP_AL }, - { 0x1CF4, 0x1CF4, LBP_CM }, + { 0x1CEE, 0x1CF1, LBP_AL }, + { 0x1CF2, 0x1CF4, LBP_CM }, { 0x1CF5, 0x1CF6, LBP_AL }, - { 0x1CF7, 0x1CF9, LBP_CM }, - { 0x1CFA, 0x1DBF, LBP_AL }, + { 0x1CF8, 0x1CF9, LBP_CM }, + { 0x1D00, 0x1DBF, LBP_AL }, { 0x1DC0, 0x1DFF, LBP_CM }, { 0x1E00, 0x1FFC, LBP_AL }, { 0x1FFD, 0x1FFD, LBP_BB }, @@ -440,9 +430,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x2007, 0x2007, LBP_GL }, { 0x2008, 0x200A, LBP_BA }, { 0x200B, 0x200B, LBP_ZW }, - { 0x200C, 0x200C, LBP_CM }, - { 0x200D, 0x200D, LBP_ZWJ }, - { 0x200E, 0x200F, LBP_CM }, + { 0x200C, 0x200F, LBP_CM }, { 0x2010, 0x2010, LBP_BA }, { 0x2011, 0x2011, LBP_GL }, { 0x2012, 0x2013, LBP_BA }, @@ -820,11 +808,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x2E3F, 0x2E3F, LBP_AL }, { 0x2E40, 0x2E41, LBP_BA }, { 0x2E42, 0x2E42, LBP_OP }, - { 0x2E43, 0x2E4A, LBP_BA }, - { 0x2E4B, 0x2E4B, LBP_AL }, - { 0x2E4C, 0x2E4C, LBP_BA }, - { 0x2E4D, 0x2E4D, LBP_AL }, - { 0x2E4E, 0x2E4F, LBP_BA }, + { 0x2E43, 0x2E44, LBP_BA }, { 0x2E80, 0x2FFB, LBP_ID }, { 0x3000, 0x3000, LBP_BA }, { 0x3001, 0x3002, LBP_CL }, @@ -958,8 +942,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0xA8E0, 0xA8F1, LBP_CM }, { 0xA8F2, 0xA8FB, LBP_AL }, { 0xA8FC, 0xA8FC, LBP_BB }, - { 0xA8FD, 0xA8FE, LBP_AL }, - { 0xA8FF, 0xA8FF, LBP_CM }, + { 0xA8FD, 0xA8FD, LBP_AL }, { 0xA900, 0xA909, LBP_NU }, { 0xA90A, 0xA925, LBP_AL }, { 0xA926, 0xA92D, LBP_CM }, @@ -1924,9 +1907,9 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1091F, 0x1091F, LBP_BA }, { 0x10920, 0x10A00, LBP_AL }, { 0x10A01, 0x10A0F, LBP_CM }, - { 0x10A10, 0x10A35, LBP_AL }, + { 0x10A10, 0x10A33, LBP_AL }, { 0x10A38, 0x10A3F, LBP_CM }, - { 0x10A40, 0x10A48, LBP_AL }, + { 0x10A40, 0x10A47, LBP_AL }, { 0x10A50, 0x10A57, LBP_BA }, { 0x10A58, 0x10AE4, LBP_AL }, { 0x10AE5, 0x10AE6, LBP_CM }, @@ -1935,12 +1918,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x10AF6, 0x10AF6, LBP_IN }, { 0x10B00, 0x10B35, LBP_AL }, { 0x10B39, 0x10B3F, LBP_BA }, - { 0x10B40, 0x10D23, LBP_AL }, - { 0x10D24, 0x10D27, LBP_CM }, - { 0x10D30, 0x10D39, LBP_NU }, - { 0x10E60, 0x10F45, LBP_AL }, - { 0x10F46, 0x10F50, LBP_CM }, - { 0x10F51, 0x10FF6, LBP_AL }, + { 0x10B40, 0x10E7E, LBP_AL }, { 0x11000, 0x11002, LBP_CM }, { 0x11003, 0x11037, LBP_AL }, { 0x11038, 0x11046, LBP_CM }, @@ -1952,15 +1930,13 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x110B0, 0x110BA, LBP_CM }, { 0x110BB, 0x110BD, LBP_AL }, { 0x110BE, 0x110C1, LBP_BA }, - { 0x110CD, 0x110E8, LBP_AL }, + { 0x110D0, 0x110E8, LBP_AL }, { 0x110F0, 0x110F9, LBP_NU }, { 0x11100, 0x11102, LBP_CM }, { 0x11103, 0x11126, LBP_AL }, { 0x11127, 0x11134, LBP_CM }, { 0x11136, 0x1113F, LBP_NU }, { 0x11140, 0x11143, LBP_BA }, - { 0x11144, 0x11144, LBP_AL }, - { 0x11145, 0x11146, LBP_CM }, { 0x11150, 0x11172, LBP_AL }, { 0x11173, 0x11173, LBP_CM }, { 0x11174, 0x11174, LBP_AL }, @@ -1973,7 +1949,8 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x111C5, 0x111C6, LBP_BA }, { 0x111C7, 0x111C7, LBP_AL }, { 0x111C8, 0x111C8, LBP_BA }, - { 0x111C9, 0x111CC, LBP_CM }, + { 0x111C9, 0x111C9, LBP_AL }, + { 0x111CA, 0x111CC, LBP_CM }, { 0x111CD, 0x111CD, LBP_AL }, { 0x111D0, 0x111D9, LBP_NU }, { 0x111DA, 0x111DA, LBP_AL }, @@ -1994,7 +1971,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x112F0, 0x112F9, LBP_NU }, { 0x11300, 0x11303, LBP_CM }, { 0x11305, 0x11339, LBP_AL }, - { 0x1133B, 0x1133C, LBP_CM }, + { 0x1133C, 0x1133C, LBP_CM }, { 0x1133D, 0x1133D, LBP_AL }, { 0x1133E, 0x1134D, LBP_CM }, { 0x11350, 0x11350, LBP_AL }, @@ -2008,9 +1985,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1144F, 0x1144F, LBP_AL }, { 0x11450, 0x11459, LBP_NU }, { 0x1145B, 0x1145B, LBP_BA }, - { 0x1145D, 0x1145D, LBP_AL }, - { 0x1145E, 0x1145E, LBP_CM }, - { 0x1145F, 0x114AF, LBP_AL }, + { 0x1145D, 0x114AF, LBP_AL }, { 0x114B0, 0x114C3, LBP_CM }, { 0x114C4, 0x114C7, LBP_AL }, { 0x114D0, 0x114D9, LBP_NU }, @@ -2031,44 +2006,15 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x11660, 0x1166C, LBP_BB }, { 0x11680, 0x116AA, LBP_AL }, { 0x116AB, 0x116B7, LBP_CM }, - { 0x116B8, 0x116B8, LBP_AL }, { 0x116C0, 0x116C9, LBP_NU }, { 0x11700, 0x1172B, LBP_SA }, { 0x11730, 0x11739, LBP_NU }, { 0x1173A, 0x1173B, LBP_SA }, { 0x1173C, 0x1173E, LBP_BA }, { 0x1173F, 0x1173F, LBP_SA }, - { 0x11800, 0x1182B, LBP_AL }, - { 0x1182C, 0x1183A, LBP_CM }, - { 0x1183B, 0x118DF, LBP_AL }, + { 0x118A0, 0x118DF, LBP_AL }, { 0x118E0, 0x118E9, LBP_NU }, - { 0x118EA, 0x119D0, LBP_AL }, - { 0x119D1, 0x119E0, LBP_CM }, - { 0x119E1, 0x119E1, LBP_AL }, - { 0x119E2, 0x119E2, LBP_BB }, - { 0x119E3, 0x119E3, LBP_AL }, - { 0x119E4, 0x119E4, LBP_CM }, - { 0x11A00, 0x11A00, LBP_AL }, - { 0x11A01, 0x11A0A, LBP_CM }, - { 0x11A0B, 0x11A32, LBP_AL }, - { 0x11A33, 0x11A39, LBP_CM }, - { 0x11A3A, 0x11A3A, LBP_AL }, - { 0x11A3B, 0x11A3E, LBP_CM }, - { 0x11A3F, 0x11A3F, LBP_BB }, - { 0x11A40, 0x11A40, LBP_AL }, - { 0x11A41, 0x11A44, LBP_BA }, - { 0x11A45, 0x11A45, LBP_BB }, - { 0x11A46, 0x11A46, LBP_AL }, - { 0x11A47, 0x11A47, LBP_CM }, - { 0x11A50, 0x11A50, LBP_AL }, - { 0x11A51, 0x11A5B, LBP_CM }, - { 0x11A5C, 0x11A89, LBP_AL }, - { 0x11A8A, 0x11A99, LBP_CM }, - { 0x11A9A, 0x11A9C, LBP_BA }, - { 0x11A9D, 0x11A9D, LBP_AL }, - { 0x11A9E, 0x11AA0, LBP_BB }, - { 0x11AA1, 0x11AA2, LBP_BA }, - { 0x11AC0, 0x11C2E, LBP_AL }, + { 0x118EA, 0x11C2E, LBP_AL }, { 0x11C2F, 0x11C3F, LBP_CM }, { 0x11C40, 0x11C40, LBP_AL }, { 0x11C41, 0x11C45, LBP_BA }, @@ -2078,21 +2024,6 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x11C71, 0x11C71, LBP_EX }, { 0x11C72, 0x11C8F, LBP_AL }, { 0x11C92, 0x11CB6, LBP_CM }, - { 0x11D00, 0x11D30, LBP_AL }, - { 0x11D31, 0x11D45, LBP_CM }, - { 0x11D46, 0x11D46, LBP_AL }, - { 0x11D47, 0x11D47, LBP_CM }, - { 0x11D50, 0x11D59, LBP_NU }, - { 0x11D60, 0x11D89, LBP_AL }, - { 0x11D8A, 0x11D97, LBP_CM }, - { 0x11D98, 0x11D98, LBP_AL }, - { 0x11DA0, 0x11DA9, LBP_NU }, - { 0x11EE0, 0x11EF2, LBP_AL }, - { 0x11EF3, 0x11EF6, LBP_CM }, - { 0x11EF7, 0x11FDC, LBP_AL }, - { 0x11FDD, 0x11FE0, LBP_PO }, - { 0x11FE1, 0x11FF1, LBP_AL }, - { 0x11FFF, 0x11FFF, LBP_BA }, { 0x12000, 0x1246E, LBP_AL }, { 0x12470, 0x12474, LBP_BA }, { 0x12480, 0x13257, LBP_AL }, @@ -2108,11 +2039,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1328A, 0x13378, LBP_AL }, { 0x13379, 0x13379, LBP_OP }, { 0x1337A, 0x1337B, LBP_CL }, - { 0x1337C, 0x1342E, LBP_AL }, - { 0x13430, 0x13436, LBP_GL }, - { 0x13437, 0x13437, LBP_OP }, - { 0x13438, 0x13438, LBP_CL }, - { 0x14400, 0x145CD, LBP_AL }, + { 0x1337C, 0x145CD, LBP_AL }, { 0x145CE, 0x145CE, LBP_OP }, { 0x145CF, 0x145CF, LBP_CL }, { 0x145D0, 0x16A5E, LBP_AL }, @@ -2128,17 +2055,11 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x16B44, 0x16B44, LBP_BA }, { 0x16B45, 0x16B45, LBP_AL }, { 0x16B50, 0x16B59, LBP_NU }, - { 0x16B5B, 0x16E96, LBP_AL }, - { 0x16E97, 0x16E98, LBP_BA }, - { 0x16E99, 0x16F4A, LBP_AL }, - { 0x16F4F, 0x16F4F, LBP_CM }, - { 0x16F50, 0x16F50, LBP_AL }, + { 0x16B5B, 0x16F50, LBP_AL }, { 0x16F51, 0x16F92, LBP_CM }, { 0x16F93, 0x16F9F, LBP_AL }, - { 0x16FE0, 0x16FE3, LBP_NS }, - { 0x17000, 0x1B11E, LBP_ID }, - { 0x1B150, 0x1B167, LBP_CJ }, - { 0x1B170, 0x1B2FB, LBP_ID }, + { 0x16FE0, 0x16FE0, LBP_NS }, + { 0x17000, 0x1B001, LBP_ID }, { 0x1BC00, 0x1BC9C, LBP_AL }, { 0x1BC9D, 0x1BC9E, LBP_CM }, { 0x1BC9F, 0x1BC9F, LBP_BA }, @@ -2167,34 +2088,22 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1DA87, 0x1DA8A, LBP_BA }, { 0x1DA8B, 0x1DA8B, LBP_AL }, { 0x1DA9B, 0x1E02A, LBP_CM }, - { 0x1E100, 0x1E12C, LBP_AL }, - { 0x1E130, 0x1E136, LBP_CM }, - { 0x1E137, 0x1E13D, LBP_AL }, - { 0x1E140, 0x1E149, LBP_NU }, - { 0x1E14E, 0x1E2EB, LBP_AL }, - { 0x1E2EC, 0x1E2EF, LBP_CM }, - { 0x1E2F0, 0x1E2F9, LBP_NU }, - { 0x1E2FF, 0x1E2FF, LBP_PR }, { 0x1E800, 0x1E8CF, LBP_AL }, { 0x1E8D0, 0x1E8D6, LBP_CM }, { 0x1E900, 0x1E943, LBP_AL }, { 0x1E944, 0x1E94A, LBP_CM }, - { 0x1E94B, 0x1E94B, LBP_AL }, { 0x1E950, 0x1E959, LBP_NU }, { 0x1E95E, 0x1E95F, LBP_OP }, - { 0x1EC71, 0x1ECAB, LBP_AL }, - { 0x1ECAC, 0x1ECAC, LBP_PO }, - { 0x1ECAD, 0x1ECAF, LBP_AL }, - { 0x1ECB0, 0x1ECB0, LBP_PO }, - { 0x1ECB1, 0x1EEF1, LBP_AL }, + { 0x1EE00, 0x1EEF1, LBP_AL }, { 0x1F000, 0x1F0FF, LBP_ID }, { 0x1F100, 0x1F10C, LBP_AI }, { 0x1F10D, 0x1F10F, LBP_ID }, { 0x1F110, 0x1F12D, LBP_AI }, - { 0x1F12E, 0x1F12F, LBP_AL }, + { 0x1F12E, 0x1F12E, LBP_AL }, + { 0x1F12F, 0x1F12F, LBP_ID }, { 0x1F130, 0x1F169, LBP_AI }, - { 0x1F16A, 0x1F16C, LBP_AL }, - { 0x1F16D, 0x1F16F, LBP_ID }, + { 0x1F16A, 0x1F16B, LBP_AL }, + { 0x1F16C, 0x1F16F, LBP_ID }, { 0x1F170, 0x1F1AC, LBP_AI }, { 0x1F1AD, 0x1F1E5, LBP_ID }, { 0x1F1E6, 0x1F1FF, LBP_RI }, @@ -2206,31 +2115,29 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1F3B5, 0x1F3B6, LBP_AL }, { 0x1F3B7, 0x1F3BB, LBP_ID }, { 0x1F3BC, 0x1F3BC, LBP_AL }, - { 0x1F3BD, 0x1F3C1, LBP_ID }, - { 0x1F3C2, 0x1F3C4, LBP_EB }, - { 0x1F3C5, 0x1F3C6, LBP_ID }, - { 0x1F3C7, 0x1F3C7, LBP_EB }, - { 0x1F3C8, 0x1F3C9, LBP_ID }, - { 0x1F3CA, 0x1F3CC, LBP_EB }, - { 0x1F3CD, 0x1F3FA, LBP_ID }, + { 0x1F3BD, 0x1F3C2, LBP_ID }, + { 0x1F3C3, 0x1F3C4, LBP_EB }, + { 0x1F3C5, 0x1F3C9, LBP_ID }, + { 0x1F3CA, 0x1F3CB, LBP_EB }, + { 0x1F3CC, 0x1F3FA, LBP_ID }, { 0x1F3FB, 0x1F3FF, LBP_EM }, { 0x1F400, 0x1F441, LBP_ID }, { 0x1F442, 0x1F443, LBP_EB }, { 0x1F444, 0x1F445, LBP_ID }, { 0x1F446, 0x1F450, LBP_EB }, { 0x1F451, 0x1F465, LBP_ID }, - { 0x1F466, 0x1F478, LBP_EB }, + { 0x1F466, 0x1F469, LBP_EB }, + { 0x1F46A, 0x1F46D, LBP_ID }, + { 0x1F46E, 0x1F46E, LBP_EB }, + { 0x1F46F, 0x1F46F, LBP_ID }, + { 0x1F470, 0x1F478, LBP_EB }, { 0x1F479, 0x1F47B, LBP_ID }, { 0x1F47C, 0x1F47C, LBP_EB }, { 0x1F47D, 0x1F480, LBP_ID }, { 0x1F481, 0x1F483, LBP_EB }, { 0x1F484, 0x1F484, LBP_ID }, { 0x1F485, 0x1F487, LBP_EB }, - { 0x1F488, 0x1F48E, LBP_ID }, - { 0x1F48F, 0x1F48F, LBP_EB }, - { 0x1F490, 0x1F490, LBP_ID }, - { 0x1F491, 0x1F491, LBP_EB }, - { 0x1F492, 0x1F49F, LBP_ID }, + { 0x1F488, 0x1F49F, LBP_ID }, { 0x1F4A0, 0x1F4A0, LBP_AL }, { 0x1F4A1, 0x1F4A1, LBP_ID }, { 0x1F4A2, 0x1F4A2, LBP_AL }, @@ -2248,8 +2155,8 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1F517, 0x1F524, LBP_AL }, { 0x1F525, 0x1F531, LBP_ID }, { 0x1F532, 0x1F549, LBP_AL }, - { 0x1F54A, 0x1F573, LBP_ID }, - { 0x1F574, 0x1F575, LBP_EB }, + { 0x1F54A, 0x1F574, LBP_ID }, + { 0x1F575, 0x1F575, LBP_EB }, { 0x1F576, 0x1F579, LBP_ID }, { 0x1F57A, 0x1F57A, LBP_EB }, { 0x1F57B, 0x1F58F, LBP_ID }, @@ -2274,9 +2181,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1F6B4, 0x1F6B6, LBP_EB }, { 0x1F6B7, 0x1F6BF, LBP_ID }, { 0x1F6C0, 0x1F6C0, LBP_EB }, - { 0x1F6C1, 0x1F6CB, LBP_ID }, - { 0x1F6CC, 0x1F6CC, LBP_EB }, - { 0x1F6CD, 0x1F6FF, LBP_ID }, + { 0x1F6C1, 0x1F6FF, LBP_ID }, { 0x1F700, 0x1F773, LBP_AL }, { 0x1F774, 0x1F77F, LBP_ID }, { 0x1F780, 0x1F7D4, LBP_AL }, @@ -2290,31 +2195,17 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1F860, 0x1F887, LBP_AL }, { 0x1F888, 0x1F88F, LBP_ID }, { 0x1F890, 0x1F8AD, LBP_AL }, - { 0x1F8AE, 0x1F8FF, LBP_ID }, - { 0x1F900, 0x1F90B, LBP_AL }, - { 0x1F90C, 0x1F90E, LBP_ID }, - { 0x1F90F, 0x1F90F, LBP_EB }, - { 0x1F910, 0x1F917, LBP_ID }, - { 0x1F918, 0x1F91F, LBP_EB }, - { 0x1F920, 0x1F925, LBP_ID }, + { 0x1F8AE, 0x1F917, LBP_ID }, + { 0x1F918, 0x1F91E, LBP_EB }, + { 0x1F91F, 0x1F925, LBP_ID }, { 0x1F926, 0x1F926, LBP_EB }, { 0x1F927, 0x1F92F, LBP_ID }, - { 0x1F930, 0x1F939, LBP_EB }, + { 0x1F930, 0x1F930, LBP_EB }, + { 0x1F931, 0x1F932, LBP_ID }, + { 0x1F933, 0x1F939, LBP_EB }, { 0x1F93A, 0x1F93B, LBP_ID }, { 0x1F93C, 0x1F93E, LBP_EB }, - { 0x1F93F, 0x1F9B4, LBP_ID }, - { 0x1F9B5, 0x1F9B6, LBP_EB }, - { 0x1F9B7, 0x1F9B7, LBP_ID }, - { 0x1F9B8, 0x1F9B9, LBP_EB }, - { 0x1F9BA, 0x1F9BA, LBP_ID }, - { 0x1F9BB, 0x1F9BB, LBP_EB }, - { 0x1F9BC, 0x1F9CC, LBP_ID }, - { 0x1F9CD, 0x1F9CF, LBP_EB }, - { 0x1F9D0, 0x1F9D0, LBP_ID }, - { 0x1F9D1, 0x1F9DD, LBP_EB }, - { 0x1F9DE, 0x1F9FF, LBP_ID }, - { 0x1FA00, 0x1FA53, LBP_AL }, - { 0x1FA54, 0x3FFFD, LBP_ID }, + { 0x1F93F, 0x3FFFD, LBP_ID }, { 0xE0001, 0xE01EF, LBP_CM }, { 0xF0000, 0x10FFFD, LBP_XX }, { 0xFFFFFFFF, 0xFFFFFFFF, LBP_Undefined } diff --git a/src/static_libs/libunibreak/linebreakdef.c b/src/static_libs/libunibreak/linebreakdef.c index 847621ed95..6b485cecbd 100644 --- a/src/static_libs/libunibreak/linebreakdef.c +++ b/src/static_libs/libunibreak/linebreakdef.c @@ -4,7 +4,7 @@ * Line breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2008-2018 Wu Yongwei <wuyongwei at gmail dot com> + * Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com> * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -22,6 +22,20 @@ * not be misrepresented as being the original software. * 3. This notice may not be removed or altered from any source * distribution. + * + * The main reference is Unicode Standard Annex 14 (UAX #14): + * <URL:http://www.unicode.org/reports/tr14/> + * + * When this library was designed, this annex was at Revision 19, for + * Unicode 5.0.0: + * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> + * + * This library has been updated according to Revision 37, for + * Unicode 9.0.0: + * <URL:http://www.unicode.org/reports/tr14/tr14-37.html> + * + * The Unicode Terms of Use are available at + * <URL:http://www.unicode.org/copyright.html> */ /** @@ -52,7 +66,6 @@ static const struct LineBreakProperties lb_prop_German[] = { { 0x00AB, 0x00AB, LBP_CL }, /* Left double angle quotation mark: closing */ { 0x00BB, 0x00BB, LBP_OP }, /* Right double angle quotation mark: opening */ { 0x2018, 0x2018, LBP_CL }, /* Left single quotation mark: closing */ - { 0x2019, 0x2019, LBP_GL }, /* Right single quotation mark: glue */ { 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */ { 0x2039, 0x2039, LBP_CL }, /* Left single angle quotation mark: closing */ { 0x203A, 0x203A, LBP_OP }, /* Right single angle quotation mark: opening */ diff --git a/src/static_libs/libunibreak/linebreakdef.h b/src/static_libs/libunibreak/linebreakdef.h index 48d714ef29..37ec7b546e 100644 --- a/src/static_libs/libunibreak/linebreakdef.h +++ b/src/static_libs/libunibreak/linebreakdef.h @@ -4,7 +4,7 @@ * Line breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2008-2018 Wu Yongwei <wuyongwei at gmail dot com> + * Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com> * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com> * * This software is provided 'as-is', without any express or implied @@ -31,9 +31,9 @@ * Unicode 5.0.0: * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> * - * This library has been updated according to Revision 43, for - * Unicode 12.0.0: - * <URL:http://www.unicode.org/reports/tr14/tr14-43.html> + * This library has been updated according to Revision 37, for + * Unicode 9.0.0: + * <URL:http://www.unicode.org/reports/tr14/tr14-37.html> * * The Unicode Terms of Use are available at * <URL:http://www.unicode.org/copyright.html> @@ -52,8 +52,8 @@ #include "unibreakdef.h" /** - * Line break classes. This is a mapping of Table 1 of Unicode - * Standard Annex 14. + * Line break classes. This is a direct mapping of Table 1 of Unicode + * Standard Annex 14, Revision 26. */ enum LineBreakClass { @@ -95,7 +95,7 @@ enum LineBreakClass LBP_ZWJ, /**< Zero width joiner */ /* The following break class is treated in the pair table, but it is - * not part of Table 2 of UAX #14-37. */ + * not part of Table 2 of UAX #14. */ LBP_CB, /**< Contingent break */ /* The following break classes are not treated in the pair table */ @@ -117,8 +117,8 @@ enum LineBreakClass */ struct LineBreakProperties { - utf32_t start; /**< Start codepoint */ - utf32_t end; /**< End codepoint, inclusive */ + utf32_t start; /**< Starting coding point */ + utf32_t end; /**< End coding point */ enum LineBreakClass prop; /**< The line breaking property */ }; @@ -140,14 +140,11 @@ struct LineBreakPropertiesLang struct LineBreakContext { const char *lang; /**< Language name */ - const struct LineBreakProperties *lbpLang; /**< Pointer to - LineBreakProperties */ + const struct LineBreakProperties *lbpLang;/**< Pointer to LineBreakProperties */ enum LineBreakClass lbcCur; /**< Breaking class of current codepoint */ enum LineBreakClass lbcNew; /**< Breaking class of next codepoint */ enum LineBreakClass lbcLast; /**< Breaking class of last codepoint */ - bool fLb8aZwj; /**< Flag for ZWJ (LB8a) */ - bool fLb10LeadSpace; /**< Flag for leading space (LB10) */ - bool fLb21aHebrew; /**< Flag for Hebrew letters (LB21a) */ + int fLb21aHebrew; /**< Flag for Hebrew letters (LB21a) */ int cLb30aRI; /**< Count of RI characters (LB30a) */ }; diff --git a/src/static_libs/libunibreak/meson.build b/src/static_libs/libunibreak/meson.build index cca9c1f6a0..a2d5c3e3f9 100644 --- a/src/static_libs/libunibreak/meson.build +++ b/src/static_libs/libunibreak/meson.build @@ -15,8 +15,6 @@ libunibreak_src = [ 'graphemebreak.c', 'graphemebreak.h', 'graphemebreakdef.h', - 'emojidef.h', - 'emojidef.c', ] libunibreak_lib = static_library('libunibreak', diff --git a/src/static_libs/libunibreak/unibreakbase.c b/src/static_libs/libunibreak/unibreakbase.c index ef24c90047..686852a990 100644 --- a/src/static_libs/libunibreak/unibreakbase.c +++ b/src/static_libs/libunibreak/unibreakbase.c @@ -4,7 +4,7 @@ * Break processing in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2015-2018 Wu Yongwei <wuyongwei at gmail dot com> + * Copyright (C) 2015-2016 Wu Yongwei <wuyongwei at gmail dot com> * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages diff --git a/src/static_libs/libunibreak/unibreakbase.h b/src/static_libs/libunibreak/unibreakbase.h index a00a5bdb6b..ff9a6ce8a9 100644 --- a/src/static_libs/libunibreak/unibreakbase.h +++ b/src/static_libs/libunibreak/unibreakbase.h @@ -4,7 +4,7 @@ * Break processing in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2015-2019 Wu Yongwei <wuyongwei at gmail dot com> + * Copyright (C) 2015-2016 Wu Yongwei <wuyongwei at gmail dot com> * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -39,7 +39,7 @@ extern "C" { #endif -#define UNIBREAK_VERSION 0x0402 /**< Version of the libunibreak */ +#define UNIBREAK_VERSION 0x0400 /**< Version of the library linebreak */ extern const int unibreak_version; #ifndef UNIBREAK_UTF_TYPES_DEFINED diff --git a/src/static_libs/libunibreak/unibreakdef.h b/src/static_libs/libunibreak/unibreakdef.h index 5f3533e5dd..e13016d8cd 100644 --- a/src/static_libs/libunibreak/unibreakdef.h +++ b/src/static_libs/libunibreak/unibreakdef.h @@ -4,7 +4,7 @@ * Break processing in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2015-2018 Wu Yongwei <wuyongwei at gmail dot com> + * Copyright (C) 2015-2016 Wu Yongwei <wuyongwei at gmail dot com> * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -35,19 +35,9 @@ #ifndef UNIBREAKDEF_H #define UNIBREAKDEF_H -#if defined(_MSC_VER) && _MSC_VER < 1800 -typedef int bool; -#define false 0 -#define true 1 -#else -#include <stdbool.h> -#endif - #include <stddef.h> #include "unibreakbase.h" -#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0])) - #ifdef __cplusplus extern "C" { #endif diff --git a/src/static_libs/libunibreak/wordbreak.c b/src/static_libs/libunibreak/wordbreak.c index d4e22495c6..50c830c7cc 100644 --- a/src/static_libs/libunibreak/wordbreak.c +++ b/src/static_libs/libunibreak/wordbreak.c @@ -4,8 +4,7 @@ * Word breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2013-2019 Tom Hacohen <tom at stosb dot com> - * Copyright (C) 2018 Wu Yongwei <wuyongwei at gmail dot com> + * Copyright (C) 2013-2016 Tom Hacohen <tom at stosb dot com> * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -31,9 +30,9 @@ * Unicode 6.0.0: * <URL:http://www.unicode.org/reports/tr29/tr29-17.html> * - * This library has been updated according to Revision 35, for - * Unicode 12.0.0: - * <URL:http://www.unicode.org/reports/tr29/tr29-35.html> + * This library has been updated according to Revision 29, for + * Unicode 9.0.0: + * <URL:http://www.unicode.org/reports/tr29/tr29-29.html> * * The Unicode Terms of Use are available at * <URL:http://www.unicode.org/copyright.html> @@ -54,7 +53,8 @@ #include "unibreakdef.h" #include "wordbreak.h" #include "wordbreakdata.c" -#include "emojidef.h" + +#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0])) /** * Initializes the wordbreak internals. It currently does nothing, but @@ -215,7 +215,7 @@ static void set_wordbreaks( #if __has_attribute(fallthrough) __attribute__((fallthrough)); #endif - /* Fall through */ + /* Fall off */ case WBP_Newline: /* WB3a,3b */ @@ -225,6 +225,24 @@ static void set_wordbreaks( posLast = posCur; break; + case WBP_E_Base_GAZ: + case WBP_Glue_After_Zwj: + /* WB3c */ + if (wbcLast == WBP_ZWJ) + { + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_NOBREAK, get_next_char); + } + /* No rule found, reset */ + else + { + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); + } + wbcSeqStart = wbcCur; + posLast = posCur; + break; + case WBP_ZWJ: case WBP_Extend: case WBP_Format: @@ -242,10 +260,8 @@ static void set_wordbreaks( { /* It's surely not the first */ brks[posCur - 1] = WORDBREAK_NOBREAK; - /* WB3c and WB3d precede 4, so no intervening Extend - * chars allowed. */ - if (wbcCur != WBP_ZWJ && wbcSeqStart != WBP_ZWJ && - wbcSeqStart != WBP_WSegSpace) + /* WB3c precedes 4, so no intervening Extend chars allowed. */ + if (wbcSeqStart != WBP_ZWJ) { /* "inherit" the previous class. */ wbcCur = wbcLast; @@ -318,8 +334,7 @@ static void set_wordbreaks( #if __has_attribute(fallthrough) __attribute__((fallthrough)); #endif - /* Fall through */ - + /* No break on purpose */ case WBP_MidNumLet: if (((wbcLast == WBP_ALetter) || (wbcLast == WBP_Hebrew_Letter)) || /* WB6,7 */ @@ -406,6 +421,32 @@ static void set_wordbreaks( posLast = posCur; break; + case WBP_E_Base: + /* No rule found, reset */ + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); + wbcSeqStart = wbcCur; + posLast = posCur; + break; + + case WBP_E_Modifier: + /* WB14 */ + if ((wbcLast == WBP_E_Base) || + (wbcLast == WBP_E_Base_GAZ)) + { + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_NOBREAK, get_next_char); + } + /* No rule found, reset */ + else + { + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); + } + wbcSeqStart = wbcCur; + posLast = posCur; + break; + case WBP_Regional_Indicator: /* WB15,16 */ if ((wbcSeqStart == WBP_Regional_Indicator) && @@ -440,32 +481,7 @@ static void set_wordbreaks( } break; - case WBP_WSegSpace: - if (wbcLast == WBP_WSegSpace) /* WB3d */ - { - set_brks_to(s, brks, posLast, posCur, len, - WORDBREAK_NOBREAK, get_next_char); - posLast = posCur; - break; - } -#ifndef __has_attribute -# define __has_attribute(x) 0 -#endif -#if __has_attribute(fallthrough) - __attribute__((fallthrough)); -#endif - /* Fall through */ - case WBP_Any: - /* Check for rule WB3c */ - if (wbcLast == WBP_ZWJ && ub_is_extended_pictographic(ch)) - { - set_brks_to(s, brks, posLast, posCur, len, - WORDBREAK_NOBREAK, get_next_char); - posLast = posCur; - break; - } - /* Allow breaks and reset */ set_brks_to(s, brks, posLast, posCur, len, WORDBREAK_BREAK, get_next_char); diff --git a/src/static_libs/libunibreak/wordbreak.h b/src/static_libs/libunibreak/wordbreak.h index 021de4d751..1040c13280 100644 --- a/src/static_libs/libunibreak/wordbreak.h +++ b/src/static_libs/libunibreak/wordbreak.h @@ -4,8 +4,7 @@ * Word breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2013-2019 Tom Hacohen <tom at stosb dot com> - * Copyright (C) 2018 Wu Yongwei <wuyongwei at gmail dot com> + * Copyright (C) 2013-2016 Tom Hacohen <tom at stosb dot com> * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -31,9 +30,9 @@ * Unicode 6.0.0: * <URL:http://www.unicode.org/reports/tr29/tr29-17.html> * - * This library has been updated according to Revision 35, for - * Unicode 12.0.0: - * <URL:http://www.unicode.org/reports/tr29/tr29-35.html> + * This library has been updated according to Revision 29, for + * Unicode 9.0.0: + * <URL:http://www.unicode.org/reports/tr29/tr29-29.html> * * The Unicode Terms of Use are available at * <URL:http://www.unicode.org/copyright.html> @@ -73,4 +72,4 @@ void set_wordbreaks_utf32( } #endif -#endif /* WORDBREAK_H */ +#endif diff --git a/src/static_libs/libunibreak/wordbreakdata.c b/src/static_libs/libunibreak/wordbreakdata.c index f5ee889589..99fcff5bad 100644 --- a/src/static_libs/libunibreak/wordbreakdata.c +++ b/src/static_libs/libunibreak/wordbreakdata.c @@ -1,6 +1,6 @@ /* The content of this file is generated from: -# WordBreakProperty-12.1.0.txt -# Date: 2019-03-10, 10:53:28 GMT +# WordBreakProperty-9.0.0.txt +# Date: 2016-06-01, 10:34:38 GMT */ #include "wordbreakdef.h" @@ -9,7 +9,6 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x000A, 0x000A, WBP_LF}, {0x000B, 0x000C, WBP_Newline}, {0x000D, 0x000D, WBP_CR}, - {0x0020, 0x0020, WBP_WSegSpace}, {0x0022, 0x0022, WBP_Double_Quote}, {0x0027, 0x0027, WBP_Single_Quote}, {0x002C, 0x002C, WBP_MidNum}, @@ -36,15 +35,11 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x0294, 0x0294, WBP_ALetter}, {0x0295, 0x02AF, WBP_ALetter}, {0x02B0, 0x02C1, WBP_ALetter}, - {0x02C2, 0x02C5, WBP_ALetter}, {0x02C6, 0x02D1, WBP_ALetter}, - {0x02D2, 0x02D7, WBP_ALetter}, - {0x02DE, 0x02DF, WBP_ALetter}, + {0x02D7, 0x02D7, WBP_MidLetter}, {0x02E0, 0x02E4, WBP_ALetter}, {0x02EC, 0x02EC, WBP_ALetter}, - {0x02ED, 0x02ED, WBP_ALetter}, {0x02EE, 0x02EE, WBP_ALetter}, - {0x02EF, 0x02FF, WBP_ALetter}, {0x0300, 0x036F, WBP_Extend}, {0x0370, 0x0373, WBP_ALetter}, {0x0374, 0x0374, WBP_ALetter}, @@ -65,9 +60,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x048A, 0x052F, WBP_ALetter}, {0x0531, 0x0556, WBP_ALetter}, {0x0559, 0x0559, WBP_ALetter}, - {0x055B, 0x055C, WBP_ALetter}, - {0x055E, 0x055E, WBP_ALetter}, - {0x0560, 0x0588, WBP_ALetter}, + {0x0561, 0x0587, WBP_ALetter}, {0x0589, 0x0589, WBP_MidNum}, {0x0591, 0x05BD, WBP_Extend}, {0x05BF, 0x05BF, WBP_Extend}, @@ -75,7 +68,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x05C4, 0x05C5, WBP_Extend}, {0x05C7, 0x05C7, WBP_Extend}, {0x05D0, 0x05EA, WBP_Hebrew_Letter}, - {0x05EF, 0x05F2, WBP_Hebrew_Letter}, + {0x05F0, 0x05F2, WBP_Hebrew_Letter}, {0x05F3, 0x05F3, WBP_ALetter}, {0x05F4, 0x05F4, WBP_MidLetter}, {0x0600, 0x0605, WBP_Format}, @@ -117,7 +110,6 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x07F4, 0x07F5, WBP_ALetter}, {0x07F8, 0x07F8, WBP_MidNum}, {0x07FA, 0x07FA, WBP_ALetter}, - {0x07FD, 0x07FD, WBP_Extend}, {0x0800, 0x0815, WBP_ALetter}, {0x0816, 0x0819, WBP_Extend}, {0x081A, 0x081A, WBP_ALetter}, @@ -128,10 +120,9 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x0829, 0x082D, WBP_Extend}, {0x0840, 0x0858, WBP_ALetter}, {0x0859, 0x085B, WBP_Extend}, - {0x0860, 0x086A, WBP_ALetter}, {0x08A0, 0x08B4, WBP_ALetter}, {0x08B6, 0x08BD, WBP_ALetter}, - {0x08D3, 0x08E1, WBP_Extend}, + {0x08D4, 0x08E1, WBP_Extend}, {0x08E2, 0x08E2, WBP_Format}, {0x08E3, 0x0902, WBP_Extend}, {0x0903, 0x0903, WBP_Extend}, @@ -174,8 +165,6 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x09E2, 0x09E3, WBP_Extend}, {0x09E6, 0x09EF, WBP_Numeric}, {0x09F0, 0x09F1, WBP_ALetter}, - {0x09FC, 0x09FC, WBP_ALetter}, - {0x09FE, 0x09FE, WBP_Extend}, {0x0A01, 0x0A02, WBP_Extend}, {0x0A03, 0x0A03, WBP_Extend}, {0x0A05, 0x0A0A, WBP_ALetter}, @@ -218,7 +207,6 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x0AE2, 0x0AE3, WBP_Extend}, {0x0AE6, 0x0AEF, WBP_Numeric}, {0x0AF9, 0x0AF9, WBP_ALetter}, - {0x0AFA, 0x0AFF, WBP_Extend}, {0x0B01, 0x0B01, WBP_Extend}, {0x0B02, 0x0B03, WBP_Extend}, {0x0B05, 0x0B0C, WBP_ALetter}, @@ -265,7 +253,6 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x0BE6, 0x0BEF, WBP_Numeric}, {0x0C00, 0x0C00, WBP_Extend}, {0x0C01, 0x0C03, WBP_Extend}, - {0x0C04, 0x0C04, WBP_Extend}, {0x0C05, 0x0C0C, WBP_ALetter}, {0x0C0E, 0x0C10, WBP_ALetter}, {0x0C12, 0x0C28, WBP_ALetter}, @@ -303,12 +290,11 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x0CE2, 0x0CE3, WBP_Extend}, {0x0CE6, 0x0CEF, WBP_Numeric}, {0x0CF1, 0x0CF2, WBP_ALetter}, - {0x0D00, 0x0D01, WBP_Extend}, + {0x0D01, 0x0D01, WBP_Extend}, {0x0D02, 0x0D03, WBP_Extend}, {0x0D05, 0x0D0C, WBP_ALetter}, {0x0D0E, 0x0D10, WBP_ALetter}, {0x0D12, 0x0D3A, WBP_ALetter}, - {0x0D3B, 0x0D3C, WBP_Extend}, {0x0D3D, 0x0D3D, WBP_ALetter}, {0x0D3E, 0x0D40, WBP_Extend}, {0x0D41, 0x0D44, WBP_Extend}, @@ -340,7 +326,8 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x0E47, 0x0E4E, WBP_Extend}, {0x0E50, 0x0E59, WBP_Numeric}, {0x0EB1, 0x0EB1, WBP_Extend}, - {0x0EB4, 0x0EBC, WBP_Extend}, + {0x0EB4, 0x0EB9, WBP_Extend}, + {0x0EBB, 0x0EBC, WBP_Extend}, {0x0EC8, 0x0ECD, WBP_Extend}, {0x0ED0, 0x0ED9, WBP_Numeric}, {0x0F00, 0x0F00, WBP_ALetter}, @@ -389,8 +376,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x10CD, 0x10CD, WBP_ALetter}, {0x10D0, 0x10FA, WBP_ALetter}, {0x10FC, 0x10FC, WBP_ALetter}, - {0x10FD, 0x10FF, WBP_ALetter}, - {0x1100, 0x1248, WBP_ALetter}, + {0x10FD, 0x1248, WBP_ALetter}, {0x124A, 0x124D, WBP_ALetter}, {0x1250, 0x1256, WBP_ALetter}, {0x1258, 0x1258, WBP_ALetter}, @@ -412,7 +398,6 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x13F8, 0x13FD, WBP_ALetter}, {0x1401, 0x166C, WBP_ALetter}, {0x166F, 0x167F, WBP_ALetter}, - {0x1680, 0x1680, WBP_WSegSpace}, {0x1681, 0x169A, WBP_ALetter}, {0x16A0, 0x16EA, WBP_ALetter}, {0x16EE, 0x16F0, WBP_ALetter}, @@ -441,7 +426,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1810, 0x1819, WBP_Numeric}, {0x1820, 0x1842, WBP_ALetter}, {0x1843, 0x1843, WBP_ALetter}, - {0x1844, 0x1878, WBP_ALetter}, + {0x1844, 0x1877, WBP_ALetter}, {0x1880, 0x1884, WBP_ALetter}, {0x1885, 0x1886, WBP_Extend}, {0x1887, 0x18A8, WBP_ALetter}, @@ -524,27 +509,24 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1C5A, 0x1C77, WBP_ALetter}, {0x1C78, 0x1C7D, WBP_ALetter}, {0x1C80, 0x1C88, WBP_ALetter}, - {0x1C90, 0x1CBA, WBP_ALetter}, - {0x1CBD, 0x1CBF, WBP_ALetter}, {0x1CD0, 0x1CD2, WBP_Extend}, {0x1CD4, 0x1CE0, WBP_Extend}, {0x1CE1, 0x1CE1, WBP_Extend}, {0x1CE2, 0x1CE8, WBP_Extend}, {0x1CE9, 0x1CEC, WBP_ALetter}, {0x1CED, 0x1CED, WBP_Extend}, - {0x1CEE, 0x1CF3, WBP_ALetter}, + {0x1CEE, 0x1CF1, WBP_ALetter}, + {0x1CF2, 0x1CF3, WBP_Extend}, {0x1CF4, 0x1CF4, WBP_Extend}, {0x1CF5, 0x1CF6, WBP_ALetter}, - {0x1CF7, 0x1CF7, WBP_Extend}, {0x1CF8, 0x1CF9, WBP_Extend}, - {0x1CFA, 0x1CFA, WBP_ALetter}, {0x1D00, 0x1D2B, WBP_ALetter}, {0x1D2C, 0x1D6A, WBP_ALetter}, {0x1D6B, 0x1D77, WBP_ALetter}, {0x1D78, 0x1D78, WBP_ALetter}, {0x1D79, 0x1D9A, WBP_ALetter}, {0x1D9B, 0x1DBF, WBP_ALetter}, - {0x1DC0, 0x1DF9, WBP_Extend}, + {0x1DC0, 0x1DF5, WBP_Extend}, {0x1DFB, 0x1DFF, WBP_Extend}, {0x1E00, 0x1F15, WBP_ALetter}, {0x1F18, 0x1F1D, WBP_ALetter}, @@ -565,8 +547,6 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1FE0, 0x1FEC, WBP_ALetter}, {0x1FF2, 0x1FF4, WBP_ALetter}, {0x1FF6, 0x1FFC, WBP_ALetter}, - {0x2000, 0x2006, WBP_WSegSpace}, - {0x2008, 0x200A, WBP_WSegSpace}, {0x200C, 0x200C, WBP_Extend}, {0x200D, 0x200D, WBP_ZWJ}, {0x200E, 0x200F, WBP_Format}, @@ -581,7 +561,6 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x203F, 0x2040, WBP_ExtendNumLet}, {0x2044, 0x2044, WBP_MidNum}, {0x2054, 0x2054, WBP_ExtendNumLet}, - {0x205F, 0x205F, WBP_WSegSpace}, {0x2060, 0x2064, WBP_Format}, {0x2066, 0x206F, WBP_Format}, {0x2071, 0x2071, WBP_ALetter}, @@ -611,6 +590,10 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x2183, 0x2184, WBP_ALetter}, {0x2185, 0x2188, WBP_ALetter}, {0x24B6, 0x24E9, WBP_ALetter}, + {0x261D, 0x261D, WBP_E_Base}, + {0x26F9, 0x26F9, WBP_E_Base}, + {0x270A, 0x270D, WBP_E_Base}, + {0x2764, 0x2764, WBP_Glue_After_Zwj}, {0x2C00, 0x2C2E, WBP_ALetter}, {0x2C30, 0x2C5E, WBP_ALetter}, {0x2C60, 0x2C7B, WBP_ALetter}, @@ -636,7 +619,6 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x2DD8, 0x2DDE, WBP_ALetter}, {0x2DE0, 0x2DFF, WBP_Extend}, {0x2E2F, 0x2E2F, WBP_ALetter}, - {0x3000, 0x3000, WBP_WSegSpace}, {0x3005, 0x3005, WBP_ALetter}, {0x302A, 0x302D, WBP_Extend}, {0x302E, 0x302F, WBP_Extend}, @@ -649,7 +631,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x30A1, 0x30FA, WBP_Katakana}, {0x30FC, 0x30FE, WBP_Katakana}, {0x30FF, 0x30FF, WBP_Katakana}, - {0x3105, 0x312F, WBP_ALetter}, + {0x3105, 0x312D, WBP_ALetter}, {0x3131, 0x318E, WBP_ALetter}, {0x31A0, 0x31BA, WBP_ALetter}, {0x31F0, 0x31FF, WBP_Katakana}, @@ -678,16 +660,14 @@ static const struct WordBreakProperties wb_prop_default[] = { {0xA6E6, 0xA6EF, WBP_ALetter}, {0xA6F0, 0xA6F1, WBP_Extend}, {0xA717, 0xA71F, WBP_ALetter}, - {0xA720, 0xA721, WBP_ALetter}, {0xA722, 0xA76F, WBP_ALetter}, {0xA770, 0xA770, WBP_ALetter}, {0xA771, 0xA787, WBP_ALetter}, {0xA788, 0xA788, WBP_ALetter}, - {0xA789, 0xA78A, WBP_ALetter}, {0xA78B, 0xA78E, WBP_ALetter}, {0xA78F, 0xA78F, WBP_ALetter}, - {0xA790, 0xA7BF, WBP_ALetter}, - {0xA7C2, 0xA7C6, WBP_ALetter}, + {0xA790, 0xA7AE, WBP_ALetter}, + {0xA7B0, 0xA7B7, WBP_ALetter}, {0xA7F7, 0xA7F7, WBP_ALetter}, {0xA7F8, 0xA7F9, WBP_ALetter}, {0xA7FA, 0xA7FA, WBP_ALetter}, @@ -710,8 +690,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0xA8E0, 0xA8F1, WBP_Extend}, {0xA8F2, 0xA8F7, WBP_ALetter}, {0xA8FB, 0xA8FB, WBP_ALetter}, - {0xA8FD, 0xA8FE, WBP_ALetter}, - {0xA8FF, 0xA8FF, WBP_Extend}, + {0xA8FD, 0xA8FD, WBP_ALetter}, {0xA900, 0xA909, WBP_Numeric}, {0xA90A, 0xA925, WBP_ALetter}, {0xA926, 0xA92D, WBP_Extend}, @@ -726,8 +705,8 @@ static const struct WordBreakProperties wb_prop_default[] = { {0xA9B4, 0xA9B5, WBP_Extend}, {0xA9B6, 0xA9B9, WBP_Extend}, {0xA9BA, 0xA9BB, WBP_Extend}, - {0xA9BC, 0xA9BD, WBP_Extend}, - {0xA9BE, 0xA9C0, WBP_Extend}, + {0xA9BC, 0xA9BC, WBP_Extend}, + {0xA9BD, 0xA9C0, WBP_Extend}, {0xA9CF, 0xA9CF, WBP_ALetter}, {0xA9D0, 0xA9D9, WBP_Numeric}, {0xA9E5, 0xA9E5, WBP_Extend}, @@ -766,9 +745,8 @@ static const struct WordBreakProperties wb_prop_default[] = { {0xAB20, 0xAB26, WBP_ALetter}, {0xAB28, 0xAB2E, WBP_ALetter}, {0xAB30, 0xAB5A, WBP_ALetter}, - {0xAB5B, 0xAB5B, WBP_ALetter}, {0xAB5C, 0xAB5F, WBP_ALetter}, - {0xAB60, 0xAB67, WBP_ALetter}, + {0xAB60, 0xAB65, WBP_ALetter}, {0xAB70, 0xABBF, WBP_ALetter}, {0xABC0, 0xABE2, WBP_ALetter}, {0xABE3, 0xABE4, WBP_Extend}, @@ -815,7 +793,6 @@ static const struct WordBreakProperties wb_prop_default[] = { {0xFF07, 0xFF07, WBP_MidNumLet}, {0xFF0C, 0xFF0C, WBP_MidNum}, {0xFF0E, 0xFF0E, WBP_MidNumLet}, - {0xFF10, 0xFF19, WBP_Numeric}, {0xFF1A, 0xFF1A, WBP_MidLetter}, {0xFF1B, 0xFF1B, WBP_MidNum}, {0xFF21, 0xFF3A, WBP_ALetter}, @@ -844,7 +821,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x102A0, 0x102D0, WBP_ALetter}, {0x102E0, 0x102E0, WBP_Extend}, {0x10300, 0x1031F, WBP_ALetter}, - {0x1032D, 0x10340, WBP_ALetter}, + {0x10330, 0x10340, WBP_ALetter}, {0x10341, 0x10341, WBP_ALetter}, {0x10342, 0x10349, WBP_ALetter}, {0x1034A, 0x1034A, WBP_ALetter}, @@ -884,7 +861,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x10A0C, 0x10A0F, WBP_Extend}, {0x10A10, 0x10A13, WBP_ALetter}, {0x10A15, 0x10A17, WBP_ALetter}, - {0x10A19, 0x10A35, WBP_ALetter}, + {0x10A19, 0x10A33, WBP_ALetter}, {0x10A38, 0x10A3A, WBP_Extend}, {0x10A3F, 0x10A3F, WBP_Extend}, {0x10A60, 0x10A7C, WBP_ALetter}, @@ -899,14 +876,6 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x10C00, 0x10C48, WBP_ALetter}, {0x10C80, 0x10CB2, WBP_ALetter}, {0x10CC0, 0x10CF2, WBP_ALetter}, - {0x10D00, 0x10D23, WBP_ALetter}, - {0x10D24, 0x10D27, WBP_Extend}, - {0x10D30, 0x10D39, WBP_Numeric}, - {0x10F00, 0x10F1C, WBP_ALetter}, - {0x10F27, 0x10F27, WBP_ALetter}, - {0x10F30, 0x10F45, WBP_ALetter}, - {0x10F46, 0x10F50, WBP_Extend}, - {0x10FE0, 0x10FF6, WBP_ALetter}, {0x11000, 0x11000, WBP_Extend}, {0x11001, 0x11001, WBP_Extend}, {0x11002, 0x11002, WBP_Extend}, @@ -921,7 +890,6 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x110B7, 0x110B8, WBP_Extend}, {0x110B9, 0x110BA, WBP_Extend}, {0x110BD, 0x110BD, WBP_Format}, - {0x110CD, 0x110CD, WBP_Format}, {0x110D0, 0x110E8, WBP_ALetter}, {0x110F0, 0x110F9, WBP_Numeric}, {0x11100, 0x11102, WBP_Extend}, @@ -930,8 +898,6 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1112C, 0x1112C, WBP_Extend}, {0x1112D, 0x11134, WBP_Extend}, {0x11136, 0x1113F, WBP_Numeric}, - {0x11144, 0x11144, WBP_ALetter}, - {0x11145, 0x11146, WBP_Extend}, {0x11150, 0x11172, WBP_ALetter}, {0x11173, 0x11173, WBP_Extend}, {0x11176, 0x11176, WBP_ALetter}, @@ -942,7 +908,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x111B6, 0x111BE, WBP_Extend}, {0x111BF, 0x111C0, WBP_Extend}, {0x111C1, 0x111C4, WBP_ALetter}, - {0x111C9, 0x111CC, WBP_Extend}, + {0x111CA, 0x111CC, WBP_Extend}, {0x111D0, 0x111D9, WBP_Numeric}, {0x111DA, 0x111DA, WBP_ALetter}, {0x111DC, 0x111DC, WBP_ALetter}, @@ -973,7 +939,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1132A, 0x11330, WBP_ALetter}, {0x11332, 0x11333, WBP_ALetter}, {0x11335, 0x11339, WBP_ALetter}, - {0x1133B, 0x1133C, WBP_Extend}, + {0x1133C, 0x1133C, WBP_Extend}, {0x1133D, 0x1133D, WBP_ALetter}, {0x1133E, 0x1133F, WBP_Extend}, {0x11340, 0x11340, WBP_Extend}, @@ -995,8 +961,6 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x11446, 0x11446, WBP_Extend}, {0x11447, 0x1144A, WBP_ALetter}, {0x11450, 0x11459, WBP_Numeric}, - {0x1145E, 0x1145E, WBP_Extend}, - {0x1145F, 0x1145F, WBP_ALetter}, {0x11480, 0x114AF, WBP_ALetter}, {0x114B0, 0x114B2, WBP_Extend}, {0x114B3, 0x114B8, WBP_Extend}, @@ -1035,7 +999,6 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x116B0, 0x116B5, WBP_Extend}, {0x116B6, 0x116B6, WBP_Extend}, {0x116B7, 0x116B7, WBP_Extend}, - {0x116B8, 0x116B8, WBP_ALetter}, {0x116C0, 0x116C9, WBP_Numeric}, {0x1171D, 0x1171F, WBP_Extend}, {0x11720, 0x11721, WBP_Extend}, @@ -1043,41 +1006,9 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x11726, 0x11726, WBP_Extend}, {0x11727, 0x1172B, WBP_Extend}, {0x11730, 0x11739, WBP_Numeric}, - {0x11800, 0x1182B, WBP_ALetter}, - {0x1182C, 0x1182E, WBP_Extend}, - {0x1182F, 0x11837, WBP_Extend}, - {0x11838, 0x11838, WBP_Extend}, - {0x11839, 0x1183A, WBP_Extend}, {0x118A0, 0x118DF, WBP_ALetter}, {0x118E0, 0x118E9, WBP_Numeric}, {0x118FF, 0x118FF, WBP_ALetter}, - {0x119A0, 0x119A7, WBP_ALetter}, - {0x119AA, 0x119D0, WBP_ALetter}, - {0x119D1, 0x119D3, WBP_Extend}, - {0x119D4, 0x119D7, WBP_Extend}, - {0x119DA, 0x119DB, WBP_Extend}, - {0x119DC, 0x119DF, WBP_Extend}, - {0x119E0, 0x119E0, WBP_Extend}, - {0x119E1, 0x119E1, WBP_ALetter}, - {0x119E3, 0x119E3, WBP_ALetter}, - {0x119E4, 0x119E4, WBP_Extend}, - {0x11A00, 0x11A00, WBP_ALetter}, - {0x11A01, 0x11A0A, WBP_Extend}, - {0x11A0B, 0x11A32, WBP_ALetter}, - {0x11A33, 0x11A38, WBP_Extend}, - {0x11A39, 0x11A39, WBP_Extend}, - {0x11A3A, 0x11A3A, WBP_ALetter}, - {0x11A3B, 0x11A3E, WBP_Extend}, - {0x11A47, 0x11A47, WBP_Extend}, - {0x11A50, 0x11A50, WBP_ALetter}, - {0x11A51, 0x11A56, WBP_Extend}, - {0x11A57, 0x11A58, WBP_Extend}, - {0x11A59, 0x11A5B, WBP_Extend}, - {0x11A5C, 0x11A89, WBP_ALetter}, - {0x11A8A, 0x11A96, WBP_Extend}, - {0x11A97, 0x11A97, WBP_Extend}, - {0x11A98, 0x11A99, WBP_Extend}, - {0x11A9D, 0x11A9D, WBP_ALetter}, {0x11AC0, 0x11AF8, WBP_ALetter}, {0x11C00, 0x11C08, WBP_ALetter}, {0x11C0A, 0x11C2E, WBP_ALetter}, @@ -1096,35 +1027,10 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x11CB2, 0x11CB3, WBP_Extend}, {0x11CB4, 0x11CB4, WBP_Extend}, {0x11CB5, 0x11CB6, WBP_Extend}, - {0x11D00, 0x11D06, WBP_ALetter}, - {0x11D08, 0x11D09, WBP_ALetter}, - {0x11D0B, 0x11D30, WBP_ALetter}, - {0x11D31, 0x11D36, WBP_Extend}, - {0x11D3A, 0x11D3A, WBP_Extend}, - {0x11D3C, 0x11D3D, WBP_Extend}, - {0x11D3F, 0x11D45, WBP_Extend}, - {0x11D46, 0x11D46, WBP_ALetter}, - {0x11D47, 0x11D47, WBP_Extend}, - {0x11D50, 0x11D59, WBP_Numeric}, - {0x11D60, 0x11D65, WBP_ALetter}, - {0x11D67, 0x11D68, WBP_ALetter}, - {0x11D6A, 0x11D89, WBP_ALetter}, - {0x11D8A, 0x11D8E, WBP_Extend}, - {0x11D90, 0x11D91, WBP_Extend}, - {0x11D93, 0x11D94, WBP_Extend}, - {0x11D95, 0x11D95, WBP_Extend}, - {0x11D96, 0x11D96, WBP_Extend}, - {0x11D97, 0x11D97, WBP_Extend}, - {0x11D98, 0x11D98, WBP_ALetter}, - {0x11DA0, 0x11DA9, WBP_Numeric}, - {0x11EE0, 0x11EF2, WBP_ALetter}, - {0x11EF3, 0x11EF4, WBP_Extend}, - {0x11EF5, 0x11EF6, WBP_Extend}, {0x12000, 0x12399, WBP_ALetter}, {0x12400, 0x1246E, WBP_ALetter}, {0x12480, 0x12543, WBP_ALetter}, {0x13000, 0x1342E, WBP_ALetter}, - {0x13430, 0x13438, WBP_Format}, {0x14400, 0x14646, WBP_ALetter}, {0x16800, 0x16A38, WBP_ALetter}, {0x16A40, 0x16A5E, WBP_ALetter}, @@ -1137,17 +1043,13 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x16B50, 0x16B59, WBP_Numeric}, {0x16B63, 0x16B77, WBP_ALetter}, {0x16B7D, 0x16B8F, WBP_ALetter}, - {0x16E40, 0x16E7F, WBP_ALetter}, - {0x16F00, 0x16F4A, WBP_ALetter}, - {0x16F4F, 0x16F4F, WBP_Extend}, + {0x16F00, 0x16F44, WBP_ALetter}, {0x16F50, 0x16F50, WBP_ALetter}, - {0x16F51, 0x16F87, WBP_Extend}, + {0x16F51, 0x16F7E, WBP_Extend}, {0x16F8F, 0x16F92, WBP_Extend}, {0x16F93, 0x16F9F, WBP_ALetter}, - {0x16FE0, 0x16FE1, WBP_ALetter}, - {0x16FE3, 0x16FE3, WBP_ALetter}, + {0x16FE0, 0x16FE0, WBP_ALetter}, {0x1B000, 0x1B000, WBP_Katakana}, - {0x1B164, 0x1B167, WBP_Katakana}, {0x1BC00, 0x1BC6A, WBP_ALetter}, {0x1BC70, 0x1BC7C, WBP_ALetter}, {0x1BC80, 0x1BC88, WBP_ALetter}, @@ -1204,19 +1106,10 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1E01B, 0x1E021, WBP_Extend}, {0x1E023, 0x1E024, WBP_Extend}, {0x1E026, 0x1E02A, WBP_Extend}, - {0x1E100, 0x1E12C, WBP_ALetter}, - {0x1E130, 0x1E136, WBP_Extend}, - {0x1E137, 0x1E13D, WBP_ALetter}, - {0x1E140, 0x1E149, WBP_Numeric}, - {0x1E14E, 0x1E14E, WBP_ALetter}, - {0x1E2C0, 0x1E2EB, WBP_ALetter}, - {0x1E2EC, 0x1E2EF, WBP_Extend}, - {0x1E2F0, 0x1E2F9, WBP_Numeric}, {0x1E800, 0x1E8C4, WBP_ALetter}, {0x1E8D0, 0x1E8D6, WBP_Extend}, {0x1E900, 0x1E943, WBP_ALetter}, {0x1E944, 0x1E94A, WBP_Extend}, - {0x1E94B, 0x1E94B, WBP_ALetter}, {0x1E950, 0x1E959, WBP_Numeric}, {0x1EE00, 0x1EE03, WBP_ALetter}, {0x1EE05, 0x1EE1F, WBP_ALetter}, @@ -1255,7 +1148,35 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1F150, 0x1F169, WBP_ALetter}, {0x1F170, 0x1F189, WBP_ALetter}, {0x1F1E6, 0x1F1FF, WBP_Regional_Indicator}, - {0x1F3FB, 0x1F3FF, WBP_Extend}, + {0x1F385, 0x1F385, WBP_E_Base}, + {0x1F3C3, 0x1F3C4, WBP_E_Base}, + {0x1F3CA, 0x1F3CB, WBP_E_Base}, + {0x1F3FB, 0x1F3FF, WBP_E_Modifier}, + {0x1F442, 0x1F443, WBP_E_Base}, + {0x1F446, 0x1F450, WBP_E_Base}, + {0x1F466, 0x1F469, WBP_E_Base_GAZ}, + {0x1F46E, 0x1F46E, WBP_E_Base}, + {0x1F470, 0x1F478, WBP_E_Base}, + {0x1F47C, 0x1F47C, WBP_E_Base}, + {0x1F481, 0x1F483, WBP_E_Base}, + {0x1F485, 0x1F487, WBP_E_Base}, + {0x1F48B, 0x1F48B, WBP_Glue_After_Zwj}, + {0x1F4AA, 0x1F4AA, WBP_E_Base}, + {0x1F575, 0x1F575, WBP_E_Base}, + {0x1F57A, 0x1F57A, WBP_E_Base}, + {0x1F590, 0x1F590, WBP_E_Base}, + {0x1F595, 0x1F596, WBP_E_Base}, + {0x1F5E8, 0x1F5E8, WBP_Glue_After_Zwj}, + {0x1F645, 0x1F647, WBP_E_Base}, + {0x1F64B, 0x1F64F, WBP_E_Base}, + {0x1F6A3, 0x1F6A3, WBP_E_Base}, + {0x1F6B4, 0x1F6B6, WBP_E_Base}, + {0x1F6C0, 0x1F6C0, WBP_E_Base}, + {0x1F918, 0x1F91E, WBP_E_Base}, + {0x1F926, 0x1F926, WBP_E_Base}, + {0x1F930, 0x1F930, WBP_E_Base}, + {0x1F933, 0x1F939, WBP_E_Base}, + {0x1F93C, 0x1F93E, WBP_E_Base}, {0xE0001, 0xE0001, WBP_Format}, {0xE0020, 0xE007F, WBP_Extend}, {0xE0100, 0xE01EF, WBP_Extend}, diff --git a/src/static_libs/libunibreak/wordbreakdef.h b/src/static_libs/libunibreak/wordbreakdef.h index 03feb3cdac..82cd98e7c3 100644 --- a/src/static_libs/libunibreak/wordbreakdef.h +++ b/src/static_libs/libunibreak/wordbreakdef.h @@ -4,8 +4,7 @@ * Word breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2013-2019 Tom Hacohen <tom at stosb dot com> - * Copyright (C) 2018 Wu Yongwei <wuyongwei at gmail dot com> + * Copyright (C) 2013-16 Tom Hacohen <tom at stosb dot com> * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -31,9 +30,9 @@ * Unicode 6.0.0: * <URL:http://www.unicode.org/reports/tr29/tr29-17.html> * - * This library has been updated according to Revision 35, for - * Unicode 12.0.0: - * <URL:http://www.unicode.org/reports/tr29/tr29-35.html> + * This library has been updated according to Revision 29, for + * Unicode 9.0.0: + * <URL:http://www.unicode.org/reports/tr29/tr29-29.html> * * The Unicode Terms of Use are available at * <URL:http://www.unicode.org/copyright.html> @@ -52,7 +51,7 @@ /** * Word break classes. This is a direct mapping of Table 3 of Unicode - * Standard Annex 29, Revision 35. + * Standard Annex 29, Revision 23. */ enum WordBreakClass { @@ -74,7 +73,10 @@ enum WordBreakClass WBP_MidNum, WBP_Numeric, WBP_ExtendNumLet, - WBP_WSegSpace, + WBP_E_Base, + WBP_E_Modifier, + WBP_Glue_After_Zwj, + WBP_E_Base_GAZ, WBP_Any }; @@ -84,7 +86,7 @@ enum WordBreakClass */ struct WordBreakProperties { - utf32_t start; /**< Start codepoint */ - utf32_t end; /**< End codepoint, inclusive */ + utf32_t start; /**< Starting coding point */ + utf32_t end; /**< End coding point */ enum WordBreakClass prop; /**< The word breaking property */ }; diff --git a/src/tests/evas/evas_test_textblock.c b/src/tests/evas/evas_test_textblock.c index 3f1cbec8d8..a24d16d73c 100644 --- a/src/tests/evas/evas_test_textblock.c +++ b/src/tests/evas/evas_test_textblock.c @@ -1054,10 +1054,6 @@ EFL_START_TEST(evas_textblock_cursor) pos = evas_textblock_cursor_pos_get(cur); ck_assert_int_eq(pos, 0); - evas_object_textblock_text_markup_set(tb, "🏳️‍🌈"); - evas_textblock_cursor_pos_set(cur, 0); - evas_textblock_cursor_cluster_next(cur); - ck_assert_int_eq(4, evas_textblock_cursor_pos_get(cur)); END_TB_TEST(); } |