summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWooHyun Jung <wh0705.jung@samsung.com>2020-10-08 12:32:53 +0900
committerWooHyun Jung <wh0705.jung@samsung.com>2020-10-08 12:32:53 +0900
commit509e3fcc7a65918f6bcf8a664353b45af457cfa6 (patch)
tree7deb45be2683af33c78c3dd64391e565a76ed3f6
parent0a08a860a87fc99e712be1ec72d91a5536c856c2 (diff)
downloadefl-509e3fcc7a65918f6bcf8a664353b45af457cfa6.tar.gz
Revert "Revert "evas_textblock: rainbow flag emoji treated as two clusters(update unibreak to version 4.2)""
This reverts commit 173b3a108e1b2093ac37650619a61568aaed4e04. The original change was reverted because of the code freeze for the release. Now that the release work is finished, it should be fine to restore it.
-rw-r--r--src/lib/evas/canvas/evas_object_textblock.c4
-rw-r--r--src/static_libs/libunibreak/LICENCE8
-rw-r--r--src/static_libs/libunibreak/NEWS14
-rw-r--r--src/static_libs/libunibreak/README.md16
-rw-r--r--src/static_libs/libunibreak/emojidata.c264
-rw-r--r--src/static_libs/libunibreak/emojidef.c61
-rw-r--r--src/static_libs/libunibreak/emojidef.h46
-rw-r--r--src/static_libs/libunibreak/graphemebreak.c88
-rw-r--r--src/static_libs/libunibreak/graphemebreak.h8
-rw-r--r--src/static_libs/libunibreak/graphemebreakdata.c114
-rw-r--r--src/static_libs/libunibreak/graphemebreakdef.h18
-rw-r--r--src/static_libs/libunibreak/linebreak.c66
-rw-r--r--src/static_libs/libunibreak/linebreak.h8
-rw-r--r--src/static_libs/libunibreak/linebreakdata.c225
-rw-r--r--src/static_libs/libunibreak/linebreakdef.c17
-rw-r--r--src/static_libs/libunibreak/linebreakdef.h25
-rw-r--r--src/static_libs/libunibreak/meson.build2
-rw-r--r--src/static_libs/libunibreak/unibreakbase.c2
-rw-r--r--src/static_libs/libunibreak/unibreakbase.h4
-rw-r--r--src/static_libs/libunibreak/unibreakdef.h12
-rw-r--r--src/static_libs/libunibreak/wordbreak.c92
-rw-r--r--src/static_libs/libunibreak/wordbreak.h11
-rw-r--r--src/static_libs/libunibreak/wordbreakdata.c201
-rw-r--r--src/static_libs/libunibreak/wordbreakdef.h20
-rw-r--r--src/tests/evas/evas_test_textblock.c4
25 files changed, 986 insertions, 344 deletions
diff --git a/src/lib/evas/canvas/evas_object_textblock.c b/src/lib/evas/canvas/evas_object_textblock.c
index 501e12ba78..77f3d3d07a 100644
--- a/src/lib/evas/canvas/evas_object_textblock.c
+++ b/src/lib/evas/canvas/evas_object_textblock.c
@@ -10249,7 +10249,7 @@ evas_textblock_cursor_word_start(Efl_Text_Cursor_Handle *cur)
if ((cur->pos > 0) && (cur->pos == len))
cur->pos--;
- for (i = cur->pos ; _is_white(text[i]) && BREAK_AFTER(i) ; i--)
+ for (i = cur->pos ; _is_white(text[i]) ; i--)
{
if (i == 0)
{
@@ -10316,7 +10316,7 @@ evas_textblock_cursor_word_end(Efl_Text_Cursor_Handle *cur)
set_wordbreaks_utf32((const utf32_t *) text, len, lang, breaks);
}
- for (i = cur->pos; text[i] && _is_white(text[i]) && (BREAK_AFTER(i)) ; i++);
+ for (i = cur->pos; text[i] && _is_white(text[i]) ; i++);
if (i == len)
{
Evas_Object_Textblock_Node_Text *nnode;
diff --git a/src/static_libs/libunibreak/LICENCE b/src/static_libs/libunibreak/LICENCE
index 3fba16ad53..6b4137ca21 100644
--- a/src/static_libs/libunibreak/LICENCE
+++ b/src/static_libs/libunibreak/LICENCE
@@ -1,7 +1,7 @@
-Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com>
-Copyright (C) 2012-2016 Tom Hacohen <tom at stosb dot com>
-Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
-Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net>
+Copyright (C) Wu Yongwei <wuyongwei at gmail dot com>
+Copyright (C) Tom Hacohen <tom at stosb dot com>
+Copyright (C) Petr Filipsky <philodej at gmail dot com>
+Copyright (C) Andreas Röver <roever at users dot sf dot net>
This software is provided 'as-is', without any express or implied
warranty. In no event will the author be held liable for any damages
diff --git a/src/static_libs/libunibreak/NEWS b/src/static_libs/libunibreak/NEWS
index d217628da8..a2b9e0302e 100644
--- a/src/static_libs/libunibreak/NEWS
+++ b/src/static_libs/libunibreak/NEWS
@@ -1,3 +1,14 @@
+New in libunibreak 4.2
+
+- Update the data to conform to Unicode 12
+
+New in libunibreak 4.1
+
+- Update the code and data to conform to Unicode 11.0.0, especially
+ adding support for extended pictographs in word and grapheme breaking
+- ZWJ support has been much improved (it was broken)
+- Make minor tweaks to the project files
+
New in libunibreak 4.0
- Update the code and data to conform to Unicode 9.0.0
@@ -22,7 +33,8 @@ New in libunibreak 1.1
New in libunibreak 1.0
- Add word breaking support
-- Change the library name to "libunibreak", while keeping maximum compatibility
+- Change the library name to "libunibreak", while keeping maximum
+ compatibility
- Add pkg-config support
New in liblinebreak 2.1
diff --git a/src/static_libs/libunibreak/README.md b/src/static_libs/libunibreak/README.md
index f37fd902aa..4e65059586 100644
--- a/src/static_libs/libunibreak/README.md
+++ b/src/static_libs/libunibreak/README.md
@@ -6,11 +6,11 @@ Overview
This is the README file for libunibreak, an implementation of the line
breaking and word breaking algorithms as described in [Unicode Standard
-Annex 14] [1] and [Unicode Standard Annex 29] [2]. Check the project's
-[home page] [3] for up-to-date information.
+Annex 14][1] and [Unicode Standard Annex 29][2]. Check the project's
+[home page][3] for up-to-date information.
- [1]: http://www.unicode.org/reports/tr14/tr14-37.html
- [2]: http://www.unicode.org/reports/tr29/tr29-29.html
+ [1]: http://www.unicode.org/reports/tr14/
+ [2]: http://www.unicode.org/reports/tr29/
[3]: https://github.com/adah1972/libunibreak
@@ -21,7 +21,7 @@ This library is released under an open-source licence, the zlib/libpng
licence. Please check the file *LICENCE* for details.
Apart from using the algorithm, part of the code is derived from the
-[Unicode Public Data] [4], and the [Unicode Terms of Use] [5] may apply.
+[Unicode Public Data][4], and the [Unicode Terms of Use][5] may apply.
[4]: http://www.unicode.org/Public/
[5]: http://www.unicode.org/copyright.html
@@ -48,6 +48,8 @@ There are three ways to build the library:
*WordBreakProperty.txt*.
- type `make graphemebreakdata` to regenerate *graphemebreakdata.c*
from *GraphemeBreakProperty.txt*.
+ - type `make emojidata` to regenerate *emojidata.c* from
+ *emoji-data.txt*.
2. On systems where GCC and Binutils are supported, one can type
@@ -65,6 +67,8 @@ There are three ways to build the library:
*WordBreakProperty.txt*.
- type `make graphemebreakdata` to regenerate *graphemebreakdata.c*
from *GraphemeBreakProperty.txt*.
+ - type `make emojidata` to regenerate *emojidata.c* from
+ *emoji-data.txt*.
3. On Windows, apart from using method 1 (Cygwin/MSYS) and method 2
(MinGW), MSVC can also be used. Type
@@ -72,7 +76,7 @@ There are three ways to build the library:
cd src
nmake -f Makefile.msvc
- to build the static library. By default the debug release is built.
+ to build the static library. By default the debug version is built.
To build the release version
nmake -f Makefile.msvc CFG="libunibreak - Win32 Release"
diff --git a/src/static_libs/libunibreak/emojidata.c b/src/static_libs/libunibreak/emojidata.c
new file mode 100644
index 0000000000..a78f2678e7
--- /dev/null
+++ b/src/static_libs/libunibreak/emojidata.c
@@ -0,0 +1,264 @@
+/* The content of this file is generated from:
+# emoji-data.txt
+# Date: 2019-01-15, 12:10:05 GMT
+*/
+
+static const struct ExtendedPictograpic ep_prop[] = { /* inclusive {start, end} ranges, in ascending order */
+ {0x00A9, 0x00A9},
+ {0x00AE, 0x00AE},
+ {0x203C, 0x203C},
+ {0x2049, 0x2049},
+ {0x2122, 0x2122},
+ {0x2139, 0x2139},
+ {0x2194, 0x2199},
+ {0x21A9, 0x21AA},
+ {0x231A, 0x231B},
+ {0x2328, 0x2328},
+ {0x2388, 0x2388},
+ {0x23CF, 0x23CF},
+ {0x23E9, 0x23F3},
+ {0x23F8, 0x23FA},
+ {0x24C2, 0x24C2},
+ {0x25AA, 0x25AB},
+ {0x25B6, 0x25B6},
+ {0x25C0, 0x25C0},
+ {0x25FB, 0x25FE},
+ {0x2600, 0x2605},
+ {0x2607, 0x2612},
+ {0x2614, 0x2615},
+ {0x2616, 0x2617},
+ {0x2618, 0x2618},
+ {0x2619, 0x2619},
+ {0x261A, 0x266F},
+ {0x2670, 0x2671},
+ {0x2672, 0x267D},
+ {0x267E, 0x267F},
+ {0x2680, 0x2685},
+ {0x2690, 0x2691},
+ {0x2692, 0x269C},
+ {0x269D, 0x269D},
+ {0x269E, 0x269F},
+ {0x26A0, 0x26A1},
+ {0x26A2, 0x26B1},
+ {0x26B2, 0x26B2},
+ {0x26B3, 0x26BC},
+ {0x26BD, 0x26BF},
+ {0x26C0, 0x26C3},
+ {0x26C4, 0x26CD},
+ {0x26CE, 0x26CE},
+ {0x26CF, 0x26E1},
+ {0x26E2, 0x26E2},
+ {0x26E3, 0x26E3},
+ {0x26E4, 0x26E7},
+ {0x26E8, 0x26FF},
+ {0x2700, 0x2700},
+ {0x2701, 0x2704},
+ {0x2705, 0x2705},
+ {0x2708, 0x2709},
+ {0x270A, 0x270B},
+ {0x270C, 0x2712},
+ {0x2714, 0x2714},
+ {0x2716, 0x2716},
+ {0x271D, 0x271D},
+ {0x2721, 0x2721},
+ {0x2728, 0x2728},
+ {0x2733, 0x2734},
+ {0x2744, 0x2744},
+ {0x2747, 0x2747},
+ {0x274C, 0x274C},
+ {0x274E, 0x274E},
+ {0x2753, 0x2755},
+ {0x2757, 0x2757},
+ {0x2763, 0x2767},
+ {0x2795, 0x2797},
+ {0x27A1, 0x27A1},
+ {0x27B0, 0x27B0},
+ {0x27BF, 0x27BF},
+ {0x2934, 0x2935},
+ {0x2B05, 0x2B07},
+ {0x2B1B, 0x2B1C},
+ {0x2B50, 0x2B50},
+ {0x2B55, 0x2B55},
+ {0x3030, 0x3030},
+ {0x303D, 0x303D},
+ {0x3297, 0x3297},
+ {0x3299, 0x3299},
+ {0x1F000, 0x1F02B},
+ {0x1F02C, 0x1F02F},
+ {0x1F030, 0x1F093},
+ {0x1F094, 0x1F09F},
+ {0x1F0A0, 0x1F0AE},
+ {0x1F0AF, 0x1F0B0},
+ {0x1F0B1, 0x1F0BE},
+ {0x1F0BF, 0x1F0BF},
+ {0x1F0C0, 0x1F0C0},
+ {0x1F0C1, 0x1F0CF},
+ {0x1F0D0, 0x1F0D0},
+ {0x1F0D1, 0x1F0DF},
+ {0x1F0E0, 0x1F0F5},
+ {0x1F0F6, 0x1F0FF},
+ {0x1F10D, 0x1F10F},
+ {0x1F12F, 0x1F12F},
+ {0x1F16C, 0x1F16C},
+ {0x1F16D, 0x1F16F},
+ {0x1F170, 0x1F171},
+ {0x1F17E, 0x1F17E},
+ {0x1F17F, 0x1F17F},
+ {0x1F18E, 0x1F18E},
+ {0x1F191, 0x1F19A},
+ {0x1F1AD, 0x1F1E5},
+ {0x1F201, 0x1F202},
+ {0x1F203, 0x1F20F},
+ {0x1F21A, 0x1F21A},
+ {0x1F22F, 0x1F22F},
+ {0x1F232, 0x1F23A},
+ {0x1F23C, 0x1F23F},
+ {0x1F249, 0x1F24F},
+ {0x1F250, 0x1F251},
+ {0x1F252, 0x1F25F},
+ {0x1F260, 0x1F265},
+ {0x1F266, 0x1F2FF},
+ {0x1F300, 0x1F320},
+ {0x1F321, 0x1F32C},
+ {0x1F32D, 0x1F32F},
+ {0x1F330, 0x1F335},
+ {0x1F336, 0x1F336},
+ {0x1F337, 0x1F37C},
+ {0x1F37D, 0x1F37D},
+ {0x1F37E, 0x1F37F},
+ {0x1F380, 0x1F393},
+ {0x1F394, 0x1F39F},
+ {0x1F3A0, 0x1F3C4},
+ {0x1F3C5, 0x1F3C5},
+ {0x1F3C6, 0x1F3CA},
+ {0x1F3CB, 0x1F3CE},
+ {0x1F3CF, 0x1F3D3},
+ {0x1F3D4, 0x1F3DF},
+ {0x1F3E0, 0x1F3F0},
+ {0x1F3F1, 0x1F3F7},
+ {0x1F3F8, 0x1F3FA},
+ {0x1F400, 0x1F43E},
+ {0x1F43F, 0x1F43F},
+ {0x1F440, 0x1F440},
+ {0x1F441, 0x1F441},
+ {0x1F442, 0x1F4F7},
+ {0x1F4F8, 0x1F4F8},
+ {0x1F4F9, 0x1F4FC},
+ {0x1F4FD, 0x1F4FE},
+ {0x1F4FF, 0x1F4FF},
+ {0x1F500, 0x1F53D},
+ {0x1F546, 0x1F54A},
+ {0x1F54B, 0x1F54F},
+ {0x1F550, 0x1F567},
+ {0x1F568, 0x1F579},
+ {0x1F57A, 0x1F57A},
+ {0x1F57B, 0x1F5A3},
+ {0x1F5A4, 0x1F5A4},
+ {0x1F5A5, 0x1F5FA},
+ {0x1F5FB, 0x1F5FF},
+ {0x1F600, 0x1F600},
+ {0x1F601, 0x1F610},
+ {0x1F611, 0x1F611},
+ {0x1F612, 0x1F614},
+ {0x1F615, 0x1F615},
+ {0x1F616, 0x1F616},
+ {0x1F617, 0x1F617},
+ {0x1F618, 0x1F618},
+ {0x1F619, 0x1F619},
+ {0x1F61A, 0x1F61A},
+ {0x1F61B, 0x1F61B},
+ {0x1F61C, 0x1F61E},
+ {0x1F61F, 0x1F61F},
+ {0x1F620, 0x1F625},
+ {0x1F626, 0x1F627},
+ {0x1F628, 0x1F62B},
+ {0x1F62C, 0x1F62C},
+ {0x1F62D, 0x1F62D},
+ {0x1F62E, 0x1F62F},
+ {0x1F630, 0x1F633},
+ {0x1F634, 0x1F634},
+ {0x1F635, 0x1F640},
+ {0x1F641, 0x1F642},
+ {0x1F643, 0x1F644},
+ {0x1F645, 0x1F64F},
+ {0x1F680, 0x1F6C5},
+ {0x1F6C6, 0x1F6CF},
+ {0x1F6D0, 0x1F6D0},
+ {0x1F6D1, 0x1F6D2},
+ {0x1F6D3, 0x1F6D4},
+ {0x1F6D5, 0x1F6D5},
+ {0x1F6D6, 0x1F6DF},
+ {0x1F6E0, 0x1F6EC},
+ {0x1F6ED, 0x1F6EF},
+ {0x1F6F0, 0x1F6F3},
+ {0x1F6F4, 0x1F6F6},
+ {0x1F6F7, 0x1F6F8},
+ {0x1F6F9, 0x1F6F9},
+ {0x1F6FA, 0x1F6FA},
+ {0x1F6FB, 0x1F6FF},
+ {0x1F774, 0x1F77F},
+ {0x1F7D5, 0x1F7D8},
+ {0x1F7D9, 0x1F7DF},
+ {0x1F7E0, 0x1F7EB},
+ {0x1F7EC, 0x1F7FF},
+ {0x1F80C, 0x1F80F},
+ {0x1F848, 0x1F84F},
+ {0x1F85A, 0x1F85F},
+ {0x1F888, 0x1F88F},
+ {0x1F8AE, 0x1F8FF},
+ {0x1F90C, 0x1F90C},
+ {0x1F90D, 0x1F90F},
+ {0x1F910, 0x1F918},
+ {0x1F919, 0x1F91E},
+ {0x1F91F, 0x1F91F},
+ {0x1F920, 0x1F927},
+ {0x1F928, 0x1F92F},
+ {0x1F930, 0x1F930},
+ {0x1F931, 0x1F932},
+ {0x1F933, 0x1F93A},
+ {0x1F93C, 0x1F93E},
+ {0x1F93F, 0x1F93F},
+ {0x1F940, 0x1F945},
+ {0x1F947, 0x1F94B},
+ {0x1F94C, 0x1F94C},
+ {0x1F94D, 0x1F94F},
+ {0x1F950, 0x1F95E},
+ {0x1F95F, 0x1F96B},
+ {0x1F96C, 0x1F970},
+ {0x1F971, 0x1F971},
+ {0x1F972, 0x1F972},
+ {0x1F973, 0x1F976},
+ {0x1F977, 0x1F979},
+ {0x1F97A, 0x1F97A},
+ {0x1F97B, 0x1F97B},
+ {0x1F97C, 0x1F97F},
+ {0x1F980, 0x1F984},
+ {0x1F985, 0x1F991},
+ {0x1F992, 0x1F997},
+ {0x1F998, 0x1F9A2},
+ {0x1F9A3, 0x1F9A4},
+ {0x1F9A5, 0x1F9AA},
+ {0x1F9AB, 0x1F9AD},
+ {0x1F9AE, 0x1F9AF},
+ {0x1F9B0, 0x1F9B9},
+ {0x1F9BA, 0x1F9BF},
+ {0x1F9C0, 0x1F9C0},
+ {0x1F9C1, 0x1F9C2},
+ {0x1F9C3, 0x1F9CA},
+ {0x1F9CB, 0x1F9CC},
+ {0x1F9CD, 0x1F9CF},
+ {0x1F9D0, 0x1F9E6},
+ {0x1F9E7, 0x1F9FF},
+ {0x1FA00, 0x1FA53},
+ {0x1FA54, 0x1FA5F},
+ {0x1FA60, 0x1FA6D},
+ {0x1FA6E, 0x1FA6F},
+ {0x1FA70, 0x1FA73},
+ {0x1FA74, 0x1FA77},
+ {0x1FA78, 0x1FA7A},
+ {0x1FA7B, 0x1FA7F},
+ {0x1FA80, 0x1FA82},
+ {0x1FA83, 0x1FA8F},
+ {0x1FA90, 0x1FA95},
+ {0x1FA96, 0x1FFFD},
+};
diff --git a/src/static_libs/libunibreak/emojidef.c b/src/static_libs/libunibreak/emojidef.c
new file mode 100644
index 0000000000..43a2ed3db0
--- /dev/null
+++ b/src/static_libs/libunibreak/emojidef.c
@@ -0,0 +1,61 @@
+/*
+ * Emoji-related routine and data.
+ *
+ * Copyright (C) 2018 Andreas Röver <roever at users dot sf dot net>
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the author be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute
+ * it freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must
+ * not claim that you wrote the original software. If you use this
+ * software in a product, an acknowledgement in the product
+ * documentation would be appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must
+ * not be misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source
+ * distribution.
+ */
+
+/**
+ * @file emojidef.c
+ *
+ * Emoji-related routine and data that are used internally.
+ *
+ * @author Andreas Röver
+ */
+
+#include "emojidef.h"
+#include "emojidata.c"
+
+/**
+ * Tests whether a codepoint has the extended pictographic property.
+ *
+ * @param[in] ch  codepoint to look up in the sorted \c ep_prop table
+ * @return        \c true if \a ch falls inside one of the table's
+ *                ranges; \c false otherwise
+ */
+bool ub_is_extended_pictographic(utf32_t ch)
+{
+    int lo = 0;
+    int hi = ARRAY_LEN(ep_prop) - 1;
+
+    /* Bisect the table until a range contains ch or none is left. */
+    do
+    {
+        const int probe = (lo + hi) / 2;
+
+        if (ch >= ep_prop[probe].start && ch <= ep_prop[probe].end)
+            return true;
+        if (ch < ep_prop[probe].start)
+            hi = probe - 1;
+        else
+            lo = probe + 1;
+    } while (lo <= hi);
+
+    return false;
+}
diff --git a/src/static_libs/libunibreak/emojidef.h b/src/static_libs/libunibreak/emojidef.h
new file mode 100644
index 0000000000..b9055fd261
--- /dev/null
+++ b/src/static_libs/libunibreak/emojidef.h
@@ -0,0 +1,46 @@
+/*
+ * Emoji-related routine and data.
+ *
+ * Copyright (C) 2018 Andreas Röver <roever at users dot sf dot net>
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the author be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute
+ * it freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must
+ * not claim that you wrote the original software. If you use this
+ * software in a product, an acknowledgement in the product
+ * documentation would be appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must
+ * not be misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source
+ * distribution.
+ */
+
+/**
+ * @file emojidef.h
+ *
+ * Definitions of internal data structure and function for extended
+ * pictographs.
+ *
+ * @author Andreas Röver
+ */
+
+#include "unibreakdef.h"
+
+/**
+ * Inclusive codepoint range whose members are all extended pictographic.
+ * The array of these entries \e must be sorted in ascending order because
+ * ub_is_extended_pictographic() looks codepoints up by binary search.
+ */
+struct ExtendedPictograpic
+{
+ utf32_t start; /**< First codepoint of the range, inclusive */
+ utf32_t end; /**< Last codepoint of the range, inclusive */
+};
+/** Tells whether \a ch is extended pictographic (defined in emojidef.c). */
+bool ub_is_extended_pictographic(utf32_t ch);
diff --git a/src/static_libs/libunibreak/graphemebreak.c b/src/static_libs/libunibreak/graphemebreak.c
index 77c3d5f55c..401522f12d 100644
--- a/src/static_libs/libunibreak/graphemebreak.c
+++ b/src/static_libs/libunibreak/graphemebreak.c
@@ -2,7 +2,7 @@
* Grapheme breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
- * Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net>
+ * Copyright (C) 2016-2019 Andreas Röver <roever at users dot sf dot net>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@@ -28,6 +28,10 @@
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
*
+ * This library has been updated according to Revision 35, for
+ * Unicode 12.0.0:
+ * <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
+ *
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
*/
@@ -38,23 +42,14 @@
* Implementation of the grapheme breaking algorithm as described in Unicode
* Standard Annex 29.
*
- * @author Andreas Roever
+ * @author Andreas Röver
*/
-#if defined(_MSC_VER) && _MSC_VER < 1800
-typedef int bool;
-#define false 0
-#define true 1
-#else
-#include <stdbool.h>
-#endif
-
#include <string.h>
#include "graphemebreak.h"
#include "graphemebreakdata.c"
#include "unibreakdef.h"
-
-#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
+#include "emojidef.h"
/**
* Initializes the wordbreak internals. It currently does nothing, but
@@ -67,8 +62,8 @@ void init_graphemebreak(void)
/**
* Gets the grapheme breaking class of a character.
*
- * @param ch character to check
- * @return the grapheme breaking class if found; \c GBP_Other otherwise
+ * @param[in] ch character to check
+ * @return the grapheme breaking class if found; \c GBP_Other otherwise
*/
static enum GraphemeBreakClass get_char_gb_class(utf32_t ch)
{
@@ -93,6 +88,7 @@ static enum GraphemeBreakClass get_char_gb_class(utf32_t ch)
/**
* Sets the grapheme breaking information for a generic input string.
+ * It uses the extended grapheme cluster ruleset.
*
* @param[in] s input string
* @param[in] len length of the input
@@ -104,7 +100,7 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks,
get_next_char_t get_next_char)
{
size_t posNext = 0;
- bool rule10Left = false; // is the left side of rule 10 fulfilled?
+ int rule11Detector = 0;
bool evenRegionalIndicators = true; // is the number of preceeding
// GBP_RegionalIndicator characters
// even
@@ -117,6 +113,47 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks,
while (true)
{
+
+ // This state machine recognizes the left-hand side of rule GB11:
+ //   Extended_Pictographic Extend* ZWJ
+ // Once that pattern has been seen, rule11Detector is 3 and rule
+ // GB11 can be applied below.
+ switch (current_class)
+ {
+ case GBP_ZWJ:
+ if (rule11Detector == 1 || rule11Detector == 2)
+ {
+ rule11Detector = 3;
+ }
+ else
+ {
+ rule11Detector = 0;
+ }
+ break;
+
+ case GBP_Extend:
+ if (rule11Detector == 1 || rule11Detector == 2)
+ {
+ rule11Detector = 2;
+ }
+ else
+ {
+ rule11Detector = 0;
+ }
+ break;
+
+ default:
+ if (ub_is_extended_pictographic(ch))
+ {
+ rule11Detector = 1;
+ }
+ else
+ {
+ rule11Detector = 0;
+ }
+ break;
+ }
+
enum GraphemeBreakClass prev_class = current_class;
// safe position if current character so that we can store the
@@ -137,16 +174,6 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks,
// get class of current character
current_class = get_char_gb_class(ch);
- // update some helper variables
- if ((prev_class == GBP_E_Base) || (prev_class == GBP_E_Base_GAZ))
- {
- rule10Left = true;
- }
- else if (prev_class != GBP_Extend)
- {
- rule10Left = false;
- }
-
if (prev_class == GBP_Regional_Indicator)
{
evenRegionalIndicators = !evenRegionalIndicators;
@@ -185,7 +212,8 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks,
brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB8
}
else if ((current_class == GBP_Extend) ||
- (current_class == GBP_ZWJ))
+ (current_class == GBP_ZWJ) ||
+ (current_class == GBP_Virama))
{
brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB9
}
@@ -197,13 +225,7 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks,
{
brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB9b
}
- else if (rule10Left && (current_class == GBP_E_Modifier))
- {
- brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB10
- }
- else if ((prev_class == GBP_ZWJ) &&
- ((current_class == GBP_Glue_After_Zwj) ||
- (current_class == GBP_E_Base_GAZ)))
+ else if ((rule11Detector == 3) && ub_is_extended_pictographic(ch))
{
brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB11
}
diff --git a/src/static_libs/libunibreak/graphemebreak.h b/src/static_libs/libunibreak/graphemebreak.h
index c01768233a..e5259b5ccd 100644
--- a/src/static_libs/libunibreak/graphemebreak.h
+++ b/src/static_libs/libunibreak/graphemebreak.h
@@ -2,7 +2,7 @@
* Grapheme breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
- * Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net>
+ * Copyright (C) 2016-2019 Andreas Röver <roever at users dot sf dot net>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@@ -28,6 +28,10 @@
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
*
+ * This library has been updated according to Revision 35, for
+ * Unicode 12.0.0:
+ * <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
+ *
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
*/
@@ -66,4 +70,4 @@ void set_graphemebreaks_utf32(const utf32_t *s, size_t len,
}
#endif
-#endif
+#endif /* GRAPHEMEBREAK_H */
diff --git a/src/static_libs/libunibreak/graphemebreakdata.c b/src/static_libs/libunibreak/graphemebreakdata.c
index cab9bebd80..bc1af932cf 100644
--- a/src/static_libs/libunibreak/graphemebreakdata.c
+++ b/src/static_libs/libunibreak/graphemebreakdata.c
@@ -1,6 +1,6 @@
/* The content of this file is generated from:
-# GraphemeBreakProperty-9.0.0.txt
-# Date: 2016-06-03, 22:23:55 GMT
+# GraphemeBreakProperty-12.1.0.txt
+# Date: 2019-03-10, 10:53:12 GMT
*/
#include "graphemebreakdef.h"
@@ -36,12 +36,13 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x0730, 0x074A, GBP_Extend},
{0x07A6, 0x07B0, GBP_Extend},
{0x07EB, 0x07F3, GBP_Extend},
+ {0x07FD, 0x07FD, GBP_Extend},
{0x0816, 0x0819, GBP_Extend},
{0x081B, 0x0823, GBP_Extend},
{0x0825, 0x0827, GBP_Extend},
{0x0829, 0x082D, GBP_Extend},
{0x0859, 0x085B, GBP_Extend},
- {0x08D4, 0x08E1, GBP_Extend},
+ {0x08D3, 0x08E1, GBP_Extend},
{0x08E2, 0x08E2, GBP_Prepend},
{0x08E3, 0x0902, GBP_Extend},
{0x0903, 0x0903, GBP_SpacingMark},
@@ -66,6 +67,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x09CD, 0x09CD, GBP_Extend},
{0x09D7, 0x09D7, GBP_Extend},
{0x09E2, 0x09E3, GBP_Extend},
+ {0x09FE, 0x09FE, GBP_Extend},
{0x0A01, 0x0A02, GBP_Extend},
{0x0A03, 0x0A03, GBP_SpacingMark},
{0x0A3C, 0x0A3C, GBP_Extend},
@@ -86,6 +88,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x0ACB, 0x0ACC, GBP_SpacingMark},
{0x0ACD, 0x0ACD, GBP_Extend},
{0x0AE2, 0x0AE3, GBP_Extend},
+ {0x0AFA, 0x0AFF, GBP_Extend},
{0x0B01, 0x0B01, GBP_Extend},
{0x0B02, 0x0B03, GBP_SpacingMark},
{0x0B3C, 0x0B3C, GBP_Extend},
@@ -110,6 +113,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x0BD7, 0x0BD7, GBP_Extend},
{0x0C00, 0x0C00, GBP_Extend},
{0x0C01, 0x0C03, GBP_SpacingMark},
+ {0x0C04, 0x0C04, GBP_Extend},
{0x0C3E, 0x0C40, GBP_Extend},
{0x0C41, 0x0C44, GBP_SpacingMark},
{0x0C46, 0x0C48, GBP_Extend},
@@ -130,8 +134,9 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x0CCC, 0x0CCD, GBP_Extend},
{0x0CD5, 0x0CD6, GBP_Extend},
{0x0CE2, 0x0CE3, GBP_Extend},
- {0x0D01, 0x0D01, GBP_Extend},
+ {0x0D00, 0x0D01, GBP_Extend},
{0x0D02, 0x0D03, GBP_SpacingMark},
+ {0x0D3B, 0x0D3C, GBP_Extend},
{0x0D3E, 0x0D3E, GBP_Extend},
{0x0D3F, 0x0D40, GBP_SpacingMark},
{0x0D41, 0x0D44, GBP_Extend},
@@ -156,8 +161,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x0E47, 0x0E4E, GBP_Extend},
{0x0EB1, 0x0EB1, GBP_Extend},
{0x0EB3, 0x0EB3, GBP_SpacingMark},
- {0x0EB4, 0x0EB9, GBP_Extend},
- {0x0EBB, 0x0EBC, GBP_Extend},
+ {0x0EB4, 0x0EBC, GBP_Extend},
{0x0EC8, 0x0ECD, GBP_Extend},
{0x0F18, 0x0F19, GBP_Extend},
{0x0F35, 0x0F35, GBP_Extend},
@@ -232,7 +236,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x1B00, 0x1B03, GBP_Extend},
{0x1B04, 0x1B04, GBP_SpacingMark},
{0x1B34, 0x1B34, GBP_Extend},
- {0x1B35, 0x1B35, GBP_SpacingMark},
+ {0x1B35, 0x1B35, GBP_Extend},
{0x1B36, 0x1B3A, GBP_Extend},
{0x1B3B, 0x1B3B, GBP_SpacingMark},
{0x1B3C, 0x1B3C, GBP_Extend},
@@ -265,10 +269,10 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x1CE1, 0x1CE1, GBP_SpacingMark},
{0x1CE2, 0x1CE8, GBP_Extend},
{0x1CED, 0x1CED, GBP_Extend},
- {0x1CF2, 0x1CF3, GBP_SpacingMark},
{0x1CF4, 0x1CF4, GBP_Extend},
+ {0x1CF7, 0x1CF7, GBP_SpacingMark},
{0x1CF8, 0x1CF9, GBP_Extend},
- {0x1DC0, 0x1DF5, GBP_Extend},
+ {0x1DC0, 0x1DF9, GBP_Extend},
{0x1DFB, 0x1DFF, GBP_Extend},
{0x200B, 0x200B, GBP_Control},
{0x200C, 0x200C, GBP_Extend},
@@ -285,10 +289,6 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x20E1, 0x20E1, GBP_Extend},
{0x20E2, 0x20E4, GBP_Extend},
{0x20E5, 0x20F0, GBP_Extend},
- {0x261D, 0x261D, GBP_E_Base},
- {0x26F9, 0x26F9, GBP_E_Base},
- {0x270A, 0x270D, GBP_E_Base},
- {0x2764, 0x2764, GBP_Glue_After_Zwj},
{0x2CEF, 0x2CF1, GBP_Extend},
{0x2D7F, 0x2D7F, GBP_Extend},
{0x2DE0, 0x2DFF, GBP_Extend},
@@ -310,6 +310,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0xA8B4, 0xA8C3, GBP_SpacingMark},
{0xA8C4, 0xA8C5, GBP_Extend},
{0xA8E0, 0xA8F1, GBP_Extend},
+ {0xA8FF, 0xA8FF, GBP_Extend},
{0xA926, 0xA92D, GBP_Extend},
{0xA947, 0xA951, GBP_Extend},
{0xA952, 0xA953, GBP_SpacingMark},
@@ -320,8 +321,8 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0xA9B4, 0xA9B5, GBP_SpacingMark},
{0xA9B6, 0xA9B9, GBP_Extend},
{0xA9BA, 0xA9BB, GBP_SpacingMark},
- {0xA9BC, 0xA9BC, GBP_Extend},
- {0xA9BD, 0xA9C0, GBP_SpacingMark},
+ {0xA9BC, 0xA9BD, GBP_Extend},
+ {0xA9BE, 0xA9C0, GBP_SpacingMark},
{0xA9E5, 0xA9E5, GBP_Extend},
{0xAA29, 0xAA2E, GBP_Extend},
{0xAA2F, 0xAA30, GBP_SpacingMark},
@@ -1149,7 +1150,6 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0xD789, 0xD7A3, GBP_LVT},
{0xD7B0, 0xD7C6, GBP_V},
{0xD7CB, 0xD7FB, GBP_T},
- {0xD800, 0xDFFF, GBP_Control},
{0xFB1E, 0xFB1E, GBP_Extend},
{0xFE00, 0xFE0F, GBP_Extend},
{0xFE20, 0xFE2F, GBP_Extend},
@@ -1166,6 +1166,8 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x10A38, 0x10A3A, GBP_Extend},
{0x10A3F, 0x10A3F, GBP_Extend},
{0x10AE5, 0x10AE6, GBP_Extend},
+ {0x10D24, 0x10D27, GBP_Extend},
+ {0x10F46, 0x10F50, GBP_Extend},
{0x11000, 0x11000, GBP_SpacingMark},
{0x11001, 0x11001, GBP_Extend},
{0x11002, 0x11002, GBP_SpacingMark},
@@ -1177,10 +1179,12 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x110B7, 0x110B8, GBP_SpacingMark},
{0x110B9, 0x110BA, GBP_Extend},
{0x110BD, 0x110BD, GBP_Prepend},
+ {0x110CD, 0x110CD, GBP_Prepend},
{0x11100, 0x11102, GBP_Extend},
{0x11127, 0x1112B, GBP_Extend},
{0x1112C, 0x1112C, GBP_SpacingMark},
{0x1112D, 0x11134, GBP_Extend},
+ {0x11145, 0x11146, GBP_SpacingMark},
{0x11173, 0x11173, GBP_Extend},
{0x11180, 0x11181, GBP_Extend},
{0x11182, 0x11182, GBP_SpacingMark},
@@ -1188,7 +1192,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x111B6, 0x111BE, GBP_Extend},
{0x111BF, 0x111C0, GBP_SpacingMark},
{0x111C2, 0x111C3, GBP_Prepend},
- {0x111CA, 0x111CC, GBP_Extend},
+ {0x111C9, 0x111CC, GBP_Extend},
{0x1122C, 0x1122E, GBP_SpacingMark},
{0x1122F, 0x11231, GBP_Extend},
{0x11232, 0x11233, GBP_SpacingMark},
@@ -1201,7 +1205,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x112E3, 0x112EA, GBP_Extend},
{0x11300, 0x11301, GBP_Extend},
{0x11302, 0x11303, GBP_SpacingMark},
- {0x1133C, 0x1133C, GBP_Extend},
+ {0x1133B, 0x1133C, GBP_Extend},
{0x1133E, 0x1133E, GBP_Extend},
{0x1133F, 0x1133F, GBP_SpacingMark},
{0x11340, 0x11340, GBP_Extend},
@@ -1218,6 +1222,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x11442, 0x11444, GBP_Extend},
{0x11445, 0x11445, GBP_SpacingMark},
{0x11446, 0x11446, GBP_Extend},
+ {0x1145E, 0x1145E, GBP_Extend},
{0x114B0, 0x114B0, GBP_Extend},
{0x114B1, 0x114B2, GBP_SpacingMark},
{0x114B3, 0x114B8, GBP_Extend},
@@ -1255,6 +1260,29 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x11722, 0x11725, GBP_Extend},
{0x11726, 0x11726, GBP_SpacingMark},
{0x11727, 0x1172B, GBP_Extend},
+ {0x1182C, 0x1182E, GBP_SpacingMark},
+ {0x1182F, 0x11837, GBP_Extend},
+ {0x11838, 0x11838, GBP_SpacingMark},
+ {0x11839, 0x1183A, GBP_Extend},
+ {0x119D1, 0x119D3, GBP_SpacingMark},
+ {0x119D4, 0x119D7, GBP_Extend},
+ {0x119DA, 0x119DB, GBP_Extend},
+ {0x119DC, 0x119DF, GBP_SpacingMark},
+ {0x119E0, 0x119E0, GBP_Extend},
+ {0x119E4, 0x119E4, GBP_SpacingMark},
+ {0x11A01, 0x11A0A, GBP_Extend},
+ {0x11A33, 0x11A38, GBP_Extend},
+ {0x11A39, 0x11A39, GBP_SpacingMark},
+ {0x11A3A, 0x11A3A, GBP_Prepend},
+ {0x11A3B, 0x11A3E, GBP_Extend},
+ {0x11A47, 0x11A47, GBP_Extend},
+ {0x11A51, 0x11A56, GBP_Extend},
+ {0x11A57, 0x11A58, GBP_SpacingMark},
+ {0x11A59, 0x11A5B, GBP_Extend},
+ {0x11A84, 0x11A89, GBP_Prepend},
+ {0x11A8A, 0x11A96, GBP_Extend},
+ {0x11A97, 0x11A97, GBP_SpacingMark},
+ {0x11A98, 0x11A99, GBP_Extend},
{0x11C2F, 0x11C2F, GBP_SpacingMark},
{0x11C30, 0x11C36, GBP_Extend},
{0x11C38, 0x11C3D, GBP_Extend},
@@ -1267,9 +1295,25 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x11CB2, 0x11CB3, GBP_Extend},
{0x11CB4, 0x11CB4, GBP_SpacingMark},
{0x11CB5, 0x11CB6, GBP_Extend},
+ {0x11D31, 0x11D36, GBP_Extend},
+ {0x11D3A, 0x11D3A, GBP_Extend},
+ {0x11D3C, 0x11D3D, GBP_Extend},
+ {0x11D3F, 0x11D45, GBP_Extend},
+ {0x11D46, 0x11D46, GBP_Prepend},
+ {0x11D47, 0x11D47, GBP_Extend},
+ {0x11D8A, 0x11D8E, GBP_SpacingMark},
+ {0x11D90, 0x11D91, GBP_Extend},
+ {0x11D93, 0x11D94, GBP_SpacingMark},
+ {0x11D95, 0x11D95, GBP_Extend},
+ {0x11D96, 0x11D96, GBP_SpacingMark},
+ {0x11D97, 0x11D97, GBP_Extend},
+ {0x11EF3, 0x11EF4, GBP_Extend},
+ {0x11EF5, 0x11EF6, GBP_SpacingMark},
+ {0x13430, 0x13438, GBP_Control},
{0x16AF0, 0x16AF4, GBP_Extend},
{0x16B30, 0x16B36, GBP_Extend},
- {0x16F51, 0x16F7E, GBP_SpacingMark},
+ {0x16F4F, 0x16F4F, GBP_Extend},
+ {0x16F51, 0x16F87, GBP_SpacingMark},
{0x16F8F, 0x16F92, GBP_Extend},
{0x1BC9D, 0x1BC9E, GBP_Extend},
{0x1BCA0, 0x1BCA3, GBP_Control},
@@ -1294,38 +1338,12 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x1E01B, 0x1E021, GBP_Extend},
{0x1E023, 0x1E024, GBP_Extend},
{0x1E026, 0x1E02A, GBP_Extend},
+ {0x1E130, 0x1E136, GBP_Extend},
+ {0x1E2EC, 0x1E2EF, GBP_Extend},
{0x1E8D0, 0x1E8D6, GBP_Extend},
{0x1E944, 0x1E94A, GBP_Extend},
{0x1F1E6, 0x1F1FF, GBP_Regional_Indicator},
- {0x1F385, 0x1F385, GBP_E_Base},
- {0x1F3C3, 0x1F3C4, GBP_E_Base},
- {0x1F3CA, 0x1F3CB, GBP_E_Base},
- {0x1F3FB, 0x1F3FF, GBP_E_Modifier},
- {0x1F442, 0x1F443, GBP_E_Base},
- {0x1F446, 0x1F450, GBP_E_Base},
- {0x1F466, 0x1F469, GBP_E_Base_GAZ},
- {0x1F46E, 0x1F46E, GBP_E_Base},
- {0x1F470, 0x1F478, GBP_E_Base},
- {0x1F47C, 0x1F47C, GBP_E_Base},
- {0x1F481, 0x1F483, GBP_E_Base},
- {0x1F485, 0x1F487, GBP_E_Base},
- {0x1F48B, 0x1F48B, GBP_Glue_After_Zwj},
- {0x1F4AA, 0x1F4AA, GBP_E_Base},
- {0x1F575, 0x1F575, GBP_E_Base},
- {0x1F57A, 0x1F57A, GBP_E_Base},
- {0x1F590, 0x1F590, GBP_E_Base},
- {0x1F595, 0x1F596, GBP_E_Base},
- {0x1F5E8, 0x1F5E8, GBP_Glue_After_Zwj},
- {0x1F645, 0x1F647, GBP_E_Base},
- {0x1F64B, 0x1F64F, GBP_E_Base},
- {0x1F6A3, 0x1F6A3, GBP_E_Base},
- {0x1F6B4, 0x1F6B6, GBP_E_Base},
- {0x1F6C0, 0x1F6C0, GBP_E_Base},
- {0x1F918, 0x1F91E, GBP_E_Base},
- {0x1F926, 0x1F926, GBP_E_Base},
- {0x1F930, 0x1F930, GBP_E_Base},
- {0x1F933, 0x1F939, GBP_E_Base},
- {0x1F93C, 0x1F93E, GBP_E_Base},
+ {0x1F3FB, 0x1F3FF, GBP_Extend},
{0xE0000, 0xE0000, GBP_Control},
{0xE0001, 0xE0001, GBP_Control},
{0xE0002, 0xE001F, GBP_Control},
diff --git a/src/static_libs/libunibreak/graphemebreakdef.h b/src/static_libs/libunibreak/graphemebreakdef.h
index 0de1f3d623..90ccfbd5f1 100644
--- a/src/static_libs/libunibreak/graphemebreakdef.h
+++ b/src/static_libs/libunibreak/graphemebreakdef.h
@@ -2,7 +2,7 @@
* Grapheme breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
- * Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net>
+ * Copyright (C) 2016-2019 Andreas Röver <roever at users dot sf dot net>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@@ -28,6 +28,10 @@
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
*
+ * This library has been updated according to Revision 35, for
+ * Unicode 12.0.0:
+ * <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
+ *
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
*/
@@ -45,13 +49,15 @@
/**
* Word break classes. This is a direct mapping of Table 2 of Unicode
- * Standard Annex 29
+ * Standard Annex 29.
*/
enum GraphemeBreakClass
{
GBP_CR,
GBP_LF,
GBP_Control,
+ GBP_Virama,
+ GBP_LinkingConsonant,
GBP_Extend,
GBP_ZWJ,
GBP_Regional_Indicator,
@@ -62,10 +68,6 @@ enum GraphemeBreakClass
GBP_T,
GBP_LV,
GBP_LVT,
- GBP_E_Base,
- GBP_E_Modifier,
- GBP_Glue_After_Zwj,
- GBP_E_Base_GAZ,
GBP_Other,
GBP_Undefined
};
@@ -76,7 +78,7 @@ enum GraphemeBreakClass
*/
struct GraphemeBreakProperties
{
- utf32_t start; /**< Starting coding point */
- utf32_t end; /**< End coding point, including */
+ utf32_t start; /**< Start codepoint */
+ utf32_t end; /**< End codepoint, inclusive */
enum GraphemeBreakClass prop; /**< The grapheme breaking property */
};
diff --git a/src/static_libs/libunibreak/linebreak.c b/src/static_libs/libunibreak/linebreak.c
index 41f23c1c08..98e2730314 100644
--- a/src/static_libs/libunibreak/linebreak.c
+++ b/src/static_libs/libunibreak/linebreak.c
@@ -4,7 +4,7 @@
* Line breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
- * Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2008-2019 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
@@ -31,9 +31,9 @@
* Unicode 5.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
- * This library has been updated according to Revision 37, for
- * Unicode 9.0.0:
- * <URL:http://www.unicode.org/reports/tr14/tr14-37.html>
+ * This library has been updated according to Revision 43, for
+ * Unicode 12.0.0:
+ * <URL:http://www.unicode.org/reports/tr14/tr14-43.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@@ -80,7 +80,9 @@ enum BreakAction
/**
* Break action pair table. This is a direct mapping of Table 2 of
- * Unicode Standard Annex 14, Revision 37, except the "CB" part.
+ * Unicode Standard Annex 14, Revision 37, except for ZWJ (manually
+ * adjusted after special processing as per LB8a of Revision 41) and CB
+ * (manually added as per LB20).
*/
static enum BreakAction baTable[LBP_CB][LBP_CB] = {
{ /* OP */
@@ -270,17 +272,17 @@ static enum BreakAction baTable[LBP_CB][LBP_CB] = {
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK },
{ /* ZWJ */
- DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
- IND_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+ DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
- DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK },
+ DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK },
{ /* CB */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
- DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
+ DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK },
};
/**
@@ -288,8 +290,9 @@ static enum BreakAction baTable[LBP_CB][LBP_CB] = {
*/
struct LineBreakPropertiesIndex
{
- utf32_t end; /**< End coding point */
- const struct LineBreakProperties *lbp;/**< Pointer to line breaking properties */
+ utf32_t end; /**< End codepoint */
+ const struct LineBreakProperties *lbp; /**< Pointer to line breaking
+ properties */
};
/**
@@ -335,7 +338,7 @@ static __inline int ends_with(const char *str, const char *suffix,
* Initializes the second-level index to the line breaking properties.
* If it is not called, the performance of #get_char_lb_class_lang (and
* thus the main functionality) can be pretty bad, especially for big
- * code points like those of Chinese.
+ * codepoints like those of Chinese.
*/
void init_linebreak(void)
{
@@ -612,12 +615,18 @@ static int get_lb_result_lookup(
break;
}
+ /* Special processing due to rule LB8a */
+ if (lbpCtx->fLb8aZwj)
+ {
+ brk = LINEBREAK_NOBREAK;
+ }
+
/* Special processing due to rule LB21a */
if (lbpCtx->fLb21aHebrew &&
(lbpCtx->lbcCur == LBP_HY || lbpCtx->lbcCur == LBP_BA))
{
brk = LINEBREAK_NOBREAK;
- lbpCtx->fLb21aHebrew = 0;
+ lbpCtx->fLb21aHebrew = false;
}
else
{
@@ -663,17 +672,21 @@ void lb_init_break_context(
lbpCtx->lbcCur = resolve_lb_class(
get_char_lb_class_lang(ch, lbpCtx->lbpLang),
lbpCtx->lang);
- lbpCtx->fLb21aHebrew = 0;
+ lbpCtx->fLb8aZwj =
+ (get_char_lb_class_lang(ch, lbpCtx->lbpLang) == LBP_ZWJ);
+ lbpCtx->fLb10LeadSpace =
+ (get_char_lb_class_lang(ch, lbpCtx->lbpLang) == LBP_SP);
+ lbpCtx->fLb21aHebrew = false;
lbpCtx->cLb30aRI = 0;
treat_first_char(lbpCtx);
}
/**
- * Updates LineBreakingContext for the next code point and returns
+ * Updates LineBreakingContext for the next codepoint and returns
* the detected break.
*
* @param[in,out] lbpCtx pointer to the line breaking context
- * @param[in] ch Unicode code point
+ * @param[in] ch Unicode codepoint
* @return break result, one of #LINEBREAK_MUSTBREAK,
* #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK
* @post the line breaking context is updated
@@ -700,6 +713,25 @@ int lb_process_next_char(
default:
break;
}
+
+ /* Special processing due to rule LB8a */
+ if (lbpCtx->lbcNew == LBP_ZWJ)
+ {
+ lbpCtx->fLb8aZwj = true;
+ }
+ else
+ {
+ lbpCtx->fLb8aZwj = false;
+ }
+
+ /* Special processing due to rule LB10 */
+ if (lbpCtx->fLb10LeadSpace)
+ {
+ if (lbpCtx->lbcNew == LBP_CM || lbpCtx->lbcNew == LBP_ZWJ)
+ brk = LINEBREAK_ALLOWBREAK;
+ lbpCtx->fLb10LeadSpace = false;
+ }
+
return brk;
}
diff --git a/src/static_libs/libunibreak/linebreak.h b/src/static_libs/libunibreak/linebreak.h
index fd7351191b..fa88094b4b 100644
--- a/src/static_libs/libunibreak/linebreak.h
+++ b/src/static_libs/libunibreak/linebreak.h
@@ -4,7 +4,7 @@
* Line breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
- * Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2008-2019 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@@ -30,9 +30,9 @@
* Unicode 5.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
- * This library has been updated according to Revision 37, for
- * Unicode 9.0.0:
- * <URL:http://www.unicode.org/reports/tr14/tr14-37.html>
+ * This library has been updated according to Revision 43, for
+ * Unicode 12.0.0:
+ * <URL:http://www.unicode.org/reports/tr14/tr14-43.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
diff --git a/src/static_libs/libunibreak/linebreakdata.c b/src/static_libs/libunibreak/linebreakdata.c
index c571f2da00..23d9072baa 100644
--- a/src/static_libs/libunibreak/linebreakdata.c
+++ b/src/static_libs/libunibreak/linebreakdata.c
@@ -1,6 +1,6 @@
/* The content of this file is generated from:
-# LineBreak-9.0.0.txt
-# Date: 2016-05-26, 01:00:00 GMT [KW, LI]
+# LineBreak-12.1.0.txt
+# Date: 2019-03-31, 22:04:15 GMT [KW, LI]
*/
#include "linebreakdef.h"
@@ -94,7 +94,7 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x037E, 0x037E, LBP_IS },
{ 0x037F, 0x0482, LBP_AL },
{ 0x0483, 0x0489, LBP_CM },
- { 0x048A, 0x0587, LBP_AL },
+ { 0x048A, 0x0588, LBP_AL },
{ 0x0589, 0x0589, LBP_IS },
{ 0x058A, 0x058A, LBP_BA },
{ 0x058D, 0x058E, LBP_AL },
@@ -149,7 +149,10 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x07F4, 0x07F7, LBP_AL },
{ 0x07F8, 0x07F8, LBP_IS },
{ 0x07F9, 0x07F9, LBP_EX },
- { 0x07FA, 0x0815, LBP_AL },
+ { 0x07FA, 0x07FA, LBP_AL },
+ { 0x07FD, 0x07FD, LBP_CM },
+ { 0x07FE, 0x07FF, LBP_PR },
+ { 0x0800, 0x0815, LBP_AL },
{ 0x0816, 0x0819, LBP_CM },
{ 0x081A, 0x081A, LBP_AL },
{ 0x081B, 0x0823, LBP_CM },
@@ -160,7 +163,7 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x0830, 0x0858, LBP_AL },
{ 0x0859, 0x085B, LBP_CM },
{ 0x085E, 0x08BD, LBP_AL },
- { 0x08D4, 0x08E1, LBP_CM },
+ { 0x08D3, 0x08E1, LBP_CM },
{ 0x08E2, 0x08E2, LBP_AL },
{ 0x08E3, 0x0903, LBP_CM },
{ 0x0904, 0x0939, LBP_AL },
@@ -190,14 +193,17 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x09F9, 0x09F9, LBP_PO },
{ 0x09FA, 0x09FA, LBP_AL },
{ 0x09FB, 0x09FB, LBP_PR },
- { 0x0A01, 0x0A03, LBP_CM },
+ { 0x09FC, 0x09FD, LBP_AL },
+ { 0x09FE, 0x0A03, LBP_CM },
{ 0x0A05, 0x0A39, LBP_AL },
{ 0x0A3C, 0x0A51, LBP_CM },
{ 0x0A59, 0x0A5E, LBP_AL },
{ 0x0A66, 0x0A6F, LBP_NU },
{ 0x0A70, 0x0A71, LBP_CM },
{ 0x0A72, 0x0A74, LBP_AL },
- { 0x0A75, 0x0A83, LBP_CM },
+ { 0x0A75, 0x0A75, LBP_CM },
+ { 0x0A76, 0x0A76, LBP_AL },
+ { 0x0A81, 0x0A83, LBP_CM },
{ 0x0A85, 0x0AB9, LBP_AL },
{ 0x0ABC, 0x0ABC, LBP_CM },
{ 0x0ABD, 0x0ABD, LBP_AL },
@@ -208,7 +214,7 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x0AF0, 0x0AF0, LBP_AL },
{ 0x0AF1, 0x0AF1, LBP_PR },
{ 0x0AF9, 0x0AF9, LBP_AL },
- { 0x0B01, 0x0B03, LBP_CM },
+ { 0x0AFA, 0x0B03, LBP_CM },
{ 0x0B05, 0x0B39, LBP_AL },
{ 0x0B3C, 0x0B3C, LBP_CM },
{ 0x0B3D, 0x0B3D, LBP_AL },
@@ -226,14 +232,16 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x0BF0, 0x0BF8, LBP_AL },
{ 0x0BF9, 0x0BF9, LBP_PR },
{ 0x0BFA, 0x0BFA, LBP_AL },
- { 0x0C00, 0x0C03, LBP_CM },
+ { 0x0C00, 0x0C04, LBP_CM },
{ 0x0C05, 0x0C3D, LBP_AL },
{ 0x0C3E, 0x0C56, LBP_CM },
{ 0x0C58, 0x0C61, LBP_AL },
{ 0x0C62, 0x0C63, LBP_CM },
{ 0x0C66, 0x0C6F, LBP_NU },
+ { 0x0C77, 0x0C77, LBP_BB },
{ 0x0C78, 0x0C80, LBP_AL },
{ 0x0C81, 0x0C83, LBP_CM },
+ { 0x0C84, 0x0C84, LBP_BB },
{ 0x0C85, 0x0CB9, LBP_AL },
{ 0x0CBC, 0x0CBC, LBP_CM },
{ 0x0CBD, 0x0CBD, LBP_AL },
@@ -242,8 +250,10 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x0CE2, 0x0CE3, LBP_CM },
{ 0x0CE6, 0x0CEF, LBP_NU },
{ 0x0CF1, 0x0CF2, LBP_AL },
- { 0x0D01, 0x0D03, LBP_CM },
- { 0x0D05, 0x0D3D, LBP_AL },
+ { 0x0D00, 0x0D03, LBP_CM },
+ { 0x0D05, 0x0D3A, LBP_AL },
+ { 0x0D3B, 0x0D3C, LBP_CM },
+ { 0x0D3D, 0x0D3D, LBP_AL },
{ 0x0D3E, 0x0D4D, LBP_CM },
{ 0x0D4E, 0x0D56, LBP_AL },
{ 0x0D57, 0x0D57, LBP_CM },
@@ -417,11 +427,11 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1CD4, 0x1CE8, LBP_CM },
{ 0x1CE9, 0x1CEC, LBP_AL },
{ 0x1CED, 0x1CED, LBP_CM },
- { 0x1CEE, 0x1CF1, LBP_AL },
- { 0x1CF2, 0x1CF4, LBP_CM },
+ { 0x1CEE, 0x1CF3, LBP_AL },
+ { 0x1CF4, 0x1CF4, LBP_CM },
{ 0x1CF5, 0x1CF6, LBP_AL },
- { 0x1CF8, 0x1CF9, LBP_CM },
- { 0x1D00, 0x1DBF, LBP_AL },
+ { 0x1CF7, 0x1CF9, LBP_CM },
+ { 0x1CFA, 0x1DBF, LBP_AL },
{ 0x1DC0, 0x1DFF, LBP_CM },
{ 0x1E00, 0x1FFC, LBP_AL },
{ 0x1FFD, 0x1FFD, LBP_BB },
@@ -430,7 +440,9 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x2007, 0x2007, LBP_GL },
{ 0x2008, 0x200A, LBP_BA },
{ 0x200B, 0x200B, LBP_ZW },
- { 0x200C, 0x200F, LBP_CM },
+ { 0x200C, 0x200C, LBP_CM },
+ { 0x200D, 0x200D, LBP_ZWJ },
+ { 0x200E, 0x200F, LBP_CM },
{ 0x2010, 0x2010, LBP_BA },
{ 0x2011, 0x2011, LBP_GL },
{ 0x2012, 0x2013, LBP_BA },
@@ -808,7 +820,11 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x2E3F, 0x2E3F, LBP_AL },
{ 0x2E40, 0x2E41, LBP_BA },
{ 0x2E42, 0x2E42, LBP_OP },
- { 0x2E43, 0x2E44, LBP_BA },
+ { 0x2E43, 0x2E4A, LBP_BA },
+ { 0x2E4B, 0x2E4B, LBP_AL },
+ { 0x2E4C, 0x2E4C, LBP_BA },
+ { 0x2E4D, 0x2E4D, LBP_AL },
+ { 0x2E4E, 0x2E4F, LBP_BA },
{ 0x2E80, 0x2FFB, LBP_ID },
{ 0x3000, 0x3000, LBP_BA },
{ 0x3001, 0x3002, LBP_CL },
@@ -942,7 +958,8 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0xA8E0, 0xA8F1, LBP_CM },
{ 0xA8F2, 0xA8FB, LBP_AL },
{ 0xA8FC, 0xA8FC, LBP_BB },
- { 0xA8FD, 0xA8FD, LBP_AL },
+ { 0xA8FD, 0xA8FE, LBP_AL },
+ { 0xA8FF, 0xA8FF, LBP_CM },
{ 0xA900, 0xA909, LBP_NU },
{ 0xA90A, 0xA925, LBP_AL },
{ 0xA926, 0xA92D, LBP_CM },
@@ -1907,9 +1924,9 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1091F, 0x1091F, LBP_BA },
{ 0x10920, 0x10A00, LBP_AL },
{ 0x10A01, 0x10A0F, LBP_CM },
- { 0x10A10, 0x10A33, LBP_AL },
+ { 0x10A10, 0x10A35, LBP_AL },
{ 0x10A38, 0x10A3F, LBP_CM },
- { 0x10A40, 0x10A47, LBP_AL },
+ { 0x10A40, 0x10A48, LBP_AL },
{ 0x10A50, 0x10A57, LBP_BA },
{ 0x10A58, 0x10AE4, LBP_AL },
{ 0x10AE5, 0x10AE6, LBP_CM },
@@ -1918,7 +1935,12 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x10AF6, 0x10AF6, LBP_IN },
{ 0x10B00, 0x10B35, LBP_AL },
{ 0x10B39, 0x10B3F, LBP_BA },
- { 0x10B40, 0x10E7E, LBP_AL },
+ { 0x10B40, 0x10D23, LBP_AL },
+ { 0x10D24, 0x10D27, LBP_CM },
+ { 0x10D30, 0x10D39, LBP_NU },
+ { 0x10E60, 0x10F45, LBP_AL },
+ { 0x10F46, 0x10F50, LBP_CM },
+ { 0x10F51, 0x10FF6, LBP_AL },
{ 0x11000, 0x11002, LBP_CM },
{ 0x11003, 0x11037, LBP_AL },
{ 0x11038, 0x11046, LBP_CM },
@@ -1930,13 +1952,15 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x110B0, 0x110BA, LBP_CM },
{ 0x110BB, 0x110BD, LBP_AL },
{ 0x110BE, 0x110C1, LBP_BA },
- { 0x110D0, 0x110E8, LBP_AL },
+ { 0x110CD, 0x110E8, LBP_AL },
{ 0x110F0, 0x110F9, LBP_NU },
{ 0x11100, 0x11102, LBP_CM },
{ 0x11103, 0x11126, LBP_AL },
{ 0x11127, 0x11134, LBP_CM },
{ 0x11136, 0x1113F, LBP_NU },
{ 0x11140, 0x11143, LBP_BA },
+ { 0x11144, 0x11144, LBP_AL },
+ { 0x11145, 0x11146, LBP_CM },
{ 0x11150, 0x11172, LBP_AL },
{ 0x11173, 0x11173, LBP_CM },
{ 0x11174, 0x11174, LBP_AL },
@@ -1949,8 +1973,7 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x111C5, 0x111C6, LBP_BA },
{ 0x111C7, 0x111C7, LBP_AL },
{ 0x111C8, 0x111C8, LBP_BA },
- { 0x111C9, 0x111C9, LBP_AL },
- { 0x111CA, 0x111CC, LBP_CM },
+ { 0x111C9, 0x111CC, LBP_CM },
{ 0x111CD, 0x111CD, LBP_AL },
{ 0x111D0, 0x111D9, LBP_NU },
{ 0x111DA, 0x111DA, LBP_AL },
@@ -1971,7 +1994,7 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x112F0, 0x112F9, LBP_NU },
{ 0x11300, 0x11303, LBP_CM },
{ 0x11305, 0x11339, LBP_AL },
- { 0x1133C, 0x1133C, LBP_CM },
+ { 0x1133B, 0x1133C, LBP_CM },
{ 0x1133D, 0x1133D, LBP_AL },
{ 0x1133E, 0x1134D, LBP_CM },
{ 0x11350, 0x11350, LBP_AL },
@@ -1985,7 +2008,9 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1144F, 0x1144F, LBP_AL },
{ 0x11450, 0x11459, LBP_NU },
{ 0x1145B, 0x1145B, LBP_BA },
- { 0x1145D, 0x114AF, LBP_AL },
+ { 0x1145D, 0x1145D, LBP_AL },
+ { 0x1145E, 0x1145E, LBP_CM },
+ { 0x1145F, 0x114AF, LBP_AL },
{ 0x114B0, 0x114C3, LBP_CM },
{ 0x114C4, 0x114C7, LBP_AL },
{ 0x114D0, 0x114D9, LBP_NU },
@@ -2006,15 +2031,44 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x11660, 0x1166C, LBP_BB },
{ 0x11680, 0x116AA, LBP_AL },
{ 0x116AB, 0x116B7, LBP_CM },
+ { 0x116B8, 0x116B8, LBP_AL },
{ 0x116C0, 0x116C9, LBP_NU },
{ 0x11700, 0x1172B, LBP_SA },
{ 0x11730, 0x11739, LBP_NU },
{ 0x1173A, 0x1173B, LBP_SA },
{ 0x1173C, 0x1173E, LBP_BA },
{ 0x1173F, 0x1173F, LBP_SA },
- { 0x118A0, 0x118DF, LBP_AL },
+ { 0x11800, 0x1182B, LBP_AL },
+ { 0x1182C, 0x1183A, LBP_CM },
+ { 0x1183B, 0x118DF, LBP_AL },
{ 0x118E0, 0x118E9, LBP_NU },
- { 0x118EA, 0x11C2E, LBP_AL },
+ { 0x118EA, 0x119D0, LBP_AL },
+ { 0x119D1, 0x119E0, LBP_CM },
+ { 0x119E1, 0x119E1, LBP_AL },
+ { 0x119E2, 0x119E2, LBP_BB },
+ { 0x119E3, 0x119E3, LBP_AL },
+ { 0x119E4, 0x119E4, LBP_CM },
+ { 0x11A00, 0x11A00, LBP_AL },
+ { 0x11A01, 0x11A0A, LBP_CM },
+ { 0x11A0B, 0x11A32, LBP_AL },
+ { 0x11A33, 0x11A39, LBP_CM },
+ { 0x11A3A, 0x11A3A, LBP_AL },
+ { 0x11A3B, 0x11A3E, LBP_CM },
+ { 0x11A3F, 0x11A3F, LBP_BB },
+ { 0x11A40, 0x11A40, LBP_AL },
+ { 0x11A41, 0x11A44, LBP_BA },
+ { 0x11A45, 0x11A45, LBP_BB },
+ { 0x11A46, 0x11A46, LBP_AL },
+ { 0x11A47, 0x11A47, LBP_CM },
+ { 0x11A50, 0x11A50, LBP_AL },
+ { 0x11A51, 0x11A5B, LBP_CM },
+ { 0x11A5C, 0x11A89, LBP_AL },
+ { 0x11A8A, 0x11A99, LBP_CM },
+ { 0x11A9A, 0x11A9C, LBP_BA },
+ { 0x11A9D, 0x11A9D, LBP_AL },
+ { 0x11A9E, 0x11AA0, LBP_BB },
+ { 0x11AA1, 0x11AA2, LBP_BA },
+ { 0x11AC0, 0x11C2E, LBP_AL },
{ 0x11C2F, 0x11C3F, LBP_CM },
{ 0x11C40, 0x11C40, LBP_AL },
{ 0x11C41, 0x11C45, LBP_BA },
@@ -2024,6 +2078,21 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x11C71, 0x11C71, LBP_EX },
{ 0x11C72, 0x11C8F, LBP_AL },
{ 0x11C92, 0x11CB6, LBP_CM },
+ { 0x11D00, 0x11D30, LBP_AL },
+ { 0x11D31, 0x11D45, LBP_CM },
+ { 0x11D46, 0x11D46, LBP_AL },
+ { 0x11D47, 0x11D47, LBP_CM },
+ { 0x11D50, 0x11D59, LBP_NU },
+ { 0x11D60, 0x11D89, LBP_AL },
+ { 0x11D8A, 0x11D97, LBP_CM },
+ { 0x11D98, 0x11D98, LBP_AL },
+ { 0x11DA0, 0x11DA9, LBP_NU },
+ { 0x11EE0, 0x11EF2, LBP_AL },
+ { 0x11EF3, 0x11EF6, LBP_CM },
+ { 0x11EF7, 0x11FDC, LBP_AL },
+ { 0x11FDD, 0x11FE0, LBP_PO },
+ { 0x11FE1, 0x11FF1, LBP_AL },
+ { 0x11FFF, 0x11FFF, LBP_BA },
{ 0x12000, 0x1246E, LBP_AL },
{ 0x12470, 0x12474, LBP_BA },
{ 0x12480, 0x13257, LBP_AL },
@@ -2039,7 +2108,11 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1328A, 0x13378, LBP_AL },
{ 0x13379, 0x13379, LBP_OP },
{ 0x1337A, 0x1337B, LBP_CL },
- { 0x1337C, 0x145CD, LBP_AL },
+ { 0x1337C, 0x1342E, LBP_AL },
+ { 0x13430, 0x13436, LBP_GL },
+ { 0x13437, 0x13437, LBP_OP },
+ { 0x13438, 0x13438, LBP_CL },
+ { 0x14400, 0x145CD, LBP_AL },
{ 0x145CE, 0x145CE, LBP_OP },
{ 0x145CF, 0x145CF, LBP_CL },
{ 0x145D0, 0x16A5E, LBP_AL },
@@ -2055,11 +2128,17 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x16B44, 0x16B44, LBP_BA },
{ 0x16B45, 0x16B45, LBP_AL },
{ 0x16B50, 0x16B59, LBP_NU },
- { 0x16B5B, 0x16F50, LBP_AL },
+ { 0x16B5B, 0x16E96, LBP_AL },
+ { 0x16E97, 0x16E98, LBP_BA },
+ { 0x16E99, 0x16F4A, LBP_AL },
+ { 0x16F4F, 0x16F4F, LBP_CM },
+ { 0x16F50, 0x16F50, LBP_AL },
{ 0x16F51, 0x16F92, LBP_CM },
{ 0x16F93, 0x16F9F, LBP_AL },
- { 0x16FE0, 0x16FE0, LBP_NS },
- { 0x17000, 0x1B001, LBP_ID },
+ { 0x16FE0, 0x16FE3, LBP_NS },
+ { 0x17000, 0x1B11E, LBP_ID },
+ { 0x1B150, 0x1B167, LBP_CJ },
+ { 0x1B170, 0x1B2FB, LBP_ID },
{ 0x1BC00, 0x1BC9C, LBP_AL },
{ 0x1BC9D, 0x1BC9E, LBP_CM },
{ 0x1BC9F, 0x1BC9F, LBP_BA },
@@ -2088,22 +2167,34 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1DA87, 0x1DA8A, LBP_BA },
{ 0x1DA8B, 0x1DA8B, LBP_AL },
{ 0x1DA9B, 0x1E02A, LBP_CM },
+ { 0x1E100, 0x1E12C, LBP_AL },
+ { 0x1E130, 0x1E136, LBP_CM },
+ { 0x1E137, 0x1E13D, LBP_AL },
+ { 0x1E140, 0x1E149, LBP_NU },
+ { 0x1E14E, 0x1E2EB, LBP_AL },
+ { 0x1E2EC, 0x1E2EF, LBP_CM },
+ { 0x1E2F0, 0x1E2F9, LBP_NU },
+ { 0x1E2FF, 0x1E2FF, LBP_PR },
{ 0x1E800, 0x1E8CF, LBP_AL },
{ 0x1E8D0, 0x1E8D6, LBP_CM },
{ 0x1E900, 0x1E943, LBP_AL },
{ 0x1E944, 0x1E94A, LBP_CM },
+ { 0x1E94B, 0x1E94B, LBP_AL },
{ 0x1E950, 0x1E959, LBP_NU },
{ 0x1E95E, 0x1E95F, LBP_OP },
- { 0x1EE00, 0x1EEF1, LBP_AL },
+ { 0x1EC71, 0x1ECAB, LBP_AL },
+ { 0x1ECAC, 0x1ECAC, LBP_PO },
+ { 0x1ECAD, 0x1ECAF, LBP_AL },
+ { 0x1ECB0, 0x1ECB0, LBP_PO },
+ { 0x1ECB1, 0x1EEF1, LBP_AL },
{ 0x1F000, 0x1F0FF, LBP_ID },
{ 0x1F100, 0x1F10C, LBP_AI },
{ 0x1F10D, 0x1F10F, LBP_ID },
{ 0x1F110, 0x1F12D, LBP_AI },
- { 0x1F12E, 0x1F12E, LBP_AL },
- { 0x1F12F, 0x1F12F, LBP_ID },
+ { 0x1F12E, 0x1F12F, LBP_AL },
{ 0x1F130, 0x1F169, LBP_AI },
- { 0x1F16A, 0x1F16B, LBP_AL },
- { 0x1F16C, 0x1F16F, LBP_ID },
+ { 0x1F16A, 0x1F16C, LBP_AL },
+ { 0x1F16D, 0x1F16F, LBP_ID },
{ 0x1F170, 0x1F1AC, LBP_AI },
{ 0x1F1AD, 0x1F1E5, LBP_ID },
{ 0x1F1E6, 0x1F1FF, LBP_RI },
@@ -2115,29 +2206,31 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1F3B5, 0x1F3B6, LBP_AL },
{ 0x1F3B7, 0x1F3BB, LBP_ID },
{ 0x1F3BC, 0x1F3BC, LBP_AL },
- { 0x1F3BD, 0x1F3C2, LBP_ID },
- { 0x1F3C3, 0x1F3C4, LBP_EB },
- { 0x1F3C5, 0x1F3C9, LBP_ID },
- { 0x1F3CA, 0x1F3CB, LBP_EB },
- { 0x1F3CC, 0x1F3FA, LBP_ID },
+ { 0x1F3BD, 0x1F3C1, LBP_ID },
+ { 0x1F3C2, 0x1F3C4, LBP_EB },
+ { 0x1F3C5, 0x1F3C6, LBP_ID },
+ { 0x1F3C7, 0x1F3C7, LBP_EB },
+ { 0x1F3C8, 0x1F3C9, LBP_ID },
+ { 0x1F3CA, 0x1F3CC, LBP_EB },
+ { 0x1F3CD, 0x1F3FA, LBP_ID },
{ 0x1F3FB, 0x1F3FF, LBP_EM },
{ 0x1F400, 0x1F441, LBP_ID },
{ 0x1F442, 0x1F443, LBP_EB },
{ 0x1F444, 0x1F445, LBP_ID },
{ 0x1F446, 0x1F450, LBP_EB },
{ 0x1F451, 0x1F465, LBP_ID },
- { 0x1F466, 0x1F469, LBP_EB },
- { 0x1F46A, 0x1F46D, LBP_ID },
- { 0x1F46E, 0x1F46E, LBP_EB },
- { 0x1F46F, 0x1F46F, LBP_ID },
- { 0x1F470, 0x1F478, LBP_EB },
+ { 0x1F466, 0x1F478, LBP_EB },
{ 0x1F479, 0x1F47B, LBP_ID },
{ 0x1F47C, 0x1F47C, LBP_EB },
{ 0x1F47D, 0x1F480, LBP_ID },
{ 0x1F481, 0x1F483, LBP_EB },
{ 0x1F484, 0x1F484, LBP_ID },
{ 0x1F485, 0x1F487, LBP_EB },
- { 0x1F488, 0x1F49F, LBP_ID },
+ { 0x1F488, 0x1F48E, LBP_ID },
+ { 0x1F48F, 0x1F48F, LBP_EB },
+ { 0x1F490, 0x1F490, LBP_ID },
+ { 0x1F491, 0x1F491, LBP_EB },
+ { 0x1F492, 0x1F49F, LBP_ID },
{ 0x1F4A0, 0x1F4A0, LBP_AL },
{ 0x1F4A1, 0x1F4A1, LBP_ID },
{ 0x1F4A2, 0x1F4A2, LBP_AL },
@@ -2155,8 +2248,8 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1F517, 0x1F524, LBP_AL },
{ 0x1F525, 0x1F531, LBP_ID },
{ 0x1F532, 0x1F549, LBP_AL },
- { 0x1F54A, 0x1F574, LBP_ID },
- { 0x1F575, 0x1F575, LBP_EB },
+ { 0x1F54A, 0x1F573, LBP_ID },
+ { 0x1F574, 0x1F575, LBP_EB },
{ 0x1F576, 0x1F579, LBP_ID },
{ 0x1F57A, 0x1F57A, LBP_EB },
{ 0x1F57B, 0x1F58F, LBP_ID },
@@ -2181,7 +2274,9 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1F6B4, 0x1F6B6, LBP_EB },
{ 0x1F6B7, 0x1F6BF, LBP_ID },
{ 0x1F6C0, 0x1F6C0, LBP_EB },
- { 0x1F6C1, 0x1F6FF, LBP_ID },
+ { 0x1F6C1, 0x1F6CB, LBP_ID },
+ { 0x1F6CC, 0x1F6CC, LBP_EB },
+ { 0x1F6CD, 0x1F6FF, LBP_ID },
{ 0x1F700, 0x1F773, LBP_AL },
{ 0x1F774, 0x1F77F, LBP_ID },
{ 0x1F780, 0x1F7D4, LBP_AL },
@@ -2195,17 +2290,31 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1F860, 0x1F887, LBP_AL },
{ 0x1F888, 0x1F88F, LBP_ID },
{ 0x1F890, 0x1F8AD, LBP_AL },
- { 0x1F8AE, 0x1F917, LBP_ID },
- { 0x1F918, 0x1F91E, LBP_EB },
- { 0x1F91F, 0x1F925, LBP_ID },
+ { 0x1F8AE, 0x1F8FF, LBP_ID },
+ { 0x1F900, 0x1F90B, LBP_AL },
+ { 0x1F90C, 0x1F90E, LBP_ID },
+ { 0x1F90F, 0x1F90F, LBP_EB },
+ { 0x1F910, 0x1F917, LBP_ID },
+ { 0x1F918, 0x1F91F, LBP_EB },
+ { 0x1F920, 0x1F925, LBP_ID },
{ 0x1F926, 0x1F926, LBP_EB },
{ 0x1F927, 0x1F92F, LBP_ID },
- { 0x1F930, 0x1F930, LBP_EB },
- { 0x1F931, 0x1F932, LBP_ID },
- { 0x1F933, 0x1F939, LBP_EB },
+ { 0x1F930, 0x1F939, LBP_EB },
{ 0x1F93A, 0x1F93B, LBP_ID },
{ 0x1F93C, 0x1F93E, LBP_EB },
- { 0x1F93F, 0x3FFFD, LBP_ID },
+ { 0x1F93F, 0x1F9B4, LBP_ID },
+ { 0x1F9B5, 0x1F9B6, LBP_EB },
+ { 0x1F9B7, 0x1F9B7, LBP_ID },
+ { 0x1F9B8, 0x1F9B9, LBP_EB },
+ { 0x1F9BA, 0x1F9BA, LBP_ID },
+ { 0x1F9BB, 0x1F9BB, LBP_EB },
+ { 0x1F9BC, 0x1F9CC, LBP_ID },
+ { 0x1F9CD, 0x1F9CF, LBP_EB },
+ { 0x1F9D0, 0x1F9D0, LBP_ID },
+ { 0x1F9D1, 0x1F9DD, LBP_EB },
+ { 0x1F9DE, 0x1F9FF, LBP_ID },
+ { 0x1FA00, 0x1FA53, LBP_AL },
+ { 0x1FA54, 0x3FFFD, LBP_ID },
{ 0xE0001, 0xE01EF, LBP_CM },
{ 0xF0000, 0x10FFFD, LBP_XX },
{ 0xFFFFFFFF, 0xFFFFFFFF, LBP_Undefined }
diff --git a/src/static_libs/libunibreak/linebreakdef.c b/src/static_libs/libunibreak/linebreakdef.c
index 6b485cecbd..847621ed95 100644
--- a/src/static_libs/libunibreak/linebreakdef.c
+++ b/src/static_libs/libunibreak/linebreakdef.c
@@ -4,7 +4,7 @@
* Line breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
- * Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2008-2018 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@@ -22,20 +22,6 @@
* not be misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source
* distribution.
- *
- * The main reference is Unicode Standard Annex 14 (UAX #14):
- * <URL:http://www.unicode.org/reports/tr14/>
- *
- * When this library was designed, this annex was at Revision 19, for
- * Unicode 5.0.0:
- * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
- *
- * This library has been updated according to Revision 37, for
- * Unicode 9.0.0:
- * <URL:http://www.unicode.org/reports/tr14/tr14-37.html>
- *
- * The Unicode Terms of Use are available at
- * <URL:http://www.unicode.org/copyright.html>
*/
/**
@@ -66,6 +52,7 @@ static const struct LineBreakProperties lb_prop_German[] = {
{ 0x00AB, 0x00AB, LBP_CL }, /* Left double angle quotation mark: closing */
{ 0x00BB, 0x00BB, LBP_OP }, /* Right double angle quotation mark: opening */
{ 0x2018, 0x2018, LBP_CL }, /* Left single quotation mark: closing */
+ { 0x2019, 0x2019, LBP_GL }, /* Right single quotation mark: glue */
{ 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */
{ 0x2039, 0x2039, LBP_CL }, /* Left single angle quotation mark: closing */
{ 0x203A, 0x203A, LBP_OP }, /* Right single angle quotation mark: opening */
diff --git a/src/static_libs/libunibreak/linebreakdef.h b/src/static_libs/libunibreak/linebreakdef.h
index 37ec7b546e..48d714ef29 100644
--- a/src/static_libs/libunibreak/linebreakdef.h
+++ b/src/static_libs/libunibreak/linebreakdef.h
@@ -4,7 +4,7 @@
* Line breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
- * Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2008-2018 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
@@ -31,9 +31,9 @@
* Unicode 5.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
- * This library has been updated according to Revision 37, for
- * Unicode 9.0.0:
- * <URL:http://www.unicode.org/reports/tr14/tr14-37.html>
+ * This library has been updated according to Revision 43, for
+ * Unicode 12.0.0:
+ * <URL:http://www.unicode.org/reports/tr14/tr14-43.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@@ -52,8 +52,8 @@
#include "unibreakdef.h"
/**
- * Line break classes. This is a direct mapping of Table 1 of Unicode
- * Standard Annex 14, Revision 26.
+ * Line break classes. This is a mapping of Table 1 of Unicode
+ * Standard Annex 14.
*/
enum LineBreakClass
{
@@ -95,7 +95,7 @@ enum LineBreakClass
LBP_ZWJ, /**< Zero width joiner */
/* The following break class is treated in the pair table, but it is
- * not part of Table 2 of UAX #14. */
+ * not part of Table 2 of UAX #14-37. */
LBP_CB, /**< Contingent break */
/* The following break classes are not treated in the pair table */
@@ -117,8 +117,8 @@ enum LineBreakClass
*/
struct LineBreakProperties
{
- utf32_t start; /**< Starting coding point */
- utf32_t end; /**< End coding point */
+ utf32_t start; /**< Start codepoint */
+ utf32_t end; /**< End codepoint, inclusive */
enum LineBreakClass prop; /**< The line breaking property */
};
@@ -140,11 +140,14 @@ struct LineBreakPropertiesLang
struct LineBreakContext
{
const char *lang; /**< Language name */
- const struct LineBreakProperties *lbpLang;/**< Pointer to LineBreakProperties */
+ const struct LineBreakProperties *lbpLang; /**< Pointer to
+ LineBreakProperties */
enum LineBreakClass lbcCur; /**< Breaking class of current codepoint */
enum LineBreakClass lbcNew; /**< Breaking class of next codepoint */
enum LineBreakClass lbcLast; /**< Breaking class of last codepoint */
- int fLb21aHebrew; /**< Flag for Hebrew letters (LB21a) */
+ bool fLb8aZwj; /**< Flag for ZWJ (LB8a) */
+ bool fLb10LeadSpace; /**< Flag for leading space (LB10) */
+ bool fLb21aHebrew; /**< Flag for Hebrew letters (LB21a) */
int cLb30aRI; /**< Count of RI characters (LB30a) */
};
diff --git a/src/static_libs/libunibreak/meson.build b/src/static_libs/libunibreak/meson.build
index a2d5c3e3f9..cca9c1f6a0 100644
--- a/src/static_libs/libunibreak/meson.build
+++ b/src/static_libs/libunibreak/meson.build
@@ -15,6 +15,8 @@ libunibreak_src = [
'graphemebreak.c',
'graphemebreak.h',
'graphemebreakdef.h',
+ 'emojidef.h',
+ 'emojidef.c',
]
libunibreak_lib = static_library('libunibreak',
diff --git a/src/static_libs/libunibreak/unibreakbase.c b/src/static_libs/libunibreak/unibreakbase.c
index 686852a990..ef24c90047 100644
--- a/src/static_libs/libunibreak/unibreakbase.c
+++ b/src/static_libs/libunibreak/unibreakbase.c
@@ -4,7 +4,7 @@
* Break processing in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
- * Copyright (C) 2015-2016 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2015-2018 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
diff --git a/src/static_libs/libunibreak/unibreakbase.h b/src/static_libs/libunibreak/unibreakbase.h
index ff9a6ce8a9..a00a5bdb6b 100644
--- a/src/static_libs/libunibreak/unibreakbase.h
+++ b/src/static_libs/libunibreak/unibreakbase.h
@@ -4,7 +4,7 @@
* Break processing in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
- * Copyright (C) 2015-2016 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2015-2019 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@@ -39,7 +39,7 @@
extern "C" {
#endif
-#define UNIBREAK_VERSION 0x0400 /**< Version of the library linebreak */
+#define UNIBREAK_VERSION 0x0402 /**< Version of the libunibreak */
extern const int unibreak_version;
#ifndef UNIBREAK_UTF_TYPES_DEFINED
diff --git a/src/static_libs/libunibreak/unibreakdef.h b/src/static_libs/libunibreak/unibreakdef.h
index e13016d8cd..5f3533e5dd 100644
--- a/src/static_libs/libunibreak/unibreakdef.h
+++ b/src/static_libs/libunibreak/unibreakdef.h
@@ -4,7 +4,7 @@
* Break processing in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
- * Copyright (C) 2015-2016 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2015-2018 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@@ -35,9 +35,19 @@
#ifndef UNIBREAKDEF_H
#define UNIBREAKDEF_H
+#if defined(_MSC_VER) && _MSC_VER < 1800
+typedef int bool;
+#define false 0
+#define true 1
+#else
+#include <stdbool.h>
+#endif
+
#include <stddef.h>
#include "unibreakbase.h"
+#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
+
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/src/static_libs/libunibreak/wordbreak.c b/src/static_libs/libunibreak/wordbreak.c
index 50c830c7cc..d4e22495c6 100644
--- a/src/static_libs/libunibreak/wordbreak.c
+++ b/src/static_libs/libunibreak/wordbreak.c
@@ -4,7 +4,8 @@
* Word breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
- * Copyright (C) 2013-2016 Tom Hacohen <tom at stosb dot com>
+ * Copyright (C) 2013-2019 Tom Hacohen <tom at stosb dot com>
+ * Copyright (C) 2018 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@@ -30,9 +31,9 @@
* Unicode 6.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
*
- * This library has been updated according to Revision 29, for
- * Unicode 9.0.0:
- * <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
+ * This library has been updated according to Revision 35, for
+ * Unicode 12.0.0:
+ * <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@@ -53,8 +54,7 @@
#include "unibreakdef.h"
#include "wordbreak.h"
#include "wordbreakdata.c"
-
-#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
+#include "emojidef.h"
/**
* Initializes the wordbreak internals. It currently does nothing, but
@@ -215,7 +215,7 @@ static void set_wordbreaks(
#if __has_attribute(fallthrough)
__attribute__((fallthrough));
#endif
- /* Fall off */
+ /* Fall through */
case WBP_Newline:
/* WB3a,3b */
@@ -225,24 +225,6 @@ static void set_wordbreaks(
posLast = posCur;
break;
- case WBP_E_Base_GAZ:
- case WBP_Glue_After_Zwj:
- /* WB3c */
- if (wbcLast == WBP_ZWJ)
- {
- set_brks_to(s, brks, posLast, posCur, len,
- WORDBREAK_NOBREAK, get_next_char);
- }
- /* No rule found, reset */
- else
- {
- set_brks_to(s, brks, posLast, posCur, len,
- WORDBREAK_BREAK, get_next_char);
- }
- wbcSeqStart = wbcCur;
- posLast = posCur;
- break;
-
case WBP_ZWJ:
case WBP_Extend:
case WBP_Format:
@@ -260,8 +242,10 @@ static void set_wordbreaks(
{
/* It's surely not the first */
brks[posCur - 1] = WORDBREAK_NOBREAK;
- /* WB3c precedes 4, so no intervening Extend chars allowed. */
- if (wbcSeqStart != WBP_ZWJ)
+ /* Rules WB3c and WB3d take precedence over WB4, so no
+ * intervening Extend characters are allowed. */
+ if (wbcCur != WBP_ZWJ && wbcSeqStart != WBP_ZWJ &&
+ wbcSeqStart != WBP_WSegSpace)
{
/* "inherit" the previous class. */
wbcCur = wbcLast;
@@ -334,7 +318,8 @@ static void set_wordbreaks(
#if __has_attribute(fallthrough)
__attribute__((fallthrough));
#endif
- /* No break on purpose */
+ /* Fall through */
+
case WBP_MidNumLet:
if (((wbcLast == WBP_ALetter) ||
(wbcLast == WBP_Hebrew_Letter)) || /* WB6,7 */
@@ -421,32 +406,6 @@ static void set_wordbreaks(
posLast = posCur;
break;
- case WBP_E_Base:
- /* No rule found, reset */
- set_brks_to(s, brks, posLast, posCur, len,
- WORDBREAK_BREAK, get_next_char);
- wbcSeqStart = wbcCur;
- posLast = posCur;
- break;
-
- case WBP_E_Modifier:
- /* WB14 */
- if ((wbcLast == WBP_E_Base) ||
- (wbcLast == WBP_E_Base_GAZ))
- {
- set_brks_to(s, brks, posLast, posCur, len,
- WORDBREAK_NOBREAK, get_next_char);
- }
- /* No rule found, reset */
- else
- {
- set_brks_to(s, brks, posLast, posCur, len,
- WORDBREAK_BREAK, get_next_char);
- }
- wbcSeqStart = wbcCur;
- posLast = posCur;
- break;
-
case WBP_Regional_Indicator:
/* WB15,16 */
if ((wbcSeqStart == WBP_Regional_Indicator) &&
@@ -481,7 +440,32 @@ static void set_wordbreaks(
}
break;
+ case WBP_WSegSpace:
+ if (wbcLast == WBP_WSegSpace) /* WB3d */
+ {
+ set_brks_to(s, brks, posLast, posCur, len,
+ WORDBREAK_NOBREAK, get_next_char);
+ posLast = posCur;
+ break;
+ }
+#ifndef __has_attribute
+# define __has_attribute(x) 0
+#endif
+#if __has_attribute(fallthrough)
+ __attribute__((fallthrough));
+#endif
+ /* Fall through */
+
case WBP_Any:
+ /* Check for rule WB3c */
+ if (wbcLast == WBP_ZWJ && ub_is_extended_pictographic(ch))
+ {
+ set_brks_to(s, brks, posLast, posCur, len,
+ WORDBREAK_NOBREAK, get_next_char);
+ posLast = posCur;
+ break;
+ }
+
/* Allow breaks and reset */
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_BREAK, get_next_char);
diff --git a/src/static_libs/libunibreak/wordbreak.h b/src/static_libs/libunibreak/wordbreak.h
index 1040c13280..021de4d751 100644
--- a/src/static_libs/libunibreak/wordbreak.h
+++ b/src/static_libs/libunibreak/wordbreak.h
@@ -4,7 +4,8 @@
* Word breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
- * Copyright (C) 2013-2016 Tom Hacohen <tom at stosb dot com>
+ * Copyright (C) 2013-2019 Tom Hacohen <tom at stosb dot com>
+ * Copyright (C) 2018 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@@ -30,9 +31,9 @@
* Unicode 6.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
*
- * This library has been updated according to Revision 29, for
- * Unicode 9.0.0:
- * <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
+ * This library has been updated according to Revision 35, for
+ * Unicode 12.0.0:
+ * <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@@ -72,4 +73,4 @@ void set_wordbreaks_utf32(
}
#endif
-#endif
+#endif /* WORDBREAK_H */
diff --git a/src/static_libs/libunibreak/wordbreakdata.c b/src/static_libs/libunibreak/wordbreakdata.c
index 99fcff5bad..f5ee889589 100644
--- a/src/static_libs/libunibreak/wordbreakdata.c
+++ b/src/static_libs/libunibreak/wordbreakdata.c
@@ -1,6 +1,6 @@
/* The content of this file is generated from:
-# WordBreakProperty-9.0.0.txt
-# Date: 2016-06-01, 10:34:38 GMT
+# WordBreakProperty-12.1.0.txt
+# Date: 2019-03-10, 10:53:28 GMT
*/
#include "wordbreakdef.h"
@@ -9,6 +9,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x000A, 0x000A, WBP_LF},
{0x000B, 0x000C, WBP_Newline},
{0x000D, 0x000D, WBP_CR},
+ {0x0020, 0x0020, WBP_WSegSpace},
{0x0022, 0x0022, WBP_Double_Quote},
{0x0027, 0x0027, WBP_Single_Quote},
{0x002C, 0x002C, WBP_MidNum},
@@ -35,11 +36,15 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x0294, 0x0294, WBP_ALetter},
{0x0295, 0x02AF, WBP_ALetter},
{0x02B0, 0x02C1, WBP_ALetter},
+ {0x02C2, 0x02C5, WBP_ALetter},
{0x02C6, 0x02D1, WBP_ALetter},
- {0x02D7, 0x02D7, WBP_MidLetter},
+ {0x02D2, 0x02D7, WBP_ALetter},
+ {0x02DE, 0x02DF, WBP_ALetter},
{0x02E0, 0x02E4, WBP_ALetter},
{0x02EC, 0x02EC, WBP_ALetter},
+ {0x02ED, 0x02ED, WBP_ALetter},
{0x02EE, 0x02EE, WBP_ALetter},
+ {0x02EF, 0x02FF, WBP_ALetter},
{0x0300, 0x036F, WBP_Extend},
{0x0370, 0x0373, WBP_ALetter},
{0x0374, 0x0374, WBP_ALetter},
@@ -60,7 +65,9 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x048A, 0x052F, WBP_ALetter},
{0x0531, 0x0556, WBP_ALetter},
{0x0559, 0x0559, WBP_ALetter},
- {0x0561, 0x0587, WBP_ALetter},
+ {0x055B, 0x055C, WBP_ALetter},
+ {0x055E, 0x055E, WBP_ALetter},
+ {0x0560, 0x0588, WBP_ALetter},
{0x0589, 0x0589, WBP_MidNum},
{0x0591, 0x05BD, WBP_Extend},
{0x05BF, 0x05BF, WBP_Extend},
@@ -68,7 +75,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x05C4, 0x05C5, WBP_Extend},
{0x05C7, 0x05C7, WBP_Extend},
{0x05D0, 0x05EA, WBP_Hebrew_Letter},
- {0x05F0, 0x05F2, WBP_Hebrew_Letter},
+ {0x05EF, 0x05F2, WBP_Hebrew_Letter},
{0x05F3, 0x05F3, WBP_ALetter},
{0x05F4, 0x05F4, WBP_MidLetter},
{0x0600, 0x0605, WBP_Format},
@@ -110,6 +117,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x07F4, 0x07F5, WBP_ALetter},
{0x07F8, 0x07F8, WBP_MidNum},
{0x07FA, 0x07FA, WBP_ALetter},
+ {0x07FD, 0x07FD, WBP_Extend},
{0x0800, 0x0815, WBP_ALetter},
{0x0816, 0x0819, WBP_Extend},
{0x081A, 0x081A, WBP_ALetter},
@@ -120,9 +128,10 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x0829, 0x082D, WBP_Extend},
{0x0840, 0x0858, WBP_ALetter},
{0x0859, 0x085B, WBP_Extend},
+ {0x0860, 0x086A, WBP_ALetter},
{0x08A0, 0x08B4, WBP_ALetter},
{0x08B6, 0x08BD, WBP_ALetter},
- {0x08D4, 0x08E1, WBP_Extend},
+ {0x08D3, 0x08E1, WBP_Extend},
{0x08E2, 0x08E2, WBP_Format},
{0x08E3, 0x0902, WBP_Extend},
{0x0903, 0x0903, WBP_Extend},
@@ -165,6 +174,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x09E2, 0x09E3, WBP_Extend},
{0x09E6, 0x09EF, WBP_Numeric},
{0x09F0, 0x09F1, WBP_ALetter},
+ {0x09FC, 0x09FC, WBP_ALetter},
+ {0x09FE, 0x09FE, WBP_Extend},
{0x0A01, 0x0A02, WBP_Extend},
{0x0A03, 0x0A03, WBP_Extend},
{0x0A05, 0x0A0A, WBP_ALetter},
@@ -207,6 +218,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x0AE2, 0x0AE3, WBP_Extend},
{0x0AE6, 0x0AEF, WBP_Numeric},
{0x0AF9, 0x0AF9, WBP_ALetter},
+ {0x0AFA, 0x0AFF, WBP_Extend},
{0x0B01, 0x0B01, WBP_Extend},
{0x0B02, 0x0B03, WBP_Extend},
{0x0B05, 0x0B0C, WBP_ALetter},
@@ -253,6 +265,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x0BE6, 0x0BEF, WBP_Numeric},
{0x0C00, 0x0C00, WBP_Extend},
{0x0C01, 0x0C03, WBP_Extend},
+ {0x0C04, 0x0C04, WBP_Extend},
{0x0C05, 0x0C0C, WBP_ALetter},
{0x0C0E, 0x0C10, WBP_ALetter},
{0x0C12, 0x0C28, WBP_ALetter},
@@ -290,11 +303,12 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x0CE2, 0x0CE3, WBP_Extend},
{0x0CE6, 0x0CEF, WBP_Numeric},
{0x0CF1, 0x0CF2, WBP_ALetter},
- {0x0D01, 0x0D01, WBP_Extend},
+ {0x0D00, 0x0D01, WBP_Extend},
{0x0D02, 0x0D03, WBP_Extend},
{0x0D05, 0x0D0C, WBP_ALetter},
{0x0D0E, 0x0D10, WBP_ALetter},
{0x0D12, 0x0D3A, WBP_ALetter},
+ {0x0D3B, 0x0D3C, WBP_Extend},
{0x0D3D, 0x0D3D, WBP_ALetter},
{0x0D3E, 0x0D40, WBP_Extend},
{0x0D41, 0x0D44, WBP_Extend},
@@ -326,8 +340,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x0E47, 0x0E4E, WBP_Extend},
{0x0E50, 0x0E59, WBP_Numeric},
{0x0EB1, 0x0EB1, WBP_Extend},
- {0x0EB4, 0x0EB9, WBP_Extend},
- {0x0EBB, 0x0EBC, WBP_Extend},
+ {0x0EB4, 0x0EBC, WBP_Extend},
{0x0EC8, 0x0ECD, WBP_Extend},
{0x0ED0, 0x0ED9, WBP_Numeric},
{0x0F00, 0x0F00, WBP_ALetter},
@@ -376,7 +389,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x10CD, 0x10CD, WBP_ALetter},
{0x10D0, 0x10FA, WBP_ALetter},
{0x10FC, 0x10FC, WBP_ALetter},
- {0x10FD, 0x1248, WBP_ALetter},
+ {0x10FD, 0x10FF, WBP_ALetter},
+ {0x1100, 0x1248, WBP_ALetter},
{0x124A, 0x124D, WBP_ALetter},
{0x1250, 0x1256, WBP_ALetter},
{0x1258, 0x1258, WBP_ALetter},
@@ -398,6 +412,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x13F8, 0x13FD, WBP_ALetter},
{0x1401, 0x166C, WBP_ALetter},
{0x166F, 0x167F, WBP_ALetter},
+ {0x1680, 0x1680, WBP_WSegSpace},
{0x1681, 0x169A, WBP_ALetter},
{0x16A0, 0x16EA, WBP_ALetter},
{0x16EE, 0x16F0, WBP_ALetter},
@@ -426,7 +441,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x1810, 0x1819, WBP_Numeric},
{0x1820, 0x1842, WBP_ALetter},
{0x1843, 0x1843, WBP_ALetter},
- {0x1844, 0x1877, WBP_ALetter},
+ {0x1844, 0x1878, WBP_ALetter},
{0x1880, 0x1884, WBP_ALetter},
{0x1885, 0x1886, WBP_Extend},
{0x1887, 0x18A8, WBP_ALetter},
@@ -509,24 +524,27 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x1C5A, 0x1C77, WBP_ALetter},
{0x1C78, 0x1C7D, WBP_ALetter},
{0x1C80, 0x1C88, WBP_ALetter},
+ {0x1C90, 0x1CBA, WBP_ALetter},
+ {0x1CBD, 0x1CBF, WBP_ALetter},
{0x1CD0, 0x1CD2, WBP_Extend},
{0x1CD4, 0x1CE0, WBP_Extend},
{0x1CE1, 0x1CE1, WBP_Extend},
{0x1CE2, 0x1CE8, WBP_Extend},
{0x1CE9, 0x1CEC, WBP_ALetter},
{0x1CED, 0x1CED, WBP_Extend},
- {0x1CEE, 0x1CF1, WBP_ALetter},
- {0x1CF2, 0x1CF3, WBP_Extend},
+ {0x1CEE, 0x1CF3, WBP_ALetter},
{0x1CF4, 0x1CF4, WBP_Extend},
{0x1CF5, 0x1CF6, WBP_ALetter},
+ {0x1CF7, 0x1CF7, WBP_Extend},
{0x1CF8, 0x1CF9, WBP_Extend},
+ {0x1CFA, 0x1CFA, WBP_ALetter},
{0x1D00, 0x1D2B, WBP_ALetter},
{0x1D2C, 0x1D6A, WBP_ALetter},
{0x1D6B, 0x1D77, WBP_ALetter},
{0x1D78, 0x1D78, WBP_ALetter},
{0x1D79, 0x1D9A, WBP_ALetter},
{0x1D9B, 0x1DBF, WBP_ALetter},
- {0x1DC0, 0x1DF5, WBP_Extend},
+ {0x1DC0, 0x1DF9, WBP_Extend},
{0x1DFB, 0x1DFF, WBP_Extend},
{0x1E00, 0x1F15, WBP_ALetter},
{0x1F18, 0x1F1D, WBP_ALetter},
@@ -547,6 +565,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x1FE0, 0x1FEC, WBP_ALetter},
{0x1FF2, 0x1FF4, WBP_ALetter},
{0x1FF6, 0x1FFC, WBP_ALetter},
+ {0x2000, 0x2006, WBP_WSegSpace},
+ {0x2008, 0x200A, WBP_WSegSpace},
{0x200C, 0x200C, WBP_Extend},
{0x200D, 0x200D, WBP_ZWJ},
{0x200E, 0x200F, WBP_Format},
@@ -561,6 +581,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x203F, 0x2040, WBP_ExtendNumLet},
{0x2044, 0x2044, WBP_MidNum},
{0x2054, 0x2054, WBP_ExtendNumLet},
+ {0x205F, 0x205F, WBP_WSegSpace},
{0x2060, 0x2064, WBP_Format},
{0x2066, 0x206F, WBP_Format},
{0x2071, 0x2071, WBP_ALetter},
@@ -590,10 +611,6 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x2183, 0x2184, WBP_ALetter},
{0x2185, 0x2188, WBP_ALetter},
{0x24B6, 0x24E9, WBP_ALetter},
- {0x261D, 0x261D, WBP_E_Base},
- {0x26F9, 0x26F9, WBP_E_Base},
- {0x270A, 0x270D, WBP_E_Base},
- {0x2764, 0x2764, WBP_Glue_After_Zwj},
{0x2C00, 0x2C2E, WBP_ALetter},
{0x2C30, 0x2C5E, WBP_ALetter},
{0x2C60, 0x2C7B, WBP_ALetter},
@@ -619,6 +636,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x2DD8, 0x2DDE, WBP_ALetter},
{0x2DE0, 0x2DFF, WBP_Extend},
{0x2E2F, 0x2E2F, WBP_ALetter},
+ {0x3000, 0x3000, WBP_WSegSpace},
{0x3005, 0x3005, WBP_ALetter},
{0x302A, 0x302D, WBP_Extend},
{0x302E, 0x302F, WBP_Extend},
@@ -631,7 +649,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x30A1, 0x30FA, WBP_Katakana},
{0x30FC, 0x30FE, WBP_Katakana},
{0x30FF, 0x30FF, WBP_Katakana},
- {0x3105, 0x312D, WBP_ALetter},
+ {0x3105, 0x312F, WBP_ALetter},
{0x3131, 0x318E, WBP_ALetter},
{0x31A0, 0x31BA, WBP_ALetter},
{0x31F0, 0x31FF, WBP_Katakana},
@@ -660,14 +678,16 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0xA6E6, 0xA6EF, WBP_ALetter},
{0xA6F0, 0xA6F1, WBP_Extend},
{0xA717, 0xA71F, WBP_ALetter},
+ {0xA720, 0xA721, WBP_ALetter},
{0xA722, 0xA76F, WBP_ALetter},
{0xA770, 0xA770, WBP_ALetter},
{0xA771, 0xA787, WBP_ALetter},
{0xA788, 0xA788, WBP_ALetter},
+ {0xA789, 0xA78A, WBP_ALetter},
{0xA78B, 0xA78E, WBP_ALetter},
{0xA78F, 0xA78F, WBP_ALetter},
- {0xA790, 0xA7AE, WBP_ALetter},
- {0xA7B0, 0xA7B7, WBP_ALetter},
+ {0xA790, 0xA7BF, WBP_ALetter},
+ {0xA7C2, 0xA7C6, WBP_ALetter},
{0xA7F7, 0xA7F7, WBP_ALetter},
{0xA7F8, 0xA7F9, WBP_ALetter},
{0xA7FA, 0xA7FA, WBP_ALetter},
@@ -690,7 +710,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0xA8E0, 0xA8F1, WBP_Extend},
{0xA8F2, 0xA8F7, WBP_ALetter},
{0xA8FB, 0xA8FB, WBP_ALetter},
- {0xA8FD, 0xA8FD, WBP_ALetter},
+ {0xA8FD, 0xA8FE, WBP_ALetter},
+ {0xA8FF, 0xA8FF, WBP_Extend},
{0xA900, 0xA909, WBP_Numeric},
{0xA90A, 0xA925, WBP_ALetter},
{0xA926, 0xA92D, WBP_Extend},
@@ -705,8 +726,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0xA9B4, 0xA9B5, WBP_Extend},
{0xA9B6, 0xA9B9, WBP_Extend},
{0xA9BA, 0xA9BB, WBP_Extend},
- {0xA9BC, 0xA9BC, WBP_Extend},
- {0xA9BD, 0xA9C0, WBP_Extend},
+ {0xA9BC, 0xA9BD, WBP_Extend},
+ {0xA9BE, 0xA9C0, WBP_Extend},
{0xA9CF, 0xA9CF, WBP_ALetter},
{0xA9D0, 0xA9D9, WBP_Numeric},
{0xA9E5, 0xA9E5, WBP_Extend},
@@ -745,8 +766,9 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0xAB20, 0xAB26, WBP_ALetter},
{0xAB28, 0xAB2E, WBP_ALetter},
{0xAB30, 0xAB5A, WBP_ALetter},
+ {0xAB5B, 0xAB5B, WBP_ALetter},
{0xAB5C, 0xAB5F, WBP_ALetter},
- {0xAB60, 0xAB65, WBP_ALetter},
+ {0xAB60, 0xAB67, WBP_ALetter},
{0xAB70, 0xABBF, WBP_ALetter},
{0xABC0, 0xABE2, WBP_ALetter},
{0xABE3, 0xABE4, WBP_Extend},
@@ -793,6 +815,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0xFF07, 0xFF07, WBP_MidNumLet},
{0xFF0C, 0xFF0C, WBP_MidNum},
{0xFF0E, 0xFF0E, WBP_MidNumLet},
+ {0xFF10, 0xFF19, WBP_Numeric},
{0xFF1A, 0xFF1A, WBP_MidLetter},
{0xFF1B, 0xFF1B, WBP_MidNum},
{0xFF21, 0xFF3A, WBP_ALetter},
@@ -821,7 +844,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x102A0, 0x102D0, WBP_ALetter},
{0x102E0, 0x102E0, WBP_Extend},
{0x10300, 0x1031F, WBP_ALetter},
- {0x10330, 0x10340, WBP_ALetter},
+ {0x1032D, 0x10340, WBP_ALetter},
{0x10341, 0x10341, WBP_ALetter},
{0x10342, 0x10349, WBP_ALetter},
{0x1034A, 0x1034A, WBP_ALetter},
@@ -861,7 +884,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x10A0C, 0x10A0F, WBP_Extend},
{0x10A10, 0x10A13, WBP_ALetter},
{0x10A15, 0x10A17, WBP_ALetter},
- {0x10A19, 0x10A33, WBP_ALetter},
+ {0x10A19, 0x10A35, WBP_ALetter},
{0x10A38, 0x10A3A, WBP_Extend},
{0x10A3F, 0x10A3F, WBP_Extend},
{0x10A60, 0x10A7C, WBP_ALetter},
@@ -876,6 +899,14 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x10C00, 0x10C48, WBP_ALetter},
{0x10C80, 0x10CB2, WBP_ALetter},
{0x10CC0, 0x10CF2, WBP_ALetter},
+ {0x10D00, 0x10D23, WBP_ALetter},
+ {0x10D24, 0x10D27, WBP_Extend},
+ {0x10D30, 0x10D39, WBP_Numeric},
+ {0x10F00, 0x10F1C, WBP_ALetter},
+ {0x10F27, 0x10F27, WBP_ALetter},
+ {0x10F30, 0x10F45, WBP_ALetter},
+ {0x10F46, 0x10F50, WBP_Extend},
+ {0x10FE0, 0x10FF6, WBP_ALetter},
{0x11000, 0x11000, WBP_Extend},
{0x11001, 0x11001, WBP_Extend},
{0x11002, 0x11002, WBP_Extend},
@@ -890,6 +921,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x110B7, 0x110B8, WBP_Extend},
{0x110B9, 0x110BA, WBP_Extend},
{0x110BD, 0x110BD, WBP_Format},
+ {0x110CD, 0x110CD, WBP_Format},
{0x110D0, 0x110E8, WBP_ALetter},
{0x110F0, 0x110F9, WBP_Numeric},
{0x11100, 0x11102, WBP_Extend},
@@ -898,6 +930,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x1112C, 0x1112C, WBP_Extend},
{0x1112D, 0x11134, WBP_Extend},
{0x11136, 0x1113F, WBP_Numeric},
+ {0x11144, 0x11144, WBP_ALetter},
+ {0x11145, 0x11146, WBP_Extend},
{0x11150, 0x11172, WBP_ALetter},
{0x11173, 0x11173, WBP_Extend},
{0x11176, 0x11176, WBP_ALetter},
@@ -908,7 +942,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x111B6, 0x111BE, WBP_Extend},
{0x111BF, 0x111C0, WBP_Extend},
{0x111C1, 0x111C4, WBP_ALetter},
- {0x111CA, 0x111CC, WBP_Extend},
+ {0x111C9, 0x111CC, WBP_Extend},
{0x111D0, 0x111D9, WBP_Numeric},
{0x111DA, 0x111DA, WBP_ALetter},
{0x111DC, 0x111DC, WBP_ALetter},
@@ -939,7 +973,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x1132A, 0x11330, WBP_ALetter},
{0x11332, 0x11333, WBP_ALetter},
{0x11335, 0x11339, WBP_ALetter},
- {0x1133C, 0x1133C, WBP_Extend},
+ {0x1133B, 0x1133C, WBP_Extend},
{0x1133D, 0x1133D, WBP_ALetter},
{0x1133E, 0x1133F, WBP_Extend},
{0x11340, 0x11340, WBP_Extend},
@@ -961,6 +995,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x11446, 0x11446, WBP_Extend},
{0x11447, 0x1144A, WBP_ALetter},
{0x11450, 0x11459, WBP_Numeric},
+ {0x1145E, 0x1145E, WBP_Extend},
+ {0x1145F, 0x1145F, WBP_ALetter},
{0x11480, 0x114AF, WBP_ALetter},
{0x114B0, 0x114B2, WBP_Extend},
{0x114B3, 0x114B8, WBP_Extend},
@@ -999,6 +1035,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x116B0, 0x116B5, WBP_Extend},
{0x116B6, 0x116B6, WBP_Extend},
{0x116B7, 0x116B7, WBP_Extend},
+ {0x116B8, 0x116B8, WBP_ALetter},
{0x116C0, 0x116C9, WBP_Numeric},
{0x1171D, 0x1171F, WBP_Extend},
{0x11720, 0x11721, WBP_Extend},
@@ -1006,9 +1043,41 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x11726, 0x11726, WBP_Extend},
{0x11727, 0x1172B, WBP_Extend},
{0x11730, 0x11739, WBP_Numeric},
+ {0x11800, 0x1182B, WBP_ALetter},
+ {0x1182C, 0x1182E, WBP_Extend},
+ {0x1182F, 0x11837, WBP_Extend},
+ {0x11838, 0x11838, WBP_Extend},
+ {0x11839, 0x1183A, WBP_Extend},
{0x118A0, 0x118DF, WBP_ALetter},
{0x118E0, 0x118E9, WBP_Numeric},
{0x118FF, 0x118FF, WBP_ALetter},
+ {0x119A0, 0x119A7, WBP_ALetter},
+ {0x119AA, 0x119D0, WBP_ALetter},
+ {0x119D1, 0x119D3, WBP_Extend},
+ {0x119D4, 0x119D7, WBP_Extend},
+ {0x119DA, 0x119DB, WBP_Extend},
+ {0x119DC, 0x119DF, WBP_Extend},
+ {0x119E0, 0x119E0, WBP_Extend},
+ {0x119E1, 0x119E1, WBP_ALetter},
+ {0x119E3, 0x119E3, WBP_ALetter},
+ {0x119E4, 0x119E4, WBP_Extend},
+ {0x11A00, 0x11A00, WBP_ALetter},
+ {0x11A01, 0x11A0A, WBP_Extend},
+ {0x11A0B, 0x11A32, WBP_ALetter},
+ {0x11A33, 0x11A38, WBP_Extend},
+ {0x11A39, 0x11A39, WBP_Extend},
+ {0x11A3A, 0x11A3A, WBP_ALetter},
+ {0x11A3B, 0x11A3E, WBP_Extend},
+ {0x11A47, 0x11A47, WBP_Extend},
+ {0x11A50, 0x11A50, WBP_ALetter},
+ {0x11A51, 0x11A56, WBP_Extend},
+ {0x11A57, 0x11A58, WBP_Extend},
+ {0x11A59, 0x11A5B, WBP_Extend},
+ {0x11A5C, 0x11A89, WBP_ALetter},
+ {0x11A8A, 0x11A96, WBP_Extend},
+ {0x11A97, 0x11A97, WBP_Extend},
+ {0x11A98, 0x11A99, WBP_Extend},
+ {0x11A9D, 0x11A9D, WBP_ALetter},
{0x11AC0, 0x11AF8, WBP_ALetter},
{0x11C00, 0x11C08, WBP_ALetter},
{0x11C0A, 0x11C2E, WBP_ALetter},
@@ -1027,10 +1096,35 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x11CB2, 0x11CB3, WBP_Extend},
{0x11CB4, 0x11CB4, WBP_Extend},
{0x11CB5, 0x11CB6, WBP_Extend},
+ {0x11D00, 0x11D06, WBP_ALetter},
+ {0x11D08, 0x11D09, WBP_ALetter},
+ {0x11D0B, 0x11D30, WBP_ALetter},
+ {0x11D31, 0x11D36, WBP_Extend},
+ {0x11D3A, 0x11D3A, WBP_Extend},
+ {0x11D3C, 0x11D3D, WBP_Extend},
+ {0x11D3F, 0x11D45, WBP_Extend},
+ {0x11D46, 0x11D46, WBP_ALetter},
+ {0x11D47, 0x11D47, WBP_Extend},
+ {0x11D50, 0x11D59, WBP_Numeric},
+ {0x11D60, 0x11D65, WBP_ALetter},
+ {0x11D67, 0x11D68, WBP_ALetter},
+ {0x11D6A, 0x11D89, WBP_ALetter},
+ {0x11D8A, 0x11D8E, WBP_Extend},
+ {0x11D90, 0x11D91, WBP_Extend},
+ {0x11D93, 0x11D94, WBP_Extend},
+ {0x11D95, 0x11D95, WBP_Extend},
+ {0x11D96, 0x11D96, WBP_Extend},
+ {0x11D97, 0x11D97, WBP_Extend},
+ {0x11D98, 0x11D98, WBP_ALetter},
+ {0x11DA0, 0x11DA9, WBP_Numeric},
+ {0x11EE0, 0x11EF2, WBP_ALetter},
+ {0x11EF3, 0x11EF4, WBP_Extend},
+ {0x11EF5, 0x11EF6, WBP_Extend},
{0x12000, 0x12399, WBP_ALetter},
{0x12400, 0x1246E, WBP_ALetter},
{0x12480, 0x12543, WBP_ALetter},
{0x13000, 0x1342E, WBP_ALetter},
+ {0x13430, 0x13438, WBP_Format},
{0x14400, 0x14646, WBP_ALetter},
{0x16800, 0x16A38, WBP_ALetter},
{0x16A40, 0x16A5E, WBP_ALetter},
@@ -1043,13 +1137,17 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x16B50, 0x16B59, WBP_Numeric},
{0x16B63, 0x16B77, WBP_ALetter},
{0x16B7D, 0x16B8F, WBP_ALetter},
- {0x16F00, 0x16F44, WBP_ALetter},
+ {0x16E40, 0x16E7F, WBP_ALetter},
+ {0x16F00, 0x16F4A, WBP_ALetter},
+ {0x16F4F, 0x16F4F, WBP_Extend},
{0x16F50, 0x16F50, WBP_ALetter},
- {0x16F51, 0x16F7E, WBP_Extend},
+ {0x16F51, 0x16F87, WBP_Extend},
{0x16F8F, 0x16F92, WBP_Extend},
{0x16F93, 0x16F9F, WBP_ALetter},
- {0x16FE0, 0x16FE0, WBP_ALetter},
+ {0x16FE0, 0x16FE1, WBP_ALetter},
+ {0x16FE3, 0x16FE3, WBP_ALetter},
{0x1B000, 0x1B000, WBP_Katakana},
+ {0x1B164, 0x1B167, WBP_Katakana},
{0x1BC00, 0x1BC6A, WBP_ALetter},
{0x1BC70, 0x1BC7C, WBP_ALetter},
{0x1BC80, 0x1BC88, WBP_ALetter},
@@ -1106,10 +1204,19 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x1E01B, 0x1E021, WBP_Extend},
{0x1E023, 0x1E024, WBP_Extend},
{0x1E026, 0x1E02A, WBP_Extend},
+ {0x1E100, 0x1E12C, WBP_ALetter},
+ {0x1E130, 0x1E136, WBP_Extend},
+ {0x1E137, 0x1E13D, WBP_ALetter},
+ {0x1E140, 0x1E149, WBP_Numeric},
+ {0x1E14E, 0x1E14E, WBP_ALetter},
+ {0x1E2C0, 0x1E2EB, WBP_ALetter},
+ {0x1E2EC, 0x1E2EF, WBP_Extend},
+ {0x1E2F0, 0x1E2F9, WBP_Numeric},
{0x1E800, 0x1E8C4, WBP_ALetter},
{0x1E8D0, 0x1E8D6, WBP_Extend},
{0x1E900, 0x1E943, WBP_ALetter},
{0x1E944, 0x1E94A, WBP_Extend},
+ {0x1E94B, 0x1E94B, WBP_ALetter},
{0x1E950, 0x1E959, WBP_Numeric},
{0x1EE00, 0x1EE03, WBP_ALetter},
{0x1EE05, 0x1EE1F, WBP_ALetter},
@@ -1148,35 +1255,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x1F150, 0x1F169, WBP_ALetter},
{0x1F170, 0x1F189, WBP_ALetter},
{0x1F1E6, 0x1F1FF, WBP_Regional_Indicator},
- {0x1F385, 0x1F385, WBP_E_Base},
- {0x1F3C3, 0x1F3C4, WBP_E_Base},
- {0x1F3CA, 0x1F3CB, WBP_E_Base},
- {0x1F3FB, 0x1F3FF, WBP_E_Modifier},
- {0x1F442, 0x1F443, WBP_E_Base},
- {0x1F446, 0x1F450, WBP_E_Base},
- {0x1F466, 0x1F469, WBP_E_Base_GAZ},
- {0x1F46E, 0x1F46E, WBP_E_Base},
- {0x1F470, 0x1F478, WBP_E_Base},
- {0x1F47C, 0x1F47C, WBP_E_Base},
- {0x1F481, 0x1F483, WBP_E_Base},
- {0x1F485, 0x1F487, WBP_E_Base},
- {0x1F48B, 0x1F48B, WBP_Glue_After_Zwj},
- {0x1F4AA, 0x1F4AA, WBP_E_Base},
- {0x1F575, 0x1F575, WBP_E_Base},
- {0x1F57A, 0x1F57A, WBP_E_Base},
- {0x1F590, 0x1F590, WBP_E_Base},
- {0x1F595, 0x1F596, WBP_E_Base},
- {0x1F5E8, 0x1F5E8, WBP_Glue_After_Zwj},
- {0x1F645, 0x1F647, WBP_E_Base},
- {0x1F64B, 0x1F64F, WBP_E_Base},
- {0x1F6A3, 0x1F6A3, WBP_E_Base},
- {0x1F6B4, 0x1F6B6, WBP_E_Base},
- {0x1F6C0, 0x1F6C0, WBP_E_Base},
- {0x1F918, 0x1F91E, WBP_E_Base},
- {0x1F926, 0x1F926, WBP_E_Base},
- {0x1F930, 0x1F930, WBP_E_Base},
- {0x1F933, 0x1F939, WBP_E_Base},
- {0x1F93C, 0x1F93E, WBP_E_Base},
+ {0x1F3FB, 0x1F3FF, WBP_Extend},
{0xE0001, 0xE0001, WBP_Format},
{0xE0020, 0xE007F, WBP_Extend},
{0xE0100, 0xE01EF, WBP_Extend},
diff --git a/src/static_libs/libunibreak/wordbreakdef.h b/src/static_libs/libunibreak/wordbreakdef.h
index 82cd98e7c3..03feb3cdac 100644
--- a/src/static_libs/libunibreak/wordbreakdef.h
+++ b/src/static_libs/libunibreak/wordbreakdef.h
@@ -4,7 +4,8 @@
* Word breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
- * Copyright (C) 2013-16 Tom Hacohen <tom at stosb dot com>
+ * Copyright (C) 2013-2019 Tom Hacohen <tom at stosb dot com>
+ * Copyright (C) 2018 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@@ -30,9 +31,9 @@
* Unicode 6.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
*
- * This library has been updated according to Revision 29, for
- * Unicode 9.0.0:
- * <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
+ * This library has been updated according to Revision 35, for
+ * Unicode 12.0.0:
+ * <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@@ -51,7 +52,7 @@
/**
* Word break classes. This is a direct mapping of Table 3 of Unicode
- * Standard Annex 29, Revision 23.
+ * Standard Annex 29, Revision 35.
*/
enum WordBreakClass
{
@@ -73,10 +74,7 @@ enum WordBreakClass
WBP_MidNum,
WBP_Numeric,
WBP_ExtendNumLet,
- WBP_E_Base,
- WBP_E_Modifier,
- WBP_Glue_After_Zwj,
- WBP_E_Base_GAZ,
+ WBP_WSegSpace,
WBP_Any
};
@@ -86,7 +84,7 @@ enum WordBreakClass
*/
struct WordBreakProperties
{
- utf32_t start; /**< Starting coding point */
- utf32_t end; /**< End coding point */
+ utf32_t start; /**< Start codepoint */
+ utf32_t end; /**< End codepoint, inclusive */
enum WordBreakClass prop; /**< The word breaking property */
};
diff --git a/src/tests/evas/evas_test_textblock.c b/src/tests/evas/evas_test_textblock.c
index a24d16d73c..3f1cbec8d8 100644
--- a/src/tests/evas/evas_test_textblock.c
+++ b/src/tests/evas/evas_test_textblock.c
@@ -1054,6 +1054,10 @@ EFL_START_TEST(evas_textblock_cursor)
pos = evas_textblock_cursor_pos_get(cur);
ck_assert_int_eq(pos, 0);
+ evas_object_textblock_text_markup_set(tb, "&#x1f3f3;&#xfe0f;&#x200d;&#x1f308;");
+ evas_textblock_cursor_pos_set(cur, 0);
+ evas_textblock_cursor_cluster_next(cur);
+ ck_assert_int_eq(4, evas_textblock_cursor_pos_get(cur));
END_TB_TEST();
}