summaryrefslogtreecommitdiff
path: root/vendor/nunicode/src/libnu/tounaccent.c
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/nunicode/src/libnu/tounaccent.c')
-rw-r--r-- vendor/nunicode/src/libnu/tounaccent.c 57
1 files changed, 57 insertions, 0 deletions
diff --git a/vendor/nunicode/src/libnu/tounaccent.c b/vendor/nunicode/src/libnu/tounaccent.c
new file mode 100644
index 0000000000..ad5b269827
--- /dev/null
+++ b/vendor/nunicode/src/libnu/tounaccent.c
@@ -0,0 +1,57 @@
+#include <assert.h>
+
+#include <libnu/casemap.h>
+
+#ifdef NU_WITH_UNACCENT
+
+#include <libnu/casemap_internal.h>
+#include "gen/_tounaccent.c"
+
+/* Look up the unaccent transform of a codepoint: combining diacritics map
+ * to "", every other codepoint is resolved through _nu_to_something(). */
+const char* nu_tounaccent(uint32_t codepoint) {
+    struct range { uint32_t first, last; };
+
+    /* inclusive codepoint ranges of the combining diacritical mark blocks */
+    static const struct range diacritics[] = {
+        { 0x0300, 0x036F }, /* Combining Diacritical Marks */
+        { 0x1AB0, 0x1AFF }, /* Combining Diacritical Marks Extended */
+        { 0x20D0, 0x20FF }, /* Combining Diacritical Marks for Symbols */
+        { 0x1DC0, 0x1DFF }, /* Combining Diacritical Marks Supplement */
+    };
+    const struct range *const end = diacritics + sizeof(diacritics) / sizeof(diacritics[0]);
+
+    /* the "" literal below is the empty transform as read by nu_casemap_read (utf-8) */
+    assert(nu_casemap_read == nu_utf8_read);
+
+    /* a codepoint that is itself a diacritic is transformed into an empty string */
+    for (const struct range *r = diacritics; r != end; ++r) {
+        if (r->first <= codepoint && codepoint <= r->last) {
+            return "";
+        }
+    }
+
+    return _nu_to_something(codepoint, NU_TOUNACCENT_G, NU_TOUNACCENT_G_SIZE,
+        NU_TOUNACCENT_VALUES_C, NU_TOUNACCENT_VALUES_I, NU_TOUNACCENT_COMBINED);
+}
+
+/* Call `read` on `encoded` to obtain a codepoint, store its nu_tounaccent()
+ * result in *transform and, when `u` is non-null, the codepoint in *u.
+ * Returns the pointer produced by `read`; `limit` and `context` are unused. */
+const char* _nu_tounaccent(const char *encoded, const char *limit, nu_read_iterator_t read,
+    uint32_t *u, const char **transform,
+    void *context) {
+    uint32_t decoded = 0;
+    const char *next = read(encoded, &decoded);
+
+    (void)(limit);
+    (void)(context);
+
+    *transform = nu_tounaccent(decoded);
+    if (u) {
+        *u = decoded;
+    }
+    return next;
+}
+
+#endif /* NU_WITH_UNACCENT */