/* vendor/nunicode/src/libnu/tounaccent.c */

#include <assert.h>

#include <libnu/casemap.h>

#ifdef NU_WITH_UNACCENT

#include <libnu/casemap_internal.h>
#include "gen/_tounaccent.c"

/*
 * Look up the "unaccented" replacement for a single codepoint.
 *
 * A codepoint that lies inside one of the combining-diacritic ranges listed
 * below is itself an accent, so it is replaced by nothing: a zero-terminated
 * empty string is returned. Every other codepoint is passed to
 * _nu_to_something() together with the NU_TOUNACCENT_* tables, and that
 * call's result is returned unchanged.
 */
const char* nu_tounaccent(uint32_t codepoint) {
	/* inclusive [first, last] codepoint ranges treated as diacritics */
	static const struct {
		uint32_t first;
		uint32_t last;
	} diacritic_ranges[] = {
		{ 0x0300, 0x036F },  /* Combining Diacritical Marks */
		{ 0x1AB0, 0x1AFF },  /* Combining Diacritical Marks Extended */
		{ 0x20D0, 0x20FF },  /* Combining Diacritical Marks for Symbols */
		{ 0x1DC0, 0x1DFF },  /* Combining Diacritical Marks Supplement */
	};
	static const size_t ranges_total
		= sizeof(diacritic_ranges) / sizeof(diacritic_ranges[0]);

	/* the empty literal returned below is only a valid replacement while
	 * casemap strings are read as utf-8 */
	assert(nu_casemap_read == nu_utf8_read);

	size_t idx = 0;
	while (idx < ranges_total) {
		int outside = (codepoint < diacritic_ranges[idx].first
			|| codepoint > diacritic_ranges[idx].last);

		if (!outside) {
			/* the codepoint is a diacritic: transform it into an empty string */
			return "";
		}

		++idx;
	}

	return _nu_to_something(codepoint, NU_TOUNACCENT_G, NU_TOUNACCENT_G_SIZE,
		NU_TOUNACCENT_VALUES_C, NU_TOUNACCENT_VALUES_I, NU_TOUNACCENT_COMBINED);
}

/*
 * Iterator-style wrapper around nu_tounaccent().
 *
 * Decodes one codepoint from `encoded` using the supplied `read` function,
 * stores the nu_tounaccent() result for that codepoint in `*transform`, and,
 * when `u` is non-null, stores the decoded codepoint in `*u`.
 *
 * `limit` and `context` are accepted but not used here.
 *
 * Returns whatever `read` returned for this call.
 */
const char* _nu_tounaccent(const char *encoded, const char *limit, nu_read_iterator_t read,
	uint32_t *u, const char **transform,
	void *context) {

	uint32_t decoded = 0;
	const char *next = read(encoded, &decoded);

	/* unused parameters, silenced explicitly */
	(void)(limit);
	(void)(context);

	*transform = nu_tounaccent(decoded);

	/* reporting the decoded codepoint is optional for the caller */
	if (u) {
		*u = decoded;
	}

	return next;
}

#endif /* NU_WITH_UNACCENT */