summaryrefslogtreecommitdiff
path: root/vendor/nunicode/include/libnu/strcoll_internal.h
blob: 570cb14f873754724f0da723f22d14e019fafba8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
#ifndef NU_STRCOLL_INTERNAL_H
#define NU_STRCOLL_INTERNAL_H

/** @defgroup collation_internal Internal collation functions
 *
 * Functions in this group are mostly for internal use. Please use them
 * with care.
 */

#include <libnu/config.h>
#include <libnu/casemap.h>
#include <libnu/defines.h>
#include <libnu/strings.h>

#if defined (__cplusplus) || defined (c_plusplus)
extern "C" {
#endif

/** Read (decode) iterator with a transformation applied inside of it
 *
 * Function pointer type shared by nu_default_compound_read() and
 * nu_nocase_compound_read().
 *
 * @ingroup collation_internal
 * @param encoded encoded string
 * @param encoded_limit upper limit for encoded. NU_UNLIMITED for 0-terminated
 * strings
 * @param encoded_read read (decode) function
 * @param unicode output unicode codepoint
 * @param tail output pointer to compound tail, should never be 0
 * @return pointer to next encoded codepoint
 * @see nu_default_compound_read
 * @see nu_nocase_compound_read
 */
typedef const char* (*nu_compound_read_t)(
	const char *encoded, const char *encoded_limit, nu_read_iterator_t encoded_read,
	uint32_t *unicode, const char **tail);

/** Weight a unicode codepoint (or several codepoints)
 *
 * 0 should always be weighted to 0. If your weight function needs more
 * than one codepoint - return a negative value, which will be passed back to
 * this function along with the next codepoint.
 *
 * When the function has decided on a weight and returned a positive result,
 * it has to fill \*weight with how many (Unicode) codepoints nunicode should
 * roll back. E.g. the function consumed "ZZS" and decided on a weight (in
 * Hungarian collation), so it fills \*weight with 0 because no rollback is
 * needed. Then the function consumed "ZZZ" and no weight is available for
 * such a contraction - it returns the weight for "Z" and fills \*weight
 * with 2, to roll back the redundant "ZZ".
 *
 * If the string suddenly ends before the weight function can decide (string
 * limit reached), 0 will be passed in addition to the previous string to
 * signal the end of the string.
 *
 * @ingroup collation_internal
 * @param u unicode codepoint to weight
 * @param weight 0 at first call or (on sequential calls) pointer to negative
 * weight previously returned by this function
 * @param context pointer passed to _nu_strcoll() or _nu_strstr()
 * @return positive codepoint weight or negative value if the function needs
 * more codepoints
 */
typedef int32_t (*nu_codepoint_weight_t)(uint32_t u, int32_t *weight, void *context);

#if (defined NU_WITH_Z_COLLATION) || (defined NU_WITH_N_COLLATION)

/** Pass-through compound read: decodes a single codepoint by calling
 * encoded_read(encoded, unicode) and applies no transformation to it
 *
 * @ingroup collation_internal
 * @param encoded encoded string
 * @param encoded_limit upper limit for encoded. NU_UNLIMITED for 0-terminated
 * strings; not used by this implementation
 * @param encoded_read read (decode) function
 * @param unicode output unicode codepoint
 * @param tail output pointer to compound tail, should never be 0; not used
 * by this implementation
 * @return value returned by encoded_read (pointer to next encoded codepoint)
 */
static inline
const char* nu_default_compound_read(const char *encoded, const char *encoded_limit,
	nu_read_iterator_t encoded_read, uint32_t *unicode,
	const char **tail) {
	const char *next = encoded_read(encoded, unicode);

	/* present only to satisfy the nu_compound_read_t signature */
	(void)(tail);
	(void)(encoded_limit);

	return next;
}

/** Case-ignoring compound read: decodes a codepoint with
 * encoded_read(encoded, unicode) and substitutes the mapping returned by
 * NU_FOLDING_FUNCTION() when one exists
 *
 * A mapping is read one codepoint at a time with nu_casemap_read(). The
 * first codepoint is returned immediately and the remainder is stored in
 * \*tail. While \*tail is not 0, subsequent calls keep reading from it and
 * return encoded unchanged; once the tail yields 0 it is reset and decoding
 * continues from encoded.
 *
 * @ingroup collation_internal
 * @param encoded encoded string
 * @param encoded_limit upper limit for encoded. NU_UNLIMITED for 0-terminated
 * strings
 * @param encoded_read read (decode) function
 * @param unicode output unicode codepoint, set to 0 when encoded has reached
 * encoded_limit
 * @param tail output pointer to compound tail, should never be 0
 * @return pointer to next encoded codepoint
 */
static inline
const char* nu_nocase_compound_read(const char *encoded, const char *encoded_limit,
	nu_read_iterator_t encoded_read, uint32_t *unicode,
	const char **tail) {

	const char *next;
	const char *folded;

	/* a pending mapping from a previous call is drained first */
	if (*tail != 0) {
		*tail = nu_casemap_read(*tail, unicode);

		if (*unicode != 0) {
			return encoded;
		}

		/* mapping exhausted: continue with the encoded string */
		*tail = 0;
	}

	/* nothing left to decode */
	if (encoded >= encoded_limit) {
		*unicode = 0;
		return encoded;
	}

	next = encoded_read(encoded, unicode);

	/* a decoded 0 is returned as is, without folding */
	if (*unicode != 0) {
		folded = NU_FOLDING_FUNCTION(*unicode);

		if (folded != 0) {
			/* replace the codepoint with the head of its mapping,
			 * keep the rest for the following calls */
			*tail = nu_casemap_read(folded, unicode);
		}
	}

	return next;
}

/** Internal interface for nu_strcoll()
 *
 * Takes a separate read function and compound read function for each side,
 * plus a codepoint weighting function and the context passed to it.
 *
 * @ingroup collation_internal
 * @param lhs left-hand side encoded string
 * @param lhs_limit upper limit for lhs, use NU_UNLIMITED for 0-terminated
 * strings
 * @param rhs right-hand side encoded string
 * @param rhs_limit upper limit for rhs, use NU_UNLIMITED for 0-terminated
 * strings
 * @param it1 lhs read (decoding) function
 * @param it2 rhs read (decoding) function
 * @param com1 lhs compound read function
 * @param com2 rhs compound read function
 * @param weight codepoint weighting function
 * @param context pointer which will be passed to weight
 * @param collated_left (optional) number of codepoints collated in lhs
 * @param collated_right (optional) number of codepoints collated in rhs
 * @return NOTE(review): not documented here; presumably the same comparison
 * result as nu_strcoll() - confirm against the implementation
 *
 * @see nu_strcoll
 * @see nu_default_compound_read
 * @see nu_nocase_compound_read
 * @see nu_ducet_weight
 */
NU_EXPORT
int _nu_strcoll(const char *lhs, const char *lhs_limit,
	const char *rhs, const char *rhs_limit,
	nu_read_iterator_t it1, nu_read_iterator_t it2,
	nu_compound_read_t com1, nu_compound_read_t com2,
	nu_codepoint_weight_t weight, void *context,
	ssize_t *collated_left, ssize_t *collated_right);

/** Internal interface for nu_strchr()
 *
 * @ingroup collation_internal
 * @param lhs left-hand side encoded string
 * @param lhs_limit upper limit for lhs, use NU_UNLIMITED for 0-terminated
 * strings
 * @param c unicode codepoint to look for
 * @param read lhs read (decoding) function
 * @param com lhs compound read function
 * @param casemap casemapping function
 * @param casemap_read casemapping result decoding function
 * @return NOTE(review): not documented here; presumably a pointer into lhs as
 * for nu_strchr() - confirm against the implementation
 *
 * @see nu_strchr
 * @see nu_default_compound_read
 * @see nu_nocase_compound_read
 * @see nu_toupper
 * @see nu_tolower
 */
NU_EXPORT
const char* _nu_strchr(const char *lhs, const char *lhs_limit,
	uint32_t c, nu_read_iterator_t read,
	nu_compound_read_t com,
	nu_casemapping_t casemap, nu_read_iterator_t casemap_read);

/** Internal interface for nu_strrchr()
 *
 * Takes the same parameter types, in the same order, as _nu_strchr(); here
 * the string and its limit are named encoded and limit.
 *
 * @ingroup collation_internal
 * @see _nu_strchr
 */
NU_EXPORT
const char* _nu_strrchr(const char *encoded, const char *limit,
	uint32_t c, nu_read_iterator_t read,
	nu_compound_read_t com,
	nu_casemapping_t casemap, nu_read_iterator_t casemap_read);

/** Internal interface for nu_strstr()
 *
 * @ingroup collation_internal
 * @param haystack encoded haystack
 * @param haystack_limit upper limit for haystack, use NU_UNLIMITED for
 * 0-terminated strings
 * @param needle encoded needle string
 * @param needle_limit upper limit for needle, use NU_UNLIMITED for
 * 0-terminated strings
 * @param it1 haystack read (decoding) function
 * @param it2 needle read (decoding) function
 * @param com1 haystack compound read function
 * @param com2 needle compound read function
 * @param casemap casemapping function
 * @param casemap_read casemapping result decoding function
 * @param weight codepoint weighting function
 * @param context pointer which will be passed to weight
 * @return NOTE(review): not documented here; presumably a pointer into
 * haystack as for nu_strstr() - confirm against the implementation
 *
 * @see nu_strstr
 * @see nu_default_compound_read
 * @see nu_nocase_compound_read
 * @see nu_toupper
 * @see nu_tolower
 * @see nu_ducet_weight
 */
NU_EXPORT
const char* _nu_strstr(const char *haystack, const char *haystack_limit,
	const char *needle, const char *needle_limit,
	nu_read_iterator_t it1, nu_read_iterator_t it2,
	nu_compound_read_t com1, nu_compound_read_t com2,
	nu_casemapping_t casemap, nu_read_iterator_t casemap_read,
	nu_codepoint_weight_t weight, void *context);

#endif /* (defined NU_WITH_Z_COLLATION) || (defined NU_WITH_N_COLLATION) */

#if defined (__cplusplus) || defined (c_plusplus)
}
#endif

#endif /* NU_STRCOLL_INTERNAL_H */