summaryrefslogtreecommitdiff
path: root/vendor/nunicode/include/libnu/strings.h
blob: 989ef5ba3f56e46596d126575265067f7afd79da (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#ifndef NU_STRINGS_H
#define NU_STRINGS_H

/** @defgroup strings String functions
 *
 * Note on the "n" function variants: "n" is given in bytes in all functions,
 * but it is not a memory overrun guard. These variants exist for strings
 * that lack a terminating 0 byte. They won't read past the codepoint that
 * contains the n-th byte, but they might read past the n-th byte itself
 * when that codepoint is a multibyte sequence.
 *
 * E.g.: ``nu_strnlen("абв", 3, nu_utf8_read);``.
 * Since each codepoint is a 2-byte sequence, nu_strnlen() won't go further
 * than the 2nd codepoint, but will go further than the 3rd byte while
 * reading "б".
 */

#include <stdint.h>
#include <sys/types.h>

#include <libnu/config.h>
#include <libnu/defines.h>

#if defined (__cplusplus) || defined (c_plusplus)
extern "C" {
#endif

/**
 * @defgroup iterators Iterators
 * @defgroup transformations Codepoint transformations
 * @defgroup transformations_internal Codepoint transformations (internal)
 */

/** Read (decode) iterator
 *
 * Function pointer type for a decoding function: it is given a pointer to
 * encoded data and a pointer to a uint32_t for the decoded codepoint.
 *
 * @ingroup iterators
 * @param encoded encoded string to read from
 * @param unicode output location for the decoded codepoint
 * @return a const char pointer; presumably the position following the
 * sequence that was just decoded (NOTE(review): not visible in this
 * header, confirm against nu_utf8_read)
 * @see nu_utf8_read
 */
typedef const char* (*nu_read_iterator_t)(const char *encoded, uint32_t *unicode);

/** Read (decode) backwards iterator
 *
 * The argument order is intentionally reversed relative to
 * nu_read_iterator_t so that the two iterator types are not mixed up.
 * Reverse read is not compatible with any of the string functions.
 *
 * @ingroup iterators
 * @param unicode output location for the decoded codepoint
 * @param encoded encoded string to read from
 * @return a const char pointer; presumably the position of the sequence
 * preceding the one just decoded (NOTE(review): not visible in this
 * header, confirm against nu_utf8_revread)
 * @see nu_utf8_revread
 */
typedef const char* (*nu_revread_iterator_t)(uint32_t *unicode, const char *encoded);

/** Write (encode) iterator
 *
 * Function pointer type for an encoding function: it is given a codepoint
 * and a pointer to the destination for its encoded form.
 *
 * @ingroup iterators
 * @param unicode codepoint to encode
 * @param encoded destination for the encoded bytes
 * @return a char pointer; presumably the position following the bytes
 * that were just written (NOTE(review): not visible in this header,
 * confirm against nu_utf8_write)
 * @see nu_utf8_write
 */
typedef char* (*nu_write_iterator_t)(uint32_t unicode, char *encoded);

/** Transform codepoint
 *
 * Function pointer type for a per-codepoint transformation: it is given a
 * single codepoint and returns a const char pointer.
 *
 * @ingroup transformations
 * @param codepoint codepoint to transform
 * @return a const char pointer; presumably the encoded result of the
 * transformation (NOTE(review): the encoding of the result and the value
 * returned when no transformation applies are not visible in this header,
 * confirm against nu_toupper / nu_tolower)
 * @see nu_toupper
 * @see nu_tolower
 */
typedef const char* (*nu_transformation_t)(uint32_t codepoint);

/** Transform codepoint (used internally). This kind of transformation
 * delegates iteration over the string to the transformation implementation.
 *
 * NOTE(review): the parameter roles below are inferred from names and types
 * only; this header does not show an implementation. Confirm against
 * _nu_toupper / _nu_tolower.
 *
 * @ingroup transformations_internal
 * @param encoded encoded string to read from
 * @param limit presumably the upper bound of the encoded string
 * @param read decoding function used to read the encoded string
 * @param u presumably the output location for the decoded codepoint
 * @param transformed presumably the output location for the transformation
 * result
 * @param context opaque pointer handed to the implementation
 * @return a const char pointer; presumably the position in the encoded
 * string after the consumed input
 * @see _nu_toupper
 * @see _nu_tolower
 */
typedef const char* (*nu_transform_read_t)(
	const char *encoded, const char *limit, nu_read_iterator_t read,
	uint32_t *u, const char **transformed,
	void *context);

#if (defined NU_WITH_Z_STRINGS) || (defined NU_WITH_N_STRINGS)

#endif /* NU_WITH_Z_STRINGS NU_WITH_N_STRINGS */

#ifdef NU_WITH_Z_STRINGS

/** Get the length of an encoded string in codepoints
 *
 * Only declared when NU_WITH_Z_STRINGS is defined.
 *
 * @ingroup strings
 * @param encoded encoded string
 * @param it decoding function
 * @return string length or negative error
 *
 * @see nu_strnlen
 */
NU_EXPORT
ssize_t nu_strlen(const char *encoded, nu_read_iterator_t it);

/** Get encoded string bytes length (encoding variant)
 *
 * The input is a sequence of unicode codepoints rather than an encoded
 * string; the length is reported for the given encoding function.
 * Only declared when NU_WITH_Z_STRINGS is defined.
 *
 * @ingroup strings
 * @param unicode unicode codepoints
 * @param it encoding function
 * @return byte length or negative error
 *
 * @see nu_bytenlen
 */
NU_EXPORT
ssize_t nu_bytelen(const uint32_t *unicode, nu_write_iterator_t it);

/** Get encoded string bytes length
 *
 * Only declared when NU_WITH_Z_STRINGS is defined.
 *
 * @ingroup strings
 * @param encoded encoded string
 * @param it decoding function
 * @return byte length or negative error
 *
 * @see nu_strlen
 */
NU_EXPORT
ssize_t nu_strbytelen(const char *encoded, nu_read_iterator_t it);

#endif /* NU_WITH_Z_STRINGS */

#ifdef NU_WITH_N_STRINGS

/** Get the length of an encoded string in codepoints ("n" variant)
 *
 * Counterpart of nu_strlen() that takes an additional max_len argument.
 * Per the note on "n" variants in the strings group, the limit is in bytes
 * and is not a memory overrun guard: reading may go past the max_len-th
 * byte when it falls inside a multibyte sequence.
 * Only declared when NU_WITH_N_STRINGS is defined.
 *
 * @ingroup strings
 * @param encoded encoded string
 * @param max_len limit, in bytes
 * @param it decoding function
 * @return presumably string length or negative error, as for nu_strlen()
 * (NOTE(review): not stated for this variant, confirm)
 * @see nu_strlen
 */
NU_EXPORT
ssize_t nu_strnlen(const char *encoded, size_t max_len, nu_read_iterator_t it);

/** Get encoded string bytes length (encoding variant, "n" variant)
 *
 * Counterpart of nu_bytelen() that takes an additional max_len argument.
 * Only declared when NU_WITH_N_STRINGS is defined.
 *
 * @ingroup strings
 * @param unicode unicode codepoints
 * @param max_len limit on the input (NOTE(review): the strings group note
 * says "n" is in bytes in all functions; whether that means bytes or
 * codepoints for this uint32_t input is not visible here, confirm)
 * @param it encoding function
 * @return presumably byte length or negative error, as for nu_bytelen()
 * (NOTE(review): not stated for this variant, confirm)
 * @see nu_bytelen
 */
NU_EXPORT
ssize_t nu_bytenlen(const uint32_t *unicode, size_t max_len,
	nu_write_iterator_t it);

#endif /* NU_WITH_N_STRINGS */

#if defined (__cplusplus) || defined (c_plusplus)
}
#endif

#endif /* NU_STRINGS_H */