summaryrefslogtreecommitdiff
path: root/src/util/utf8.h
blob: 753ab07e2a552899fd07de084c7932235c4a77cd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
#ifndef INCLUDE_utf8_h__
#define INCLUDE_utf8_h__

#include "git2_util.h"

/**
 * Iterate through a UTF-8 string, yielding one codepoint at a time.
 *
 * NOTE(review): presumably callers advance `str` by the returned length
 * (and shrink `str_len` by the same amount) to step through the whole
 * string; confirm against the implementation.
 *
 * @param out pointer where to store the current codepoint
 * @param str current position in the string
 * @param str_len size left in the string
 * @return length in bytes of the read codepoint; -1 if the codepoint was invalid
 */
extern int git_utf8_iterate(uint32_t *out, const char *str, size_t str_len);

/**
 * Returns the number of characters in the given string.
 *
 * Invalid data is counted as well: every byte that is not part of a
 * valid UTF-8 codepoint is counted as one character toward the length.
 *
 * In other words:
 *   0x24 (U+0024 "$") has length 1
 *   0xc2 0xa2 (U+00A2 "¢") has length 1
 *   0x24 0xc2 0xa2 (U+0024 U+00A2 "$¢") has length 2
 *   0xf0 0x90 0x8d 0x88 (U+10348 "𐍈") has length 1
 *   0x24 0xc0 0xc1 0x34 (U+0024 <invalid> <invalid> "4") has length 4
 *
 * @param str string to scan
 * @param str_len size of the string
 * @return length in characters of the string
 */
extern size_t git_utf8_char_length(const char *str, size_t str_len);

/**
 * Iterate through a UTF-8 string and stop after finding any invalid
 * UTF-8 codepoint.
 *
 * @param str string to scan
 * @param str_len size of the string
 * @return length in bytes of the string that contains valid data
 */
extern size_t git_utf8_valid_buf_length(const char *str, size_t str_len);

#endif