#ifndef NU_UTF8_H
#define NU_UTF8_H

#include <stdint.h>
#include <sys/types.h>

#include <libnu/config.h>
#include <libnu/defines.h>
#include <libnu/utf8_internal.h>

/** @defgroup utf8 UTF-8 support
 *
 * Note: There is no utf8_string[i] equivalent - it will be slow,
 * use nu_utf8_read() and nu_utf8_revread() instead
 *
 * @example utf8.c
 * @example revread.c
 */

#if defined (__cplusplus) || defined (c_plusplus)
extern "C" {
#endif

#ifdef NU_WITH_UTF8_READER

/** Read codepoint from UTF-8 string
 *
 * @ingroup utf8
 * @param utf8 pointer to UTF-8 encoded string
 * @param unicode output unicode codepoint or 0
 * @return pointer to next codepoint in UTF-8 string
 */
static inline
const char* nu_utf8_read(const char *utf8, uint32_t *unicode) {
	uint32_t c = *(unsigned char *)(utf8);

	if (c >= 0x80) {
		if (c < 0xE0) {
			if (unicode != 0) {
				utf8_2b(utf8, unicode);
			}
			return utf8 + 2;
		}
		else if (c < 0xF0) {
			if (unicode != 0) {
				utf8_3b(utf8, unicode);
			}
			return utf8 + 3;
		}
		else {
			if (unicode != 0) {
				utf8_4b(utf8, unicode);
			}
			return utf8 + 4;
		}
	}
	else if (unicode != 0) {
		*unicode = c;
	}

	return utf8 + 1;
}

#ifdef NU_WITH_REVERSE_READ

/** Read codepoint from UTF-8 string in backward direction
 *
 * Note that it is your responsibility to check that this call
 * is not going under beginning of encoded string. Normally you
 * shouldn't call it like this: nu_utf8_revread(&u, "hello"); which
 * will result in undefined behavior
 *
 * @ingroup utf8
 * @param unicode output unicode codepoint or 0
 * @param utf8 pointer to UTF-8 encoded string
 * @return pointer to previous codepoint in UTF-8 string
 */
static inline
const char* nu_utf8_revread(uint32_t *unicode, const char *utf8) {
	/* valid UTF-8 has either 10xxxxxx (continuation byte)
	 * or beginning of byte sequence */
	const char *p = utf8 - 1;
	while (((unsigned char)(*p) & 0xC0) == 0x80) { /* skip every 0b10000000 */
		--p;
	}

	if (unicode != 0) {
		nu_utf8_read(p, unicode);
	}

	return p;
}

#endif /* NU_WITH_REVERSE_READ */

#ifdef NU_WITH_VALIDATION

/** Validate codepoint in string
 *
 * @ingroup utf8
 * @param encoded buffer with encoded string
 * @param max_len buffer length
 * @return codepoint length or 0 on error
 */
NU_EXPORT
int nu_utf8_validread(const char *encoded, size_t max_len);

#endif /* NU_WITH_VALIDATION */
#endif /* NU_WITH_UTF8_READER */

#ifdef NU_WITH_UTF8_WRITER

/** Write unicode codepoints into UTF-8 encoded string
 *
 * @ingroup utf8
 * @param unicode unicode codepoint
 * @param utf8 pointer to buffer to write UTF-8 encoded text to,
 * should be large enough to hold encoded value
 * @return pointer to byte after last written
 */
NU_EXPORT
char* nu_utf8_write(uint32_t unicode, char *utf8);

#endif /* NU_WITH_UTF8_WRITER */

#if defined (__cplusplus) || defined (c_plusplus)
}
#endif

#endif /* NU_UTF8_H */