diff options
Diffstat (limited to 'vendor/nunicode/src/libnu/ducet.c')
-rw-r--r-- | vendor/nunicode/src/libnu/ducet.c | 65 |
1 files changed, 65 insertions, 0 deletions
diff --git a/vendor/nunicode/src/libnu/ducet.c b/vendor/nunicode/src/libnu/ducet.c new file mode 100644 index 0000000000..b634538481 --- /dev/null +++ b/vendor/nunicode/src/libnu/ducet.c @@ -0,0 +1,65 @@ +#include <assert.h> + +#include <libnu/ducet.h> +#include <libnu/udb.h> + +#ifdef NU_WITH_DUCET + +#include "gen/_ducet.c" + +#ifndef NU_DISABLE_CONTRACTIONS +# include "gen/_ducet_switch.c" +#else + const size_t _NU_DUCET_CONTRACTIONS = 0; +#endif + +static size_t _nu_ducet_weights_count() { + return NU_DUCET_G_SIZE + _NU_DUCET_CONTRACTIONS; +} + +int32_t nu_ducet_weight(uint32_t codepoint, int32_t *weight, void *context) { + (void)(weight); + (void)(context); + + assert(_nu_ducet_weights_count() < 0x7FFFFFFF - 0x10FFFF); + +#ifndef NU_DISABLE_CONTRACTIONS + int32_t switch_value = _nu_ducet_weight_switch(codepoint, weight, context); + /* weight switch should return weight (if any) and fill value of *weight + * with fallback (if needed). returned value of 0 is impossible result - this + * special case is already handled above, this return value indicates that switch + * couldn't find weight for a codepoint */ + if (switch_value != 0) { + return switch_value; + } +#endif + + /* special case switch after contractions switch + * to let state-machine figure out its state on abort */ + if (codepoint == 0) { + return 0; + } + + uint32_t mph_value = nu_udb_lookup_value(codepoint, NU_DUCET_G, NU_DUCET_G_SIZE, + NU_DUCET_VALUES_C, NU_DUCET_VALUES_I); + + return (mph_value != 0 + ? (int32_t)(mph_value) + : (int32_t)(codepoint + _nu_ducet_weights_count())); + + /* ISO/IEC 14651 requests that codepoints with undefined weight should be + * sorted before max weight in collation table. This way all codepoints + * defined in ducet would have weight under a value of _nu_ducet_weights_count(), + * all undefined codepoints would have weight under + * 0x10FFFF + _nu_ducet_weights_count() - 1, max weight will be + * 0x10FFFF + _nu_ducet_weights_count() */ + + /* Regarding integer overflow: + * + * int32_t can hold 0xFFFFFFFF / 2 = 0x7FFFFFFF positive numbers, this + * function can safely offset codepoint value up to +2146369536 without + * risk of overflow. Thus max collation table size supported is + * 2146369536 (0x7FFFFFFF - 0x10FFFF) */ +} + +#endif /* NU_WITH_DUCET */ |