summaryrefslogtreecommitdiff
path: root/strings/ctype-utf16.h
blob: d4cf4664f970e8e2a55054d3a6dbe4d4dae28aa7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
/*
  Copyright (c) 2018 MariaDB Corporation

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 of the License.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

#ifndef _CTYPE_UTF16_H
#define _CTYPE_UTF16_H

/*
  D800..DB7F - Non-provate surrogate high (896 pages)
  DB80..DBFF - Private surrogate high     (128 pages)
  DC00..DFFF - Surrogate low              (1024 codes in a page)
*/
#define MY_UTF16_SURROGATE_HIGH_FIRST 0xD800
#define MY_UTF16_SURROGATE_HIGH_LAST  0xDBFF
#define MY_UTF16_SURROGATE_LOW_FIRST  0xDC00
#define MY_UTF16_SURROGATE_LOW_LAST   0xDFFF

#define MY_UTF16_HIGH_HEAD(x)      ((((uchar) (x)) & 0xFC) == 0xD8)
#define MY_UTF16_LOW_HEAD(x)       ((((uchar) (x)) & 0xFC) == 0xDC)
/* Test if a byte is a leading byte of a high or low surrogate head: */
#define MY_UTF16_SURROGATE_HEAD(x) ((((uchar) (x)) & 0xF8) == 0xD8)
/* Test if a Unicode code point is a high or low surrogate head */
#define MY_UTF16_SURROGATE(x)      (((x) & 0xF800) == 0xD800)

#define MY_UTF16_WC2(a, b)         ((a << 8) + b)

/*
  a= 110110??  (<< 18)
  b= ????????  (<< 10)
  c= 110111??  (<<  8)
  d= ????????  (<<  0)
*/
#define MY_UTF16_WC4(a, b, c, d) (((a & 3) << 18) + (b << 10) + \
                                  ((c & 3) << 8) + d + 0x10000)

static inline int
my_mb_wc_utf16_quick(my_wc_t *pwc, const uchar *s, const uchar *e)
{
  if (s + 2 > e)
    return MY_CS_TOOSMALL2;

  /*
    High bytes: 0xD[89AB] = B'110110??'
    Low bytes:  0xD[CDEF] = B'110111??'
    Surrogate mask:  0xFC = B'11111100'
  */

  if (MY_UTF16_HIGH_HEAD(*s)) /* Surrogate head */
  {
    if (s + 4 > e)
      return MY_CS_TOOSMALL4;

    if (!MY_UTF16_LOW_HEAD(s[2]))  /* Broken surrigate pair */
      return MY_CS_ILSEQ;

    *pwc= MY_UTF16_WC4(s[0], s[1], s[2], s[3]);
    return 4;
  }

  if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */
    return MY_CS_ILSEQ;

  *pwc= MY_UTF16_WC2(s[0], s[1]);
  return 2;
}

#endif /* _CTYPE_UTF16_H */