summaryrefslogtreecommitdiff
path: root/ext/pdo_sqlite/sqlite/src/utf.c
diff options
context:
space:
mode:
authorIlia Alshanetsky <iliaa@php.net>2007-02-09 03:17:47 +0000
committerIlia Alshanetsky <iliaa@php.net>2007-02-09 03:17:47 +0000
commitd35449bbfb0c4feac3057c9aa3583f2bfc92782a (patch)
treefd693b43275b710fc88e6710b90007691fc9bca1 /ext/pdo_sqlite/sqlite/src/utf.c
parent7aa2282124fe2b024cb4f7ec0353c7943ce15505 (diff)
downloadphp-git-d35449bbfb0c4feac3057c9aa3583f2bfc92782a.tar.gz
Upgraded SQLite 3 to version 3.3.12
Diffstat (limited to 'ext/pdo_sqlite/sqlite/src/utf.c')
-rw-r--r--ext/pdo_sqlite/sqlite/src/utf.c55
1 files changed, 38 insertions, 17 deletions
diff --git a/ext/pdo_sqlite/sqlite/src/utf.c b/ext/pdo_sqlite/sqlite/src/utf.c
index 05d238433a..6b78a6dee6 100644
--- a/ext/pdo_sqlite/sqlite/src/utf.c
+++ b/ext/pdo_sqlite/sqlite/src/utf.c
@@ -64,7 +64,7 @@
/*
** This table maps from the first byte of a UTF-8 character to the number
-** of trailing bytes expected. A value '255' indicates that the table key
+** of trailing bytes expected. A value '4' indicates that the table key
** is not a legal first byte for a UTF-8 character.
*/
static const u8 xtra_utf8_bytes[256] = {
@@ -79,10 +79,10 @@ static const u8 xtra_utf8_bytes[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 10wwwwww */
-255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
/* 110yyyyy */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -92,7 +92,7 @@ static const u8 xtra_utf8_bytes[256] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* 11110yyy */
-3, 3, 3, 3, 3, 3, 3, 3, 255, 255, 255, 255, 255, 255, 255, 255,
+3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
};
/*
@@ -101,11 +101,24 @@ static const u8 xtra_utf8_bytes[256] = {
** read by a naive implementation of a UTF-8 character reader. The code
** in the READ_UTF8 macro explains things best.
*/
-static const int xtra_utf8_bits[4] = {
-0,
-12416, /* (0xC0 << 6) + (0x80) */
-925824, /* (0xE0 << 12) + (0x80 << 6) + (0x80) */
-63447168 /* (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
+static const int xtra_utf8_bits[] = {
+ 0,
+ 12416, /* (0xC0 << 6) + (0x80) */
+ 925824, /* (0xE0 << 12) + (0x80 << 6) + (0x80) */
+ 63447168 /* (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
+};
+
+/*
+** If a UTF-8 character contains N bytes extra bytes (N bytes follow
+** the initial byte so that the total character length is N+1) then
+** masking the character with utf8_mask[N] must produce a non-zero
+** result. Otherwise, we have an (illegal) overlong encoding.
+*/
+static const int utf_mask[] = {
+ 0x00000000,
+ 0xffffff80,
+ 0xfffff800,
+ 0xffff0000,
};
#define READ_UTF8(zIn, c) { \
@@ -113,11 +126,14 @@ static const int xtra_utf8_bits[4] = {
c = *(zIn)++; \
xtra = xtra_utf8_bytes[c]; \
switch( xtra ){ \
- case 255: c = (int)0xFFFD; break; \
+ case 4: c = (int)0xFFFD; break; \
case 3: c = (c<<6) + *(zIn)++; \
case 2: c = (c<<6) + *(zIn)++; \
case 1: c = (c<<6) + *(zIn)++; \
c -= xtra_utf8_bits[xtra]; \
+ if( (utf_mask[xtra]&c)==0 \
+ || (c&0xFFFFF800)==0xD800 \
+ || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \
} \
}
int sqlite3ReadUtf8(const unsigned char *z){
@@ -181,6 +197,7 @@ int sqlite3ReadUtf8(const unsigned char *z){
int c2 = (*zIn++); \
c2 += ((*zIn++)<<8); \
c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \
+ if( (c & 0xFFFF0000)==0 ) c = 0xFFFD; \
} \
}
@@ -191,6 +208,7 @@ int sqlite3ReadUtf8(const unsigned char *z){
int c2 = ((*zIn++)<<8); \
c2 += (*zIn++); \
c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \
+ if( (c & 0xFFFF0000)==0 ) c = 0xFFFD; \
} \
}
@@ -245,7 +263,7 @@ int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){
unsigned char *zIn; /* Input iterator */
unsigned char *zTerm; /* End of input */
unsigned char *z; /* Output iterator */
- int c;
+ unsigned int c;
assert( pMem->flags&MEM_Str );
assert( pMem->enc!=desiredEnc );
@@ -475,7 +493,7 @@ char *sqlite3utf16to8(const void *z, int nByte){
** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).
*/
int sqlite3utf16ByteLen(const void *zIn, int nChar){
- int c = 1;
+ unsigned int c = 1;
char const *z = zIn;
int n = 0;
if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){
@@ -556,11 +574,11 @@ void sqlite3utf16Substr(
** characters in each encoding are inverses of each other.
*/
void sqlite3utfSelfTest(){
- int i;
+ unsigned int i, t;
unsigned char zBuf[20];
unsigned char *z;
int n;
- int c;
+ unsigned int c;
for(i=0; i<0x00110000; i++){
z = zBuf;
@@ -568,7 +586,10 @@ void sqlite3utfSelfTest(){
n = z-zBuf;
z = zBuf;
READ_UTF8(z, c);
- assert( c==i );
+ t = i;
+ if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD;
+ if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD;
+ assert( c==t );
assert( (z-zBuf)==n );
}
for(i=0; i<0x00110000; i++){