summaryrefslogtreecommitdiff
path: root/storage/ibmdb2i/db2i_myconv.h
diff options
context:
space:
mode:
Diffstat (limited to 'storage/ibmdb2i/db2i_myconv.h')
-rw-r--r--storage/ibmdb2i/db2i_myconv.h3201
1 files changed, 0 insertions, 3201 deletions
diff --git a/storage/ibmdb2i/db2i_myconv.h b/storage/ibmdb2i/db2i_myconv.h
deleted file mode 100644
index 98032748148..00000000000
--- a/storage/ibmdb2i/db2i_myconv.h
+++ /dev/null
@@ -1,3201 +0,0 @@
-/*
-Licensed Materials - Property of IBM
-DB2 Storage Engine Enablement
-Copyright IBM Corporation 2007,2008
-All rights reserved
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
- (a) Redistributions of source code must retain this list of conditions, the
- copyright notice in section {d} below, and the disclaimer following this
- list of conditions.
- (b) Redistributions in binary form must reproduce this list of conditions, the
- copyright notice in section (d) below, and the disclaimer following this
- list of conditions, in the documentation and/or other materials provided
- with the distribution.
- (c) The name of IBM may not be used to endorse or promote products derived from
- this software without specific prior written permission.
- (d) The text of the required copyright notice is:
- Licensed Materials - Property of IBM
- DB2 Storage Engine Enablement
- Copyright IBM Corporation 2007,2008
- All rights reserved
-
-THIS SOFTWARE IS PROVIDED BY IBM CORPORATION "AS IS" AND ANY EXPRESS OR IMPLIED
-WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
-SHALL IBM CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
-OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
-IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
-OF SUCH DAMAGE.
-*/
-
-/**
- @file
-
- @brief A direct map optimization of iconv and related functions
- This was show to significantly reduce character conversion cost
- for short strings when compared to calling iconv system code.
-*/
-
-#ifndef DB2I_MYCONV_H
-#define DB2I_MYCONV_H
-
-
-#include <sys/time.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <wchar.h>
-#include <errno.h>
-#include <iconv.h>
-#include <ctype.h>
-#include <time.h>
-#include <stdarg.h>
-#include <string.h>
-
-#ifndef TRUE
-#define TRUE 1
-#endif
-
-#ifndef FALSE
-#define FALSE 0
-#endif
-
-#ifdef __cplusplus
-#define INTERN inline
-#define EXTERN extern "C"
-#else
-#define INTERN static
-#define EXTERN extern
-#endif
-
-
-/* ANSI integer data types */
-#if defined(__OS400_TGTVRM__)
-/* for DTAMDL(*P128), datamodel(P128): int/long/pointer=4/4/16 */
-/* LLP64:4/4/8 is used for teraspace ?? */
-typedef short int16_t;
-typedef unsigned short uint16_t;
-typedef int int32_t;
-typedef unsigned int uint32_t;
-typedef long long int64_t;
-typedef unsigned long long uint64_t;
-#elif defined(PASE)
-/* PASE uses IPL32: int/long/pointer=4/4/4 + long long */
-#elif defined(__64BIT__)
-/* AIX 64 bit uses LP64: int/long/pointer=4/8/8 */
-#endif
-
-#define CONVERTER_ICONV 1
-#define CONVERTER_DMAP 2
-
-#define DMAP_S2S 10
-#define DMAP_S2U 20
-#define DMAP_D2U 30
-#define DMAP_E2U 40
-#define DMAP_U2S 120
-#define DMAP_T2S 125
-#define DMAP_U2D 130
-#define DMAP_T2D 135
-#define DMAP_U2E 140
-#define DMAP_T2E 145
-#define DMAP_S28 220
-#define DMAP_D28 230
-#define DMAP_E28 240
-#define DMAP_82S 310
-#define DMAP_82D 320
-#define DMAP_82E 330
-#define DMAP_U28 410
-#define DMAP_82U 420
-#define DMAP_T28 425
-#define DMAP_U2U 510
-
-
-typedef struct __dmap_rec *dmap_t;
-
-struct __dmap_rec
-{
- uint32_t codingSchema;
- unsigned char * dmapS2S; /* SBCS -> SBCS */
- /* The following conversion needs be followed by conversion from UCS-2/UTF-16 to UTF-8 */
- UniChar * dmapD12U; /* DBCS(non-EUC) -> UCS-2/UTF-16 */
- UniChar * dmapD22U; /* DBCS(non-EUC) -> UCS-2/UTF-16 */
- UniChar * dmapE02U; /* EUC/SS0 -> UCS-2/UTF-16 */
- UniChar * dmapE12U; /* EUC/SS1 -> UCS-2/UTF-16 */
- UniChar * dmapE22U; /* EUC/0x8E + SS2 -> UCS-2/UTF-16 */
- UniChar * dmapE32U; /* EUC/0x8F + SS3 -> UCS-2/UTF-16 */
- uchar * dmapU2D; /* UCS-2 -> DBCS */
- uchar * dmapU2S; /* UCS-2 -> EUC SS0 */
- uchar * dmapU2M2; /* UCS-2 -> EUC SS1 */
- uchar * dmapU2M3; /* UCS-2 -> EUC SS2/SS3 */
- /* All of these pointers/tables are not used at the same time.
- * You may be able save some space if you consolidate them.
- */
- uchar * dmapS28; /* SBCS -> UTF-8 */
- uchar * dmapD28; /* DBCS -> UTF-8 */
-};
-
-typedef struct __myconv_rec *myconv_t;
-struct __myconv_rec
-{
- uint32_t converterType;
- uint32_t index; /* for close */
- union {
- iconv_t cnv_iconv;
- dmap_t cnv_dmap;
- };
- int32_t allocatedSize;
- int32_t fromCcsid;
- int32_t toCcsid;
- UniChar subD; /* DBCS substitution char */
- char subS; /* SBCS substitution char */
- UniChar srcSubD; /* DBCS substitution char of src codepage */
- char srcSubS; /* SBCS substitution char of src codepage */
- char from [41+1]; /* codepage name is up to 41 bytes */
- char to [41+1]; /* codepage name is up to 41 bytes */
-#ifdef __64BIT__
- char reserved[10]; /* align 128 */
-#else
- char reserved[14]; /* align 128 */
-#endif
-};
-
-
-EXTERN int32_t myconvDebug;
-
-
-
-EXTERN int myconvGetES(CCSID);
-EXTERN int myconvIsEBCDIC(const char *);
-EXTERN int myconvIsASCII(const char *);
-EXTERN int myconvIsUnicode(const char *); /* UTF-8, UTF-16, or UCS-2 */
-EXTERN int myconvIsUnicode2(const char *); /* 2 byte Unicode */
-EXTERN int myconvIsUCS2(const char *);
-EXTERN int myconvIsUTF16(const char *);
-EXTERN int myconvIsUTF8(const char *);
-EXTERN int myconvIsEUC(const char *);
-EXTERN int myconvIsISO(const char *);
-EXTERN int myconvIsSBCS(const char *);
-EXTERN int myconvIsDBCS(const char *);
-EXTERN char myconvGetSubS(const char *);
-EXTERN UniChar myconvGetSubD(const char *);
-
-
-EXTERN myconv_t myconv_open(const char*, const char*, int32_t);
-EXTERN int myconv_close(myconv_t);
-
-INTERN size_t myconv_iconv(myconv_t cd ,
- char** inBuf,
- size_t* inBytesLeft,
- char** outBuf,
- size_t* outBytesLeft,
- size_t* numSub)
-{
- return iconv(cd->cnv_iconv, inBuf, inBytesLeft, outBuf, outBytesLeft);
-}
-
-INTERN size_t myconv_dmap(myconv_t cd,
- char** inBuf,
- size_t* inBytesLeft,
- char** outBuf,
- size_t* outBytesLeft,
- size_t* numSub)
-{
- if (cd->cnv_dmap->codingSchema == DMAP_S2S) {
- register unsigned char * dmapS2S=cd->cnv_dmap->dmapS2S;
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- register char subS=cd->subS;
- register size_t numS=0;
- while (0 < inLen) {
- if (pLastOutBuf < pOut)
- break;
- if (*pIn == 0x00) {
- *pOut=0x00;
- } else {
- *pOut=dmapS2S[*pIn];
- if (*pOut == 0x00) {
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(*inBytesLeft-inLen);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- if (*pOut == subS) {
- if ((*pOut=dmapS2S[*pIn]) == subS) {
- if (*pIn != cd->srcSubS)
- ++numS;
- }
- }
- }
- ++pIn;
- --inLen;
- ++pOut;
- }
- *outBytesLeft-=(*inBytesLeft-inLen);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return 0;
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_E2U) {
- /* use uchar * instead of UniChar to avoid memcpy */
- register uchar * dmapE02U=(uchar *) (cd->cnv_dmap->dmapE02U);
- register uchar * dmapE12U=(uchar *) (cd->cnv_dmap->dmapE12U);
- register uchar * dmapE22U=(uchar *) (cd->cnv_dmap->dmapE22U);
- register uchar * dmapE32U=(uchar *) (cd->cnv_dmap->dmapE32U);
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register int offset;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- register size_t numS=0;
- while (0 < inLen) {
- if (pLastOutBuf < pOut)
- break;
- if (*pIn == 0x00) {
- *pOut=0x00;
- ++pOut;
- *pOut=0x00;
- ++pOut;
- ++pIn;
- --inLen;
- } else {
- if (*pIn == 0x8E) { /* SS2 */
- if (inLen < 2) {
- if (cd->fromCcsid == 33722 || /* IBM-eucJP */
- cd->fromCcsid == 964) /* IBM-eucTW */
- errno=EINVAL; /* 22 */
- else
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
- }
- ++pIn;
- if (*pIn < 0xA0) {
- if (cd->fromCcsid == 964) /* IBM-eucTW */
- errno=EINVAL; /* 22 */
- else
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- return -1;
- }
- offset=(*pIn - 0xA0);
- offset<<=1;
- if (dmapE22U[offset] == 0x00 &&
- dmapE22U[offset+1] == 0x00) { /* 2 bytes */
- if (inLen < 3) {
- if (cd->fromCcsid == 964) /* IBM-eucTW */
- errno=EINVAL; /* 22 */
- else
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- return -1;
- }
- offset=(*pIn - 0xA0) * 0x60 + 0x60;
- ++pIn;
- if (*pIn < 0xA0) {
- if (cd->fromCcsid == 964) /* IBM-eucTW */
- errno=EINVAL; /* 22 */
- else
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-2;
- return -1;
- }
- offset+=(*pIn - 0xA0);
- offset<<=1;
- if (dmapE22U[offset] == 0x00 &&
- dmapE22U[offset+1] == 0x00) {
- if (cd->fromCcsid == 964) /* IBM-eucTW */
- errno=EINVAL; /* 22 */
- else
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-2;
- return -1;
- }
- *pOut=dmapE22U[offset];
- ++pOut;
- *pOut=dmapE22U[offset+1];
- ++pOut;
- if (dmapE22U[offset] == 0xFF &&
- dmapE22U[offset+1] == 0xFD) {
- if (pIn[-2] * 0x100 + pIn[-1] != cd->srcSubD)
- ++numS;
- }
- ++pIn;
- inLen-=3;
- } else { /* 1 bytes */
- *pOut=dmapE22U[offset];
- ++pOut;
- *pOut=dmapE22U[offset+1];
- ++pOut;
- ++pIn;
- inLen-=2;
- }
- } else if (*pIn == 0x8F) { /* SS3 */
- if (inLen < 2) {
- if (cd->fromCcsid == 33722) /* IBM-eucJP */
- errno=EINVAL; /* 22 */
- else
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
- }
- ++pIn;
- if (*pIn < 0xA0) {
- if (cd->fromCcsid == 970 || /* IBM-eucKR */
- cd->fromCcsid == 964 || /* IBM-eucTW */
- cd->fromCcsid == 1383 || /* IBM-eucCN */
- (cd->fromCcsid == 33722 && 3 <= inLen)) /* IBM-eucJP */
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- return -1;
- }
- offset=(*pIn - 0xA0);
- offset<<=1;
- if (dmapE32U[offset] == 0x00 &&
- dmapE32U[offset+1] == 0x00) { /* 0x8F + 2 bytes */
- if (inLen < 3) {
- if (cd->fromCcsid == 33722)
- errno=EINVAL; /* 22 */
- else
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- return -1;
- }
- offset=(*pIn - 0xA0) * 0x60 + 0x60;
- ++pIn;
- if (*pIn < 0xA0) {
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-2;
- return -1;
- }
- offset+=(*pIn - 0xA0);
- offset<<=1;
- if (dmapE32U[offset] == 0x00 &&
- dmapE32U[offset+1] == 0x00) {
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-2;
- return -1;
- }
- *pOut=dmapE32U[offset];
- ++pOut;
- *pOut=dmapE32U[offset+1];
- ++pOut;
- if (dmapE32U[offset] == 0xFF &&
- dmapE32U[offset+1] == 0xFD) {
- if (pIn[-2] * 0x100 + pIn[-1] != cd->srcSubD)
- ++numS;
- }
- ++pIn;
- inLen-=3;
- } else { /* 0x8F + 1 bytes */
- *pOut=dmapE32U[offset];
- ++pOut;
- *pOut=dmapE32U[offset+1];
- ++pOut;
- ++pIn;
- inLen-=2;
- }
-
- } else {
- offset=*pIn;
- offset<<=1;
- if (dmapE02U[offset] == 0x00 &&
- dmapE02U[offset+1] == 0x00) { /* SS1 */
- if (inLen < 2) {
- if ((cd->fromCcsid == 33722 && (*pIn == 0xA0 || (0xA9 <= *pIn && *pIn <= 0xAF) || *pIn == 0xFF)) ||
- (cd->fromCcsid == 970 && (*pIn == 0xA0 || *pIn == 0xAD || *pIn == 0xAE || *pIn == 0xAF || *pIn == 0xFF)) ||
- (cd->fromCcsid == 964 && (*pIn == 0xA0 || (0xAA <= *pIn && *pIn <= 0xC1) || *pIn == 0xC3 || *pIn == 0xFE || *pIn == 0xFF)) ||
- (cd->fromCcsid == 1383 && (*pIn == 0xA0 || *pIn == 0xFF)))
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
- }
- if (*pIn < 0xA0) {
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
- }
- offset=(*pIn - 0xA0) * 0x60;
- ++pIn;
- if (*pIn < 0xA0) {
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- return -1;
- }
- offset+=(*pIn - 0xA0);
- offset<<=1;
- if (dmapE12U[offset] == 0x00 &&
- dmapE12U[offset+1] == 0x00) { /* undefined mapping */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- return -1;
- }
- *pOut=dmapE12U[offset];
- ++pOut;
- *pOut=dmapE12U[offset+1];
- ++pOut;
- if (dmapE12U[offset] == 0xFF &&
- dmapE12U[offset+1] == 0xFD) {
- if (pIn[-1] * 0x100 + pIn[0] != cd->srcSubD)
- ++numS;
- }
- ++pIn;
- inLen-=2;
- } else {
- *pOut=dmapE02U[offset];
- ++pOut;
- *pOut=dmapE02U[offset+1];
- ++pOut;
- if (dmapE02U[offset] == 0x00 &&
- dmapE02U[offset+1] == 0x1A) {
- if (*pIn != cd->srcSubS)
- ++numS;
- }
- ++pIn;
- --inLen;
- }
- }
- }
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return 0;
-
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_E28) {
- /* use uchar * instead of UniChar to avoid memcpy */
- register uchar * dmapE02U=(uchar *) (cd->cnv_dmap->dmapE02U);
- register uchar * dmapE12U=(uchar *) (cd->cnv_dmap->dmapE12U);
- register uchar * dmapE22U=(uchar *) (cd->cnv_dmap->dmapE22U);
- register uchar * dmapE32U=(uchar *) (cd->cnv_dmap->dmapE32U);
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register int offset;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- register size_t numS=0;
- register UniChar in; /* copy part of U28 */
- register UniChar ucs2;
- while (0 < inLen) {
- if (pLastOutBuf < pOut)
- break;
- if (*pIn == 0x00) {
- *pOut=0x00;
- ++pOut;
- ++pIn;
- --inLen;
- } else {
- if (*pIn == 0x8E) { /* SS2 */
- if (inLen < 2) {
- if (cd->fromCcsid == 33722 || /* IBM-eucJP */
- cd->fromCcsid == 964) /* IBM-eucTW */
- errno=EINVAL; /* 22 */
- else
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
- }
- ++pIn;
- if (*pIn < 0xA0) {
- if (cd->fromCcsid == 964) /* IBM-eucTW */
- errno=EINVAL; /* 22 */
- else
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- return -1;
- }
- offset=(*pIn - 0xA0);
- offset<<=1;
- if (dmapE22U[offset] == 0x00 &&
- dmapE22U[offset+1] == 0x00) { /* 2 bytes */
- if (inLen < 3) {
- if (cd->fromCcsid == 964) /* IBM-eucTW */
- errno=EINVAL; /* 22 */
- else
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- return -1;
- }
- offset=(*pIn - 0xA0) * 0x60 + 0x60;
- ++pIn;
- if (*pIn < 0xA0) {
- if (cd->fromCcsid == 964) /* IBM-eucTW */
- errno=EINVAL; /* 22 */
- else
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-2;
- return -1;
- }
- offset+=(*pIn - 0xA0);
- offset<<=1;
- if (dmapE22U[offset] == 0x00 &&
- dmapE22U[offset+1] == 0x00) {
- if (cd->fromCcsid == 964) /* IBM-eucTW */
- errno=EINVAL; /* 22 */
- else
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-2;
- return -1;
- }
- in=dmapE22U[offset];
- in<<=8;
- in+=dmapE22U[offset+1];
- if (dmapE22U[offset] == 0xFF &&
- dmapE22U[offset+1] == 0xFD) {
- if (pIn[-2] * 0x100 + pIn[-1] != cd->srcSubD)
- ++numS;
- }
- ++pIn;
- inLen-=3;
- } else { /* 1 bytes */
- in=dmapE22U[offset];
- in<<=8;
- in+=dmapE22U[offset+1];
- ++pIn;
- inLen-=2;
- }
- } else if (*pIn == 0x8F) { /* SS3 */
- if (inLen < 2) {
- if (cd->fromCcsid == 33722) /* IBM-eucJP */
- errno=EINVAL; /* 22 */
- else
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
- }
- ++pIn;
- if (*pIn < 0xA0) {
- if (cd->fromCcsid == 970 || /* IBM-eucKR */
- cd->fromCcsid == 964 || /* IBM-eucTW */
- cd->fromCcsid == 1383 || /* IBM-eucCN */
- (cd->fromCcsid == 33722 && 3 <= inLen)) /* IBM-eucJP */
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- return -1;
- }
- offset=(*pIn - 0xA0);
- offset<<=1;
- if (dmapE32U[offset] == 0x00 &&
- dmapE32U[offset+1] == 0x00) { /* 0x8F + 2 bytes */
- if (inLen < 3) {
- if (cd->fromCcsid == 33722)
- errno=EINVAL; /* 22 */
- else
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- return -1;
- }
- offset=(*pIn - 0xA0) * 0x60 + 0x60;
- ++pIn;
- if (*pIn < 0xA0) {
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-2;
- return -1;
- }
- offset+=(*pIn - 0xA0);
- offset<<=1;
- if (dmapE32U[offset] == 0x00 &&
- dmapE32U[offset+1] == 0x00) {
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-2;
- return -1;
- }
- in=dmapE32U[offset];
- in<<=8;
- in+=dmapE32U[offset+1];
- if (dmapE32U[offset] == 0xFF &&
- dmapE32U[offset+1] == 0xFD) {
- if (pIn[-2] * 0x100 + pIn[-1] != cd->srcSubD)
- ++numS;
- }
- ++pIn;
- inLen-=3;
- } else { /* 0x8F + 1 bytes */
- in=dmapE32U[offset];
- in<<=8;
- in+=dmapE32U[offset+1];
- ++pIn;
- inLen-=2;
- }
-
- } else {
- offset=*pIn;
- offset<<=1;
- if (dmapE02U[offset] == 0x00 &&
- dmapE02U[offset+1] == 0x00) { /* SS1 */
- if (inLen < 2) {
- if ((cd->fromCcsid == 33722 && (*pIn == 0xA0 || (0xA9 <= *pIn && *pIn <= 0xAF) || *pIn == 0xFF)) ||
- (cd->fromCcsid == 970 && (*pIn == 0xA0 || *pIn == 0xAD || *pIn == 0xAE || *pIn == 0xAF || *pIn == 0xFF)) ||
- (cd->fromCcsid == 964 && (*pIn == 0xA0 || (0xAA <= *pIn && *pIn <= 0xC1) || *pIn == 0xC3 || *pIn == 0xFE || *pIn == 0xFF)) ||
- (cd->fromCcsid == 1383 && (*pIn == 0xA0 || *pIn == 0xFF)))
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
- }
- if (*pIn < 0xA0) {
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
- }
- offset=(*pIn - 0xA0) * 0x60;
- ++pIn;
- if (*pIn < 0xA0) {
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- return -1;
- }
- offset+=(*pIn - 0xA0);
- offset<<=1;
- if (dmapE12U[offset] == 0x00 &&
- dmapE12U[offset+1] == 0x00) { /* undefined mapping */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- return -1;
- }
- in=dmapE12U[offset];
- in<<=8;
- in+=dmapE12U[offset+1];
- if (dmapE12U[offset] == 0xFF &&
- dmapE12U[offset+1] == 0xFD) {
- if (pIn[-1] * 0x100 + pIn[0] != cd->srcSubD)
- ++numS;
- }
- ++pIn;
- inLen-=2;
- } else {
- in=dmapE02U[offset];
- in<<=8;
- in+=dmapE02U[offset+1];
- if (dmapE02U[offset] == 0x00 &&
- dmapE02U[offset+1] == 0x1A) {
- if (*pIn != cd->srcSubS)
- ++numS;
- }
- ++pIn;
- --inLen;
- }
- }
- ucs2=in;
- if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
- *pOut=in;
- ++pOut;
- } else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
- register uchar byte;
- in>>=6;
- in&=0x001F; /* 0b0000000000011111 */
- in|=0x00C0; /* 0b0000000011000000 */
- *pOut=in;
- ++pOut;
- byte=ucs2; /* dmapD12U[offset+1]; */
- byte&=0x3F; /* 0b00111111; */
- byte|=0x80; /* 0b10000000; */
- *pOut=byte;
- ++pOut;
- } else if ((in & 0xFC00) == 0xD800) {
- *pOut=0xEF;
- ++pOut;
- *pOut=0xBF;
- ++pOut;
- *pOut=0xBD;
- ++pOut;
- } else {
- register uchar byte;
- register uchar work;
- byte=(ucs2>>8); /* dmapD12U[offset]; */
- byte>>=4;
- byte|=0xE0; /* 0b11100000; */
- *pOut=byte;
- ++pOut;
-
- byte=(ucs2>>8); /* dmapD12U[offset]; */
- byte<<=2;
- work=ucs2; /* dmapD12U[offset+1]; */
- work>>=6;
- byte|=work;
- byte&=0x3F; /* 0b00111111; */
- byte|=0x80; /* 0b10000000; */
- *pOut=byte;
- ++pOut;
-
- byte=ucs2; /* dmapD12U[offset+1]; */
- byte&=0x3F; /* 0b00111111; */
- byte|=0x80; /* 0b10000000; */
- *pOut=byte;
- ++pOut;
- }
- /* end of U28 */
- }
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return 0;
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_U2E) {
- register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
- register uchar * dmapU2M2=cd->cnv_dmap->dmapU2M2 - 0x80 * 2;
- register uchar * dmapU2M3=cd->cnv_dmap->dmapU2M3 - 0x80 * 3;
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- register char subS=cd->subS;
- register char * pSubD=(char *) &(cd->subD);
- register size_t numS=0;
- register size_t rc=0;
- while (0 < inLen) {
- register uint32_t in;
- if (inLen == 1) {
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
- }
- if (pLastOutBuf < pOut)
- break;
- in=pIn[0];
- in<<=8;
- in+=pIn[1];
- if (in == 0x0000) {
- *pOut=0x00;
- ++pOut;
- } else if (in < 0x100 && dmapU2S[in] != 0x0000) {
- if ((*pOut=dmapU2S[in]) == subS) {
- if (in != cd->srcSubS)
- ++numS;
- }
- ++pOut;
- } else {
- in<<=1;
- if (dmapU2M2[in] == 0x00) { /* not found in dmapU2M2 */
- in*=1.5;
- if (dmapU2M3[in] == 0x00) { /* not found in dmapU2M3*/
- *pOut=pSubD[0];
- ++pOut;
- *pOut=pSubD[1];
- ++pOut;
- ++numS;
- ++rc;
- } else {
- *pOut=dmapU2M3[in];
- ++pOut;
- *pOut=dmapU2M3[1+in];
- ++pOut;
- *pOut=dmapU2M3[2+in];
- ++pOut;
- }
- } else {
- *pOut=dmapU2M2[in];
- ++pOut;
- if (dmapU2M2[1+in] == 0x00) {
- if (*pOut == subS) {
- in>>=1;
- if (in != cd->srcSubS)
- ++numS;
- }
- } else {
- *pOut=dmapU2M2[1+in];
- ++pOut;
- if (memcmp(pOut-2, pSubD, 2) == 0) {
- in>>=1;
- if (in != cd->srcSubD) {
- ++numS;
- ++rc;
- }
- }
- }
- }
- }
- pIn+=2;
- inLen-=2;
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return rc; /* compatibility to iconv() */
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_T2E) {
- register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
- register uchar * dmapU2M2=cd->cnv_dmap->dmapU2M2 - 0x80 * 2;
- register uchar * dmapU2M3=cd->cnv_dmap->dmapU2M3 - 0x80 * 3;
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- register char subS=cd->subS;
- register char * pSubD=(char *) &(cd->subD);
- register size_t numS=0;
- register size_t rc=0;
- while (0 < inLen) {
- register uint32_t in;
- if (inLen == 1) {
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen-1;
- *outBuf=pOut;
- *inBuf=pIn;
- ++numS;
- *numSub+=numS;
- return 0;
- }
- if (pLastOutBuf < pOut)
- break;
- in=pIn[0];
- in<<=8;
- in+=pIn[1];
- if (in == 0x0000) {
- *pOut=0x00;
- ++pOut;
- } else if (0xD800 <= in && in <= 0xDBFF) { /* first byte of surrogate */
- errno=EINVAL; /* 22 */
- *inBytesLeft=inLen-2;
- *outBytesLeft-=(pOut-*outBuf);
- *outBuf=pOut;
- *inBuf=pIn+2;
- ++numS;
- *numSub+=numS;
- return -1;
-
- } else if (0xDC00 <= in && in <= 0xDFFF) { /* second byte of surrogate */
- errno=EINVAL; /* 22 */
- *inBytesLeft=inLen-1;
- *outBytesLeft-=(pOut-*outBuf);
- *outBuf=pOut;
- *inBuf=pIn;
- ++numS;
- *numSub+=numS;
- return -1;
-
- } else if (in < 0x100 && dmapU2S[in] != 0x0000) {
- if ((*pOut=dmapU2S[in]) == subS) {
- if (in != cd->srcSubS)
- ++numS;
- }
- ++pOut;
- } else {
- in<<=1;
- if (dmapU2M2[in] == 0x00) { /* not found in dmapU2M2 */
- in*=1.5;
- if (dmapU2M3[in] == 0x00) { /* not found in dmapU2M3*/
- *pOut=pSubD[0];
- ++pOut;
- *pOut=pSubD[1];
- ++pOut;
- ++numS;
- ++rc;
- } else {
- *pOut=dmapU2M3[in];
- ++pOut;
- *pOut=dmapU2M3[1+in];
- ++pOut;
- *pOut=dmapU2M3[2+in];
- ++pOut;
- }
- } else {
- *pOut=dmapU2M2[in];
- ++pOut;
- if (dmapU2M2[1+in] == 0x00) {
- if (*pOut == subS) {
- in>>=1;
- if (in != cd->srcSubS)
- ++numS;
- }
- } else {
- *pOut=dmapU2M2[1+in];
- ++pOut;
- if (memcmp(pOut-2, pSubD, 2) == 0) {
- in>>=1;
- if (in != cd->srcSubD) {
- ++numS;
- ++rc;
- }
- }
- }
- }
- }
- pIn+=2;
- inLen-=2;
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return 0;
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_82E) {
- register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
- register uchar * dmapU2M2=cd->cnv_dmap->dmapU2M2 - 0x80 * 2;
- register uchar * dmapU2M3=cd->cnv_dmap->dmapU2M3 - 0x80 * 3;
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- register char subS=cd->subS;
- register char * pSubD=(char *) &(cd->subD);
- register size_t numS=0;
- register size_t rc=0;
- while (0 < inLen) {
- register uint32_t in;
- uint32_t in2;
- if (pLastOutBuf < pOut)
- break;
- /* convert from UTF-8 to UCS-2 */
- if (*pIn == 0x00) {
- in=0x0000;
- ++pIn;
- --inLen;
- } else { /* 82U: */
- register uchar byte1=*pIn;
- if ((byte1 & 0x80) == 0x00) { /* if (byte1 & 0b10000000 == 0b00000000) { */
- /* 1 bytes sequence: 0xxxxxxx => 00000000 0xxxxxxx*/
- in=byte1;
- ++pIn;
- --inLen;
- } else if ((byte1 & 0xE0) == 0xC0) { /* (byte1 & 0b11100000 == 0b11000000) { */
- if (inLen < 2) {
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- if (byte1 == 0xC0 || byte1 == 0xC1) { /* invalid sequence */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- /* 2 bytes sequence:
- 110yyyyy 10xxxxxx => 00000yyy yyxxxxxx */
- register uchar byte2;
- ++pIn;
- byte2=*pIn;
- if ((byte2 & 0xC0) == 0x80) { /* byte2 & 0b11000000 == 0b10000000) { */
- register uchar work=byte1;
- work<<=6;
- byte2&=0x3F; /* 0b00111111; */
- byte2|=work;
-
- byte1&=0x1F; /* 0b00011111; */
- byte1>>=2;
- in=byte1;
- in<<=8;
- in+=byte2;
- inLen-=2;
- ++pIn;
- } else { /* invalid sequence */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- *numSub+=numS;
- return -1;
- }
- } else if ((byte1 & 0xF0) == 0xE0) { /* byte1 & 0b11110000 == 0b11100000 */
- /* 3 bytes sequence:
- 1110zzzz 10yyyyyy 10xxxxxx => zzzzyyyy yyxxxxxx */
- register uchar byte2;
- register uchar byte3;
- if (inLen < 3) {
- if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- ++pIn;
- byte2=*pIn;
- ++pIn;
- byte3=*pIn;
- if ((byte2 & 0xC0) != 0x80 ||
- (byte3 & 0xC0) != 0x80 ||
- (byte1 == 0xE0 && byte2 < 0xA0)) { /* invalid sequence, only 0xA0-0xBF allowed after 0xE0 */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-2;
- *numSub+=numS;
- return -1;
- }
- {
- register uchar work=byte2;
- work<<=6;
- byte3&=0x3F; /* 0b00111111; */
- byte3|=work;
-
- byte2&=0x3F; /* 0b00111111; */
- byte2>>=2;
-
- byte1<<=4;
- in=byte1 | byte2;;
- in<<=8;
- in+=byte3;
- inLen-=3;
- ++pIn;
- }
- } else if ((0xF0 <= byte1 && byte1 <= 0xF4)) { /* (bytes1 & 11111000) == 0x1110000 */
- /* 4 bytes sequence
- 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx => 110110ww wwzzzzyy 110111yy yyxxxxxx
- where uuuuu = wwww + 1 */
- register uchar byte2;
- register uchar byte3;
- register uchar byte4;
- if (inLen < 4) {
- if ((inLen >= 2 && (pIn[1] & 0xC0) != 0x80) ||
- (inLen >= 3 && (pIn[2] & 0xC0) != 0x80) ||
- (cd->toCcsid == 13488) )
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- ++pIn;
- byte2=*pIn;
- ++pIn;
- byte3=*pIn;
- ++pIn;
- byte4=*pIn;
- if ((byte2 & 0xC0) == 0x80 && /* byte2 & 0b11000000 == 0b10000000 */
- (byte3 & 0xC0) == 0x80 && /* byte3 & 0b11000000 == 0b10000000 */
- (byte4 & 0xC0) == 0x80) { /* byte4 & 0b11000000 == 0b10000000 */
- register uchar work=byte2;
- if (byte1 == 0xF0 && byte2 < 0x90) {
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-3;
- *numSub+=numS;
- return -1;
- /* iconv() returns 0 for 0xF4908080 and convert to 0x00
- } else if (byte1 == 0xF4 && byte2 > 0x8F) {
- errno=EINVAL;
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-3;
- *numSub+=numS;
- return -1;
- */
- }
-
- work&=0x30; /* 0b00110000; */
- work>>=4;
- byte1&=0x07; /* 0b00000111; */
- byte1<<=2;
- byte1+=work; /* uuuuu */
- --byte1; /* wwww */
-
- work=byte1 & 0x0F;
- work>>=2;
- work+=0xD8; /* 0b11011011; */
- in=work;
- in<<=8;
-
- byte1<<=6;
- byte2<<=2;
- byte2&=0x3C; /* 0b00111100; */
- work=byte3;
- work>>=4;
- work&=0x03; /* 0b00000011; */
- work|=byte1;
- work|=byte2;
- in+=work;
-
- work=byte3;
- work>>=2;
- work&=0x03; /* 0b00000011; */
- work|=0xDC; /* 0b110111xx; */
- in2=work;
- in2<<=8;
-
- byte3<<=6;
- byte4&=0x3F; /* 0b00111111; */
- byte4|=byte3;
- in2+=byte4;
- inLen-=4;
- ++pIn;
-#ifdef match_with_GBK
- if ((0xD800 == in && in2 < 0xDC80) ||
- (0xD840 == in && in2 < 0xDC80) ||
- (0xD880 == in && in2 < 0xDC80) ||
- (0xD8C0 == in && in2 < 0xDC80) ||
- (0xD900 == in && in2 < 0xDC80) ||
- (0xD940 == in && in2 < 0xDC80) ||
- (0xD980 == in && in2 < 0xDC80) ||
- (0xD9C0 == in && in2 < 0xDC80) ||
- (0xDA00 == in && in2 < 0xDC80) ||
- (0xDA40 == in && in2 < 0xDC80) ||
- (0xDA80 == in && in2 < 0xDC80) ||
- (0xDAC0 == in && in2 < 0xDC80) ||
- (0xDB00 == in && in2 < 0xDC80) ||
- (0xDB40 == in && in2 < 0xDC80) ||
- (0xDB80 == in && in2 < 0xDC80) ||
- (0xDBC0 == in && in2 < 0xDC80)) {
-#else
- if ((0xD800 <= in && in <= 0xDBFF) &&
- (0xDC00 <= in2 && in2 <= 0xDFFF)) {
-#endif
- *pOut=subS;
- ++pOut;
- ++numS;
- continue;
- }
- } else { /* invalid sequence */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-3;
- *numSub+=numS;
- return -1;
- }
- } else if (0xF5 <= byte1 && byte1 <= 0xFF) { /* minic iconv() behavior */
- if (inLen < 4 ||
- (inLen >= 4 && byte1 == 0xF8 && pIn[1] < 0x90) ||
- pIn[1] < 0x80 || 0xBF < pIn[1] ||
- pIn[2] < 0x80 || 0xBF < pIn[2] ||
- pIn[3] < 0x80 || 0xBF < pIn[3] ) {
- if (inLen == 1)
- errno=EINVAL; /* 22 */
- else if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
- errno=EILSEQ; /* 116 */
- else if (inLen == 3 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80))
- errno=EILSEQ; /* 116 */
- else if (inLen >= 4 && (byte1 == 0xF8 || (pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80 || (pIn[3] & 0xC0) != 0x80))
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
-
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- } else if ((pIn[1] == 0x80 || pIn[1] == 0x90 || pIn[1] == 0xA0 || pIn[1] == 0xB0) &&
- pIn[2] < 0x82) {
- *pOut=subS; /* Though returns replacement character, which iconv() does not return. */
- ++pOut;
- ++numS;
- pIn+=4;
- inLen-=4;
- continue;
- } else {
- *pOut=pSubD[0]; /* Though returns replacement character, which iconv() does not return. */
- ++pOut;
- *pOut=pSubD[1];
- ++pOut;
- ++numS;
- pIn+=4;
- inLen-=4;
- continue;
- /* iconv() returns 0 with strange 1 byte converted values */
- }
-
- } else { /* invalid sequence */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- }
- /* end of UTF-8 to UCS-2 */
- if (in == 0x0000) {
- *pOut=0x00;
- ++pOut;
- } else if (in < 0x100 && dmapU2S[in] != 0x0000) {
- if ((*pOut=dmapU2S[in]) == subS) {
- if (in != cd->srcSubS)
- ++numS;
- }
- ++pOut;
- } else {
- in<<=1;
- if (dmapU2M2[in] == 0x00) { /* not found in dmapU2M2 */
- in*=1.5;
- if (dmapU2M3[in] == 0x00) { /* not found in dmapU2M3*/
- *pOut=pSubD[0];
- ++pOut;
- *pOut=pSubD[1];
- ++pOut;
- ++numS;
- ++rc;
- } else {
- *pOut=dmapU2M3[in];
- ++pOut;
- *pOut=dmapU2M3[1+in];
- ++pOut;
- *pOut=dmapU2M3[2+in];
- ++pOut;
- }
- } else {
- *pOut=dmapU2M2[in];
- ++pOut;
- if (dmapU2M2[1+in] == 0x00) {
- if (*pOut == subS) {
- in>>=1;
- if (in != cd->srcSubS)
- ++numS;
- }
- } else {
- *pOut=dmapU2M2[1+in];
- ++pOut;
- if (memcmp(pOut-2, pSubD, 2) == 0) {
- in>>=1;
- if (in != cd->srcSubD) {
- ++numS;
- ++rc;
- }
- }
- }
- }
- }
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return 0;
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_S2U) {
- /* use uchar * instead of UniChar to avoid memcpy */
- register uchar * dmapD12U=(uchar *) (cd->cnv_dmap->dmapD12U);
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register int offset;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- register size_t numS=0;
- while (0 < inLen) {
- if (pLastOutBuf < pOut)
- break;
- if (*pIn == 0x00) {
- *pOut=0x00;
- ++pOut;
- *pOut=0x00;
- ++pOut;
- ++pIn;
- --inLen;
- } else {
- offset=*pIn;
- offset<<=1;
- *pOut=dmapD12U[offset];
- ++pOut;
- *pOut=dmapD12U[offset+1];
- ++pOut;
- if (dmapD12U[offset] == 0x00) {
- if (dmapD12U[offset+1] == 0x1A) {
- if (*pIn != cd->srcSubS)
- ++numS;
- } else if (dmapD12U[offset+1] == 0x00) {
- pOut-=2;
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- }
- ++pIn;
- --inLen;
- }
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return 0;
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_S28) {
- /* use uchar * instead of UniChar to avoid memcpy */
- register uchar * dmapD12U=(uchar *) (cd->cnv_dmap->dmapD12U);
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register int offset;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- register size_t numS=0;
- register UniChar in; /* copy part of U28 */
- while (0 < inLen) {
- if (pLastOutBuf < pOut)
- break;
- if (*pIn == 0x00) {
- *pOut=0x00;
- ++pOut;
- ++pIn;
- --inLen;
- } else {
- offset=*pIn;
- offset<<=1;
- in=dmapD12U[offset];
- in<<=8;
- in+=dmapD12U[offset+1];
- if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
- if (in == 0x000) {
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- *pOut=in;
- ++pOut;
- } else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
- register uchar byte;
- in>>=6;
- in&=0x001F; /* 0b0000000000011111 */
- in|=0x00C0; /* 0b0000000011000000 */
- *pOut=in;
- ++pOut;
- byte=dmapD12U[offset+1];
- byte&=0x3F; /* 0b00111111; */
- byte|=0x80; /* 0b10000000; */
- *pOut=byte;
- ++pOut;
- } else if ((in & 0xFC00) == 0xD800) { /* There should not be no surrogate character in SBCS. */
- *pOut=0xEF;
- ++pOut;
- *pOut=0xBF;
- ++pOut;
- *pOut=0xBD;
- ++pOut;
- } else {
- register uchar byte;
- register uchar work;
- byte=dmapD12U[offset];
- byte>>=4;
- byte|=0xE0; /* 0b11100000; */
- *pOut=byte;
- ++pOut;
-
- byte=dmapD12U[offset];
- byte<<=2;
- work=dmapD12U[offset+1];
- work>>=6;
- byte|=work;
- byte&=0x3F; /* 0b00111111; */
- byte|=0x80; /* 0b10000000; */
- *pOut=byte;
- ++pOut;
-
- byte=dmapD12U[offset+1];
- byte&=0x3F; /* 0b00111111; */
- byte|=0x80; /* 0b10000000; */
- *pOut=byte;
- ++pOut;
- }
- /* end of U28 */
- if (dmapD12U[offset] == 0x00) {
- if (dmapD12U[offset+1] == 0x1A) {
- if (*pIn != cd->srcSubS)
- ++numS;
- }
- }
- ++pIn;
- --inLen;
- }
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return 0;
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_U2S) {
- register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- register char subS=cd->subS;
- register size_t numS=0;
- while (0 < inLen) {
- register uint32_t in;
- if (inLen == 1) {
- errno=EINVAL; /* 22 */
-
- *inBytesLeft=inLen;
- *outBytesLeft-=(pOut-*outBuf);
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
- }
- if (pLastOutBuf < pOut)
- break;
- in=pIn[0];
- in<<=8;
- in+=pIn[1];
- if (in == 0x0000) {
- *pOut=0x00;
- } else {
- if ((*pOut=dmapU2S[in]) == 0x00) {
- *pOut=subS;
- ++numS;
- errno=EINVAL; /* 22 */
- } else if (*pOut == subS) {
- if (in != cd->srcSubS)
- ++numS;
- }
- }
- ++pOut;
- pIn+=2;
- inLen-=2;
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return numS;
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_T2S) {
- register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- register char subS=cd->subS;
- register size_t numS=0;
- while (0 < inLen) {
- register uint32_t in;
- if (inLen == 1) {
- errno=EINVAL; /* 22 */
-
- *inBytesLeft=inLen-1;
- *outBytesLeft-=(pOut-*outBuf);
- *outBuf=pOut;
- *inBuf=pIn;
- ++numS;
- *numSub+=numS;
- return 0;
- }
- if (pLastOutBuf < pOut)
- break;
- in=pIn[0];
- in<<=8;
- in+=pIn[1];
- if (in == 0x0000) {
- *pOut=0x00;
-
- } else if (0xD800 <= in && in <= 0xDFFF) { /* 0xD800-0xDFFF, surrogate first and second values */
- if (0xDC00 <= in ) {
- errno=EINVAL; /* 22 */
- *inBytesLeft=inLen-1;
- *outBytesLeft-=(pOut-*outBuf);
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
-
- } else if (inLen < 4) {
- errno=EINVAL; /* 22 */
- *inBytesLeft=inLen-2;
- *outBytesLeft-=(pOut-*outBuf);
- *outBuf=pOut;
- *inBuf=pIn+2;
- return -1;
-
- } else {
- register uint32_t in2;
- in2=pIn[2];
- in2<<=8;
- in2+=pIn[3];
- if (0xDC00 <= in2 && in2 <= 0xDFFF) { /* second surrogate character =0xDC00 - 0xDFFF*/
- *pOut=subS;
- ++numS;
- pIn+=4;
- } else {
- errno=EINVAL; /* 22 */
- *inBytesLeft=inLen-1;
- *outBytesLeft-=(pOut-*outBuf);
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
- }
- }
- } else {
- if ((*pOut=dmapU2S[in]) == 0x00) {
- *pOut=subS;
- ++numS;
- errno=EINVAL; /* 22 */
- } else if (*pOut == subS) {
- if (in != cd->srcSubS)
- ++numS;
- }
- }
- ++pOut;
- pIn+=2;
- inLen-=2;
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return 0;
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_82S) {
- register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- register char subS=cd->subS;
- register size_t numS=0;
- while (0 < inLen) {
- register uint32_t in;
- uint32_t in2; /* The second surrogate value */
- if (pLastOutBuf < pOut)
- break;
- /* convert from UTF-8 to UCS-2 */
- if (*pIn == 0x00) {
- in=0x0000;
- ++pIn;
- --inLen;
- } else { /* 82U: */
- register uchar byte1=*pIn;
- if ((byte1 & 0x80) == 0x00) { /* if (byte1 & 0b10000000 == 0b00000000) { */
- /* 1 bytes sequence: 0xxxxxxx => 00000000 0xxxxxxx*/
- in=byte1;
- ++pIn;
- --inLen;
- } else if ((byte1 & 0xE0) == 0xC0) { /* (byte1 & 0b11100000 == 0b11000000) { */
- if (inLen < 2) {
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- if (byte1 == 0xC0 || byte1 == 0xC1) { /* invalid sequence */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- /* 2 bytes sequence:
- 110yyyyy 10xxxxxx => 00000yyy yyxxxxxx */
- register uchar byte2;
- ++pIn;
- byte2=*pIn;
- if ((byte2 & 0xC0) == 0x80) { /* byte2 & 0b11000000 == 0b10000000) { */
- register uchar work=byte1;
- work<<=6;
- byte2&=0x3F; /* 0b00111111; */
- byte2|=work;
-
- byte1&=0x1F; /* 0b00011111; */
- byte1>>=2;
- in=byte1;
- in<<=8;
- in+=byte2;
- inLen-=2;
- ++pIn;
- } else { /* invalid sequence */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- *numSub+=numS;
- return -1;
- }
- } else if ((byte1 & 0xF0) == 0xE0) { /* byte1 & 0b11110000 == 0b11100000 */
- /* 3 bytes sequence:
- 1110zzzz 10yyyyyy 10xxxxxx => zzzzyyyy yyxxxxxx */
- register uchar byte2;
- register uchar byte3;
- if (inLen < 3) {
- if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- ++pIn;
- byte2=*pIn;
- ++pIn;
- byte3=*pIn;
- if ((byte2 & 0xC0) != 0x80 ||
- (byte3 & 0xC0) != 0x80 ||
- (byte1 == 0xE0 && byte2 < 0xA0)) { /* invalid sequence, only 0xA0-0xBF allowed after 0xE0 */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-2;
- *numSub+=numS;
- return -1;
- }
- {
- register uchar work=byte2;
- work<<=6;
- byte3&=0x3F; /* 0b00111111; */
- byte3|=work;
-
- byte2&=0x3F; /* 0b00111111; */
- byte2>>=2;
-
- byte1<<=4;
- in=byte1 | byte2;;
- in<<=8;
- in+=byte3;
- inLen-=3;
- ++pIn;
- }
- } else if ((0xF0 <= byte1 && byte1 <= 0xF4) || /* (bytes1 & 11111000) == 0x1110000 */
- ((byte1&=0xF7) && 0xF0 <= byte1 && byte1 <= 0xF4)) { /* minic iconv() behavior */
- /* 4 bytes sequence
- 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx => 110110ww wwzzzzyy 110111yy yyxxxxxx
- where uuuuu = wwww + 1 */
- register uchar byte2;
- register uchar byte3;
- register uchar byte4;
- if (inLen < 4) {
- if ((inLen >= 2 && (pIn[1] & 0xC0) != 0x80) ||
- (inLen >= 3 && (pIn[2] & 0xC0) != 0x80) ||
- (cd->toCcsid == 13488) )
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- ++pIn;
- byte2=*pIn;
- ++pIn;
- byte3=*pIn;
- ++pIn;
- byte4=*pIn;
- if ((byte2 & 0xC0) == 0x80 && /* byte2 & 0b11000000 == 0b10000000 */
- (byte3 & 0xC0) == 0x80 && /* byte3 & 0b11000000 == 0b10000000 */
- (byte4 & 0xC0) == 0x80) { /* byte4 & 0b11000000 == 0b10000000 */
- register uchar work=byte2;
- if (byte1 == 0xF0 && byte2 < 0x90) {
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-3;
- *numSub+=numS;
- return -1;
- /* iconv() returns 0 for 0xF4908080 and convert to 0x00
- } else if (byte1 == 0xF4 && byte2 > 0x8F) {
- errno=EINVAL;
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-3;
- *numSub+=numS;
- return -1;
- */
- }
-
- work&=0x30; /* 0b00110000; */
- work>>=4;
- byte1&=0x07; /* 0b00000111; */
- byte1<<=2;
- byte1+=work; /* uuuuu */
- --byte1; /* wwww */
-
- work=byte1 & 0x0F;
- work>>=2;
- work+=0xD8; /* 0b11011011; */
- in=work;
- in<<=8;
-
- byte1<<=6;
- byte2<<=2;
- byte2&=0x3C; /* 0b00111100; */
- work=byte3;
- work>>=4;
- work&=0x03; /* 0b00000011; */
- work|=byte1;
- work|=byte2;
- in+=work;
-
- work=byte3;
- work>>=2;
- work&=0x03; /* 0b00000011; */
- work|=0xDC; /* 0b110111xx; */
- in2=work;
- in2<<=8;
-
- byte3<<=6;
- byte4&=0x3F; /* 0b00111111; */
- byte4|=byte3;
- in2+=byte4;
- inLen-=4;
- ++pIn;
- } else { /* invalid sequence */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-3;
- *numSub+=numS;
- return -1;
- }
- } else if ((byte1 & 0xF0) == 0xF0) { /* minic iconv() behavior */
- if (inLen < 4 ||
- pIn[1] < 0x80 || 0xBF < pIn[1] ||
- pIn[2] < 0x80 || 0xBF < pIn[2] ||
- pIn[3] < 0x80 || 0xBF < pIn[3] ) {
- if (inLen == 1)
- errno=EINVAL; /* 22 */
- else if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
- errno=EILSEQ; /* 116 */
- else if (inLen == 3 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80))
- errno=EILSEQ; /* 116 */
- else if (inLen >= 4 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80 || (pIn[3] & 0xC0) != 0x80))
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
-
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- } else {
- *pOut=subS; /* Though returns replacement character, which iconv() does not return. */
- ++pOut;
- ++numS;
- pIn+=4;
- inLen-=4;
- /* UTF-8_IBM-850 0xF0908080 : converted value does not match, iconv=0x00, dmap=0x7F
- UTF-8_IBM-850 0xF0908081 : converted value does not match, iconv=0x01, dmap=0x7F
- UTF-8_IBM-850 0xF0908082 : converted value does not match, iconv=0x02, dmap=0x7F
- UTF-8_IBM-850 0xF0908083 : converted value does not match, iconv=0x03, dmap=0x7F
- ....
- UTF-8_IBM-850 0xF09081BE : converted value does not match, iconv=0x7E, dmap=0x7F
- UTF-8_IBM-850 0xF09081BF : converted value does not match, iconv=0x1C, dmap=0x7F
- UTF-8_IBM-850 0xF09082A0 : converted value does not match, iconv=0xFF, dmap=0x7F
- UTF-8_IBM-850 0xF09082A1 : converted value does not match, iconv=0xAD, dmap=0x7F
- ....
- */
- continue;
- /* iconv() returns 0 with strange 1 byte converted values */
- }
-
- } else { /* invalid sequence */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- }
- /* end of UTF-8 to UCS-2 */
- if (in == 0x0000) {
- *pOut=0x00;
- } else {
- if ((*pOut=dmapU2S[in]) == 0x00) {
- *pOut=subS;
- ++numS;
- errno=EINVAL; /* 22 */
- } else if (*pOut == subS) {
- if (in != cd->srcSubS) {
- ++numS;
- }
- }
- }
- ++pOut;
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return 0;
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_D2U) {
- /* use uchar * instead of UniChar to avoid memcpy */
- register uchar * dmapD12U=(uchar *) (cd->cnv_dmap->dmapD12U);
- register uchar * dmapD22U=(uchar *) (cd->cnv_dmap->dmapD22U);
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register int offset;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- register size_t numS=0;
- while (0 < inLen) {
- if (pLastOutBuf < pOut)
- break;
- if (*pIn == 0x00) {
- *pOut=0x00;
- ++pOut;
- *pOut=0x00;
- ++pOut;
- ++pIn;
- --inLen;
- } else {
- offset=*pIn;
- offset<<=1;
- if (dmapD12U[offset] == 0x00 &&
- dmapD12U[offset+1] == 0x00) { /* DBCS */
- if (inLen < 2) {
- if (*pIn == 0x80 || *pIn == 0xFF ||
- (cd->fromCcsid == 943 && (*pIn == 0x85 || *pIn == 0x86 || *pIn == 0xA0 || *pIn == 0xEB || *pIn == 0xEC || *pIn == 0xEF || *pIn == 0xFD || *pIn == 0xFE)) ||
- (cd->fromCcsid == 932 && (*pIn == 0x85 || *pIn == 0x86 || *pIn == 0x87 || *pIn == 0xEB || *pIn == 0xEC || *pIn == 0xED || *pIn == 0xEE || *pIn == 0xEF)) ||
- (cd->fromCcsid == 1381 && ((0x85 <= *pIn && *pIn <= 0x8B) || (0xAA <= *pIn && *pIn <= 0xAF) || (0xF8 <= *pIn && *pIn <= 0xFE))))
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
- }
- offset-=0x100;
- ++pIn;
- offset<<=8;
- offset+=(*pIn * 2);
- if (dmapD22U[offset] == 0x00 &&
- dmapD22U[offset+1] == 0x00) {
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- return -1;
- }
- *pOut=dmapD22U[offset];
- ++pOut;
- *pOut=dmapD22U[offset+1];
- ++pOut;
- if (dmapD22U[offset] == 0xFF &&
- dmapD22U[offset+1] == 0xFD) {
- if (pIn[-1] * 0x100 + pIn[0] != cd->srcSubD)
- ++numS;
- }
- ++pIn;
- inLen-=2;
- } else { /* SBCS */
- *pOut=dmapD12U[offset];
- ++pOut;
- *pOut=dmapD12U[offset+1];
- ++pOut;
- if (dmapD12U[offset] == 0x00 &&
- dmapD12U[offset+1] == 0x1A) {
- if (*pIn != cd->srcSubS)
- ++numS;
- }
- ++pIn;
- --inLen;
- }
- }
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return 0;
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_D28) {
- /* use uchar * instead of UniChar to avoid memcpy */
- register uchar * dmapD12U=(uchar *) (cd->cnv_dmap->dmapD12U);
- register uchar * dmapD22U=(uchar *) (cd->cnv_dmap->dmapD22U);
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register int offset;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- register size_t numS=0;
- register UniChar in; /* copy part of U28 */
- register UniChar ucs2;
- while (0 < inLen) {
- if (pLastOutBuf < pOut)
- break;
- if (*pIn == 0x00) {
- *pOut=0x00;
- ++pOut;
- ++pIn;
- --inLen;
- } else {
- offset=*pIn;
- offset<<=1;
- if (dmapD12U[offset] == 0x00 &&
- dmapD12U[offset+1] == 0x00) { /* DBCS */
- if (inLen < 2) {
- if (*pIn == 0x80 || *pIn == 0xFF ||
- (cd->fromCcsid == 943 && (*pIn == 0x85 || *pIn == 0x86 || *pIn == 0xA0 || *pIn == 0xEB || *pIn == 0xEC || *pIn == 0xEF || *pIn == 0xFD || *pIn == 0xFE)) ||
- (cd->fromCcsid == 932 && (*pIn == 0x85 || *pIn == 0x86 || *pIn == 0x87 || *pIn == 0xEB || *pIn == 0xEC || *pIn == 0xED || *pIn == 0xEE || *pIn == 0xEF)) ||
- (cd->fromCcsid == 1381 && ((0x85 <= *pIn && *pIn <= 0x8B) || (0xAA <= *pIn && *pIn <= 0xAF) || (0xF8 <= *pIn && *pIn <= 0xFE))))
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
- }
- offset-=0x100;
- ++pIn;
- offset<<=8;
- offset+=(*pIn * 2);
- if (dmapD22U[offset] == 0x00 &&
- dmapD22U[offset+1] == 0x00) {
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- return -1;
- }
- in=dmapD22U[offset];
- in<<=8;
- in+=dmapD22U[offset+1];
- ucs2=in;
- if (dmapD22U[offset] == 0xFF &&
- dmapD22U[offset+1] == 0xFD) {
- if (in != cd->srcSubD)
- ++numS;
- }
- ++pIn;
- inLen-=2;
- } else { /* SBCS */
- in=dmapD12U[offset];
- in<<=8;
- in+=dmapD12U[offset+1];
- ucs2=in;
- if (dmapD12U[offset] == 0x00 &&
- dmapD12U[offset+1] == 0x1A) {
- if (in != cd->srcSubS)
- ++numS;
- }
- ++pIn;
- --inLen;
- }
- if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
- *pOut=in;
- ++pOut;
- } else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
- register uchar byte;
- in>>=6;
- in&=0x001F; /* 0b0000000000011111 */
- in|=0x00C0; /* 0b0000000011000000 */
- *pOut=in;
- ++pOut;
- byte=ucs2; /* dmapD12U[offset+1]; */
- byte&=0x3F; /* 0b00111111; */
- byte|=0x80; /* 0b10000000; */
- *pOut=byte;
- ++pOut;
- } else if ((in & 0xFC00) == 0xD800) { /* There should not be no surrogate character in SBCS. */
- *pOut=0xEF;
- ++pOut;
- *pOut=0xBF;
- ++pOut;
- *pOut=0xBD;
- ++pOut;
- } else {
- register uchar byte;
- register uchar work;
- byte=(ucs2>>8); /* dmapD12U[offset]; */
- byte>>=4;
- byte|=0xE0; /* 0b11100000; */
- *pOut=byte;
- ++pOut;
-
- byte=(ucs2>>8); /* dmapD12U[offset]; */
- byte<<=2;
- work=ucs2; /* dmapD12U[offset+1]; */
- work>>=6;
- byte|=work;
- byte&=0x3F; /* 0b00111111; */
- byte|=0x80; /* 0b10000000; */
- *pOut=byte;
- ++pOut;
-
- byte=ucs2; /* dmapD12U[offset+1]; */
- byte&=0x3F; /* 0b00111111; */
- byte|=0x80; /* 0b10000000; */
- *pOut=byte;
- ++pOut;
- }
- /* end of U28 */
- }
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return 0;
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_U2D) {
- register uchar * dmapU2D=cd->cnv_dmap->dmapU2D;
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- register char subS=cd->subS;
- register char * pSubD=(char *) &(cd->subD);
- register size_t numS=0;
- while (0 < inLen) {
- register uint32_t in;
- if (inLen == 1) {
- errno=EINVAL; /* 22 */
-
- *inBytesLeft=inLen;
- *outBytesLeft-=(pOut-*outBuf);
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
- }
- if (pLastOutBuf < pOut)
- break;
- in=pIn[0];
- in<<=8;
- in+=pIn[1];
- if (in == 0x0000) {
- *pOut=0x00;
- ++pOut;
- } else {
- in<<=1;
- *pOut=dmapU2D[in];
- ++pOut;
- if (dmapU2D[in+1] == 0x00) { /* SBCS */
- if (*pOut == subS) {
- if (in != cd->srcSubS)
- ++numS;
- }
- } else {
- *pOut=dmapU2D[in+1];
- ++pOut;
- if (dmapU2D[in] == pSubD[0] &&
- dmapU2D[in+1] == pSubD[1]) {
- in>>=1;
- if (in != cd->srcSubD)
- ++numS;
- }
- }
- }
- pIn+=2;
- inLen-=2;
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return numS; /* to minic iconv() behavior */
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_T2D) {
- register uchar * dmapU2D=cd->cnv_dmap->dmapU2D;
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- register char subS=cd->subS;
- register char * pSubD=(char *) &(cd->subD);
- register size_t numS=0;
- while (0 < inLen) {
- register uint32_t in;
- if (inLen == 1) {
- errno=EINVAL; /* 22 */
- *inBytesLeft=inLen-1;
- *outBytesLeft-=(pOut-*outBuf);
- *outBuf=pOut;
- *inBuf=pIn;
- ++numS;
- *numSub+=numS;
- return 0;
- }
- if (pLastOutBuf < pOut)
- break;
- in=pIn[0];
- in<<=8;
- in+=pIn[1];
- if (in == 0x0000) {
- *pOut=0x00;
- ++pOut;
- } else if (0xD800 <= in && in <= 0xDBFF) { /* first byte of surrogate */
- errno=EINVAL; /* 22 */
- *inBytesLeft=inLen-2;
- *outBytesLeft-=(pOut-*outBuf);
- *outBuf=pOut;
- *inBuf=pIn+2;
- ++numS;
- *numSub+=numS;
- return -1;
-
- } else if (0xDC00 <= in && in <= 0xDFFF) { /* second byte of surrogate */
- errno=EINVAL; /* 22 */
- *inBytesLeft=inLen-1;
- *outBytesLeft-=(pOut-*outBuf);
- *outBuf=pOut;
- *inBuf=pIn;
- ++numS;
- *numSub+=numS;
- return -1;
-
- } else {
- in<<=1;
- *pOut=dmapU2D[in];
- ++pOut;
- if (dmapU2D[in+1] == 0x00) { /* SBCS */
- if (*pOut == subS) {
- if (in != cd->srcSubS)
- ++numS;
- }
- } else {
- *pOut=dmapU2D[in+1];
- ++pOut;
- if (dmapU2D[in] == pSubD[0] &&
- dmapU2D[in+1] == pSubD[1]) {
- in>>=1;
- if (in != cd->srcSubD)
- ++numS;
- }
- }
- }
- pIn+=2;
- inLen-=2;
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return 0; /* to minic iconv() behavior */
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_82D) {
- register uchar * dmapU2D=cd->cnv_dmap->dmapU2D;
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- register char subS=cd->subS;
- register char * pSubD=(char *) &(cd->subD);
- register size_t numS=0;
- while (0 < inLen) {
- register uint32_t in;
- uint32_t in2;
- if (pLastOutBuf < pOut)
- break;
- /* convert from UTF-8 to UCS-2 */
- if (*pIn == 0x00) {
- in=0x0000;
- ++pIn;
- --inLen;
- } else { /* 82U: */
- register uchar byte1=*pIn;
- if ((byte1 & 0x80) == 0x00) { /* if (byte1 & 0b10000000 == 0b00000000) { */
- /* 1 bytes sequence: 0xxxxxxx => 00000000 0xxxxxxx*/
- in=byte1;
- ++pIn;
- --inLen;
- } else if ((byte1 & 0xE0) == 0xC0) { /* (byte1 & 0b11100000 == 0b11000000) { */
- if (inLen < 2) {
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- if (byte1 == 0xC0 || byte1 == 0xC1) { /* invalid sequence */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- /* 2 bytes sequence:
- 110yyyyy 10xxxxxx => 00000yyy yyxxxxxx */
- register uchar byte2;
- ++pIn;
- byte2=*pIn;
- if ((byte2 & 0xC0) == 0x80) { /* byte2 & 0b11000000 == 0b10000000) { */
- register uchar work=byte1;
- work<<=6;
- byte2&=0x3F; /* 0b00111111; */
- byte2|=work;
-
- byte1&=0x1F; /* 0b00011111; */
- byte1>>=2;
- in=byte1;
- in<<=8;
- in+=byte2;
- inLen-=2;
- ++pIn;
- } else { /* invalid sequence */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- *numSub+=numS;
- return -1;
- }
- } else if ((byte1 & 0xF0) == 0xE0) { /* byte1 & 0b11110000 == 0b11100000 */
- /* 3 bytes sequence:
- 1110zzzz 10yyyyyy 10xxxxxx => zzzzyyyy yyxxxxxx */
- register uchar byte2;
- register uchar byte3;
- if (inLen < 3) {
- if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- ++pIn;
- byte2=*pIn;
- ++pIn;
- byte3=*pIn;
- if ((byte2 & 0xC0) != 0x80 ||
- (byte3 & 0xC0) != 0x80 ||
- (byte1 == 0xE0 && byte2 < 0xA0)) { /* invalid sequence, only 0xA0-0xBF allowed after 0xE0 */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-2;
- *numSub+=numS;
- return -1;
- }
- {
- register uchar work=byte2;
- work<<=6;
- byte3&=0x3F; /* 0b00111111; */
- byte3|=work;
-
- byte2&=0x3F; /* 0b00111111; */
- byte2>>=2;
-
- byte1<<=4;
- in=byte1 | byte2;;
- in<<=8;
- in+=byte3;
- inLen-=3;
- ++pIn;
- }
- } else if ((0xF0 <= byte1 && byte1 <= 0xF4)) { /* (bytes1 & 11111000) == 0x1110000 */
- /* 4 bytes sequence
- 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx => 110110ww wwzzzzyy 110111yy yyxxxxxx
- where uuuuu = wwww + 1 */
- register uchar byte2;
- register uchar byte3;
- register uchar byte4;
- if (inLen < 4) {
- if ((inLen >= 2 && (pIn[1] & 0xC0) != 0x80) ||
- (inLen >= 3 && (pIn[2] & 0xC0) != 0x80) ||
- (cd->toCcsid == 13488) )
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- ++pIn;
- byte2=*pIn;
- ++pIn;
- byte3=*pIn;
- ++pIn;
- byte4=*pIn;
- if ((byte2 & 0xC0) == 0x80 && /* byte2 & 0b11000000 == 0b10000000 */
- (byte3 & 0xC0) == 0x80 && /* byte3 & 0b11000000 == 0b10000000 */
- (byte4 & 0xC0) == 0x80) { /* byte4 & 0b11000000 == 0b10000000 */
- register uchar work=byte2;
- if (byte1 == 0xF0 && byte2 < 0x90) {
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-3;
- *numSub+=numS;
- return -1;
- /* iconv() returns 0 for 0xF4908080 and convert to 0x00
- } else if (byte1 == 0xF4 && byte2 > 0x8F) {
- errno=EINVAL;
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-3;
- *numSub+=numS;
- return -1;
- */
- }
-
- work&=0x30; /* 0b00110000; */
- work>>=4;
- byte1&=0x07; /* 0b00000111; */
- byte1<<=2;
- byte1+=work; /* uuuuu */
- --byte1; /* wwww */
-
- work=byte1 & 0x0F;
- work>>=2;
- work+=0xD8; /* 0b11011011; */
- in=work;
- in<<=8;
-
- byte1<<=6;
- byte2<<=2;
- byte2&=0x3C; /* 0b00111100; */
- work=byte3;
- work>>=4;
- work&=0x03; /* 0b00000011; */
- work|=byte1;
- work|=byte2;
- in+=work;
-
- work=byte3;
- work>>=2;
- work&=0x03; /* 0b00000011; */
- work|=0xDC; /* 0b110111xx; */
- in2=work;
- in2<<=8;
-
- byte3<<=6;
- byte4&=0x3F; /* 0b00111111; */
- byte4|=byte3;
- in2+=byte4;
- inLen-=4;
- ++pIn;
-#ifdef match_with_GBK
- if ((0xD800 == in && in2 < 0xDC80) ||
- (0xD840 == in && in2 < 0xDC80) ||
- (0xD880 == in && in2 < 0xDC80) ||
- (0xD8C0 == in && in2 < 0xDC80) ||
- (0xD900 == in && in2 < 0xDC80) ||
- (0xD940 == in && in2 < 0xDC80) ||
- (0xD980 == in && in2 < 0xDC80) ||
- (0xD9C0 == in && in2 < 0xDC80) ||
- (0xDA00 == in && in2 < 0xDC80) ||
- (0xDA40 == in && in2 < 0xDC80) ||
- (0xDA80 == in && in2 < 0xDC80) ||
- (0xDAC0 == in && in2 < 0xDC80) ||
- (0xDB00 == in && in2 < 0xDC80) ||
- (0xDB40 == in && in2 < 0xDC80) ||
- (0xDB80 == in && in2 < 0xDC80) ||
- (0xDBC0 == in && in2 < 0xDC80)) {
-#else
- if ((0xD800 <= in && in <= 0xDBFF) &&
- (0xDC00 <= in2 && in2 <= 0xDFFF)) {
-#endif
- *pOut=subS;
- ++pOut;
- ++numS;
- continue;
- }
- } else { /* invalid sequence */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-3;
- *numSub+=numS;
- return -1;
- }
- } else if (0xF5 <= byte1 && byte1 <= 0xFF) { /* minic iconv() behavior */
- if (inLen < 4 ||
- (inLen >= 4 && byte1 == 0xF8 && pIn[1] < 0x90) ||
- pIn[1] < 0x80 || 0xBF < pIn[1] ||
- pIn[2] < 0x80 || 0xBF < pIn[2] ||
- pIn[3] < 0x80 || 0xBF < pIn[3] ) {
- if (inLen == 1)
- errno=EINVAL; /* 22 */
- else if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
- errno=EILSEQ; /* 116 */
- else if (inLen == 3 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80))
- errno=EILSEQ; /* 116 */
- else if (inLen >= 4 && (byte1 == 0xF8 || (pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80 || (pIn[3] & 0xC0) != 0x80))
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
-
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- } else if ((pIn[1] == 0x80 || pIn[1] == 0x90 || pIn[1] == 0xA0 || pIn[1] == 0xB0) &&
- pIn[2] < 0x82) {
- *pOut=subS; /* Though returns replacement character, which iconv() does not return. */
- ++pOut;
- ++numS;
- pIn+=4;
- inLen-=4;
- continue;
- } else {
- *pOut=pSubD[0]; /* Though returns replacement character, which iconv() does not return. */
- ++pOut;
- *pOut=pSubD[1];
- ++pOut;
- ++numS;
- pIn+=4;
- inLen-=4;
- continue;
- /* iconv() returns 0 with strange 1 byte converted values */
- }
-
- } else { /* invalid sequence */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- }
- /* end of UTF-8 to UCS-2 */
- if (in == 0x0000) {
- *pOut=0x00;
- ++pOut;
- } else {
- in<<=1;
- *pOut=dmapU2D[in];
- ++pOut;
- if (dmapU2D[in+1] == 0x00) { /* SBCS */
- if (dmapU2D[in] == subS) {
- in>>=1;
- if (in != cd->srcSubS)
- ++numS;
- }
- } else {
- *pOut=dmapU2D[in+1];
- ++pOut;
- if (dmapU2D[in] == pSubD[0] &&
- dmapU2D[in+1] == pSubD[1]) {
- in>>=1;
- if (in != cd->srcSubD)
- ++numS;
- }
- }
- }
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return 0;
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_82U) {
- /* See http://unicode.org/versions/corrigendum1.html */
- /* convert from UTF-8 to UTF-16 can cover all conversion from UTF-8 to UCS-2 */
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- register size_t numS=0;
- while (0 < inLen) {
- if (pLastOutBuf < pOut)
- break;
- if (*pIn == 0x00) {
- *pOut=0x00;
- ++pOut;
- *pOut=0x00;
- ++pOut;
- ++pIn;
- --inLen;
- } else { /* 82U: */
- register uchar byte1=*pIn;
- if ((byte1 & 0x80) == 0x00) { /* if (byte1 & 0b10000000 == 0b00000000) { */
- /* 1 bytes sequence: 0xxxxxxx => 00000000 0xxxxxxx*/
- *pOut=0x00;
- ++pOut;
- *pOut=byte1;
- ++pOut;
- ++pIn;
- --inLen;
- } else if ((byte1 & 0xE0) == 0xC0) { /* (byte1 & 0b11100000 == 0b11000000) { */
- if (inLen < 2) {
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- if (byte1 == 0xC0 || byte1 == 0xC1) { /* invalid sequence */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- /* 2 bytes sequence:
- 110yyyyy 10xxxxxx => 00000yyy yyxxxxxx */
- register uchar byte2;
- ++pIn;
- byte2=*pIn;
- if ((byte2 & 0xC0) == 0x80) { /* byte2 & 0b11000000 == 0b10000000) { */
- register uchar work=byte1;
- work<<=6;
- byte2&=0x3F; /* 0b00111111; */
- byte2|=work;
-
- byte1&=0x1F; /* 0b00011111; */
- byte1>>=2;
- *pOut=byte1;
- ++pOut;
- *pOut=byte2;
- ++pOut;
- inLen-=2;
- ++pIn;
- } else { /* invalid sequence */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-1;
- *numSub+=numS;
- return -1;
- }
- } else if ((byte1 & 0xF0) == 0xE0) { /* byte1 & 0b11110000 == 0b11100000 */
- /* 3 bytes sequence:
- 1110zzzz 10yyyyyy 10xxxxxx => zzzzyyyy yyxxxxxx */
- register uchar byte2;
- register uchar byte3;
- if (inLen < 3) {
- if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- ++pIn;
- byte2=*pIn;
- ++pIn;
- byte3=*pIn;
- if ((byte2 & 0xC0) != 0x80 ||
- (byte3 & 0xC0) != 0x80 ||
- (byte1 == 0xE0 && byte2 < 0xA0)) { /* invalid sequence, only 0xA0-0xBF allowed after 0xE0 */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-2;
- *numSub+=numS;
- return -1;
- }
- {
- register uchar work=byte2;
- work<<=6;
- byte3&=0x3F; /* 0b00111111; */
- byte3|=work;
-
- byte2&=0x3F; /* 0b00111111; */
- byte2>>=2;
-
- byte1<<=4;
- *pOut=byte1 | byte2;;
- ++pOut;
- *pOut=byte3;
- ++pOut;
- inLen-=3;
- ++pIn;
- }
- } else if ((0xF0 <= byte1 && byte1 <= 0xF4) || /* (bytes1 & 11111000) == 0x1110000 */
- ((byte1&=0xF7) && 0xF0 <= byte1 && byte1 <= 0xF4)) { /* minic iconv() behavior */
- /* 4 bytes sequence
- 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx => 110110ww wwzzzzyy 110111yy yyxxxxxx
- where uuuuu = wwww + 1 */
- register uchar byte2;
- register uchar byte3;
- register uchar byte4;
- if (inLen < 4 || cd->toCcsid == 13488) {
- if ((inLen >= 2 && (pIn[1] & 0xC0) != 0x80) ||
- (inLen >= 3 && (pIn[2] & 0xC0) != 0x80) ||
- (cd->toCcsid == 13488) )
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- ++pIn;
- byte2=*pIn;
- ++pIn;
- byte3=*pIn;
- ++pIn;
- byte4=*pIn;
- if ((byte2 & 0xC0) == 0x80 && /* byte2 & 0b11000000 == 0b10000000 */
- (byte3 & 0xC0) == 0x80 && /* byte3 & 0b11000000 == 0b10000000 */
- (byte4 & 0xC0) == 0x80) { /* byte4 & 0b11000000 == 0b10000000 */
- register uchar work=byte2;
- if (byte1 == 0xF0 && byte2 < 0x90) {
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-3;
- *numSub+=numS;
- return -1;
- } else if (byte1 == 0xF4 && byte2 > 0x8F) {
- errno=EINVAL; /* 22 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-3;
- *numSub+=numS;
- return -1;
- }
-
- work&=0x30; /* 0b00110000; */
- work>>=4;
- byte1&=0x07; /* 0b00000111; */
- byte1<<=2;
- byte1+=work; /* uuuuu */
- --byte1; /* wwww */
-
- work=byte1 & 0x0F;
- work>>=2;
- work+=0xD8; /* 0b11011011; */
- *pOut=work;
- ++pOut;
-
- byte1<<=6;
- byte2<<=2;
- byte2&=0x3C; /* 0b00111100; */
- work=byte3;
- work>>=4;
- work&=0x03; /* 0b00000011; */
- work|=byte1;
- work|=byte2;
- *pOut=work;
- ++pOut;
-
- work=byte3;
- work>>=2;
- work&=0x03; /* 0b00000011; */
- work|=0xDC; /* 0b110111xx; */
- *pOut=work;
- ++pOut;
-
- byte3<<=6;
- byte4&=0x3F; /* 0b00111111; */
- byte4|=byte3;
- *pOut=byte4;
- ++pOut;
- inLen-=4;
- ++pIn;
- } else { /* invalid sequence */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn-3;
- *numSub+=numS;
- return -1;
- }
- } else if ((byte1 & 0xF0) == 0xF0) {
- if (cd->toCcsid == 13488) {
- errno=EILSEQ; /* 116 */
- } else {
- if (inLen == 1)
- errno=EINVAL; /* 22 */
- else if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
- errno=EILSEQ; /* 116 */
- else if (inLen == 3 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80))
- errno=EILSEQ; /* 116 */
- else if (inLen >= 4 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80 || (pIn[3] & 0xC0) != 0x80))
- errno=EILSEQ; /* 116 */
- else
- errno=EINVAL; /* 22 */
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
-
- } else { /* invalid sequence */
- errno=EILSEQ; /* 116 */
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return -1;
- }
- }
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- *numSub+=numS;
- return 0;
- } else if (cd->cnv_dmap->codingSchema == DMAP_U28) {
- /* See http://unicode.org/versions/corrigendum1.html */
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- // register size_t numS=0;
- while (0 < inLen) {
- register uint32_t in;
- if (inLen == 1) {
- errno=EINVAL; /* 22 */
- *inBytesLeft=inLen;
- *outBytesLeft-=(pOut-*outBuf);
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
- }
- if (pLastOutBuf < pOut)
- break;
- in=pIn[0];
- in<<=8;
- in+=pIn[1];
- if (in == 0x0000) {
- *pOut=0x00;
- ++pOut;
- } else if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
- *pOut=in;
- ++pOut;
- } else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
- register uchar byte;
- in>>=6;
- in&=0x001F; /* 0b0000000000011111 */
- in|=0x00C0; /* 0b0000000011000000 */
- *pOut=in;
- ++pOut;
- byte=pIn[1];
- byte&=0x3F; /* 0b00111111; */
- byte|=0x80; /* 0b10000000; */
- *pOut=byte;
- ++pOut;
- } else {
- register uchar byte;
- register uchar work;
- byte=pIn[0];
- byte>>=4;
- byte|=0xE0; /* 0b11100000; */
- *pOut=byte;
- ++pOut;
-
- byte=pIn[0];
- byte<<=2;
- work=pIn[1];
- work>>=6;
- byte|=work;
- byte&=0x3F; /* 0b00111111; */
- byte|=0x80; /* 0b10000000; */
- *pOut=byte;
- ++pOut;
-
- byte=pIn[1];
- byte&=0x3F; /* 0b00111111; */
- byte|=0x80; /* 0b10000000; */
- *pOut=byte;
- ++pOut;
- }
- pIn+=2;
- inLen-=2;
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- // *numSub+=numS;
- return 0;
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_T28) { /* UTF-16_UTF-8 */
- /* See http://unicode.org/versions/corrigendum1.html */
- register int inLen=*inBytesLeft;
- register char * pOut=*outBuf;
- register char * pIn=*inBuf;
- register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
- // register size_t numS=0;
- while (0 < inLen) {
- register uint32_t in;
- if (inLen == 1) {
- errno=EINVAL; /* 22 */
- *inBytesLeft=0;
- *outBytesLeft-=(pOut-*outBuf);
- *outBuf=pOut;
- *inBuf=pIn;
- return 0;
- }
- if (pLastOutBuf < pOut)
- break;
- in=pIn[0];
- in<<=8;
- in+=pIn[1];
- if (in == 0x0000) {
- *pOut=0x00;
- ++pOut;
- } else if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
- *pOut=in;
- ++pOut;
- } else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
- register uchar byte;
- in>>=6;
- in&=0x001F; /* 0b0000000000011111 */
- in|=0x00C0; /* 0b0000000011000000 */
- *pOut=in;
- ++pOut;
- byte=pIn[1];
- byte&=0x3F; /* 0b00111111; */
- byte|=0x80; /* 0b10000000; */
- *pOut=byte;
- ++pOut;
- } else if ((in & 0xFC00) == 0xD800) { /* in & 0b1111110000000000 == 0b1101100000000000, first surrogate character */
- if (0xDC00 <= in ) {
- errno=EINVAL; /* 22 */
- *inBytesLeft=inLen-1;
- *outBytesLeft-=(pOut-*outBuf);
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
-
- } else if (inLen < 4) {
- errno=EINVAL; /* 22 */
- *inBytesLeft=inLen-2;
- *outBytesLeft-=(pOut-*outBuf);
- *outBuf=pOut;
- *inBuf=pIn+2;
- return -1;
-
- } else if ((pIn[2] & 0xFC) != 0xDC) { /* pIn[2] & 0b11111100 == 0b11011100, second surrogate character */
- errno=EINVAL; /* 22 */
- *inBytesLeft=inLen-2;
- *outBytesLeft-=(pOut-*outBuf);
- *outBuf=pOut;
- *inBuf=pIn+2;
- return -1;
-
- } else {
- register uchar byte;
- register uchar work;
- in>>=6;
- in&=0x000F; /* 0b0000000000001111 */
- byte=in; /* wwww */
- ++byte; /* uuuuu */
- work=byte; /* save uuuuu */
- byte>>=2;
- byte|=0xF0; /* 0b11110000; */
- *pOut=byte;
- ++pOut;
-
- byte=work;
- byte&=0x03; /* 0b00000011; */
- byte<<=4;
- byte|=0x80; /* 0b10000000; */
- work=pIn[1];
- work&=0x3C; /* 0b00111100; */
- work>>=2;
- byte|=work;
- *pOut=byte;
- ++pOut;
-
- byte=pIn[1];
- byte&=0x03; /* 0b00000011; */
- byte<<=4;
- byte|=0x80; /* 0b10000000; */
- work=pIn[2];
- work&=0x03; /* 0b00000011; */
- work<<=2;
- byte|=work;
- work=pIn[3];
- work>>=6;
- byte|=work;
- *pOut=byte;
- ++pOut;
-
- byte=pIn[3];
- byte&=0x3F; /* 0b00111111; */
- byte|=0x80; /* 0b10000000; */
- *pOut=byte;
- ++pOut;
- pIn+=2;
- inLen-=2;
- }
- } else if ((in & 0xFC00) == 0xDC00) { /* in & 0b11111100 == 0b11011100, second surrogate character */
- errno=EINVAL; /* 22 */
- *inBytesLeft=inLen-1;
- *outBytesLeft-=(pOut-*outBuf);
- *outBuf=pOut;
- *inBuf=pIn;
- return -1;
-
- } else {
- register uchar byte;
- register uchar work;
- byte=pIn[0];
- byte>>=4;
- byte|=0xE0; /* 0b11100000; */
- *pOut=byte;
- ++pOut;
-
- byte=pIn[0];
- byte<<=2;
- work=pIn[1];
- work>>=6;
- byte|=work;
- byte&=0x3F; /* 0b00111111; */
- byte|=0x80; /* 0b10000000; */
- *pOut=byte;
- ++pOut;
-
- byte=pIn[1];
- byte&=0x3F; /* 0b00111111; */
- byte|=0x80; /* 0b10000000; */
- *pOut=byte;
- ++pOut;
- }
- pIn+=2;
- inLen-=2;
- }
- *outBytesLeft-=(pOut-*outBuf);
- *inBytesLeft=inLen;
- *outBuf=pOut;
- *inBuf=pIn;
- // *numSub+=numS;
- return 0;
-
- } else if (cd->cnv_dmap->codingSchema == DMAP_U2U) { /* UTF-16_UCS-2 */
- register int inLen=*inBytesLeft;
- register int outLen=*outBytesLeft;
- if (inLen <= outLen) {
- memcpy(*outBuf, *inBuf, inLen);
- (*outBytesLeft)-=inLen;
- (*inBuf)+=inLen;
- (*outBuf)+=inLen;
- *inBytesLeft=0;
- return 0;
- }
- memcpy(*outBuf, *inBuf, outLen);
- (*outBytesLeft)=0;
- (*inBuf)+=outLen;
- (*outBuf)+=outLen;
- *inBytesLeft-=outLen;
- return (*inBytesLeft);
-
- } else {
- return -1;
- }
- return 0;
-}
-
-
-#ifdef DEBUG
-inline size_t myconv(myconv_t cd ,
- char** inBuf,
- size_t* inBytesLeft,
- char** outBuf,
- size_t* outBytesLeft,
- size_t* numSub)
-{
- if (cd->converterType == CONVERTER_ICONV) {
- return myconv_iconv(cd,inBuf,inBytesLeft,outBuf,outBytesLeft,numSub);
- } else if (cd->converterType == CONVERTER_DMAP) {
- return myconv_dmap(cd,inBuf,inBytesLeft,outBuf,outBytesLeft,numSub);
- }
- return -1;
-}
-
-inline char * converterName(int32_t type)
-{
- if (type == CONVERTER_ICONV)
- return "iconv";
- else if (type == CONVERTER_DMAP)
- return "dmap";
-
- return "?????";
-}
-#else
-#define myconv(a,b,c,d,e,f) \
-(((a)->converterType == CONVERTER_ICONV)? myconv_iconv((a),(b),(c),(d),(e),(f)): (((a)->converterType == CONVERTER_DMAP)? myconv_dmap((a),(b),(c),(d),(e),(f)): -1))
-
-
-#define converterName(a) \
-(((a) == CONVERTER_ICONV)? "iconv": ((a) == CONVERTER_DMAP)? "dmap": "?????")
-#endif
-
-void initMyconv();
-void cleanupMyconv();
-
-#endif