summaryrefslogtreecommitdiff
path: root/gcc/mbchar.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/mbchar.c')
-rw-r--r--gcc/mbchar.c160
1 files changed, 101 insertions, 59 deletions
diff --git a/gcc/mbchar.c b/gcc/mbchar.c
index a22e52b56b3..0f1794a0344 100644
--- a/gcc/mbchar.c
+++ b/gcc/mbchar.c
@@ -18,11 +18,9 @@ along with GNU CC; see the file COPYING. If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
-/* These functions are used to manipulate multibyte characters. */
-
/* Note regarding cross compilation:
- In general translation of multibyte characters to wide characters can
+ In general, translation of multibyte characters to wide characters can
only work in a native compiler since the translation function (mbtowc)
needs to know about both the source and target character encoding. However,
this particular implementation for JIS, SJIS and EUCJP source characters
@@ -37,31 +35,24 @@ Boston, MA 02111-1307, USA. */
#include "mbchar.h"
#include <locale.h>
-typedef enum
-{
- ESCAPE, DOLLAR, BRACKET, AT, B, J, NUL, JIS_CHAR, OTHER, JIS_C_NUM
-} JIS_CHAR_TYPE;
+typedef enum {ESCAPE, DOLLAR, BRACKET, AT, B, J, NUL, JIS_CHAR, OTHER,
+ JIS_C_NUM} JIS_CHAR_TYPE;
-typedef enum
-{
- ASCII, A_ESC, A_ESC_DL, JIS, JIS_1, JIS_2, J_ESC, J_ESC_BR,
- J2_ESC, J2_ESC_BR, INV, JIS_S_NUM
-} JIS_STATE;
+typedef enum {ASCII, A_ESC, A_ESC_DL, JIS, JIS_1, JIS_2, J_ESC, J_ESC_BR,
+ J2_ESC, J2_ESC_BR, INV, JIS_S_NUM} JIS_STATE;
+
+typedef enum {COPYA, COPYJ, COPYJ2, MAKE_A, MAKE_J, NOOP,
+ EMPTY, ERROR} JIS_ACTION;
+
+/* State/action tables for processing JIS encoding:
+
+ Where possible, switches to JIS are grouped with proceding JIS characters
+ and switches to ASCII are grouped with preceding JIS characters.
+ Thus, maximum returned length is:
+ 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6. */
-typedef enum
-{
- COPYA, COPYJ, COPYJ2, MAKE_A, MAKE_J, NOOP, EMPTY, ERROR
-} JIS_ACTION;
-
-/*****************************************************************************
- * state/action tables for processing JIS encoding
- * Where possible, switches to JIS are grouped with proceding JIS characters
- * and switches to ASCII are grouped with preceding JIS characters.
- * Thus, maximum returned length is:
- * 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6.
- *****************************************************************************/
static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
-/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER*/
+/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTH*/
/*ASCII*/ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
/*A_ESC*/ { ASCII, A_ESC_DL,ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
/*A_ESC_DL*/{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII,ASCII,ASCII},
@@ -75,87 +66,112 @@ static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
};
static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
-/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
+/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTH */
/*ASCII */ {NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, EMPTY, COPYA, COPYA},
/*A_ESC */ {COPYA, NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA},
/*A_ESC_DL */{COPYA, COPYA, COPYA, MAKE_J, MAKE_J, COPYA, COPYA, COPYA, COPYA},
-/*JIS */ {NOOP, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
-/*JIS_1 */ {ERROR, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
+/*JIS */ {NOOP, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR},
+/*JIS_1 */ {ERROR, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR},
/*JIS_2 */ {NOOP, COPYJ2,COPYJ2,COPYJ2, COPYJ2, COPYJ2,ERROR, COPYJ2,COPYJ2},
-/*J_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
-/*J_ESC_BR */{ERROR, ERROR, ERROR, ERROR, NOOP, NOOP, ERROR, ERROR, ERROR },
-/*J2_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
-/*J2_ESC_BR*/{ERROR, ERROR, ERROR, ERROR, COPYJ, COPYJ, ERROR, ERROR, ERROR },
+/*J_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR},
+/*J_ESC_BR */{ERROR, ERROR, ERROR, ERROR, NOOP, NOOP, ERROR, ERROR, ERROR},
+/*J2_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR},
+/*J2_ESC_BR*/{ERROR, ERROR, ERROR, ERROR, COPYJ, COPYJ, ERROR, ERROR, ERROR},
};
char *literal_codeset = NULL;
+/* Store into *PWC (if PWC is not null) the wide character
+ corresponding to the multibyte character at the start of the
+ buffer S of size N. Return the number of bytes in the multibyte
+ character. Return -1 if the bytes do not form a valid character,
+ or 0 if S is null or points to a null byte.
+
+ This function behaves like the Standard C function mbtowc, except
+ it treats locale names of the form "C-..." specially. */
+
int
local_mbtowc (pwc, s, n)
- wchar_t *pwc;
- const char *s;
- size_t n;
+ wchar_t *pwc;
+ char *s;
+ size_t n;
{
static JIS_STATE save_state = ASCII;
JIS_STATE curr_state = save_state;
- unsigned char *t = (unsigned char *)s;
+ unsigned char *t = (unsigned char *) s;
if (s != NULL && n == 0)
return -1;
if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
- {
- /* This must be the "C" locale or unknown locale -- fall thru */
- }
+ /* This must be the "C" locale or unknown locale -- fall thru */
+ ;
else if (! strcmp (literal_codeset, "C-SJIS"))
{
int char1;
if (s == NULL)
- return 0; /* not state-dependent */
+ /* Not state-dependent. */
+ return 0;
+
char1 = *t;
if (ISSJIS1 (char1))
{
int char2 = t[1];
+
if (n <= 1)
return -1;
+
if (ISSJIS2 (char2))
{
if (pwc != NULL)
- *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
+ *pwc = (((wchar_t) *t) << 8) + (wchar_t) (*(t + 1));
return 2;
}
+
return -1;
}
+
if (pwc != NULL)
- *pwc = (wchar_t)*t;
+ *pwc = (wchar_t) *t;
+
if (*t == '\0')
return 0;
+
return 1;
}
else if (! strcmp (literal_codeset, "C-EUCJP"))
{
int char1;
+
if (s == NULL)
- return 0; /* not state-dependent */
+ /* Not state-dependent. */
+ return 0;
+
char1 = *t;
if (ISEUCJP (char1))
{
int char2 = t[1];
+
if (n <= 1)
return -1;
+
if (ISEUCJP (char2))
{
if (pwc != NULL)
- *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
+ *pwc = (((wchar_t) *t) << 8) + (wchar_t) (*(t + 1));
return 2;
}
+
return -1;
}
+
if (pwc != NULL)
- *pwc = (wchar_t)*t;
+ *pwc = (wchar_t) *t;
+
if (*t == '\0')
return 0;
+
return 1;
}
else if (! strcmp (literal_codeset, "C-JIS"))
@@ -168,12 +184,13 @@ local_mbtowc (pwc, s, n)
if (s == NULL)
{
save_state = ASCII;
- return 1; /* state-dependent */
+ /* State-dependent. */
+ return 1;
}
ptr = t;
- for (i = 0; i < n; ++i)
+ for (i = 0; i < n; i++)
{
curr_ch = t[i];
switch (curr_ch)
@@ -213,59 +230,84 @@ local_mbtowc (pwc, s, n)
{
case NOOP:
break;
+
case EMPTY:
if (pwc != NULL)
- *pwc = (wchar_t)0;
+ *pwc = (wchar_t) 0;
+
save_state = curr_state;
return i;
+
case COPYA:
if (pwc != NULL)
- *pwc = (wchar_t)*ptr;
+ *pwc = (wchar_t) *ptr;
save_state = curr_state;
- return (i + 1);
+ return i + 1;
+
case COPYJ:
if (pwc != NULL)
- *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
+ *pwc = (((wchar_t) *ptr) << 8) + (wchar_t) (*(ptr + 1));
+
save_state = curr_state;
- return (i + 1);
+ return i + 1;
+
case COPYJ2:
if (pwc != NULL)
- *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
+ *pwc = (((wchar_t) *ptr) << 8) + (wchar_t) (*(ptr + 1));
+
save_state = curr_state;
- return (ptr - t) + 2;
+ return ptr - t + 2;
+
case MAKE_A:
case MAKE_J:
- ptr = (char *)(t + i + 1);
+ ptr = (char *) (t + i + 1);
break;
+
case ERROR:
default:
return -1;
}
}
- return -1; /* n < bytes needed */
+ /* More than n bytes needed. */
+ return -1;
}
#ifdef CROSS_COMPILE
if (s == NULL)
- return 0; /* not state-dependent */
+ /* Not state-dependent. */
+ return 0;
+
if (pwc != NULL)
*pwc = *s;
return 1;
#else
+
/* This must be the "C" locale or unknown locale. */
return mbtowc (pwc, s, n);
#endif
}
+/* Return the number of bytes in the multibyte character at the start
+ of the buffer S of size N. Return -1 if the bytes do not form a
+ valid character, or 0 if S is null or points to a null byte.
+
+ This function behaves like the Standard C function mblen, except
+ it treats locale names of the form "C-..." specially. */
+
int
local_mblen (s, n)
- const char *s;
- size_t n;
+ char *s;
+ size_t n;
{
return local_mbtowc (NULL, s, n);
}
+/* Return the maximum mumber of bytes in a multibyte character.
+
+ This function returns the same value as the Standard C macro MB_CUR_MAX,
+ except it treats locale names of the form "C-..." specially. */
+
int
local_mb_cur_max ()
{