summaryrefslogtreecommitdiff
path: root/libcpp/lex.c
diff options
context:
space:
mode:
authorgeoffk <geoffk@138bc75d-0d04-0410-961f-82ee72b054a4>2005-03-15 00:36:33 +0000
committergeoffk <geoffk@138bc75d-0d04-0410-961f-82ee72b054a4>2005-03-15 00:36:33 +0000
commitbce471493dbd17218e3acbc35ee809986ba855c0 (patch)
treed3cd092701f32b8f84eec7a95a4e244aafcf795e /libcpp/lex.c
parenteddd9b2eb64bd2a51a3d297225a8a1cac5a8a055 (diff)
downloadgcc-bce471493dbd17218e3acbc35ee809986ba855c0.tar.gz
Index: gcc/ChangeLog
2005-03-14 Geoffrey Keating <geoffk@apple.com> * doc/cppopts.texi (-fexec-charset): Add concept index entry. (-fwide-exec-charset): Likewise. (-finput-charset): Likewise. * doc/invoke.texi (Warning Options): Document -Wnormalized=. * c-opts.c (c_common_handle_option): Handle -Wnormalized=. * c.opt (Wnormalized): New. Index: libcpp/ChangeLog 2005-03-14 Geoffrey Keating <geoffk@apple.com> * init.c (cpp_create_reader): Default warn_normalize to normalized_C. * charset.c: Update for new format of ucnid.h. (ucn_valid_in_identifier): Update for new format of ucnid.h. Add NST parameter, and update it; update callers. (cpp_valid_ucn): Add NST parameter, update callers. Replace abort with cpp_error. (convert_ucn): Pass normalize_state to cpp_valid_ucn. * internal.h (struct normalize_state): New. (INITIAL_NORMALIZE_STATE): New. (NORMALIZE_STATE_RESULT): New. (NORMALIZE_STATE_UPDATE_IDNUM): New. (_cpp_valid_ucn): New. * lex.c (warn_about_normalization): New. (forms_identifier_p): Add normalize_state parameter, update callers. (lex_identifier): Add normalize_state parameter, update callers. Keep the state current. (lex_number): Likewise. (_cpp_lex_direct): Pass normalize_state to subroutines. Check it with warn_about_normalization. * makeucnid.c: New. * ucnid.h: Replace. * ucnid.pl: Remove. * ucnid.tab: Make appropriate for input to makeucnid.c. Remove comments about obsolete version of C++. * include/cpplib.h (enum cpp_normalize_level): New. (struct cpp_options): Add warn_normalize field. Index: gcc/testsuite/ChangeLog 2005-03-14 Geoffrey Keating <geoffk@apple.com> * gcc.dg/cpp/normalize-1.c: New. * gcc.dg/cpp/normalize-2.c: New. * gcc.dg/cpp/normalize-3.c: New. * gcc.dg/cpp/normalize-4.c: New. * gcc.dg/cpp/ucnid-4.c: New. * gcc.dg/cpp/ucnid-5.c: New. * g++.dg/cpp/normalize-1.C: New. * g++.dg/cpp/ucnid-1.C: New. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@96459 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libcpp/lex.c')
-rw-r--r--libcpp/lex.c83
1 files changed, 64 insertions, 19 deletions
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 8398c7ca061..9bcb91c0472 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -53,9 +53,6 @@ static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
static int skip_line_comment (cpp_reader *);
static void skip_whitespace (cpp_reader *, cppchar_t);
-static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *, bool);
-static void lex_number (cpp_reader *, cpp_string *);
-static bool forms_identifier_p (cpp_reader *, int);
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
@@ -430,10 +427,36 @@ name_p (cpp_reader *pfile, const cpp_string *string)
return 1;
}
+/* After lexing TOKEN (an identifier or a number), produce a warning if
+   the normalization state S gathered while scanning it is worse than
+   the level allowed by the warn_normalize option, i.e. the sequence
+   is not in NFC/NFKC.  Nothing is reported while
+   pfile->state.skipping is set.  */
+static void
+warn_about_normalization (cpp_reader *pfile,
+                          const cpp_token *token,
+                          const struct normalize_state *s)
+{
+  if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
+      && !pfile->state.skipping)
+    {
+      /* Make sure that the token is printed using UCNs, even
+         if we'd otherwise happily print UTF-8.  */
+      unsigned char *buf = xmalloc (cpp_token_len (token));
+      size_t sz;
+
+      /* The spelling is described by its length SZ rather than being
+         used as a C string, hence "%.*s" below.  */
+      sz = cpp_spell_token (pfile, token, buf, false) - buf;
+
+      /* The precision consumed by "%.*s" must be an int.  Passing a
+         size_t through the variable argument list is wrong wherever
+         the two types differ in width, hence the casts.  */
+      if (NORMALIZE_STATE_RESULT (s) == normalized_C)
+        cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
+                             "`%.*s' is not in NFKC", (int) sz, buf);
+      else
+        cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
+                             "`%.*s' is not in NFC", (int) sz, buf);
+
+      /* The spelling buffer is only needed to format the message;
+         release it so each warning does not leak it.  */
+      free (buf);
+    }
+}
+
/* Returns TRUE if the sequence starting at buffer->cur is valid in
 an identifier. FIRST is TRUE if this starts an identifier. */
static bool
-forms_identifier_p (cpp_reader *pfile, int first)
+forms_identifier_p (cpp_reader *pfile, int first,
+ struct normalize_state *state)
{
cpp_buffer *buffer = pfile->buffer;
@@ -457,7 +480,8 @@ forms_identifier_p (cpp_reader *pfile, int first)
&& (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
{
buffer->cur += 2;
- if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
+ if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
+ state))
return true;
buffer->cur -= 2;
}
@@ -467,7 +491,8 @@ forms_identifier_p (cpp_reader *pfile, int first)
/* Lex an identifier starting at BUFFER->CUR - 1. */
static cpp_hashnode *
-lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn)
+lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
+ struct normalize_state *nst)
{
cpp_hashnode *result;
const uchar *cur;
@@ -482,13 +507,16 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn)
cur++;
}
pfile->buffer->cur = cur;
- if (starts_ucn || forms_identifier_p (pfile, false))
+ if (starts_ucn || forms_identifier_p (pfile, false, nst))
{
/* Slower version for identifiers containing UCNs (or $). */
do {
while (ISIDNUM (*pfile->buffer->cur))
- pfile->buffer->cur++;
- } while (forms_identifier_p (pfile, false));
+ {
+ pfile->buffer->cur++;
+ NORMALIZE_STATE_UPDATE_IDNUM (nst);
+ }
+ } while (forms_identifier_p (pfile, false, nst));
result = _cpp_interpret_identifier (pfile, base,
pfile->buffer->cur - base);
}
@@ -524,7 +552,8 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn)
/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
static void
-lex_number (cpp_reader *pfile, cpp_string *number)
+lex_number (cpp_reader *pfile, cpp_string *number,
+ struct normalize_state *nst)
{
const uchar *cur;
const uchar *base;
@@ -537,11 +566,14 @@ lex_number (cpp_reader *pfile, cpp_string *number)
/* N.B. ISIDNUM does not include $. */
while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
- cur++;
+ {
+ cur++;
+ NORMALIZE_STATE_UPDATE_IDNUM (nst);
+ }
pfile->buffer->cur = cur;
}
- while (forms_identifier_p (pfile, false));
+ while (forms_identifier_p (pfile, false, nst));
number->len = cur - base;
dest = _cpp_unaligned_alloc (pfile, number->len + 1);
@@ -897,9 +929,13 @@ _cpp_lex_direct (cpp_reader *pfile)
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
- result->type = CPP_NUMBER;
- lex_number (pfile, &result->val.str);
- break;
+ {
+ struct normalize_state nst = INITIAL_NORMALIZE_STATE;
+ result->type = CPP_NUMBER;
+ lex_number (pfile, &result->val.str, &nst);
+ warn_about_normalization (pfile, result, &nst);
+ break;
+ }
case 'L':
/* 'L' may introduce wide characters or strings. */
@@ -922,7 +958,12 @@ _cpp_lex_direct (cpp_reader *pfile)
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
result->type = CPP_NAME;
- result->val.node = lex_identifier (pfile, buffer->cur - 1, false);
+ {
+ struct normalize_state nst = INITIAL_NORMALIZE_STATE;
+ result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
+ &nst);
+ warn_about_normalization (pfile, result, &nst);
+ }
/* Convert named operators to their proper types. */
if (result->val.node->flags & NODE_OPERATOR)
@@ -1067,8 +1108,10 @@ _cpp_lex_direct (cpp_reader *pfile)
result->type = CPP_DOT;
if (ISDIGIT (*buffer->cur))
{
+ struct normalize_state nst = INITIAL_NORMALIZE_STATE;
result->type = CPP_NUMBER;
- lex_number (pfile, &result->val.str);
+ lex_number (pfile, &result->val.str, &nst);
+ warn_about_normalization (pfile, result, &nst);
}
else if (*buffer->cur == '.' && buffer->cur[1] == '.')
buffer->cur += 2, result->type = CPP_ELLIPSIS;
@@ -1151,11 +1194,13 @@ _cpp_lex_direct (cpp_reader *pfile)
case '\\':
{
const uchar *base = --buffer->cur;
+ struct normalize_state nst = INITIAL_NORMALIZE_STATE;
- if (forms_identifier_p (pfile, true))
+ if (forms_identifier_p (pfile, true, &nst))
{
result->type = CPP_NAME;
- result->val.node = lex_identifier (pfile, base, true);
+ result->val.node = lex_identifier (pfile, base, true, &nst);
+ warn_about_normalization (pfile, result, &nst);
break;
}
buffer->cur++;