summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4>2008-04-21 14:02:00 +0000
committertromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4>2008-04-21 14:02:00 +0000
commitd656d07a0ea99c1a5c9c8273f2fe486381e12c15 (patch)
tree95c70d6534e91125519d9a8f2e7c2cf84de0c82b
parent7f0c1cb278a40dd00f83f7253eedf3d227c3937f (diff)
downloadgcc-d656d07a0ea99c1a5c9c8273f2fe486381e12c15.tar.gz
libcpp
PR libcpp/33415: * charset.c (_cpp_convert_input): Add buffer_start argument. Ignore UTF-8 BOM if seen. * internal.h (_cpp_convert_input): Add argument. * files.c (struct _cpp_file) <buffer_start>: New field. (destroy_cpp_file): Free buffer_start, not buffer. (_cpp_pop_file_buffer): Likewise. (read_file_guts): Update. gcc/testsuite PR libcpp/33415: * gcc.dg/cpp/pr33415.c: New file. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@134507 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/testsuite/ChangeLog5
-rw-r--r--gcc/testsuite/gcc.dg/cpp/pr33415.c6
-rw-r--r--libcpp/ChangeLog11
-rw-r--r--libcpp/charset.c40
-rw-r--r--libcpp/files.c20
-rw-r--r--libcpp/internal.h4
6 files changed, 69 insertions, 17 deletions
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 23c6f7e99e3..02e190f23b0 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2008-04-21 Tom Tromey <tromey@redhat.com>
+
+ PR libcpp/33415:
+ * gcc.dg/cpp/pr33415.c: New file.
+
2008-04-21 Olivier Hainque <hainque@adacore.com>
* gnat.dg/bltins.adb: New testcase.
diff --git a/gcc/testsuite/gcc.dg/cpp/pr33415.c b/gcc/testsuite/gcc.dg/cpp/pr33415.c
new file mode 100644
index 00000000000..28ffe2372e4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/pr33415.c
@@ -0,0 +1,6 @@
+ /* Test case for PR 33415. Note that the first bytes of this file
+ are a UTF-8 BOM. */
+
+/* { dg-do compile } */
+
+int f(void) { return 5; }
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index 9eef6efb3e9..b80afd2550b 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,14 @@
+2008-04-21 Tom Tromey <tromey@redhat.com>
+
+ PR libcpp/33415:
+ * charset.c (_cpp_convert_input): Add buffer_start argument.
+ Ignore UTF-8 BOM if seen.
+ * internal.h (_cpp_convert_input): Add argument.
+ * files.c (struct _cpp_file) <buffer_start>: New field.
+ (destroy_cpp_file): Free buffer_start, not buffer.
+ (_cpp_pop_file_buffer): Likewise.
+ (read_file_guts): Update.
+
2008-04-18 Kris Van Hees <kris.van.hees@oracle.com>
* include/cpp-id-data.h (UC): Was U, conflicts with U"..." literal.
diff --git a/libcpp/charset.c b/libcpp/charset.c
index 225cdb4915e..d70d05cc020 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -1,5 +1,5 @@
/* CPP Library - charsets
- Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006
+ Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008
Free Software Foundation, Inc.
Broken out of c-lex.c Apr 2003, adding valid C99 UCN ranges.
@@ -1637,18 +1637,24 @@ _cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len)
source file) from INPUT_CHARSET to the source character set. INPUT
points to the input buffer, SIZE is its allocated size, and LEN is
the length of the meaningful data within the buffer. The
- translated buffer is returned, and *ST_SIZE is set to the length of
- the meaningful data within the translated buffer.
-
- INPUT is expected to have been allocated with xmalloc. This function
- will either return INPUT, or free it and return a pointer to another
- xmalloc-allocated block of memory. */
+ translated buffer is returned, *ST_SIZE is set to the length of
+ the meaningful data within the translated buffer, and *BUFFER_START
+ is set to the start of the returned buffer. *BUFFER_START may
+ differ from the return value in the case of a BOM or other ignored
+ marker information.
+
+ INPUT is expected to have been allocated with xmalloc. This
+ function will either set *BUFFER_START to INPUT, or free it and set
+ *BUFFER_START to a pointer to another xmalloc-allocated block of
+ memory. */
uchar *
_cpp_convert_input (cpp_reader *pfile, const char *input_charset,
- uchar *input, size_t size, size_t len, off_t *st_size)
+ uchar *input, size_t size, size_t len,
+ const unsigned char **buffer_start, off_t *st_size)
{
struct cset_converter input_cset;
struct _cpp_strbuf to;
+ unsigned char *buffer;
input_cset = init_iconv_desc (pfile, SOURCE_CHARSET, input_charset);
if (input_cset.func == convert_no_conversion)
@@ -1689,8 +1695,24 @@ _cpp_convert_input (cpp_reader *pfile, const char *input_charset,
else
to.text[to.len] = '\n';
+ buffer = to.text;
*st_size = to.len;
- return to.text;
+#if HOST_CHARSET == HOST_CHARSET_ASCII
+ /* The HOST_CHARSET test just above ensures that the source charset
+ is UTF-8. So, ignore a UTF-8 BOM if we see one. Note that
+ glibc's UTF-8 iconv() provider (as of glibc 2.7) does not ignore a
+ BOM -- however, even if it did, we would still need this code due
+ to the 'convert_no_conversion' case. */
+ if (to.len >= 3 && to.text[0] == 0xef && to.text[1] == 0xbb
+ && to.text[2] == 0xbf)
+ {
+ *st_size -= 3;
+ buffer += 3;
+ }
+#endif
+
+ *buffer_start = to.text;
+ return buffer;
}
/* Decide on the default encoding to assume for input files. */
diff --git a/libcpp/files.c b/libcpp/files.c
index 2bc3a801e35..1adc58d88a8 100644
--- a/libcpp/files.c
+++ b/libcpp/files.c
@@ -74,6 +74,10 @@ struct _cpp_file
/* The contents of NAME after calling read_file(). */
const uchar *buffer;
+ /* Pointer to the real start of BUFFER. read_file() might increment
+ BUFFER; when freeing, this pointer must be used instead. */
+ const uchar *buffer_start;
+
/* The macro, if any, preventing re-inclusion. */
const cpp_hashnode *cmacro;
@@ -635,8 +639,11 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file)
cpp_error (pfile, CPP_DL_WARNING,
"%s is shorter than expected", file->path);
- file->buffer = _cpp_convert_input (pfile, CPP_OPTION (pfile, input_charset),
- buf, size, total, &file->st.st_size);
+ file->buffer = _cpp_convert_input (pfile,
+ CPP_OPTION (pfile, input_charset),
+ buf, size, total,
+ &file->buffer_start,
+ &file->st.st_size);
file->buffer_valid = true;
return true;
@@ -969,8 +976,8 @@ make_cpp_file (cpp_reader *pfile, cpp_dir *dir, const char *fname)
static void
destroy_cpp_file (_cpp_file *file)
{
- if (file->buffer)
- free ((void *) file->buffer);
+ if (file->buffer_start)
+ free ((void *) file->buffer_start);
free ((void *) file->name);
free (file);
}
@@ -1302,9 +1309,10 @@ _cpp_pop_file_buffer (cpp_reader *pfile, _cpp_file *file)
/* Invalidate control macros in the #including file. */
pfile->mi_valid = false;
- if (file->buffer)
+ if (file->buffer_start)
{
- free ((void *) file->buffer);
+ free ((void *) file->buffer_start);
+ file->buffer_start = NULL;
file->buffer = NULL;
file->buffer_valid = false;
}
diff --git a/libcpp/internal.h b/libcpp/internal.h
index bf6c5f8c8d2..860fe2e53a2 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -1,5 +1,5 @@
/* Part of CPP library.
- Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008
Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it
@@ -644,7 +644,7 @@ extern cppchar_t _cpp_valid_ucn (cpp_reader *, const unsigned char **,
extern void _cpp_destroy_iconv (cpp_reader *);
extern unsigned char *_cpp_convert_input (cpp_reader *, const char *,
unsigned char *, size_t, size_t,
- off_t *);
+ const unsigned char **, off_t *);
extern const char *_cpp_default_encoding (void);
extern cpp_hashnode * _cpp_interpret_identifier (cpp_reader *pfile,
const unsigned char *id,