From dbd933365ec780d27ab7c0dfba30dc1af1094607 Mon Sep 17 00:00:00 2001
From: Brad King
Date: Mon, 14 Oct 2013 15:13:11 -0400
Subject: cmListFileLexer: Allow a leading UTF-8 Byte-Order-Mark (#11137)

Teach the lexer to read a UTF-8, UTF-16 BE/LE, or UTF-32 BE/LE
Byte-Order-Mark from the start of a file if any is present. Report an
error on files using UTF-16 or UTF-32 and accept a UTF-8 or missing BOM.
---
 Source/cmListFileLexer.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'Source/cmListFileLexer.h')

diff --git a/Source/cmListFileLexer.h b/Source/cmListFileLexer.h
index cc78b5c2f8..719347c1ab 100644
--- a/Source/cmListFileLexer.h
+++ b/Source/cmListFileLexer.h
@@ -36,6 +36,17 @@ struct cmListFileLexer_Token_s
   int column;
 };
 
+enum cmListFileLexer_BOM_e
+{
+  cmListFileLexer_BOM_None,
+  cmListFileLexer_BOM_UTF8,
+  cmListFileLexer_BOM_UTF16BE,
+  cmListFileLexer_BOM_UTF16LE,
+  cmListFileLexer_BOM_UTF32BE,
+  cmListFileLexer_BOM_UTF32LE
+};
+typedef enum cmListFileLexer_BOM_e cmListFileLexer_BOM;
+
 typedef struct cmListFileLexer_s cmListFileLexer;
 
 #ifdef __cplusplus
@@ -44,7 +55,8 @@ extern "C"
 #endif
 
 cmListFileLexer* cmListFileLexer_New();
-int cmListFileLexer_SetFileName(cmListFileLexer*, const char*);
+int cmListFileLexer_SetFileName(cmListFileLexer*, const char*,
+                                cmListFileLexer_BOM* bom);
 int cmListFileLexer_SetString(cmListFileLexer*, const char*);
 cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer*);
 long cmListFileLexer_GetCurrentLine(cmListFileLexer*);
-- 
cgit v1.2.1