/* rclex.c -- lexer for Windows rc files parser */ /* Copyright (C) 1997-2023 Free Software Foundation, Inc. Written by Kai Tietz, Onevision. This file is part of GNU Binutils. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ /* This is a lexer used by the Windows rc file parser. It basically just recognized a bunch of keywords. */ #include "sysdep.h" #include "bfd.h" #include "bucomm.h" #include "libiberty.h" #include "safe-ctype.h" #include "windres.h" #include "rcparse.h" #include /* Whether we are in rcdata mode, in which we returns the lengths of strings. */ static int rcdata_mode; /* Whether we are suppressing lines from cpp (including windows.h or headers from your C sources may bring in externs and typedefs). When active, we return IGNORED_TOKEN, which lets us ignore these outside of resource constructs. Thus, it isn't required to protect all the non-preprocessor lines in your header files with #ifdef RC_INVOKED. It also means your RC file can't include other RC files if they're named "*.h". Sorry. Name them *.rch or whatever. */ static int suppress_cpp_data; #define IGNORE_CPP(x) (suppress_cpp_data ? IGNORED_TOKEN : (x)) /* The first filename we detect in the cpp output. We use this to tell included files from the original file. */ static char *initial_fn; /* List of allocated strings. */ struct alloc_string { struct alloc_string *next; char *s; }; static struct alloc_string *strings; struct rclex_keywords { const char *name; int tok; }; #define K(KEY) { #KEY, KEY } #define KRT(KEY) { #KEY, RT_##KEY } static const struct rclex_keywords keywds[] = { K(ACCELERATORS), K(ALT), K(ANICURSOR), K(ANIICON), K(ASCII), K(AUTO3STATE), K(AUTOCHECKBOX), K(AUTORADIOBUTTON), K(BEDIT), { "BEGIN", BEG }, K(BITMAP), K(BLOCK), K(BUTTON), K(CAPTION), K(CHARACTERISTICS), K(CHECKBOX), K(CHECKED), K(CLASS), K(COMBOBOX), K(CONTROL), K(CTEXT), K(CURSOR), K(DEFPUSHBUTTON), K(DIALOG), K(DIALOGEX), K(DISCARDABLE), K(DLGINCLUDE), K(DLGINIT), K(EDITTEXT), K(END), K(EXSTYLE), K(FILEFLAGS), K(FILEFLAGSMASK), K(FILEOS), K(FILESUBTYPE), K(FILETYPE), K(FILEVERSION), K(FIXED), K(FONT), K(FONTDIR), K(GRAYED), KRT(GROUP_CURSOR), KRT(GROUP_ICON), K(GROUPBOX), K(HEDIT), K(HELP), K(HTML), K(ICON), K(IEDIT), K(IMPURE), K(INACTIVE), K(LANGUAGE), K(LISTBOX), K(LOADONCALL), K(LTEXT), K(MANIFEST), K(MENU), K(MENUBARBREAK), K(MENUBREAK), K(MENUEX), K(MENUITEM), K(MESSAGETABLE), K(MOVEABLE), K(NOINVERT), K(NOT), K(OWNERDRAW), K(PLUGPLAY), K(POPUP), K(PRELOAD), K(PRODUCTVERSION), K(PURE), K(PUSHBOX), K(PUSHBUTTON), K(RADIOBUTTON), K(RCDATA), K(RTEXT), K(SCROLLBAR), K(SEPARATOR), K(SHIFT), K(STATE3), K(STRINGTABLE), K(STYLE), K(TOOLBAR), K(USERBUTTON), K(VALUE), { "VERSION", VERSIONK }, K(VERSIONINFO), K(VIRTKEY), K(VXD), { NULL, 0 }, }; /* External input stream from resrc */ extern FILE *cpp_pipe; /* Lexical scanner helpers. */ static int rclex_lastch = -1; static size_t rclex_tok_max = 0; static size_t rclex_tok_pos = 0; static char *rclex_tok = NULL; static int rclex_translatekeyword (const char *key) { if (key && ISUPPER (key[0])) { const struct rclex_keywords *kw = &keywds[0]; do { if (! strcmp (kw->name, key)) return kw->tok; ++kw; } while (kw->name != NULL); } return STRING; } /* Handle a C preprocessor line. */ static void cpp_line (void) { const char *s = rclex_tok; int line; char *send, *fn; size_t len, mlen; ++s; while (ISSPACE (*s)) ++s; /* Check for #pragma code_page ( DEFAULT | ). */ len = strlen (s); mlen = strlen ("pragma"); if (len > mlen && memcmp (s, "pragma", mlen) == 0 && ISSPACE (s[mlen])) { const char *end; s += mlen + 1; while (ISSPACE (*s)) ++s; len = strlen (s); mlen = strlen ("code_page"); if (len <= mlen || memcmp (s, "code_page", mlen) != 0) /* FIXME: We ought to issue a warning message about an unrecognised pragma. */ return; s += mlen; while (ISSPACE (*s)) ++s; if (*s != '(') /* FIXME: We ought to issue an error message about a malformed pragma. */ return; ++s; while (ISSPACE (*s)) ++s; if (*s == 0 || (end = strchr (s, ')')) == NULL) /* FIXME: We ought to issue an error message about a malformed pragma. */ return; len = (size_t) (end - s); fn = xmalloc (len + 1); if (len) memcpy (fn, s, len); fn[len] = 0; while (len > 0 && (fn[len - 1] > 0 && fn[len - 1] <= 0x20)) fn[--len] = 0; if (! len || (len == strlen ("DEFAULT") && strcasecmp (fn, "DEFAULT") == 0)) wind_current_codepage = wind_default_codepage; else if (len > 0) { rc_uint_type ncp; if (fn[0] == '0' && (fn[1] == 'x' || fn[1] == 'X')) ncp = (rc_uint_type) strtol (fn + 2, NULL, 16); else ncp = (rc_uint_type) strtol (fn, NULL, 10); if (ncp == CP_UTF16 || ! unicode_is_valid_codepage (ncp)) fatal (_("invalid value specified for pragma code_page.\n")); wind_current_codepage = ncp; } free (fn); return; } line = strtol (s, &send, 0); if (*send != '\0' && ! ISSPACE (*send)) return; /* Subtract 1 because we are about to count the newline. */ rc_lineno = line - 1; s = send; while (ISSPACE (*s)) ++s; if (*s != '"') return; ++s; send = strchr (s, '"'); if (send == NULL) return; fn = xmalloc (send - s + 1); strncpy (fn, s, send - s); fn[send - s] = '\0'; free (rc_filename); rc_filename = fn; if (! initial_fn) { initial_fn = xmalloc (strlen (fn) + 1); strcpy (initial_fn, fn); } /* Allow the initial file, regardless of name. Suppress all other files if they end in ".h" (this allows included "*.rc"). */ if (strcmp (initial_fn, fn) == 0 || strcmp (fn + strlen (fn) - 2, ".h") != 0) suppress_cpp_data = 0; else suppress_cpp_data = 1; } /* Allocate a string of a given length. */ static char * get_string (int len) { struct alloc_string *as; as = xmalloc (sizeof *as); as->s = xmalloc (len); as->next = strings; strings = as; return as->s; } /* Handle a quoted string. The quotes are stripped. A pair of quotes in a string are turned into a single quote. Adjacent strings are merged separated by whitespace are merged, as in C. */ static char * handle_quotes (rc_uint_type *len) { const char *input = rclex_tok; char *ret, *s; const char *t; int ch; int num_xdigits; ret = get_string (strlen (input) + 1); s = ret; t = input; if (*t == '"') ++t; while (*t != '\0') { if (*t == '\\') { ++t; switch (*t) { case '\0': rcparse_warning ("backslash at end of string"); break; case '\"': rcparse_warning ("use \"\" to put \" in a string"); *s++ = '"'; ++t; break; case 'a': *s++ = ESCAPE_B; /* Strange, but true... */ ++t; break; case 'b': *s++ = ESCAPE_B; ++t; break; case 'f': *s++ = ESCAPE_F; ++t; break; case 'n': *s++ = ESCAPE_N; ++t; break; case 'r': *s++ = ESCAPE_R; ++t; break; case 't': *s++ = ESCAPE_T; ++t; break; case 'v': *s++ = ESCAPE_V; ++t; break; case '\\': *s++ = *t++; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': ch = *t - '0'; ++t; if (*t >= '0' && *t <= '7') { ch = (ch << 3) | (*t - '0'); ++t; if (*t >= '0' && *t <= '7') { ch = (ch << 3) | (*t - '0'); ++t; } } *s++ = ch; break; case 'x': case 'X': ++t; ch = 0; /* We only handle single byte chars here. Make sure we finish an escape sequence like "/xB0ABC" after the first two digits. */ num_xdigits = 2; while (num_xdigits--) { if (*t >= '0' && *t <= '9') ch = (ch << 4) | (*t - '0'); else if (*t >= 'a' && *t <= 'f') ch = (ch << 4) | (*t - 'a' + 10); else if (*t >= 'A' && *t <= 'F') ch = (ch << 4) | (*t - 'A' + 10); else break; ++t; } *s++ = ch; break; default: rcparse_warning ("unrecognized escape sequence"); *s++ = '\\'; *s++ = *t++; break; } } else if (*t != '"') *s++ = *t++; else if (t[1] == '\0') break; else if (t[1] == '"') { *s++ = '"'; t += 2; } else { ++t; if (! ISSPACE (*t)) rcparse_warning ("unexpected character after '\"'"); while (ISSPACE (*t)) { if ((*t) == '\n') ++rc_lineno; ++t; } if (*t == '\0') break; assert (*t == '"'); ++t; } } *s = '\0'; *len = s - ret; return ret; } /* Allocate a unicode string of a given length. */ static unichar * get_unistring (int len) { return (unichar *) get_string (len * sizeof (unichar)); } /* Handle a quoted unicode string. The quotes are stripped. A pair of quotes in a string are turned into a single quote. Adjacent strings are merged separated by whitespace are merged, as in C. */ static unichar * handle_uniquotes (rc_uint_type *len) { const char *input = rclex_tok; unichar *ret, *s; const char *t; int ch; int num_xdigits; ret = get_unistring (strlen (input) + 1); s = ret; t = input; if ((*t == 'L' || *t == 'l') && t[1] == '"') t += 2; else if (*t == '"') ++t; while (*t != '\0') { if (*t == '\\') { ++t; switch (*t) { case '\0': rcparse_warning ("backslash at end of string"); break; case '\"': rcparse_warning ("use \"\" to put \" in a string"); break; case 'a': *s++ = ESCAPE_B; /* Strange, but true... */ ++t; break; case 'b': *s++ = ESCAPE_B; ++t; break; case 'f': *s++ = ESCAPE_F; ++t; break; case 'n': *s++ = ESCAPE_N; ++t; break; case 'r': *s++ = ESCAPE_R; ++t; break; case 't': *s++ = ESCAPE_T; ++t; break; case 'v': *s++ = ESCAPE_V; ++t; break; case '\\': *s++ = (unichar) *t++; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': ch = *t - '0'; ++t; if (*t >= '0' && *t <= '7') { ch = (ch << 3) | (*t - '0'); ++t; if (*t >= '0' && *t <= '7') { ch = (ch << 3) | (*t - '0'); ++t; } } *s++ = (unichar) ch; break; case 'x': case 'X': ++t; ch = 0; /* We only handle two byte chars here. Make sure we finish an escape sequence like "/xB0ABC" after the first two digits. */ num_xdigits = 4; while (num_xdigits--) { if (*t >= '0' && *t <= '9') ch = (ch << 4) | (*t - '0'); else if (*t >= 'a' && *t <= 'f') ch = (ch << 4) | (*t - 'a' + 10); else if (*t >= 'A' && *t <= 'F') ch = (ch << 4) | (*t - 'A' + 10); else break; ++t; } *s++ = (unichar) ch; break; default: rcparse_warning ("unrecognized escape sequence"); *s++ = '\\'; *s++ = (unichar) *t++; break; } } else if (*t != '"') *s++ = (unichar) *t++; else if (t[1] == '\0') break; else if (t[1] == '"') { *s++ = '"'; t += 2; } else { ++t; assert (ISSPACE (*t)); while (ISSPACE (*t)) { if ((*t) == '\n') ++rc_lineno; ++t; } if (*t == '\0') break; assert (*t == '"'); ++t; } } *s = '\0'; *len = s - ret; return ret; } /* Discard all the strings we have allocated. The parser calls this when it no longer needs them. */ void rcparse_discard_strings (void) { struct alloc_string *as; as = strings; while (as != NULL) { struct alloc_string *n; free (as->s); n = as->next; free (as); as = n; } strings = NULL; } /* Enter rcdata mode. */ void rcparse_rcdata (void) { rcdata_mode = 1; } /* Go back to normal mode from rcdata mode. */ void rcparse_normal (void) { rcdata_mode = 0; } static void rclex_tok_add_char (int ch) { if (! rclex_tok || rclex_tok_max <= rclex_tok_pos) { char *h = xmalloc (rclex_tok_max + 9); if (! h) abort (); if (rclex_tok) { memcpy (h, rclex_tok, rclex_tok_pos + 1); free (rclex_tok); } else rclex_tok_pos = 0; rclex_tok_max += 8; rclex_tok = h; } if (ch != -1) rclex_tok[rclex_tok_pos++] = (char) ch; rclex_tok[rclex_tok_pos] = 0; } static int rclex_readch (void) { int r = -1; if ((r = rclex_lastch) != -1) rclex_lastch = -1; else { char ch; do { if (! cpp_pipe || feof (cpp_pipe) || fread (&ch, 1, 1,cpp_pipe) != 1) break; r = ((int) ch) & 0xff; } while (r == 0 || r == '\r'); } rclex_tok_add_char (r); return r; } static int rclex_peekch (void) { int r; if ((r = rclex_lastch) == -1) { if ((r = rclex_readch ()) != -1) { rclex_lastch = r; if (rclex_tok_pos > 0) rclex_tok[--rclex_tok_pos] = 0; } } return r; } static void rclex_string (void) { int c; while ((c = rclex_peekch ()) != -1) { if (c == '\n') break; if (c == '\\') { rclex_readch (); if ((c = rclex_peekch ()) == -1 || c == '\n') break; rclex_readch (); } else if (rclex_readch () == '"') { /* PR 6714 Skip any whitespace after the end of the double quotes. */ do { c = rclex_peekch (); if (ISSPACE (c)) rclex_readch (); else c = -1; } while (c != -1); if (rclex_peekch () == '"') rclex_readch (); else break; } } } static rc_uint_type read_digit (int ch) { rc_uint_type base = 10; rc_uint_type ret, val; int warned = 0; ret = 0; if (ch == '0') { base = 8; switch (rclex_peekch ()) { case 'o': case 'O': rclex_readch (); base = 8; break; case 'x': case 'X': rclex_readch (); base = 16; break; } } else ret = (rc_uint_type) (ch - '0'); while ((ch = rclex_peekch ()) != -1) { if (ISDIGIT (ch)) val = (rc_uint_type) (ch - '0'); else if (ch >= 'a' && ch <= 'f') val = (rc_uint_type) ((ch - 'a') + 10); else if (ch >= 'A' && ch <= 'F') val = (rc_uint_type) ((ch - 'A') + 10); else break; rclex_readch (); if (! warned && val >= base) { warned = 1; rcparse_warning ("digit exceeds base"); } ret *= base; ret += val; } return ret; } /* yyparser entry method. */ int yylex (void) { char *s; unichar *us; rc_uint_type length; int ch; /* Make sure that rclex_tok is initialized. */ if (! rclex_tok) rclex_tok_add_char (-1); do { do { /* Clear token. */ rclex_tok_pos = 0; rclex_tok[0] = 0; if ((ch = rclex_readch ()) == -1) return -1; if (ch == '\n') ++rc_lineno; } while (ch <= 0x20); switch (ch) { case '#': while ((ch = rclex_peekch ()) != -1 && ch != '\n') rclex_readch (); cpp_line (); ch = IGNORED_TOKEN; break; case '{': ch = IGNORE_CPP (BEG); break; case '}': ch = IGNORE_CPP (END); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': yylval.i.val = read_digit (ch); yylval.i.dword = 0; switch (rclex_peekch ()) { case 'l': case 'L': rclex_readch (); yylval.i.dword = 1; break; } ch = IGNORE_CPP (NUMBER); break; case '"': rclex_string (); ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDSTRING : SIZEDSTRING)); if (ch == IGNORED_TOKEN) break; s = handle_quotes (&length); if (! rcdata_mode) yylval.s = s; else { yylval.ss.length = length; yylval.ss.s = s; } break; case 'L': case 'l': if (rclex_peekch () == '"') { rclex_readch (); rclex_string (); ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDUNISTRING : SIZEDUNISTRING)); if (ch == IGNORED_TOKEN) break; us = handle_uniquotes (&length); if (! rcdata_mode) yylval.uni = us; else { yylval.suni.length = length; yylval.suni.s = us; } break; } /* Fall through. */ default: if (ISIDST (ch) || ch=='$') { while ((ch = rclex_peekch ()) != -1 && (ISIDNUM (ch) || ch == '$' || ch == '.' || ch == ':' || ch == '\\' || ch == '/' || ch == '_' || ch == '-') ) rclex_readch (); ch = IGNORE_CPP (rclex_translatekeyword (rclex_tok)); if (ch == STRING) { s = get_string (strlen (rclex_tok) + 1); strcpy (s, rclex_tok); yylval.s = s; } else if (ch == BLOCK) { const char *hs = NULL; switch (yylex ()) { case STRING: case QUOTEDSTRING: hs = yylval.s; break; case SIZEDSTRING: hs = yylval.s = yylval.ss.s; break; } if (! hs) { rcparse_warning ("BLOCK expects a string as argument."); ch = IGNORED_TOKEN; } else if (! strcmp (hs, "StringFileInfo")) ch = BLOCKSTRINGFILEINFO; else if (! strcmp (hs, "VarFileInfo")) ch = BLOCKVARFILEINFO; } break; } ch = IGNORE_CPP (ch); break; } } while (ch == IGNORED_TOKEN); return ch; }