diff options
| author | Chet Ramey <chet.ramey@case.edu> | 2018-05-22 16:51:07 -0400 |
|---|---|---|
| committer | Chet Ramey <chet.ramey@case.edu> | 2018-05-22 16:51:07 -0400 |
| commit | 9a51695bed07d37086c352372ac69d0a30039a6b (patch) | |
| tree | 4d0f57cba0dd1ec7a75aae56e18cfe9997d0b7a7 /parse.y | |
| parent | 7de27456f6494f5f9c11ea1c19024d0024f31112 (diff) | |
| download | bash-5.0-alpha.tar.gz | |
bash-5.0-alpha release (bash-5.0-alpha)
Diffstat (limited to 'parse.y')
| -rw-r--r-- | parse.y | 444 |
1 files changed, 341 insertions, 103 deletions
@@ -1,6 +1,6 @@ /* parse.y - Yacc grammar for bash. */ -/* Copyright (C) 1989-2015 Free Software Foundation, Inc. +/* Copyright (C) 1989-2017 Free Software Foundation, Inc. This file is part of GNU Bash, the Bourne Again SHell. @@ -45,6 +45,7 @@ #define NEED_STRFTIME_DECL /* used in externs.h */ #include "shell.h" +#include "execute_cmd.h" #include "typemax.h" /* SIZE_MAX if needed */ #include "trap.h" #include "flags.h" @@ -94,6 +95,8 @@ typedef void *alias_t; #define RE_READ_TOKEN -99 #define NO_EXPANSION -100 +#define END_ALIAS -2 + #ifdef DEBUG # define YYDEBUG 1 #else @@ -115,24 +118,12 @@ typedef void *alias_t; extern int extended_glob; #endif -extern int eof_encountered; -extern int no_line_editing, running_under_emacs; -extern int current_command_number; -extern int sourcelevel, parse_and_execute_level; -extern int posixly_correct; -extern int last_command_exit_value; -extern pid_t last_command_subst_pid; -extern char *shell_name, *current_host_name; -extern char *dist_version; -extern int patch_level; extern int dump_translatable_strings, dump_po_strings; -extern sh_builtin_func_t *last_shell_builtin, *this_shell_builtin; -extern int here_doc_first_line; -#if defined (BUFFERED_INPUT) -extern int bash_input_fd_changed; -#endif +#if !defined (errno) extern int errno; +#endif + /* **************************************************************** */ /* */ /* "Forward" declarations */ @@ -218,6 +209,7 @@ static void print_prompt __P((void)); #if defined (HANDLE_MULTIBYTE) static void set_line_mbstate __P((void)); static char *shell_input_line_property = NULL; +static size_t shell_input_line_propsize = 0; #else # define set_line_mbstate() #endif @@ -328,6 +320,9 @@ static WORD_DESC *word_desc_to_read; static REDIRECTEE source; static REDIRECTEE redir; + +static FILE *yyoutstream; +static FILE *yyerrstream; %} %union { @@ -418,6 +413,22 @@ inputunit: simple_list simple_list_terminator YYABORT; } } + | error yacc_EOF + { + /* EOF after an error. 
Do ignoreeof or not. Really + only interesting in non-interactive shells */ + global_command = (COMMAND *)NULL; + last_command_exit_value = 1; + handle_eof_input_unit (); + if (interactive && parse_and_execute_level == 0) + { + YYACCEPT; + } + else + { + YYABORT; + } + } | yacc_EOF { /* Case of EOF seen by itself. Do ignoreeof or @@ -744,14 +755,14 @@ command: simple_command COMMAND *tc; tc = $1; - if (tc->redirects) + if (tc && tc->redirects) { register REDIRECT *t; for (t = tc->redirects; t->next; t = t->next) ; t->next = $2; } - else + else if (tc) tc->redirects = $2; $$ = $1; } @@ -830,21 +841,25 @@ for_command: FOR WORD newline_list DO compound_list DONE arith_for_command: FOR ARITH_FOR_EXPRS list_terminator newline_list DO compound_list DONE { $$ = make_arith_for_command ($2, $6, arith_for_lineno); + if ($$ == 0) YYERROR; if (word_top > 0) word_top--; } | FOR ARITH_FOR_EXPRS list_terminator newline_list '{' compound_list '}' { $$ = make_arith_for_command ($2, $6, arith_for_lineno); + if ($$ == 0) YYERROR; if (word_top > 0) word_top--; } | FOR ARITH_FOR_EXPRS DO compound_list DONE { $$ = make_arith_for_command ($2, $4, arith_for_lineno); + if ($$ == 0) YYERROR; if (word_top > 0) word_top--; } | FOR ARITH_FOR_EXPRS '{' compound_list '}' { $$ = make_arith_for_command ($2, $4, arith_for_lineno); + if ($$ == 0) YYERROR; if (word_top > 0) word_top--; } ; @@ -928,14 +943,14 @@ function_body: shell_command redirection. The two are semantically equivalent, though -- the only difference is in how the command printing code displays the redirections. 
*/ - if (tc->redirects) + if (tc && tc->redirects) { register REDIRECT *t; for (t = tc->redirects; t->next; t = t->next) ; t->next = $2; } - else + else if (tc) tc->redirects = $2; $$ = $1; } @@ -958,14 +973,14 @@ coproc: COPROC shell_command COMMAND *tc; tc = $2; - if (tc->redirects) + if (tc && tc->redirects) { register REDIRECT *t; for (t = tc->redirects; t->next; t = t->next) ; t->next = $3; } - else + else if (tc) tc->redirects = $3; $$ = make_coproc_command ("COPROC", $2); $$->flags |= CMD_WANT_SUBSHELL|CMD_COPROC_SUBSHELL; @@ -980,14 +995,14 @@ coproc: COPROC shell_command COMMAND *tc; tc = $3; - if (tc->redirects) + if (tc && tc->redirects) { register REDIRECT *t; for (t = tc->redirects; t->next; t = t->next) ; t->next = $4; } - else + else if (tc) tc->redirects = $4; $$ = make_coproc_command ($2->word, $3); $$->flags |= CMD_WANT_SUBSHELL|CMD_COPROC_SUBSHELL; @@ -1230,6 +1245,7 @@ pipeline_command: pipeline token_to_read = '\n'; else if ($2 == ';') token_to_read = ';'; + parser_state &= ~PST_REDIRLIST; /* make_simple_command sets this */ } | BANG list_terminator { @@ -1250,6 +1266,7 @@ pipeline_command: pipeline token_to_read = '\n'; if ($2 == ';') token_to_read = ';'; + parser_state &= ~PST_REDIRLIST; /* make_simple_command sets this */ } ; @@ -1316,6 +1333,8 @@ debug_parser (i) { #if YYDEBUG != 0 yydebug = i; + yyoutstream = stdout; + yyerrstream = stderr; #endif } #endif @@ -1453,6 +1472,7 @@ yy_readline_get () old_sigint = (SigHandler *)set_signal_handler (SIGINT, sigint_sighandler); } + sh_unset_nodelay_mode (fileno (rl_instream)); /* just in case */ current_readline_line = readline (current_readline_prompt ? 
current_readline_prompt : ""); @@ -1615,19 +1635,9 @@ yy_stream_get () result = EOF; if (bash_input.location.file) { -#if 0 - if (interactive) - interrupt_immediately++; -#endif - /* XXX - don't need terminate_immediately; getc_with_restart checks for terminating signals itself if read returns < 0 */ result = getc_with_restart (bash_input.location.file); - -#if 0 - if (interactive) - interrupt_immediately--; -#endif } return (result); } @@ -1961,6 +1971,23 @@ parser_restore_alias () #endif } +#if defined (ALIAS) +/* Before freeing AP, make sure that there aren't any cases of pointer + aliasing that could cause us to reference freed memory later on. */ +void +clear_string_list_expander (ap) + alias_t *ap; +{ + register STRING_SAVER *t; + + for (t = pushed_string_list; t; t = t->next) + { + if (t->expander && t->expander == ap) + t->expander = 0; + } +} +#endif + void clear_shell_input_line () { @@ -2014,7 +2041,8 @@ read_a_line (remove_quoted_newline) c = '\n'; } - /* `+2' in case the final character in the buffer is a newline. */ + /* `+2' in case the final character in the buffer is a newline or we + have to handle CTLESC or CTLNUL. */ RESIZE_MALLOCED_BUFFER (line_buffer, indx, 2, buffer_size, 128); /* IF REMOVE_QUOTED_NEWLINES is non-zero, we are reading a @@ -2045,7 +2073,14 @@ read_a_line (remove_quoted_newline) } } else - line_buffer[indx++] = c; + { + /* remove_quoted_newline is non-zero if the here-document delimiter + is unquoted. In this case, we will be expanding the lines and + need to make sure CTLESC and CTLNUL in the input are quoted. */ + if (remove_quoted_newline && (c == CTLESC || c == CTLNUL)) + line_buffer[indx++] = CTLESC; + line_buffer[indx++] = c; + } if (c == '\n') { @@ -2306,6 +2341,16 @@ shell_getc (remove_quoted_newline) #if 0 internal_warning ("shell_getc: ignored null byte in input"); #endif + /* If we get EOS while parsing a string, treat it as EOF so we + don't just keep looping. 
Happens very rarely */ + if (bash_input.type == st_string) + { + if (i == 0) + shell_input_line_terminator = EOF; + shell_input_line[i] = '\0'; + c = EOF; + break; + } continue; } @@ -2484,6 +2529,30 @@ next_alias_char: parsing an alias, we have just saved one (push_string, when called by the parse_dparen code) In this case, just go on as well. The PSH_SOURCE case is handled below. */ + + /* If we're at the end of an alias expansion add a space to make sure that + the alias remains marked as being in use while we expand its last word. + This makes sure that pop_string doesn't mark the alias as not in use + before the string resulting from the alias expansion is tokenized and + checked for alias expansion, preventing recursion. At this point, the + last character in shell_input_line is the last character of the alias + expansion. We test that last character to determine whether or not to + return the space that will delimit the token and postpone the pop_string. + This set of conditions duplicates what used to be in mk_alexpansion () + below, with the addition that we don't add a space if we're currently + reading a quoted string. */ +#ifndef OLD_ALIAS_HACK + if (uc == 0 && pushed_string_list && pushed_string_list->flags != PSH_SOURCE && + shell_input_line_index > 0 && + shell_input_line[shell_input_line_index-1] != ' ' && + shell_input_line[shell_input_line_index-1] != '\n' && + shellmeta (shell_input_line[shell_input_line_index-1]) == 0 && + (current_delimiter (dstack) != '\'' && current_delimiter (dstack) != '"')) + { + return ' '; /* END_ALIAS */ + } +#endif + pop_alias: if (uc == 0 && pushed_string_list && pushed_string_list->flags != PSH_SOURCE) { @@ -2502,10 +2571,9 @@ pop_alias: /* What do we do here if we're expanding an alias whose definition includes an escaped newline? 
If that's the last character in the alias expansion, we just pop the pushed string list (recall that - we inhibit the appending of a space in mk_alexpansion() if newline - is the last character). If it's not the last character, we need - to consume the quoted newline and move to the next character in - the expansion. */ + we inhibit the appending of a space if newline is the last + character). If it's not the last character, we need to consume the + quoted newline and move to the next character in the expansion. */ #if defined (ALIAS) if (expanding_alias () && shell_input_line[shell_input_line_index+1] == '\0') { @@ -2570,7 +2638,7 @@ parser_remaining_input () { if (shell_input_line == 0) return 0; - if (shell_input_line_index < 0 || shell_input_line_index >= shell_input_line_len) + if ((int)shell_input_line_index < 0 || shell_input_line_index >= shell_input_line_len) return ""; /* XXX */ return (shell_input_line + shell_input_line_index); } @@ -2680,7 +2748,7 @@ yylex () if (bash_input.type == st_string) rewind_input_string (); } - parser_state &= ~PST_EOFTOKEN; + parser_state &= ~PST_EOFTOKEN; /* ??? */ return (current_token); } @@ -2689,6 +2757,10 @@ yylex () which allow ESAC to be the next one read. */ static int esacs_needed_count; +/* When non-zero, we can read IN as an acceptable token, regardless of how + many newlines we read. */ +static int expecting_in_token; + static void push_heredoc (r) REDIRECT *r; @@ -2809,10 +2881,15 @@ mk_alexpansion (s) l = strlen (s); r = xmalloc (l + 2); strcpy (r, s); +#ifdef OLD_ALIAS_HACK /* If the last character in the alias is a newline, don't add a trailing space to the expansion. Works with shell_getc above. */ - if (r[l - 1] != ' ' && r[l - 1] != '\n' && shellmeta(r[l - 1]) == 0) + /* Need to do something about the case where the alias expansion contains + an unmatched quoted string, since appending this space affects the + subsequent output. 
*/ + if (l > 0 && r[l - 1] != ' ' && r[l - 1] != '\n' && shellmeta(r[l - 1]) == 0) r[l++] = ' '; +#endif r[l] = '\0'; return r; } @@ -2833,10 +2910,14 @@ alias_expand_token (tokstr) if (ap && (ap->flags & AL_BEINGEXPANDED)) return (NO_EXPANSION); +#ifdef OLD_ALIAS_HACK /* mk_alexpansion puts an extra space on the end of the alias expansion, - so the lookahead by the parser works right. If this gets changed, - make sure the code in shell_getc that deals with reaching the end of - an expanded alias is changed with it. */ + so the lookahead by the parser works right (the alias needs to remain + `in use' while parsing its last word to avoid alias recursion for + something like "alias echo=echo"). If this gets changed, make sure + the code in shell_getc that deals with reaching the end of an + expanded alias is changed with it. */ +#endif expanded = ap ? mk_alexpansion (ap->value) : (char *)NULL; if (expanded) @@ -2934,12 +3015,9 @@ static int special_case_tokens (tokstr) char *tokstr; { + /* Posix grammar rule 6 */ if ((last_read_token == WORD) && -#if defined (SELECT_COMMAND) ((token_before_that == FOR) || (token_before_that == CASE) || (token_before_that == SELECT)) && -#else - ((token_before_that == FOR) || (token_before_that == CASE)) && -#endif (tokstr[0] == 'i' && tokstr[1] == 'n' && tokstr[2] == 0)) { if (token_before_that == CASE) @@ -2947,9 +3025,34 @@ special_case_tokens (tokstr) parser_state |= PST_CASEPAT; esacs_needed_count++; } + if (expecting_in_token) + expecting_in_token--; + return (IN); + } + + /* bash-5.0: leaving above code intact for now, but it should eventually be + removed in favor of this clause. 
*/ + /* Posix grammar rule 6 */ + if (expecting_in_token && (last_read_token == WORD || last_read_token == '\n') && + (tokstr[0] == 'i' && tokstr[1] == 'n' && tokstr[2] == 0)) + { + if (parser_state & PST_CASESTMT) + { + parser_state |= PST_CASEPAT; + esacs_needed_count++; + } + expecting_in_token--; return (IN); } + /* Posix grammar rule 6, third word in FOR: for i; do command-list; done */ + else if (expecting_in_token && (last_read_token == '\n' || last_read_token == ';') && + (tokstr[0] == 'd' && tokstr[1] == 'o' && tokstr[2] == '\0')) + { + expecting_in_token--; + return (DO); + } + /* for i do; command-list; done */ if (last_read_token == WORD && #if defined (SELECT_COMMAND) (token_before_that == FOR || token_before_that == SELECT) && @@ -2957,7 +3060,11 @@ special_case_tokens (tokstr) (token_before_that == FOR) && #endif (tokstr[0] == 'd' && tokstr[1] == 'o' && tokstr[2] == '\0')) - return (DO); + { + if (expecting_in_token) + expecting_in_token--; + return (DO); + } /* Ditto for ESAC in the CASE case. Specifically, this handles "case word in esac", which is a legal @@ -2967,9 +3074,9 @@ special_case_tokens (tokstr) the designers disagree. */ if (esacs_needed_count) { - esacs_needed_count--; - if (STREQ (tokstr, "esac")) + if (last_read_token == IN && STREQ (tokstr, "esac")) { + esacs_needed_count--; parser_state &= ~PST_CASEPAT; return (ESAC); } @@ -3121,6 +3228,17 @@ read_token (command) return (yacc_EOF); } + /* If we hit the end of the string and we're not expanding an alias (e.g., + we are eval'ing a string that is an incomplete command), return EOF */ + if (character == '\0' && bash_input.type == st_string && expanding_alias() == 0) + { +#if defined (DEBUG) +itrace("shell_getc: bash_input.location.string = `%s'", bash_input.location.string); +#endif + EOF_Reached = 1; + return (yacc_EOF); + } + if MBTEST(character == '#' && (!interactive || interactive_comments)) { /* A comment. Discard until EOL or EOF, and then return a newline. 
*/ @@ -3316,18 +3434,19 @@ tokword: #define P_DOLBRACE 0x0040 /* parsing a ${...} construct */ /* Lexical state while parsing a grouping construct or $(...). */ -#define LEX_WASDOL 0x001 -#define LEX_CKCOMMENT 0x002 -#define LEX_INCOMMENT 0x004 -#define LEX_PASSNEXT 0x008 -#define LEX_RESWDOK 0x010 -#define LEX_CKCASE 0x020 -#define LEX_INCASE 0x040 -#define LEX_INHEREDOC 0x080 -#define LEX_HEREDELIM 0x100 /* reading here-doc delimiter */ -#define LEX_STRIPDOC 0x200 /* <<- strip tabs from here doc delim */ -#define LEX_QUOTEDDOC 0x400 /* here doc with quoted delim */ -#define LEX_INWORD 0x800 +#define LEX_WASDOL 0x0001 +#define LEX_CKCOMMENT 0x0002 +#define LEX_INCOMMENT 0x0004 +#define LEX_PASSNEXT 0x0008 +#define LEX_RESWDOK 0x0010 +#define LEX_CKCASE 0x0020 +#define LEX_INCASE 0x0040 +#define LEX_INHEREDOC 0x0080 +#define LEX_HEREDELIM 0x0100 /* reading here-doc delimiter */ +#define LEX_STRIPDOC 0x0200 /* <<- strip tabs from here doc delim */ +#define LEX_QUOTEDDOC 0x0400 /* here doc with quoted delim */ +#define LEX_INWORD 0x0800 +#define LEX_GTLT 0x1000 #define COMSUB_META(ch) ((ch) == ';' || (ch) == '&' || (ch) == '|') @@ -3358,7 +3477,7 @@ parse_matched_pair (qc, open, close, lenp, flags) int open, close; int *lenp, flags; { - int count, ch, tflags; + int count, ch, prevch, tflags; int nestlen, ttranslen, start_lineno; char *ret, *nestret, *ttrans; int retind, retsize, rflags; @@ -3380,8 +3499,10 @@ parse_matched_pair (qc, open, close, lenp, flags) retind = 0; start_lineno = line_number; + ch = EOF; /* just in case */ while (count) { + prevch = ch; ch = shell_getc (qc != '\'' && (tflags & (LEX_PASSNEXT)) == 0); if (ch == EOF) @@ -3532,7 +3653,7 @@ parse_matched_pair (qc, open, close, lenp, flags) { /* Translate $'...' here. 
*/ ttrans = ansiexpand (nestret, 0, nestlen - 1, &ttranslen); - xfree (nestret); + free (nestret); /* If we're parsing a double-quoted brace expansion and we are not in a place where single quotes are treated specially, @@ -3562,7 +3683,7 @@ parse_matched_pair (qc, open, close, lenp, flags) { /* Locale expand $"..." here. */ ttrans = localeexpand (nestret, 0, nestlen - 1, start_lineno, &ttranslen); - xfree (nestret); + free (nestret); nestret = sh_mkdoublequoted (ttrans, ttranslen, 0); free (ttrans); @@ -3575,6 +3696,12 @@ parse_matched_pair (qc, open, close, lenp, flags) } else if ((flags & (P_ARRAYSUB|P_DOLBRACE)) && (tflags & LEX_WASDOL) && (ch == '(' || ch == '{' || ch == '[')) /* ) } ] */ goto parse_dollar_word; +#if defined (PROCESS_SUBSTITUTION) + /* XXX - technically this should only be recognized at the start of + a word */ + else if ((flags & (P_ARRAYSUB|P_DOLBRACE)) && (tflags & LEX_GTLT) && (ch == '(')) /* ) */ + goto parse_dollar_word; +#endif } /* Parse an old-style command substitution within double quotes as a single word. 
*/ @@ -3606,7 +3733,13 @@ parse_dollar_word: FREE (nestret); } - if MBTEST(ch == '$') +#if defined (PROCESS_SUBSTITUTION) + if MBTEST((ch == '<' || ch == '>') && (tflags & LEX_GTLT) == 0) + tflags |= LEX_GTLT; + else + tflags &= ~LEX_GTLT; +#endif + if MBTEST(ch == '$' && (tflags & LEX_WASDOL) == 0) tflags |= LEX_WASDOL; else tflags &= ~LEX_WASDOL; @@ -3738,7 +3871,7 @@ parse_comsub (qc, open, close, lenp, flags) while (count) { comsub_readchar: - ch = shell_getc (qc != '\'' && (tflags & (LEX_INCOMMENT|LEX_PASSNEXT)) == 0); + ch = shell_getc (qc != '\'' && (tflags & (LEX_INCOMMENT|LEX_PASSNEXT|LEX_QUOTEDDOC)) == 0); if (ch == EOF) { @@ -3769,7 +3902,7 @@ eof_error: tind = lex_firstind; while ((tflags & LEX_STRIPDOC) && ret[tind] == '\t') tind++; - if (STREQN (ret + tind, heredelim, hdlen)) + if (retind-tind == hdlen && STREQN (ret + tind, heredelim, hdlen)) { tflags &= ~(LEX_STRIPDOC|LEX_INHEREDOC|LEX_QUOTEDDOC); /*itrace("parse_comsub:%d: found here doc end `%s'", line_number, ret + tind);*/ @@ -4010,11 +4143,13 @@ eof_error: tflags |= LEX_RESWDOK; lex_rwlen = 0; } - else + else if (shellmeta (ch) == 0) { tflags &= ~LEX_RESWDOK; /*itrace("parse_comsub:%d: found `%.4s', lex_reswdok -> 0", line_number, ret+retind-4);*/ } + else /* can't be in a reserved word any more */ + lex_rwlen = 0; } else if MBTEST((tflags & LEX_CKCOMMENT) && ch == '#' && (lex_rwlen == 0 || ((tflags & LEX_INWORD) && lex_wlen == 0))) ; /* don't modify LEX_RESWDOK if we're starting a comment */ @@ -4087,7 +4222,10 @@ eof_error: continue; } else - ch = peekc; /* fall through and continue XXX */ + { + shell_ungetc (peekc); /* not a here-doc, start over */ + continue; + } } else if MBTEST((tflags & LEX_CKCOMMENT) && (tflags & LEX_INCOMMENT) == 0 && ch == '#' && (((tflags & LEX_RESWDOK) && lex_rwlen == 0) || ((tflags & LEX_INWORD) && lex_wlen == 0))) { @@ -4143,7 +4281,7 @@ eof_error: { /* Translate $'...' here. 
*/ ttrans = ansiexpand (nestret, 0, nestlen - 1, &ttranslen); - xfree (nestret); + free (nestret); if ((rflags & P_DQUOTE) == 0) { @@ -4162,7 +4300,7 @@ eof_error: { /* Locale expand $"..." here. */ ttrans = localeexpand (nestret, 0, nestlen - 1, start_lineno, &ttranslen); - xfree (nestret); + free (nestret); nestret = sh_mkdoublequoted (ttrans, ttranslen, 0); free (ttrans); @@ -4190,7 +4328,7 @@ eof_error: FREE (nestret); } - if MBTEST(ch == '$') + if MBTEST(ch == '$' && (tflags & LEX_WASDOL) == 0) tflags |= LEX_WASDOL; else tflags &= ~LEX_WASDOL; @@ -4215,12 +4353,25 @@ xparse_dolparen (base, string, indp, flags) sh_parser_state_t ps; sh_input_line_state_t ls; int orig_ind, nc, sflags, orig_eof_token; - char *ret, *s, *ep, *ostring; + char *ret, *ep, *ostring; +#if defined (ALIAS) || defined (DPAREN_ARITHMETIC) + STRING_SAVER *saved_pushed_strings; +#endif - /*yydebug = 1;*/ +/*debug_parser(1);*/ orig_ind = *indp; ostring = string; + if (*string == 0) + { + if (flags & SX_NOALLOC) + return (char *)NULL; + + ret = xmalloc (1); + ret[0] = '\0'; + return ret; + } + /*itrace("xparse_dolparen: size = %d shell_input_line = `%s'", shell_input_line_size, shell_input_line);*/ sflags = SEVAL_NONINT|SEVAL_NOHIST|SEVAL_NOFREE; if (flags & SX_NOLONGJMP) @@ -4228,29 +4379,47 @@ xparse_dolparen (base, string, indp, flags) save_parser_state (&ps); save_input_line_state (&ls); orig_eof_token = shell_eof_token; +#if defined (ALIAS) || defined (DPAREN_ARITHMETIC) + saved_pushed_strings = pushed_string_list; /* separate parsing context */ + pushed_string_list = (STRING_SAVER *)NULL; +#endif /*(*/ parser_state |= PST_CMDSUBST|PST_EOFTOKEN; /* allow instant ')' */ /*(*/ shell_eof_token = ')'; + /* Should we save and restore the bison/yacc lookahead token (yychar) here? + Or only if it's not YYEMPTY? 
*/ + nc = parse_string (string, "command substitution", sflags, &ep); + if (current_token == shell_eof_token) + yyclearin; /* might want to clear lookahead token unconditionally */ + shell_eof_token = orig_eof_token; restore_parser_state (&ps); reset_parser (); /* reset_parser clears shell_input_line and associated variables */ restore_input_line_state (&ls); +#if defined (ALIAS) || defined (DPAREN_ARITHMETIC) + pushed_string_list = saved_pushed_strings; +#endif + token_to_read = 0; /* If parse_string returns < 0, we need to jump to top level with the - negative of the return value */ + negative of the return value. We abandon the rest of this input line + first */ if (nc < 0) - jump_to_top_level (-nc); /* XXX */ + { + clear_shell_input_line (); /* XXX */ + jump_to_top_level (-nc); /* XXX */ + } /* Need to find how many characters parse_and_execute consumed, update *indp, if flags != 0, copy the portion of the string parsed into RET - and return it. If flags & 1 (EX_NOALLOC) we can return NULL. */ + and return it. If flags & 1 (SX_NOALLOC) we can return NULL. 
*/ /*(*/ if (ep[-1] != ')') @@ -4269,6 +4438,8 @@ xparse_dolparen (base, string, indp, flags) #if DEBUG if (base[*indp] != ')') itrace("xparse_dolparen:%d: base[%d] != RPAREN (%d), base = `%s'", line_number, *indp, base[*indp], base); + if (*indp < orig_ind) + itrace("xparse_dolparen:%d: *indp (%d) < orig_ind (%d), orig_string = `%s'", line_number, *indp, orig_ind, ostring); #endif if (flags & SX_NOALLOC) @@ -4512,7 +4683,7 @@ cond_term () if (term) term->flags |= CMD_INVERT_RETURN; } - else if (tok == WORD && yylval.word->word[0] == '-' && yylval.word->word[2] == 0 && test_unop (yylval.word->word)) + else if (tok == WORD && yylval.word->word[0] == '-' && yylval.word->word[1] && yylval.word->word[2] == 0 && test_unop (yylval.word->word)) { op = yylval.word; tok = read_token (READ); @@ -4645,20 +4816,30 @@ parse_cond_command () #if defined (ARRAY_VARS) /* When this is called, it's guaranteed that we don't care about anything - in t beyond i. We do save and restore the chars, though. */ + in t beyond i. We use a buffer with room for the characters we add just + in case assignment() ends up doing something like parsing a command + substitution that will reallocate atoken. We don't want to write beyond + the end of an allocated buffer. */ static int token_is_assignment (t, i) char *t; int i; { - unsigned char c, c1; int r; + char *atoken; - c = t[i]; c1 = t[i+1]; - t[i] = '='; t[i+1] = '\0'; - r = assignment (t, (parser_state & PST_COMPASSIGN) != 0); - t[i] = c; t[i+1] = c1; - return r; + atoken = xmalloc (i + 3); + memcpy (atoken, t, i); + atoken[i] = '='; + atoken[i+1] = '\0'; + + r = assignment (atoken, (parser_state & PST_COMPASSIGN) != 0); + + free (atoken); + + /* XXX - check that r == i to avoid returning false positive for + t containing `=' before t[i]. 
*/ + return (r > 0 && r == i); } /* XXX - possible changes here for `+=' */ @@ -4773,7 +4954,8 @@ read_token_word (character) strcpy (token + token_index, ttok); token_index += ttoklen; all_digit_token = 0; - quoted = 1; + if (character != '`') + quoted = 1; dollar_present |= (character == '"' && strchr (ttok, '$') != 0); FREE (ttok); goto next_character; @@ -5113,6 +5295,7 @@ got_token: yylval.word = the_word; + /* should we check that quoted == 0 as well? */ if (token[0] == '{' && token[token_index-1] == '}' && (character == '<' || character == '>')) { @@ -5125,9 +5308,15 @@ got_token: #endif { strcpy (the_word->word, token+1); -/*itrace("read_token_word: returning REDIR_WORD for %s", the_word->word);*/ +/* itrace("read_token_word: returning REDIR_WORD for %s", the_word->word); */ + yylval.word = the_word; /* accommodate recursive call */ return (REDIR_WORD); } + else + /* valid_array_reference can call the parser recursively; need to + make sure that yylval.word doesn't change if we are going to + return WORD or ASSIGNMENT_WORD */ + yylval.word = the_word; } result = ((the_word->flags & (W_ASSIGNMENT|W_NOSPLIT)) == (W_ASSIGNMENT|W_NOSPLIT)) @@ -5145,6 +5334,7 @@ got_token: if (word_top < MAX_CASE_NEST) word_top++; word_lineno[word_top] = line_number; + expecting_in_token++; break; } @@ -5348,6 +5538,9 @@ history_delimiting_chars (line) return (" "); } + if (line_isblank (line)) + return (""); + return ("; "); } #endif /* HISTORY */ @@ -5418,6 +5611,31 @@ print_prompt () fflush (stderr); } +#if defined (HISTORY) + /* The history library increments the history offset as soon as it stores + the first line of a potentially multi-line command, so we compensate + here by returning one fewer when appropriate. */ +static int +prompt_history_number (pmt) + char *pmt; +{ + int ret; + + ret = history_number (); + if (ret == 1) + return ret; + + if (pmt == ps1_prompt) /* are we expanding $PS1? 
*/ + return ret; + else if (pmt == ps2_prompt && command_oriented_history == 0) + return ret; /* not command oriented history */ + else if (pmt == ps2_prompt && command_oriented_history && current_command_first_line_saved) + return ret - 1; + else + return ret - 1; /* PS0, PS4, ${var@P}, PS2 other cases */ +} +#endif + /* Return a string which will be printed as a prompt. The string may contain special characters which are decoded as follows: @@ -5455,7 +5673,7 @@ decode_prompt_string (string) char *string; { WORD_LIST *list; - char *result, *t; + char *result, *t, *orig_string; struct dstack save_dstack; int last_exit_value, last_comsub_pid; #if defined (PROMPT_STRING_DECODE) @@ -5471,6 +5689,7 @@ decode_prompt_string (string) result = (char *)xmalloc (result_size = PROMPT_GROWTH); result[result_index = 0] = 0; temp = (char *)NULL; + orig_string = string; while (c = *string++) { @@ -5486,7 +5705,7 @@ decode_prompt_string (string) #if !defined (HISTORY) temp = savestring ("1"); #else /* HISTORY */ - temp = itos (history_number ()); + temp = itos (prompt_history_number (orig_string)); #endif /* HISTORY */ string--; /* add_string increments string again. 
*/ goto add_string; @@ -5718,14 +5937,19 @@ decode_prompt_string (string) goto add_string; case '#': - temp = itos (current_command_number); + n = current_command_number; + /* If we have already incremented current_command_number (PS4, + ${var@P}), compensate */ + if (orig_string != ps0_prompt && orig_string != ps1_prompt && orig_string != ps2_prompt) + n--; + temp = itos (n); goto add_string; case '!': #if !defined (HISTORY) temp = savestring ("1"); #else /* HISTORY */ - temp = itos (history_number ()); + temp = itos (prompt_history_number (orig_string)); #endif /* HISTORY */ goto add_string; @@ -6302,8 +6526,6 @@ sh_parser_state_t * save_parser_state (ps) sh_parser_state_t *ps; { - int i; - if (ps == 0) ps = (sh_parser_state_t *)xmalloc (sizeof (sh_parser_state_t)); if (ps == 0) @@ -6339,15 +6561,10 @@ save_parser_state (ps) ps->need_here_doc = need_here_doc; ps->here_doc_first_line = here_doc_first_line; -#if 0 - for (i = 0; i < HEREDOC_MAX; i++) - ps->redir_stack[i] = redir_stack[i]; -#else if (need_here_doc == 0) ps->redir_stack[0] = 0; else memcpy (ps->redir_stack, redir_stack, sizeof (redir_stack[0]) * HEREDOC_MAX); -#endif ps->token = token; ps->token_buffer_size = token_buffer_size; @@ -6457,6 +6674,10 @@ restore_input_line_state (ls) ************************************************/ #if defined (HANDLE_MULTIBYTE) + +/* We don't let the property buffer get larger than this unless the line is */ +#define MAX_PROPSIZE 32768 + static void set_line_mbstate () { @@ -6468,8 +6689,22 @@ set_line_mbstate () if (shell_input_line == NULL) return; len = strlen (shell_input_line); /* XXX - shell_input_line_len ? 
*/ - shell_input_line_property = (char *)xrealloc (shell_input_line_property, len + 1); + if (len == 0) + return; + if (shell_input_line_propsize >= MAX_PROPSIZE && len < MAX_PROPSIZE>>1) + { + free (shell_input_line_property); + shell_input_line_property = 0; + shell_input_line_propsize = 0; + } + if (len+1 > shell_input_line_propsize) + { + shell_input_line_propsize = len + 1; + shell_input_line_property = (char *)xrealloc (shell_input_line_property, shell_input_line_propsize); + } + /* XXX - use whether or not we are in a UTF-8 locale to avoid calls to + mbrlen */ memset (&prevs, '\0', sizeof (mbstate_t)); for (i = previ = 0; i < len; i++) { @@ -6484,6 +6719,9 @@ set_line_mbstate () break; } + /* I'd love to take more advantage of UTF-8's properties in a UTF-8 + locale, but mbrlen changes the mbstate_t on every call even when + presented with single-byte characters. */ mbclen = mbrlen (shell_input_line + previ, i - previ + 1, &mbs); if (mbclen == 1 || mbclen == (size_t)-1) { |
