diff options
author | Paolo Bonzini <bonzini@gnu.org> | 2004-10-20 21:42:31 +0000 |
---|---|---|
committer | Paolo Bonzini <bonzini@gnu.org> | 2004-10-20 21:42:31 +0000 |
commit | 9c9919efe2166efd32409054005619062624226c (patch) | |
tree | 29cf0853d8049b5d73337285c437fd35eb96086e /sed/execute.c | |
download | sed-9c9919efe2166efd32409054005619062624226c.tar.gz |
initial import
(automatically generated log message)
git-archimport-id: bonzini@gnu.org--2004b/sed--stable--4.1--base-0
Diffstat (limited to 'sed/execute.c')
-rw-r--r-- | sed/execute.c | 1747 |
1 files changed, 1747 insertions, 0 deletions
diff --git a/sed/execute.c b/sed/execute.c new file mode 100644 index 0000000..005a063 --- /dev/null +++ b/sed/execute.c @@ -0,0 +1,1747 @@ + +/* GNU SED, a batch stream editor. + Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003 + Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#undef EXPERIMENTAL_DASH_N_OPTIMIZATION /*don't use -- is very buggy*/ +#define INITIAL_BUFFER_SIZE 50 +#define FREAD_BUFFER_SIZE 8192 + +#include "sed.h" + +#include <stdio.h> +#include <ctype.h> + +#include <errno.h> +#ifndef errno +extern int errno; +#endif + +#ifdef HAVE_UNISTD_H +# include <unistd.h> +#endif + +#ifdef __GNUC__ +# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__-0 >= 7) + /* silence warning about unused parameter even for "gcc -W -Wunused" */ +# define UNUSED __attribute__((unused)) +# endif +#endif +#ifndef UNUSED +# define UNUSED +#endif + +#ifdef HAVE_STRINGS_H +# include <strings.h> +#else +# include <string.h> +#endif /*HAVE_STRINGS_H*/ +#ifdef HAVE_MEMORY_H +# include <memory.h> +#endif + +#ifndef HAVE_STRCHR +# define strchr index +# define strrchr rindex +#endif + +#ifdef HAVE_STDLIB_H +# include <stdlib.h> +#endif +#ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +#endif + +#ifdef HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif + +#include <sys/stat.h> + + +/* Sed operates a line at a time. 
*/ +struct line { + char *text; /* Pointer to line allocated by malloc. */ + char *active; /* Pointer to non-consumed part of text. */ + size_t length; /* Length of text (or active, if used). */ + size_t alloc; /* Allocated space for active. */ + bool chomped; /* Was a trailing newline dropped? */ +#ifdef HAVE_MBRTOWC + mbstate_t mbstate; /* Multibyte conversion state, updated as bytes are appended. */ +#endif +}; + +/* A queue of text to write out at the end of a cycle + (filled by the "a", "r" and "R" commands.) */ +struct append_queue { + const char *fname; /* File whose contents are copied to the output, or NULL. */ + char *text; /* Literal text to write to the output, or NULL. */ + size_t textlen; /* Number of bytes of text to write. */ + struct append_queue *next; /* Next queued entry, or NULL at the tail. */ + bool free; /* True if text is released with FREE() when the queue is emptied. */ +}; + +/* State information for the input stream. */ +struct input { + /* The list of yet-to-be-opened files. It is invalid for file_list + to be NULL. When *file_list is NULL we are currently processing + the last file. */ + + char **file_list; + + /* Count of files we failed to open. */ + countT bad_count; + + /* Current input line number (over all files). */ + countT line_number; + + /* True if we'll reset line numbers and addresses before + starting to process the next (possibly the first) file. */ + bool reset_at_next_file; + + /* Function to read one line. If FP is NULL, read_fn better not + be one which uses fp; in particular, read_always_fail() is + recommended. */ + bool (*read_fn) P_((struct input *)); /* read one line */ + + /* Name of the temporary output file, filled in by ck_mkstemp() when + editing in place; freed when the input is closed down. */ + char *out_file_name; + + /* Name of the input file being edited in place. */ + const char *in_file_name; + + /* if NULL, none of the following are valid */ + FILE *fp; + + bool no_buffering; +}; + + +/* Have we done any replacements lately? This is used by the `t' command. */ +static bool replaced = false; + +/* The current output file (stdout if -i is not being used). */ +static struct output output_file; + +/* The `current' input line. */ +static struct line line; + +/* An input line used to accumulate the result of the s and e commands. */ +static struct line s_accum; + +/* An input line that's been stored for later use by the program */ +static struct line hold; + +/* The buffered input look-ahead. 
The only field that should be + used outside of read_mem_line() or line_init() is buffer.length. */ +static struct line buffer; + +static struct append_queue *append_head = NULL; +static struct append_queue *append_tail = NULL; + + +#ifdef BOOTSTRAP +/* We can't be sure that the system we're boostrapping on has + memchr(), and ../lib/memchr.c requires configuration knowledge + about how many bits are in a `long'. This implementation + is far from ideal, but it should get us up-and-limping well + enough to run the configure script, which is all that matters. +*/ +# ifdef memchr +# undef memchr +# endif +# define memchr bootstrap_memchr + +static VOID *bootstrap_memchr P_((const VOID *s, int c, size_t n)); +static VOID * +bootstrap_memchr(s, c, n) + const VOID *s; + int c; + size_t n; +{ + char *p; + + for (p=(char *)s; n-- > 0; ++p) + if (*p == c) + return p; + return CAST(VOID *)0; +} +#endif /*BOOTSTRAP*/ + +/* increase a struct line's length, making some attempt at + keeping realloc() calls under control by padding for future growth. */ +static void resize_line P_((struct line *, size_t)); +static void +resize_line(lb, len) + struct line *lb; + size_t len; +{ + int inactive; + inactive = lb->active - lb->text; + + /* If the inactive part has got to more than two thirds of the buffer, + * remove it. */ + if (inactive > lb->alloc * 2) + { + MEMMOVE(lb->text, lb->active, lb->length); + lb->alloc += lb->active - lb->text; + lb->active = lb->text; + inactive = 0; + + if (lb->alloc > len) + return; + } + + lb->alloc *= 2; + if (lb->alloc < len) + lb->alloc = len; + if (lb->alloc < INITIAL_BUFFER_SIZE) + lb->alloc = INITIAL_BUFFER_SIZE; + + lb->text = REALLOC(lb->text, inactive + lb->alloc, char); + lb->active = lb->text + inactive; +} + +/* Append `length' bytes from `string' to the line `to'. 
*/ +static void str_append P_((struct line *, const char *, size_t)); +static void +str_append(to, string, length) + struct line *to; + const char *string; + size_t length; +{ + size_t new_length = to->length + length; + + if (to->alloc < new_length) + resize_line(to, new_length); + MEMCPY(to->active + to->length, string, length); + to->length = new_length; + +#ifdef HAVE_MBRTOWC + if (mb_cur_max == 1) + return; + + while (length) + { + int n = MBRLEN (string, length, &to->mbstate); + + /* An invalid sequence is treated like a singlebyte character. */ + if (n == -1) + { + memset (&to->mbstate, 0, sizeof (to->mbstate)); + n = 1; + } + + if (n > 0) + length -= n; + else + break; + } +#endif +} + +static void str_append_modified P_((struct line *, const char *, size_t, + enum replacement_types)); +static void +str_append_modified(to, string, length, type) + struct line *to; + const char *string; + size_t length; + enum replacement_types type; +{ + size_t old_length = to->length; + char *start, *end; + + if (length == 0) + return; + +#ifdef HAVE_MBRTOWC + { + mbstate_t from_stat; + + if (type == REPL_ASIS) + { + str_append(to, string, length); + return; + } + + if (to->alloc - to->length < length * mb_cur_max) + resize_line(to, to->length + length * mb_cur_max); + + MEMCPY (&from_stat, &to->mbstate, sizeof(mbstate_t)); + while (length) + { + wchar_t wc; + int n = MBRTOWC (&wc, string, length, &from_stat); + + /* An invalid sequence is treated like a singlebyte character. */ + if (n == -1) + { + memset (&to->mbstate, 0, sizeof (from_stat)); + n = 1; + } + + if (n > 0) + string += n, length -= n; + else + { + /* Incomplete sequence, copy it manually. */ + str_append(to, string, length); + return; + } + + /* Convert the first character specially... 
*/ + if (type & (REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST)) + { + if (type & REPL_UPPERCASE_FIRST) + wc = towupper(wc); + else + wc = towlower(wc); + + type &= ~(REPL_LOWERCASE_FIRST | REPL_UPPERCASE_FIRST); + if (type == REPL_ASIS) + { + str_append(to, string, length); + return; + } + } + + else if (type & REPL_UPPERCASE) + wc = towupper(wc); + else + wc = towlower(wc); + + /* Copy the new wide character to the end of the string. */ + n = wcrtomb (to->active + to->length, wc, &to->mbstate); + to->length += n; + if (n == -1) + { + fprintf (stderr, "Case conversion produced an invalid character!"); + abort (); + } + } + } +#else + str_append(to, string, length); + start = to->active + old_length; + end = start + length; + + /* Now do the required modifications. First \[lu]... */ + if (type & REPL_UPPERCASE_FIRST) + { + *start = toupper(*start); + start++; + type &= ~REPL_UPPERCASE_FIRST; + } + else if (type & REPL_LOWERCASE_FIRST) + { + *start = tolower(*start); + start++; + type &= ~REPL_LOWERCASE_FIRST; + } + + if (type == REPL_ASIS) + return; + + /* ...and then \[LU] */ + if (type == REPL_UPPERCASE) + for (; start != end; start++) + *start = toupper(*start); + else + for (; start != end; start++) + *start = tolower(*start); +#endif +} + +/* initialize a "struct line" buffer */ +static void line_init P_((struct line *, size_t initial_size)); +static void +line_init(buf, initial_size) + struct line *buf; + size_t initial_size; +{ + buf->text = MALLOC(initial_size, char); + buf->active = buf->text; + buf->alloc = initial_size; + buf->length = 0; + buf->chomped = true; + +#ifdef HAVE_MBRTOWC + memset (&buf->mbstate, 0, sizeof (buf->mbstate)); +#endif + +} + +/* Copy the contents of the line `from' into the line `to'. + This destroys the old contents of `to'. */ +static void line_copy P_((struct line *from, struct line *to)); +static void +line_copy(from, to) + struct line *from; + struct line *to; +{ + /* Remove the inactive portion in the destination buffer. 
*/ + to->alloc += to->active - to->text; + + if (to->alloc < from->length) + { + to->alloc *= 2; + if (to->alloc < from->length) + to->alloc = from->length; + if (to->alloc < INITIAL_BUFFER_SIZE) + to->alloc = INITIAL_BUFFER_SIZE; + /* Use FREE()+MALLOC() instead of REALLOC() to + avoid unnecessary copying of old text. */ + FREE(to->text); + to->text = MALLOC(to->alloc, char); + } + + to->active = to->text; + to->length = from->length; + to->chomped = from->chomped; + MEMCPY(to->active, from->active, from->length); + +#ifdef HAVE_MBRTOWC + MEMCPY(&to->mbstate, &from->mbstate, sizeof (from->mbstate)); +#endif +} + +/* Append the contents of the line `from' to the line `to'. */ +static void line_append P_((struct line *from, struct line *to)); +static void +line_append(from, to) + struct line *from; + struct line *to; +{ + str_append(to, "\n", 1); + str_append(to, from->active, from->length); + to->chomped = from->chomped; + +#ifdef HAVE_MBRTOWC + MEMCPY (&to->mbstate, &from->mbstate, sizeof (from->mbstate)); +#endif +} + +/* Exchange the contents of two "struct line" buffers. */ +static void line_exchange P_((struct line *, struct line *)); +static void +line_exchange(a, b) + struct line *a; + struct line *b; +{ + struct line t; + + MEMCPY(&t, a, sizeof(struct line)); + MEMCPY( a, b, sizeof(struct line)); + MEMCPY( b, &t, sizeof(struct line)); +} + + +/* dummy function to simplify read_pattern_space() */ +static bool read_always_fail P_((struct input *)); +static bool +read_always_fail(input) + struct input *input UNUSED; +{ + return false; +} + +static bool read_file_line P_((struct input *)); +static bool +read_file_line(input) + struct input *input; +{ + static char *b; + static size_t blen; + + long result = ck_getline (&b, &blen, input->fp); + if (result <= 0) + return false; + + /* Remove the trailing new-line that is left by getline. 
*/ + if (b[result - 1] == '\n') + --result; + else + line.chomped = false; + + str_append(&line, b, result); + return true; +} + + +static inline void output_missing_newline P_((struct output *)); +static inline void +output_missing_newline(outf) + struct output *outf; +{ + if (outf->missing_newline) + { + ck_fwrite("\n", 1, 1, outf->fp); + outf->missing_newline = false; + } +} + +static inline void flush_output P_((FILE *)); +static inline void +flush_output(fp) + FILE *fp; +{ + if (fp != stdout || unbuffered_output) + ck_fflush(fp); +} + +static void output_line P_((const char *, size_t, bool, struct output *)); +static void +output_line(text, length, nl, outf) + const char *text; + size_t length; + bool nl; + struct output *outf; +{ + output_missing_newline(outf); + + if (length) + ck_fwrite(text, 1, length, outf->fp); + + if (nl) + ck_fwrite("\n", 1, 1, outf->fp); + else + outf->missing_newline = true; + + flush_output(outf->fp); +} + +static struct append_queue *next_append_slot P_((void)); +static struct append_queue * +next_append_slot() +{ + struct append_queue *n = MALLOC(1, struct append_queue); + + n->fname = NULL; + n->text = NULL; + n->textlen = 0; + n->next = NULL; + n->free = false; + + if (append_tail) + append_tail->next = n; + else + append_head = n; + return append_tail = n; +} + +static void release_append_queue P_((void)); +static void +release_append_queue() +{ + struct append_queue *p, *q; + + for (p=append_head; p; p=q) + { + if (p->free) + FREE(p->text); + + q = p->next; + FREE(p); + } + append_head = append_tail = NULL; +} + +static void dump_append_queue P_((void)); +static void +dump_append_queue() +{ + struct append_queue *p; + + output_missing_newline(&output_file); + for (p=append_head; p; p=p->next) + { + if (p->text) + ck_fwrite(p->text, 1, p->textlen, output_file.fp); + + if (p->fname) + { + char buf[FREAD_BUFFER_SIZE]; + size_t cnt; + FILE *fp; + + /* "If _fname_ does not exist or cannot be read, it shall + be treated as if it 
were an empty file, causing no error + condition." IEEE Std 1003.2-1992 + So, don't fail. */ + fp = ck_fopen(p->fname, "r", false); + if (fp) + { + while ((cnt = ck_fread(buf, 1, sizeof buf, fp)) > 0) + ck_fwrite(buf, 1, cnt, output_file.fp); + ck_fclose(fp); + } + } + } + + flush_output(output_file.fp); + release_append_queue(); +} + + +/* Compute the name of the backup file for in-place editing */ +static char *get_backup_file_name P_((const char *)); +static char * +get_backup_file_name(name) + const char *name; +{ + char *old_asterisk, *asterisk, *backup, *p; + int name_length = strlen(name), backup_length = strlen(in_place_extension); + + /* Compute the length of the backup file */ + for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1; + asterisk = strchr(old_asterisk, '*'); + old_asterisk = asterisk + 1) + backup_length += name_length - 1; + + p = backup = xmalloc(backup_length + 1); + + /* Each iteration gobbles up to an asterisk */ + for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1; + asterisk = strchr(old_asterisk, '*'); + old_asterisk = asterisk + 1) + { + MEMCPY (p, old_asterisk, asterisk - old_asterisk); + p += asterisk - old_asterisk; + strcpy (p, name); + p += name_length; + } + + /* Tack on what's after the last asterisk */ + strcpy (p, old_asterisk); + return backup; +} + +/* Initialize a struct input for the named file. */ +static void open_next_file P_((const char *name, struct input *)); +static void +open_next_file(name, input) + const char *name; + struct input *input; +{ + buffer.length = 0; + + if (name[0] == '-' && name[1] == '\0' && !in_place_extension) + { + clearerr(stdin); /* clear any stale EOF indication */ + input->fp = stdin; + } + else if ( ! 
(input->fp = ck_fopen(name, "r", false)) ) + { + const char *ptr = strerror(errno); + fprintf(stderr, _("%s: can't read %s: %s\n"), myname, name, ptr); + input->read_fn = read_always_fail; /* a redundancy */ + ++input->bad_count; + return; + } + + input->read_fn = read_file_line; + + if (in_place_extension) + { + int output_fd; + char *tmpdir = ck_strdup(name), *p; + struct stat st; + + /* get the base name */ + if (p = strrchr(tmpdir, '/')) + *(p + 1) = 0; + else + strcpy(tmpdir, "."); + + input->in_file_name = name; + + if (isatty (fileno (input->fp))) + panic(_("couldn't edit %s: is a terminal"), input->in_file_name); + + fstat (fileno (input->fp), &st); + if (!S_ISREG (st.st_mode)) + panic(_("couldn't edit %s: not a regular file"), input->in_file_name); + + output_file.fp = ck_mkstemp (&input->out_file_name, tmpdir, "sed"); + output_file.missing_newline = false; + free (tmpdir); + + if (!output_file.fp) + panic(_("couldn't open temporary file %s: %s"), input->out_file_name, strerror(errno)); + + output_fd = fileno (output_file.fp); +#ifdef HAVE_FCHMOD + fchmod (output_fd, st.st_mode); +#endif +#ifdef HAVE_FCHOWN + if (fchown (output_fd, st.st_uid, st.st_gid) == -1) + fchown (output_fd, -1, st.st_gid); +#endif + } + else + output_file.fp = stdout; +} + + +/* Clean up an input stream that we are done with. 
*/ +static void closedown P_((struct input *)); +static void +closedown(input) + struct input *input; +{ + input->read_fn = read_always_fail; + if (!input->fp) + return; + if (input->fp != stdin) /* stdin can be reused on tty and tape devices */ + ck_fclose(input->fp); + + if (in_place_extension && output_file.fp != NULL) + { + ck_fclose (output_file.fp); + if (strcmp(in_place_extension, "*") != 0) + { + char *backup_file_name = get_backup_file_name(input->in_file_name); + ck_rename (input->in_file_name, backup_file_name, input->out_file_name); + free (backup_file_name); + } + + ck_rename (input->out_file_name, input->in_file_name, input->out_file_name); + free (input->out_file_name); + } + + input->fp = NULL; +} + +/* Reset range commands so that they are marked as non-matching */ +static void reset_addresses P_((struct vector *)); +static void +reset_addresses(vec) + struct vector *vec; +{ + struct sed_cmd *cur_cmd; + int n; + + for (cur_cmd = vec->v, n = vec->v_length; n--; cur_cmd++) + if (cur_cmd->a1 + && (cur_cmd->a1->addr_type == ADDR_IS_NUM + || cur_cmd->a1->addr_type == ADDR_IS_NUM_MOD) + && cur_cmd->a1->addr_number == 0) + cur_cmd->range_state = RANGE_ACTIVE; + else + cur_cmd->range_state = RANGE_INACTIVE; +} + +/* Read in the next line of input, and store it in the pattern space. + Return zero if there is nothing left to input. */ +static bool read_pattern_space P_((struct input *, struct vector *, bool)); +static bool +read_pattern_space(input, the_program, append) + struct input *input; + struct vector *the_program; + bool append; +{ + if (append_head) /* redundant test to optimize for common case */ + dump_append_queue(); + replaced = false; + if (!append) + line.length = 0; + line.chomped = true; /* default, until proved otherwise */ + + while ( ! 
(*input->read_fn)(input) ) + { + closedown(input); + + if (!*input->file_list) + return false; + + if (input->reset_at_next_file) + { + input->line_number = 0; + reset_addresses (the_program); + rewind_read_files (); + + /* If doing in-place editing, we will never append the + new-line to this file; but if the output goes to stdout, + we might still have to output the missing new-line. */ + if (in_place_extension) + output_file.missing_newline = false; + + input->reset_at_next_file = separate_files; + } + + open_next_file (*input->file_list++, input); + } + + ++input->line_number; + return true; +} + + +static bool last_file_with_data_p P_((struct input *)); +static bool +last_file_with_data_p(input) + struct input *input; +{ + for (;;) + { + int ch; + + closedown(input); + if (!*input->file_list) + return true; + open_next_file(*input->file_list++, input); + if (input->fp) + { + if ((ch = getc(input->fp)) != EOF) + { + ungetc(ch, input->fp); + return false; + } + } + } +} + +/* Determine if we match the `$' address. */ +static bool test_eof P_((struct input *)); +static bool +test_eof(input) + struct input *input; +{ + int ch; + + if (buffer.length) + return false; + if (!input->fp) + return separate_files || last_file_with_data_p(input); + if (feof(input->fp)) + return separate_files || last_file_with_data_p(input); + if ((ch = getc(input->fp)) == EOF) + return separate_files || last_file_with_data_p(input); + ungetc(ch, input->fp); + return false; +} + +/* Return non-zero if the current line matches the address + pointed to by `addr'. 
*/ +static bool match_an_address_p P_((struct addr *, struct input *)); +static bool +match_an_address_p(addr, input) + struct addr *addr; + struct input *input; +{ + switch (addr->addr_type) + { + case ADDR_IS_NULL: + return true; + + case ADDR_IS_REGEX: + return match_regex(addr->addr_regex, line.active, line.length, 0, NULL, 0); + + case ADDR_IS_NUM_MOD: + return (input->line_number >= addr->addr_number + && ((input->line_number - addr->addr_number) % addr->addr_step) == 0); + + case ADDR_IS_STEP: + case ADDR_IS_STEP_MOD: + /* reminder: these are only meaningful for a2 addresses */ + /* a2->addr_number needs to be recomputed each time a1 address + matches for the step and step_mod types */ + return (addr->addr_number <= input->line_number); + + case ADDR_IS_LAST: + return test_eof(input); + + /* ADDR_IS_NUM is handled in match_address_p. */ + case ADDR_IS_NUM: + default: + panic("INTERNAL ERROR: bad address type"); + } + /*NOTREACHED*/ + return false; +} + +/* return non-zero if current address is valid for cmd */ +static bool match_address_p P_((struct sed_cmd *, struct input *)); +static bool +match_address_p(cmd, input) + struct sed_cmd *cmd; + struct input *input; +{ + if (!cmd->a1) + return true; + + if (cmd->range_state != RANGE_ACTIVE) + { + /* Find if we are going to activate a range. Handle ADDR_IS_NUM + specially: it represent an "absolute" state, it should not + be computed like regexes. */ + if (cmd->a1->addr_type == ADDR_IS_NUM) + { + if (!cmd->a2) + return (input->line_number == cmd->a1->addr_number); + + if (cmd->range_state == RANGE_CLOSED + || input->line_number < cmd->a1->addr_number) + return false; + } + else + { + if (!cmd->a2) + return match_an_address_p(cmd->a1, input); + + if (!match_an_address_p(cmd->a1, input)) + return false; + } + + /* Ok, start a new range. */ + cmd->range_state = RANGE_ACTIVE; + switch (cmd->a2->addr_type) + { + case ADDR_IS_REGEX: + /* Always include at least two lines. 
*/ + return true; + case ADDR_IS_NUM: + /* Same handling as below, but always include at least one line. */ + if (input->line_number >= cmd->a2->addr_number) + cmd->range_state = RANGE_CLOSED; + return true; + case ADDR_IS_STEP: + cmd->a2->addr_number = input->line_number + cmd->a2->addr_step; + return true; + case ADDR_IS_STEP_MOD: + cmd->a2->addr_number = input->line_number + cmd->a2->addr_step + - (input->line_number%cmd->a2->addr_step); + return true; + default: + break; + } + } + + /* cmd->range_state == RANGE_ACTIVE. Check if the range is + ending; also handle ADDR_IS_NUM specially in this case. */ + + if (cmd->a2->addr_type == ADDR_IS_NUM) + { + /* If the second address is a line number, and if we got past + that line, fail to match (it can happen when you jump + over such addresses with `b' and `t'. Use RANGE_CLOSED + so that the range is not re-enabled anymore. */ + if (input->line_number >= cmd->a2->addr_number) + cmd->range_state = RANGE_CLOSED; + + return (input->line_number <= cmd->a2->addr_number); + } + + /* Other addresses are treated as usual. */ + if (match_an_address_p(cmd->a2, input)) + cmd->range_state = RANGE_CLOSED; + + return true; +} + + +static void do_list P_((int line_len)); +static void +do_list(line_len) + int line_len; +{ + unsigned char *p = CAST(unsigned char *)line.active; + countT len = line.length; + countT width = 0; + char obuf[180]; /* just in case we encounter a 512-bit char (;-) */ + char *o; + size_t olen; + FILE *fp = output_file.fp; + + output_missing_newline(&output_file); + for (; len--; ++p) { + o = obuf; + + /* Some locales define 8-bit characters as printable. This makes the + testsuite fail at 8to7.sed because the `l' command in fact will not + convert the 8-bit characters. 
*/ +#if defined isascii || defined HAVE_ISASCII + if (isascii(*p) && ISPRINT(*p)) { +#else + if (ISPRINT(*p)) { +#endif + *o++ = *p; + if (*p == '\\') + *o++ = '\\'; + } else { + *o++ = '\\'; + switch (*p) { +#if defined __STDC__ && __STDC__-0 + case '\a': *o++ = 'a'; break; +#else /* Not STDC; we'll just assume ASCII */ + case 007: *o++ = 'a'; break; +#endif + case '\b': *o++ = 'b'; break; + case '\f': *o++ = 'f'; break; + case '\n': *o++ = 'n'; break; + case '\r': *o++ = 'r'; break; + case '\t': *o++ = 't'; break; + case '\v': *o++ = 'v'; break; + default: + sprintf(o, "%03o", *p); + o += strlen(o); + break; + } + } + olen = o - obuf; + if (width+olen >= line_len && line_len > 0) { + ck_fwrite("\\\n", 1, 2, fp); + width = 0; + } + ck_fwrite(obuf, 1, olen, fp); + width += olen; + } + ck_fwrite("$\n", 1, 2, fp); + flush_output (fp); +} + + +static enum replacement_types append_replacement P_((struct line *, struct replacement *, + struct re_registers *, + enum replacement_types)); +static enum replacement_types +append_replacement (buf, p, regs, repl_mod) + struct line *buf; + struct replacement *p; + struct re_registers *regs; + enum replacement_types repl_mod; +{ + for (; p; p=p->next) + { + int i = p->subst_id; + enum replacement_types curr_type; + + /* Apply a \[lu] modifier that was given earlier, but which we + have not had yet the occasion to apply. But don't do it + if this replacement has a modifier of its own. */ + curr_type = (p->repl_type & REPL_MODIFIERS) + ? p->repl_type + : p->repl_type | repl_mod; + + repl_mod = 0; + if (p->prefix_length) + { + str_append_modified(buf, p->prefix, p->prefix_length, + curr_type); + curr_type &= ~REPL_MODIFIERS; + } + + if (0 <= i) + if (regs->end[i] == regs->start[i] && p->repl_type & REPL_MODIFIERS) + /* Save this modifier, we shall apply it later. + e.g. 
in s/()([a-z])/\u\1\2/ + the \u modifier is applied to \2, not \1 */ + repl_mod = curr_type & REPL_MODIFIERS; + + else + str_append_modified(buf, line.active + regs->start[i], + CAST(size_t)(regs->end[i] - regs->start[i]), + curr_type); + } + + return repl_mod; +} + +static void do_subst P_((struct subst *)); +static void +do_subst(sub) + struct subst *sub; +{ + size_t start = 0; /* where to start scan for (next) match in LINE */ + size_t last_end = 0; /* where did the last successful match end in LINE */ + countT count = 0; /* number of matches found */ + bool again = true; + +#define MAX_BACKREFERENCES 10 + static struct re_registers regs; + + if (s_accum.alloc == 0) + line_init(&s_accum, INITIAL_BUFFER_SIZE); + s_accum.length = 0; + + /* The first part of the loop optimizes s/xxx// when xxx is at the + start, and s/xxx$// */ + if (!match_regex(sub->regx, line.active, line.length, start, + ®s, MAX_BACKREFERENCES)) + return; + + if (!sub->replacement && sub->numb <= 1) + if (regs.start[0] == 0 && !sub->global) + { + /* We found a match, set the `replaced' flag. */ + replaced = true; + + line.active += regs.end[0]; + line.length -= regs.end[0]; + line.alloc -= regs.end[0]; + goto post_subst; + } + else if (regs.end[0] == line.length) + { + /* We found a match, set the `replaced' flag. */ + replaced = true; + + line.length = regs.start[0]; + goto post_subst; + } + + do + { + enum replacement_types repl_mod = 0; + + size_t offset = regs.start[0]; + size_t matched = regs.end[0] - regs.start[0]; + + /* Copy stuff to the left of this match into the output string. */ + if (start < offset) + str_append(&s_accum, line.active + start, offset - start); + + /* If we're counting up to the Nth match, are we there yet? + And even if we are there, there is another case we have to + skip: are we matching an empty string immediately following + another match? + + This latter case avoids that baaaac, when passed through + s,a*,x,g, gives `xbxxcx' instead of xbxcx. 
This behavior is + unacceptable because it is not consistently applied (for + example, `baaaa' gives `xbx', not `xbxx'). */ + if ((matched > 0 || count == 0 || offset > last_end) + && ++count >= sub->numb) + { + /* We found a match, set the `replaced' flag. */ + replaced = true; + + /* Now expand the replacement string into the output string. */ + repl_mod = append_replacement (&s_accum, sub->replacement, ®s, repl_mod); + again = sub->global; + } + else + { + /* The match was not replaced. Copy the text until its + end; if it was vacuous, skip over one character and + add that character to the output. */ + if (matched == 0) + { + if (start < line.length) + matched = 1; + else + break; + } + + str_append(&s_accum, line.active + offset, matched); + } + + /* Start after the match. last_end is the real end of the matched + substring, excluding characters that were skipped in case the RE + matched the empty string. */ + start = offset + matched; + last_end = regs.end[0]; + } + while (again + && start <= line.length + && match_regex(sub->regx, line.active, line.length, start, + ®s, MAX_BACKREFERENCES)); + + /* Copy stuff to the right of the last match into the output string. */ + if (start < line.length) + str_append(&s_accum, line.active + start, line.length-start); + s_accum.chomped = line.chomped; + + /* Exchange line and s_accum. This can be much cheaper + than copying s_accum.active into line.text (for huge lines). */ + line_exchange(&line, &s_accum); + + /* Finish up. 
*/ + if (count < sub->numb) + return; + + post_subst: + if (sub->print & 1) + output_line(line.active, line.length, line.chomped, &output_file); + + if (sub->eval) + { +#ifdef HAVE_POPEN + FILE *pipe; + s_accum.length = 0; + + str_append (&line, "", 1); + pipe = popen(line.active, "r"); + + if (pipe != NULL) + { + while (!feof (pipe)) + { + char buf[4096]; + int n = fread (buf, sizeof(char), 4096, pipe); + if (n > 0) + str_append(&s_accum, buf, n); + } + + pclose (pipe); + + line_exchange(&line, &s_accum); + if (line.length && + line.active[line.length - 1] == '\n') + line.length--; + } + else + panic(_("error in subprocess")); +#else + panic(_("option `e' not supported")); +#endif + } + + if (sub->print & 2) + output_line(line.active, line.length, line.chomped, &output_file); + if (sub->outf) + output_line(line.active, line.length, line.chomped, sub->outf); +} + +#ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION +/* Used to attempt a simple-minded optimization. */ + +static countT branches; + +static countT count_branches P_((struct vector *)); +static countT +count_branches(program) + struct vector *program; +{ + struct sed_cmd *cur_cmd = program->v; + countT isn_cnt = program->v_length; + countT cnt = 0; + + while (isn_cnt-- > 0) + { + switch (cur_cmd->cmd) + { + case 'b': + case 't': + case 'T': + case '{': + ++cnt; + } + } + return cnt; +} + +static struct sed_cmd *shrink_program P_((struct vector *, struct sed_cmd *)); +static struct sed_cmd * +shrink_program(vec, cur_cmd) + struct vector *vec; + struct sed_cmd *cur_cmd; +{ + struct sed_cmd *v = vec->v; + struct sed_cmd *last_cmd = v + vec->v_length; + struct sed_cmd *p; + countT cmd_cnt; + + for (p=v; p < cur_cmd; ++p) + if (p->cmd != '#') + MEMCPY(v++, p, sizeof *v); + cmd_cnt = v - vec->v; + + for (; p < last_cmd; ++p) + if (p->cmd != '#') + MEMCPY(v++, p, sizeof *v); + vec->v_length = v - vec->v; + + return (0 < vec->v_length) ? 
(vec->v + cmd_cnt) : CAST(struct sed_cmd *)0; +} +#endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/ + +/* Execute the program `vec' on the current input line. + Return exit status if caller should quit, -1 otherwise. */ +static int execute_program P_((struct vector *, struct input *)); +static int +execute_program(vec, input) + struct vector *vec; + struct input *input; +{ + struct sed_cmd *cur_cmd; + struct sed_cmd *end_cmd; + + cur_cmd = vec->v; + end_cmd = vec->v + vec->v_length; + while (cur_cmd < end_cmd) + { + if (match_address_p(cur_cmd, input) != cur_cmd->addr_bang) + { + switch (cur_cmd->cmd) + { + case 'a': + { + struct append_queue *aq = next_append_slot(); + aq->text = cur_cmd->x.cmd_txt.text; + aq->textlen = cur_cmd->x.cmd_txt.text_length; + } + break; + + case '{': + case 'b': + cur_cmd = vec->v + cur_cmd->x.jump_index; + continue; + + case '}': + case '#': + case ':': + /* Executing labels and block-ends are easy. */ + break; + + case 'c': + if (cur_cmd->range_state != RANGE_ACTIVE) + output_line(cur_cmd->x.cmd_txt.text, + cur_cmd->x.cmd_txt.text_length - 1, true, + &output_file); + /* POSIX.2 is silent about c starting a new cycle, + but it seems to be expected (and make sense). 
*/ + /* Fall Through */ + case 'd': + return -1; + + case 'D': + { + char *p = memchr(line.active, '\n', line.length); + if (!p) + return -1; + + ++p; + line.alloc -= p - line.active; + line.length -= p - line.active; + line.active += p - line.active; + + /* reset to start next cycle without reading a new line: */ + cur_cmd = vec->v; + continue; + } + + case 'e': { +#ifdef HAVE_POPEN + FILE *pipe; + int cmd_length = cur_cmd->x.cmd_txt.text_length; + if (s_accum.alloc == 0) + line_init(&s_accum, INITIAL_BUFFER_SIZE); + s_accum.length = 0; + + if (!cmd_length) + { + str_append (&line, "", 1); + pipe = popen(line.active, "r"); + } + else + { + cur_cmd->x.cmd_txt.text[cmd_length - 1] = 0; + pipe = popen(cur_cmd->x.cmd_txt.text, "r"); + output_missing_newline(&output_file); + } + + if (pipe != NULL) + { + while (!feof (pipe)) + { + char buf[4096]; + int n = fread (buf, sizeof(char), 4096, pipe); + if (n > 0) + if (!cmd_length) + str_append(&s_accum, buf, n); + else + ck_fwrite(buf, 1, n, output_file.fp); + } + + pclose (pipe); + if (!cmd_length) + { + /* Store into pattern space for plain `e' commands */ + if (s_accum.length && + s_accum.active[s_accum.length - 1] == '\n') + s_accum.length--; + + /* Exchange line and s_accum. This can be much + cheaper than copying s_accum.active into line.text + (for huge lines). */ + line_exchange(&line, &s_accum); + } + else + flush_output(output_file.fp); + + } + else + panic(_("error in subprocess")); +#else + panic(_("`e' command not supported")); +#endif + break; + } + + case 'g': + line_copy(&hold, &line); + break; + + case 'G': + line_append(&hold, &line); + break; + + case 'h': + line_copy(&line, &hold); + break; + + case 'H': + line_append(&line, &hold); + break; + + case 'i': + output_line(cur_cmd->x.cmd_txt.text, + cur_cmd->x.cmd_txt.text_length - 1, + true, &output_file); + break; + + case 'l': + do_list(cur_cmd->x.int_arg == -1 + ? 
lcmd_out_line_len + : cur_cmd->x.int_arg); + break; + + case 'L': + output_missing_newline(&output_file); + fmt(line.active, line.active + line.length, + cur_cmd->x.int_arg == -1 + ? lcmd_out_line_len + : cur_cmd->x.int_arg, + output_file.fp); + flush_output(output_file.fp); + break; + + case 'n': + if (!no_default_output) + output_line(line.active, line.length, line.chomped, &output_file); + if (test_eof(input) || !read_pattern_space(input, vec, false)) + return -1; + break; + + case 'N': + str_append(&line, "\n", 1); + + if (test_eof(input) || !read_pattern_space(input, vec, true)) + { + line.length--; + if (posixicity == POSIXLY_EXTENDED && !no_default_output) + output_line(line.active, line.length, line.chomped, + &output_file); + return -1; + } + break; + + case 'p': + output_line(line.active, line.length, line.chomped, &output_file); + break; + + case 'P': + { + char *p = memchr(line.active, '\n', line.length); + output_line(line.active, p ? p - line.active : line.length, + p ? true : line.chomped, &output_file); + } + break; + + case 'q': + if (!no_default_output) + output_line(line.active, line.length, line.chomped, &output_file); + + case 'Q': + return cur_cmd->x.int_arg == -1 ? 
0 : cur_cmd->x.int_arg; + + case 'r': + if (cur_cmd->x.fname) + { + struct append_queue *aq = next_append_slot(); + aq->fname = cur_cmd->x.fname; + } + break; + + case 'R': + if (cur_cmd->x.fp && !feof (cur_cmd->x.fp)) + { + struct append_queue *aq; + size_t buflen; + char *text = NULL; + int result; + + result = ck_getline (&text, &buflen, cur_cmd->x.fp); + if (result != EOF) + { + aq = next_append_slot(); + aq->free = true; + aq->text = text; + aq->textlen = result; + } + } + break; + + case 's': + do_subst(cur_cmd->x.cmd_subst); + break; + + case 't': + if (replaced) + { + replaced = false; + cur_cmd = vec->v + cur_cmd->x.jump_index; + continue; + } + break; + + case 'T': + if (!replaced) + { + cur_cmd = vec->v + cur_cmd->x.jump_index; + continue; + } + else + replaced = false; + break; + + case 'w': + if (cur_cmd->x.fp) + output_line(line.active, line.length, + line.chomped, cur_cmd->x.outf); + break; + + case 'W': + if (cur_cmd->x.fp) + { + char *p = memchr(line.active, '\n', line.length); + output_line(line.active, p ? p - line.active : line.length, + p ? true : line.chomped, cur_cmd->x.outf); + } + break; + + case 'x': + line_exchange(&line, &hold); + break; + + case 'y': + { +#ifdef HAVE_MBRTOWC + if (mb_cur_max > 1) + { + int idx, prev_idx; /* index in the input line. */ + char **trans; + mbstate_t mbstate; + memset(&mbstate, 0, sizeof(mbstate_t)); + for (idx = 0; idx < line.length;) + { + int mbclen, i; + mbclen = MBRLEN (line.active + idx, line.length - idx, + &mbstate); + /* An invalid sequence, or a truncated multibyte + character. We treat it as a singlebyte character. + */ + if (mbclen == (size_t) -1 || mbclen == (size_t) -2 + || mbclen == 0) + mbclen = 1; + + trans = cur_cmd->x.translatemb; + /* `i' indicate i-th translate pair. 
*/ + for (i = 0; trans[2*i] != NULL; i++) + { + if (strncmp(line.active + idx, trans[2*i], mbclen) == 0) + { + bool move_remain_buffer = false; + int trans_len = strlen(trans[2*i+1]); + + if (mbclen < trans_len) + { + int new_len; + new_len = line.length + 1 + trans_len - mbclen; + /* We must extend the line buffer. */ + if (line.alloc < new_len) + { + /* And we must resize the buffer. */ + resize_line(&line, new_len); + } + move_remain_buffer = true; + } + else if (mbclen > trans_len) + { + /* We must truncate the line buffer. */ + move_remain_buffer = true; + } + prev_idx = idx; + if (move_remain_buffer) + { + int move_len, move_offset; + char *move_from, *move_to; + /* Move the remaining with \0. */ + move_from = line.active + idx + mbclen; + move_to = line.active + idx + trans_len; + move_len = line.length + 1 - idx - mbclen; + move_offset = trans_len - mbclen; + memmove(move_to, move_from, move_len); + line.length += move_offset; + idx += move_offset; + } + strncpy(line.active + prev_idx, trans[2*i+1], + trans_len); + break; + } + } + idx += mbclen; + } + } + else +#endif /* HAVE_MBRTOWC */ + { + unsigned char *p, *e; + p = CAST(unsigned char *)line.active; + for (e=p+line.length; p<e; ++p) + *p = cur_cmd->x.translate[*p]; + } + } + break; + + case '=': + output_missing_newline(&output_file); + fprintf(output_file.fp, "%lu\n", + CAST(unsigned long)input->line_number); + flush_output(output_file.fp); + break; + + default: + panic("INTERNAL ERROR: Bad cmd %c", cur_cmd->cmd); + } + } + +#ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION + /* If our top-level program consists solely of commands with + ADDR_IS_NUM addresses then once we past the last mentioned + line we should be able to quit if no_default_output is true, + or otherwise quickly copy input to output. Now whether this + optimization is a win or not depends on how cheaply we can + implement this for the cases where it doesn't help, as + compared against how much time is saved. 
One semantic + difference (which I think is an improvement) is that *this* + version will terminate after printing line two in the script + "yes | sed -n 2p". + + Don't use this when in-place editing is active, because line + numbers restart each time then. */ + else if (!separate_files) + { + if (cur_cmd->a1->addr_type == ADDR_IS_NUM + && (cur_cmd->a2 + ? cur_cmd->range_state == RANGE_CLOSED + : cur_cmd->a1->addr_number < input->line_number)) + { + /* Skip this address next time */ + cur_cmd->addr_bang = !cur_cmd->addr_bang; + cur_cmd->a1->addr_type = ADDR_IS_NULL; + if (cur_cmd->a2) + cur_cmd->a2->addr_type = ADDR_IS_NULL; + + /* can we make an optimization? */ + if (cur_cmd->addr_bang) + { + if (cur_cmd->cmd == 'b' || cur_cmd->cmd == 't' + || cur_cmd->cmd == 'T' || cur_cmd->cmd == '}') + branches--; + + cur_cmd->cmd = '#'; /* replace with no-op */ + if (branches == 0) + cur_cmd = shrink_program(vec, cur_cmd); + if (!cur_cmd && no_default_output) + return 0; + end_cmd = vec->v + vec->v_length; + if (!cur_cmd) + cur_cmd = end_cmd; + continue; + } + } + } +#endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/ + + /* this is buried down here so that a "continue" statement can skip it */ + ++cur_cmd; + } + + if (!no_default_output) + output_line(line.active, line.length, line.chomped, &output_file); + return -1; +} + + + +/* Apply the compiled script to all the named files. 
*/ +int +process_files(the_program, argv) + struct vector *the_program; + char **argv; +{ + static char dash[] = "-"; + static char *stdin_argv[2] = { dash, NULL }; + struct input input; + int status; + + line_init(&line, INITIAL_BUFFER_SIZE); + line_init(&hold, 0); + line_init(&buffer, 0); + +#ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION + branches = count_branches(the_program); +#endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/ + input.file_list = stdin_argv; + input.reset_at_next_file = true; + if (argv && *argv) + input.file_list = argv; + input.bad_count = 0; + input.line_number = 0; + input.read_fn = read_always_fail; + input.fp = NULL; + + status = EXIT_SUCCESS; + while (read_pattern_space(&input, the_program, false)) + { + status = execute_program(the_program, &input); + if (status == -1) + status = EXIT_SUCCESS; + else + break; + } + closedown(&input); + +#ifdef DEBUG_LEAKS + /* We're about to exit, so these free()s are redundant. + But if we're running under a memory-leak detecting + implementation of malloc(), we want to explicitly + deallocate in order to avoid extraneous noise from + the allocator. */ + release_append_queue(); + FREE(buffer.text); + FREE(hold.text); + FREE(line.text); + FREE(s_accum.text); +#endif /*DEBUG_LEAKS*/ + + if (input.bad_count) + status = 2; + + return status; +} |