From be2d5c0d33d507818d3cb0d3853c61c321a05464 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Wed, 10 Dec 2014 06:43:46 -0800 Subject: remove support for the 'L' (fmt/flow-paragraph) command * sed/Makefile.am (sed_SOURCES): Remove fmt.c. * sed/execute.c (execute_program): * sed/fmt.c: Remove file. * sed/sed.h (fmt): Remove declaration. * doc/sed.texi: Remove documentation for 'L' command. * doc/sed-in.texi: Likewise. * NEWS (Feature removal): Document it. Prompted by the report from Jodie Cunningham that using this command with a large number could cause sed to segfault: https://bugs.launchpad.net/ubuntu/+source/sed/+bug/1400575 --- NEWS | 6 + doc/sed-in.texi | 27 --- doc/sed.texi | 27 --- sed/Makefile.am | 2 +- sed/execute.c | 10 - sed/fmt.c | 578 -------------------------------------------------------- sed/sed.h | 2 - 7 files changed, 7 insertions(+), 645 deletions(-) delete mode 100644 sed/fmt.c diff --git a/NEWS b/NEWS index 19390a4..b713b1d 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,12 @@ GNU sed NEWS -*- outline -*- is what stopped working. [bug introduced some time after sed-3.02 and prior to the first sed-4* test release] +** Feature removal + + The "L" command (format a paragraph like the fmt(1) command would) + has been listed in the documentation as a failed experiment for at + least 10 years. That command is now removed. + ** Build-related "make dist" now builds .tar.xz files, rather than .tar.gz ones. diff --git a/doc/sed-in.texi b/doc/sed-in.texi index a1f2cbd..5f932c1 100644 --- a/doc/sed-in.texi +++ b/doc/sed-in.texi @@ -1468,33 +1468,6 @@ to the end of the current cycle. Print out the file name of the current input file (with a trailing newline). -@item L @var{n} -@findex L (fLow paragraphs) command -@cindex Reformat pattern space -@cindex Reformatting paragraphs -@cindex @value{SSEDEXT}, reformatting paragraphs -@cindex @value{SSEDEXT}, @code{L} command -This @value{SSED} extension fills and joins lines in pattern space -to produce output lines of (at most) @var{n} characters, like -@code{fmt} does; if @var{n} is omitted, the default as specified -on the command line is used. This command is considered a failed -experiment and unless there is enough request (which seems unlikely) -will be removed in future versions. - -@ignore -Blank lines, spaces between words, and indentation are -preserved in the output; successive input lines with different -indentation are not joined; tabs are expanded to 8 columns. - -If the pattern space contains multiple lines, they are joined, but -since the pattern space usually contains a single line, the behavior -of a simple @code{L;d} script is the same as @samp{fmt -s} (i.e., -it does not join short lines to form longer ones). - -@var{n} specifies the desired line-wrap length; if omitted, -the default as specified on the command line is used. -@end ignore - @item Q [@var{exit-code}] This command only accepts a single address. diff --git a/doc/sed.texi b/doc/sed.texi index b4de5f3..412454c 100644 --- a/doc/sed.texi +++ b/doc/sed.texi @@ -1469,33 +1469,6 @@ to the end of the current cycle. Print out the file name of the current input file (with a trailing newline). -@item L @var{n} -@findex L (fLow paragraphs) command -@cindex Reformat pattern space -@cindex Reformatting paragraphs -@cindex @value{SSEDEXT}, reformatting paragraphs -@cindex @value{SSEDEXT}, @code{L} command -This @value{SSED} extension fills and joins lines in pattern space -to produce output lines of (at most) @var{n} characters, like -@code{fmt} does; if @var{n} is omitted, the default as specified -on the command line is used. This command is considered a failed -experiment and unless there is enough request (which seems unlikely) -will be removed in future versions. - -@ignore -Blank lines, spaces between words, and indentation are -preserved in the output; successive input lines with different -indentation are not joined; tabs are expanded to 8 columns. - -If the pattern space contains multiple lines, they are joined, but -since the pattern space usually contains a single line, the behavior -of a simple @code{L;d} script is the same as @samp{fmt -s} (i.e., -it does not join short lines to form longer ones). - -@var{n} specifies the desired line-wrap length; if omitted, -the default as specified on the command line is used. -@end ignore - @item Q [@var{exit-code}] This command only accepts a single address. diff --git a/sed/Makefile.am b/sed/Makefile.am index 243ac4b..1983810 100644 --- a/sed/Makefile.am +++ b/sed/Makefile.am @@ -3,7 +3,7 @@ bin_PROGRAMS = sed localedir = $(datadir)/locale -sed_SOURCES = sed.c compile.c execute.c regexp.c fmt.c mbcs.c utils.c +sed_SOURCES = sed.c compile.c execute.c regexp.c mbcs.c utils.c noinst_HEADERS = sed.h utils.h AM_CPPFLAGS = -I$(top_srcdir)/lib -I$(top_srcdir) -I$(top_builddir)/lib \ diff --git a/sed/execute.c b/sed/execute.c index fa6fe1d..f398e7c 100644 --- a/sed/execute.c +++ b/sed/execute.c @@ -1353,16 +1353,6 @@ execute_program(struct vector *vec, struct input *input) : cur_cmd->x.int_arg); break; - case 'L': - output_missing_newline(&output_file); - fmt(line.active, line.active + line.length, - cur_cmd->x.int_arg == -1 - ? lcmd_out_line_len - : cur_cmd->x.int_arg, - output_file.fp); - flush_output(output_file.fp); - break; - case 'n': if (!no_default_output) output_line(line.active, line.length, line.chomped, &output_file); diff --git a/sed/fmt.c b/sed/fmt.c deleted file mode 100644 index 389bccb..0000000 --- a/sed/fmt.c +++ /dev/null @@ -1,578 +0,0 @@ -/* `L' command implementation for GNU sed, based on GNU fmt 1.22. - Copyright (C) 1994, 1995, 1996, 2002, 2003, 2014 Free Software - Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* GNU fmt was written by Ross Paterson . */ - -#include "sed.h" - -#include -#include -#include -#include -#include - -/* The following parameters represent the program's idea of what is - "best". Adjust to taste, subject to the caveats given. */ - -/* Prefer lines to be LEEWAY % shorter than the maximum width, giving - room for optimization. */ -#define LEEWAY 7 - -/* Costs and bonuses are expressed as the equivalent departure from the - optimal line length, multiplied by 10. e.g. assigning something a - cost of 50 means that it is as bad as a line 5 characters too short - or too long. The definition of SHORT_COST(n) should not be changed. - However, EQUIV(n) may need tuning. */ - -typedef long COST; - -#define MAXCOST (~(((unsigned long) 1) << (8 * sizeof (COST) -1))) - -#define SQR(n) ((n) * (n)) -#define EQUIV(n) SQR ((COST) (n)) - -/* Cost of a filled line n chars longer or shorter than best_width. */ -#define SHORT_COST(n) EQUIV ((n) * 10) - -/* Cost of the difference between adjacent filled lines. */ -#define RAGGED_COST(n) (SHORT_COST (n) / 2) - -/* Basic cost per line. */ -#define LINE_COST EQUIV (70) - -/* Cost of breaking a line after the first word of a sentence, where - the length of the word is N. */ -#define WIDOW_COST(n) (EQUIV (200) / ((n) + 2)) - -/* Cost of breaking a line before the last word of a sentence, where - the length of the word is N. */ -#define ORPHAN_COST(n) (EQUIV (150) / ((n) + 2)) - -/* Bonus for breaking a line at the end of a sentence. */ -#define SENTENCE_BONUS EQUIV (50) - -/* Cost of breaking a line after a period not marking end of a sentence. - With the definition of sentence we are using (borrowed from emacs, see - get_line()) such a break would then look like a sentence break. Hence - we assign a very high cost -- it should be avoided unless things are - really bad. */ -#define NOBREAK_COST EQUIV (600) - -/* Bonus for breaking a line before open parenthesis. */ -#define PAREN_BONUS EQUIV (40) - -/* Bonus for breaking a line after other punctuation. */ -#define PUNCT_BONUS EQUIV(40) - -/* Credit for breaking a long paragraph one line later. */ -#define LINE_CREDIT EQUIV(3) - -/* Size of paragraph buffer in words. Longer paragraphs are handled - neatly (cf. flush_paragraph()), so there's little to gain by making - these larger. */ -#define MAXWORDS 1000 - -#define GETC() (parabuf == end_of_parabuf ? EOF : *parabuf++) - -/* Extra ctype(3)-style macros. */ - -#define isopen(c) (strchr ("([`'\"", (c)) != NULL) -#define isclose(c) (strchr (")]'\"", (c)) != NULL) -#define isperiod(c) (strchr (".?!", (c)) != NULL) - -/* Size of a tab stop, for expansion on input and re-introduction on - output. */ -#define TABWIDTH 8 - -/* Word descriptor structure. */ - -typedef struct Word WORD; - -struct Word - { - - /* Static attributes determined during input. */ - - const char *text; /* the text of the word */ - short length; /* length of this word */ - short space; /* the size of the following space */ - unsigned paren:1; /* starts with open paren */ - unsigned period:1; /* ends in [.?!])* */ - unsigned punct:1; /* ends in punctuation */ - unsigned final:1; /* end of sentence */ - - /* The remaining fields are computed during the optimization. */ - - short line_length; /* length of the best line starting here */ - COST best_cost; /* cost of best paragraph starting here */ - WORD *next_break; /* break which achieves best_cost */ - }; - -/* Forward declarations. */ - -static bool get_paragraph (void); -static int get_line (int c); -static int get_space (int c); -static int copy_rest (int c); -static bool same_para (int c); -static void flush_paragraph (void); -static void fmt_paragraph (void); -static void check_punctuation (WORD *w); -static COST base_cost (WORD *this); -static COST line_cost (WORD *next, int len); -static void put_paragraph (WORD *finish); -static void put_line (WORD *w, int indent); -static void put_word (WORD *w); -static void put_space (int space); - -/* Option values. */ - -/* User-supplied maximum line width (default WIDTH). The only output - lines - longer than this will each comprise a single word. */ -static int max_width; - -/* Space for the paragraph text. */ -static const char *parabuf; - -/* End of space for the paragraph text. */ -static const char *end_of_parabuf; - -/* The file on which we output */ -static FILE *outfile; - -/* Values derived from the option values. */ - -/* The preferred width of text lines, set to LEEWAY % less than max_width. */ -static int best_width; - -/* Dynamic variables. */ - -/* Start column of the character most recently read from the input file. */ -static int in_column; - -/* Start column of the next character to be written to stdout. */ -static int out_column; - -/* The words of a paragraph -- longer paragraphs are handled neatly - (cf. flush_paragraph()). */ -static WORD words[MAXWORDS]; - -/* A pointer into the above word array, indicating the first position - after the last complete word. Sometimes it will point at an incomplete - word. */ -static WORD *word_limit; - -/* Indentation of the first line of the current paragraph. */ -static int first_indent; - -/* Indentation of other lines of the current paragraph */ -static int other_indent; - -/* The last character read from the input file. */ -static int next_char; - -/* If nonzero, the length of the last line output in the current - paragraph, used to charge for raggedness at the split point for long - paragraphs chosen by fmt_paragraph(). */ -static int last_line_length; - -/* read file F and send formatted output to stdout. */ - -void -fmt (const char *line, const char *line_end, int max_length, FILE *output_file) -{ - parabuf = line; - end_of_parabuf = line_end; - outfile = output_file; - - max_width = max_length; - best_width = max_width * (201 - 2 * LEEWAY) / 200; - - in_column = 0; - other_indent = 0; - next_char = GETC(); - while (get_paragraph ()) - { - fmt_paragraph (); - put_paragraph (word_limit); - } -} - -/* Read a paragraph from input file F. A paragraph consists of a - maximal number of non-blank (excluding any prefix) lines - with the same indent. - - Return false if end-of-file was encountered before the start of a - paragraph, else true. */ - -static bool -get_paragraph (void) -{ - register int c; - - last_line_length = 0; - c = next_char; - - /* Scan (and copy) blank lines, and lines not introduced by the prefix. */ - - while (c == '\n' || c == EOF) - { - c = copy_rest (c); - if (c == EOF) - { - next_char = EOF; - return false; - } - putc ('\n', outfile); - c = GETC(); - } - - /* Got a suitable first line for a paragraph. */ - - first_indent = in_column; - word_limit = words; - c = get_line (c); - - /* Read rest of paragraph. */ - - other_indent = in_column; - while (same_para (c) && in_column == other_indent) - c = get_line (c); - - (word_limit - 1)->period = (word_limit - 1)->final = true; - next_char = c; - return true; -} - -/* Copy to the output a blank line. In the latter, C is \n or EOF. - Return the character (\n or EOF) ending the line. */ - -static int -copy_rest (register int c) -{ - out_column = 0; - while (c != '\n' && c != EOF) - { - putc (c, outfile); - c = GETC(); - } - return c; -} - -/* Return true if a line whose first non-blank character after the - prefix (if any) is C could belong to the current paragraph, - otherwise false. */ - -static bool -same_para (register int c) -{ - return (c != '\n' && c != EOF); -} - -/* Read a line from the input data given first non-blank character C - after the prefix, and the following indent, and break it into words. - A word is a maximal non-empty string of non-white characters. A word - ending in [.?!]["')\]]* and followed by end-of-line or at least two - spaces ends a sentence, as in emacs. - - Return the first non-blank character of the next line. */ - -static int -get_line (register int c) -{ - int start; - register WORD *end_of_word; - - end_of_word = &words[MAXWORDS - 2]; - - do - { /* for each word in a line */ - - /* Scan word. */ - - word_limit->text = parabuf - 1; - do - c = GETC(); - while (c != EOF && !ISSPACE (c)); - word_limit->length = parabuf - word_limit->text - (c != EOF); - in_column += word_limit->length; - - check_punctuation (word_limit); - - /* Scan inter-word space. */ - - start = in_column; - c = get_space (c); - word_limit->space = in_column - start; - word_limit->final = (c == EOF - || (word_limit->period - && (c == '\n' || word_limit->space > 1))); - if (c == '\n' || c == EOF) - word_limit->space = word_limit->final ? 2 : 1; - if (word_limit == end_of_word) - flush_paragraph (); - word_limit++; - if (c == EOF) - { - in_column = first_indent; - return EOF; - } - } - while (c != '\n'); - - in_column = 0; - c = GETC(); - return get_space (c); -} - -/* Read blank characters from the input data, starting with C, and keeping - in_column up-to-date. Return first non-blank character. */ - -static int -get_space (register int c) -{ - for (;;) - { - if (c == ' ') - in_column++; - else if (c == '\t') - in_column = (in_column / TABWIDTH + 1) * TABWIDTH; - else - return c; - c = GETC(); - } -} - -/* Set extra fields in word W describing any attached punctuation. */ - -static void -check_punctuation (register WORD *w) -{ - register const char *start, *finish; - - start = w->text; - finish = start + (w->length - 1); - w->paren = isopen (*start); - w->punct = ISPUNCT (*finish); - while (isclose (*finish) && finish > start) - finish--; - w->period = isperiod (*finish); -} - -/* Flush part of the paragraph to make room. This function is called on - hitting the limit on the number of words or characters. */ - -static void -flush_paragraph (void) -{ - WORD *split_point; - register WORD *w; - COST best_break; - - /* - format what you have so far as a paragraph, - - find a low-cost line break near the end, - - output to there, - - make that the start of the paragraph. */ - - fmt_paragraph (); - - /* Choose a good split point. */ - - split_point = word_limit; - best_break = MAXCOST; - for (w = words->next_break; w != word_limit; w = w->next_break) - { - if (w->best_cost - w->next_break->best_cost < best_break) - { - split_point = w; - best_break = w->best_cost - w->next_break->best_cost; - } - if (best_break <= MAXCOST - LINE_CREDIT) - best_break += LINE_CREDIT; - } - put_paragraph (split_point); - - /* Copy words from split_point down to word -- we use memmove because - the source and target may overlap. */ - - memmove ((char *) words, (char *) split_point, - (word_limit - split_point + 1) * sizeof (WORD)); - word_limit -= split_point - words; -} - -/* Compute the optimal formatting for the whole paragraph by computing - and remembering the optimal formatting for each suffix from the empty - one to the whole paragraph. */ - -static void -fmt_paragraph (void) -{ - register WORD *start, *w; - register int len; - register COST wcost, best; - int saved_length; - - word_limit->best_cost = 0; - saved_length = word_limit->length; - word_limit->length = max_width; /* sentinel */ - - for (start = word_limit - 1; start >= words; start--) - { - best = MAXCOST; - len = start == words ? first_indent : other_indent; - - /* At least one word, however long, in the line. */ - - w = start; - len += w->length; - do - { - w++; - - /* Consider breaking before w. */ - - wcost = line_cost (w, len) + w->best_cost; - if (start == words && last_line_length > 0) - wcost += RAGGED_COST (len - last_line_length); - if (wcost < best) - { - best = wcost; - start->next_break = w; - start->line_length = len; - } - len += (w - 1)->space + w->length; /* w > start >= words */ - } - while (len < max_width); - start->best_cost = best + base_cost (start); - } - - word_limit->length = saved_length; -} - -/* Return the constant component of the cost of breaking before the - word THIS. */ - -static COST -base_cost (register WORD *this) -{ - register COST cost; - - cost = LINE_COST; - - if (this > words) - { - if ((this - 1)->period) - { - if ((this - 1)->final) - cost -= SENTENCE_BONUS; - else - cost += NOBREAK_COST; - } - else if ((this - 1)->punct) - cost -= PUNCT_BONUS; - else if (this > words + 1 && (this - 2)->final) - cost += WIDOW_COST ((this - 1)->length); - } - - if (this->paren) - cost -= PAREN_BONUS; - else if (this->final) - cost += ORPHAN_COST (this->length); - - return cost; -} - -/* Return the component of the cost of breaking before word NEXT that - depends on LEN, the length of the line beginning there. */ - -static COST -line_cost (register WORD *next, register int len) -{ - register int n; - register COST cost; - - if (next == word_limit) - return 0; - n = best_width - len; - cost = SHORT_COST (n); - if (next->next_break != word_limit) - { - n = len - next->line_length; - cost += RAGGED_COST (n); - } - return cost; -} - -/* Output to stdout a paragraph from word up to (but not including) - FINISH, which must be in the next_break chain from word. */ - -static void -put_paragraph (register WORD *finish) -{ - register WORD *w; - - put_line (words, first_indent); - for (w = words->next_break; w != finish; w = w->next_break) - put_line (w, other_indent); -} - -/* Output to stdout the line beginning with word W, beginning in column - INDENT, including the prefix (if any). */ - -static void -put_line (register WORD *w, int indent) -{ - register WORD *endline; - out_column = 0; - put_space (indent); - - endline = w->next_break - 1; - for (; w != endline; w++) - { - put_word (w); - put_space (w->space); - } - put_word (w); - last_line_length = out_column; - putc ('\n', outfile); -} - -/* Output to stdout the word W. */ - -static void -put_word (register WORD *w) -{ - register const char *s; - register int n; - - s = w->text; - for (n = w->length; n != 0; n--) - putc (*s++, outfile); - out_column += w->length; -} - -/* Output to stdout SPACE spaces, or equivalent tabs. */ - -static void -put_space (int space) -{ - out_column += space; - while (space--) - putc (' ', outfile); -} diff --git a/sed/sed.h b/sed/sed.h index c3700b0..1f7456f 100644 --- a/sed/sed.h +++ b/sed/sed.h @@ -201,8 +201,6 @@ int process_files (struct vector *, char **argv); int main (int, char **); -extern void fmt (const char *line, const char *line_end, int max_length, FILE *output_file); - extern int extended_regexp_flags; /* one-byte buffer delimiter */ -- cgit v1.2.1