diff options
Diffstat (limited to 'sed/fmt.c')
-rw-r--r-- | sed/fmt.c | 578 |
1 files changed, 0 insertions, 578 deletions
diff --git a/sed/fmt.c b/sed/fmt.c deleted file mode 100644 index 389bccb..0000000 --- a/sed/fmt.c +++ /dev/null @@ -1,578 +0,0 @@ -/* `L' command implementation for GNU sed, based on GNU fmt 1.22. - Copyright (C) 1994, 1995, 1996, 2002, 2003, 2014 Free Software - Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* GNU fmt was written by Ross Paterson <rap@doc.ic.ac.uk>. */ - -#include "sed.h" - -#include <stdio.h> -#include <string.h> -#include <ctype.h> -#include <sys/types.h> -#include <limits.h> - -/* The following parameters represent the program's idea of what is - "best". Adjust to taste, subject to the caveats given. */ - -/* Prefer lines to be LEEWAY % shorter than the maximum width, giving - room for optimization. */ -#define LEEWAY 7 - -/* Costs and bonuses are expressed as the equivalent departure from the - optimal line length, multiplied by 10. e.g. assigning something a - cost of 50 means that it is as bad as a line 5 characters too short - or too long. The definition of SHORT_COST(n) should not be changed. - However, EQUIV(n) may need tuning. */ - -typedef long COST; - -#define MAXCOST (~(((unsigned long) 1) << (8 * sizeof (COST) -1))) - -#define SQR(n) ((n) * (n)) -#define EQUIV(n) SQR ((COST) (n)) - -/* Cost of a filled line n chars longer or shorter than best_width. */ -#define SHORT_COST(n) EQUIV ((n) * 10) - -/* Cost of the difference between adjacent filled lines. */ -#define RAGGED_COST(n) (SHORT_COST (n) / 2) - -/* Basic cost per line. */ -#define LINE_COST EQUIV (70) - -/* Cost of breaking a line after the first word of a sentence, where - the length of the word is N. */ -#define WIDOW_COST(n) (EQUIV (200) / ((n) + 2)) - -/* Cost of breaking a line before the last word of a sentence, where - the length of the word is N. */ -#define ORPHAN_COST(n) (EQUIV (150) / ((n) + 2)) - -/* Bonus for breaking a line at the end of a sentence. */ -#define SENTENCE_BONUS EQUIV (50) - -/* Cost of breaking a line after a period not marking end of a sentence. - With the definition of sentence we are using (borrowed from emacs, see - get_line()) such a break would then look like a sentence break. Hence - we assign a very high cost -- it should be avoided unless things are - really bad. */ -#define NOBREAK_COST EQUIV (600) - -/* Bonus for breaking a line before open parenthesis. */ -#define PAREN_BONUS EQUIV (40) - -/* Bonus for breaking a line after other punctuation. */ -#define PUNCT_BONUS EQUIV(40) - -/* Credit for breaking a long paragraph one line later. */ -#define LINE_CREDIT EQUIV(3) - -/* Size of paragraph buffer in words. Longer paragraphs are handled - neatly (cf. flush_paragraph()), so there's little to gain by making - these larger. */ -#define MAXWORDS 1000 - -#define GETC() (parabuf == end_of_parabuf ? EOF : *parabuf++) - -/* Extra ctype(3)-style macros. */ - -#define isopen(c) (strchr ("([`'\"", (c)) != NULL) -#define isclose(c) (strchr (")]'\"", (c)) != NULL) -#define isperiod(c) (strchr (".?!", (c)) != NULL) - -/* Size of a tab stop, for expansion on input and re-introduction on - output. */ -#define TABWIDTH 8 - -/* Word descriptor structure. */ - -typedef struct Word WORD; - -struct Word - { - - /* Static attributes determined during input. */ - - const char *text; /* the text of the word */ - short length; /* length of this word */ - short space; /* the size of the following space */ - unsigned paren:1; /* starts with open paren */ - unsigned period:1; /* ends in [.?!])* */ - unsigned punct:1; /* ends in punctuation */ - unsigned final:1; /* end of sentence */ - - /* The remaining fields are computed during the optimization. */ - - short line_length; /* length of the best line starting here */ - COST best_cost; /* cost of best paragraph starting here */ - WORD *next_break; /* break which achieves best_cost */ - }; - -/* Forward declarations. */ - -static bool get_paragraph (void); -static int get_line (int c); -static int get_space (int c); -static int copy_rest (int c); -static bool same_para (int c); -static void flush_paragraph (void); -static void fmt_paragraph (void); -static void check_punctuation (WORD *w); -static COST base_cost (WORD *this); -static COST line_cost (WORD *next, int len); -static void put_paragraph (WORD *finish); -static void put_line (WORD *w, int indent); -static void put_word (WORD *w); -static void put_space (int space); - -/* Option values. */ - -/* User-supplied maximum line width (default WIDTH). The only output - lines - longer than this will each comprise a single word. */ -static int max_width; - -/* Space for the paragraph text. */ -static const char *parabuf; - -/* End of space for the paragraph text. */ -static const char *end_of_parabuf; - -/* The file on which we output */ -static FILE *outfile; - -/* Values derived from the option values. */ - -/* The preferred width of text lines, set to LEEWAY % less than max_width. */ -static int best_width; - -/* Dynamic variables. */ - -/* Start column of the character most recently read from the input file. */ -static int in_column; - -/* Start column of the next character to be written to stdout. */ -static int out_column; - -/* The words of a paragraph -- longer paragraphs are handled neatly - (cf. flush_paragraph()). */ -static WORD words[MAXWORDS]; - -/* A pointer into the above word array, indicating the first position - after the last complete word. Sometimes it will point at an incomplete - word. */ -static WORD *word_limit; - -/* Indentation of the first line of the current paragraph. */ -static int first_indent; - -/* Indentation of other lines of the current paragraph */ -static int other_indent; - -/* The last character read from the input file. */ -static int next_char; - -/* If nonzero, the length of the last line output in the current - paragraph, used to charge for raggedness at the split point for long - paragraphs chosen by fmt_paragraph(). */ -static int last_line_length; - -/* read file F and send formatted output to stdout. */ - -void -fmt (const char *line, const char *line_end, int max_length, FILE *output_file) -{ - parabuf = line; - end_of_parabuf = line_end; - outfile = output_file; - - max_width = max_length; - best_width = max_width * (201 - 2 * LEEWAY) / 200; - - in_column = 0; - other_indent = 0; - next_char = GETC(); - while (get_paragraph ()) - { - fmt_paragraph (); - put_paragraph (word_limit); - } -} - -/* Read a paragraph from input file F. A paragraph consists of a - maximal number of non-blank (excluding any prefix) lines - with the same indent. - - Return false if end-of-file was encountered before the start of a - paragraph, else true. */ - -static bool -get_paragraph (void) -{ - register int c; - - last_line_length = 0; - c = next_char; - - /* Scan (and copy) blank lines, and lines not introduced by the prefix. */ - - while (c == '\n' || c == EOF) - { - c = copy_rest (c); - if (c == EOF) - { - next_char = EOF; - return false; - } - putc ('\n', outfile); - c = GETC(); - } - - /* Got a suitable first line for a paragraph. */ - - first_indent = in_column; - word_limit = words; - c = get_line (c); - - /* Read rest of paragraph. */ - - other_indent = in_column; - while (same_para (c) && in_column == other_indent) - c = get_line (c); - - (word_limit - 1)->period = (word_limit - 1)->final = true; - next_char = c; - return true; -} - -/* Copy to the output a blank line. In the latter, C is \n or EOF. - Return the character (\n or EOF) ending the line. */ - -static int -copy_rest (register int c) -{ - out_column = 0; - while (c != '\n' && c != EOF) - { - putc (c, outfile); - c = GETC(); - } - return c; -} - -/* Return true if a line whose first non-blank character after the - prefix (if any) is C could belong to the current paragraph, - otherwise false. */ - -static bool -same_para (register int c) -{ - return (c != '\n' && c != EOF); -} - -/* Read a line from the input data given first non-blank character C - after the prefix, and the following indent, and break it into words. - A word is a maximal non-empty string of non-white characters. A word - ending in [.?!]["')\]]* and followed by end-of-line or at least two - spaces ends a sentence, as in emacs. - - Return the first non-blank character of the next line. */ - -static int -get_line (register int c) -{ - int start; - register WORD *end_of_word; - - end_of_word = &words[MAXWORDS - 2]; - - do - { /* for each word in a line */ - - /* Scan word. */ - - word_limit->text = parabuf - 1; - do - c = GETC(); - while (c != EOF && !ISSPACE (c)); - word_limit->length = parabuf - word_limit->text - (c != EOF); - in_column += word_limit->length; - - check_punctuation (word_limit); - - /* Scan inter-word space. */ - - start = in_column; - c = get_space (c); - word_limit->space = in_column - start; - word_limit->final = (c == EOF - || (word_limit->period - && (c == '\n' || word_limit->space > 1))); - if (c == '\n' || c == EOF) - word_limit->space = word_limit->final ? 2 : 1; - if (word_limit == end_of_word) - flush_paragraph (); - word_limit++; - if (c == EOF) - { - in_column = first_indent; - return EOF; - } - } - while (c != '\n'); - - in_column = 0; - c = GETC(); - return get_space (c); -} - -/* Read blank characters from the input data, starting with C, and keeping - in_column up-to-date. Return first non-blank character. */ - -static int -get_space (register int c) -{ - for (;;) - { - if (c == ' ') - in_column++; - else if (c == '\t') - in_column = (in_column / TABWIDTH + 1) * TABWIDTH; - else - return c; - c = GETC(); - } -} - -/* Set extra fields in word W describing any attached punctuation. */ - -static void -check_punctuation (register WORD *w) -{ - register const char *start, *finish; - - start = w->text; - finish = start + (w->length - 1); - w->paren = isopen (*start); - w->punct = ISPUNCT (*finish); - while (isclose (*finish) && finish > start) - finish--; - w->period = isperiod (*finish); -} - -/* Flush part of the paragraph to make room. This function is called on - hitting the limit on the number of words or characters. */ - -static void -flush_paragraph (void) -{ - WORD *split_point; - register WORD *w; - COST best_break; - - /* - format what you have so far as a paragraph, - - find a low-cost line break near the end, - - output to there, - - make that the start of the paragraph. */ - - fmt_paragraph (); - - /* Choose a good split point. */ - - split_point = word_limit; - best_break = MAXCOST; - for (w = words->next_break; w != word_limit; w = w->next_break) - { - if (w->best_cost - w->next_break->best_cost < best_break) - { - split_point = w; - best_break = w->best_cost - w->next_break->best_cost; - } - if (best_break <= MAXCOST - LINE_CREDIT) - best_break += LINE_CREDIT; - } - put_paragraph (split_point); - - /* Copy words from split_point down to word -- we use memmove because - the source and target may overlap. */ - - memmove ((char *) words, (char *) split_point, - (word_limit - split_point + 1) * sizeof (WORD)); - word_limit -= split_point - words; -} - -/* Compute the optimal formatting for the whole paragraph by computing - and remembering the optimal formatting for each suffix from the empty - one to the whole paragraph. */ - -static void -fmt_paragraph (void) -{ - register WORD *start, *w; - register int len; - register COST wcost, best; - int saved_length; - - word_limit->best_cost = 0; - saved_length = word_limit->length; - word_limit->length = max_width; /* sentinel */ - - for (start = word_limit - 1; start >= words; start--) - { - best = MAXCOST; - len = start == words ? first_indent : other_indent; - - /* At least one word, however long, in the line. */ - - w = start; - len += w->length; - do - { - w++; - - /* Consider breaking before w. */ - - wcost = line_cost (w, len) + w->best_cost; - if (start == words && last_line_length > 0) - wcost += RAGGED_COST (len - last_line_length); - if (wcost < best) - { - best = wcost; - start->next_break = w; - start->line_length = len; - } - len += (w - 1)->space + w->length; /* w > start >= words */ - } - while (len < max_width); - start->best_cost = best + base_cost (start); - } - - word_limit->length = saved_length; -} - -/* Return the constant component of the cost of breaking before the - word THIS. */ - -static COST -base_cost (register WORD *this) -{ - register COST cost; - - cost = LINE_COST; - - if (this > words) - { - if ((this - 1)->period) - { - if ((this - 1)->final) - cost -= SENTENCE_BONUS; - else - cost += NOBREAK_COST; - } - else if ((this - 1)->punct) - cost -= PUNCT_BONUS; - else if (this > words + 1 && (this - 2)->final) - cost += WIDOW_COST ((this - 1)->length); - } - - if (this->paren) - cost -= PAREN_BONUS; - else if (this->final) - cost += ORPHAN_COST (this->length); - - return cost; -} - -/* Return the component of the cost of breaking before word NEXT that - depends on LEN, the length of the line beginning there. */ - -static COST -line_cost (register WORD *next, register int len) -{ - register int n; - register COST cost; - - if (next == word_limit) - return 0; - n = best_width - len; - cost = SHORT_COST (n); - if (next->next_break != word_limit) - { - n = len - next->line_length; - cost += RAGGED_COST (n); - } - return cost; -} - -/* Output to stdout a paragraph from word up to (but not including) - FINISH, which must be in the next_break chain from word. */ - -static void -put_paragraph (register WORD *finish) -{ - register WORD *w; - - put_line (words, first_indent); - for (w = words->next_break; w != finish; w = w->next_break) - put_line (w, other_indent); -} - -/* Output to stdout the line beginning with word W, beginning in column - INDENT, including the prefix (if any). */ - -static void -put_line (register WORD *w, int indent) -{ - register WORD *endline; - out_column = 0; - put_space (indent); - - endline = w->next_break - 1; - for (; w != endline; w++) - { - put_word (w); - put_space (w->space); - } - put_word (w); - last_line_length = out_column; - putc ('\n', outfile); -} - -/* Output to stdout the word W. */ - -static void -put_word (register WORD *w) -{ - register const char *s; - register int n; - - s = w->text; - for (n = w->length; n != 0; n--) - putc (*s++, outfile); - out_column += w->length; -} - -/* Output to stdout SPACE spaces, or equivalent tabs. */ - -static void -put_space (int space) -{ - out_column += space; - while (space--) - putc (' ', outfile); -} |