diff options
author | Assaf Gordon <assafgordon@gmail.com> | 2013-02-20 13:31:22 -0500 |
---|---|---|
committer | Pádraig Brady <P@draigBrady.com> | 2013-02-28 18:20:30 +0000 |
commit | 374f569579fe4e319d592f4d77ae1ede5566eed6 (patch) | |
tree | b7493c64cf19988dc84aaf1899b1e9c3718896bc /src/uniq.c | |
parent | 8b6d3c5700526f962b12cd5901b55961c5e18186 (diff) | |
download | coreutils-374f569579fe4e319d592f4d77ae1ede5566eed6.tar.gz |
uniq: add the --group option
* src/uniq.c (usage): Summarize the new option,
and adjust the --all-repeated option to be more consistent.
(check_file): Merge the --group functionality into
the core loop for the default uniq operation since
it's very similar and can output lines immediately upon reading.
(main): Handle the new --group option and make it
mutually exclusive with other selection options.
* tests/misc/uniq.pl: Add tests.
* NEWS: Mention the new feature.
* doc/coreutils.texi (uniq invocation): Describe --group.
Diffstat (limited to 'src/uniq.c')
-rw-r--r-- | src/uniq.c | 119 |
1 files changed, 109 insertions, 10 deletions
diff --git a/src/uniq.c b/src/uniq.c index 5efdad7d4..835b5b194 100644 --- a/src/uniq.c +++ b/src/uniq.c @@ -108,11 +108,47 @@ static enum delimit_method const delimit_method_map[] = /* Select whether/how to delimit groups of duplicate lines. */ static enum delimit_method delimit_groups; +enum grouping_method +{ + /* No grouping, when "--group" isn't used */ + GM_NONE, + + /* Delimiter precedes all groups. --group=prepend */ + GM_PREPEND, + + /* Delimiter follows all groups. --group=append */ + GM_APPEND, + + /* Delimiter between groups. --group[=separate] */ + GM_SEPARATE, + + /* Delimiter before and after each group. --group=both */ + GM_BOTH +}; + +static char const *const grouping_method_string[] = +{ + "prepend", "append", "separate", "both", NULL +}; + +static enum grouping_method const grouping_method_map[] = +{ + GM_PREPEND, GM_APPEND, GM_SEPARATE, GM_BOTH +}; + +static enum grouping_method grouping = GM_NONE; + +enum +{ + GROUP_OPTION = CHAR_MAX + 1 +}; + static struct option const longopts[] = { {"count", no_argument, NULL, 'c'}, {"repeated", no_argument, NULL, 'd'}, {"all-repeated", optional_argument, NULL, 'D'}, + {"group", optional_argument, NULL, GROUP_OPTION}, {"ignore-case", no_argument, NULL, 'i'}, {"unique", no_argument, NULL, 'u'}, {"skip-fields", required_argument, NULL, 'f'}, @@ -149,10 +185,18 @@ With no options, matching lines are merged to the first occurrence.\n\ -d, --repeated only print duplicate lines\n\ "), stdout); fputs (_("\ - -D, --all-repeated[=delimit-method] print all duplicate lines\n\ - delimit-method={none(default),prepend,separate}\n\ - Delimiting is done with blank lines\n\ + -D, --all-repeated[=METHOD] print all duplicate lines\n\ + groups can be delimited with an empty line\n\ + METHOD={none(default),prepend,separate}\n\ +"), stdout); + fputs (_("\ -f, --skip-fields=N avoid comparing the first N fields\n\ +"), stdout); + fputs (_("\ + --group[=METHOD] show all items, separating groups with an empty line\n\ 
METHOD={separate(default),prepend,append,both}\n\ +"), stdout); + fputs (_("\ -i, --ignore-case ignore differences in case when comparing\n\ -s, --skip-chars=N avoid comparing the first N characters\n\ -u, --unique only print unique lines\n\ @@ -293,27 +337,48 @@ check_file (const char *infile, const char *outfile, char delimiter) initbuffer (prevline); /* The duplication in the following 'if' and 'else' blocks is an - optimization to distinguish the common case (in which none of - the following options has been specified: --count, -repeated, - --all-repeated, --unique) from the others. In the common case, - this optimization lets uniq output each different line right away, - without waiting to see if the next one is different. */ + optimization to distinguish between when we can print input + lines immediately (1. & 2.) or not. + + 1. --group => all input lines are printed. + checking for unique/duplicated lines is used only for printing + group separators. + + 2. The default case in which none of these options has been specified: + --count, --repeated, --all-repeated, --unique + In the default case, this optimization lets uniq output each different + line right away, without waiting to see if the next one is different. + 3. All other cases. 
+ */ if (output_unique && output_first_repeated && countmode == count_none) { char *prevfield IF_LINT ( = NULL); size_t prevlen IF_LINT ( = 0); + bool first_group_printed = false; while (!feof (stdin)) { char *thisfield; size_t thislen; + bool new_group; + if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) break; + thisfield = find_field (thisline); thislen = thisline->length - 1 - (thisfield - thisline->buffer); - if (prevline->length == 0 - || different (thisfield, prevfield, thislen, prevlen)) + + new_group = (prevline->length == 0 + || different (thisfield, prevfield, thislen, prevlen)); + + if (new_group && grouping != GM_NONE + && (grouping == GM_PREPEND || grouping == GM_BOTH + || (first_group_printed && (grouping == GM_APPEND + || grouping == GM_SEPARATE)))) + putchar (delimiter); + + if (new_group || grouping != GM_NONE) { fwrite (thisline->buffer, sizeof (char), thisline->length, stdout); @@ -321,8 +386,11 @@ check_file (const char *infile, const char *outfile, char delimiter) SWAP_LINES (prevline, thisline); prevfield = thisfield; prevlen = thislen; + first_group_printed = true; } } + if ((grouping == GM_BOTH || grouping == GM_APPEND) && first_group_printed) + putchar (delimiter); } else { @@ -415,6 +483,7 @@ main (int argc, char **argv) int nfiles = 0; char const *file[2]; char delimiter = '\n'; /* change with --zero-terminated, -z */ + bool output_option_used = false; /* if true, one of -u/-d/-D/-c was used */ file[0] = file[1] = "-"; initialize_main (&argc, &argv); @@ -498,10 +567,12 @@ main (int argc, char **argv) case 'c': countmode = count_occurrences; + output_option_used = true; break; case 'd': output_unique = false; + output_option_used = true; break; case 'D': @@ -513,6 +584,16 @@ main (int argc, char **argv) delimit_groups = XARGMATCH ("--all-repeated", optarg, delimit_method_string, delimit_method_map); + output_option_used = true; + break; + + case GROUP_OPTION: + if (optarg == NULL) + grouping = GM_SEPARATE; + else + grouping = 
XARGMATCH ("--group", optarg, + grouping_method_string, + grouping_method_map); break; case 'f': @@ -532,6 +613,7 @@ main (int argc, char **argv) case 'u': output_first_repeated = false; + output_option_used = true; break; case 'w': @@ -552,6 +634,23 @@ main (int argc, char **argv) } } + /* Note we could allow --group with -D at least, and that would + avoid the need to specify a grouping method to --all-repeated. + It was thought best to avoid deprecating those parameters though + and keep --group separate to other options. */ + if (grouping != GM_NONE && output_option_used) + { + error (0, 0, _("--group is mutually exclusive with -c/-d/-D/-u")); + usage (EXIT_FAILURE); + } + + if (grouping != GM_NONE && countmode != count_none) + { + error (0, 0, + _("grouping and printing repeat counts is meaningless")); + usage (EXIT_FAILURE); + } + if (countmode == count_occurrences && output_later_repeated) { error (0, 0, |