diff options
author | Junio C Hamano <gitster@pobox.com> | 2012-09-17 15:53:31 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2012-09-17 15:53:31 -0700 |
commit | 3b0b6b53d5fa8f7ea5d2d9ddb74b0f8186252d28 (patch) | |
tree | 7a08742c7be74c972b2ddbbf5848f0a805108290 | |
parent | 992311cf860347df159081873fba099a94d463f4 (diff) | |
parent | 51f3145c2834c8f1d94c5b7cf3790baf74b9f521 (diff) | |
download | git-3b0b6b53d5fa8f7ea5d2d9ddb74b0f8186252d28.tar.gz |
Merge branch 'mh/string-list'
* mh/string-list:
api-string-list.txt: initialize the string_list the easy way
string_list: add a function string_list_longest_prefix()
string_list: add a new function, string_list_remove_duplicates()
string_list: add a new function, filter_string_list()
string_list: add two new functions for splitting strings
string_list: add function string_list_append_nodup()
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Documentation/technical/api-string-list.txt | 68 | ||||
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | string-list.c | 127 | ||||
-rw-r--r-- | string-list.h | 71 | ||||
-rwxr-xr-x | t/t0063-string-list.sh | 121 | ||||
-rw-r--r-- | test-string-list.c | 123 |
7 files changed, 502 insertions, 10 deletions
diff --git a/.gitignore b/.gitignore index 68fe464090..a188a82bb1 100644 --- a/.gitignore +++ b/.gitignore @@ -194,6 +194,7 @@ /test-run-command /test-sha1 /test-sigchain +/test-string-list /test-subprocess /test-svn-fe /common-cmds.h diff --git a/Documentation/technical/api-string-list.txt b/Documentation/technical/api-string-list.txt index 5a0c14fceb..155ac8cb10 100644 --- a/Documentation/technical/api-string-list.txt +++ b/Documentation/technical/api-string-list.txt @@ -20,8 +20,9 @@ If you need something advanced, you can manually malloc() the `items` member (you need this if you add things later) and you should set the `nr` and `alloc` members in that case, too. -. Adds new items to the list, using `string_list_append` or - `string_list_insert`. +. Adds new items to the list, using `string_list_append`, + `string_list_append_nodup`, `string_list_insert`, + `string_list_split`, and/or `string_list_split_in_place`. . Can check if a string is in the list using `string_list_has_string` or `unsorted_string_list_has_string` and get it from the list using @@ -29,18 +30,23 @@ member (you need this if you add things later) and you should set the . Can sort an unsorted list using `sort_string_list`. +. Can remove duplicate items from a sorted list using + `string_list_remove_duplicates`. + . Can remove individual items of an unsorted list using `unsorted_string_list_delete_item`. +. Can remove items not matching a criterion from a sorted or unsorted + list using `filter_string_list`. + . Finally it should free the list using `string_list_clear`. Example: ---- -struct string_list list; +struct string_list list = STRING_LIST_INIT_NODUP; int i; -memset(&list, 0, sizeof(struct string_list)); string_list_append(&list, "foo"); string_list_append(&list, "bar"); for (i = 0; i < list.nr; i++) @@ -60,6 +66,22 @@ Functions * General ones (works with sorted and unsorted lists as well) +`filter_string_list`:: + + Apply a function to each item in a list, retaining only the + items for which the function returns true. If free_util is + true, call free() on the util members of any items that have + to be deleted. Preserve the order of the items that are + retained. + +`string_list_longest_prefix`:: + + Return the longest string within a string_list that is a + prefix (in the sense of prefixcmp()) of the specified string, + or NULL if no such prefix exists. This function does not + require the string_list to be sorted (it does a linear + search). + `print_string_list`:: Dump a string_list to stdout, useful mainly for debugging purposes. It @@ -96,11 +118,28 @@ write `string_list_insert(...)->util = ...;`. Look up a given string in the string_list, returning the containing string_list_item. If the string is not found, NULL is returned. +`string_list_remove_duplicates`:: + + Remove all but the first of consecutive entries that have the + same string value. If free_util is true, call free() on the + util members of any items that have to be deleted. + * Functions for unsorted lists only `string_list_append`:: - Append a new string to the end of the string_list. + Append a new string to the end of the string_list. If + `strdup_string` is set, then the string argument is copied; + otherwise the new `string_list_entry` refers to the input + string. + +`string_list_append_nodup`:: + + Append a new string to the end of the string_list. The new + `string_list_entry` always refers to the input string, even if + `strdup_string` is set. This function can be used to hand + ownership of a malloc()ed string to a `string_list` that has + `strdup_string` set. `sort_string_list`:: @@ -124,6 +163,25 @@ counterpart for sorted lists, which performs a binary search. is set. The third parameter controls if the `util` pointer of the items should be freed or not. +`string_list_split`:: +`string_list_split_in_place`:: + + Split a string into substrings on a delimiter character and + append the substrings to a `string_list`. If `maxsplit` is + non-negative, then split at most `maxsplit` times. Return the + number of substrings appended to the list. ++ +`string_list_split` requires a `string_list` that has `strdup_strings` +set to true; it leaves the input string untouched and makes copies of +the substrings in newly-allocated memory. +`string_list_split_in_place` requires a `string_list` that has +`strdup_strings` set to false; it splits the input string in place, +overwriting the delimiter characters with NULs and creating new +string_list_items that point into the original string (the original +string must therefore not be modified or freed while the `string_list` +is in use). + + Data structures --------------- @@ -509,6 +509,7 @@ TEST_PROGRAMS_NEED_X += test-run-command TEST_PROGRAMS_NEED_X += test-scrap-cache-tree TEST_PROGRAMS_NEED_X += test-sha1 TEST_PROGRAMS_NEED_X += test-sigchain +TEST_PROGRAMS_NEED_X += test-string-list TEST_PROGRAMS_NEED_X += test-subprocess TEST_PROGRAMS_NEED_X += test-svn-fe diff --git a/string-list.c b/string-list.c index d9810aba42..c54b816244 100644 --- a/string-list.c +++ b/string-list.c @@ -92,6 +92,23 @@ struct string_list_item *string_list_lookup(struct string_list *list, const char return list->items + i; } +void string_list_remove_duplicates(struct string_list *list, int free_util) +{ + if (list->nr > 1) { + int src, dst; + for (src = dst = 1; src < list->nr; src++) { + if (!strcmp(list->items[dst - 1].string, list->items[src].string)) { + if (list->strdup_strings) + free(list->items[src].string); + if (free_util) + free(list->items[src].util); + } else + list->items[dst++] = list->items[src]; + } + list->nr = dst; + } +} + int for_each_string_list(struct string_list *list, string_list_each_func_t fn, void *cb_data) { @@ -102,6 +119,43 @@ int for_each_string_list(struct string_list *list, return ret; } +void filter_string_list(struct string_list *list, int free_util, + string_list_each_func_t want, void *cb_data) +{ + int src, dst = 0; + for (src = 0; src < list->nr; src++) { + if (want(&list->items[src], cb_data)) { + list->items[dst++] = list->items[src]; + } else { + if (list->strdup_strings) + free(list->items[src].string); + if (free_util) + free(list->items[src].util); + } + } + list->nr = dst; +} + +char *string_list_longest_prefix(const struct string_list *prefixes, + const char *string) +{ + int i, max_len = -1; + char *retval = NULL; + + for (i = 0; i < prefixes->nr; i++) { + char *prefix = prefixes->items[i].string; + if (!prefixcmp(string, prefix)) { + int len = strlen(prefix); + if (len > max_len) { + retval = prefix; + max_len = len; + } + } + } + + return retval; +} + void string_list_clear(struct string_list *list, int free_util) { if (list->items) { @@ -148,13 +202,23 @@ void print_string_list(const struct string_list *p, const char *text) printf("%s:%p\n", p->items[i].string, p->items[i].util); } -struct string_list_item *string_list_append(struct string_list *list, const char *string) +struct string_list_item *string_list_append_nodup(struct string_list *list, + char *string) { + struct string_list_item *retval; ALLOC_GROW(list->items, list->nr + 1, list->alloc); - list->items[list->nr].string = - list->strdup_strings ? xstrdup(string) : (char *)string; - list->items[list->nr].util = NULL; - return list->items + list->nr++; + retval = &list->items[list->nr++]; + retval->string = string; + retval->util = NULL; + return retval; +} + +struct string_list_item *string_list_append(struct string_list *list, + const char *string) +{ + return string_list_append_nodup( + list, + list->strdup_strings ? xstrdup(string) : (char *)string); } static int cmp_items(const void *a, const void *b) @@ -194,3 +258,56 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_ list->items[i] = list->items[list->nr-1]; list->nr--; } + +int string_list_split(struct string_list *list, const char *string, + int delim, int maxsplit) +{ + int count = 0; + const char *p = string, *end; + + if (!list->strdup_strings) + die("internal error in string_list_split(): " + "list->strdup_strings must be set"); + for (;;) { + count++; + if (maxsplit >= 0 && count > maxsplit) { + string_list_append(list, p); + return count; + } + end = strchr(p, delim); + if (end) { + string_list_append_nodup(list, xmemdupz(p, end - p)); + p = end + 1; + } else { + string_list_append(list, p); + return count; + } + } +} + +int string_list_split_in_place(struct string_list *list, char *string, + int delim, int maxsplit) +{ + int count = 0; + char *p = string, *end; + + if (list->strdup_strings) + die("internal error in string_list_split_in_place(): " + "list->strdup_strings must not be set"); + for (;;) { + count++; + if (maxsplit >= 0 && count > maxsplit) { + string_list_append(list, p); + return count; + } + end = strchr(p, delim); + if (end) { + *end = '\0'; + string_list_append(list, p); + p = end + 1; + } else { + string_list_append(list, p); + return count; + } + } +} diff --git a/string-list.h b/string-list.h index 0684cb73bf..5efd07b44e 100644 --- a/string-list.h +++ b/string-list.h @@ -29,6 +29,24 @@ int for_each_string_list(struct string_list *list, #define for_each_string_list_item(item,list) \ for (item = (list)->items; item < (list)->items + (list)->nr; ++item) +/* + * Apply want to each item in list, retaining only the ones for which + * the function returns true. If free_util is true, call free() on + * the util members of any items that have to be deleted. Preserve + * the order of the items that are retained. + */ +void filter_string_list(struct string_list *list, int free_util, + string_list_each_func_t want, void *cb_data); + +/* + * Return the longest string in prefixes that is a prefix (in the + * sense of prefixcmp()) of string, or NULL if no such prefix exists. + * This function does not require the string_list to be sorted (it + * does a linear search). + */ +char *string_list_longest_prefix(const struct string_list *prefixes, const char *string); + + /* Use these functions only on sorted lists: */ int string_list_has_string(const struct string_list *list, const char *string); int string_list_find_insert_index(const struct string_list *list, const char *string, @@ -38,11 +56,64 @@ struct string_list_item *string_list_insert_at_index(struct string_list *list, int insert_at, const char *string); struct string_list_item *string_list_lookup(struct string_list *list, const char *string); +/* + * Remove all but the first of consecutive entries with the same + * string value. If free_util is true, call free() on the util + * members of any items that have to be deleted. + */ +void string_list_remove_duplicates(struct string_list *sorted_list, int free_util); + + /* Use these functions only on unsorted lists: */ + +/* + * Add string to the end of list. If list->strdup_string is set, then + * string is copied; otherwise the new string_list_entry refers to the + * input string. + */ struct string_list_item *string_list_append(struct string_list *list, const char *string); + +/* + * Like string_list_append(), except string is never copied. When + * list->strdup_strings is set, this function can be used to hand + * ownership of a malloc()ed string to list without making an extra + * copy. + */ +struct string_list_item *string_list_append_nodup(struct string_list *list, char *string); + void sort_string_list(struct string_list *list); int unsorted_string_list_has_string(struct string_list *list, const char *string); struct string_list_item *unsorted_string_list_lookup(struct string_list *list, const char *string); + void unsorted_string_list_delete_item(struct string_list *list, int i, int free_util); + +/* + * Split string into substrings on character delim and append the + * substrings to list. The input string is not modified. + * list->strdup_strings must be set, as new memory needs to be + * allocated to hold the substrings. If maxsplit is non-negative, + * then split at most maxsplit times. Return the number of substrings + * appended to list. + * + * Examples: + * string_list_split(l, "foo:bar:baz", ':', -1) -> ["foo", "bar", "baz"] + * string_list_split(l, "foo:bar:baz", ':', 0) -> ["foo:bar:baz"] + * string_list_split(l, "foo:bar:baz", ':', 1) -> ["foo", "bar:baz"] + * string_list_split(l, "foo:bar:", ':', -1) -> ["foo", "bar", ""] + * string_list_split(l, "", ':', -1) -> [""] + * string_list_split(l, ":", ':', -1) -> ["", ""] + */ +int string_list_split(struct string_list *list, const char *string, + int delim, int maxsplit); + +/* + * Like string_list_split(), except that string is split in-place: the + * delimiter characters in string are overwritten with NULs, and the + * new string_list_items point into string (which therefore must not + * be modified or freed while the string_list is in use). + * list->strdup_strings must *not* be set. + */ +int string_list_split_in_place(struct string_list *list, char *string, + int delim, int maxsplit); #endif /* STRING_LIST_H */ diff --git a/t/t0063-string-list.sh b/t/t0063-string-list.sh new file mode 100755 index 0000000000..41c8826a74 --- /dev/null +++ b/t/t0063-string-list.sh @@ -0,0 +1,121 @@ +#!/bin/sh +# +# Copyright (c) 2012 Michael Haggerty +# + +test_description='Test string list functionality' + +. ./test-lib.sh + +test_split () { + cat >expected && + test_expect_success "split $1 at $2, max $3" " + test-string-list split '$1' '$2' '$3' >actual && + test_cmp expected actual && + test-string-list split_in_place '$1' '$2' '$3' >actual && + test_cmp expected actual + " +} + +test_longest_prefix () { + test "$(test-string-list longest_prefix "$1" "$2")" = "$3" +} + +test_no_longest_prefix () { + test_must_fail test-string-list longest_prefix "$1" "$2" +} + +test_split "foo:bar:baz" ":" "-1" <<EOF +3 +[0]: "foo" +[1]: "bar" +[2]: "baz" +EOF + +test_split "foo:bar:baz" ":" "0" <<EOF +1 +[0]: "foo:bar:baz" +EOF + +test_split "foo:bar:baz" ":" "1" <<EOF +2 +[0]: "foo" +[1]: "bar:baz" +EOF + +test_split "foo:bar:baz" ":" "2" <<EOF +3 +[0]: "foo" +[1]: "bar" +[2]: "baz" +EOF + +test_split "foo:bar:" ":" "-1" <<EOF +3 +[0]: "foo" +[1]: "bar" +[2]: "" +EOF + +test_split "" ":" "-1" <<EOF +1 +[0]: "" +EOF + +test_split ":" ":" "-1" <<EOF +2 +[0]: "" +[1]: "" +EOF + +test_expect_success "test filter_string_list" ' + test "x-" = "x$(test-string-list filter - y)" && + test "x-" = "x$(test-string-list filter no y)" && + test yes = "$(test-string-list filter yes y)" && + test yes = "$(test-string-list filter no:yes y)" && + test yes = "$(test-string-list filter yes:no y)" && + test y1:y2 = "$(test-string-list filter y1:y2 y)" && + test y2:y1 = "$(test-string-list filter y2:y1 y)" && + test "x-" = "x$(test-string-list filter x1:x2 y)" +' + +test_expect_success "test remove_duplicates" ' + test "x-" = "x$(test-string-list remove_duplicates -)" && + test "x" = "x$(test-string-list remove_duplicates "")" && + test a = "$(test-string-list remove_duplicates a)" && + test a = "$(test-string-list remove_duplicates a:a)" && + test a = "$(test-string-list remove_duplicates a:a:a:a:a)" && + test a:b = "$(test-string-list remove_duplicates a:b)" && + test a:b = "$(test-string-list remove_duplicates a:a:b)" && + test a:b = "$(test-string-list remove_duplicates a:b:b)" && + test a:b:c = "$(test-string-list remove_duplicates a:b:c)" && + test a:b:c = "$(test-string-list remove_duplicates a:a:b:c)" && + test a:b:c = "$(test-string-list remove_duplicates a:b:b:c)" && + test a:b:c = "$(test-string-list remove_duplicates a:b:c:c)" && + test a:b:c = "$(test-string-list remove_duplicates a:a:b:b:c:c)" && + test a:b:c = "$(test-string-list remove_duplicates a:a:a:b:b:b:c:c:c)" +' + +test_expect_success "test longest_prefix" ' + test_no_longest_prefix - '' && + test_no_longest_prefix - x && + test_longest_prefix "" x "" && + test_longest_prefix x x x && + test_longest_prefix "" foo "" && + test_longest_prefix : foo "" && + test_longest_prefix f foo f && + test_longest_prefix foo foobar foo && + test_longest_prefix foo foo foo && + test_no_longest_prefix bar foo && + test_no_longest_prefix bar:bar foo && + test_no_longest_prefix foobar foo && + test_longest_prefix foo:bar foo foo && + test_longest_prefix foo:bar bar bar && + test_longest_prefix foo::bar foo foo && + test_longest_prefix foo:foobar foo foo && + test_longest_prefix foobar:foo foo foo && + test_longest_prefix foo: bar "" && + test_longest_prefix :foo bar "" +' + +test_done diff --git a/test-string-list.c b/test-string-list.c new file mode 100644 index 0000000000..5e9631fe34 --- /dev/null +++ b/test-string-list.c @@ -0,0 +1,123 @@ +#include "cache.h" +#include "string-list.h" + +/* + * Parse an argument into a string list. arg should either be a + * ':'-separated list of strings, or "-" to indicate an empty string + * list (as opposed to "", which indicates a string list containing a + * single empty string). list->strdup_strings must be set. + */ +void parse_string_list(struct string_list *list, const char *arg) +{ + if (!strcmp(arg, "-")) + return; + + (void)string_list_split(list, arg, ':', -1); +} + +void write_list(const struct string_list *list) +{ + int i; + for (i = 0; i < list->nr; i++) + printf("[%d]: \"%s\"\n", i, list->items[i].string); +} + +void write_list_compact(const struct string_list *list) +{ + int i; + if (!list->nr) + printf("-\n"); + else { + printf("%s", list->items[0].string); + for (i = 1; i < list->nr; i++) + printf(":%s", list->items[i].string); + printf("\n"); + } +} + +int prefix_cb(struct string_list_item *item, void *cb_data) +{ + const char *prefix = (const char *)cb_data; + return !prefixcmp(item->string, prefix); +} + +int main(int argc, char **argv) +{ + if (argc == 5 && !strcmp(argv[1], "split")) { + struct string_list list = STRING_LIST_INIT_DUP; + int i; + const char *s = argv[2]; + int delim = *argv[3]; + int maxsplit = atoi(argv[4]); + + i = string_list_split(&list, s, delim, maxsplit); + printf("%d\n", i); + write_list(&list); + string_list_clear(&list, 0); + return 0; + } + + if (argc == 5 && !strcmp(argv[1], "split_in_place")) { + struct string_list list = STRING_LIST_INIT_NODUP; + int i; + char *s = xstrdup(argv[2]); + int delim = *argv[3]; + int maxsplit = atoi(argv[4]); + + i = string_list_split_in_place(&list, s, delim, maxsplit); + printf("%d\n", i); + write_list(&list); + string_list_clear(&list, 0); + free(s); + return 0; + } + + if (argc == 4 && !strcmp(argv[1], "filter")) { + /* + * Retain only the items that have the specified prefix. + * Arguments: list|- prefix + */ + struct string_list list = STRING_LIST_INIT_DUP; + const char *prefix = argv[3]; + + parse_string_list(&list, argv[2]); + filter_string_list(&list, 0, prefix_cb, (void *)prefix); + write_list_compact(&list); + string_list_clear(&list, 0); + return 0; + } + + if (argc == 3 && !strcmp(argv[1], "remove_duplicates")) { + struct string_list list = STRING_LIST_INIT_DUP; + + parse_string_list(&list, argv[2]); + string_list_remove_duplicates(&list, 0); + write_list_compact(&list); + string_list_clear(&list, 0); + return 0; + } + + if (argc == 4 && !strcmp(argv[1], "longest_prefix")) { + /* arguments: <colon-separated-prefixes>|- <string> */ + struct string_list prefixes = STRING_LIST_INIT_DUP; + int retval; + const char *prefix_string = argv[2]; + const char *string = argv[3]; + const char *match; + + parse_string_list(&prefixes, prefix_string); + match = string_list_longest_prefix(&prefixes, string); + if (match) { + printf("%s\n", match); + retval = 0; + } + else + retval = 1; + string_list_clear(&prefixes, 0); + return retval; + } + + fprintf(stderr, "%s: unknown function name: %s\n", argv[0], + argv[1] ? argv[1] : "(there was none)"); + return 1; +} |