summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2012-09-17 15:53:31 -0700
committerJunio C Hamano <gitster@pobox.com>2012-09-17 15:53:31 -0700
commit3b0b6b53d5fa8f7ea5d2d9ddb74b0f8186252d28 (patch)
tree7a08742c7be74c972b2ddbbf5848f0a805108290
parent992311cf860347df159081873fba099a94d463f4 (diff)
parent51f3145c2834c8f1d94c5b7cf3790baf74b9f521 (diff)
downloadgit-3b0b6b53d5fa8f7ea5d2d9ddb74b0f8186252d28.tar.gz
Merge branch 'mh/string-list'
* mh/string-list: api-string-list.txt: initialize the string_list the easy way string_list: add a function string_list_longest_prefix() string_list: add a new function, string_list_remove_duplicates() string_list: add a new function, filter_string_list() string_list: add two new functions for splitting strings string_list: add function string_list_append_nodup()
-rw-r--r--.gitignore1
-rw-r--r--Documentation/technical/api-string-list.txt68
-rw-r--r--Makefile1
-rw-r--r--string-list.c127
-rw-r--r--string-list.h71
-rwxr-xr-xt/t0063-string-list.sh121
-rw-r--r--test-string-list.c123
7 files changed, 502 insertions, 10 deletions
diff --git a/.gitignore b/.gitignore
index 68fe464090..a188a82bb1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -194,6 +194,7 @@
/test-run-command
/test-sha1
/test-sigchain
+/test-string-list
/test-subprocess
/test-svn-fe
/common-cmds.h
diff --git a/Documentation/technical/api-string-list.txt b/Documentation/technical/api-string-list.txt
index 5a0c14fceb..155ac8cb10 100644
--- a/Documentation/technical/api-string-list.txt
+++ b/Documentation/technical/api-string-list.txt
@@ -20,8 +20,9 @@ If you need something advanced, you can manually malloc() the `items`
member (you need this if you add things later) and you should set the
`nr` and `alloc` members in that case, too.
-. Adds new items to the list, using `string_list_append` or
- `string_list_insert`.
+. Adds new items to the list, using `string_list_append`,
+ `string_list_append_nodup`, `string_list_insert`,
+ `string_list_split`, and/or `string_list_split_in_place`.
. Can check if a string is in the list using `string_list_has_string` or
`unsorted_string_list_has_string` and get it from the list using
@@ -29,18 +30,23 @@ member (you need this if you add things later) and you should set the
. Can sort an unsorted list using `sort_string_list`.
+. Can remove duplicate items from a sorted list using
+ `string_list_remove_duplicates`.
+
. Can remove individual items of an unsorted list using
`unsorted_string_list_delete_item`.
+. Can remove items not matching a criterion from a sorted or unsorted
+ list using `filter_string_list`.
+
. Finally it should free the list using `string_list_clear`.
Example:
----
-struct string_list list;
+struct string_list list = STRING_LIST_INIT_NODUP;
int i;
-memset(&list, 0, sizeof(struct string_list));
string_list_append(&list, "foo");
string_list_append(&list, "bar");
for (i = 0; i < list.nr; i++)
@@ -60,6 +66,22 @@ Functions
* General ones (works with sorted and unsorted lists as well)
+`filter_string_list`::
+
+ Apply a function to each item in a list, retaining only the
+ items for which the function returns true. If free_util is
+ true, call free() on the util members of any items that have
+ to be deleted. Preserve the order of the items that are
+ retained.
+
+`string_list_longest_prefix`::
+
+ Return the longest string within a string_list that is a
+ prefix (in the sense of prefixcmp()) of the specified string,
+ or NULL if no such prefix exists. This function does not
+ require the string_list to be sorted (it does a linear
+ search).
+
`print_string_list`::
Dump a string_list to stdout, useful mainly for debugging purposes. It
@@ -96,11 +118,28 @@ write `string_list_insert(...)->util = ...;`.
Look up a given string in the string_list, returning the containing
string_list_item. If the string is not found, NULL is returned.
+`string_list_remove_duplicates`::
+
+ Remove all but the first of consecutive entries that have the
+ same string value. If free_util is true, call free() on the
+ util members of any items that have to be deleted.
+
* Functions for unsorted lists only
`string_list_append`::
- Append a new string to the end of the string_list.
+ Append a new string to the end of the string_list. If
+ `strdup_string` is set, then the string argument is copied;
+ otherwise the new `string_list_entry` refers to the input
+ string.
+
+`string_list_append_nodup`::
+
+ Append a new string to the end of the string_list. The new
+ `string_list_entry` always refers to the input string, even if
+ `strdup_string` is set. This function can be used to hand
+ ownership of a malloc()ed string to a `string_list` that has
+ `strdup_string` set.
`sort_string_list`::
@@ -124,6 +163,25 @@ counterpart for sorted lists, which performs a binary search.
is set. The third parameter controls if the `util` pointer of the
items should be freed or not.
+`string_list_split`::
+`string_list_split_in_place`::
+
+ Split a string into substrings on a delimiter character and
+ append the substrings to a `string_list`. If `maxsplit` is
+ non-negative, then split at most `maxsplit` times. Return the
+ number of substrings appended to the list.
++
+`string_list_split` requires a `string_list` that has `strdup_strings`
+set to true; it leaves the input string untouched and makes copies of
+the substrings in newly-allocated memory.
+`string_list_split_in_place` requires a `string_list` that has
+`strdup_strings` set to false; it splits the input string in place,
+overwriting the delimiter characters with NULs and creating new
+string_list_items that point into the original string (the original
+string must therefore not be modified or freed while the `string_list`
+is in use).
+
+
Data structures
---------------
diff --git a/Makefile b/Makefile
index 56301dc0a8..a49d1db288 100644
--- a/Makefile
+++ b/Makefile
@@ -509,6 +509,7 @@ TEST_PROGRAMS_NEED_X += test-run-command
TEST_PROGRAMS_NEED_X += test-scrap-cache-tree
TEST_PROGRAMS_NEED_X += test-sha1
TEST_PROGRAMS_NEED_X += test-sigchain
+TEST_PROGRAMS_NEED_X += test-string-list
TEST_PROGRAMS_NEED_X += test-subprocess
TEST_PROGRAMS_NEED_X += test-svn-fe
diff --git a/string-list.c b/string-list.c
index d9810aba42..c54b816244 100644
--- a/string-list.c
+++ b/string-list.c
@@ -92,6 +92,23 @@ struct string_list_item *string_list_lookup(struct string_list *list, const char
return list->items + i;
}
+void string_list_remove_duplicates(struct string_list *list, int free_util)
+{
+ if (list->nr > 1) {
+ int src, dst;
+ for (src = dst = 1; src < list->nr; src++) {
+ if (!strcmp(list->items[dst - 1].string, list->items[src].string)) {
+ if (list->strdup_strings)
+ free(list->items[src].string);
+ if (free_util)
+ free(list->items[src].util);
+ } else
+ list->items[dst++] = list->items[src];
+ }
+ list->nr = dst;
+ }
+}
+
int for_each_string_list(struct string_list *list,
string_list_each_func_t fn, void *cb_data)
{
@@ -102,6 +119,43 @@ int for_each_string_list(struct string_list *list,
return ret;
}
+void filter_string_list(struct string_list *list, int free_util,
+ string_list_each_func_t want, void *cb_data)
+{
+ int src, dst = 0;
+ for (src = 0; src < list->nr; src++) {
+ if (want(&list->items[src], cb_data)) {
+ list->items[dst++] = list->items[src];
+ } else {
+ if (list->strdup_strings)
+ free(list->items[src].string);
+ if (free_util)
+ free(list->items[src].util);
+ }
+ }
+ list->nr = dst;
+}
+
+char *string_list_longest_prefix(const struct string_list *prefixes,
+ const char *string)
+{
+ int i, max_len = -1;
+ char *retval = NULL;
+
+ for (i = 0; i < prefixes->nr; i++) {
+ char *prefix = prefixes->items[i].string;
+ if (!prefixcmp(string, prefix)) {
+ int len = strlen(prefix);
+ if (len > max_len) {
+ retval = prefix;
+ max_len = len;
+ }
+ }
+ }
+
+ return retval;
+}
+
void string_list_clear(struct string_list *list, int free_util)
{
if (list->items) {
@@ -148,13 +202,23 @@ void print_string_list(const struct string_list *p, const char *text)
printf("%s:%p\n", p->items[i].string, p->items[i].util);
}
-struct string_list_item *string_list_append(struct string_list *list, const char *string)
+struct string_list_item *string_list_append_nodup(struct string_list *list,
+ char *string)
{
+ struct string_list_item *retval;
ALLOC_GROW(list->items, list->nr + 1, list->alloc);
- list->items[list->nr].string =
- list->strdup_strings ? xstrdup(string) : (char *)string;
- list->items[list->nr].util = NULL;
- return list->items + list->nr++;
+ retval = &list->items[list->nr++];
+ retval->string = string;
+ retval->util = NULL;
+ return retval;
+}
+
+struct string_list_item *string_list_append(struct string_list *list,
+ const char *string)
+{
+ return string_list_append_nodup(
+ list,
+ list->strdup_strings ? xstrdup(string) : (char *)string);
}
static int cmp_items(const void *a, const void *b)
@@ -194,3 +258,56 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_
list->items[i] = list->items[list->nr-1];
list->nr--;
}
+
+int string_list_split(struct string_list *list, const char *string,
+ int delim, int maxsplit)
+{
+ int count = 0;
+ const char *p = string, *end;
+
+ if (!list->strdup_strings)
+ die("internal error in string_list_split(): "
+ "list->strdup_strings must be set");
+ for (;;) {
+ count++;
+ if (maxsplit >= 0 && count > maxsplit) {
+ string_list_append(list, p);
+ return count;
+ }
+ end = strchr(p, delim);
+ if (end) {
+ string_list_append_nodup(list, xmemdupz(p, end - p));
+ p = end + 1;
+ } else {
+ string_list_append(list, p);
+ return count;
+ }
+ }
+}
+
+int string_list_split_in_place(struct string_list *list, char *string,
+ int delim, int maxsplit)
+{
+ int count = 0;
+ char *p = string, *end;
+
+ if (list->strdup_strings)
+ die("internal error in string_list_split_in_place(): "
+ "list->strdup_strings must not be set");
+ for (;;) {
+ count++;
+ if (maxsplit >= 0 && count > maxsplit) {
+ string_list_append(list, p);
+ return count;
+ }
+ end = strchr(p, delim);
+ if (end) {
+ *end = '\0';
+ string_list_append(list, p);
+ p = end + 1;
+ } else {
+ string_list_append(list, p);
+ return count;
+ }
+ }
+}
diff --git a/string-list.h b/string-list.h
index 0684cb73bf..5efd07b44e 100644
--- a/string-list.h
+++ b/string-list.h
@@ -29,6 +29,24 @@ int for_each_string_list(struct string_list *list,
#define for_each_string_list_item(item,list) \
for (item = (list)->items; item < (list)->items + (list)->nr; ++item)
+/*
+ * Apply want to each item in list, retaining only the ones for which
+ * the function returns true. If free_util is true, call free() on
+ * the util members of any items that have to be deleted. Preserve
+ * the order of the items that are retained.
+ */
+void filter_string_list(struct string_list *list, int free_util,
+ string_list_each_func_t want, void *cb_data);
+
+/*
+ * Return the longest string in prefixes that is a prefix (in the
+ * sense of prefixcmp()) of string, or NULL if no such prefix exists.
+ * This function does not require the string_list to be sorted (it
+ * does a linear search).
+ */
+char *string_list_longest_prefix(const struct string_list *prefixes, const char *string);
+
+
/* Use these functions only on sorted lists: */
int string_list_has_string(const struct string_list *list, const char *string);
int string_list_find_insert_index(const struct string_list *list, const char *string,
@@ -38,11 +56,64 @@ struct string_list_item *string_list_insert_at_index(struct string_list *list,
int insert_at, const char *string);
struct string_list_item *string_list_lookup(struct string_list *list, const char *string);
+/*
+ * Remove all but the first of consecutive entries with the same
+ * string value. If free_util is true, call free() on the util
+ * members of any items that have to be deleted.
+ */
+void string_list_remove_duplicates(struct string_list *sorted_list, int free_util);
+
+
/* Use these functions only on unsorted lists: */
+
+/*
+ * Add string to the end of list. If list->strdup_string is set, then
+ * string is copied; otherwise the new string_list_entry refers to the
+ * input string.
+ */
struct string_list_item *string_list_append(struct string_list *list, const char *string);
+
+/*
+ * Like string_list_append(), except string is never copied. When
+ * list->strdup_strings is set, this function can be used to hand
+ * ownership of a malloc()ed string to list without making an extra
+ * copy.
+ */
+struct string_list_item *string_list_append_nodup(struct string_list *list, char *string);
+
void sort_string_list(struct string_list *list);
int unsorted_string_list_has_string(struct string_list *list, const char *string);
struct string_list_item *unsorted_string_list_lookup(struct string_list *list,
const char *string);
+
void unsorted_string_list_delete_item(struct string_list *list, int i, int free_util);
+
+/*
+ * Split string into substrings on character delim and append the
+ * substrings to list. The input string is not modified.
+ * list->strdup_strings must be set, as new memory needs to be
+ * allocated to hold the substrings. If maxsplit is non-negative,
+ * then split at most maxsplit times. Return the number of substrings
+ * appended to list.
+ *
+ * Examples:
+ * string_list_split(l, "foo:bar:baz", ':', -1) -> ["foo", "bar", "baz"]
+ * string_list_split(l, "foo:bar:baz", ':', 0) -> ["foo:bar:baz"]
+ * string_list_split(l, "foo:bar:baz", ':', 1) -> ["foo", "bar:baz"]
+ * string_list_split(l, "foo:bar:", ':', -1) -> ["foo", "bar", ""]
+ * string_list_split(l, "", ':', -1) -> [""]
+ * string_list_split(l, ":", ':', -1) -> ["", ""]
+ */
+int string_list_split(struct string_list *list, const char *string,
+ int delim, int maxsplit);
+
+/*
+ * Like string_list_split(), except that string is split in-place: the
+ * delimiter characters in string are overwritten with NULs, and the
+ * new string_list_items point into string (which therefore must not
+ * be modified or freed while the string_list is in use).
+ * list->strdup_strings must *not* be set.
+ */
+int string_list_split_in_place(struct string_list *list, char *string,
+ int delim, int maxsplit);
#endif /* STRING_LIST_H */
diff --git a/t/t0063-string-list.sh b/t/t0063-string-list.sh
new file mode 100755
index 0000000000..41c8826a74
--- /dev/null
+++ b/t/t0063-string-list.sh
@@ -0,0 +1,121 @@
+#!/bin/sh
+#
+# Copyright (c) 2012 Michael Haggerty
+#
+
+test_description='Test string list functionality'
+
+. ./test-lib.sh
+
+test_split () {
+ cat >expected &&
+ test_expect_success "split $1 at $2, max $3" "
+ test-string-list split '$1' '$2' '$3' >actual &&
+ test_cmp expected actual &&
+ test-string-list split_in_place '$1' '$2' '$3' >actual &&
+ test_cmp expected actual
+ "
+}
+
+test_longest_prefix () {
+ test "$(test-string-list longest_prefix "$1" "$2")" = "$3"
+}
+
+test_no_longest_prefix () {
+ test_must_fail test-string-list longest_prefix "$1" "$2"
+}
+
+test_split "foo:bar:baz" ":" "-1" <<EOF
+3
+[0]: "foo"
+[1]: "bar"
+[2]: "baz"
+EOF
+
+test_split "foo:bar:baz" ":" "0" <<EOF
+1
+[0]: "foo:bar:baz"
+EOF
+
+test_split "foo:bar:baz" ":" "1" <<EOF
+2
+[0]: "foo"
+[1]: "bar:baz"
+EOF
+
+test_split "foo:bar:baz" ":" "2" <<EOF
+3
+[0]: "foo"
+[1]: "bar"
+[2]: "baz"
+EOF
+
+test_split "foo:bar:" ":" "-1" <<EOF
+3
+[0]: "foo"
+[1]: "bar"
+[2]: ""
+EOF
+
+test_split "" ":" "-1" <<EOF
+1
+[0]: ""
+EOF
+
+test_split ":" ":" "-1" <<EOF
+2
+[0]: ""
+[1]: ""
+EOF
+
+test_expect_success "test filter_string_list" '
+ test "x-" = "x$(test-string-list filter - y)" &&
+ test "x-" = "x$(test-string-list filter no y)" &&
+ test yes = "$(test-string-list filter yes y)" &&
+ test yes = "$(test-string-list filter no:yes y)" &&
+ test yes = "$(test-string-list filter yes:no y)" &&
+ test y1:y2 = "$(test-string-list filter y1:y2 y)" &&
+ test y2:y1 = "$(test-string-list filter y2:y1 y)" &&
+ test "x-" = "x$(test-string-list filter x1:x2 y)"
+'
+
+test_expect_success "test remove_duplicates" '
+ test "x-" = "x$(test-string-list remove_duplicates -)" &&
+ test "x" = "x$(test-string-list remove_duplicates "")" &&
+ test a = "$(test-string-list remove_duplicates a)" &&
+ test a = "$(test-string-list remove_duplicates a:a)" &&
+ test a = "$(test-string-list remove_duplicates a:a:a:a:a)" &&
+ test a:b = "$(test-string-list remove_duplicates a:b)" &&
+ test a:b = "$(test-string-list remove_duplicates a:a:b)" &&
+ test a:b = "$(test-string-list remove_duplicates a:b:b)" &&
+ test a:b:c = "$(test-string-list remove_duplicates a:b:c)" &&
+ test a:b:c = "$(test-string-list remove_duplicates a:a:b:c)" &&
+ test a:b:c = "$(test-string-list remove_duplicates a:b:b:c)" &&
+ test a:b:c = "$(test-string-list remove_duplicates a:b:c:c)" &&
+ test a:b:c = "$(test-string-list remove_duplicates a:a:b:b:c:c)" &&
+ test a:b:c = "$(test-string-list remove_duplicates a:a:a:b:b:b:c:c:c)"
+'
+
+test_expect_success "test longest_prefix" '
+ test_no_longest_prefix - '' &&
+ test_no_longest_prefix - x &&
+ test_longest_prefix "" x "" &&
+ test_longest_prefix x x x &&
+ test_longest_prefix "" foo "" &&
+ test_longest_prefix : foo "" &&
+ test_longest_prefix f foo f &&
+ test_longest_prefix foo foobar foo &&
+ test_longest_prefix foo foo foo &&
+ test_no_longest_prefix bar foo &&
+ test_no_longest_prefix bar:bar foo &&
+ test_no_longest_prefix foobar foo &&
+ test_longest_prefix foo:bar foo foo &&
+ test_longest_prefix foo:bar bar bar &&
+ test_longest_prefix foo::bar foo foo &&
+ test_longest_prefix foo:foobar foo foo &&
+ test_longest_prefix foobar:foo foo foo &&
+ test_longest_prefix foo: bar "" &&
+ test_longest_prefix :foo bar ""
+'
+
+test_done
diff --git a/test-string-list.c b/test-string-list.c
new file mode 100644
index 0000000000..5e9631fe34
--- /dev/null
+++ b/test-string-list.c
@@ -0,0 +1,123 @@
+#include "cache.h"
+#include "string-list.h"
+
+/*
+ * Parse an argument into a string list. arg should either be a
+ * ':'-separated list of strings, or "-" to indicate an empty string
+ * list (as opposed to "", which indicates a string list containing a
+ * single empty string). list->strdup_strings must be set.
+ */
+void parse_string_list(struct string_list *list, const char *arg)
+{
+ if (!strcmp(arg, "-"))
+ return;
+
+ (void)string_list_split(list, arg, ':', -1);
+}
+
+void write_list(const struct string_list *list)
+{
+ int i;
+ for (i = 0; i < list->nr; i++)
+ printf("[%d]: \"%s\"\n", i, list->items[i].string);
+}
+
+void write_list_compact(const struct string_list *list)
+{
+ int i;
+ if (!list->nr)
+ printf("-\n");
+ else {
+ printf("%s", list->items[0].string);
+ for (i = 1; i < list->nr; i++)
+ printf(":%s", list->items[i].string);
+ printf("\n");
+ }
+}
+
+int prefix_cb(struct string_list_item *item, void *cb_data)
+{
+ const char *prefix = (const char *)cb_data;
+ return !prefixcmp(item->string, prefix);
+}
+
+int main(int argc, char **argv)
+{
+ if (argc == 5 && !strcmp(argv[1], "split")) {
+ struct string_list list = STRING_LIST_INIT_DUP;
+ int i;
+ const char *s = argv[2];
+ int delim = *argv[3];
+ int maxsplit = atoi(argv[4]);
+
+ i = string_list_split(&list, s, delim, maxsplit);
+ printf("%d\n", i);
+ write_list(&list);
+ string_list_clear(&list, 0);
+ return 0;
+ }
+
+ if (argc == 5 && !strcmp(argv[1], "split_in_place")) {
+ struct string_list list = STRING_LIST_INIT_NODUP;
+ int i;
+ char *s = xstrdup(argv[2]);
+ int delim = *argv[3];
+ int maxsplit = atoi(argv[4]);
+
+ i = string_list_split_in_place(&list, s, delim, maxsplit);
+ printf("%d\n", i);
+ write_list(&list);
+ string_list_clear(&list, 0);
+ free(s);
+ return 0;
+ }
+
+ if (argc == 4 && !strcmp(argv[1], "filter")) {
+ /*
+ * Retain only the items that have the specified prefix.
+ * Arguments: list|- prefix
+ */
+ struct string_list list = STRING_LIST_INIT_DUP;
+ const char *prefix = argv[3];
+
+ parse_string_list(&list, argv[2]);
+ filter_string_list(&list, 0, prefix_cb, (void *)prefix);
+ write_list_compact(&list);
+ string_list_clear(&list, 0);
+ return 0;
+ }
+
+ if (argc == 3 && !strcmp(argv[1], "remove_duplicates")) {
+ struct string_list list = STRING_LIST_INIT_DUP;
+
+ parse_string_list(&list, argv[2]);
+ string_list_remove_duplicates(&list, 0);
+ write_list_compact(&list);
+ string_list_clear(&list, 0);
+ return 0;
+ }
+
+ if (argc == 4 && !strcmp(argv[1], "longest_prefix")) {
+ /* arguments: <colon-separated-prefixes>|- <string> */
+ struct string_list prefixes = STRING_LIST_INIT_DUP;
+ int retval;
+ const char *prefix_string = argv[2];
+ const char *string = argv[3];
+ const char *match;
+
+ parse_string_list(&prefixes, prefix_string);
+ match = string_list_longest_prefix(&prefixes, string);
+ if (match) {
+ printf("%s\n", match);
+ retval = 0;
+ }
+ else
+ retval = 1;
+ string_list_clear(&prefixes, 0);
+ return retval;
+ }
+
+ fprintf(stderr, "%s: unknown function name: %s\n", argv[0],
+ argv[1] ? argv[1] : "(there was none)");
+ return 1;
+}