summaryrefslogtreecommitdiff
path: root/tests/core
diff options
context:
space:
mode:
authorPatrick Steinhardt <ps@pks.im>2018-10-18 16:08:46 +0200
committerPatrick Steinhardt <ps@pks.im>2018-10-25 12:52:47 +0200
commit83e8a6b36acc67f2702cbbc7d4e334c7f7737719 (patch)
tree20a332ee3db97373acbcc8501b280ec1ade13ce7 /tests/core
parentf010b66bf693d22560fd1af584d325a8da42b416 (diff)
downloadlibgit2-83e8a6b36acc67f2702cbbc7d4e334c7f7737719.tar.gz
util: provide `git__memmem` function
Unfortunately, neither the `memmem` nor the `strnstr` functions are part of any C standard but are merely extensions of C that are implemented by e.g. glibc. Thus, there is no standardized way to search for a string in a block of memory with a limited size, and using `strstr` is to be considered unsafe in case where the buffer has not been sanitized. In fact, there are some uses of `strstr` in exactly that unsafe way in our codebase. Provide a new function `git__memmem` that implements the `memmem` semantics. That is in a given haystack of `n` bytes, search for the occurrence of a byte sequence of `m` bytes and return a pointer to the first occurrence. The implementation chosen is the "Not So Naive" algorithm from [1]. It was chosen as the implementation is comparably simple while still being reasonably efficient in most cases. Preprocessing happens in constant time and space, searching has a time complexity of O(n*m) with a slightly sub-linear average case. [1]: http://www-igm.univ-mlv.fr/~lecroq/string/
Diffstat (limited to 'tests/core')
-rw-r--r--tests/core/memmem.c46
1 files changed, 46 insertions, 0 deletions
diff --git a/tests/core/memmem.c b/tests/core/memmem.c
new file mode 100644
index 000000000..fd9986d01
--- /dev/null
+++ b/tests/core/memmem.c
@@ -0,0 +1,46 @@
+#include "clar_libgit2.h"
+
+static void assert_found(const char *haystack, const char *needle, size_t expected_pos)
+{
+ cl_assert_equal_p(git__memmem(haystack, haystack ? strlen(haystack) : 0,
+ needle, needle ? strlen(needle) : 0),
+ haystack + expected_pos);
+}
+
+static void assert_absent(const char *haystack, const char *needle)
+{
+ cl_assert_equal_p(git__memmem(haystack, haystack ? strlen(haystack) : 0,
+ needle, needle ? strlen(needle) : 0),
+ NULL);
+}
+
+void test_core_memmem__found(void)
+{
+ assert_found("a", "a", 0);
+ assert_found("ab", "a", 0);
+ assert_found("ba", "a", 1);
+ assert_found("aa", "a", 0);
+ assert_found("aab", "aa", 0);
+ assert_found("baa", "aa", 1);
+ assert_found("dabc", "abc", 1);
+ assert_found("abababc", "abc", 4);
+}
+
+void test_core_memmem__absent(void)
+{
+ assert_absent("a", "b");
+ assert_absent("a", "aa");
+ assert_absent("ba", "ab");
+ assert_absent("ba", "ab");
+ assert_absent("abc", "abcd");
+ assert_absent("abcabcabc", "bcac");
+}
+
+void test_core_memmem__edgecases(void)
+{
+ assert_absent(NULL, NULL);
+ assert_absent("a", NULL);
+ assert_absent(NULL, "a");
+ assert_absent("", "a");
+ assert_absent("a", "");
+}