summary refs log tree commit diff
diff options
context:
space:
mode:
author Edward Thomson <ethomson@microsoft.com> 2015-01-14 10:19:28 -0600
committer Edward Thomson <ethomson@microsoft.com> 2015-01-14 10:19:28 -0600
commit 85880693d53d401aac71705fe55c406b82de6f0d (patch)
tree 0707c79af3eda662b20410aeb3dee2142e14066e
parent e6911dff9d6483a6afae4d1138151a1bc9116354 (diff)
parent b3837d4d2bbcae16a32deb732afa0e7e2e9b274e (diff)
download libgit2-85880693d53d401aac71705fe55c406b82de6f0d.tar.gz
Merge branch 'pr/2740'
-rw-r--r-- include/git2/sys/hashsig.h 71
-rw-r--r-- src/diff_tform.c 37
-rw-r--r-- src/hashsig.c 27
-rw-r--r-- src/merge.c 8
-rw-r--r-- tests/clar_libgit2.c 5
-rw-r--r-- tests/clar_libgit2.h 1
-rw-r--r-- tests/diff/rename.c 54
7 files changed, 118 insertions, 85 deletions
diff --git a/include/git2/sys/hashsig.h b/include/git2/sys/hashsig.h
index 2bc32f394..09c19aec0 100644
--- a/include/git2/sys/hashsig.h
+++ b/include/git2/sys/hashsig.h
@@ -12,33 +12,52 @@
GIT_BEGIN_DECL
/**
- * Similarity signature of line hashes for a buffer
+ * Similarity signature of arbitrary text content based on line hashes
*/
typedef struct git_hashsig git_hashsig;
/**
- * Options for hashsig calculation
+ * Options for hashsig computation
+ *
+ * The options GIT_HASHSIG_NORMAL, GIT_HASHSIG_IGNORE_WHITESPACE, and
+ * GIT_HASHSIG_SMART_WHITESPACE are mutually exclusive and should not be combined.
*/
typedef enum {
- GIT_HASHSIG_NORMAL = 0, /* use all data */
- GIT_HASHSIG_IGNORE_WHITESPACE = 1, /* ignore whitespace */
- GIT_HASHSIG_SMART_WHITESPACE = 2, /* ignore \r and all space after \n */
+ /**
+ * Use all data
+ */
+ GIT_HASHSIG_NORMAL = 0,
+
+ /**
+ * Ignore whitespace
+ */
+ GIT_HASHSIG_IGNORE_WHITESPACE = (1 << 0),
+
+ /**
+ * Ignore \r and all space after \n
+ */
+ GIT_HASHSIG_SMART_WHITESPACE = (1 << 1),
+
+ /**
+ * Allow hashing of small files
+ */
+ GIT_HASHSIG_ALLOW_SMALL_FILES = (1 << 2)
} git_hashsig_option_t;
/**
- * Build a similarity signature for a buffer
- *
- * If you have passed a whitespace-ignoring buffer, then the whitespace
- * will be removed from the buffer while it is being processed, modifying
- * the buffer in place. Sorry about that!
+ * Compute a similarity signature for a text buffer
*
- * This will return an error if the buffer doesn't contain enough data to
- * compute a valid signature.
+ * If you have passed the option GIT_HASHSIG_IGNORE_WHITESPACE, then the
+ * whitespace will be removed from the buffer while it is being processed,
+ * modifying the buffer in place. Sorry about that!
*
- * @param out The array of hashed runs representing the file content
- * @param buf The contents of the file to hash
- * @param buflen The length of the data at `buf`
- * @param generate_pairwise_hashes Should pairwise runs be hashed
+ * @param out The computed similarity signature.
+ * @param buf The input buffer.
+ * @param buflen The input buffer size.
+ * @param opts The signature computation options (see above).
+ * @return 0 on success, GIT_EBUFS if the buffer doesn't contain enough data to
+ * compute a valid signature (unless GIT_HASHSIG_ALLOW_SMALL_FILES is set), or
+ * error code.
*/
GIT_EXTERN(int) git_hashsig_create(
git_hashsig **out,
@@ -47,13 +66,17 @@ GIT_EXTERN(int) git_hashsig_create(
git_hashsig_option_t opts);
/**
- * Build a similarity signature from a file
+ * Compute a similarity signature for a text file
*
* This walks through the file, only loading a maximum of 4K of file data at
- * a time. Otherwise, it acts just like `git_hashsig_create`.
+ * a time. Otherwise, it acts just like `git_hashsig_create`.
*
- * This will return an error if the file doesn't contain enough data to
- * compute a valid signature.
+ * @param out The computed similarity signature.
+ * @param path The path to the input file.
+ * @param opts The signature computation options (see above).
+ * @return 0 on success, GIT_EBUFS if the file doesn't contain enough data to
+ * compute a valid signature (unless GIT_HASHSIG_ALLOW_SMALL_FILES is set), or
+ * another error code.
*/
GIT_EXTERN(int) git_hashsig_create_fromfile(
git_hashsig **out,
@@ -62,13 +85,17 @@ GIT_EXTERN(int) git_hashsig_create_fromfile(
/**
* Release memory for a content similarity signature
+ *
+ * @param sig The similarity signature to free.
*/
GIT_EXTERN(void) git_hashsig_free(git_hashsig *sig);
/**
- * Measure similarity between two files
+ * Measure similarity score between two similarity signatures
*
- * @return <0 for error, [0 to 100] as similarity score
+ * @param a The first similarity signature to compare.
+ * @param b The second similarity signature to compare.
+ * @return [0 to 100] on success as the similarity score, or a negative error code.
*/
GIT_EXTERN(int) git_hashsig_compare(
const git_hashsig *a,
diff --git a/src/diff_tform.c b/src/diff_tform.c
index d576317f0..9133a9b14 100644
--- a/src/diff_tform.c
+++ b/src/diff_tform.c
@@ -219,34 +219,18 @@ int git_diff_find_similar__hashsig_for_file(
void **out, const git_diff_file *f, const char *path, void *p)
{
git_hashsig_option_t opt = (git_hashsig_option_t)(intptr_t)p;
- int error = 0;
GIT_UNUSED(f);
- error = git_hashsig_create_fromfile((git_hashsig **)out, path, opt);
-
- if (error == GIT_EBUFS) {
- error = 0;
- giterr_clear();
- }
-
- return error;
+ return git_hashsig_create_fromfile((git_hashsig **)out, path, opt);
}
int git_diff_find_similar__hashsig_for_buf(
void **out, const git_diff_file *f, const char *buf, size_t len, void *p)
{
git_hashsig_option_t opt = (git_hashsig_option_t)(intptr_t)p;
- int error = 0;
GIT_UNUSED(f);
- error = git_hashsig_create((git_hashsig **)out, buf, len, opt);
-
- if (error == GIT_EBUFS) {
- error = 0;
- giterr_clear();
- }
-
- return error;
+ return git_hashsig_create((git_hashsig **)out, buf, len, opt);
}
void git_diff_find_similar__hashsig_free(void *sig, void *payload)
@@ -258,8 +242,14 @@ void git_diff_find_similar__hashsig_free(void *sig, void *payload)
int git_diff_find_similar__calc_similarity(
int *score, void *siga, void *sigb, void *payload)
{
+ int error;
+
GIT_UNUSED(payload);
- *score = git_hashsig_compare(siga, sigb);
+ error = git_hashsig_compare(siga, sigb);
+ if (error < 0)
+ return error;
+
+ *score = error;
return 0;
}
@@ -273,6 +263,7 @@ static int normalize_find_opts(
const git_diff_find_options *given)
{
git_config *cfg = NULL;
+ git_hashsig_option_t hashsig_opts;
GITERR_CHECK_VERSION(given, GIT_DIFF_FIND_OPTIONS_VERSION, "git_diff_find_options");
@@ -354,11 +345,13 @@ static int normalize_find_opts(
opts->metric->similarity = git_diff_find_similar__calc_similarity;
if (opts->flags & GIT_DIFF_FIND_IGNORE_WHITESPACE)
- opts->metric->payload = (void *)GIT_HASHSIG_IGNORE_WHITESPACE;
+ hashsig_opts = GIT_HASHSIG_IGNORE_WHITESPACE;
else if (opts->flags & GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE)
- opts->metric->payload = (void *)GIT_HASHSIG_NORMAL;
+ hashsig_opts = GIT_HASHSIG_NORMAL;
else
- opts->metric->payload = (void *)GIT_HASHSIG_SMART_WHITESPACE;
+ hashsig_opts = GIT_HASHSIG_SMART_WHITESPACE;
+ hashsig_opts |= GIT_HASHSIG_ALLOW_SMALL_FILES;
+ opts->metric->payload = (void *)hashsig_opts;
}
return 0;
diff --git a/src/hashsig.c b/src/hashsig.c
index a6d5f2041..0ddfed902 100644
--- a/src/hashsig.c
+++ b/src/hashsig.c
@@ -35,7 +35,6 @@ struct git_hashsig {
hashsig_heap mins;
hashsig_heap maxs;
git_hashsig_option_t opt;
- int considered;
};
#define HEAP_LCHILD_OF(I) (((I)<<1)+1)
@@ -135,25 +134,23 @@ static void hashsig_in_progress_init(
{
int i;
- switch (sig->opt) {
- case GIT_HASHSIG_IGNORE_WHITESPACE:
+ /* no more than one can be set */
+ assert(!(sig->opt & GIT_HASHSIG_IGNORE_WHITESPACE) ||
+ !(sig->opt & GIT_HASHSIG_SMART_WHITESPACE));
+
+ if (sig->opt & GIT_HASHSIG_IGNORE_WHITESPACE) {
for (i = 0; i < 256; ++i)
prog->ignore_ch[i] = git__isspace_nonlf(i);
prog->use_ignores = 1;
- break;
- case GIT_HASHSIG_SMART_WHITESPACE:
+ } else if (sig->opt & GIT_HASHSIG_SMART_WHITESPACE) {
for (i = 0; i < 256; ++i)
prog->ignore_ch[i] = git__isspace(i);
prog->use_ignores = 1;
- break;
- default:
+ } else {
memset(prog, 0, sizeof(*prog));
- break;
}
}
-#define HASHSIG_IN_PROGRESS_INIT { 1 }
-
static int hashsig_add_hashes(
git_hashsig *sig,
const uint8_t *data,
@@ -174,12 +171,13 @@ static int hashsig_add_hashes(
if (use_ignores)
for (; scan < end && git__isspace_nonlf(ch); ch = *scan)
++scan;
- else if (sig->opt != GIT_HASHSIG_NORMAL)
+ else if (sig->opt &
+ (GIT_HASHSIG_IGNORE_WHITESPACE | GIT_HASHSIG_SMART_WHITESPACE))
for (; scan < end && ch == '\r'; ch = *scan)
++scan;
/* peek at next character to decide what to do next */
- if (sig->opt == GIT_HASHSIG_SMART_WHITESPACE)
+ if (sig->opt & GIT_HASHSIG_SMART_WHITESPACE)
use_ignores = (ch == '\n');
if (scan >= end)
@@ -198,8 +196,6 @@ static int hashsig_add_hashes(
hashsig_heap_insert(&sig->mins, (hashsig_t)state);
hashsig_heap_insert(&sig->maxs, (hashsig_t)state);
- sig->considered++;
-
while (scan < end && (*scan == '\n' || !*scan))
++scan;
}
@@ -212,7 +208,8 @@ static int hashsig_add_hashes(
static int hashsig_finalize_hashes(git_hashsig *sig)
{
- if (sig->mins.size < HASHSIG_HEAP_MIN_SIZE) {
+ if (sig->mins.size < HASHSIG_HEAP_MIN_SIZE &&
+ !(sig->opt & GIT_HASHSIG_ALLOW_SMALL_FILES)) {
giterr_set(GITERR_INVALID,
"File too small for similarity signature calculation");
return GIT_EBUFS;
diff --git a/src/merge.c b/src/merge.c
index 7031efcc7..7c38b5692 100644
--- a/src/merge.c
+++ b/src/merge.c
@@ -1612,13 +1612,7 @@ static int merge_normalize_opts(
opts->metric->buffer_signature = git_diff_find_similar__hashsig_for_buf;
opts->metric->free_signature = git_diff_find_similar__hashsig_free;
opts->metric->similarity = git_diff_find_similar__calc_similarity;
-
- if (opts->flags & GIT_DIFF_FIND_IGNORE_WHITESPACE)
- opts->metric->payload = (void *)GIT_HASHSIG_IGNORE_WHITESPACE;
- else if (opts->flags & GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE)
- opts->metric->payload = (void *)GIT_HASHSIG_NORMAL;
- else
- opts->metric->payload = (void *)GIT_HASHSIG_SMART_WHITESPACE;
+ opts->metric->payload = (void *)GIT_HASHSIG_SMART_WHITESPACE;
}
return 0;
diff --git a/tests/clar_libgit2.c b/tests/clar_libgit2.c
index 10f37ad5c..a8a8ba6ab 100644
--- a/tests/clar_libgit2.c
+++ b/tests/clar_libgit2.c
@@ -53,6 +53,11 @@ void cl_git_rewritefile(const char *path, const char *content)
cl_git_write2file(path, content, 0, O_WRONLY | O_CREAT | O_TRUNC, 0644);
}
+void cl_git_rmfile(const char *filename)
+{
+ cl_must_pass(p_unlink(filename));
+}
+
#ifdef GIT_WIN32
#include "win32/utf-conv.h"
diff --git a/tests/clar_libgit2.h b/tests/clar_libgit2.h
index f51554293..e1d62c820 100644
--- a/tests/clar_libgit2.h
+++ b/tests/clar_libgit2.h
@@ -112,6 +112,7 @@ void cl_git_append2file(const char *filename, const char *new_content);
void cl_git_rewritefile(const char *filename, const char *new_content);
void cl_git_write2file(const char *path, const char *data,
size_t datalen, int flags, unsigned int mode);
+void cl_git_rmfile(const char *filename);
bool cl_toggle_filemode(const char *filename);
bool cl_is_chmod_supported(void);
diff --git a/tests/diff/rename.c b/tests/diff/rename.c
index 4bc3eb54c..28e0bf149 100644
--- a/tests/diff/rename.c
+++ b/tests/diff/rename.c
@@ -381,37 +381,53 @@ void test_diff_rename__not_exact_match(void)
git_tree_free(new_tree);
}
-void test_diff_rename__handles_small_files(void)
+void test_diff_rename__test_small_files(void)
{
- const char *tree_sha = "2bc7f351d20b53f1c72c16c4b036e491c478c49a";
git_index *index;
- git_tree *tree;
+ git_reference *head_reference;
+ git_commit *head_commit;
+ git_tree *head_tree;
+ git_tree *commit_tree;
+ git_signature *signature;
git_diff *diff;
- git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
- git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
+ git_oid oid;
+ const git_diff_delta *delta;
+ git_diff_options diff_options = GIT_DIFF_OPTIONS_INIT;
+ git_diff_find_options find_options = GIT_DIFF_FIND_OPTIONS_INIT;
cl_git_pass(git_repository_index(&index, g_repo));
- tree = resolve_commit_oid_to_tree(g_repo, tree_sha);
+ cl_git_mkfile("renames/small.txt", "Hello World!\n");
+ cl_git_pass(git_index_add_bypath(index, "small.txt"));
- cl_git_rewritefile("renames/songof7cities.txt", "single line\n");
- cl_git_pass(git_index_add_bypath(index, "songof7cities.txt"));
+ cl_git_pass(git_repository_head(&head_reference, g_repo));
+ cl_git_pass(git_reference_peel((git_object**)&head_commit, head_reference, GIT_OBJ_COMMIT));
+ cl_git_pass(git_commit_tree(&head_tree, head_commit));
+ cl_git_pass(git_index_write_tree(&oid, index));
+ cl_git_pass(git_tree_lookup(&commit_tree, g_repo, &oid));
+ cl_git_pass(git_signature_new(&signature, "Rename", "rename@example.com", 1404157834, 0));
+ cl_git_pass(git_commit_create(&oid, g_repo, "HEAD", signature, signature, NULL, "Test commit", commit_tree, 1, (const git_commit**)&head_commit));
- cl_git_rewritefile("renames/untimely.txt", "untimely\n");
- cl_git_pass(git_index_add_bypath(index, "untimely.txt"));
+ cl_git_mkfile("renames/copy.txt", "Hello World!\n");
+ cl_git_rmfile("renames/small.txt");
- /* Tests that we can invoke find_similar on small files
- * and that the GIT_EBUFS (too small) error code is not
- * propagated to the caller.
- */
- cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
+ diff_options.flags = GIT_DIFF_INCLUDE_UNTRACKED;
+ cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, commit_tree, &diff_options));
+ find_options.flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_FOR_UNTRACKED;
+ cl_git_pass(git_diff_find_similar(diff, &find_options));
- opts.flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES |
- GIT_DIFF_FIND_AND_BREAK_REWRITES;
- cl_git_pass(git_diff_find_similar(diff, &opts));
+ cl_assert_equal_i(git_diff_num_deltas(diff), 1);
+ delta = git_diff_get_delta(diff, 0);
+ cl_assert_equal_i(delta->status, GIT_DELTA_RENAMED);
+ cl_assert_equal_s(delta->old_file.path, "small.txt");
+ cl_assert_equal_s(delta->new_file.path, "copy.txt");
git_diff_free(diff);
- git_tree_free(tree);
+ git_signature_free(signature);
+ git_tree_free(commit_tree);
+ git_tree_free(head_tree);
+ git_commit_free(head_commit);
+ git_reference_free(head_reference);
git_index_free(index);
}