diff options
author | Edward Thomson <ethomson@microsoft.com> | 2015-01-14 10:19:28 -0600 |
---|---|---|
committer | Edward Thomson <ethomson@microsoft.com> | 2015-01-14 10:19:28 -0600 |
commit | 85880693d53d401aac71705fe55c406b82de6f0d (patch) | |
tree | 0707c79af3eda662b20410aeb3dee2142e14066e | |
parent | e6911dff9d6483a6afae4d1138151a1bc9116354 (diff) | |
parent | b3837d4d2bbcae16a32deb732afa0e7e2e9b274e (diff) | |
download | libgit2-85880693d53d401aac71705fe55c406b82de6f0d.tar.gz |
Merge branch 'pr/2740'
-rw-r--r-- | include/git2/sys/hashsig.h | 71 | ||||
-rw-r--r-- | src/diff_tform.c | 37 | ||||
-rw-r--r-- | src/hashsig.c | 27 | ||||
-rw-r--r-- | src/merge.c | 8 | ||||
-rw-r--r-- | tests/clar_libgit2.c | 5 | ||||
-rw-r--r-- | tests/clar_libgit2.h | 1 | ||||
-rw-r--r-- | tests/diff/rename.c | 54 |
7 files changed, 118 insertions, 85 deletions
diff --git a/include/git2/sys/hashsig.h b/include/git2/sys/hashsig.h index 2bc32f394..09c19aec0 100644 --- a/include/git2/sys/hashsig.h +++ b/include/git2/sys/hashsig.h @@ -12,33 +12,52 @@ GIT_BEGIN_DECL /** - * Similarity signature of line hashes for a buffer + * Similarity signature of arbitrary text content based on line hashes */ typedef struct git_hashsig git_hashsig; /** - * Options for hashsig calculation + * Options for hashsig computation + * + * The options GIT_HASHSIG_NORMAL, GIT_HASHSIG_IGNORE_WHITESPACE, + * GIT_HASHSIG_SMART_WHITESPACE are exclusive and should not be combined. */ typedef enum { - GIT_HASHSIG_NORMAL = 0, /* use all data */ - GIT_HASHSIG_IGNORE_WHITESPACE = 1, /* ignore whitespace */ - GIT_HASHSIG_SMART_WHITESPACE = 2, /* ignore \r and all space after \n */ + /** + * Use all data + */ + GIT_HASHSIG_NORMAL = 0, + + /** + * Ignore whitespace + */ + GIT_HASHSIG_IGNORE_WHITESPACE = (1 << 0), + + /** + * Ignore \r and all space after \n + */ + GIT_HASHSIG_SMART_WHITESPACE = (1 << 1), + + /** + * Allow hashing of small files + */ + GIT_HASHSIG_ALLOW_SMALL_FILES = (1 << 2) } git_hashsig_option_t; /** - * Build a similarity signature for a buffer - * - * If you have passed a whitespace-ignoring buffer, then the whitespace - * will be removed from the buffer while it is being processed, modifying - * the buffer in place. Sorry about that! + * Compute a similarity signature for a text buffer * - * This will return an error if the buffer doesn't contain enough data to - * compute a valid signature. + * If you have passed the option GIT_HASHSIG_IGNORE_WHITESPACE, then the + * whitespace will be removed from the buffer while it is being processed, + * modifying the buffer in place. Sorry about that! * - * @param out The array of hashed runs representing the file content - * @param buf The contents of the file to hash - * @param buflen The length of the data at `buf` - * @param generate_pairwise_hashes Should pairwise runs be hashed + * @param out The computed similarity signature. + * @param buf The input buffer. + * @param buflen The input buffer size. + * @param opts The signature computation options (see above). + * @return 0 on success, GIT_EBUFS if the buffer doesn't contain enough data to + * compute a valid signature (unless GIT_HASHSIG_ALLOW_SMALL_FILES is set), or + * error code. */ GIT_EXTERN(int) git_hashsig_create( git_hashsig **out, @@ -47,13 +66,17 @@ GIT_EXTERN(int) git_hashsig_create( git_hashsig_option_t opts); /** - * Build a similarity signature from a file + * Compute a similarity signature for a text file * * This walks through the file, only loading a maximum of 4K of file data at - * a time. Otherwise, it acts just like `git_hashsig_create`. + * a time. Otherwise, it acts just like `git_hashsig_create`. * - * This will return an error if the file doesn't contain enough data to - * compute a valid signature. + * @param out The computed similarity signature. + * @param path The path to the input file. + * @param opts The signature computation options (see above). + * @return 0 on success, GIT_EBUFS if the buffer doesn't contain enough data to + * compute a valid signature (unless GIT_HASHSIG_ALLOW_SMALL_FILES is set), or + * error code. */ GIT_EXTERN(int) git_hashsig_create_fromfile( git_hashsig **out, @@ -62,13 +85,17 @@ GIT_EXTERN(int) git_hashsig_create_fromfile( /** * Release memory for a content similarity signature + * + * @param sig The similarity signature to free. */ GIT_EXTERN(void) git_hashsig_free(git_hashsig *sig); /** - * Measure similarity between two files + * Measure similarity score between two similarity signatures * - * @return <0 for error, [0 to 100] as similarity score + * @param a The first similarity signature to compare. + * @param b The second similarity signature to compare. + * @return [0 to 100] on success as the similarity score, or error code. */ GIT_EXTERN(int) git_hashsig_compare( const git_hashsig *a, diff --git a/src/diff_tform.c b/src/diff_tform.c index d576317f0..9133a9b14 100644 --- a/src/diff_tform.c +++ b/src/diff_tform.c @@ -219,34 +219,18 @@ int git_diff_find_similar__hashsig_for_file( void **out, const git_diff_file *f, const char *path, void *p) { git_hashsig_option_t opt = (git_hashsig_option_t)(intptr_t)p; - int error = 0; GIT_UNUSED(f); - error = git_hashsig_create_fromfile((git_hashsig **)out, path, opt); - - if (error == GIT_EBUFS) { - error = 0; - giterr_clear(); - } - - return error; + return git_hashsig_create_fromfile((git_hashsig **)out, path, opt); } int git_diff_find_similar__hashsig_for_buf( void **out, const git_diff_file *f, const char *buf, size_t len, void *p) { git_hashsig_option_t opt = (git_hashsig_option_t)(intptr_t)p; - int error = 0; GIT_UNUSED(f); - error = git_hashsig_create((git_hashsig **)out, buf, len, opt); - - if (error == GIT_EBUFS) { - error = 0; - giterr_clear(); - } - - return error; + return git_hashsig_create((git_hashsig **)out, buf, len, opt); } void git_diff_find_similar__hashsig_free(void *sig, void *payload) @@ -258,8 +242,14 @@ void git_diff_find_similar__hashsig_free(void *sig, void *payload) int git_diff_find_similar__calc_similarity( int *score, void *siga, void *sigb, void *payload) { + int error; + GIT_UNUSED(payload); - *score = git_hashsig_compare(siga, sigb); + error = git_hashsig_compare(siga, sigb); + if (error < 0) + return error; + + *score = error; return 0; } @@ -273,6 +263,7 @@ static int normalize_find_opts( const git_diff_find_options *given) { git_config *cfg = NULL; + git_hashsig_option_t hashsig_opts; GITERR_CHECK_VERSION(given, GIT_DIFF_FIND_OPTIONS_VERSION, "git_diff_find_options"); @@ -354,11 +345,13 @@ static int normalize_find_opts( opts->metric->similarity = git_diff_find_similar__calc_similarity; if (opts->flags & GIT_DIFF_FIND_IGNORE_WHITESPACE) - opts->metric->payload = (void *)GIT_HASHSIG_IGNORE_WHITESPACE; + hashsig_opts = GIT_HASHSIG_IGNORE_WHITESPACE; else if (opts->flags & GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE) - opts->metric->payload = (void *)GIT_HASHSIG_NORMAL; + hashsig_opts = GIT_HASHSIG_NORMAL; else - opts->metric->payload = (void *)GIT_HASHSIG_SMART_WHITESPACE; + hashsig_opts = GIT_HASHSIG_SMART_WHITESPACE; + hashsig_opts |= GIT_HASHSIG_ALLOW_SMALL_FILES; + opts->metric->payload = (void *)hashsig_opts; } return 0; diff --git a/src/hashsig.c b/src/hashsig.c index a6d5f2041..0ddfed902 100644 --- a/src/hashsig.c +++ b/src/hashsig.c @@ -35,7 +35,6 @@ struct git_hashsig { hashsig_heap mins; hashsig_heap maxs; git_hashsig_option_t opt; - int considered; }; #define HEAP_LCHILD_OF(I) (((I)<<1)+1) @@ -135,25 +134,23 @@ static void hashsig_in_progress_init( { int i; - switch (sig->opt) { - case GIT_HASHSIG_IGNORE_WHITESPACE: + /* no more than one can be set */ + assert(!(sig->opt & GIT_HASHSIG_IGNORE_WHITESPACE) || + !(sig->opt & GIT_HASHSIG_SMART_WHITESPACE)); + + if (sig->opt & GIT_HASHSIG_IGNORE_WHITESPACE) { for (i = 0; i < 256; ++i) prog->ignore_ch[i] = git__isspace_nonlf(i); prog->use_ignores = 1; - break; - case GIT_HASHSIG_SMART_WHITESPACE: + } else if (sig->opt & GIT_HASHSIG_SMART_WHITESPACE) { for (i = 0; i < 256; ++i) prog->ignore_ch[i] = git__isspace(i); prog->use_ignores = 1; - break; - default: + } else { memset(prog, 0, sizeof(*prog)); - break; } } -#define HASHSIG_IN_PROGRESS_INIT { 1 } - static int hashsig_add_hashes( git_hashsig *sig, const uint8_t *data, @@ -174,12 +171,13 @@ static int hashsig_add_hashes( if (use_ignores) for (; scan < end && git__isspace_nonlf(ch); ch = *scan) ++scan; - else if (sig->opt != GIT_HASHSIG_NORMAL) + else if (sig->opt & + (GIT_HASHSIG_IGNORE_WHITESPACE | GIT_HASHSIG_SMART_WHITESPACE)) for (; scan < end && ch == '\r'; ch = *scan) ++scan; /* peek at next character to decide what to do next */ - if (sig->opt == GIT_HASHSIG_SMART_WHITESPACE) + if (sig->opt & GIT_HASHSIG_SMART_WHITESPACE) use_ignores = (ch == '\n'); if (scan >= end) @@ -198,8 +196,6 @@ static int hashsig_add_hashes( hashsig_heap_insert(&sig->mins, (hashsig_t)state); hashsig_heap_insert(&sig->maxs, (hashsig_t)state); - sig->considered++; - while (scan < end && (*scan == '\n' || !*scan)) ++scan; } @@ -212,7 +208,8 @@ static int hashsig_add_hashes( static int hashsig_finalize_hashes(git_hashsig *sig) { - if (sig->mins.size < HASHSIG_HEAP_MIN_SIZE) { + if (sig->mins.size < HASHSIG_HEAP_MIN_SIZE && + !(sig->opt & GIT_HASHSIG_ALLOW_SMALL_FILES)) { giterr_set(GITERR_INVALID, "File too small for similarity signature calculation"); return GIT_EBUFS; diff --git a/src/merge.c b/src/merge.c index 7031efcc7..7c38b5692 100644 --- a/src/merge.c +++ b/src/merge.c @@ -1612,13 +1612,7 @@ static int merge_normalize_opts( opts->metric->buffer_signature = git_diff_find_similar__hashsig_for_buf; opts->metric->free_signature = git_diff_find_similar__hashsig_free; opts->metric->similarity = git_diff_find_similar__calc_similarity; - - if (opts->flags & GIT_DIFF_FIND_IGNORE_WHITESPACE) - opts->metric->payload = (void *)GIT_HASHSIG_IGNORE_WHITESPACE; - else if (opts->flags & GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE) - opts->metric->payload = (void *)GIT_HASHSIG_NORMAL; - else - opts->metric->payload = (void *)GIT_HASHSIG_SMART_WHITESPACE; + opts->metric->payload = (void *)GIT_HASHSIG_SMART_WHITESPACE; } return 0; diff --git a/tests/clar_libgit2.c b/tests/clar_libgit2.c index 10f37ad5c..a8a8ba6ab 100644 --- a/tests/clar_libgit2.c +++ b/tests/clar_libgit2.c @@ -53,6 +53,11 @@ void cl_git_rewritefile(const char *path, const char *content) cl_git_write2file(path, content, 0, O_WRONLY | O_CREAT | O_TRUNC, 0644); } +void cl_git_rmfile(const char *filename) +{ + cl_must_pass(p_unlink(filename)); +} + #ifdef GIT_WIN32 #include "win32/utf-conv.h" diff --git a/tests/clar_libgit2.h b/tests/clar_libgit2.h index f51554293..e1d62c820 100644 --- a/tests/clar_libgit2.h +++ b/tests/clar_libgit2.h @@ -112,6 +112,7 @@ void cl_git_append2file(const char *filename, const char *new_content); void cl_git_rewritefile(const char *filename, const char *new_content); void cl_git_write2file(const char *path, const char *data, size_t datalen, int flags, unsigned int mode); +void cl_git_rmfile(const char *filename); bool cl_toggle_filemode(const char *filename); bool cl_is_chmod_supported(void); diff --git a/tests/diff/rename.c b/tests/diff/rename.c index 4bc3eb54c..28e0bf149 100644 --- a/tests/diff/rename.c +++ b/tests/diff/rename.c @@ -381,37 +381,53 @@ void test_diff_rename__not_exact_match(void) git_tree_free(new_tree); } -void test_diff_rename__handles_small_files(void) +void test_diff_rename__test_small_files(void) { - const char *tree_sha = "2bc7f351d20b53f1c72c16c4b036e491c478c49a"; git_index *index; - git_tree *tree; + git_reference *head_reference; + git_commit *head_commit; + git_tree *head_tree; + git_tree *commit_tree; + git_signature *signature; git_diff *diff; - git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT; - git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT; + git_oid oid; + const git_diff_delta *delta; + git_diff_options diff_options = GIT_DIFF_OPTIONS_INIT; + git_diff_find_options find_options = GIT_DIFF_FIND_OPTIONS_INIT; cl_git_pass(git_repository_index(&index, g_repo)); - tree = resolve_commit_oid_to_tree(g_repo, tree_sha); + cl_git_mkfile("renames/small.txt", "Hello World!\n"); + cl_git_pass(git_index_add_bypath(index, "small.txt")); - cl_git_rewritefile("renames/songof7cities.txt", "single line\n"); - cl_git_pass(git_index_add_bypath(index, "songof7cities.txt")); + cl_git_pass(git_repository_head(&head_reference, g_repo)); + cl_git_pass(git_reference_peel((git_object**)&head_commit, head_reference, GIT_OBJ_COMMIT)); + cl_git_pass(git_commit_tree(&head_tree, head_commit)); + cl_git_pass(git_index_write_tree(&oid, index)); + cl_git_pass(git_tree_lookup(&commit_tree, g_repo, &oid)); + cl_git_pass(git_signature_new(&signature, "Rename", "rename@example.com", 1404157834, 0)); + cl_git_pass(git_commit_create(&oid, g_repo, "HEAD", signature, signature, NULL, "Test commit", commit_tree, 1, (const git_commit**)&head_commit)); - cl_git_rewritefile("renames/untimely.txt", "untimely\n"); - cl_git_pass(git_index_add_bypath(index, "untimely.txt")); + cl_git_mkfile("renames/copy.txt", "Hello World!\n"); + cl_git_rmfile("renames/small.txt"); - /* Tests that we can invoke find_similar on small files - * and that the GIT_EBUFS (too small) error code is not - * propagated to the caller. - */ - cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts)); + diff_options.flags = GIT_DIFF_INCLUDE_UNTRACKED; + cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, commit_tree, &diff_options)); + find_options.flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_FOR_UNTRACKED; + cl_git_pass(git_diff_find_similar(diff, &find_options)); - opts.flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES | - GIT_DIFF_FIND_AND_BREAK_REWRITES; - cl_git_pass(git_diff_find_similar(diff, &opts)); + cl_assert_equal_i(git_diff_num_deltas(diff), 1); + delta = git_diff_get_delta(diff, 0); + cl_assert_equal_i(delta->status, GIT_DELTA_RENAMED); + cl_assert_equal_s(delta->old_file.path, "small.txt"); + cl_assert_equal_s(delta->new_file.path, "copy.txt"); git_diff_free(diff); - git_tree_free(tree); + git_signature_free(signature); + git_tree_free(commit_tree); + git_tree_free(head_tree); + git_commit_free(head_commit); + git_reference_free(head_reference); git_index_free(index); } |