diff options
author | Vicent Martà <vicent@github.com> | 2013-02-27 14:50:32 -0800 |
---|---|---|
committer | Vicent Martà <vicent@github.com> | 2013-02-27 14:50:32 -0800 |
commit | e68e33f33d98c171d31dac33257250b5ecded4c9 (patch) | |
tree | 527f7c1f05dfd0427a82410a0a51c39fa6a33044 /tests-clar/diff/rename.c | |
parent | 9f9477d650c33eddad9cb48c5ec84cd703300c16 (diff) | |
parent | 1be4ba984216dfcb1f07945240c2831395fd0460 (diff) | |
download | libgit2-e68e33f33d98c171d31dac33257250b5ecded4c9.tar.gz |
Merge pull request #1233 from arrbee/file-similarity-metric
Add file similarity scoring to diff rename/copy detection
Diffstat (limited to 'tests-clar/diff/rename.c')
-rw-r--r-- | tests-clar/diff/rename.c | 228 |
1 files changed, 226 insertions, 2 deletions
diff --git a/tests-clar/diff/rename.c b/tests-clar/diff/rename.c index 2995b4ef5..539512538 100644 --- a/tests-clar/diff/rename.c +++ b/tests-clar/diff/rename.c @@ -23,8 +23,16 @@ void test_diff_rename__cleanup(void) * serving.txt -> sixserving.txt (rename, no change, 100% match) * sevencities.txt -> sevencities.txt (no change) * sevencities.txt -> songofseven.txt (copy, no change, 100% match) - * - * TODO: add commits with various % changes of copy / rename + * commit 1c068dee5790ef1580cfc4cd670915b48d790084 + * songofseven.txt -> songofseven.txt (major rewrite, <20% match - split) + * sixserving.txt -> sixserving.txt (indentation change) + * sixserving.txt -> ikeepsix.txt (copy, add title, >80% match) + * sevencities.txt (no change) + * commit 19dd32dfb1520a64e5bbaae8dce6ef423dfa2f13 + * songofseven.txt -> untimely.txt (rename, convert to crlf) + * ikeepsix.txt -> ikeepsix.txt (reorder sections in file) + * sixserving.txt -> sixserving.txt (whitespace change - not just indent) + * sevencities.txt -> songof7cities.txt (rename, small text changes) */ void test_diff_rename__match_oid(void) @@ -133,3 +141,219 @@ void test_diff_rename__checks_options_version(void) git_tree_free(old_tree); git_tree_free(new_tree); } + +void test_diff_rename__not_exact_match(void) +{ + const char *sha0 = "2bc7f351d20b53f1c72c16c4b036e491c478c49a"; + const char *sha1 = "1c068dee5790ef1580cfc4cd670915b48d790084"; + const char *sha2 = "19dd32dfb1520a64e5bbaae8dce6ef423dfa2f13"; + git_tree *old_tree, *new_tree; + git_diff_list *diff; + git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT; + git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT; + diff_expects exp; + + /* == Changes ===================================================== + * songofseven.txt -> songofseven.txt (major rewrite, <20% match - split) + * sixserving.txt -> sixserving.txt (indentation change) + * sixserving.txt -> ikeepsix.txt (copy, add title, >80% match) + * sevencities.txt (no change) + */ + + old_tree = resolve_commit_oid_to_tree(g_repo, sha0); + new_tree = resolve_commit_oid_to_tree(g_repo, sha1); + + /* Must pass GIT_DIFF_INCLUDE_UNMODIFIED if you expect to emulate + * --find-copies-harder during rename transformion... + */ + diffopts.flags |= GIT_DIFF_INCLUDE_UNMODIFIED; + + cl_git_pass(git_diff_tree_to_tree( + &diff, g_repo, old_tree, new_tree, &diffopts)); + + /* git diff --no-renames \ + * 2bc7f351d20b53f1c72c16c4b036e491c478c49a \ + * 1c068dee5790ef1580cfc4cd670915b48d790084 + */ + memset(&exp, 0, sizeof(exp)); + cl_git_pass(git_diff_foreach( + diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp)); + + cl_assert_equal_i(4, exp.files); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_ADDED]); + + /* git diff -M 2bc7f351d20b53f1c72c16c4b036e491c478c49a \ + * 1c068dee5790ef1580cfc4cd670915b48d790084 + * + * must not pass NULL for opts because it will pick up environment + * values for "diff.renames" and test won't be consistent. + */ + opts.flags = GIT_DIFF_FIND_RENAMES; + cl_git_pass(git_diff_find_similar(diff, &opts)); + + memset(&exp, 0, sizeof(exp)); + cl_git_pass(git_diff_foreach( + diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp)); + + cl_assert_equal_i(4, exp.files); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_ADDED]); + + git_diff_list_free(diff); + + /* git diff -M -C \ + * 2bc7f351d20b53f1c72c16c4b036e491c478c49a \ + * 1c068dee5790ef1580cfc4cd670915b48d790084 + */ + cl_git_pass(git_diff_tree_to_tree( + &diff, g_repo, old_tree, new_tree, &diffopts)); + + opts.flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES; + cl_git_pass(git_diff_find_similar(diff, &opts)); + + memset(&exp, 0, sizeof(exp)); + cl_git_pass(git_diff_foreach( + diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp)); + + cl_assert_equal_i(4, exp.files); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]); + + git_diff_list_free(diff); + + /* git diff -M -C --find-copies-harder --break-rewrites \ + * 2bc7f351d20b53f1c72c16c4b036e491c478c49a \ + * 1c068dee5790ef1580cfc4cd670915b48d790084 + */ + cl_git_pass(git_diff_tree_to_tree( + &diff, g_repo, old_tree, new_tree, &diffopts)); + + opts.flags = GIT_DIFF_FIND_ALL; + cl_git_pass(git_diff_find_similar(diff, &opts)); + + memset(&exp, 0, sizeof(exp)); + cl_git_pass(git_diff_foreach( + diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp)); + + cl_assert_equal_i(5, exp.files); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]); + + git_diff_list_free(diff); + + /* == Changes ===================================================== + * songofseven.txt -> untimely.txt (rename, convert to crlf) + * ikeepsix.txt -> ikeepsix.txt (reorder sections in file) + * sixserving.txt -> sixserving.txt (whitespace - not just indent) + * sevencities.txt -> songof7cities.txt (rename, small text changes) + */ + + git_tree_free(old_tree); + old_tree = new_tree; + new_tree = resolve_commit_oid_to_tree(g_repo, sha2); + + cl_git_pass(git_diff_tree_to_tree( + &diff, g_repo, old_tree, new_tree, &diffopts)); + + /* git diff --no-renames \ + * 1c068dee5790ef1580cfc4cd670915b48d790084 \ + * 19dd32dfb1520a64e5bbaae8dce6ef423dfa2f13 + */ + memset(&exp, 0, sizeof(exp)); + cl_git_pass(git_diff_foreach( + diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp)); + + cl_assert_equal_i(6, exp.files); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_DELETED]); + + /* git diff -M -C \ + * 1c068dee5790ef1580cfc4cd670915b48d790084 \ + * 19dd32dfb1520a64e5bbaae8dce6ef423dfa2f13 + */ + cl_git_pass(git_diff_tree_to_tree( + &diff, g_repo, old_tree, new_tree, &diffopts)); + + opts.flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES; + cl_git_pass(git_diff_find_similar(diff, &opts)); + + memset(&exp, 0, sizeof(exp)); + cl_git_pass(git_diff_foreach( + diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp)); + + cl_assert_equal_i(4, exp.files); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]); + + git_diff_list_free(diff); + + /* git diff -M -C --find-copies-harder --break-rewrites \ + * 1c068dee5790ef1580cfc4cd670915b48d790084 \ + * 19dd32dfb1520a64e5bbaae8dce6ef423dfa2f13 + * with libgit2 default similarity comparison... + */ + cl_git_pass(git_diff_tree_to_tree( + &diff, g_repo, old_tree, new_tree, &diffopts)); + + opts.flags = GIT_DIFF_FIND_ALL; + cl_git_pass(git_diff_find_similar(diff, &opts)); + + /* the default match algorithm is going to find the internal + * whitespace differences in the lines of sixserving.txt to be + * significant enough that this will decide to split it into + * an ADD and a DELETE + */ + + memset(&exp, 0, sizeof(exp)); + cl_git_pass(git_diff_foreach( + diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp)); + + cl_assert_equal_i(5, exp.files); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]); + + git_diff_list_free(diff); + + /* git diff -M -C --find-copies-harder --break-rewrites \ + * 1c068dee5790ef1580cfc4cd670915b48d790084 \ + * 19dd32dfb1520a64e5bbaae8dce6ef423dfa2f13 + * with ignore_space whitespace comparision + */ + cl_git_pass(git_diff_tree_to_tree( + &diff, g_repo, old_tree, new_tree, &diffopts)); + + opts.flags = GIT_DIFF_FIND_ALL | GIT_DIFF_FIND_IGNORE_WHITESPACE; + cl_git_pass(git_diff_find_similar(diff, &opts)); + + /* Ignoring whitespace, this should no longer split sixserver.txt */ + + memset(&exp, 0, sizeof(exp)); + cl_git_pass(git_diff_foreach( + diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp)); + + cl_assert_equal_i(4, exp.files); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]); + + git_diff_list_free(diff); + + git_tree_free(old_tree); + git_tree_free(new_tree); +} + +void test_diff_rename__working_directory_changes(void) +{ + /* let's rewrite some files in the working directory on demand */ + + /* and with / without CRLF changes */ +} |