summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/diff_output.c56
-rw-r--r--src/diff_tform.c235
2 files changed, 222 insertions, 69 deletions
diff --git a/src/diff_output.c b/src/diff_output.c
index 5e9d5fe76..13434beb9 100644
--- a/src/diff_output.c
+++ b/src/diff_output.c
@@ -456,7 +456,7 @@ static void release_content(git_diff_file *file, git_map *map, git_blob *blob)
}
-static void diff_context_init(
+static int diff_context_init(
diff_context *ctxt,
git_diff_list *diff,
git_repository *repo,
@@ -468,6 +468,12 @@ static void diff_context_init(
{
memset(ctxt, 0, sizeof(diff_context));
+ if (!repo && diff)
+ repo = diff->repo;
+
+ if (!opts && diff)
+ opts = &diff->opts;
+
ctxt->repo = repo;
ctxt->diff = diff;
ctxt->opts = opts;
@@ -478,6 +484,8 @@ static void diff_context_init(
ctxt->error = 0;
setup_xdiff_options(ctxt->opts, &ctxt->xdiff_config, &ctxt->xdiff_params);
+
+ return 0;
}
static int diff_delta_file_callback(
@@ -922,6 +930,15 @@ static int diff_patch_line_cb(
return 0;
}
+static int diff_required(git_diff_list *diff, const char *action)
+{
+ if (!diff) {
+ giterr_set(GITERR_INVALID, "Must provide valid diff to %s", action);
+ return -1;
+ }
+
+ return 0;
+}
int git_diff_foreach(
git_diff_list *diff,
@@ -935,9 +952,12 @@ int git_diff_foreach(
size_t idx;
git_diff_patch patch;
- diff_context_init(
- &ctxt, diff, diff->repo, &diff->opts,
- file_cb, hunk_cb, data_cb, payload);
+ if (diff_required(diff, "git_diff_foreach") < 0)
+ return -1;
+
+ if (diff_context_init(
+ &ctxt, diff, NULL, NULL, file_cb, hunk_cb, data_cb, payload) < 0)
+ return -1;
diff_patch_init(&ctxt, &patch);
@@ -1306,8 +1326,10 @@ static int diff_single_init(
memset(data, 0, sizeof(*data));
- diff_context_init(
- &data->ctxt, NULL, repo, opts, file_cb, hunk_cb, data_cb, payload);
+ if (diff_context_init(
+ &data->ctxt, NULL, repo, opts,
+ file_cb, hunk_cb, data_cb, payload) < 0)
+ return -1;
diff_patch_init(&data->ctxt, &data->patch);
@@ -1374,6 +1396,9 @@ int git_diff_blobs(
new_blob ? git_object_owner((const git_object *)new_blob) :
old_blob ? git_object_owner((const git_object *)old_blob) : NULL;
+ if (!repo) /* Hmm, given two NULL blobs, silently do no callbacks? */
+ return 0;
+
if ((error = diff_single_init(
&d, repo, options, file_cb, hunk_cb, data_cb, payload)) < 0)
return error;
@@ -1405,6 +1430,9 @@ int git_diff_blob_to_buffer(
git_repository *repo =
old_blob ? git_object_owner((const git_object *)old_blob) : NULL;
+ if (!repo && !buf) /* Hmm, given NULLs, silently do no callbacks? */
+ return 0;
+
if ((error = diff_single_init(
&d, repo, options, file_cb, hunk_cb, data_cb, payload)) < 0)
return error;
@@ -1453,11 +1481,19 @@ int git_diff_get_patch(
if (patch_ptr)
*patch_ptr = NULL;
+ if (delta_ptr)
+ *delta_ptr = NULL;
+
+ if (diff_required(diff, "git_diff_get_patch") < 0)
+ return -1;
+
+ if (diff_context_init(
+ &ctxt, diff, NULL, NULL,
+ NULL, diff_patch_hunk_cb, diff_patch_line_cb, NULL) < 0)
+ return -1;
delta = git_vector_get(&diff->deltas, idx);
if (!delta) {
- if (delta_ptr)
- *delta_ptr = NULL;
giterr_set(GITERR_INVALID, "Index out of range for delta in diff");
return GIT_ENOTFOUND;
}
@@ -1470,10 +1506,6 @@ int git_diff_get_patch(
(diff->opts.flags & GIT_DIFF_SKIP_BINARY_CHECK) != 0))
return 0;
- diff_context_init(
- &ctxt, diff, diff->repo, &diff->opts,
- NULL, diff_patch_hunk_cb, diff_patch_line_cb, NULL);
-
if (git_diff_delta__should_skip(ctxt.opts, delta))
return 0;
diff --git a/src/diff_tform.c b/src/diff_tform.c
index e051732c5..48332d3e5 100644
--- a/src/diff_tform.c
+++ b/src/diff_tform.c
@@ -7,6 +7,7 @@
#include "common.h"
#include "diff.h"
#include "git2/config.h"
+#include "git2/blob.h"
#include "hashsig.h"
static git_diff_delta *diff_delta__dup(
@@ -362,24 +363,86 @@ on_error:
return -1;
}
-typedef struct {
- /* array of delta index * 2 + (old_file/new_file) -> file hashes */
- git_hashsig *sigs;
-} diff_similarity_cache;
+GIT_INLINE(git_diff_file *) similarity_get_file(git_diff_list *diff, size_t idx)
+{
+ git_diff_delta *delta = git_vector_get(&diff->deltas, idx / 2);
+ return (idx & 1) ? &delta->new_file : &delta->old_file;
+}
-static unsigned int calc_similarity(
- void *ref, git_diff_file *old_file, git_diff_file *new_file)
+static int similarity_calc(
+ git_diff_list *diff,
+ git_diff_find_options *opts,
+ size_t file_idx,
+ void **cache)
{
- diff_similarity_cache *cache = ref;
+ int error = 0;
+ git_diff_file *file = similarity_get_file(diff, file_idx);
+ git_iterator_type_t src = (file_idx & 1) ? diff->old_src : diff->new_src;
+
+ if (src == GIT_ITERATOR_TYPE_WORKDIR) { /* compute hashsig from file */
+ git_buf path = GIT_BUF_INIT;
+
+ /* TODO: apply wd-to-odb filters to file data if necessary */
+
+ if (!(error = git_buf_joinpath(
+ &path, git_repository_workdir(diff->repo), file->path)))
+ error = opts->metric->file_signature(
+ &cache[file_idx], file, path.ptr, opts->metric->payload);
+
+ git_buf_free(&path);
+ } else { /* compute hashsig from blob buffer */
+ git_blob *blob = NULL;
- GIT_UNUSED(cache);
+ /* TODO: add max size threshold a la diff? */
- if (git_oid_cmp(&old_file->oid, &new_file->oid) == 0)
+ if ((error = git_blob_lookup(&blob, diff->repo, &file->oid)) < 0)
+ return error;
+
+ error = opts->metric->buffer_signature(
+ &cache[file_idx], file, git_blob_rawcontent(blob),
+ git_blob_rawsize(blob), opts->metric->payload);
+
+ git_blob_free(blob);
+ }
+
+ return error;
+}
+
+static int similarity_measure(
+ git_diff_list *diff,
+ git_diff_find_options *opts,
+ void **cache,
+ size_t a_idx,
+ size_t b_idx)
+{
+ int score = 0;
+ git_diff_file *a_file = similarity_get_file(diff, a_idx);
+ git_diff_file *b_file = similarity_get_file(diff, b_idx);
+
+ if (GIT_MODE_TYPE(a_file->mode) != GIT_MODE_TYPE(b_file->mode))
+ return 0;
+
+ if (git_oid_cmp(&a_file->oid, &b_file->oid) == 0)
return 100;
- /* TODO: insert actual similarity algo here */
+ /* update signature cache if needed */
+ if (!cache[a_idx] && similarity_calc(diff, opts, a_idx, cache) < 0)
+ return -1;
+ if (!cache[b_idx] && similarity_calc(diff, opts, b_idx, cache) < 0)
+ return -1;
- return 0;
+ /* compare signatures */
+ if (opts->metric->similarity(
+ &score, cache[a_idx], cache[b_idx], opts->metric->payload) < 0)
+ return -1;
+
+ /* clip score */
+ if (score < 0)
+ score = 0;
+ else if (score > 100)
+ score = 100;
+
+ return score;
}
#define FLAG_SET(opts,flag_name) ((opts.flags & flag_name) != 0)
@@ -388,67 +451,85 @@ int git_diff_find_similar(
git_diff_list *diff,
git_diff_find_options *given_opts)
{
- unsigned int i, j, similarity;
+ size_t i, j, cache_size, *matches;
+ int error = 0, similarity;
git_diff_delta *from, *to;
git_diff_find_options opts;
- unsigned int tried_targets, num_changes = 0;
- git_vector matches = GIT_VECTOR_INIT;
+ size_t tried_targets, num_rewrites = 0;
+ void **cache;
- if (normalize_find_opts(diff, &opts, given_opts) < 0)
- return -1;
+ if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0)
+ return error;
- /* first do splits if requested */
+ /* TODO: maybe abort if deltas.length > target_limit ??? */
+
+ cache_size = diff->deltas.length * 2; /* must store b/c length may change */
+ cache = git__calloc(cache_size, sizeof(void *));
+ GITERR_CHECK_ALLOC(cache);
+
+ matches = git__calloc(diff->deltas.length, sizeof(size_t));
+ GITERR_CHECK_ALLOC(matches);
+
+ /* first break MODIFIED records that are too different (if requested) */
if (FLAG_SET(opts, GIT_DIFF_FIND_AND_BREAK_REWRITES)) {
git_vector_foreach(&diff->deltas, i, from) {
if (from->status != GIT_DELTA_MODIFIED)
continue;
- /* Right now, this doesn't work right because the similarity
- * algorithm isn't actually implemented...
- */
- similarity = 100;
- /* calc_similarity(NULL, &from->old_file, from->new_file); */
+ similarity = similarity_measure(
+ diff, &opts, cache, 2 * i, 2 * i + 1);
- if (similarity < opts.break_rewrite_threshold) {
+ if (similarity < 0) {
+ error = similarity;
+ goto cleanup;
+ }
+
+ if ((unsigned int)similarity < opts.break_rewrite_threshold) {
from->flags |= GIT_DIFF_FLAG__TO_SPLIT;
- num_changes++;
+ num_rewrites++;
}
}
-
- /* apply splits as needed */
- if (num_changes > 0 &&
- apply_splits_and_deletes(
- diff, diff->deltas.length + num_changes) < 0)
- return -1;
}
/* next find the most similar delta for each rename / copy candidate */
- if (git_vector_init(&matches, diff->deltas.length, git_diff_delta__cmp) < 0)
- return -1;
-
git_vector_foreach(&diff->deltas, i, from) {
tried_targets = 0;
+ /* skip things that aren't blobs */
+ if (GIT_MODE_TYPE(from->old_file.mode) !=
+ GIT_MODE_TYPE(GIT_FILEMODE_BLOB))
+ continue;
+
git_vector_foreach(&diff->deltas, j, to) {
if (i == j)
continue;
+ /* skip things that aren't blobs */
+ if (GIT_MODE_TYPE(to->new_file.mode) !=
+ GIT_MODE_TYPE(GIT_FILEMODE_BLOB))
+ continue;
+
switch (to->status) {
case GIT_DELTA_ADDED:
case GIT_DELTA_UNTRACKED:
case GIT_DELTA_RENAMED:
case GIT_DELTA_COPIED:
break;
+ case GIT_DELTA_MODIFIED:
+ if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0)
+ continue;
+ break;
default:
/* only the above status values should be checked */
continue;
}
/* skip all but DELETED files unless copy detection is on */
- if (from->status != GIT_DELTA_DELETED &&
- !FLAG_SET(opts, GIT_DIFF_FIND_COPIES))
+ if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES) &&
+ from->status != GIT_DELTA_DELETED &&
+ (from->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0)
continue;
/* don't check UNMODIFIED files as source unless given option */
@@ -463,34 +544,44 @@ int git_diff_find_similar(
/* calculate similarity and see if this pair beats the
* similarity score of the current best pair.
*/
- similarity = calc_similarity(NULL, &from->old_file, &to->new_file);
+ similarity = similarity_measure(
+ diff, &opts, cache, 2 * i, 2 * j + 1);
+
+ if (similarity < 0) {
+ error = similarity;
+ goto cleanup;
+ }
- if (to->similarity < similarity) {
- to->similarity = similarity;
- if (git_vector_set(NULL, &matches, j, from) < 0)
- return -1;
+ if (to->similarity < (unsigned int)similarity) {
+ to->similarity = (unsigned int)similarity;
+ matches[j] = i + 1;
}
}
}
/* next rewrite the diffs with renames / copies */
- num_changes = 0;
+ num_rewrites = 0;
git_vector_foreach(&diff->deltas, j, to) {
- from = GIT_VECTOR_GET(&matches, j);
- if (!from) {
+ if (!matches[j]) {
assert(to->similarity == 0);
continue;
}
- /* three possible outcomes here:
+ i = matches[j] - 1;
+ from = GIT_VECTOR_GET(&diff->deltas, i);
+ assert(from);
+
+ /* four possible outcomes here:
* 1. old DELETED and if over rename threshold,
* new becomes RENAMED and old goes away
- * 2. old was MODIFIED but FIND_RENAMES_FROM_REWRITES is on and
+ * 2. old SPLIT and if over rename threshold,
+ * new becomes RENAMED and old becomes ADDED (clear SPLIT)
+ * 3. old was MODIFIED but FIND_RENAMES_FROM_REWRITES is on and
* old is more similar to new than it is to itself, in which
* case, new becomes RENAMED and old becomed ADDED
- * 3. otherwise if over copy threshold, new becomes COPIED
+ * 4. otherwise if over copy threshold, new becomes COPIED
*/
if (from->status == GIT_DELTA_DELETED) {
@@ -503,7 +594,26 @@ int git_diff_find_similar(
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
from->flags |= GIT_DIFF_FLAG__TO_DELETE;
- num_changes++;
+ num_rewrites++;
+
+ continue;
+ }
+
+ if (from->status == GIT_DELTA_MODIFIED &&
+ (from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0)
+ {
+ if (to->similarity < opts.rename_threshold) {
+ to->similarity = 0;
+ continue;
+ }
+
+ to->status = GIT_DELTA_RENAMED;
+ memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
+
+ from->status = GIT_DELTA_ADDED;
+ from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
+ memset(&from->old_file, 0, sizeof(from->old_file));
+ num_rewrites--;
continue;
}
@@ -512,10 +622,15 @@ int git_diff_find_similar(
FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
to->similarity > opts.rename_threshold)
{
- similarity = 100;
- /* calc_similarity(NULL, &from->old_file, from->new_file); */
+ similarity = similarity_measure(
+ diff, &opts, cache, 2 * i, 2 * i + 1);
+
+ if (similarity < 0) {
+ error = similarity;
+ goto cleanup;
+ }
- if (similarity < opts.rename_from_rewrite_threshold) {
+ if ((unsigned int)similarity < opts.rename_from_rewrite_threshold) {
to->status = GIT_DELTA_RENAMED;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
@@ -538,17 +653,23 @@ int git_diff_find_similar(
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
}
- git_vector_free(&matches);
+ if (num_rewrites > 0) {
+ assert(num_rewrites < diff->deltas.length);
- if (num_changes > 0) {
- assert(num_changes < diff->deltas.length);
+ error = apply_splits_and_deletes(
+ diff, diff->deltas.length - num_rewrites);
+ }
+
+cleanup:
+ git__free(matches);
- if (apply_splits_and_deletes(
- diff, diff->deltas.length - num_changes) < 0)
- return -1;
+ for (i = 0; i < cache_size; ++i) {
+ if (cache[i] != NULL)
+ opts.metric->free_signature(cache[i], opts.metric->payload);
}
+ git__free(cache);
- return 0;
+ return error;
}
#undef FLAG_SET