summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/blame.c492
-rw-r--r--src/blame.h37
-rw-r--r--src/blame_git.c591
-rw-r--r--src/blame_git.h93
-rw-r--r--src/object.c36
5 files changed, 1249 insertions, 0 deletions
diff --git a/src/blame.c b/src/blame.c
new file mode 100644
index 000000000..30d65f02c
--- /dev/null
+++ b/src/blame.c
@@ -0,0 +1,492 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+
+#include "blame.h"
+#include "git2/commit.h"
+#include "git2/revparse.h"
+#include "git2/revwalk.h"
+#include "git2/tree.h"
+#include "git2/diff.h"
+#include "git2/blob.h"
+#include "util.h"
+#include "repository.h"
+#include "blame_git.h"
+
+
+static int hunk_search_cmp_helper(const void *key, size_t start_line, size_t num_lines)
+{
+ uint32_t lineno = *(size_t*)key;
+ if (lineno < start_line)
+ return -1;
+ if (lineno >= ((uint32_t)start_line + num_lines))
+ return 1;
+ return 0;
+}
+static int hunk_byfinalline_search_cmp(const void *key, const void *entry)
+{
+ git_blame_hunk *hunk = (git_blame_hunk*)entry;
+ return hunk_search_cmp_helper(key, hunk->final_start_line_number, hunk->lines_in_hunk);
+}
+static int hunk_sort_cmp_by_start_line(const void *_a, const void *_b)
+{
+ git_blame_hunk *a = (git_blame_hunk*)_a,
+ *b = (git_blame_hunk*)_b;
+
+ return a->final_start_line_number - b->final_start_line_number;
+}
+
+static bool hunk_ends_at_or_before_line(git_blame_hunk *hunk, size_t line)
+{
+ return line >= (size_t)(hunk->final_start_line_number + hunk->lines_in_hunk - 1);
+}
+
+static bool hunk_starts_at_or_after_line(git_blame_hunk *hunk, size_t line)
+{
+ return line <= hunk->final_start_line_number;
+}
+
+static git_blame_hunk* new_hunk(uint16_t start, uint16_t lines, uint16_t orig_start, const char *path)
+{
+ git_blame_hunk *hunk = git__calloc(1, sizeof(git_blame_hunk));
+ if (!hunk) return NULL;
+
+ hunk->lines_in_hunk = lines;
+ hunk->final_start_line_number = start;
+ hunk->orig_start_line_number = orig_start;
+ hunk->orig_path = path ? git__strdup(path) : NULL;
+
+ return hunk;
+}
+
+git_blame_hunk* git_blame__alloc_hunk()
+{
+ return new_hunk(0,0,0,NULL);
+}
+
+static git_blame_hunk* dup_hunk(git_blame_hunk *hunk)
+{
+ git_blame_hunk *newhunk = new_hunk(hunk->final_start_line_number, hunk->lines_in_hunk, hunk->orig_start_line_number, hunk->orig_path);
+ git_oid_cpy(&newhunk->orig_commit_id, &hunk->orig_commit_id);
+ git_oid_cpy(&newhunk->final_commit_id, &hunk->final_commit_id);
+ return newhunk;
+}
+
+static void free_hunk(git_blame_hunk *hunk)
+{
+ git__free((void*)hunk->orig_path);
+ git__free(hunk);
+}
+
+/* Starting with the hunk that includes start_line, shift all following hunks'
+ * final_start_line by shift_by lines */
+static void shift_hunks_by_final(git_vector *v, size_t start_line, int shift_by)
+{
+ size_t i;
+
+ if (!git_vector_bsearch2( &i, v, hunk_byfinalline_search_cmp, &start_line)) {
+ for (; i < v->length; i++) {
+ git_blame_hunk *hunk = (git_blame_hunk*)v->contents[i];
+ hunk->final_start_line_number += shift_by;
+ }
+ }
+}
+static int paths_cmp(const void *a, const void *b) { return git__strcmp((char*)a, (char*)b); }
+git_blame* git_blame__alloc(
+ git_repository *repo,
+ git_blame_options opts,
+ const char *path)
+{
+ git_blame *gbr = (git_blame*)git__calloc(1, sizeof(git_blame));
+ if (!gbr) {
+ giterr_set_oom();
+ return NULL;
+ }
+ git_vector_init(&gbr->hunks, 8, hunk_sort_cmp_by_start_line);
+ git_vector_init(&gbr->unclaimed_hunks, 8, hunk_sort_cmp_by_start_line);
+ git_vector_init(&gbr->paths, 8, paths_cmp);
+ gbr->repository = repo;
+ gbr->options = opts;
+ gbr->path = git__strdup(path);
+ git_vector_insert(&gbr->paths, git__strdup(path));
+ gbr->final_blob = NULL;
+ return gbr;
+}
+
+void git_blame_free(git_blame *blame)
+{
+ size_t i;
+ git_blame_hunk *hunk;
+ char *path;
+
+ if (!blame) return;
+
+ git_vector_foreach(&blame->hunks, i, hunk)
+ free_hunk(hunk);
+ git_vector_free(&blame->hunks);
+
+ git_vector_foreach(&blame->unclaimed_hunks, i, hunk)
+ free_hunk(hunk);
+ git_vector_free(&blame->unclaimed_hunks);
+
+ git_vector_foreach(&blame->paths, i, path)
+ git__free(path);
+ git_vector_free(&blame->paths);
+
+ git__free((void*)blame->path);
+ git_blob_free(blame->final_blob);
+ git__free(blame);
+}
+
+uint32_t git_blame_get_hunk_count(git_blame *blame)
+{
+ assert(blame);
+ return blame->hunks.length;
+}
+
+const git_blame_hunk *git_blame_get_hunk_byindex(git_blame *blame, uint32_t index)
+{
+ assert(blame);
+ return (git_blame_hunk*)git_vector_get(&blame->hunks, index);
+}
+
+const git_blame_hunk *git_blame_get_hunk_byline(git_blame *blame, uint32_t lineno)
+{
+ size_t i;
+ assert(blame);
+
+ if (!git_vector_bsearch2( &i, &blame->hunks, hunk_byfinalline_search_cmp, &lineno)) {
+ return git_blame_get_hunk_byindex(blame, i);
+ }
+
+ return NULL;
+}
+
+static void normalize_options(
+ git_blame_options *out,
+ const git_blame_options *in,
+ git_repository *repo)
+{
+ git_blame_options dummy = GIT_BLAME_OPTIONS_INIT;
+ if (!in) in = &dummy;
+
+ memcpy(out, in, sizeof(git_blame_options));
+
+ /* No newest_commit => HEAD */
+ if (git_oid_iszero(&out->newest_commit)) {
+ git_reference_name_to_id(&out->newest_commit, repo, "HEAD");
+ }
+}
+
+static git_blame_hunk *split_hunk_in_vector(
+ git_vector *vec,
+ git_blame_hunk *hunk,
+ size_t rel_line,
+ bool return_new)
+{
+ size_t new_line_count;
+ git_blame_hunk *nh;
+
+ /* Don't split if already at a boundary */
+ if (rel_line <= 0 ||
+ rel_line >= hunk->lines_in_hunk)
+ {
+ return hunk;
+ }
+
+ new_line_count = hunk->lines_in_hunk - rel_line;
+ nh = new_hunk(hunk->final_start_line_number+rel_line, new_line_count,
+ hunk->orig_start_line_number+rel_line, hunk->orig_path);
+ git_oid_cpy(&nh->final_commit_id, &hunk->final_commit_id);
+ git_oid_cpy(&nh->orig_commit_id, &hunk->orig_commit_id);
+
+ /* Adjust hunk that was split */
+ hunk->lines_in_hunk -= new_line_count;
+ git_vector_insert_sorted(vec, nh, NULL);
+ {
+ git_blame_hunk *ret = return_new ? nh : hunk;
+ return ret;
+ }
+}
+
+
+/*
+ * To allow quick access to the contents of nth line in the
+ * final image, prepare an index in the scoreboard.
+ */
+static int prepare_lines(struct scoreboard *sb)
+{
+ const char *buf = sb->final_buf;
+ git_off_t len = sb->final_buf_size;
+ int num = 0, incomplete = 0, bol = 1;
+
+ if (len && buf[len-1] != '\n')
+ incomplete++; /* incomplete line at the end */
+ while (len--) {
+ if (bol) {
+ bol = 0;
+ }
+ if (*buf++ == '\n') {
+ num++;
+ bol = 1;
+ }
+ }
+ sb->num_lines = num + incomplete;
+ return sb->num_lines;
+}
+
+static git_blame_hunk* hunk_from_entry(struct blame_entry *e)
+{
+ git_blame_hunk *h = new_hunk(
+ e->lno+1, e->num_lines, e->s_lno+1, e->suspect->path);
+ git_oid_cpy(&h->final_commit_id, git_commit_id(e->suspect->commit));
+ return h;
+}
+
+static void free_if_not_already_freed(git_vector *already, struct origin *o)
+{
+ size_t i;
+
+ if (!o) return;
+ if (!git_vector_search(&i, already, o))
+ return;
+
+ git_vector_insert(already, o);
+ free_if_not_already_freed(already, o->previous);
+ git_blob_free(o->blob);
+ git_commit_free(o->commit);
+ git__free(o);
+}
+
+static int walk_and_mark(git_blame *blame)
+{
+ int error;
+
+ struct scoreboard sb = {0};
+ struct blame_entry *ent = NULL;
+ git_blob *blob = NULL;
+ struct origin *o;
+ git_vector already = GIT_VECTOR_INIT;
+
+ if ((error = git_commit_lookup(&sb.final, blame->repository, &blame->options.newest_commit)) < 0 ||
+ (error = git_object_lookup_bypath((git_object**)&blob, (git_object*)sb.final, blame->path, GIT_OBJ_BLOB)) < 0)
+ goto cleanup;
+ sb.final_buf = git_blob_rawcontent(blob);
+ sb.final_buf_size = git_blob_rawsize(blob);
+ if ((error = get_origin(&o, &sb, sb.final, blame->path)) < 0)
+ goto cleanup;
+
+ ent = git__calloc(1, sizeof(*ent));
+ ent->num_lines = prepare_lines(&sb);
+ ent->suspect = o;
+ sb.ent = ent;
+ sb.path = blame->path;
+ sb.blame = blame;
+
+ assign_blame(&sb, blame->options.flags);
+ coalesce(&sb);
+
+ for (ent = sb.ent; ent; ) {
+ git_vector_insert(&blame->hunks, hunk_from_entry(ent));
+ ent = ent->next;
+ }
+
+cleanup:
+ for (ent = sb.ent; ent; ) {
+ struct blame_entry *e = ent->next;
+ struct origin *o = ent->suspect;
+
+ /* Linkages might not be ordered, so we only free pointers we haven't
+ * seen before. */
+ free_if_not_already_freed(&already, o);
+
+ git__free(ent);
+ ent = e;
+ }
+
+ git_vector_free(&already);
+ git_commit_free(sb.final);
+ git_blob_free(blob);
+ return error;
+}
+
+static int load_blob(git_blame *blame, git_repository *repo, git_oid *commit_id, const char *path)
+{
+ int retval = -1;
+ git_commit *commit = NULL;
+ git_tree *tree = NULL;
+ git_tree_entry *tree_entry = NULL;
+ git_object *obj = NULL;
+
+ if (((retval = git_commit_lookup(&commit, repo, commit_id)) < 0) ||
+ ((retval = git_object_lookup_bypath(&obj, (git_object*)commit, path, GIT_OBJ_BLOB)) < 0) ||
+ ((retval = git_object_type(obj)) != GIT_OBJ_BLOB))
+ goto cleanup;
+ blame->final_blob = (git_blob*)obj;
+
+cleanup:
+ git_tree_entry_free(tree_entry);
+ git_tree_free(tree);
+ git_commit_free(commit);
+ return retval;
+}
+
+/*******************************************************************************
+ * File blaming
+ ******************************************************************************/
+
+int git_blame_file(
+ git_blame **out,
+ git_repository *repo,
+ const char *path,
+ git_blame_options *options)
+{
+ int error = -1;
+ git_blame_options normOptions = GIT_BLAME_OPTIONS_INIT;
+ git_blame *blame = NULL;
+
+ assert(out && repo && path);
+ normalize_options(&normOptions, options, repo);
+
+ blame = git_blame__alloc(repo, normOptions, path);
+ GITERR_CHECK_ALLOC(blame);
+
+ if ((error = load_blob(blame, repo, &normOptions.newest_commit, path)) < 0)
+ goto on_error;
+
+ if ((error = walk_and_mark(blame)) < 0)
+ goto on_error;
+
+ *out = blame;
+ return 0;
+
+on_error:
+ git_blame_free(blame);
+ return error;
+}
+
+/*******************************************************************************
+ * Buffer blaming
+ *******************************************************************************/
+
+static bool hunk_is_bufferblame(git_blame_hunk *hunk)
+{
+ return git_oid_iszero(&hunk->final_commit_id);
+}
+
+static int buffer_hunk_cb(
+ const git_diff_delta *delta,
+ const git_diff_range *range,
+ const char *header,
+ size_t header_len,
+ void *payload)
+{
+ git_blame *blame = (git_blame*)payload;
+ size_t wedge_line;
+
+ GIT_UNUSED(delta);
+ GIT_UNUSED(header);
+ GIT_UNUSED(header_len);
+
+ wedge_line = (range->old_lines == 0) ? range->new_start : range->old_start;
+ blame->current_diff_line = wedge_line;
+
+ /* If this hunk doesn't start between existing hunks, split a hunk up so it does */
+ blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byline(blame, wedge_line);
+ if (!hunk_starts_at_or_after_line(blame->current_hunk, wedge_line)){
+ blame->current_hunk = split_hunk_in_vector(&blame->hunks, blame->current_hunk,
+ wedge_line - blame->current_hunk->orig_start_line_number, true);
+ }
+
+ return 0;
+}
+
+static int ptrs_equal_cmp(const void *a, const void *b) { return a<b ? -1 : a>b ? 1 : 0; }
+static int buffer_line_cb(
+ const git_diff_delta *delta,
+ const git_diff_range *range,
+ char line_origin,
+ const char *content,
+ size_t content_len,
+ void *payload)
+{
+ git_blame *blame = (git_blame*)payload;
+
+ GIT_UNUSED(delta);
+ GIT_UNUSED(range);
+ GIT_UNUSED(content);
+ GIT_UNUSED(content_len);
+
+#ifdef DO_DEBUG
+ {
+ char *str = git__substrdup(content, content_len);
+ git__free(str);
+ }
+#endif
+
+ if (line_origin == GIT_DIFF_LINE_ADDITION) {
+ if (hunk_is_bufferblame(blame->current_hunk) &&
+ hunk_ends_at_or_before_line(blame->current_hunk, blame->current_diff_line)) {
+ /* Append to the current buffer-blame hunk */
+ blame->current_hunk->lines_in_hunk++;
+ shift_hunks_by_final(&blame->hunks, blame->current_diff_line+1, 1);
+ } else {
+ /* Create a new buffer-blame hunk with this line */
+ shift_hunks_by_final(&blame->hunks, blame->current_diff_line, 1);
+ blame->current_hunk = new_hunk(blame->current_diff_line, 1, 0, blame->path);
+ git_vector_insert_sorted(&blame->hunks, blame->current_hunk, NULL);
+ }
+ blame->current_diff_line++;
+ }
+
+ if (line_origin == GIT_DIFF_LINE_DELETION) {
+ /* Trim the line from the current hunk; remove it if it's now empty */
+ size_t shift_base = blame->current_diff_line + blame->current_hunk->lines_in_hunk+1;
+
+ if (--(blame->current_hunk->lines_in_hunk) == 0) {
+ size_t i;
+ shift_base--;
+ if (!git_vector_search2(&i, &blame->hunks, ptrs_equal_cmp, blame->current_hunk)) {
+ git_vector_remove(&blame->hunks, i);
+ free_hunk(blame->current_hunk);
+ blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byindex(blame, i);
+ }
+ }
+ shift_hunks_by_final(&blame->hunks, shift_base, -1);
+ }
+ return 0;
+}
+
+int git_blame_buffer(
+ git_blame **out,
+ git_blame *reference,
+ const char *buffer,
+ size_t buffer_len)
+{
+ git_blame *blame;
+ git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
+ size_t i;
+ git_blame_hunk *hunk;
+
+ diffopts.context_lines = 0;
+
+ assert(out && reference && buffer && buffer_len);
+
+ blame = git_blame__alloc(reference->repository, reference->options, reference->path);
+
+ /* Duplicate all of the hunk structures in the reference blame */
+ git_vector_foreach(&reference->hunks, i, hunk) {
+ git_vector_insert(&blame->hunks, dup_hunk(hunk));
+ }
+
+ /* Diff to the reference blob */
+ git_diff_blob_to_buffer(reference->final_blob, blame->path,
+ buffer, buffer_len, blame->path,
+ &diffopts, NULL, buffer_hunk_cb, buffer_line_cb, blame);
+
+ *out = blame;
+ return 0;
+}
diff --git a/src/blame.h b/src/blame.h
new file mode 100644
index 000000000..260d5b5a1
--- /dev/null
+++ b/src/blame.h
@@ -0,0 +1,37 @@
+#ifndef INCLUDE_blame_h__
+#define INCLUDE_blame_h__
+
+#include "git2/blame.h"
+#include "common.h"
+#include "vector.h"
+#include "diff.h"
+#include "array.h"
+#include "git2/oid.h"
+
+struct git_blame {
+ const char *path;
+ git_repository *repository;
+ git_blame_options options;
+
+ git_vector hunks;
+ git_vector unclaimed_hunks;
+ git_vector paths;
+
+ git_blob *final_blob;
+ size_t num_lines;
+
+ git_oid current_commit;
+ git_oid parent_commit;
+ size_t current_diff_line;
+ size_t current_blame_line;
+ git_blame_hunk *current_hunk;
+};
+
+git_blame *git_blame__alloc(
+ git_repository *repo,
+ git_blame_options opts,
+ const char *path);
+
+git_blame_hunk *git_blame__alloc_hunk();
+
+#endif
diff --git a/src/blame_git.c b/src/blame_git.c
new file mode 100644
index 000000000..0dae2d9c1
--- /dev/null
+++ b/src/blame_git.c
@@ -0,0 +1,591 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+
+#include "blame_git.h"
+#include "commit.h"
+
+/*
+ * Locate an existing origin or create a new one.
+ */
+int get_origin(struct origin **out, struct scoreboard *sb, git_commit *commit, const char *path)
+{
+ struct blame_entry *e;
+
+ for (e = sb->ent; e; e = e->next)
+ if (e->suspect->commit == commit && !strcmp(e->suspect->path, path))
+ *out = origin_incref(e->suspect);
+ return make_origin(out, commit, path);
+}
+
+/*
+ * Given a commit and a path in it, create a new origin structure.
+ * The callers that add blame to the scoreboard should use
+ * get_origin() to obtain shared, refcounted copy instead of calling
+ * this function directly.
+ */
+int make_origin(struct origin **out, git_commit *commit, const char *path)
+{
+ int error = 0;
+ struct origin *o;
+ o = git__calloc(1, sizeof(*o) + strlen(path) + 1);
+ GITERR_CHECK_ALLOC(o);
+ o->commit = commit;
+ o->refcnt = 1;
+ strcpy(o->path, path);
+
+ if (!(error = git_object_lookup_bypath((git_object**)&o->blob, (git_object*)commit,
+ path, GIT_OBJ_BLOB)))
+ *out = o;
+ else
+ origin_decref(o);
+ return error;
+}
+
+struct blame_chunk_cb_data {
+ struct scoreboard *sb;
+ struct origin *target;
+ struct origin *parent;
+ long plno;
+ long tlno;
+};
+
+static bool same_suspect(struct origin *a, struct origin *b)
+{
+ if (a == b)
+ return true;
+ if (git_oid_cmp(git_commit_id(a->commit), git_commit_id(b->commit)))
+ return false;
+ return 0 == strcmp(a->path, b->path);
+}
+
+/* Find the line number of the last line the target is suspected for */
+static int find_last_in_target(struct scoreboard *sb, struct origin *target)
+{
+ struct blame_entry *e;
+ int last_in_target = -1;
+
+ for (e=sb->ent; e; e=e->next) {
+ if (e->guilty || !same_suspect(e->suspect, target))
+ continue;
+ if (last_in_target < e->s_lno + e->num_lines)
+ last_in_target = e->s_lno + e->num_lines;
+ }
+ return last_in_target;
+}
+
+/*
+ * It is known that lines between tlno to same came from parent, and e
+ * has an overlap with that range. it also is known that parent's
+ * line plno corresponds to e's line tlno.
+ *
+ * <---- e ----->
+ * <------>
+ * <------------>
+ * <------------>
+ * <------------------>
+ *
+ * Split e into potentially three parts; before this chunk, the chunk
+ * to be blamed for the parent, and after that portion.
+ */
+static void split_overlap(struct blame_entry *split, struct blame_entry *e,
+ int tlno, int plno, int same, struct origin *parent)
+{
+ int chunk_end_lno;
+
+ if (e->s_lno < tlno) {
+ /* there is a pre-chunk part not blamed on the parent */
+ split[0].suspect = origin_incref(e->suspect);
+ split[0].lno = e->lno;
+ split[0].s_lno = e->s_lno;
+ split[0].num_lines = tlno - e->s_lno;
+ split[1].lno = e->lno + tlno - e->s_lno;
+ split[1].s_lno = plno;
+ } else {
+ split[1].lno = e->lno;
+ split[1].s_lno = plno + (e->s_lno - tlno);
+ }
+
+ if (same < e->s_lno + e->num_lines) {
+ /* there is a post-chunk part not blamed on parent */
+ split[2].suspect = origin_incref(e->suspect);
+ split[2].lno = e->lno + (same - e->s_lno);
+ split[2].s_lno = e->s_lno + (same - e->s_lno);
+ split[2].num_lines = e->s_lno + e->num_lines - same;
+ chunk_end_lno = split[2].lno;
+ } else {
+ chunk_end_lno = e->lno + e->num_lines;
+ }
+ split[1].num_lines = chunk_end_lno - split[1].lno;
+
+ /*
+ * if it turns out there is nothing to blame the parent for, forget about
+ * the splitting. !split[1].suspect signals this.
+ */
+ if (split[1].num_lines < 1)
+ return;
+ split[1].suspect = origin_incref(parent);
+}
+
+/*
+ * Link in a new blame entry to the scoreboard. Entries that cover the same
+ * line range have been removed from the scoreboard previously.
+ */
+static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
+{
+ struct blame_entry *ent, *prev = NULL;
+
+ origin_incref(e->suspect);
+
+ for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
+ prev = ent;
+
+ /* prev, if not NULL, is the last one that is below e */
+ e->prev = prev;
+ if (prev) {
+ e->next = prev->next;
+ prev->next = e;
+ } else {
+ e->next = sb->ent;
+ sb->ent = e;
+ }
+ if (e->next)
+ e->next->prev = e;
+}
+
+/*
+ * src typically is on-stack; we want to copy the information in it to
+ * a malloced blame_entry that is already on the linked list of the scoreboard.
+ * The origin of dst loses a refcnt while the origin of src gains one.
+ */
+static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
+{
+ struct blame_entry *p, *n;
+
+ p = dst->prev;
+ n = dst->next;
+ origin_incref(src->suspect);
+ origin_decref(dst->suspect);
+ memcpy(dst, src, sizeof(*src));
+ dst->prev = p;
+ dst->next = n;
+ dst->score = 0;
+}
+
+/*
+ * split_overlap() divided an existing blame e into up to three parts in split.
+ * Adjust the linked list of blames in the scoreboard to reflect the split.
+ */
+static void split_blame(struct scoreboard *sb, struct blame_entry *split, struct blame_entry *e)
+{
+ struct blame_entry *new_entry;
+
+ if (split[0].suspect && split[2].suspect) {
+ /* The first part (reuse storage for the existing entry e */
+ dup_entry(e, &split[0]);
+
+ /* The last part -- me */
+ new_entry = git__malloc(sizeof(*new_entry));
+ memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
+ add_blame_entry(sb, new_entry);
+
+ /* ... and the middle part -- parent */
+ new_entry = git__malloc(sizeof(*new_entry));
+ memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
+ add_blame_entry(sb, new_entry);
+ } else if (!split[0].suspect && !split[2].suspect) {
+ /*
+ * The parent covers the entire area; reuse storage for e and replace it
+ * with the parent
+ */
+ dup_entry(e, &split[1]);
+ } else if (split[0].suspect) {
+ /* me and then parent */
+ dup_entry(e, &split[0]);
+ new_entry = git__malloc(sizeof(*new_entry));
+ memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
+ add_blame_entry(sb, new_entry);
+ } else {
+ /* parent and then me */
+ dup_entry(e, &split[1]);
+ new_entry = git__malloc(sizeof(*new_entry));
+ memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
+ add_blame_entry(sb, new_entry);
+ }
+}
+
+/*
+ * After splitting the blame, the origins used by the on-stack blame_entry
+ * should lose one refcnt each.
+ */
+static void decref_split(struct blame_entry *split)
+{
+ int i;
+ for (i=0; i<3; i++)
+ origin_decref(split[i].suspect);
+}
+
+/*
+ * Helper for blame_chunk(). blame_entry e is known to overlap with the patch
+ * hunk; split it and pass blame to the parent.
+ */
+static void blame_overlap(struct scoreboard *sb, struct blame_entry *e, int tlno, int plno, int same, struct origin *parent)
+{
+ struct blame_entry split[3] = {{0}};
+
+ split_overlap(split, e, tlno, plno, same, parent);
+ if (split[1].suspect)
+ split_blame(sb, split, e);
+ decref_split(split);
+}
+
+/*
+ * Process one hunk from the patch between the current suspect for blame_entry
+ * e and its parent. Find and split the overlap, and pass blame to the
+ * overlapping part to the parent.
+ */
+static void blame_chunk(struct scoreboard *sb, int tlno, int plno, int same, struct origin *target, struct origin *parent)
+{
+ struct blame_entry *e;
+
+ for (e = sb->ent; e; e = e->next) {
+ if (e->guilty || !same_suspect(e->suspect, target))
+ continue;
+ if (same <= e->s_lno)
+ continue;
+ if (tlno < e->s_lno + e->num_lines) {
+ blame_overlap(sb, e, tlno, plno, same, parent);
+ }
+ }
+}
+
+static void blame_chunk_cb(long start_a, long count_a, long start_b, long count_b, void *data)
+{
+ struct blame_chunk_cb_data *d = data;
+ blame_chunk(d->sb, d->tlno, d->plno, start_b, d->target, d->parent);
+ d->plno = start_a + count_a;
+ d->tlno = start_b + count_b;
+}
+
+static int my_emit_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, xdemitconf_t const *xecfg)
+{
+ xdchange_t *xch = xscr;
+ GIT_UNUSED(xe);
+ GIT_UNUSED(xecfg);
+ while (xch) {
+ blame_chunk_cb(xch->i1, xch->chg1, xch->i2, xch->chg2, ecb->priv);
+ xch = xch->next;
+ }
+ return 0;
+}
+
+static void trim_common_tail(mmfile_t *a, mmfile_t *b, long ctx)
+{
+ const int blk = 1024;
+ long trimmed = 0, recovered = 0;
+ char *ap = a->ptr + a->size;
+ char *bp = b->ptr + b->size;
+ long smaller = (a->size < b->size) ? a->size : b->size;
+
+ if (ctx)
+ return;
+
+ while (blk + trimmed <= smaller && !memcmp(ap - blk, bp - blk, blk)) {
+ trimmed += blk;
+ ap -= blk;
+ bp -= blk;
+ }
+
+ while (recovered < trimmed)
+ if (ap[recovered++] == '\n')
+ break;
+ a->size -= trimmed - recovered;
+ b->size -= trimmed - recovered;
+}
+
+static int xdi_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdemitconf_t const *xecfg, xdemitcb_t *xecb)
+{
+ mmfile_t a = *mf1;
+ mmfile_t b = *mf2;
+
+ trim_common_tail(&a, &b, xecfg->ctxlen);
+
+ return xdl_diff(&a, &b, xpp, xecfg, xecb);
+}
+
+
+static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b, void *cb_data)
+{
+ xpparam_t xpp = {0};
+ xdemitconf_t xecfg = {0};
+ xdemitcb_t ecb = {0};
+
+ xecfg.emit_func = (void(*)(void))my_emit_func;
+ ecb.priv = cb_data;
+ return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);
+}
+
+static void fill_origin_blob(struct origin *o, mmfile_t *file)
+{
+ memset(file, 0, sizeof(*file));
+ if (o->blob) {
+ file->ptr = (char*)git_blob_rawcontent(o->blob);
+ file->size = (size_t)git_blob_rawsize(o->blob);
+ }
+}
+
+static int pass_blame_to_parent(struct scoreboard *sb,
+ struct origin *target,
+ struct origin *parent)
+{
+ int last_in_target;
+ mmfile_t file_p, file_o;
+ struct blame_chunk_cb_data d = { sb, target, parent, 0, 0 };
+
+ last_in_target = find_last_in_target(sb, target);
+ if (last_in_target < 0)
+ return 1; /* nothing remains for this target */
+
+ fill_origin_blob(parent, &file_p);
+ fill_origin_blob(target, &file_o);
+
+ diff_hunks(&file_p, &file_o, &d);
+ /* The reset (i.e. anything after tlno) are the same as the parent */
+ blame_chunk(sb, d.tlno, d.plno, last_in_target, target, parent);
+
+ return 0;
+}
+
+static int paths_on_dup(void **old, void *new)
+{
+ GIT_UNUSED(old);
+ git__free(new);
+ return -1;
+}
+static struct origin* find_origin(struct scoreboard *sb, git_commit *parent,
+ struct origin *origin)
+{
+ struct origin *porigin = NULL;
+ git_diff_list *difflist = NULL;
+ git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
+ git_tree *otree=NULL, *ptree=NULL;
+
+ /* Get the trees from this commit and its parent */
+ // TODO: check errors
+ git_commit_tree(&otree, origin->commit);
+ git_commit_tree(&ptree, parent);
+
+ /* Configure the diff */
+ diffopts.context_lines = 0;
+ diffopts.flags = GIT_DIFF_SKIP_BINARY_CHECK;
+
+ /* Check to see if files we're interested have changed */
+ diffopts.pathspec.count = sb->blame->paths.length;
+ diffopts.pathspec.strings = (char**)sb->blame->paths.contents;
+ // TODO: check error
+ git_diff_tree_to_tree(&difflist, sb->blame->repository, ptree, otree, &diffopts);
+
+ if (!git_diff_num_deltas(difflist)) {
+ /* No changes; copy data */
+ // TODO: check error
+ get_origin(&porigin, sb, parent, origin->path);
+ } else {
+ git_diff_find_options findopts = GIT_DIFF_FIND_OPTIONS_INIT;
+ int i;
+
+ /* Generate a full diff between the two trees */
+ git_diff_list_free(difflist);
+ diffopts.pathspec.count = 0;
+ // TODO: check error
+ git_diff_tree_to_tree(&difflist, sb->blame->repository, ptree, otree, &diffopts);
+
+ /* Let diff find renames */
+ findopts.flags = GIT_DIFF_FIND_RENAMES;
+ // TODO: check error
+ git_diff_find_similar(difflist, &findopts);
+
+ /* Find one that matches */
+ for (i=0; i<(int)git_diff_num_deltas(difflist); i++) {
+ const git_diff_delta *delta;
+ git_diff_get_patch(NULL, &delta, difflist, i);
+ if (git_vector_bsearch(NULL, &sb->blame->paths, delta->new_file.path) != 0)
+ continue;
+
+ git_vector_insert_sorted(&sb->blame->paths, (void*)git__strdup(delta->old_file.path), paths_on_dup);
+ // TODO: check error
+ make_origin(&porigin, parent, delta->old_file.path);
+ }
+ }
+
+ git_diff_list_free(difflist);
+ git_tree_free(otree);
+ git_tree_free(ptree);
+ return porigin;
+
+}
+
+/*
+ * The blobs of origin and porigin exactly match, so everything origin is
+ * suspected for can be blamed on the parent.
+ */
+static void pass_whole_blame(struct scoreboard *sb,
+ struct origin *origin, struct origin *porigin)
+{
+ struct blame_entry *e;
+
+ if (!porigin->blob)
+ git_object_lookup((git_object**)&porigin->blob, sb->blame->repository, git_blob_id(origin->blob),
+ GIT_OBJ_BLOB);
+ for (e=sb->ent; e; e=e->next) {
+ if (!same_suspect(e->suspect, origin))
+ continue;
+ origin_incref(porigin);
+ origin_decref(e->suspect);
+ e->suspect = porigin;
+ }
+}
+
+static void pass_blame(struct scoreboard *sb, struct origin *origin, uint32_t opt)
+{
+ git_commit *commit = origin->commit;
+ int i, num_sg;
+ struct origin *sg_buf[16];
+ struct origin *porigin, **sg_origin = sg_buf;
+
+ GIT_UNUSED(opt);
+
+ num_sg = git_commit_parentcount(commit);
+ if (!num_sg)
+ goto finish;
+ else if (num_sg < (int)ARRAY_SIZE(sg_buf))
+ memset(sg_buf, 0, sizeof(sg_buf));
+ else
+ sg_origin = git__calloc(num_sg, sizeof(*sg_origin));
+
+ for (i=0; i<num_sg; i++) {
+ git_commit *p;
+ int j, same;
+
+ if (sg_origin[i])
+ continue;
+
+ // TODO: check error
+ git_commit_parent(&p, origin->commit, i);
+ porigin = find_origin(sb, p, origin);
+
+ if (!porigin)
+ continue;
+ if (porigin->blob && origin->blob &&
+ !git_oid_cmp(git_blob_id(porigin->blob), git_blob_id(origin->blob))) {
+ pass_whole_blame(sb, origin, porigin);
+ origin_decref(porigin);
+ goto finish;
+ }
+ for (j = same = 0; j<i; j++)
+ if (sg_origin[j] &&
+ !git_oid_cmp(git_blob_id(sg_origin[j]->blob), git_blob_id(porigin->blob))) {
+ same = 1;
+ break;
+ }
+ if (!same)
+ sg_origin[i] = porigin;
+ else
+ origin_decref(porigin);
+ }
+
+ for (i=0; i<num_sg; i++) {
+ struct origin *porigin = sg_origin[i];
+ if (!porigin)
+ continue;
+ if (!origin->previous) {
+ origin_incref(porigin);
+ origin->previous = porigin;
+ }
+ if (pass_blame_to_parent(sb, origin, porigin))
+ goto finish;
+ }
+
+ /* TODO: optionally find moves in parents' files */
+
+ /* TODO: optionally find copies in parents' files */
+
+finish:
+ for (i=0; i<num_sg; i++)
+ if (sg_origin[i])
+ origin_decref(sg_origin[i]);
+ if (sg_origin != sg_buf)
+ git__free(sg_origin);
+ return;
+}
+
+/*
+ * Origin is refcounted and usually we keep the blob contents to be
+ * reused.
+ */
+struct origin *origin_incref(struct origin *o)
+{
+ if (o)
+ o->refcnt++;
+ return o;
+}
+
+void origin_decref(struct origin *o)
+{
+ if (o && --o->refcnt <= 0) {
+ if (o->previous)
+ origin_decref(o->previous);
+ git_blob_free(o->blob);
+ git_commit_free(o->commit);
+ git__free(o);
+ }
+}
+
+void assign_blame(struct scoreboard *sb, uint32_t opt)
+{
+ while (true) {
+ struct blame_entry *ent;
+ struct origin *suspect = NULL;
+
+ /* Find a suspect to break down */
+ for (ent = sb->ent; !suspect && ent; ent = ent->next)
+ if (!ent->guilty)
+ suspect = ent->suspect;
+ if (!suspect)
+ return; /* all done */
+
+ /* We'll use this suspect later in the loop, so hold on to it for now. */
+ origin_incref(suspect);
+ pass_blame(sb, suspect, opt);
+
+ /* Take responsibility for the remaining entries */
+ for (ent = sb->ent; ent; ent = ent->next)
+ if (same_suspect(ent->suspect, suspect))
+ ent->guilty = 1;
+ origin_decref(suspect);
+ }
+}
+
+void coalesce(struct scoreboard *sb)
+{
+ struct blame_entry *ent, *next;
+
+ for (ent=sb->ent; ent && (next = ent->next); ent = next) {
+ if (same_suspect(ent->suspect, next->suspect) &&
+ ent->guilty == next->guilty &&
+ ent->s_lno + ent->num_lines == next->s_lno)
+ {
+ ent->num_lines += next->num_lines;
+ ent->next = next->next;
+ if (ent->next)
+ ent->next->prev = ent;
+ origin_decref(next->suspect);
+ git__free(next);
+ ent->score = 0;
+ next = ent; /* again */
+ }
+ }
+}
+
diff --git a/src/blame_git.h b/src/blame_git.h
new file mode 100644
index 000000000..89ebc3228
--- /dev/null
+++ b/src/blame_git.h
@@ -0,0 +1,93 @@
+
+#ifndef INCLUDE_blame_git__
+#define INCLUDE_blame_git__
+
+#include "git2.h"
+#include "xdiff/xinclude.h"
+#include "blame.h"
+
+/*
+ * One blob in a commit that is being suspected
+ */
+struct origin {
+ int refcnt;
+ struct origin *previous;
+ git_commit *commit;
+ git_blob *blob;
+ char path[];
+};
+
+/*
+ * Each group of lines is described by a blame_entry; it can be split
+ * as we pass blame to the parents. They form a linked list in the
+ * scoreboard structure, sorted by the target line number.
+ */
+struct blame_entry {
+ struct blame_entry *prev;
+ struct blame_entry *next;
+
+ /* the first line of this group in the final image;
+ * internally all line numbers are 0 based.
+ */
+ int lno;
+
+ /* how many lines this group has */
+ int num_lines;
+
+ /* the commit that introduced this group into the final image */
+ struct origin *suspect;
+
+ /* true if the suspect is truly guilty; false while we have not
+ * checked if the group came from one of its parents.
+ */
+ char guilty;
+
+ /* true if the entry has been scanned for copies in the current parent
+ */
+ char scanned;
+
+ /* the line number of the first line of this group in the
+ * suspect's file; internally all line numbers are 0 based.
+ */
+ int s_lno;
+
+ /* how significant this entry is -- cached to avoid
+ * scanning the lines over and over.
+ */
+ unsigned score;
+};
+
+/*
+ * The current state of the blame assignment.
+ */
+struct scoreboard {
+ /* the final commit (i.e. where we started digging from) */
+ git_commit *final;
+ const char *path;
+
+ /*
+ * The contents in the final image.
+ * Used by many functions to obtain contents of the nth line,
+ * indexed with scoreboard.lineno[blame_entry.lno].
+ */
+ const char *final_buf;
+ git_off_t final_buf_size;
+
+ /* linked list of blames */
+ struct blame_entry *ent;
+
+ /* look-up a line in the final buffer */
+ int num_lines;
+
+ git_blame *blame;
+};
+
+
+int get_origin(struct origin **out, struct scoreboard *sb, git_commit *commit, const char *path);
+int make_origin(struct origin **out, git_commit *commit, const char *path);
+struct origin *origin_incref(struct origin *o);
+void origin_decref(struct origin *o);
+void assign_blame(struct scoreboard *sb, uint32_t flags);
+void coalesce(struct scoreboard *sb);
+
+#endif
diff --git a/src/object.c b/src/object.c
index 9b8ccdd3e..53666ffe4 100644
--- a/src/object.c
+++ b/src/object.c
@@ -364,3 +364,39 @@ int git_object_dup(git_object **dest, git_object *source)
*dest = source;
return 0;
}
+
+int git_object_lookup_bypath(
+ git_object **out,
+ const git_object *treeish,
+ const char *path,
+ git_otype type)
+{
+ int error = -1;
+ git_tree *tree = NULL;
+ git_tree_entry *entry = NULL;
+ git_object *tmpobj = NULL;
+
+ assert(out && treeish && path);
+
+ if (((error = git_object_peel((git_object**)&tree, treeish, GIT_OBJ_TREE)) < 0) ||
+ ((error = git_tree_entry_bypath(&entry, tree, path)) < 0) ||
+ ((error = git_tree_entry_to_object(&tmpobj, git_object_owner(treeish), entry)) < 0))
+ {
+ goto cleanup;
+ }
+
+ if (type == GIT_OBJ_ANY || git_object_type(tmpobj) == type) {
+ *out = tmpobj;
+ } else {
+ giterr_set(GITERR_OBJECT,
+ "object at path '%s' is not of the asked-for type %d",
+ path, type);
+ error = GIT_EINVALIDSPEC;
+ git_object_free(tmpobj);
+ }
+
+cleanup:
+ git_tree_entry_free(entry);
+ git_tree_free(tree);
+ return error;
+}