diff options
author | Ben Straub <bs@github.com> | 2013-09-16 16:20:38 -0700 |
---|---|---|
committer | Ben Straub <bs@github.com> | 2013-09-16 16:23:50 -0700 |
commit | ceab4e260637dc8374d0348ee9a078ab1c16e4ad (patch) | |
tree | 25124d6ea0528f5f3355046f56aaac43ba78ec4d /src | |
parent | 549931679a77b280eb1f36afeda8930eb31d70f7 (diff) | |
download | libgit2-ceab4e260637dc8374d0348ee9a078ab1c16e4ad.tar.gz |
Port blame from git.git
Diffstat (limited to 'src')
-rw-r--r-- | src/blame.c | 492 | ||||
-rw-r--r-- | src/blame.h | 37 | ||||
-rw-r--r-- | src/blame_git.c | 591 | ||||
-rw-r--r-- | src/blame_git.h | 93 | ||||
-rw-r--r-- | src/object.c | 36 |
5 files changed, 1249 insertions, 0 deletions
diff --git a/src/blame.c b/src/blame.c new file mode 100644 index 000000000..30d65f02c --- /dev/null +++ b/src/blame.c @@ -0,0 +1,492 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ + +#include "blame.h" +#include "git2/commit.h" +#include "git2/revparse.h" +#include "git2/revwalk.h" +#include "git2/tree.h" +#include "git2/diff.h" +#include "git2/blob.h" +#include "util.h" +#include "repository.h" +#include "blame_git.h" + + +static int hunk_search_cmp_helper(const void *key, size_t start_line, size_t num_lines) +{ + uint32_t lineno = *(size_t*)key; + if (lineno < start_line) + return -1; + if (lineno >= ((uint32_t)start_line + num_lines)) + return 1; + return 0; +} +static int hunk_byfinalline_search_cmp(const void *key, const void *entry) +{ + git_blame_hunk *hunk = (git_blame_hunk*)entry; + return hunk_search_cmp_helper(key, hunk->final_start_line_number, hunk->lines_in_hunk); +} +static int hunk_sort_cmp_by_start_line(const void *_a, const void *_b) +{ + git_blame_hunk *a = (git_blame_hunk*)_a, + *b = (git_blame_hunk*)_b; + + return a->final_start_line_number - b->final_start_line_number; +} + +static bool hunk_ends_at_or_before_line(git_blame_hunk *hunk, size_t line) +{ + return line >= (size_t)(hunk->final_start_line_number + hunk->lines_in_hunk - 1); +} + +static bool hunk_starts_at_or_after_line(git_blame_hunk *hunk, size_t line) +{ + return line <= hunk->final_start_line_number; +} + +static git_blame_hunk* new_hunk(uint16_t start, uint16_t lines, uint16_t orig_start, const char *path) +{ + git_blame_hunk *hunk = git__calloc(1, sizeof(git_blame_hunk)); + if (!hunk) return NULL; + + hunk->lines_in_hunk = lines; + hunk->final_start_line_number = start; + hunk->orig_start_line_number = orig_start; + hunk->orig_path = path ? git__strdup(path) : NULL; + + return hunk; +} + +git_blame_hunk* git_blame__alloc_hunk() +{ + return new_hunk(0,0,0,NULL); +} + +static git_blame_hunk* dup_hunk(git_blame_hunk *hunk) +{ + git_blame_hunk *newhunk = new_hunk(hunk->final_start_line_number, hunk->lines_in_hunk, hunk->orig_start_line_number, hunk->orig_path); + git_oid_cpy(&newhunk->orig_commit_id, &hunk->orig_commit_id); + git_oid_cpy(&newhunk->final_commit_id, &hunk->final_commit_id); + return newhunk; +} + +static void free_hunk(git_blame_hunk *hunk) +{ + git__free((void*)hunk->orig_path); + git__free(hunk); +} + +/* Starting with the hunk that includes start_line, shift all following hunks' + * final_start_line by shift_by lines */ +static void shift_hunks_by_final(git_vector *v, size_t start_line, int shift_by) +{ + size_t i; + + if (!git_vector_bsearch2( &i, v, hunk_byfinalline_search_cmp, &start_line)) { + for (; i < v->length; i++) { + git_blame_hunk *hunk = (git_blame_hunk*)v->contents[i]; + hunk->final_start_line_number += shift_by; + } + } +} +static int paths_cmp(const void *a, const void *b) { return git__strcmp((char*)a, (char*)b); } +git_blame* git_blame__alloc( + git_repository *repo, + git_blame_options opts, + const char *path) +{ + git_blame *gbr = (git_blame*)git__calloc(1, sizeof(git_blame)); + if (!gbr) { + giterr_set_oom(); + return NULL; + } + git_vector_init(&gbr->hunks, 8, hunk_sort_cmp_by_start_line); + git_vector_init(&gbr->unclaimed_hunks, 8, hunk_sort_cmp_by_start_line); + git_vector_init(&gbr->paths, 8, paths_cmp); + gbr->repository = repo; + gbr->options = opts; + gbr->path = git__strdup(path); + git_vector_insert(&gbr->paths, git__strdup(path)); + gbr->final_blob = NULL; + return gbr; +} + +void git_blame_free(git_blame *blame) +{ + size_t i; + git_blame_hunk *hunk; + char *path; + + if (!blame) return; + + git_vector_foreach(&blame->hunks, i, hunk) + free_hunk(hunk); + git_vector_free(&blame->hunks); + + git_vector_foreach(&blame->unclaimed_hunks, i, hunk) + free_hunk(hunk); + git_vector_free(&blame->unclaimed_hunks); + + git_vector_foreach(&blame->paths, i, path) + git__free(path); + git_vector_free(&blame->paths); + + git__free((void*)blame->path); + git_blob_free(blame->final_blob); + git__free(blame); +} + +uint32_t git_blame_get_hunk_count(git_blame *blame) +{ + assert(blame); + return blame->hunks.length; +} + +const git_blame_hunk *git_blame_get_hunk_byindex(git_blame *blame, uint32_t index) +{ + assert(blame); + return (git_blame_hunk*)git_vector_get(&blame->hunks, index); +} + +const git_blame_hunk *git_blame_get_hunk_byline(git_blame *blame, uint32_t lineno) +{ + size_t i; + assert(blame); + + if (!git_vector_bsearch2( &i, &blame->hunks, hunk_byfinalline_search_cmp, &lineno)) { + return git_blame_get_hunk_byindex(blame, i); + } + + return NULL; +} + +static void normalize_options( + git_blame_options *out, + const git_blame_options *in, + git_repository *repo) +{ + git_blame_options dummy = GIT_BLAME_OPTIONS_INIT; + if (!in) in = &dummy; + + memcpy(out, in, sizeof(git_blame_options)); + + /* No newest_commit => HEAD */ + if (git_oid_iszero(&out->newest_commit)) { + git_reference_name_to_id(&out->newest_commit, repo, "HEAD"); + } +} + +static git_blame_hunk *split_hunk_in_vector( + git_vector *vec, + git_blame_hunk *hunk, + size_t rel_line, + bool return_new) +{ + size_t new_line_count; + git_blame_hunk *nh; + + /* Don't split if already at a boundary */ + if (rel_line <= 0 || + rel_line >= hunk->lines_in_hunk) + { + return hunk; + } + + new_line_count = hunk->lines_in_hunk - rel_line; + nh = new_hunk(hunk->final_start_line_number+rel_line, new_line_count, + hunk->orig_start_line_number+rel_line, hunk->orig_path); + git_oid_cpy(&nh->final_commit_id, &hunk->final_commit_id); + git_oid_cpy(&nh->orig_commit_id, &hunk->orig_commit_id); + + /* Adjust hunk that was split */ + hunk->lines_in_hunk -= new_line_count; + git_vector_insert_sorted(vec, nh, NULL); + { + git_blame_hunk *ret = return_new ? nh : hunk; + return ret; + } +} + + +/* + * To allow quick access to the contents of nth line in the + * final image, prepare an index in the scoreboard. + */ +static int prepare_lines(struct scoreboard *sb) +{ + const char *buf = sb->final_buf; + git_off_t len = sb->final_buf_size; + int num = 0, incomplete = 0, bol = 1; + + if (len && buf[len-1] != '\n') + incomplete++; /* incomplete line at the end */ + while (len--) { + if (bol) { + bol = 0; + } + if (*buf++ == '\n') { + num++; + bol = 1; + } + } + sb->num_lines = num + incomplete; + return sb->num_lines; +} + +static git_blame_hunk* hunk_from_entry(struct blame_entry *e) +{ + git_blame_hunk *h = new_hunk( + e->lno+1, e->num_lines, e->s_lno+1, e->suspect->path); + git_oid_cpy(&h->final_commit_id, git_commit_id(e->suspect->commit)); + return h; +} + +static void free_if_not_already_freed(git_vector *already, struct origin *o) +{ + size_t i; + + if (!o) return; + if (!git_vector_search(&i, already, o)) + return; + + git_vector_insert(already, o); + free_if_not_already_freed(already, o->previous); + git_blob_free(o->blob); + git_commit_free(o->commit); + git__free(o); +} + +static int walk_and_mark(git_blame *blame) +{ + int error; + + struct scoreboard sb = {0}; + struct blame_entry *ent = NULL; + git_blob *blob = NULL; + struct origin *o; + git_vector already = GIT_VECTOR_INIT; + + if ((error = git_commit_lookup(&sb.final, blame->repository, &blame->options.newest_commit)) < 0 || + (error = git_object_lookup_bypath((git_object**)&blob, (git_object*)sb.final, blame->path, GIT_OBJ_BLOB)) < 0) + goto cleanup; + sb.final_buf = git_blob_rawcontent(blob); + sb.final_buf_size = git_blob_rawsize(blob); + if ((error = get_origin(&o, &sb, sb.final, blame->path)) < 0) + goto cleanup; + + ent = git__calloc(1, sizeof(*ent)); + ent->num_lines = prepare_lines(&sb); + ent->suspect = o; + sb.ent = ent; + sb.path = blame->path; + sb.blame = blame; + + assign_blame(&sb, blame->options.flags); + coalesce(&sb); + + for (ent = sb.ent; ent; ) { + git_vector_insert(&blame->hunks, hunk_from_entry(ent)); + ent = ent->next; + } + +cleanup: + for (ent = sb.ent; ent; ) { + struct blame_entry *e = ent->next; + struct origin *o = ent->suspect; + + /* Linkages might not be ordered, so we only free pointers we haven't + * seen before. */ + free_if_not_already_freed(&already, o); + + git__free(ent); + ent = e; + } + + git_vector_free(&already); + git_commit_free(sb.final); + git_blob_free(blob); + return error; +} + +static int load_blob(git_blame *blame, git_repository *repo, git_oid *commit_id, const char *path) +{ + int retval = -1; + git_commit *commit = NULL; + git_tree *tree = NULL; + git_tree_entry *tree_entry = NULL; + git_object *obj = NULL; + + if (((retval = git_commit_lookup(&commit, repo, commit_id)) < 0) || + ((retval = git_object_lookup_bypath(&obj, (git_object*)commit, path, GIT_OBJ_BLOB)) < 0) || + ((retval = git_object_type(obj)) != GIT_OBJ_BLOB)) + goto cleanup; + blame->final_blob = (git_blob*)obj; + +cleanup: + git_tree_entry_free(tree_entry); + git_tree_free(tree); + git_commit_free(commit); + return retval; +} + +/******************************************************************************* + * File blaming + ******************************************************************************/ + +int git_blame_file( + git_blame **out, + git_repository *repo, + const char *path, + git_blame_options *options) +{ + int error = -1; + git_blame_options normOptions = GIT_BLAME_OPTIONS_INIT; + git_blame *blame = NULL; + + assert(out && repo && path); + normalize_options(&normOptions, options, repo); + + blame = git_blame__alloc(repo, normOptions, path); + GITERR_CHECK_ALLOC(blame); + + if ((error = load_blob(blame, repo, &normOptions.newest_commit, path)) < 0) + goto on_error; + + if ((error = walk_and_mark(blame)) < 0) + goto on_error; + + *out = blame; + return 0; + +on_error: + git_blame_free(blame); + return error; +} + +/******************************************************************************* + * Buffer blaming + *******************************************************************************/ + +static bool hunk_is_bufferblame(git_blame_hunk *hunk) +{ + return git_oid_iszero(&hunk->final_commit_id); +} + +static int buffer_hunk_cb( + const git_diff_delta *delta, + const git_diff_range *range, + const char *header, + size_t header_len, + void *payload) +{ + git_blame *blame = (git_blame*)payload; + size_t wedge_line; + + GIT_UNUSED(delta); + GIT_UNUSED(header); + GIT_UNUSED(header_len); + + wedge_line = (range->old_lines == 0) ? range->new_start : range->old_start; + blame->current_diff_line = wedge_line; + + /* If this hunk doesn't start between existing hunks, split a hunk up so it does */ + blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byline(blame, wedge_line); + if (!hunk_starts_at_or_after_line(blame->current_hunk, wedge_line)){ + blame->current_hunk = split_hunk_in_vector(&blame->hunks, blame->current_hunk, + wedge_line - blame->current_hunk->orig_start_line_number, true); + } + + return 0; +} + +static int ptrs_equal_cmp(const void *a, const void *b) { return a<b ? -1 : a>b ? 1 : 0; } +static int buffer_line_cb( + const git_diff_delta *delta, + const git_diff_range *range, + char line_origin, + const char *content, + size_t content_len, + void *payload) +{ + git_blame *blame = (git_blame*)payload; + + GIT_UNUSED(delta); + GIT_UNUSED(range); + GIT_UNUSED(content); + GIT_UNUSED(content_len); + +#ifdef DO_DEBUG + { + char *str = git__substrdup(content, content_len); + git__free(str); + } +#endif + + if (line_origin == GIT_DIFF_LINE_ADDITION) { + if (hunk_is_bufferblame(blame->current_hunk) && + hunk_ends_at_or_before_line(blame->current_hunk, blame->current_diff_line)) { + /* Append to the current buffer-blame hunk */ + blame->current_hunk->lines_in_hunk++; + shift_hunks_by_final(&blame->hunks, blame->current_diff_line+1, 1); + } else { + /* Create a new buffer-blame hunk with this line */ + shift_hunks_by_final(&blame->hunks, blame->current_diff_line, 1); + blame->current_hunk = new_hunk(blame->current_diff_line, 1, 0, blame->path); + git_vector_insert_sorted(&blame->hunks, blame->current_hunk, NULL); + } + blame->current_diff_line++; + } + + if (line_origin == GIT_DIFF_LINE_DELETION) { + /* Trim the line from the current hunk; remove it if it's now empty */ + size_t shift_base = blame->current_diff_line + blame->current_hunk->lines_in_hunk+1; + + if (--(blame->current_hunk->lines_in_hunk) == 0) { + size_t i; + shift_base--; + if (!git_vector_search2(&i, &blame->hunks, ptrs_equal_cmp, blame->current_hunk)) { + git_vector_remove(&blame->hunks, i); + free_hunk(blame->current_hunk); + blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byindex(blame, i); + } + } + shift_hunks_by_final(&blame->hunks, shift_base, -1); + } + return 0; +} + +int git_blame_buffer( + git_blame **out, + git_blame *reference, + const char *buffer, + size_t buffer_len) +{ + git_blame *blame; + git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT; + size_t i; + git_blame_hunk *hunk; + + diffopts.context_lines = 0; + + assert(out && reference && buffer && buffer_len); + + blame = git_blame__alloc(reference->repository, reference->options, reference->path); + + /* Duplicate all of the hunk structures in the reference blame */ + git_vector_foreach(&reference->hunks, i, hunk) { + git_vector_insert(&blame->hunks, dup_hunk(hunk)); + } + + /* Diff to the reference blob */ + git_diff_blob_to_buffer(reference->final_blob, blame->path, + buffer, buffer_len, blame->path, + &diffopts, NULL, buffer_hunk_cb, buffer_line_cb, blame); + + *out = blame; + return 0; +} diff --git a/src/blame.h b/src/blame.h new file mode 100644 index 000000000..260d5b5a1 --- /dev/null +++ b/src/blame.h @@ -0,0 +1,37 @@ +#ifndef INCLUDE_blame_h__ +#define INCLUDE_blame_h__ + +#include "git2/blame.h" +#include "common.h" +#include "vector.h" +#include "diff.h" +#include "array.h" +#include "git2/oid.h" + +struct git_blame { + const char *path; + git_repository *repository; + git_blame_options options; + + git_vector hunks; + git_vector unclaimed_hunks; + git_vector paths; + + git_blob *final_blob; + size_t num_lines; + + git_oid current_commit; + git_oid parent_commit; + size_t current_diff_line; + size_t current_blame_line; + git_blame_hunk *current_hunk; +}; + +git_blame *git_blame__alloc( + git_repository *repo, + git_blame_options opts, + const char *path); + +git_blame_hunk *git_blame__alloc_hunk(); + +#endif diff --git a/src/blame_git.c b/src/blame_git.c new file mode 100644 index 000000000..0dae2d9c1 --- /dev/null +++ b/src/blame_git.c @@ -0,0 +1,591 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ + +#include "blame_git.h" +#include "commit.h" + +/* + * Locate an existing origin or create a new one. + */ +int get_origin(struct origin **out, struct scoreboard *sb, git_commit *commit, const char *path) +{ + struct blame_entry *e; + + for (e = sb->ent; e; e = e->next) + if (e->suspect->commit == commit && !strcmp(e->suspect->path, path)) + *out = origin_incref(e->suspect); + return make_origin(out, commit, path); +} + +/* + * Given a commit and a path in it, create a new origin structure. + * The callers that add blame to the scoreboard should use + * get_origin() to obtain shared, refcounted copy instead of calling + * this function directly. + */ +int make_origin(struct origin **out, git_commit *commit, const char *path) +{ + int error = 0; + struct origin *o; + o = git__calloc(1, sizeof(*o) + strlen(path) + 1); + GITERR_CHECK_ALLOC(o); + o->commit = commit; + o->refcnt = 1; + strcpy(o->path, path); + + if (!(error = git_object_lookup_bypath((git_object**)&o->blob, (git_object*)commit, + path, GIT_OBJ_BLOB))) + *out = o; + else + origin_decref(o); + return error; +} + +struct blame_chunk_cb_data { + struct scoreboard *sb; + struct origin *target; + struct origin *parent; + long plno; + long tlno; +}; + +static bool same_suspect(struct origin *a, struct origin *b) +{ + if (a == b) + return true; + if (git_oid_cmp(git_commit_id(a->commit), git_commit_id(b->commit))) + return false; + return 0 == strcmp(a->path, b->path); +} + +/* Find the line number of the last line the target is suspected for */ +static int find_last_in_target(struct scoreboard *sb, struct origin *target) +{ + struct blame_entry *e; + int last_in_target = -1; + + for (e=sb->ent; e; e=e->next) { + if (e->guilty || !same_suspect(e->suspect, target)) + continue; + if (last_in_target < e->s_lno + e->num_lines) + last_in_target = e->s_lno + e->num_lines; + } + return last_in_target; +} + +/* + * It is known that lines between tlno to same came from parent, and e + * has an overlap with that range. it also is known that parent's + * line plno corresponds to e's line tlno. + * + * <---- e -----> + * <------> + * <------------> + * <------------> + * <------------------> + * + * Split e into potentially three parts; before this chunk, the chunk + * to be blamed for the parent, and after that portion. + */ +static void split_overlap(struct blame_entry *split, struct blame_entry *e, + int tlno, int plno, int same, struct origin *parent) +{ + int chunk_end_lno; + + if (e->s_lno < tlno) { + /* there is a pre-chunk part not blamed on the parent */ + split[0].suspect = origin_incref(e->suspect); + split[0].lno = e->lno; + split[0].s_lno = e->s_lno; + split[0].num_lines = tlno - e->s_lno; + split[1].lno = e->lno + tlno - e->s_lno; + split[1].s_lno = plno; + } else { + split[1].lno = e->lno; + split[1].s_lno = plno + (e->s_lno - tlno); + } + + if (same < e->s_lno + e->num_lines) { + /* there is a post-chunk part not blamed on parent */ + split[2].suspect = origin_incref(e->suspect); + split[2].lno = e->lno + (same - e->s_lno); + split[2].s_lno = e->s_lno + (same - e->s_lno); + split[2].num_lines = e->s_lno + e->num_lines - same; + chunk_end_lno = split[2].lno; + } else { + chunk_end_lno = e->lno + e->num_lines; + } + split[1].num_lines = chunk_end_lno - split[1].lno; + + /* + * if it turns out there is nothing to blame the parent for, forget about + * the splitting. !split[1].suspect signals this. + */ + if (split[1].num_lines < 1) + return; + split[1].suspect = origin_incref(parent); +} + +/* + * Link in a new blame entry to the scoreboard. Entries that cover the same + * line range have been removed from the scoreboard previously. + */ +static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e) +{ + struct blame_entry *ent, *prev = NULL; + + origin_incref(e->suspect); + + for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next) + prev = ent; + + /* prev, if not NULL, is the last one that is below e */ + e->prev = prev; + if (prev) { + e->next = prev->next; + prev->next = e; + } else { + e->next = sb->ent; + sb->ent = e; + } + if (e->next) + e->next->prev = e; +} + +/* + * src typically is on-stack; we want to copy the information in it to + * a malloced blame_entry that is already on the linked list of the scoreboard. + * The origin of dst loses a refcnt while the origin of src gains one. + */ +static void dup_entry(struct blame_entry *dst, struct blame_entry *src) +{ + struct blame_entry *p, *n; + + p = dst->prev; + n = dst->next; + origin_incref(src->suspect); + origin_decref(dst->suspect); + memcpy(dst, src, sizeof(*src)); + dst->prev = p; + dst->next = n; + dst->score = 0; +} + +/* + * split_overlap() divided an existing blame e into up to three parts in split. + * Adjust the linked list of blames in the scoreboard to reflect the split. + */ +static void split_blame(struct scoreboard *sb, struct blame_entry *split, struct blame_entry *e) +{ + struct blame_entry *new_entry; + + if (split[0].suspect && split[2].suspect) { + /* The first part (reuse storage for the existing entry e */ + dup_entry(e, &split[0]); + + /* The last part -- me */ + new_entry = git__malloc(sizeof(*new_entry)); + memcpy(new_entry, &(split[2]), sizeof(struct blame_entry)); + add_blame_entry(sb, new_entry); + + /* ... and the middle part -- parent */ + new_entry = git__malloc(sizeof(*new_entry)); + memcpy(new_entry, &(split[1]), sizeof(struct blame_entry)); + add_blame_entry(sb, new_entry); + } else if (!split[0].suspect && !split[2].suspect) { + /* + * The parent covers the entire area; reuse storage for e and replace it + * with the parent + */ + dup_entry(e, &split[1]); + } else if (split[0].suspect) { + /* me and then parent */ + dup_entry(e, &split[0]); + new_entry = git__malloc(sizeof(*new_entry)); + memcpy(new_entry, &(split[1]), sizeof(struct blame_entry)); + add_blame_entry(sb, new_entry); + } else { + /* parent and then me */ + dup_entry(e, &split[1]); + new_entry = git__malloc(sizeof(*new_entry)); + memcpy(new_entry, &(split[2]), sizeof(struct blame_entry)); + add_blame_entry(sb, new_entry); + } +} + +/* + * After splitting the blame, the origins used by the on-stack blame_entry + * should lose one refcnt each. + */ +static void decref_split(struct blame_entry *split) +{ + int i; + for (i=0; i<3; i++) + origin_decref(split[i].suspect); +} + +/* + * Helper for blame_chunk(). blame_entry e is known to overlap with the patch + * hunk; split it and pass blame to the parent. + */ +static void blame_overlap(struct scoreboard *sb, struct blame_entry *e, int tlno, int plno, int same, struct origin *parent) +{ + struct blame_entry split[3] = {{0}}; + + split_overlap(split, e, tlno, plno, same, parent); + if (split[1].suspect) + split_blame(sb, split, e); + decref_split(split); +} + +/* + * Process one hunk from the patch between the current suspect for blame_entry + * e and its parent. Find and split the overlap, and pass blame to the + * overlapping part to the parent. + */ +static void blame_chunk(struct scoreboard *sb, int tlno, int plno, int same, struct origin *target, struct origin *parent) +{ + struct blame_entry *e; + + for (e = sb->ent; e; e = e->next) { + if (e->guilty || !same_suspect(e->suspect, target)) + continue; + if (same <= e->s_lno) + continue; + if (tlno < e->s_lno + e->num_lines) { + blame_overlap(sb, e, tlno, plno, same, parent); + } + } +} + +static void blame_chunk_cb(long start_a, long count_a, long start_b, long count_b, void *data) +{ + struct blame_chunk_cb_data *d = data; + blame_chunk(d->sb, d->tlno, d->plno, start_b, d->target, d->parent); + d->plno = start_a + count_a; + d->tlno = start_b + count_b; +} + +static int my_emit_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, xdemitconf_t const *xecfg) +{ + xdchange_t *xch = xscr; + GIT_UNUSED(xe); + GIT_UNUSED(xecfg); + while (xch) { + blame_chunk_cb(xch->i1, xch->chg1, xch->i2, xch->chg2, ecb->priv); + xch = xch->next; + } + return 0; +} + +static void trim_common_tail(mmfile_t *a, mmfile_t *b, long ctx) +{ + const int blk = 1024; + long trimmed = 0, recovered = 0; + char *ap = a->ptr + a->size; + char *bp = b->ptr + b->size; + long smaller = (a->size < b->size) ? a->size : b->size; + + if (ctx) + return; + + while (blk + trimmed <= smaller && !memcmp(ap - blk, bp - blk, blk)) { + trimmed += blk; + ap -= blk; + bp -= blk; + } + + while (recovered < trimmed) + if (ap[recovered++] == '\n') + break; + a->size -= trimmed - recovered; + b->size -= trimmed - recovered; +} + +static int xdi_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdemitconf_t const *xecfg, xdemitcb_t *xecb) +{ + mmfile_t a = *mf1; + mmfile_t b = *mf2; + + trim_common_tail(&a, &b, xecfg->ctxlen); + + return xdl_diff(&a, &b, xpp, xecfg, xecb); +} + + +static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b, void *cb_data) +{ + xpparam_t xpp = {0}; + xdemitconf_t xecfg = {0}; + xdemitcb_t ecb = {0}; + + xecfg.emit_func = (void(*)(void))my_emit_func; + ecb.priv = cb_data; + return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb); +} + +static void fill_origin_blob(struct origin *o, mmfile_t *file) +{ + memset(file, 0, sizeof(*file)); + if (o->blob) { + file->ptr = (char*)git_blob_rawcontent(o->blob); + file->size = (size_t)git_blob_rawsize(o->blob); + } +} + +static int pass_blame_to_parent(struct scoreboard *sb, + struct origin *target, + struct origin *parent) +{ + int last_in_target; + mmfile_t file_p, file_o; + struct blame_chunk_cb_data d = { sb, target, parent, 0, 0 }; + + last_in_target = find_last_in_target(sb, target); + if (last_in_target < 0) + return 1; /* nothing remains for this target */ + + fill_origin_blob(parent, &file_p); + fill_origin_blob(target, &file_o); + + diff_hunks(&file_p, &file_o, &d); + /* The reset (i.e. anything after tlno) are the same as the parent */ + blame_chunk(sb, d.tlno, d.plno, last_in_target, target, parent); + + return 0; +} + +static int paths_on_dup(void **old, void *new) +{ + GIT_UNUSED(old); + git__free(new); + return -1; +} +static struct origin* find_origin(struct scoreboard *sb, git_commit *parent, + struct origin *origin) +{ + struct origin *porigin = NULL; + git_diff_list *difflist = NULL; + git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT; + git_tree *otree=NULL, *ptree=NULL; + + /* Get the trees from this commit and its parent */ + // TODO: check errors + git_commit_tree(&otree, origin->commit); + git_commit_tree(&ptree, parent); + + /* Configure the diff */ + diffopts.context_lines = 0; + diffopts.flags = GIT_DIFF_SKIP_BINARY_CHECK; + + /* Check to see if files we're interested have changed */ + diffopts.pathspec.count = sb->blame->paths.length; + diffopts.pathspec.strings = (char**)sb->blame->paths.contents; + // TODO: check error + git_diff_tree_to_tree(&difflist, sb->blame->repository, ptree, otree, &diffopts); + + if (!git_diff_num_deltas(difflist)) { + /* No changes; copy data */ + // TODO: check error + get_origin(&porigin, sb, parent, origin->path); + } else { + git_diff_find_options findopts = GIT_DIFF_FIND_OPTIONS_INIT; + int i; + + /* Generate a full diff between the two trees */ + git_diff_list_free(difflist); + diffopts.pathspec.count = 0; + // TODO: check error + git_diff_tree_to_tree(&difflist, sb->blame->repository, ptree, otree, &diffopts); + + /* Let diff find renames */ + findopts.flags = GIT_DIFF_FIND_RENAMES; + // TODO: check error + git_diff_find_similar(difflist, &findopts); + + /* Find one that matches */ + for (i=0; i<(int)git_diff_num_deltas(difflist); i++) { + const git_diff_delta *delta; + git_diff_get_patch(NULL, &delta, difflist, i); + if (git_vector_bsearch(NULL, &sb->blame->paths, delta->new_file.path) != 0) + continue; + + git_vector_insert_sorted(&sb->blame->paths, (void*)git__strdup(delta->old_file.path), paths_on_dup); + // TODO: check error + make_origin(&porigin, parent, delta->old_file.path); + } + } + + git_diff_list_free(difflist); + git_tree_free(otree); + git_tree_free(ptree); + return porigin; + +} + +/* + * The blobs of origin and porigin exactly match, so everything origin is + * suspected for can be blamed on the parent. + */ +static void pass_whole_blame(struct scoreboard *sb, + struct origin *origin, struct origin *porigin) +{ + struct blame_entry *e; + + if (!porigin->blob) + git_object_lookup((git_object**)&porigin->blob, sb->blame->repository, git_blob_id(origin->blob), + GIT_OBJ_BLOB); + for (e=sb->ent; e; e=e->next) { + if (!same_suspect(e->suspect, origin)) + continue; + origin_incref(porigin); + origin_decref(e->suspect); + e->suspect = porigin; + } +} + +static void pass_blame(struct scoreboard *sb, struct origin *origin, uint32_t opt) +{ + git_commit *commit = origin->commit; + int i, num_sg; + struct origin *sg_buf[16]; + struct origin *porigin, **sg_origin = sg_buf; + + GIT_UNUSED(opt); + + num_sg = git_commit_parentcount(commit); + if (!num_sg) + goto finish; + else if (num_sg < (int)ARRAY_SIZE(sg_buf)) + memset(sg_buf, 0, sizeof(sg_buf)); + else + sg_origin = git__calloc(num_sg, sizeof(*sg_origin)); + + for (i=0; i<num_sg; i++) { + git_commit *p; + int j, same; + + if (sg_origin[i]) + continue; + + // TODO: check error + git_commit_parent(&p, origin->commit, i); + porigin = find_origin(sb, p, origin); + + if (!porigin) + continue; + if (porigin->blob && origin->blob && + !git_oid_cmp(git_blob_id(porigin->blob), git_blob_id(origin->blob))) { + pass_whole_blame(sb, origin, porigin); + origin_decref(porigin); + goto finish; + } + for (j = same = 0; j<i; j++) + if (sg_origin[j] && + !git_oid_cmp(git_blob_id(sg_origin[j]->blob), git_blob_id(porigin->blob))) { + same = 1; + break; + } + if (!same) + sg_origin[i] = porigin; + else + origin_decref(porigin); + } + + for (i=0; i<num_sg; i++) { + struct origin *porigin = sg_origin[i]; + if (!porigin) + continue; + if (!origin->previous) { + origin_incref(porigin); + origin->previous = porigin; + } + if (pass_blame_to_parent(sb, origin, porigin)) + goto finish; + } + + /* TODO: optionally find moves in parents' files */ + + /* TODO: optionally find copies in parents' files */ + +finish: + for (i=0; i<num_sg; i++) + if (sg_origin[i]) + origin_decref(sg_origin[i]); + if (sg_origin != sg_buf) + git__free(sg_origin); + return; +} + +/* + * Origin is refcounted and usually we keep the blob contents to be + * reused. + */ +struct origin *origin_incref(struct origin *o) +{ + if (o) + o->refcnt++; + return o; +} + +void origin_decref(struct origin *o) +{ + if (o && --o->refcnt <= 0) { + if (o->previous) + origin_decref(o->previous); + git_blob_free(o->blob); + git_commit_free(o->commit); + git__free(o); + } +} + +void assign_blame(struct scoreboard *sb, uint32_t opt) +{ + while (true) { + struct blame_entry *ent; + struct origin *suspect = NULL; + + /* Find a suspect to break down */ + for (ent = sb->ent; !suspect && ent; ent = ent->next) + if (!ent->guilty) + suspect = ent->suspect; + if (!suspect) + return; /* all done */ + + /* We'll use this suspect later in the loop, so hold on to it for now. */ + origin_incref(suspect); + pass_blame(sb, suspect, opt); + + /* Take responsibility for the remaining entries */ + for (ent = sb->ent; ent; ent = ent->next) + if (same_suspect(ent->suspect, suspect)) + ent->guilty = 1; + origin_decref(suspect); + } +} + +void coalesce(struct scoreboard *sb) +{ + struct blame_entry *ent, *next; + + for (ent=sb->ent; ent && (next = ent->next); ent = next) { + if (same_suspect(ent->suspect, next->suspect) && + ent->guilty == next->guilty && + ent->s_lno + ent->num_lines == next->s_lno) + { + ent->num_lines += next->num_lines; + ent->next = next->next; + if (ent->next) + ent->next->prev = ent; + origin_decref(next->suspect); + git__free(next); + ent->score = 0; + next = ent; /* again */ + } + } +} + diff --git a/src/blame_git.h b/src/blame_git.h new file mode 100644 index 000000000..89ebc3228 --- /dev/null +++ b/src/blame_git.h @@ -0,0 +1,93 @@ + +#ifndef INCLUDE_blame_git__ +#define INCLUDE_blame_git__ + +#include "git2.h" +#include "xdiff/xinclude.h" +#include "blame.h" + +/* + * One blob in a commit that is being suspected + */ +struct origin { + int refcnt; + struct origin *previous; + git_commit *commit; + git_blob *blob; + char path[]; +}; + +/* + * Each group of lines is described by a blame_entry; it can be split + * as we pass blame to the parents. They form a linked list in the + * scoreboard structure, sorted by the target line number. + */ +struct blame_entry { + struct blame_entry *prev; + struct blame_entry *next; + + /* the first line of this group in the final image; + * internally all line numbers are 0 based. + */ + int lno; + + /* how many lines this group has */ + int num_lines; + + /* the commit that introduced this group into the final image */ + struct origin *suspect; + + /* true if the suspect is truly guilty; false while we have not + * checked if the group came from one of its parents. + */ + char guilty; + + /* true if the entry has been scanned for copies in the current parent + */ + char scanned; + + /* the line number of the first line of this group in the + * suspect's file; internally all line numbers are 0 based. + */ + int s_lno; + + /* how significant this entry is -- cached to avoid + * scanning the lines over and over. + */ + unsigned score; +}; + +/* + * The current state of the blame assignment. + */ +struct scoreboard { + /* the final commit (i.e. where we started digging from) */ + git_commit *final; + const char *path; + + /* + * The contents in the final image. + * Used by many functions to obtain contents of the nth line, + * indexed with scoreboard.lineno[blame_entry.lno]. + */ + const char *final_buf; + git_off_t final_buf_size; + + /* linked list of blames */ + struct blame_entry *ent; + + /* look-up a line in the final buffer */ + int num_lines; + + git_blame *blame; +}; + + +int get_origin(struct origin **out, struct scoreboard *sb, git_commit *commit, const char *path); +int make_origin(struct origin **out, git_commit *commit, const char *path); +struct origin *origin_incref(struct origin *o); +void origin_decref(struct origin *o); +void assign_blame(struct scoreboard *sb, uint32_t flags); +void coalesce(struct scoreboard *sb); + +#endif diff --git a/src/object.c b/src/object.c index 9b8ccdd3e..53666ffe4 100644 --- a/src/object.c +++ b/src/object.c @@ -364,3 +364,39 @@ int git_object_dup(git_object **dest, git_object *source) *dest = source; return 0; } + +int git_object_lookup_bypath( + git_object **out, + const git_object *treeish, + const char *path, + git_otype type) +{ + int error = -1; + git_tree *tree = NULL; + git_tree_entry *entry = NULL; + git_object *tmpobj = NULL; + + assert(out && treeish && path); + + if (((error = git_object_peel((git_object**)&tree, treeish, GIT_OBJ_TREE)) < 0) || + ((error = git_tree_entry_bypath(&entry, tree, path)) < 0) || + ((error = git_tree_entry_to_object(&tmpobj, git_object_owner(treeish), entry)) < 0)) + { + goto cleanup; + } + + if (type == GIT_OBJ_ANY || git_object_type(tmpobj) == type) { + *out = tmpobj; + } else { + giterr_set(GITERR_OBJECT, + "object at path '%s' is not of the asked-for type %d", + path, type); + error = GIT_EINVALIDSPEC; + git_object_free(tmpobj); + } + +cleanup: + git_tree_entry_free(entry); + git_tree_free(tree); + return error; +} |