diff options
author | Patrick Steinhardt <ps@pks.im> | 2017-06-16 13:34:43 +0200 |
---|---|---|
committer | Patrick Steinhardt <ps@pks.im> | 2017-06-26 15:39:26 +0200 |
commit | 89a3482829c77590b3cc4fe95a33b93eebaecff5 (patch) | |
tree | b2c599e261657d61b3a3d182fdd3d84dc0491c0f /src/diff.c | |
parent | fa94875295bbd7a4afa0f7724e77dcff8cd3adab (diff) | |
download | libgit2-89a3482829c77590b3cc4fe95a33b93eebaecff5.tar.gz |
diff: implement function to calculate patch ID
The upstream git project provides the ability to calculate a so-called
patch ID. Quoting from git-patch-id(1):
A "patch ID" is nothing but a sum of SHA-1 of the file diffs
associated with a patch, with whitespace and line numbers ignored.
Patch IDs can be used to identify two patches which are probably the
same thing, e.g. when a patch has been cherry-picked to another branch.
This commit implements a new function `git_diff_patchid`, which takes a
diff and derives an OID from it. Note the different terminology here: a
patch in libgit2 is the set of differences in a single file, and a diff
can contain multiple patches for different files. The implementation
matches the upstream implementation and should derive the same OID for
the same diff. In fact, some code has been directly derived from the
upstream implementation.
The upstream implementation has two different modes to calculate patch
IDs: the stable and the unstable mode. The old way of calculating
the patch IDs was unstable in the sense that a different ordering of the
diffs led to different results. This oversight was fixed in git
1.9, but as git tries hard to never break existing workflows, the old
and unstable way is still default. The newer and stable way does not
care for ordering of the diff hunks, and in fact it is the mode that
should probably be used today. So right now, we only implement the
stable way of generating the patch ID.
Diffstat (limited to 'src/diff.c')
-rw-r--r-- | src/diff.c | 144 |
1 files changed, 144 insertions, 0 deletions
diff --git a/src/diff.c b/src/diff.c
index a93bd4cd0..bc4074398 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -19,6 +19,13 @@
 #define DIFF_FLAG_SET(DIFF,FLAG,VAL) (DIFF)->opts.flags = \
 	(VAL) ? ((DIFF)->opts.flags | (FLAG)) : ((DIFF)->opts.flags & ~(VAL))
 
+/* State threaded through the git_diff_foreach callbacks of git_diff_patchid. */
+struct patch_id_args {
+	git_hash_ctx ctx;	/* running hash of the file currently being processed */
+	git_oid result;		/* byte-wise sum of all per-file hashes flushed so far */
+	int first_file;		/* non-zero until the first file header has been seen */
+};
+
 GIT_INLINE(const char *) diff_delta__path(const git_diff_delta *delta)
 {
 	const char *str = delta->old_file.path;
@@ -374,3 +381,184 @@ int git_diff_format_email_init_options(
 	return 0;
 }
 
+/*
+ * Finish the hash accumulated in `ctx`, re-initialize `ctx` for the next
+ * file and add the digest to `result` as a byte-wise sum with carry.
+ *
+ * Returns 0 on success, or the error from git_hash_final/git_hash_init.
+ */
+static int flush_hunk(git_oid *result, git_hash_ctx *ctx)
+{
+	git_oid hash;
+	unsigned short carry = 0;
+	int error, i;
+
+	if ((error = git_hash_final(&hash, ctx)) < 0 ||
+	    (error = git_hash_init(ctx)) < 0)
+		return error;
+
+	/* Keep the low byte of each sum and carry the rest into the next byte. */
+	for (i = 0; i < GIT_OID_RAWSZ; i++) {
+		carry += result->id[i] + hash.id[i];
+		result->id[i] = carry;
+		carry >>= 8;
+	}
+
+	return 0;
+}
+
+/*
+ * Remove, in place, every character of `buf` for which git__isspace() is
+ * true and truncate the buffer to the remaining length. Scanning stops at
+ * the first NUL byte.
+ */
+static void strip_spaces(git_buf *buf)
+{
+	char *src = buf->ptr, *dst = buf->ptr;
+	char c;
+	size_t len = 0;
+
+	while ((c = *src++) != '\0') {
+		if (!git__isspace(c)) {
+			*dst++ = c;
+			len++;
+		}
+	}
+
+	git_buf_truncate(buf, len);
+}
+
+/*
+ * File callback for git_diff_foreach: folds the hash of the previous file
+ * (if any) into the result, then starts hashing the next file with its
+ * whitespace-stripped diff header.
+ */
+static int file_cb(
+	const git_diff_delta *delta,
+	float progress,
+	void *payload)
+{
+	struct patch_id_args *args = (struct patch_id_args *) payload;
+	git_buf buf = GIT_BUF_INIT;
+	int error;
+
+	GIT_UNUSED(progress);
+
+	if (!args->first_file &&
+	    (error = flush_hunk(&args->result, &args->ctx)) < 0)
+		goto out;
+	args->first_file = 0;
+
+	if ((error = git_buf_printf(&buf,
+			"diff--gita/%sb/%s---a/%s+++b/%s",
+			delta->old_file.path,
+			delta->new_file.path,
+			delta->old_file.path,
+			delta->new_file.path)) < 0)
+		goto out;
+
+	strip_spaces(&buf);
+
+	error = git_hash_update(&args->ctx, buf.ptr, buf.size);
+
+out:
+	git_buf_free(&buf);
+	return error;
+}
+
+/*
+ * Line callback for git_diff_foreach: hashes one diff line with all
+ * whitespace removed, prefixed with '+' for additions and '-' for deletions.
+ */
+static int line_cb(
+	const git_diff_delta *delta,
+	const git_diff_hunk *hunk,
+	const git_diff_line *line,
+	void *payload)
+{
+	struct patch_id_args *args = (struct patch_id_args *) payload;
+	git_buf buf = GIT_BUF_INIT;
+	int error = 0;
+
+	GIT_UNUSED(delta);
+	GIT_UNUSED(hunk);
+
+	switch (line->origin) {
+	case GIT_DIFF_LINE_ADDITION:
+		error = git_buf_putc(&buf, '+');
+		break;
+	case GIT_DIFF_LINE_DELETION:
+		error = git_buf_putc(&buf, '-');
+		break;
+	case GIT_DIFF_LINE_CONTEXT:
+		break;
+	case GIT_DIFF_LINE_CONTEXT_EOFNL:
+	case GIT_DIFF_LINE_ADD_EOFNL:
+	case GIT_DIFF_LINE_DEL_EOFNL:
+		/*
+		 * "No newline at end of file" markers carry no patch content;
+		 * skip them rather than rejecting the whole diff.
+		 */
+		goto out;
+	default:
+		giterr_set(GITERR_PATCH, "invalid line origin for patch");
+		return -1;
+	}
+
+	/* Never hash a partially built line if the buffer failed to grow. */
+	if (error < 0 ||
+	    (error = git_buf_put(&buf, line->content, line->content_len)) < 0)
+		goto out;
+
+	strip_spaces(&buf);
+
+	error = git_hash_update(&args->ctx, buf.ptr, buf.size);
+
+out:
+	git_buf_free(&buf);
+	return error;
+}
+
+/* Set up `opts` through the GIT_DIFF_PATCHID_OPTIONS_INIT template for `version`. */
+int git_diff_patchid_init_options(git_diff_patchid_options *opts, unsigned int version)
+{
+	GIT_INIT_STRUCTURE_FROM_TEMPLATE(
+		opts, version, git_diff_patchid_options, GIT_DIFF_PATCHID_OPTIONS_INIT);
+	return 0;
+}
+
+/*
+ * Compute the patch ID of `diff` and store it in `out`.
+ *
+ * Every file's header and lines are hashed with whitespace removed, and
+ * the per-file digests are summed into the resulting OID. `out` is only
+ * written on success.
+ *
+ * Returns 0 on success, or the error code of the step that failed.
+ */
+int git_diff_patchid(git_oid *out, git_diff *diff, git_diff_patchid_options *opts)
+{
+	struct patch_id_args args;
+	int error;
+
+	GITERR_CHECK_VERSION(
+		opts, GIT_DIFF_PATCHID_OPTIONS_VERSION, "git_diff_patchid_options");
+
+	memset(&args, 0, sizeof(args));
+	args.first_file = 1;
+	if ((error = git_hash_ctx_init(&args.ctx)) < 0)
+		return error;
+
+	if ((error = git_diff_foreach(diff, file_cb, NULL, NULL, line_cb, &args)) < 0)
+		goto out;
+
+	if ((error = flush_hunk(&args.result, &args.ctx)) < 0)
+		goto out;
+
+	git_oid_cpy(out, &args.result);
+
+out:
+	/* The hash context was initialized above; release it on every path. */
+	git_hash_ctx_cleanup(&args.ctx);
+	return error;
+}