summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorPatrick Steinhardt <ps@pks.im>2017-06-16 13:34:43 +0200
committerPatrick Steinhardt <ps@pks.im>2017-06-26 15:39:26 +0200
commit89a3482829c77590b3cc4fe95a33b93eebaecff5 (patch)
treeb2c599e261657d61b3a3d182fdd3d84dc0491c0f /src
parentfa94875295bbd7a4afa0f7724e77dcff8cd3adab (diff)
downloadlibgit2-89a3482829c77590b3cc4fe95a33b93eebaecff5.tar.gz
diff: implement function to calculate patch ID
The upstream git project provides the ability to calculate a so-called patch ID. Quoting from git-patch-id(1): "A "patch ID" is nothing but a sum of SHA-1 of the file diffs associated with a patch, with whitespace and line numbers ignored." Patch IDs can be used to identify two patches which are probably the same thing, e.g. when a patch has been cherry-picked to another branch. This commit implements a new function `git_diff_patchid`, which takes a diff and derives an OID from it. Note the different terminology here: in libgit2, a patch is the set of differences in a single file, and a diff can contain multiple patches for different files. The implementation matches the upstream implementation and should derive the same OID for the same diff. In fact, some code has been directly derived from the upstream implementation. The upstream implementation has two different modes to calculate patch IDs: the stable and the unstable mode. The old way of calculating the patch IDs was unstable in the sense that a different ordering of the diffs led to different results. This oversight was fixed in git 1.9, but as git tries hard to never break existing workflows, the old and unstable way is still the default. The newer and stable way does not care about the ordering of the diff hunks, and in fact it is the mode that should probably be used today. So right now, we only implement the stable way of generating the patch ID.
Diffstat (limited to 'src')
-rw-r--r--src/diff.c144
1 files changed, 144 insertions, 0 deletions
diff --git a/src/diff.c b/src/diff.c
index a93bd4cd0..bc4074398 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -19,6 +19,12 @@
/*
 * Set (VAL is non-zero) or clear (VAL is zero) the bits of FLAG in the
 * option flags of DIFF.
 *
 * Clearing has to mask with ~(FLAG): masking with ~(VAL) would be ~0 whenever
 * VAL is zero and would therefore leave the flag set.
 */
#define DIFF_FLAG_SET(DIFF,FLAG,VAL) (DIFF)->opts.flags = \
	(VAL) ? ((DIFF)->opts.flags | (FLAG)) : ((DIFF)->opts.flags & ~(FLAG))
+/* State shared between the callbacks used while computing a patch ID. */
+struct patch_id_args {
+ /* Hash context fed by file_cb() and line_cb(); finalized and restarted by flush_hunk(). */
+ git_hash_ctx ctx;
+ /* Running byte-wise sum of the digests folded in by flush_hunk(). */
+ git_oid result;
+ /* Non-zero until file_cb() has run once; suppresses the flush before the first file. */
+ int first_file;
+};
+
GIT_INLINE(const char *) diff_delta__path(const git_diff_delta *delta)
{
const char *str = delta->old_file.path;
@@ -374,3 +380,141 @@ int git_diff_format_email_init_options(
return 0;
}
+/*
+ * Finish the hash currently accumulated in `ctx`, restart `ctx` for the
+ * next round of input and fold the finished digest into `result`.
+ *
+ * The fold is a byte-wise addition with carry that starts at index 0 and
+ * covers all GIT_OID_RAWSZ bytes; a carry out of the last byte is dropped.
+ *
+ * Returns 0 on success. If git_hash_final() or git_hash_init() reports a
+ * negative error code, that code is returned and `result` is not modified.
+ */
+static int flush_hunk(git_oid *result, git_hash_ctx *ctx)
+{
+	git_oid digest;
+	unsigned int sum = 0;
+	int error, pos;
+
+	error = git_hash_final(&digest, ctx);
+	if (error < 0)
+		return error;
+
+	error = git_hash_init(ctx);
+	if (error < 0)
+		return error;
+
+	for (pos = 0; pos < GIT_OID_RAWSZ; pos++) {
+		/* at most 0xff + 0xff + 1, so what remains after the shift is 0 or 1 */
+		sum += result->id[pos] + digest.id[pos];
+		result->id[pos] = sum & 0xff;
+		sum >>= 8;
+	}
+
+	return 0;
+}
+
+/*
+ * Remove, in place, every character of `buf` for which git__isspace()
+ * is true, then shorten the buffer to the number of characters kept.
+ *
+ * Scanning stops at the first NUL byte found in the buffer contents.
+ */
+static void strip_spaces(git_buf *buf)
+{
+	const char *in;
+	char *keep = buf->ptr;
+
+	for (in = buf->ptr; *in != '\0'; in++) {
+		if (git__isspace(*in))
+			continue;
+		*keep++ = *in;
+	}
+
+	git_buf_truncate(buf, (size_t)(keep - buf->ptr));
+}
+
+/*
+ * Per-file callback for git_diff_foreach().
+ *
+ * For every file but the first, the data hashed so far is folded into the
+ * running result via flush_hunk(). A header built from the old and new
+ * paths of `delta` is then stripped of whitespace and fed into the hash
+ * context.
+ *
+ * `payload` must point to a struct patch_id_args; `progress` is unused.
+ * Returns the result of the last operation attempted (negative on error).
+ */
+static int file_cb(
+	const git_diff_delta *delta,
+	float progress,
+	void *payload)
+{
+	struct patch_id_args *state = payload;
+	git_buf header = GIT_BUF_INIT;
+	int error;
+
+	GIT_UNUSED(progress);
+
+	if (state->first_file) {
+		/* nothing has been hashed yet, so there is nothing to flush */
+		state->first_file = 0;
+	} else {
+		error = flush_hunk(&state->result, &state->ctx);
+		if (error < 0)
+			goto cleanup;
+	}
+
+	error = git_buf_printf(&header,
+		"diff--gita/%sb/%s---a/%s+++b/%s",
+		delta->old_file.path,
+		delta->new_file.path,
+		delta->old_file.path,
+		delta->new_file.path);
+	if (error < 0)
+		goto cleanup;
+
+	/* the paths themselves may contain whitespace, which is ignored too */
+	strip_spaces(&header);
+
+	error = git_hash_update(&state->ctx, header.ptr, header.size);
+
+cleanup:
+	git_buf_free(&header);
+	return error;
+}
+
+/*
+ * Per-line callback for git_diff_foreach().
+ *
+ * Additions and deletions are prefixed with '+' and '-' respectively,
+ * context lines get no prefix. The line is then stripped of whitespace and
+ * fed into the hash context of the struct patch_id_args that `payload`
+ * points to. `delta` and `hunk` are unused.
+ *
+ * The *_EOFNL origins (the "no newline at end of file" markers in
+ * git_diff_line_t) carry no patch content and are skipped, so a diff
+ * touching a file without a trailing newline no longer aborts the
+ * calculation.
+ *
+ * Returns 0 on success and a negative value on error: an unknown line
+ * origin, a failure to build the buffer, or a failing hash update.
+ */
+static int line_cb(
+	const git_diff_delta *delta,
+	const git_diff_hunk *hunk,
+	const git_diff_line *line,
+	void *payload)
+{
+	struct patch_id_args *args = (struct patch_id_args *) payload;
+	git_buf buf = GIT_BUF_INIT;
+	int error = 0;
+
+	GIT_UNUSED(delta);
+	GIT_UNUSED(hunk);
+
+	switch (line->origin) {
+	case GIT_DIFF_LINE_ADDITION:
+		error = git_buf_putc(&buf, '+');
+		break;
+	case GIT_DIFF_LINE_DELETION:
+		error = git_buf_putc(&buf, '-');
+		break;
+	case GIT_DIFF_LINE_CONTEXT:
+		break;
+	case GIT_DIFF_LINE_CONTEXT_EOFNL:
+	case GIT_DIFF_LINE_ADD_EOFNL:
+	case GIT_DIFF_LINE_DEL_EOFNL:
+		/* marker lines only; they do not contribute to the patch ID */
+		goto out;
+	default:
+		giterr_set(GITERR_PATCH, "invalid line origin for patch");
+		error = -1;
+		goto out;
+	}
+
+	/*
+	 * Do not hash a buffer that could not be filled completely; that
+	 * would silently yield a wrong patch ID.
+	 */
+	if (error < 0 ||
+	    (error = git_buf_put(&buf, line->content, line->content_len)) < 0)
+		goto out;
+
+	strip_spaces(&buf);
+
+	error = git_hash_update(&args->ctx, buf.ptr, buf.size);
+
+out:
+	git_buf_free(&buf);
+	return error;
+}
+
+/*
+ * Initialize `opts` from the GIT_DIFF_PATCHID_OPTIONS_INIT template for the
+ * requested structure `version`.
+ *
+ * NOTE(review): GIT_INIT_STRUCTURE_FROM_TEMPLATE is defined elsewhere; it
+ * presumably validates `version` and may return early on a mismatch --
+ * confirm against its definition. Returns 0 when the macro falls through.
+ */
+int git_diff_patchid_init_options(git_diff_patchid_options *opts, unsigned int version)
+{
+ GIT_INIT_STRUCTURE_FROM_TEMPLATE(
+ opts, version, git_diff_patchid_options, GIT_DIFF_PATCHID_OPTIONS_INIT);
+ return 0;
+}
+
+/*
+ * Compute the patch ID of `diff` and store it in `out`.
+ *
+ * Every file of the diff is hashed separately (path header plus its
+ * whitespace-stripped lines) and the per-file digests are summed up, so
+ * the running result starts out as all zeroes.
+ *
+ * `opts` is checked against GIT_DIFF_PATCHID_OPTIONS_VERSION by
+ * GITERR_CHECK_VERSION (defined elsewhere; presumably it returns early on a
+ * mismatch -- confirm against its definition).
+ *
+ * Returns 0 on success or a negative error code. `out` is only written on
+ * success.
+ */
+int git_diff_patchid(git_oid *out, git_diff *diff, git_diff_patchid_options *opts)
+{
+	struct patch_id_args args;
+	int error;
+
+	GITERR_CHECK_VERSION(
+		opts, GIT_DIFF_PATCHID_OPTIONS_VERSION, "git_diff_patchid_options");
+
+	memset(&args, 0, sizeof(args));
+	args.first_file = 1;
+
+	/* Nothing to release yet if the context could not be set up. */
+	if ((error = git_hash_ctx_init(&args.ctx)) < 0)
+		return error;
+
+	if ((error = git_diff_foreach(diff, file_cb, NULL, NULL, line_cb, &args)) < 0)
+		goto out;
+
+	/* Fold the digest of the last file into the result. */
+	if ((error = flush_hunk(&args.result, &args.ctx)) < 0)
+		goto out;
+
+	git_oid_cpy(out, &args.result);
+
+out:
+	/* Pair the successful git_hash_ctx_init() so the context is not leaked. */
+	git_hash_ctx_cleanup(&args.ctx);
+	return error;
+}