summaryrefslogtreecommitdiff
path: root/src/diff_file.c
diff options
context:
space:
mode:
authorRussell Belfer <rb@github.com>2013-06-10 10:10:39 -0700
committerRussell Belfer <rb@github.com>2013-06-10 10:10:39 -0700
commit114f5a6c41ea03393e00ae41126a6ddb0ef39a15 (patch)
treef579e849a72749123a54483180726396244177b2 /src/diff_file.c
parent7000f3fa7bad25ec07355d6afb640ea272201dff (diff)
downloadlibgit2-114f5a6c41ea03393e00ae41126a6ddb0ef39a15.tar.gz
Reorganize diff and add basic diff driver
This is a significant reorganization of the diff code to break it into a set of more clearly distinct files and to document the new organization. Hopefully this will make the diff code easier to understand and to extend. This adds a new `git_diff_driver` object that looks up diff driver information from the attributes and the config so that things like function content in diff headers can be provided. The full driver spec is not implemented in this commit - this is focused on the reorganization of the code and putting the driver hooks in place. This also removes a few #includes from src/repository.h that were overbroad, but as a result required extra #includes in a variety of places since including src/repository.h no longer results in pulling in the whole world.
Diffstat (limited to 'src/diff_file.c')
-rw-r--r--src/diff_file.c442
1 files changed, 442 insertions, 0 deletions
diff --git a/src/diff_file.c b/src/diff_file.c
new file mode 100644
index 000000000..e4f8ca1e8
--- /dev/null
+++ b/src/diff_file.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#include "common.h"
+#include "git2/blob.h"
+#include "git2/submodule.h"
+#include "diff.h"
+#include "diff_file.h"
+#include "odb.h"
+#include "fileops.h"
+#include "filter.h"
+
+#define DIFF_MAX_FILESIZE 0x20000000
+
+/*
+ * Mark the file as binary when it is larger than the configured limit.
+ *
+ * The size test only runs when no DIFF_FLAGS_KNOWN_BINARY bit is set yet
+ * and diff options are present with a non-negative max_size.  A positive
+ * max_size is used as the limit; zero falls back to DIFF_MAX_FILESIZE.
+ *
+ * Returns true when GIT_DIFF_FLAG_BINARY is set on the file afterwards
+ * (whether it was set here or earlier).
+ */
+static bool diff_file_content_binary_by_size(git_diff_file_content *fc)
+{
+	bool undecided = ((fc->file.flags & DIFF_FLAGS_KNOWN_BINARY) == 0);
+
+	if (undecided && fc->opts && fc->opts->max_size >= 0) {
+		git_off_t limit;
+
+		if (fc->opts->max_size > 0)
+			limit = fc->opts->max_size;
+		else
+			limit = DIFF_MAX_FILESIZE;
+
+		if (fc->file.size > limit)
+			fc->file.flags |= GIT_DIFF_FLAG_BINARY;
+	}
+
+	return ((fc->file.flags & GIT_DIFF_FLAG_BINARY) != 0);
+}
+
+/*
+ * Ask the diff driver to classify the mapped content and record the
+ * answer in the file flags.
+ *
+ * Does nothing if a DIFF_FLAGS_KNOWN_BINARY bit is already set.  A driver
+ * result of 0 sets GIT_DIFF_FLAG_NOT_BINARY, 1 sets GIT_DIFF_FLAG_BINARY,
+ * and any other value leaves the flags untouched.
+ */
+static void diff_file_content_binary_by_content(git_diff_file_content *fc)
+{
+	int verdict;
+
+	if ((fc->file.flags & DIFF_FLAGS_KNOWN_BINARY) != 0)
+		return;
+
+	verdict = git_diff_driver_content_is_binary(
+		fc->driver, fc->map.data, fc->map.len);
+
+	if (verdict == 0)
+		fc->file.flags |= GIT_DIFF_FLAG_NOT_BINARY;
+	else if (verdict == 1)
+		fc->file.flags |= GIT_DIFF_FLAG_BINARY;
+}
+
+/*
+ * Shared tail of the diff_file_content_init_from_* functions.
+ *
+ * - Looks up a driver for the empty path when none is set yet, and in that
+ *   case also sets the source type to GIT_ITERATOR_TYPE_TREE.
+ * - Makes a first binary / not-binary decision from the file size, the
+ *   GIT_DIFF_FORCE_TEXT option and the driver, then applies the size limit.
+ * - Gives files flagged GIT_DIFF_FLAG__NO_DATA an empty, already "loaded"
+ *   buffer.
+ * - Runs content-based binary detection if data is already loaded.
+ *
+ * Returns 0 on success, -1 if the driver lookup fails.
+ */
+static int diff_file_content_init_common(git_diff_file_content *fc)
+{
+	uint32_t flags = GIT_DIFF_NORMAL;
+
+	if (fc->opts)
+		flags = fc->opts->flags;
+
+	if (fc->driver == NULL) {
+		if (git_diff_driver_lookup(&fc->driver, fc->repo, "") < 0)
+			return -1;
+		fc->src = GIT_ITERATOR_TYPE_TREE;
+	}
+
+	if ((git_off_t)((size_t)fc->file.size) != fc->file.size) {
+		/* size does not survive a round trip through size_t, so the
+		 * file could never be mapped: treat it as binary
+		 */
+		fc->file.flags |= GIT_DIFF_FLAG_BINARY;
+	} else if ((flags & GIT_DIFF_FORCE_TEXT) != 0) {
+		/* the user is forcing us to text diff the file */
+		fc->file.flags |= GIT_DIFF_FLAG_NOT_BINARY;
+	} else {
+		/* otherwise let the diff driver force a behavior, if it wants to */
+		int forced = git_diff_driver_is_binary(fc->driver);
+
+		if (forced == 0)
+			fc->file.flags |= GIT_DIFF_FLAG_NOT_BINARY;
+		else if (forced == 1)
+			fc->file.flags |= GIT_DIFF_FLAG_BINARY;
+	}
+
+	diff_file_content_binary_by_size(fc);
+
+	if ((fc->file.flags & GIT_DIFF_FLAG__NO_DATA) != 0) {
+		fc->file.flags |= GIT_DIFF_FLAG__LOADED;
+		fc->map.len = 0;
+		fc->map.data = "";
+	}
+
+	if ((fc->file.flags & GIT_DIFF_FLAG__LOADED) != 0)
+		diff_file_content_binary_by_content(fc);
+
+	return 0;
+}
+
+/*
+ * Initialize `fc` for one side of the delta at `delta_index` in `diff`.
+ *
+ * `use_old` selects the old file of the delta (and the diff's old source);
+ * otherwise the new file and new source are used.  The file record is
+ * copied into `fc`, a diff driver is looked up by the file path, and sides
+ * that cannot have content for the delta's status are flagged
+ * GIT_DIFF_FLAG__NO_DATA before the common initialization runs.
+ *
+ * Returns 0 on success, -1 if the driver lookup fails.
+ */
+int diff_file_content_init_from_diff(
+	git_diff_file_content *fc,
+	git_diff_list *diff,
+	size_t delta_index,
+	bool use_old)
+{
+	git_diff_delta *delta = git_vector_get(&diff->deltas, delta_index);
+	git_diff_file *file = use_old ? &delta->old_file : &delta->new_file;
+	bool has_data;
+
+	memset(fc, 0, sizeof(*fc));
+	fc->repo = diff->repo;
+	fc->opts = &diff->opts;
+	fc->src = use_old ? diff->old_src : diff->new_src;
+	memcpy(&fc->file, file, sizeof(fc->file));
+
+	if (git_diff_driver_lookup(&fc->driver, fc->repo, file->path) < 0)
+		return -1;
+
+	switch (delta->status) {
+	case GIT_DELTA_MODIFIED:
+	case GIT_DELTA_COPIED:
+	case GIT_DELTA_RENAMED:
+		/* both sides carry content */
+		has_data = true;
+		break;
+	case GIT_DELTA_ADDED:
+		/* only the new side carries content */
+		has_data = !use_old;
+		break;
+	case GIT_DELTA_DELETED:
+		/* only the old side carries content */
+		has_data = use_old;
+		break;
+	case GIT_DELTA_UNTRACKED:
+		/* new side only, and only when untracked content was requested */
+		has_data = !use_old &&
+			(diff->opts.flags & GIT_DIFF_INCLUDE_UNTRACKED_CONTENT) != 0;
+		break;
+	default:
+		has_data = false;
+		break;
+	}
+
+	if (!has_data)
+		fc->file.flags |= GIT_DIFF_FLAG__NO_DATA;
+
+	return diff_file_content_init_common(fc);
+}
+
+/*
+ * Initialize `fc` from an in-memory blob (which may be NULL).
+ *
+ * A NULL blob yields a file flagged GIT_DIFF_FLAG__NO_DATA.  Otherwise the
+ * file is marked loaded with a valid OID, its size, mode (0644) and OID
+ * are filled in from the blob, and the data map points directly at the
+ * blob's raw content.  The blob pointer is stored in `fc` either way.
+ *
+ * Returns the result of the common initialization.
+ */
+int diff_file_content_init_from_blob(
+	git_diff_file_content *fc,
+	git_repository *repo,
+	const git_diff_options *opts,
+	const git_blob *blob)
+{
+	memset(fc, 0, sizeof(*fc));
+	fc->repo = repo;
+	fc->opts = opts;
+	fc->blob = blob;
+
+	if (blob != NULL) {
+		fc->file.flags |= GIT_DIFF_FLAG__LOADED | GIT_DIFF_FLAG_VALID_OID;
+		fc->file.size = git_blob_rawsize(blob);
+		fc->file.mode = 0644;
+		git_oid_cpy(&fc->file.oid, git_blob_id(blob));
+
+		fc->map.len = (size_t)fc->file.size;
+		fc->map.data = (char *)git_blob_rawcontent(blob);
+	} else {
+		fc->file.flags |= GIT_DIFF_FLAG__NO_DATA;
+	}
+
+	return diff_file_content_init_common(fc);
+}
+
+/*
+ * Initialize `fc` from a caller-supplied buffer (which may be NULL).
+ *
+ * A NULL buffer yields a file flagged GIT_DIFF_FLAG__NO_DATA.  Otherwise
+ * the file is marked loaded with a valid OID, its size is `buflen`, its
+ * mode is 0644, its OID is the blob hash of the buffer, and the data map
+ * points directly at `buf` (no copy is made here).
+ *
+ * Returns the result of the common initialization.
+ */
+int diff_file_content_init_from_raw(
+	git_diff_file_content *fc,
+	git_repository *repo,
+	const git_diff_options *opts,
+	const char *buf,
+	size_t buflen)
+{
+	memset(fc, 0, sizeof(*fc));
+	fc->repo = repo;
+	fc->opts = opts;
+
+	if (buf != NULL) {
+		fc->file.flags |= GIT_DIFF_FLAG__LOADED | GIT_DIFF_FLAG_VALID_OID;
+		fc->file.size = buflen;
+		fc->file.mode = 0644;
+		git_odb_hash(&fc->file.oid, buf, buflen, GIT_OBJ_BLOB);
+
+		fc->map.len = buflen;
+		fc->map.data = (char *)buf;
+	} else {
+		fc->file.flags |= GIT_DIFF_FLAG__NO_DATA;
+	}
+
+	return diff_file_content_init_common(fc);
+}
+
+/*
+ * Build the "Subproject commit <oid>" text that stands in for the
+ * content of a commit (submodule) entry.
+ *
+ * With `check_status` set, the submodule is looked up by the file path
+ * and its status queried: a missing OID is filled in from the submodule's
+ * workdir id (or, failing that, its head id) and "-dirty" is appended when
+ * the working directory is dirty.  A GIT_EEXISTS error from either call
+ * is treated as success, and the function returns without producing data.
+ *
+ * On success the generated text is stored in the data map and flagged
+ * GIT_DIFF_FLAG__FREE_DATA.  Returns 0 on success or a negative error.
+ */
+static int diff_file_content_commit_to_str(
+	git_diff_file_content *fc, bool check_status)
+{
+	char hex[GIT_OID_HEXSZ+1];
+	git_buf text = GIT_BUF_INIT;
+	const char *suffix = "";
+
+	if (check_status) {
+		int error;
+		git_submodule *sm = NULL;
+		unsigned int sm_status = 0;
+
+		error = git_submodule_lookup(&sm, fc->repo, fc->file.path);
+		if (error >= 0)
+			error = git_submodule_status(&sm_status, sm);
+
+		if (error < 0) {
+			/* GIT_EEXISTS means a "submodule" that has not been git added */
+			return (error == GIT_EEXISTS) ? 0 : error;
+		}
+
+		/* update OID if we didn't have it previously */
+		if ((fc->file.flags & GIT_DIFF_FLAG_VALID_OID) == 0) {
+			const git_oid *sm_head = git_submodule_wd_id(sm);
+
+			if (sm_head == NULL)
+				sm_head = git_submodule_head_id(sm);
+
+			if (sm_head != NULL) {
+				git_oid_cpy(&fc->file.oid, sm_head);
+				fc->file.flags |= GIT_DIFF_FLAG_VALID_OID;
+			}
+		}
+
+		if (GIT_SUBMODULE_STATUS_IS_WD_DIRTY(sm_status))
+			suffix = "-dirty";
+	}
+
+	git_oid_tostr(hex, sizeof(hex), &fc->file.oid);
+	if (git_buf_printf(&text, "Subproject commit %s%s\n", hex, suffix) < 0)
+		return -1;
+
+	/* read the length before detaching the buffer */
+	fc->map.len = git_buf_len(&text);
+	fc->map.data = git_buf_detach(&text);
+	fc->file.flags |= GIT_DIFF_FLAG__FREE_DATA;
+
+	return 0;
+}
+
+/*
+ * Load file content from the object database.
+ *
+ * - A zero OID means there is nothing to load; returns 0.
+ * - Commit (submodule) entries get a generated "Subproject commit" text.
+ * - When the size is unknown, the object header (or the whole object) is
+ *   read first so the size limit can be applied before the blob is built.
+ * - Files that turn out to be binary by size are not loaded; returns 0.
+ *
+ * On success the blob is stored in `fc`, flagged GIT_DIFF_FLAG__FREE_BLOB,
+ * and the data map points at its raw content.  Returns 0 on success or an
+ * error code from the lookup calls.
+ */
+static int diff_file_content_load_blob(git_diff_file_content *fc)
+{
+	int error = 0;
+	git_odb_object *odb_obj = NULL;
+
+	if (git_oid_iszero(&fc->file.oid))
+		return 0;
+
+	if (fc->file.mode == GIT_FILEMODE_COMMIT)
+		return diff_file_content_commit_to_str(fc, false);
+
+	/* if we don't know size, try to peek at object header first */
+	if (!fc->file.size) {
+		git_odb *odb;
+		size_t len;
+		git_otype type;
+
+		if (!(error = git_repository_odb__weakptr(&odb, fc->repo))) {
+			error = git_odb__read_header_or_object(
+				&odb_obj, &len, &type, odb, &fc->file.oid);
+			/* NOTE(review): `odb` came from a `__weakptr` accessor; if
+			 * that accessor does not take a reference, this free drops
+			 * one that this function never owned - confirm.
+			 */
+			git_odb_free(odb);
+		}
+		if (error)
+			return error;
+
+		fc->file.size = len;
+	}
+
+	if (diff_file_content_binary_by_size(fc)) {
+		/* the header peek may have handed back the full object; it is
+		 * not going to be used, so release it rather than leak it
+		 */
+		if (odb_obj != NULL)
+			git_odb_object_free(odb_obj);
+		return 0;
+	}
+
+	if (odb_obj != NULL) {
+		/* reuse the object that was already read during the peek */
+		error = git_object__from_odb_object(
+			(git_object **)&fc->blob, fc->repo, odb_obj, GIT_OBJ_BLOB);
+		git_odb_object_free(odb_obj);
+	} else {
+		error = git_blob_lookup(
+			(git_blob **)&fc->blob, fc->repo, &fc->file.oid);
+	}
+
+	if (!error) {
+		fc->file.flags |= GIT_DIFF_FLAG__FREE_BLOB;
+		fc->map.data = (void *)git_blob_rawcontent(fc->blob);
+		fc->map.len = (size_t)git_blob_rawsize(fc->blob);
+	}
+
+	return error;
+}
+
+/*
+ * Load the target of a working-directory symlink as the file content.
+ *
+ * A zeroed buffer of twice the recorded file size plus one byte is
+ * allocated and flagged GIT_DIFF_FLAG__FREE_DATA, then filled by
+ * p_readlink() from `path`.  The number of bytes read becomes the map
+ * length.
+ *
+ * Returns 0 on success, -1 on allocation or readlink failure.
+ */
+static int diff_file_content_load_workdir_symlink(
+	git_diff_file_content *fc, git_buf *path)
+{
+	ssize_t capacity;
+	ssize_t nread;
+
+	/* link path on disk could be UTF-16, so prepare a buffer that is
+	 * big enough to handle some UTF-8 data expansion
+	 */
+	capacity = (ssize_t)(fc->file.size * 2) + 1;
+
+	fc->map.data = git__calloc(capacity, sizeof(char));
+	GITERR_CHECK_ALLOC(fc->map.data);
+
+	/* flag the buffer right away so it is released even if the read fails */
+	fc->file.flags |= GIT_DIFF_FLAG__FREE_DATA;
+
+	nread = p_readlink(git_buf_cstr(path), fc->map.data, capacity);
+	if (nread < 0) {
+		giterr_set(GITERR_OS, "Failed to read symlink '%s'", fc->file.path);
+		return -1;
+	}
+
+	fc->map.len = nread;
+	return 0;
+}
+
+/*
+ * Load a regular working-directory file as the file content.
+ *
+ * The file at `path` is opened read-only.  If the size is unknown it is
+ * taken from the descriptor; an empty file, or one that is binary by
+ * size, is left unloaded.  With no filters to apply the file is mapped
+ * (GIT_DIFF_FLAG__UNMAP_DATA); if filters exist, or mapping fails, the
+ * file is read into a buffer, filtered when needed, and the result is
+ * handed to the map (GIT_DIFF_FLAG__FREE_DATA).
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int diff_file_content_load_workdir_file(
+	git_diff_file_content *fc, git_buf *path)
+{
+	git_buf raw = GIT_BUF_INIT, filtered = GIT_BUF_INIT;
+	git_vector filters = GIT_VECTOR_INIT;
+	int error = 0;
+	git_file fd;
+
+	fd = git_futils_open_ro(git_buf_cstr(path));
+	if (fd < 0)
+		return fd;
+
+	if (fc->file.size == 0) {
+		fc->file.size = git_futils_filesize(fd);
+		if (fc->file.size == 0)
+			goto cleanup;
+	}
+
+	if (diff_file_content_binary_by_size(fc))
+		goto cleanup;
+
+	error = git_filters_load(
+		&filters, fc->repo, fc->file.path, GIT_FILTER_TO_ODB);
+	if (error < 0)
+		goto cleanup;
+	/* error >= 0 is a filter count */
+
+	if (error == 0) {
+		/* no filters: try to map the file directly */
+		error = git_futils_mmap_ro(
+			&fc->map, fd, 0, (size_t)fc->file.size);
+
+		if (error == 0)
+			fc->file.flags |= GIT_DIFF_FLAG__UNMAP_DATA;
+		else /* fall through to try readbuffer below */
+			giterr_clear();
+	}
+
+	if (error != 0) {
+		/* either filters must be applied or the mapping failed */
+		error = git_futils_readbuffer_fd(&raw, fd, (size_t)fc->file.size);
+		if (error < 0)
+			goto cleanup;
+
+		if (filters.length == 0)
+			git_buf_swap(&filtered, &raw);
+		else
+			error = git_filters_apply(&filtered, &raw, &filters);
+
+		if (error == 0) {
+			fc->map.len = git_buf_len(&filtered);
+			fc->map.data = git_buf_detach(&filtered);
+			fc->file.flags |= GIT_DIFF_FLAG__FREE_DATA;
+		}
+
+		git_buf_free(&raw);
+		git_buf_free(&filtered);
+	}
+
+cleanup:
+	git_filters_free(&filters);
+	p_close(fd);
+
+	return error;
+}
+
+/*
+ * Load file content from the working directory.
+ *
+ * - Commit (submodule) entries get a generated "Subproject commit" text
+ *   that includes the submodule status.
+ * - Tree entries have no content; returns 0.
+ * - Otherwise the file path is joined onto the repository workdir and the
+ *   entry is loaded as a symlink or a regular file depending on its mode.
+ *
+ * Once data is loaded, a missing OID is computed by hashing the content;
+ * GIT_DIFF_FLAG_VALID_OID is only set when that hash succeeds.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int diff_file_content_load_workdir(git_diff_file_content *fc)
+{
+	int error = 0;
+	git_buf path = GIT_BUF_INIT;
+
+	if (fc->file.mode == GIT_FILEMODE_COMMIT)
+		return diff_file_content_commit_to_str(fc, true);
+
+	if (fc->file.mode == GIT_FILEMODE_TREE)
+		return 0;
+
+	if (git_buf_joinpath(
+			&path, git_repository_workdir(fc->repo), fc->file.path) < 0) {
+		/* release anything the failed join may have left in the buffer */
+		git_buf_free(&path);
+		return -1;
+	}
+
+	if (S_ISLNK(fc->file.mode))
+		error = diff_file_content_load_workdir_symlink(fc, &path);
+	else
+		error = diff_file_content_load_workdir_file(fc, &path);
+
+	/* once data is loaded, update OID if we didn't have it previously */
+	if (!error && (fc->file.flags & GIT_DIFF_FLAG_VALID_OID) == 0) {
+		error = git_odb_hash(
+			&fc->file.oid, fc->map.data, fc->map.len, GIT_OBJ_BLOB);
+
+		/* do not advertise an OID that was never computed */
+		if (!error)
+			fc->file.flags |= GIT_DIFF_FLAG_VALID_OID;
+	}
+
+	git_buf_free(&path);
+	return error;
+}
+
+/*
+ * Make sure the content for `fc` is loaded.
+ *
+ * Nothing is done (and 0 is returned) when the content is already loaded
+ * or the file is flagged binary.  Otherwise the content is read from the
+ * working directory or from a blob, depending on the source type, the
+ * file is flagged GIT_DIFF_FLAG__LOADED, and content-based binary
+ * detection runs.
+ *
+ * Returns 0 on success or the error from the loader.
+ */
+int diff_file_content_load(git_diff_file_content *fc)
+{
+	int error;
+
+	if ((fc->file.flags &
+		 (GIT_DIFF_FLAG__LOADED | GIT_DIFF_FLAG_BINARY)) != 0)
+		return 0;
+
+	error = (fc->src == GIT_ITERATOR_TYPE_WORKDIR) ?
+		diff_file_content_load_workdir(fc) :
+		diff_file_content_load_blob(fc);
+
+	if (error)
+		return error;
+
+	fc->file.flags |= GIT_DIFF_FLAG__LOADED;
+
+	diff_file_content_binary_by_content(fc);
+
+	return 0;
+}
+
+/*
+ * Release whatever content `fc` currently holds.
+ *
+ * Data flagged GIT_DIFF_FLAG__FREE_DATA is freed; otherwise data flagged
+ * GIT_DIFF_FLAG__UNMAP_DATA is unmapped.  In both cases the map is reset
+ * to an empty string.  A blob flagged GIT_DIFF_FLAG__FREE_BLOB is freed
+ * as well.  The matching flags and GIT_DIFF_FLAG__LOADED are cleared.
+ */
+void diff_file_content_unload(git_diff_file_content *fc)
+{
+	if (fc->file.flags &
+		(GIT_DIFF_FLAG__FREE_DATA | GIT_DIFF_FLAG__UNMAP_DATA)) {
+
+		if (fc->file.flags & GIT_DIFF_FLAG__FREE_DATA) {
+			git__free(fc->map.data);
+			fc->file.flags &= ~GIT_DIFF_FLAG__FREE_DATA;
+		} else {
+			git_futils_mmap_free(&fc->map);
+			fc->file.flags &= ~GIT_DIFF_FLAG__UNMAP_DATA;
+		}
+
+		/* leave the map pointing at a harmless empty buffer */
+		fc->map.data = "";
+		fc->map.len = 0;
+	}
+
+	if (fc->file.flags & GIT_DIFF_FLAG__FREE_BLOB) {
+		git_blob_free((git_blob *)fc->blob);
+		fc->blob = NULL;
+		fc->file.flags &= ~GIT_DIFF_FLAG__FREE_BLOB;
+	}
+
+	fc->file.flags &= ~GIT_DIFF_FLAG__LOADED;
+}
+
+/*
+ * Tear down `fc`.  At present this only unloads any content it holds.
+ */
+void diff_file_content_clear(git_diff_file_content *fc)
+{
+	/* unloading is currently the only cleanup there is to do */
+	diff_file_content_unload(fc);
+}