summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBen Straub <bs@github.com>2013-08-08 08:53:37 -0700
committerBen Straub <bs@github.com>2013-08-08 08:53:37 -0700
commitbf145a6a2f08add441f55387e5ae7daef58187ae (patch)
treeb191401ba86062de136d435d92b40239faf5cb09 /src
parent46c37911155a01db13185a70d34383b7d7133653 (diff)
parente8242022bc409b3cb7e234eabdd9bda05ae3a158 (diff)
downloadlibgit2-bf145a6a2f08add441f55387e5ae7daef58187ae.tar.gz
Merge pull request #1746 from libgit2/rename-detection-performance
Rename detection slow
Diffstat (limited to 'src')
-rw-r--r--src/array.h11
-rw-r--r--src/blob.c89
-rw-r--r--src/blob.h9
-rw-r--r--src/diff.h29
-rw-r--r--src/diff_file.c17
-rw-r--r--src/diff_patch.c30
-rw-r--r--src/diff_tform.c352
-rw-r--r--src/hashsig.c214
-rw-r--r--src/index.c34
-rw-r--r--src/util.h5
10 files changed, 458 insertions, 332 deletions
diff --git a/src/array.h b/src/array.h
index 248010425..c25a1b29e 100644
--- a/src/array.h
+++ b/src/array.h
@@ -39,25 +39,26 @@
#define GITERR_CHECK_ARRAY(a) GITERR_CHECK_ALLOC((a).ptr)
-typedef git_array_t(void) git_array_generic_t;
+typedef git_array_t(char) git_array_generic_t;
/* use a generic array for growth so this can return the new item */
-GIT_INLINE(void *) git_array_grow(git_array_generic_t *a, size_t item_size)
+GIT_INLINE(void *) git_array_grow(void *_a, size_t item_size)
{
+ git_array_generic_t *a = _a;
uint32_t new_size = (a->size < 8) ? 8 : a->asize * 3 / 2;
- void *new_array = git__realloc(a->ptr, new_size * item_size);
+ char *new_array = git__realloc(a->ptr, new_size * item_size);
if (!new_array) {
git_array_clear(*a);
return NULL;
} else {
a->ptr = new_array; a->asize = new_size; a->size++;
- return (((char *)a->ptr) + (a->size - 1) * item_size);
+ return a->ptr + (a->size - 1) * item_size;
}
}
#define git_array_alloc(a) \
((a).size >= (a).asize) ? \
- git_array_grow((git_array_generic_t *)&(a), sizeof(*(a).ptr)) : \
+ git_array_grow(&(a), sizeof(*(a).ptr)) : \
(a).ptr ? &(a).ptr[(a).size++] : NULL
#define git_array_last(a) ((a).size ? &(a).ptr[(a).size - 1] : NULL)
diff --git a/src/blob.c b/src/blob.c
index 2e4d5f479..5bb51f7cf 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -105,6 +105,7 @@ static int write_file_stream(
static int write_file_filtered(
git_oid *oid,
+ git_off_t *size,
git_odb *odb,
const char *full_path,
git_vector *filters)
@@ -123,8 +124,11 @@ static int write_file_filtered(
git_buf_free(&source);
/* Write the file to disk if it was properly filtered */
- if (!error)
+ if (!error) {
+ *size = dest.size;
+
error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB);
+ }
git_buf_free(&dest);
return error;
@@ -152,21 +156,46 @@ static int write_symlink(
return error;
}
-static int blob_create_internal(git_oid *oid, git_repository *repo, const char *content_path, const char *hint_path, bool try_load_filters)
+int git_blob__create_from_paths(
+ git_oid *oid,
+ struct stat *out_st,
+ git_repository *repo,
+ const char *content_path,
+ const char *hint_path,
+ mode_t hint_mode,
+ bool try_load_filters)
{
int error;
struct stat st;
git_odb *odb = NULL;
git_off_t size;
+ mode_t mode;
+ git_buf path = GIT_BUF_INIT;
assert(hint_path || !try_load_filters);
- if ((error = git_path_lstat(content_path, &st)) < 0 || (error = git_repository_odb__weakptr(&odb, repo)) < 0)
- return error;
+ if (!content_path) {
+ if (git_repository__ensure_not_bare(repo, "create blob from file") < 0)
+ return GIT_EBAREREPO;
+
+ if (git_buf_joinpath(
+ &path, git_repository_workdir(repo), hint_path) < 0)
+ return -1;
+
+ content_path = path.ptr;
+ }
+
+ if ((error = git_path_lstat(content_path, &st)) < 0 ||
+ (error = git_repository_odb(&odb, repo)) < 0)
+ goto done;
+
+ if (out_st)
+ memcpy(out_st, &st, sizeof(st));
size = st.st_size;
+ mode = hint_mode ? hint_mode : st.st_mode;
- if (S_ISLNK(st.st_mode)) {
+ if (S_ISLNK(mode)) {
error = write_symlink(oid, odb, content_path, (size_t)size);
} else {
git_vector write_filters = GIT_VECTOR_INIT;
@@ -187,7 +216,8 @@ static int blob_create_internal(git_oid *oid, git_repository *repo, const char *
error = write_file_stream(oid, odb, content_path, size);
} else {
/* We need to apply one or more filters */
- error = write_file_filtered(oid, odb, content_path, &write_filters);
+ error = write_file_filtered(
+ oid, &size, odb, content_path, &write_filters);
}
git_filters_free(&write_filters);
@@ -207,34 +237,21 @@ static int blob_create_internal(git_oid *oid, git_repository *repo, const char *
*/
}
+done:
+ git_odb_free(odb);
+ git_buf_free(&path);
+
return error;
}
-int git_blob_create_fromworkdir(git_oid *oid, git_repository *repo, const char *path)
+int git_blob_create_fromworkdir(
+ git_oid *oid, git_repository *repo, const char *path)
{
- git_buf full_path = GIT_BUF_INIT;
- const char *workdir;
- int error;
-
- if ((error = git_repository__ensure_not_bare(repo, "create blob from file")) < 0)
- return error;
-
- workdir = git_repository_workdir(repo);
-
- if (git_buf_joinpath(&full_path, workdir, path) < 0) {
- git_buf_free(&full_path);
- return -1;
- }
-
- error = blob_create_internal(
- oid, repo, git_buf_cstr(&full_path),
- git_buf_cstr(&full_path) + strlen(workdir), true);
-
- git_buf_free(&full_path);
- return error;
+ return git_blob__create_from_paths(oid, NULL, repo, NULL, path, 0, true);
}
-int git_blob_create_fromdisk(git_oid *oid, git_repository *repo, const char *path)
+int git_blob_create_fromdisk(
+ git_oid *oid, git_repository *repo, const char *path)
{
int error;
git_buf full_path = GIT_BUF_INIT;
@@ -251,8 +268,8 @@ int git_blob_create_fromdisk(git_oid *oid, git_repository *repo, const char *pat
if (workdir && !git__prefixcmp(hintpath, workdir))
hintpath += strlen(workdir);
- error = blob_create_internal(
- oid, repo, git_buf_cstr(&full_path), hintpath, true);
+ error = git_blob__create_from_paths(
+ oid, NULL, repo, git_buf_cstr(&full_path), hintpath, 0, true);
git_buf_free(&full_path);
return error;
@@ -272,12 +289,9 @@ int git_blob_create_fromchunks(
git_filebuf file = GIT_FILEBUF_INIT;
git_buf path = GIT_BUF_INIT;
- if (git_buf_join_n(
- &path, '/', 3,
- git_repository_path(repo),
- GIT_OBJECTS_DIR,
- "streamed") < 0)
- goto cleanup;
+ if (git_buf_joinpath(
+ &path, git_repository_path(repo), GIT_OBJECTS_DIR "streamed") < 0)
+ goto cleanup;
content = git__malloc(BUFFER_SIZE);
GITERR_CHECK_ALLOC(content);
@@ -303,7 +317,8 @@ int git_blob_create_fromchunks(
if (git_filebuf_flush(&file) < 0)
goto cleanup;
- error = blob_create_internal(oid, repo, file.path_lock, hintpath, hintpath != NULL);
+ error = git_blob__create_from_paths(
+ oid, NULL, repo, file.path_lock, hintpath, 0, hintpath != NULL);
cleanup:
git_buf_free(&path);
diff --git a/src/blob.h b/src/blob.h
index 22e37cc3a..4cd9f1e0c 100644
--- a/src/blob.h
+++ b/src/blob.h
@@ -21,4 +21,13 @@ void git_blob__free(void *blob);
int git_blob__parse(void *blob, git_odb_object *obj);
int git_blob__getbuf(git_buf *buffer, git_blob *blob);
+extern int git_blob__create_from_paths(
+ git_oid *out_oid,
+ struct stat *out_st,
+ git_repository *repo,
+ const char *full_path,
+ const char *hint_path,
+ mode_t hint_mode,
+ bool apply_filters);
+
#endif
diff --git a/src/diff.h b/src/diff.h
index d09a130bc..d1bec00c6 100644
--- a/src/diff.h
+++ b/src/diff.h
@@ -16,6 +16,7 @@
#include "iterator.h"
#include "repository.h"
#include "pool.h"
+#include "odb.h"
#define DIFF_OLD_PREFIX_DEFAULT "a/"
#define DIFF_NEW_PREFIX_DEFAULT "b/"
@@ -108,5 +109,33 @@ extern void git_diff_find_similar__hashsig_free(void *sig, void *payload);
extern int git_diff_find_similar__calc_similarity(
int *score, void *siga, void *sigb, void *payload);
+/*
+ * Sometimes a git_diff_file will have a zero size; this attempts to
+ * fill in the size without loading the blob if possible. If that is
+ * not possible, then it will return the git_odb_object that had to be
+ * loaded and the caller can use it or dispose of it as needed.
+ */
+GIT_INLINE(int) git_diff_file__resolve_zero_size(
+ git_diff_file *file, git_odb_object **odb_obj, git_repository *repo)
+{
+ int error;
+ git_odb *odb;
+ size_t len;
+ git_otype type;
+
+ if ((error = git_repository_odb(&odb, repo)) < 0)
+ return error;
+
+ error = git_odb__read_header_or_object(
+ odb_obj, &len, &type, odb, &file->oid);
+
+ git_odb_free(odb);
+
+ if (!error)
+ file->size = (git_off_t)len;
+
+ return error;
+}
+
#endif
diff --git a/src/diff_file.c b/src/diff_file.c
index 9d06daafa..bcfef13cd 100644
--- a/src/diff_file.c
+++ b/src/diff_file.c
@@ -241,19 +241,9 @@ static int diff_file_content_load_blob(git_diff_file_content *fc)
/* if we don't know size, try to peek at object header first */
if (!fc->file->size) {
- git_odb *odb;
- size_t len;
- git_otype type;
-
- if (!(error = git_repository_odb__weakptr(&odb, fc->repo))) {
- error = git_odb__read_header_or_object(
- &odb_obj, &len, &type, odb, &fc->file->oid);
- git_odb_free(odb);
- }
- if (error)
+ if ((error = git_diff_file__resolve_zero_size(
+ fc->file, &odb_obj, fc->repo)) < 0)
return error;
-
- fc->file->size = len;
}
if (diff_file_content_binary_by_size(fc))
@@ -417,6 +407,9 @@ int git_diff_file_content__load(git_diff_file_content *fc)
void git_diff_file_content__unload(git_diff_file_content *fc)
{
+ if ((fc->flags & GIT_DIFF_FLAG__LOADED) == 0)
+ return;
+
if (fc->flags & GIT_DIFF_FLAG__FREE_DATA) {
git__free(fc->map.data);
fc->map.data = "";
diff --git a/src/diff_patch.c b/src/diff_patch.c
index 1b4adac03..69bb08198 100644
--- a/src/diff_patch.c
+++ b/src/diff_patch.c
@@ -230,6 +230,10 @@ static int diff_patch_generate(git_diff_patch *patch, git_diff_output *output)
if ((patch->flags & GIT_DIFF_PATCH_DIFFED) != 0)
return 0;
+ /* if we are not looking at the hunks and lines, don't do the diff */
+ if (!output->hunk_cb && !output->data_cb)
+ return 0;
+
if ((patch->flags & GIT_DIFF_PATCH_LOADED) == 0 &&
(error = diff_patch_load(patch, output)) < 0)
return error;
@@ -284,8 +288,8 @@ int git_diff_foreach(
if (diff_required(diff, "git_diff_foreach") < 0)
return -1;
- diff_output_init((git_diff_output *)&xo,
- &diff->opts, file_cb, hunk_cb, data_cb, payload);
+ diff_output_init(
+ &xo.output, &diff->opts, file_cb, hunk_cb, data_cb, payload);
git_xdiff_init(&xo, &diff->opts);
git_vector_foreach(&diff->deltas, idx, patch.delta) {
@@ -296,10 +300,10 @@ int git_diff_foreach(
if (!(error = diff_patch_init_from_diff(&patch, diff, idx))) {
- error = diff_patch_file_callback(&patch, (git_diff_output *)&xo);
+ error = diff_patch_file_callback(&patch, &xo.output);
if (!error)
- error = diff_patch_generate(&patch, (git_diff_output *)&xo);
+ error = diff_patch_generate(&patch, &xo.output);
git_diff_patch_free(&patch);
}
@@ -437,7 +441,7 @@ int git_diff_blobs(
memset(&xo, 0, sizeof(xo));
diff_output_init(
- (git_diff_output *)&xo, opts, file_cb, hunk_cb, data_cb, payload);
+ &xo.output, opts, file_cb, hunk_cb, data_cb, payload);
git_xdiff_init(&xo, opts);
if (!old_path && new_path)
@@ -448,7 +452,7 @@ int git_diff_blobs(
error = diff_patch_from_blobs(
&pd, &xo, old_blob, old_path, new_blob, new_path, opts);
- git_diff_patch_free((git_diff_patch *)&pd);
+ git_diff_patch_free(&pd.patch);
return error;
}
@@ -473,7 +477,7 @@ int git_diff_patch_from_blobs(
memset(&xo, 0, sizeof(xo));
- diff_output_to_patch((git_diff_output *)&xo, &pd->patch);
+ diff_output_to_patch(&xo.output, &pd->patch);
git_xdiff_init(&xo, opts);
error = diff_patch_from_blobs(
@@ -549,7 +553,7 @@ int git_diff_blob_to_buffer(
memset(&xo, 0, sizeof(xo));
diff_output_init(
- (git_diff_output *)&xo, opts, file_cb, hunk_cb, data_cb, payload);
+ &xo.output, opts, file_cb, hunk_cb, data_cb, payload);
git_xdiff_init(&xo, opts);
if (!old_path && buf_path)
@@ -560,7 +564,7 @@ int git_diff_blob_to_buffer(
error = diff_patch_from_blob_and_buffer(
&pd, &xo, old_blob, old_path, buf, buflen, buf_path, opts);
- git_diff_patch_free((git_diff_patch *)&pd);
+ git_diff_patch_free(&pd.patch);
return error;
}
@@ -586,7 +590,7 @@ int git_diff_patch_from_blob_and_buffer(
memset(&xo, 0, sizeof(xo));
- diff_output_to_patch((git_diff_output *)&xo, &pd->patch);
+ diff_output_to_patch(&xo.output, &pd->patch);
git_xdiff_init(&xo, opts);
error = diff_patch_from_blob_and_buffer(
@@ -638,13 +642,13 @@ int git_diff_get_patch(
if ((error = diff_patch_alloc_from_diff(&patch, diff, idx)) < 0)
return error;
- diff_output_to_patch((git_diff_output *)&xo, patch);
+ diff_output_to_patch(&xo.output, patch);
git_xdiff_init(&xo, &diff->opts);
- error = diff_patch_file_callback(patch, (git_diff_output *)&xo);
+ error = diff_patch_file_callback(patch, &xo.output);
if (!error)
- error = diff_patch_generate(patch, (git_diff_output *)&xo);
+ error = diff_patch_generate(patch, &xo.output);
if (!error) {
/* if cumulative diff size is < 0.5 total size, flatten the patch */
diff --git a/src/diff_tform.c b/src/diff_tform.c
index ac5356a8c..ba35d3c14 100644
--- a/src/diff_tform.c
+++ b/src/diff_tform.c
@@ -408,57 +408,99 @@ GIT_INLINE(git_diff_file *) similarity_get_file(git_diff_list *diff, size_t idx)
return (idx & 1) ? &delta->new_file : &delta->old_file;
}
-static int similarity_calc(
- git_diff_list *diff,
+typedef struct {
+ size_t idx;
+ git_iterator_type_t src;
+ git_repository *repo;
+ git_diff_file *file;
+ git_buf data;
+ git_odb_object *odb_obj;
+ git_blob *blob;
+} similarity_info;
+
+static int similarity_init(
+ similarity_info *info, git_diff_list *diff, size_t file_idx)
+{
+ info->idx = file_idx;
+ info->src = (file_idx & 1) ? diff->new_src : diff->old_src;
+ info->repo = diff->repo;
+ info->file = similarity_get_file(diff, file_idx);
+ info->odb_obj = NULL;
+ info->blob = NULL;
+ git_buf_init(&info->data, 0);
+
+ if (info->file->size > 0)
+ return 0;
+
+ return git_diff_file__resolve_zero_size(
+ info->file, &info->odb_obj, info->repo);
+}
+
+static int similarity_sig(
+ similarity_info *info,
const git_diff_find_options *opts,
- size_t file_idx,
void **cache)
{
int error = 0;
- git_diff_file *file = similarity_get_file(diff, file_idx);
- git_iterator_type_t src = (file_idx & 1) ? diff->new_src : diff->old_src;
-
- if (src == GIT_ITERATOR_TYPE_WORKDIR) { /* compute hashsig from file */
- git_buf path = GIT_BUF_INIT;
-
- /* TODO: apply wd-to-odb filters to file data if necessary */
+ git_diff_file *file = info->file;
+ if (info->src == GIT_ITERATOR_TYPE_WORKDIR) {
if ((error = git_buf_joinpath(
- &path, git_repository_workdir(diff->repo), file->path)) < 0)
+ &info->data, git_repository_workdir(info->repo), file->path)) < 0)
return error;
/* if path is not a regular file, just skip this item */
- if (git_path_isfile(path.ptr))
- error = opts->metric->file_signature(
- &cache[file_idx], file, path.ptr, opts->metric->payload);
+ if (!git_path_isfile(info->data.ptr))
+ return 0;
- git_buf_free(&path);
- } else { /* compute hashsig from blob buffer */
- git_blob *blob = NULL;
- git_off_t blobsize;
+ /* TODO: apply wd-to-odb filters to file data if necessary */
- /* TODO: add max size threshold a la diff? */
+ error = opts->metric->file_signature(
+ &cache[info->idx], info->file,
+ info->data.ptr, opts->metric->payload);
+ } else {
+ /* if we didn't initially know the size, we might have an odb_obj
+ * around from earlier, so convert that, otherwise load the blob now
+ */
+ if (info->odb_obj != NULL)
+ error = git_object__from_odb_object(
+ (git_object **)&info->blob, info->repo,
+ info->odb_obj, GIT_OBJ_BLOB);
+ else
+ error = git_blob_lookup(&info->blob, info->repo, &file->oid);
- if (git_blob_lookup(&blob, diff->repo, &file->oid) < 0) {
+ if (error < 0) {
/* if lookup fails, just skip this item in similarity calc */
giterr_clear();
- return 0;
- }
+ } else {
+ size_t sz;
- blobsize = git_blob_rawsize(blob);
- if (!git__is_sizet(blobsize)) /* ? what to do ? */
- blobsize = (size_t)-1;
+ /* index size may not be actual blob size if filtered */
+ if (file->size != git_blob_rawsize(info->blob))
+ file->size = git_blob_rawsize(info->blob);
- error = opts->metric->buffer_signature(
- &cache[file_idx], file, git_blob_rawcontent(blob),
- (size_t)blobsize, opts->metric->payload);
+ sz = (size_t)(git__is_sizet(file->size) ? file->size : -1);
- git_blob_free(blob);
+ error = opts->metric->buffer_signature(
+ &cache[info->idx], info->file,
+ git_blob_rawcontent(info->blob), sz, opts->metric->payload);
+ }
}
return error;
}
+static void similarity_unload(similarity_info *info)
+{
+ if (info->odb_obj)
+ git_odb_object_free(info->odb_obj);
+
+ if (info->blob)
+ git_blob_free(info->blob);
+ else
+ git_buf_free(&info->data);
+}
+
#define FLAG_SET(opts,flag_name) (((opts)->flags & flag_name) != 0)
/* - score < 0 means files cannot be compared
@@ -476,6 +518,8 @@ static int similarity_measure(
git_diff_file *a_file = similarity_get_file(diff, a_idx);
git_diff_file *b_file = similarity_get_file(diff, b_idx);
bool exact_match = FLAG_SET(opts, GIT_DIFF_FIND_EXACT_MATCH_ONLY);
+ int error = 0;
+ similarity_info a_info, b_info;
*score = -1;
@@ -510,19 +554,44 @@ static int similarity_measure(
return 0;
}
+ memset(&a_info, 0, sizeof(a_info));
+ memset(&b_info, 0, sizeof(b_info));
+
+ /* set up similarity data (will try to update missing file sizes) */
+ if (!cache[a_idx] && (error = similarity_init(&a_info, diff, a_idx)) < 0)
+ return error;
+ if (!cache[b_idx] && (error = similarity_init(&b_info, diff, b_idx)) < 0)
+ goto cleanup;
+
+ /* check if file sizes are nowhere near each other */
+ if (a_file->size > 127 &&
+ b_file->size > 127 &&
+ (a_file->size > (b_file->size << 3) ||
+ b_file->size > (a_file->size << 3)))
+ goto cleanup;
+
/* update signature cache if needed */
- if (!cache[a_idx] && similarity_calc(diff, opts, a_idx, cache) < 0)
- return -1;
- if (!cache[b_idx] && similarity_calc(diff, opts, b_idx, cache) < 0)
- return -1;
+ if (!cache[a_idx]) {
+ if ((error = similarity_sig(&a_info, opts, cache)) < 0)
+ goto cleanup;
+ }
+ if (!cache[b_idx]) {
+ if ((error = similarity_sig(&b_info, opts, cache)) < 0)
+ goto cleanup;
+ }
- /* some metrics may not wish to process this file (too big / too small) */
- if (!cache[a_idx] || !cache[b_idx])
- return 0;
+ /* calculate similarity provided that the metric choose to process
+ * both the a and b files (some may not if file is too big, etc).
+ */
+ if (cache[a_idx] && cache[b_idx])
+ error = opts->metric->similarity(
+ score, cache[a_idx], cache[b_idx], opts->metric->payload);
+
+cleanup:
+ similarity_unload(&a_info);
+ similarity_unload(&b_info);
- /* compare signatures */
- return opts->metric->similarity(
- score, cache[a_idx], cache[b_idx], opts->metric->payload);
+ return error;
}
static int calc_self_similarity(
@@ -693,25 +762,30 @@ int git_diff_find_similar(
git_diff_list *diff,
git_diff_find_options *given_opts)
{
- size_t i, j, sigcache_size;
+ size_t s, t;
int error = 0, similarity;
- git_diff_delta *from, *to;
+ git_diff_delta *src, *tgt;
git_diff_find_options opts;
- size_t num_srcs = 0, num_tgts = 0, tried_srcs = 0, tried_tgts = 0;
+ size_t num_deltas, num_srcs = 0, num_tgts = 0;
+ size_t tried_srcs = 0, tried_tgts = 0;
size_t num_rewrites = 0, num_updates = 0, num_bumped = 0;
void **sigcache; /* cache of similarity metric file signatures */
- diff_find_match *match_srcs = NULL, *match_tgts = NULL, *best_match;
+ diff_find_match *tgt2src = NULL;
+ diff_find_match *src2tgt = NULL;
+ diff_find_match *tgt2src_copy = NULL;
+ diff_find_match *best_match;
git_diff_file swap;
if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0)
return error;
+ num_deltas = diff->deltas.length;
+
/* TODO: maybe abort if deltas.length > rename_limit ??? */
- if (!git__is_uint32(diff->deltas.length))
+ if (!git__is_uint32(num_deltas))
return 0;
- sigcache_size = diff->deltas.length * 2; /* keep size b/c diff may change */
- sigcache = git__calloc(sigcache_size, sizeof(void *));
+ sigcache = git__calloc(num_deltas * 2, sizeof(void *));
GITERR_CHECK_ALLOC(sigcache);
/* Label rename sources and targets
@@ -719,11 +793,11 @@ int git_diff_find_similar(
* This will also set self-similarity scores for MODIFIED files and
* mark them for splitting if break-rewrites is enabled
*/
- git_vector_foreach(&diff->deltas, i, to) {
- if (is_rename_source(diff, &opts, i, sigcache))
+ git_vector_foreach(&diff->deltas, t, tgt) {
+ if (is_rename_source(diff, &opts, t, sigcache))
++num_srcs;
- if (is_rename_target(diff, &opts, i, sigcache))
+ if (is_rename_target(diff, &opts, t, sigcache))
++num_tgts;
}
@@ -731,10 +805,15 @@ int git_diff_find_similar(
if (!num_srcs || !num_tgts)
goto cleanup;
- match_tgts = git__calloc(diff->deltas.length, sizeof(diff_find_match));
- GITERR_CHECK_ALLOC(match_tgts);
- match_srcs = git__calloc(diff->deltas.length, sizeof(diff_find_match));
- GITERR_CHECK_ALLOC(match_srcs);
+ src2tgt = git__calloc(num_deltas, sizeof(diff_find_match));
+ GITERR_CHECK_ALLOC(src2tgt);
+ tgt2src = git__calloc(num_deltas, sizeof(diff_find_match));
+ GITERR_CHECK_ALLOC(tgt2src);
+
+ if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) {
+ tgt2src_copy = git__calloc(num_deltas, sizeof(diff_find_match));
+ GITERR_CHECK_ALLOC(tgt2src_copy);
+ }
/*
* Find best-fit matches for rename / copy candidates
@@ -743,47 +822,61 @@ int git_diff_find_similar(
find_best_matches:
tried_tgts = num_bumped = 0;
- git_vector_foreach(&diff->deltas, i, to) {
+ git_vector_foreach(&diff->deltas, t, tgt) {
/* skip things that are not rename targets */
- if ((to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
+ if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
continue;
tried_srcs = 0;
- git_vector_foreach(&diff->deltas, j, from) {
+ git_vector_foreach(&diff->deltas, s, src) {
/* skip things that are not rename sources */
- if ((from->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) == 0)
+ if ((src->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) == 0)
continue;
/* calculate similarity for this pair and find best match */
- if (i == j)
+ if (s == t)
similarity = -1; /* don't measure self-similarity here */
else if ((error = similarity_measure(
- &similarity, diff, &opts, sigcache, 2 * j, 2 * i + 1)) < 0)
+ &similarity, diff, &opts, sigcache, 2 * s, 2 * t + 1)) < 0)
goto cleanup;
- /* if this pairing is better for the src and the tgt, keep it */
- if (similarity > 0 &&
- match_tgts[i].similarity < (uint32_t)similarity &&
- match_srcs[j].similarity < (uint32_t)similarity)
+ if (similarity < 0)
+ continue;
+
+ /* is this a better rename? */
+ if (tgt2src[t].similarity < (uint32_t)similarity &&
+ src2tgt[s].similarity < (uint32_t)similarity)
{
- if (match_tgts[i].similarity > 0) {
- match_tgts[match_srcs[j].idx].similarity = 0;
- match_srcs[match_tgts[i].idx].similarity = 0;
- ++num_bumped;
+ /* eject old mapping */
+ if (src2tgt[s].similarity > 0) {
+ tgt2src[src2tgt[s].idx].similarity = 0;
+ num_bumped++;
+ }
+ if (tgt2src[t].similarity > 0) {
+ src2tgt[tgt2src[t].idx].similarity = 0;
+ num_bumped++;
}
- match_tgts[i].similarity = (uint32_t)similarity;
- match_tgts[i].idx = (uint32_t)j;
+ /* write new mapping */
+ tgt2src[t].idx = s;
+ tgt2src[t].similarity = (uint32_t)similarity;
+ src2tgt[s].idx = t;
+ src2tgt[s].similarity = (uint32_t)similarity;
+ }
- match_srcs[j].similarity = (uint32_t)similarity;
- match_srcs[j].idx = (uint32_t)i;
+ /* keep best absolute match for copies */
+ if (tgt2src_copy != NULL &&
+ tgt2src_copy[t].similarity < (uint32_t)similarity)
+ {
+ tgt2src_copy[t].idx = s;
+ tgt2src_copy[t].similarity = (uint32_t)similarity;
}
if (++tried_srcs >= num_srcs)
break;
- /* cap on maximum targets we'll examine (per "to" file) */
+ /* cap on maximum targets we'll examine (per "tgt" file) */
if (tried_srcs > opts.rename_limit)
break;
}
@@ -801,18 +894,21 @@ find_best_matches:
tried_tgts = 0;
- git_vector_foreach(&diff->deltas, i, to) {
+ git_vector_foreach(&diff->deltas, t, tgt) {
/* skip things that are not rename targets */
- if ((to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
+ if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
continue;
/* check if this delta was the target of a similarity */
- best_match = &match_tgts[i];
- if (!best_match->similarity)
+ if (tgt2src[t].similarity)
+ best_match = &tgt2src[t];
+ else if (tgt2src_copy && tgt2src_copy[t].similarity)
+ best_match = &tgt2src_copy[t];
+ else
continue;
- j = best_match->idx;
- from = GIT_VECTOR_GET(&diff->deltas, j);
+ s = best_match->idx;
+ src = GIT_VECTOR_GET(&diff->deltas, s);
/* possible scenarios:
* 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME
@@ -822,101 +918,112 @@ find_best_matches:
* 5. from OTHER to ADD/UNTRACK/IGNORE = OTHER + COPY
*/
- if (from->status == GIT_DELTA_DELETED) {
+ if (src->status == GIT_DELTA_DELETED) {
- if (delta_is_new_only(to)) {
+ if (delta_is_new_only(tgt)) {
if (best_match->similarity < opts.rename_threshold)
continue;
- delta_make_rename(to, from, best_match->similarity);
+ delta_make_rename(tgt, src, best_match->similarity);
- from->flags |= GIT_DIFF_FLAG__TO_DELETE;
+ src->flags |= GIT_DIFF_FLAG__TO_DELETE;
num_rewrites++;
} else {
- assert(delta_is_split(to));
+ assert(delta_is_split(tgt));
if (best_match->similarity < opts.rename_from_rewrite_threshold)
continue;
- memcpy(&swap, &to->old_file, sizeof(swap));
+ memcpy(&swap, &tgt->old_file, sizeof(swap));
- delta_make_rename(to, from, best_match->similarity);
+ delta_make_rename(tgt, src, best_match->similarity);
num_rewrites--;
- from->status = GIT_DELTA_DELETED;
- memcpy(&from->old_file, &swap, sizeof(from->old_file));
- memset(&from->new_file, 0, sizeof(from->new_file));
- from->new_file.path = from->old_file.path;
- from->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
+ src->status = GIT_DELTA_DELETED;
+ memcpy(&src->old_file, &swap, sizeof(src->old_file));
+ memset(&src->new_file, 0, sizeof(src->new_file));
+ src->new_file.path = src->old_file.path;
+ src->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
num_updates++;
+
+ if (src2tgt[t].similarity > 0 && src2tgt[t].idx > t) {
+ /* what used to be at src t is now at src s */
+ tgt2src[src2tgt[t].idx].idx = (uint32_t)s;
+ }
}
}
- else if (delta_is_split(from)) {
+ else if (delta_is_split(src)) {
- if (delta_is_new_only(to)) {
+ if (delta_is_new_only(tgt)) {
if (best_match->similarity < opts.rename_threshold)
continue;
- delta_make_rename(to, from, best_match->similarity);
+ delta_make_rename(tgt, src, best_match->similarity);
- from->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ?
+ src->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ?
GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED;
- memset(&from->old_file, 0, sizeof(from->old_file));
- from->old_file.path = from->new_file.path;
- from->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
+ memset(&src->old_file, 0, sizeof(src->old_file));
+ src->old_file.path = src->new_file.path;
+ src->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
- from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
+ src->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
num_updates++;
} else {
- assert(delta_is_split(from));
+ assert(delta_is_split(src));
if (best_match->similarity < opts.rename_from_rewrite_threshold)
continue;
- memcpy(&swap, &to->old_file, sizeof(swap));
+ memcpy(&swap, &tgt->old_file, sizeof(swap));
- delta_make_rename(to, from, best_match->similarity);
+ delta_make_rename(tgt, src, best_match->similarity);
num_rewrites--;
num_updates++;
- memcpy(&from->old_file, &swap, sizeof(from->old_file));
+ memcpy(&src->old_file, &swap, sizeof(src->old_file));
/* if we've just swapped the new element into the correct
* place, clear the SPLIT flag
*/
- if (match_tgts[j].idx == i &&
- match_tgts[j].similarity >
+ if (tgt2src[s].idx == t &&
+ tgt2src[s].similarity >
opts.rename_from_rewrite_threshold) {
-
- from->status = GIT_DELTA_RENAMED;
- from->similarity = match_tgts[j].similarity;
- match_tgts[j].similarity = 0;
- from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
+ src->status = GIT_DELTA_RENAMED;
+ src->similarity = tgt2src[s].similarity;
+ tgt2src[s].similarity = 0;
+ src->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
/* otherwise, if we just overwrote a source, update mapping */
- else if (j > i && match_srcs[i].similarity > 0) {
- match_tgts[match_srcs[i].idx].idx = (uint32_t)j;
+ else if (src2tgt[t].similarity > 0 && src2tgt[t].idx > t) {
+ /* what used to be at src t is now at src s */
+ tgt2src[src2tgt[t].idx].idx = (uint32_t)s;
}
num_updates++;
}
}
- else if (delta_is_new_only(to)) {
- if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES) ||
- best_match->similarity < opts.copy_threshold)
+ else if (delta_is_new_only(tgt)) {
+ if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES))
continue;
- to->status = GIT_DELTA_COPIED;
- to->similarity = best_match->similarity;
- memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
+ if (tgt2src_copy[t].similarity < opts.copy_threshold)
+ continue;
+
+ /* always use best possible source for copy */
+ best_match = &tgt2src_copy[t];
+ src = GIT_VECTOR_GET(&diff->deltas, best_match->idx);
+
+ tgt->status = GIT_DELTA_COPIED;
+ tgt->similarity = best_match->similarity;
+ memcpy(&tgt->old_file, &src->old_file, sizeof(tgt->old_file));
num_updates++;
}
@@ -932,12 +1039,13 @@ find_best_matches:
FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES));
cleanup:
- git__free(match_srcs);
- git__free(match_tgts);
+ git__free(tgt2src);
+ git__free(src2tgt);
+ git__free(tgt2src_copy);
- for (i = 0; i < sigcache_size; ++i) {
- if (sigcache[i] != NULL)
- opts.metric->free_signature(sigcache[i], opts.metric->payload);
+ for (t = 0; t < num_deltas * 2; ++t) {
+ if (sigcache[t] != NULL)
+ opts.metric->free_signature(sigcache[t], opts.metric->payload);
}
git__free(sigcache);
diff --git a/src/hashsig.c b/src/hashsig.c
index ab8d8b3f0..109f966ba 100644
--- a/src/hashsig.c
+++ b/src/hashsig.c
@@ -13,12 +13,15 @@ typedef uint64_t hashsig_state;
#define HASHSIG_SCALE 100
-#define HASHSIG_HASH_WINDOW 32
-#define HASHSIG_HASH_START 0
+#define HASHSIG_MAX_RUN 80
+#define HASHSIG_HASH_START 0x012345678ABCDEF0LL
#define HASHSIG_HASH_SHIFT 5
-#define HASHSIG_HASH_MASK 0x7FFFFFFF
+
+#define HASHSIG_HASH_MIX(S,CH) \
+ (S) = ((S) << HASHSIG_HASH_SHIFT) - (S) + (hashsig_state)(CH)
#define HASHSIG_HEAP_SIZE ((1 << 7) - 1)
+#define HASHSIG_HEAP_MIN_SIZE 4
typedef int (*hashsig_cmp)(const void *a, const void *b, void *);
@@ -28,14 +31,6 @@ typedef struct {
hashsig_t values[HASHSIG_HEAP_SIZE];
} hashsig_heap;
-typedef struct {
- hashsig_state state, shift_n;
- char window[HASHSIG_HASH_WINDOW];
- int win_len, win_pos, saw_lf;
-} hashsig_in_progress;
-
-#define HASHSIG_IN_PROGRESS_INIT { HASHSIG_HASH_START, 1, {0}, 0, 0, 1 }
-
struct git_hashsig {
hashsig_heap mins;
hashsig_heap maxs;
@@ -43,8 +38,8 @@ struct git_hashsig {
int considered;
};
-#define HEAP_LCHILD_OF(I) (((I)*2)+1)
-#define HEAP_RCHILD_OF(I) (((I)*2)+2)
+#define HEAP_LCHILD_OF(I) (((I)<<1)+1)
+#define HEAP_RCHILD_OF(I) (((I)<<1)+2)
#define HEAP_PARENT_OF(I) (((I)-1)>>1)
static void hashsig_heap_init(hashsig_heap *h, hashsig_cmp cmp)
@@ -115,134 +110,109 @@ static void hashsig_heap_sort(hashsig_heap *h)
static void hashsig_heap_insert(hashsig_heap *h, hashsig_t val)
{
- /* if heap is full, pop top if new element should replace it */
- if (h->size == h->asize && h->cmp(&val, &h->values[0], NULL) > 0) {
- h->size--;
- h->values[0] = h->values[h->size];
- hashsig_heap_down(h, 0);
- }
-
/* if heap is not full, insert new element */
if (h->size < h->asize) {
h->values[h->size++] = val;
hashsig_heap_up(h, h->size - 1);
}
-}
-
-GIT_INLINE(bool) hashsig_include_char(
- char ch, git_hashsig_option_t opt, int *saw_lf)
-{
- if ((opt & GIT_HASHSIG_IGNORE_WHITESPACE) && git__isspace(ch))
- return false;
-
- if (opt & GIT_HASHSIG_SMART_WHITESPACE) {
- if (ch == '\r' || (*saw_lf && git__isspace(ch)))
- return false;
- *saw_lf = (ch == '\n');
+ /* if heap is full, pop top if new element should replace it */
+ else if (h->cmp(&val, &h->values[0], NULL) > 0) {
+ h->size--;
+ h->values[0] = h->values[h->size];
+ hashsig_heap_down(h, 0);
}
- return true;
}
-static void hashsig_initial_window(
- git_hashsig *sig,
- const char **data,
- size_t size,
- hashsig_in_progress *prog)
-{
- hashsig_state state, shift_n;
- int win_len;
- const char *scan, *end;
-
- /* init until we have processed at least HASHSIG_HASH_WINDOW data */
-
- if (prog->win_len >= HASHSIG_HASH_WINDOW)
- return;
-
- state = prog->state;
- win_len = prog->win_len;
- shift_n = prog->shift_n;
-
- scan = *data;
- end = scan + size;
-
- while (scan < end && win_len < HASHSIG_HASH_WINDOW) {
- char ch = *scan++;
-
- if (!hashsig_include_char(ch, sig->opt, &prog->saw_lf))
- continue;
-
- state = (state * HASHSIG_HASH_SHIFT + ch) & HASHSIG_HASH_MASK;
-
- if (!win_len)
- shift_n = 1;
- else
- shift_n = (shift_n * HASHSIG_HASH_SHIFT) & HASHSIG_HASH_MASK;
-
- prog->window[win_len++] = ch;
- }
-
- /* insert initial hash if we just finished */
+typedef struct {
+ int use_ignores;
+ uint8_t ignore_ch[256];
+} hashsig_in_progress;
- if (win_len == HASHSIG_HASH_WINDOW) {
- hashsig_heap_insert(&sig->mins, (hashsig_t)state);
- hashsig_heap_insert(&sig->maxs, (hashsig_t)state);
- sig->considered = 1;
+static void hashsig_in_progress_init(
+ hashsig_in_progress *prog, git_hashsig *sig)
+{
+ int i;
+
+ switch (sig->opt) {
+ case GIT_HASHSIG_IGNORE_WHITESPACE:
+ for (i = 0; i < 256; ++i)
+ prog->ignore_ch[i] = git__isspace_nonlf(i);
+ prog->use_ignores = 1;
+ break;
+ case GIT_HASHSIG_SMART_WHITESPACE:
+ for (i = 0; i < 256; ++i)
+ prog->ignore_ch[i] = git__isspace(i);
+ prog->use_ignores = 1;
+ break;
+ default:
+ memset(prog, 0, sizeof(*prog));
+ break;
}
-
- prog->state = state;
- prog->win_len = win_len;
- prog->shift_n = shift_n;
-
- *data = scan;
}
+#define HASHSIG_IN_PROGRESS_INIT { 1 }
+
static int hashsig_add_hashes(
git_hashsig *sig,
- const char *data,
+ const uint8_t *data,
size_t size,
hashsig_in_progress *prog)
{
- const char *scan = data, *end = data + size;
- hashsig_state state, shift_n, rmv;
-
- if (prog->win_len < HASHSIG_HASH_WINDOW)
- hashsig_initial_window(sig, &scan, size, prog);
-
- state = prog->state;
- shift_n = prog->shift_n;
-
- /* advance window, adding new chars and removing old */
-
- for (; scan < end; ++scan) {
- char ch = *scan;
-
- if (!hashsig_include_char(ch, sig->opt, &prog->saw_lf))
- continue;
-
- rmv = shift_n * prog->window[prog->win_pos];
+ const uint8_t *scan = data, *end = data + size;
+ hashsig_state state = HASHSIG_HASH_START;
+ int use_ignores = prog->use_ignores, len;
+ uint8_t ch;
+
+ while (scan < end) {
+ state = HASHSIG_HASH_START;
+
+ for (len = 0; scan < end && len < HASHSIG_MAX_RUN; ) {
+ ch = *scan;
+
+ if (use_ignores)
+ for (; scan < end && git__isspace_nonlf(ch); ch = *scan)
+ ++scan;
+ else if (sig->opt != GIT_HASHSIG_NORMAL)
+ for (; scan < end && ch == '\r'; ch = *scan)
+ ++scan;
+
+ /* peek at next character to decide what to do next */
+ if (sig->opt == GIT_HASHSIG_SMART_WHITESPACE)
+ use_ignores = (ch == '\n');
+
+ if (scan >= end)
+ break;
+ ++scan;
+
+ /* check run terminator */
+ if (ch == '\n' || ch == '\0')
+ break;
+
+ ++len;
+ HASHSIG_HASH_MIX(state, ch);
+ }
- state = (state - rmv) & HASHSIG_HASH_MASK;
- state = (state * HASHSIG_HASH_SHIFT) & HASHSIG_HASH_MASK;
- state = (state + ch) & HASHSIG_HASH_MASK;
+ if (len > 0) {
+ hashsig_heap_insert(&sig->mins, (hashsig_t)state);
+ hashsig_heap_insert(&sig->maxs, (hashsig_t)state);
- hashsig_heap_insert(&sig->mins, (hashsig_t)state);
- hashsig_heap_insert(&sig->maxs, (hashsig_t)state);
- sig->considered++;
+ sig->considered++;
- prog->window[prog->win_pos] = ch;
- prog->win_pos = (prog->win_pos + 1) % HASHSIG_HASH_WINDOW;
+ while (scan < end && (*scan == '\n' || !*scan))
+ ++scan;
+ }
}
- prog->state = state;
+ prog->use_ignores = use_ignores;
return 0;
}
static int hashsig_finalize_hashes(git_hashsig *sig)
{
- if (sig->mins.size < HASHSIG_HEAP_SIZE) {
+ if (sig->mins.size < HASHSIG_HEAP_MIN_SIZE) {
giterr_set(GITERR_INVALID,
"File too small for similarity signature calculation");
return GIT_EBUFS;
@@ -274,11 +244,13 @@ int git_hashsig_create(
git_hashsig_option_t opts)
{
int error;
- hashsig_in_progress prog = HASHSIG_IN_PROGRESS_INIT;
+ hashsig_in_progress prog;
git_hashsig *sig = hashsig_alloc(opts);
GITERR_CHECK_ALLOC(sig);
- error = hashsig_add_hashes(sig, buf, buflen, &prog);
+ hashsig_in_progress_init(&prog, sig);
+
+ error = hashsig_add_hashes(sig, (const uint8_t *)buf, buflen, &prog);
if (!error)
error = hashsig_finalize_hashes(sig);
@@ -296,10 +268,10 @@ int git_hashsig_create_fromfile(
const char *path,
git_hashsig_option_t opts)
{
- char buf[4096];
+ uint8_t buf[0x1000];
ssize_t buflen = 0;
int error = 0, fd;
- hashsig_in_progress prog = HASHSIG_IN_PROGRESS_INIT;
+ hashsig_in_progress prog;
git_hashsig *sig = hashsig_alloc(opts);
GITERR_CHECK_ALLOC(sig);
@@ -308,6 +280,8 @@ int git_hashsig_create_fromfile(
return fd;
}
+ hashsig_in_progress_init(&prog, sig);
+
while (!error) {
if ((buflen = p_read(fd, buf, sizeof(buf))) <= 0) {
if ((error = (int)buflen) < 0)
@@ -362,6 +336,12 @@ static int hashsig_heap_compare(const hashsig_heap *a, const hashsig_heap *b)
int git_hashsig_compare(const git_hashsig *a, const git_hashsig *b)
{
- return (hashsig_heap_compare(&a->mins, &b->mins) +
- hashsig_heap_compare(&a->maxs, &b->maxs)) / 2;
+ /* if we have fewer than the maximum number of elements, then just use
+ * one array since the two arrays will be the same
+ */
+ if (a->mins.size < HASHSIG_HEAP_SIZE)
+ return hashsig_heap_compare(&a->mins, &b->mins);
+ else
+ return (hashsig_heap_compare(&a->mins, &b->mins) +
+ hashsig_heap_compare(&a->maxs, &b->maxs)) / 2;
}
diff --git a/src/index.c b/src/index.c
index 0610eb5b9..cbdd43bdc 100644
--- a/src/index.c
+++ b/src/index.c
@@ -16,6 +16,7 @@
#include "iterator.h"
#include "pathspec.h"
#include "ignore.h"
+#include "blob.h"
#include "git2/odb.h"
#include "git2/oid.h"
@@ -604,42 +605,23 @@ int git_index_entry__cmp_icase(const void *a, const void *b)
return strcasecmp(entry_a->path, entry_b->path);
}
-static int index_entry_init(git_index_entry **entry_out, git_index *index, const char *rel_path)
+static int index_entry_init(
+ git_index_entry **entry_out, git_index *index, const char *rel_path)
{
+ int error = 0;
git_index_entry *entry = NULL;
struct stat st;
git_oid oid;
- const char *workdir;
- git_buf full_path = GIT_BUF_INIT;
- int error;
if (INDEX_OWNER(index) == NULL)
return create_index_error(-1,
"Could not initialize index entry. "
"Index is not backed up by an existing repository.");
- workdir = git_repository_workdir(INDEX_OWNER(index));
-
- if (!workdir)
- return create_index_error(GIT_EBAREREPO,
- "Could not initialize index entry. Repository is bare");
-
- if ((error = git_buf_joinpath(&full_path, workdir, rel_path)) < 0)
- return error;
-
- if ((error = git_path_lstat(full_path.ptr, &st)) < 0) {
- git_buf_free(&full_path);
- return error;
- }
-
- git_buf_free(&full_path); /* done with full path */
-
- /* There is no need to validate the rel_path here, since it will be
- * immediately validated by the call to git_blob_create_fromfile.
- */
-
- /* write the blob to disk and get the oid */
- if ((error = git_blob_create_fromworkdir(&oid, INDEX_OWNER(index), rel_path)) < 0)
+ /* write the blob to disk and get the oid and stat info */
+ error = git_blob__create_from_paths(
+ &oid, &st, INDEX_OWNER(index), NULL, rel_path, 0, true);
+ if (error < 0)
return error;
entry = git__calloc(1, sizeof(git_index_entry));
diff --git a/src/util.h b/src/util.h
index a97c9bf39..ed9624770 100644
--- a/src/util.h
+++ b/src/util.h
@@ -294,6 +294,11 @@ GIT_INLINE(bool) git__isspace(int c)
return (c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r' || c == '\v' || c == 0x85 /* Unicode CR+LF */);
}
+GIT_INLINE(bool) git__isspace_nonlf(int c)
+{
+ return (c == ' ' || c == '\t' || c == '\f' || c == '\r' || c == '\v' || c == 0x85 /* Unicode CR+LF */);
+}
+
GIT_INLINE(bool) git__iswildcard(int c)
{
return (c == '*' || c == '?' || c == '[');