summaryrefslogtreecommitdiff
path: root/merge-recursive.c
diff options
context:
space:
mode:
authorElijah Newren <newren@gmail.com>2018-02-14 10:51:55 -0800
committerJunio C Hamano <gitster@pobox.com>2018-02-14 13:02:52 -0800
commit8383408dc79300211778b615b353b3463ccc369a (patch)
tree5348f3e2018c0299c99800bdafef9db289ab56ac /merge-recursive.c
parent84a548dedd6520b73eb8764a8bebd8ede81620c8 (diff)
downloadgit-8383408dc79300211778b615b353b3463ccc369a.tar.gz
merge-recursive: add get_directory_renames()
This populates a set of directory renames for us. The set of directory renames is not yet used, but will be in subsequent commits. Note that the use of a string_list for possible_new_dirs in the new dir_rename_entry struct implies an O(n^2) algorithm; however, in practice I expect the number of distinct directories that files were renamed into from a single original directory to be O(1). My guess is that n has a mode of 1 and a mean of less than 2, so, for now, string_list seems good enough for possible_new_dirs. Reviewed-by: Stefan Beller <sbeller@google.com> Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'merge-recursive.c')
-rw-r--r--merge-recursive.c224
1 files changed, 221 insertions, 3 deletions
diff --git a/merge-recursive.c b/merge-recursive.c
index 76c7a56a2d..7d3c4f4151 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -49,6 +49,44 @@ static unsigned int path_hash(const char *path)
return ignore_case ? strihash(path) : strhash(path);
}
+static struct dir_rename_entry *dir_rename_find_entry(struct hashmap *hashmap,
+ char *dir)
+{
+ struct dir_rename_entry key;
+
+ if (dir == NULL)
+ return NULL;
+ hashmap_entry_init(&key, strhash(dir));
+ key.dir = dir;
+ return hashmap_get(hashmap, &key, NULL);
+}
+
+static int dir_rename_cmp(const void *unused_cmp_data,
+ const void *entry,
+ const void *entry_or_key,
+ const void *unused_keydata)
+{
+ const struct dir_rename_entry *e1 = entry;
+ const struct dir_rename_entry *e2 = entry_or_key;
+
+ return strcmp(e1->dir, e2->dir);
+}
+
+static void dir_rename_init(struct hashmap *map)
+{
+ hashmap_init(map, dir_rename_cmp, NULL, 0);
+}
+
+static void dir_rename_entry_init(struct dir_rename_entry *entry,
+ char *directory)
+{
+ hashmap_entry_init(entry, strhash(directory));
+ entry->dir = directory;
+ entry->non_unique_new_dir = 0;
+ strbuf_init(&entry->new_dir, 0);
+ string_list_init(&entry->possible_new_dirs, 0);
+}
+
static void flush_output(struct merge_options *o)
{
if (o->buffer_output < 2 && o->obuf.len) {
@@ -1347,6 +1385,169 @@ static struct diff_queue_struct *get_diffpairs(struct merge_options *o,
return ret;
}
+static void get_renamed_dir_portion(const char *old_path, const char *new_path,
+ char **old_dir, char **new_dir)
+{
+ char *end_of_old, *end_of_new;
+ int old_len, new_len;
+
+ *old_dir = NULL;
+ *new_dir = NULL;
+
+ /*
+ * For
+ * "a/b/c/d/e/foo.c" -> "a/b/some/thing/else/e/foo.c"
+ * the "e/foo.c" part is the same, we just want to know that
+ * "a/b/c/d" was renamed to "a/b/some/thing/else"
+ * so, for this example, this function returns "a/b/c/d" in
+ * *old_dir and "a/b/some/thing/else" in *new_dir.
+ *
+ * Also, if the basename of the file changed, we don't care. We
+ * want to know which portion of the directory, if any, changed.
+ */
+ end_of_old = strrchr(old_path, '/');
+ end_of_new = strrchr(new_path, '/');
+
+ if (end_of_old == NULL || end_of_new == NULL)
+ return;
+ while (*--end_of_new == *--end_of_old &&
+ end_of_old != old_path &&
+ end_of_new != new_path)
+ ; /* Do nothing; all in the while loop */
+ /*
+ * We've found the first non-matching character in the directory
+ * paths. That means the current directory we were comparing
+ * represents the rename. Move end_of_old and end_of_new back
+ * to the full directory name.
+ */
+ if (*end_of_old == '/')
+ end_of_old++;
+ if (*end_of_old != '/')
+ end_of_new++;
+ end_of_old = strchr(end_of_old, '/');
+ end_of_new = strchr(end_of_new, '/');
+
+ /*
+ * It may have been the case that old_path and new_path were the same
+ * directory all along. Don't claim a rename if they're the same.
+ */
+ old_len = end_of_old - old_path;
+ new_len = end_of_new - new_path;
+
+ if (old_len != new_len || strncmp(old_path, new_path, old_len)) {
+ *old_dir = xstrndup(old_path, old_len);
+ *new_dir = xstrndup(new_path, new_len);
+ }
+}
+
+static struct hashmap *get_directory_renames(struct diff_queue_struct *pairs,
+ struct tree *tree)
+{
+ struct hashmap *dir_renames;
+ struct hashmap_iter iter;
+ struct dir_rename_entry *entry;
+ int i;
+
+ /*
+ * Typically, we think of a directory rename as all files from a
+ * certain directory being moved to a target directory. However,
+ * what if someone first moved two files from the original
+ * directory in one commit, and then renamed the directory
+ * somewhere else in a later commit? At merge time, we just know
+ * that files from the original directory went to two different
+ * places, and that the bulk of them ended up in the same place.
+ * We want each directory rename to represent where the bulk of the
+ * files from that directory end up; this function exists to find
+ * where the bulk of the files went.
+ *
+ * The first loop below simply iterates through the list of file
+ * renames, finding out how often each directory rename pair
+ * possibility occurs.
+ */
+ dir_renames = xmalloc(sizeof(struct hashmap));
+ dir_rename_init(dir_renames);
+ for (i = 0; i < pairs->nr; ++i) {
+ struct string_list_item *item;
+ int *count;
+ struct diff_filepair *pair = pairs->queue[i];
+ char *old_dir, *new_dir;
+
+ /* File not part of directory rename if it wasn't renamed */
+ if (pair->status != 'R')
+ continue;
+
+ get_renamed_dir_portion(pair->one->path, pair->two->path,
+ &old_dir, &new_dir);
+ if (!old_dir)
+ /* Directory didn't change at all; ignore this one. */
+ continue;
+
+ entry = dir_rename_find_entry(dir_renames, old_dir);
+ if (!entry) {
+ entry = xmalloc(sizeof(struct dir_rename_entry));
+ dir_rename_entry_init(entry, old_dir);
+ hashmap_put(dir_renames, entry);
+ } else {
+ free(old_dir);
+ }
+ item = string_list_lookup(&entry->possible_new_dirs, new_dir);
+ if (!item) {
+ item = string_list_insert(&entry->possible_new_dirs,
+ new_dir);
+ item->util = xcalloc(1, sizeof(int));
+ } else {
+ free(new_dir);
+ }
+ count = item->util;
+ *count += 1;
+ }
+
+ /*
+ * For each directory with files moved out of it, we find out which
+ * target directory received the most files so we can declare it to
+ * be the "winning" target location for the directory rename. This
+ * winner gets recorded in new_dir. If there is no winner
+ * (multiple target directories received the same number of files),
+ * we set non_unique_new_dir. Once we've determined the winner (or
+ * that there is no winner), we no longer need possible_new_dirs.
+ */
+ hashmap_iter_init(dir_renames, &iter);
+ while ((entry = hashmap_iter_next(&iter))) {
+ int max = 0;
+ int bad_max = 0;
+ char *best = NULL;
+
+ for (i = 0; i < entry->possible_new_dirs.nr; i++) {
+ int *count = entry->possible_new_dirs.items[i].util;
+
+ if (*count == max)
+ bad_max = max;
+ else if (*count > max) {
+ max = *count;
+ best = entry->possible_new_dirs.items[i].string;
+ }
+ }
+ if (bad_max == max)
+ entry->non_unique_new_dir = 1;
+ else {
+ assert(entry->new_dir.len == 0);
+ strbuf_addstr(&entry->new_dir, best);
+ }
+ /*
+ * The relevant directory sub-portion of the original full
+ * filepaths were xstrndup'ed before inserting into
+ * possible_new_dirs, and instead of manually iterating the
+ * list and free'ing each, just lie and tell
+ * possible_new_dirs that it did the strdup'ing so that it
+ * will free them for us.
+ */
+ entry->possible_new_dirs.strdup_strings = 1;
+ string_list_clear(&entry->possible_new_dirs, 1);
+ }
+
+ return dir_renames;
+}
+
/*
* Get information of all renames which occurred in 'pairs', making use of
* any implicit directory renames inferred from the other side of history.
@@ -1658,8 +1859,21 @@ struct rename_info {
struct string_list *merge_renames;
};
-static void initial_cleanup_rename(struct diff_queue_struct *pairs)
+static void initial_cleanup_rename(struct diff_queue_struct *pairs,
+ struct hashmap *dir_renames)
{
+ struct hashmap_iter iter;
+ struct dir_rename_entry *e;
+
+ hashmap_iter_init(dir_renames, &iter);
+ while ((e = hashmap_iter_next(&iter))) {
+ free(e->dir);
+ strbuf_release(&e->new_dir);
+ /* possible_new_dirs already cleared in get_directory_renames */
+ }
+ hashmap_free(dir_renames, 1);
+ free(dir_renames);
+
free(pairs->queue);
free(pairs);
}
@@ -1672,6 +1886,7 @@ static int handle_renames(struct merge_options *o,
struct rename_info *ri)
{
struct diff_queue_struct *head_pairs, *merge_pairs;
+ struct hashmap *dir_re_head, *dir_re_merge;
int clean;
ri->head_renames = NULL;
@@ -1683,6 +1898,9 @@ static int handle_renames(struct merge_options *o,
head_pairs = get_diffpairs(o, common, head);
merge_pairs = get_diffpairs(o, common, merge);
+ dir_re_head = get_directory_renames(head_pairs, head);
+ dir_re_merge = get_directory_renames(merge_pairs, merge);
+
ri->head_renames = get_renames(o, head_pairs, head,
common, head, merge, entries);
ri->merge_renames = get_renames(o, merge_pairs, merge,
@@ -1694,8 +1912,8 @@ static int handle_renames(struct merge_options *o,
* data structures are still needed and referenced in
* process_entry(). But there are a few things we can free now.
*/
- initial_cleanup_rename(head_pairs);
- initial_cleanup_rename(merge_pairs);
+ initial_cleanup_rename(head_pairs, dir_re_head);
+ initial_cleanup_rename(merge_pairs, dir_re_merge);
return clean;
}