diff options
| -rw-r--r-- | Makefile | 6 | ||||
| -rw-r--r-- | cache.h | 15 | ||||
| -rw-r--r-- | combine-diff.c | 168 | ||||
| -rw-r--r-- | compat/win32/alloca.h (renamed from compat/vcbuild/include/alloca.h) | 0 | ||||
| -rw-r--r-- | config.mak.uname | 11 | ||||
| -rw-r--r-- | configure.ac | 8 | ||||
| -rw-r--r-- | diff.c | 2 | ||||
| -rw-r--r-- | diff.h | 11 | ||||
| -rw-r--r-- | git-compat-util.h | 8 | ||||
| -rw-r--r-- | tree-diff.c | 664 | 
10 files changed, 723 insertions, 170 deletions
| @@ -30,6 +30,8 @@ all::  # Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in  # /foo/bar/include and /foo/bar/lib directories.  # +# Define HAVE_ALLOCA_H if you have working alloca(3) defined in that header. +#  # Define NO_CURL if you do not have libcurl installed.  git-http-fetch and  # git-http-push are not built, and you cannot use http:// and https://  # transports (neither smart nor dumb). @@ -1111,6 +1113,10 @@ ifdef USE_LIBPCRE  	EXTLIBS += -lpcre  endif +ifdef HAVE_ALLOCA_H +	BASIC_CFLAGS += -DHAVE_ALLOCA_H +endif +  ifdef NO_CURL  	BASIC_CFLAGS += -DNO_CURL  	REMOTE_CURL_PRIMARY = @@ -75,6 +75,21 @@ unsigned long git_deflate_bound(git_zstream *, unsigned long);  #define S_ISGITLINK(m)	(((m) & S_IFMT) == S_IFGITLINK)  /* + * Some mode bits are also used internally for computations. + * + * They *must* not overlap with any valid modes, and they *must* not be emitted + * to outside world - i.e. appear on disk or network. In other words, it's just + * temporary fields, which we internally use, but they have to stay in-house. + * + * ( such approach is valid, as standard S_IF* fits into 16 bits, and in Git + *   codebase mode is `unsigned int` which is assumed to be at least 32 bits ) + */ + +/* used internally in tree-diff */ +#define S_DIFFTREE_IFXMIN_NEQ	0x80000000 + + +/*   * Intensive research over the course of many years has shown that   * port 9418 is totally unused by anything else. Or   * diff --git a/combine-diff.c b/combine-diff.c index 24ca7e2334..12764fb733 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -1301,6 +1301,81 @@ static const char *path_path(void *obj)  	return path->path;  } + +/* find set of paths that every parent touches */ +static struct combine_diff_path *find_paths_generic(const unsigned char *sha1, +	const struct sha1_array *parents, struct diff_options *opt) +{ +	struct combine_diff_path *paths = NULL; +	int i, num_parent = parents->nr; + +	int output_format = opt->output_format; +	const char *orderfile = opt->orderfile; + +	opt->output_format = DIFF_FORMAT_NO_OUTPUT; +	/* tell diff_tree to emit paths in sorted (=tree) order */ +	opt->orderfile = NULL; + +	/* D(A,P1...Pn) = D(A,P1) ^ ... ^ D(A,Pn)  (wrt paths) */ +	for (i = 0; i < num_parent; i++) { +		/* +		 * show stat against the first parent even when doing +		 * combined diff. +		 */ +		int stat_opt = (output_format & +				(DIFF_FORMAT_NUMSTAT|DIFF_FORMAT_DIFFSTAT)); +		if (i == 0 && stat_opt) +			opt->output_format = stat_opt; +		else +			opt->output_format = DIFF_FORMAT_NO_OUTPUT; +		diff_tree_sha1(parents->sha1[i], sha1, "", opt); +		diffcore_std(opt); +		paths = intersect_paths(paths, i, num_parent); + +		/* if showing diff, show it in requested order */ +		if (opt->output_format != DIFF_FORMAT_NO_OUTPUT && +		    orderfile) { +			diffcore_order(orderfile); +		} + +		diff_flush(opt); +	} + +	opt->output_format = output_format; +	opt->orderfile = orderfile; +	return paths; +} + + +/* + * find set of paths that everybody touches, assuming diff is run without + * rename/copy detection, etc, comparing all trees simultaneously (= faster). + */ +static struct combine_diff_path *find_paths_multitree( +	const unsigned char *sha1, const struct sha1_array *parents, +	struct diff_options *opt) +{ +	int i, nparent = parents->nr; +	const unsigned char **parents_sha1; +	struct combine_diff_path paths_head; +	struct strbuf base; + +	parents_sha1 = xmalloc(nparent * sizeof(parents_sha1[0])); +	for (i = 0; i < nparent; i++) +		parents_sha1[i] = parents->sha1[i]; + +	/* fake list head, so worker can assume it is non-NULL */ +	paths_head.next = NULL; + +	strbuf_init(&base, PATH_MAX); +	diff_tree_paths(&paths_head, sha1, parents_sha1, nparent, &base, opt); + +	strbuf_release(&base); +	free(parents_sha1); +	return paths_head.next; +} + +  void diff_tree_combined(const unsigned char *sha1,  			const struct sha1_array *parents,  			int dense, @@ -1308,49 +1383,82 @@ void diff_tree_combined(const unsigned char *sha1,  {  	struct diff_options *opt = &rev->diffopt;  	struct diff_options diffopts; -	struct combine_diff_path *p, *paths = NULL; +	struct combine_diff_path *p, *paths;  	int i, num_paths, needsep, show_log_first, num_parent = parents->nr; +	int need_generic_pathscan; + +	/* nothing to do, if no parents */ +	if (!num_parent) +		return; + +	show_log_first = !!rev->loginfo && !rev->no_commit_id; +	needsep = 0; +	if (show_log_first) { +		show_log(rev); + +		if (rev->verbose_header && opt->output_format) +			printf("%s%c", diff_line_prefix(opt), +			       opt->line_termination); +	}  	diffopts = *opt;  	copy_pathspec(&diffopts.pathspec, &opt->pathspec); -	diffopts.output_format = DIFF_FORMAT_NO_OUTPUT;  	DIFF_OPT_SET(&diffopts, RECURSIVE);  	DIFF_OPT_CLR(&diffopts, ALLOW_EXTERNAL); -	/* tell diff_tree to emit paths in sorted (=tree) order */ -	diffopts.orderfile = NULL; -	show_log_first = !!rev->loginfo && !rev->no_commit_id; -	needsep = 0; -	/* find set of paths that everybody touches */ -	for (i = 0; i < num_parent; i++) { -		/* show stat against the first parent even +	/* find set of paths that everybody touches +	 * +	 * NOTE +	 * +	 * Diffcore transformations are bound to diff_filespec and logic +	 * comparing two entries - i.e. they do not apply directly to combine +	 * diff. +	 * +	 * If some of such transformations is requested - we launch generic +	 * path scanning, which works significantly slower compared to +	 * simultaneous all-trees-in-one-go scan in find_paths_multitree(). +	 * +	 * TODO some of the filters could be ported to work on +	 * combine_diff_paths - i.e. all functionality that skips paths, so in +	 * theory, we could end up having only multitree path scanning. +	 * +	 * NOTE please keep this semantically in sync with diffcore_std() +	 */ +	need_generic_pathscan = opt->skip_stat_unmatch	|| +			DIFF_OPT_TST(opt, FOLLOW_RENAMES)	|| +			opt->break_opt != -1	|| +			opt->detect_rename	|| +			opt->pickaxe		|| +			opt->filter; + + +	if (need_generic_pathscan) { +		/* +		 * NOTE generic case also handles --stat, as it computes +		 * diff(sha1,parent_i) for all i to do the job, specifically +		 * for parent0. +		 */ +		paths = find_paths_generic(sha1, parents, &diffopts); +	} +	else { +		int stat_opt; +		paths = find_paths_multitree(sha1, parents, &diffopts); + +		/* +		 * show stat against the first parent even  		 * when doing combined diff.  		 */ -		int stat_opt = (opt->output_format & +		stat_opt = (opt->output_format &  				(DIFF_FORMAT_NUMSTAT|DIFF_FORMAT_DIFFSTAT)); -		if (i == 0 && stat_opt) +		if (stat_opt) {  			diffopts.output_format = stat_opt; -		else -			diffopts.output_format = DIFF_FORMAT_NO_OUTPUT; -		diff_tree_sha1(parents->sha1[i], sha1, "", &diffopts); -		diffcore_std(&diffopts); -		paths = intersect_paths(paths, i, num_parent); -		if (show_log_first && i == 0) { -			show_log(rev); - -			if (rev->verbose_header && opt->output_format) -				printf("%s%c", diff_line_prefix(opt), -				       opt->line_termination); +			diff_tree_sha1(parents->sha1[0], sha1, "", &diffopts); +			diffcore_std(&diffopts); +			if (opt->orderfile) +				diffcore_order(opt->orderfile); +			diff_flush(&diffopts);  		} - -		/* if showing diff, show it in requested order */ -		if (diffopts.output_format != DIFF_FORMAT_NO_OUTPUT && -		    opt->orderfile) { -			diffcore_order(opt->orderfile); -		} - -		diff_flush(&diffopts);  	}  	/* find out number of surviving paths */ diff --git a/compat/vcbuild/include/alloca.h b/compat/win32/alloca.h index c0d7985b7e..c0d7985b7e 100644 --- a/compat/vcbuild/include/alloca.h +++ b/compat/win32/alloca.h diff --git a/config.mak.uname b/config.mak.uname index 23a8803656..97acf8fea6 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -28,6 +28,7 @@ ifeq ($(uname_S),OSF1)  	NO_NSEC = YesPlease  endif  ifeq ($(uname_S),Linux) +	HAVE_ALLOCA_H = YesPlease  	NO_STRLCPY = YesPlease  	NO_MKSTEMPS = YesPlease  	HAVE_PATHS_H = YesPlease @@ -35,6 +36,7 @@ ifeq ($(uname_S),Linux)  	HAVE_DEV_TTY = YesPlease  endif  ifeq ($(uname_S),GNU/kFreeBSD) +	HAVE_ALLOCA_H = YesPlease  	NO_STRLCPY = YesPlease  	NO_MKSTEMPS = YesPlease  	HAVE_PATHS_H = YesPlease @@ -103,6 +105,7 @@ ifeq ($(uname_S),SunOS)  	NEEDS_NSL = YesPlease  	SHELL_PATH = /bin/bash  	SANE_TOOL_PATH = /usr/xpg6/bin:/usr/xpg4/bin +	HAVE_ALLOCA_H = YesPlease  	NO_STRCASESTR = YesPlease  	NO_MEMMEM = YesPlease  	NO_MKDTEMP = YesPlease @@ -145,7 +148,7 @@ ifeq ($(uname_S),SunOS)  	endif  	INSTALL = /usr/ucb/install  	TAR = gtar -	BASIC_CFLAGS += -D__EXTENSIONS__ -D__sun__ -DHAVE_ALLOCA_H +	BASIC_CFLAGS += -D__EXTENSIONS__ -D__sun__  endif  ifeq ($(uname_O),Cygwin)  	ifeq ($(shell expr "$(uname_R)" : '1\.[1-6]\.'),4) @@ -165,6 +168,7 @@ ifeq ($(uname_O),Cygwin)  	else  		NO_REGEX = UnfortunatelyYes  	endif +	HAVE_ALLOCA_H = YesPlease  	NEEDS_LIBICONV = YesPlease  	NO_FAST_WORKING_DIRECTORY = UnfortunatelyYes  	NO_ST_BLOCKS_IN_STRUCT_STAT = YesPlease @@ -239,6 +243,7 @@ ifeq ($(uname_S),AIX)  endif  ifeq ($(uname_S),GNU)  	# GNU/Hurd +	HAVE_ALLOCA_H = YesPlease  	NO_STRLCPY = YesPlease  	NO_MKSTEMPS = YesPlease  	HAVE_PATHS_H = YesPlease @@ -313,6 +318,7 @@ endif  ifeq ($(uname_S),Windows)  	GIT_VERSION := $(GIT_VERSION).MSVC  	pathsep = ; +	HAVE_ALLOCA_H = YesPlease  	NO_PREAD = YesPlease  	NEEDS_CRYPTO_WITH_SSL = YesPlease  	NO_LIBGEN_H = YesPlease @@ -357,7 +363,7 @@ ifeq ($(uname_S),Windows)  	COMPAT_OBJS = compat/msvc.o compat/winansi.o \  		compat/win32/pthread.o compat/win32/syslog.o \  		compat/win32/dirent.o -	COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DNOGDI -DHAVE_STRING_H -DHAVE_ALLOCA_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\" +	COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DNOGDI -DHAVE_STRING_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\"  	BASIC_LDFLAGS = -IGNORE:4217 -IGNORE:4049 -NOLOGO -SUBSYSTEM:CONSOLE -NODEFAULTLIB:MSVCRT.lib  	EXTLIBS = user32.lib advapi32.lib shell32.lib wininet.lib ws2_32.lib invalidcontinue.obj  	PTHREAD_LIBS = @@ -465,6 +471,7 @@ ifeq ($(uname_S),NONSTOP_KERNEL)  endif  ifneq (,$(findstring MINGW,$(uname_S)))  	pathsep = ; +	HAVE_ALLOCA_H = YesPlease  	NO_PREAD = YesPlease  	NEEDS_CRYPTO_WITH_SSL = YesPlease  	NO_LIBGEN_H = YesPlease diff --git a/configure.ac b/configure.ac index b7112542b4..4b1ae7c3c9 100644 --- a/configure.ac +++ b/configure.ac @@ -272,6 +272,14 @@ AS_HELP_STRING([],           [ARG can be also prefix for libpcre library and hea  	GIT_CONF_SUBST([LIBPCREDIR])      fi)  # +# Define HAVE_ALLOCA_H if you have working alloca(3) defined in that header. +AC_FUNC_ALLOCA +case $ac_cv_working_alloca_h in +    yes)    HAVE_ALLOCA_H=YesPlease;; +    *)      HAVE_ALLOCA_H='';; +esac +GIT_CONF_SUBST([HAVE_ALLOCA_H]) +#  # Define NO_CURL if you do not have curl installed.  git-http-pull and  # git-http-push are not built, and you cannot use http:// and https://  # transports. @@ -3205,6 +3205,7 @@ void diff_setup(struct diff_options *options)  	options->context = diff_context_default;  	DIFF_OPT_SET(options, RENAME_EMPTY); +	/* pathchange left =NULL by default */  	options->change = diff_change;  	options->add_remove = diff_addremove;  	options->use_color = diff_use_color_default; @@ -4749,6 +4750,7 @@ void diffcore_fix_diff_index(struct diff_options *options)  void diffcore_std(struct diff_options *options)  { +	/* NOTE please keep the following in sync with diff_tree_combined() */  	if (options->skip_stat_unmatch)  		diffcore_skip_stat_unmatch(options);  	if (!options->found_follow) { @@ -15,6 +15,10 @@ struct diff_filespec;  struct userdiff_driver;  struct sha1_array;  struct commit; +struct combine_diff_path; + +typedef int (*pathchange_fn_t)(struct diff_options *options, +		 struct combine_diff_path *path);  typedef void (*change_fn_t)(struct diff_options *options,  		 unsigned old_mode, unsigned new_mode, @@ -157,6 +161,7 @@ struct diff_options {  	int close_file;  	struct pathspec pathspec; +	pathchange_fn_t pathchange;  	change_fn_t change;  	add_remove_fn_t add_remove;  	diff_format_fn_t format_callback; @@ -189,8 +194,10 @@ const char *diff_line_prefix(struct diff_options *);  extern const char mime_boundary_leader[]; -extern int diff_tree(struct tree_desc *t1, struct tree_desc *t2, -		     const char *base, struct diff_options *opt); +extern struct combine_diff_path *diff_tree_paths( +	struct combine_diff_path *p, const unsigned char *sha1, +	const unsigned char **parent_sha1, int nparent, +	struct strbuf *base, struct diff_options *opt);  extern int diff_tree_sha1(const unsigned char *old, const unsigned char *new,  			  const char *base, struct diff_options *opt);  extern int diff_root_tree_sha1(const unsigned char *new, const char *base, diff --git a/git-compat-util.h b/git-compat-util.h index f6d3a46622..7fe1ffda08 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -521,6 +521,14 @@ extern void release_pack_memory(size_t);  typedef void (*try_to_free_t)(size_t);  extern try_to_free_t set_try_to_free_routine(try_to_free_t); +#ifdef HAVE_ALLOCA_H +# include <alloca.h> +# define xalloca(size)      (alloca(size)) +# define xalloca_free(p)    do {} while (0) +#else +# define xalloca(size)      (xmalloc(size)) +# define xalloca_free(p)    (free(p)) +#endif  extern char *xstrdup(const char *str);  extern void *xmalloc(size_t size);  extern void *xmallocz(size_t size); diff --git a/tree-diff.c b/tree-diff.c index 11c3550177..e7b378c8b2 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -6,121 +6,293 @@  #include "diffcore.h"  #include "tree.h" -static void show_entry(struct diff_options *opt, const char *prefix, -		       struct tree_desc *desc, struct strbuf *base); +/* + * internal mode marker, saying a tree entry != entry of tp[imin] + * (see ll_diff_tree_paths for what it means there) + * + * we will update/use/emit entry for diff only with it unset. + */ +#define S_IFXMIN_NEQ	S_DIFFTREE_IFXMIN_NEQ -static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, -			      struct strbuf *base, struct diff_options *opt) -{ -	unsigned mode1, mode2; -	const char *path1, *path2; -	const unsigned char *sha1, *sha2; -	int cmp, pathlen1, pathlen2; -	int old_baselen = base->len; -	sha1 = tree_entry_extract(t1, &path1, &mode1); -	sha2 = tree_entry_extract(t2, &path2, &mode2); +static struct combine_diff_path *ll_diff_tree_paths( +	struct combine_diff_path *p, const unsigned char *sha1, +	const unsigned char **parents_sha1, int nparent, +	struct strbuf *base, struct diff_options *opt); +static int ll_diff_tree_sha1(const unsigned char *old, const unsigned char *new, +			     struct strbuf *base, struct diff_options *opt); + +/* + * Compare two tree entries, taking into account only path/S_ISDIR(mode), + * but not their sha1's. + * + * NOTE files and directories *always* compare differently, even when having + *      the same name - thanks to base_name_compare(). + * + * NOTE empty (=invalid) descriptor(s) take part in comparison as +infty, + *      so that they sort *after* valid tree entries. + * + *      Due to this convention, if trees are scanned in sorted order, all + *      non-empty descriptors will be processed first. + */ +static int tree_entry_pathcmp(struct tree_desc *t1, struct tree_desc *t2) +{ +	struct name_entry *e1, *e2; +	int cmp; -	pathlen1 = tree_entry_len(&t1->entry); -	pathlen2 = tree_entry_len(&t2->entry); -	cmp = base_name_compare(path1, pathlen1, mode1, path2, pathlen2, mode2); -	if (cmp < 0) { -		show_entry(opt, "-", t1, base); +	/* empty descriptors sort after valid tree entries */ +	if (!t1->size) +		return t2->size ? 1 : 0; +	else if (!t2->size)  		return -1; -	} -	if (cmp > 0) { -		show_entry(opt, "+", t2, base); -		return 1; -	} -	if (!DIFF_OPT_TST(opt, FIND_COPIES_HARDER) && !hashcmp(sha1, sha2) && mode1 == mode2) -		return 0; -	/* -	 * If the filemode has changed to/from a directory from/to a regular -	 * file, we need to consider it a remove and an add. -	 */ -	if (S_ISDIR(mode1) != S_ISDIR(mode2)) { -		show_entry(opt, "-", t1, base); -		show_entry(opt, "+", t2, base); -		return 0; -	} +	e1 = &t1->entry; +	e2 = &t2->entry; +	cmp = base_name_compare(e1->path, tree_entry_len(e1), e1->mode, +				e2->path, tree_entry_len(e2), e2->mode); +	return cmp; +} -	strbuf_add(base, path1, pathlen1); -	if (DIFF_OPT_TST(opt, RECURSIVE) && S_ISDIR(mode1)) { -		if (DIFF_OPT_TST(opt, TREE_IN_RECURSIVE)) { -			opt->change(opt, mode1, mode2, -				    sha1, sha2, 1, 1, base->buf, 0, 0); + +/* + * convert path -> opt->diff_*() callbacks + * + * emits diff to first parent only, and tells diff tree-walker that we are done + * with p and it can be freed. + */ +static int emit_diff_first_parent_only(struct diff_options *opt, struct combine_diff_path *p) +{ +	struct combine_diff_parent *p0 = &p->parent[0]; +	if (p->mode && p0->mode) { +		opt->change(opt, p0->mode, p->mode, p0->sha1, p->sha1, +			1, 1, p->path, 0, 0); +	} +	else { +		const unsigned char *sha1; +		unsigned int mode; +		int addremove; + +		if (p->mode) { +			addremove = '+'; +			sha1 = p->sha1; +			mode = p->mode; +		} else { +			addremove = '-'; +			sha1 = p0->sha1; +			mode = p0->mode;  		} -		strbuf_addch(base, '/'); -		diff_tree_sha1(sha1, sha2, base->buf, opt); -	} else { -		opt->change(opt, mode1, mode2, sha1, sha2, 1, 1, base->buf, 0, 0); + +		opt->add_remove(opt, addremove, mode, sha1, 1, p->path, 0);  	} -	strbuf_setlen(base, old_baselen); -	return 0; + +	return 0;	/* we are done with p */  } -/* A whole sub-tree went away or appeared */ -static void show_tree(struct diff_options *opt, const char *prefix, -		      struct tree_desc *desc, struct strbuf *base) + +/* + * Make a new combine_diff_path from path/mode/sha1 + * and append it to paths list tail. + * + * Memory for created elements could be reused: + * + *	- if last->next == NULL, the memory is allocated; + * + *	- if last->next != NULL, it is assumed that p=last->next was returned + *	  earlier by this function, and p->next was *not* modified. + *	  The memory is then reused from p. + * + * so for clients, + * + * - if you do need to keep the element + * + *	p = path_appendnew(p, ...); + *	process(p); + *	p->next = NULL; + * + * - if you don't need to keep the element after processing + * + *	pprev = p; + *	p = path_appendnew(p, ...); + *	process(p); + *	p = pprev; + *	; don't forget to free tail->next in the end + * + * p->parent[] remains uninitialized. + */ +static struct combine_diff_path *path_appendnew(struct combine_diff_path *last, +	int nparent, const struct strbuf *base, const char *path, int pathlen, +	unsigned mode, const unsigned char *sha1)  { -	enum interesting match = entry_not_interesting; -	for (; desc->size; update_tree_entry(desc)) { -		if (match != all_entries_interesting) { -			match = tree_entry_interesting(&desc->entry, base, 0, -						       &opt->pathspec); -			if (match == all_entries_not_interesting) -				break; -			if (match == entry_not_interesting) -				continue; -		} -		show_entry(opt, prefix, desc, base); +	struct combine_diff_path *p; +	int len = base->len + pathlen; +	int alloclen = combine_diff_path_size(nparent, len); + +	/* if last->next is !NULL - it is a pre-allocated memory, we can reuse */ +	p = last->next; +	if (p && (alloclen > (intptr_t)p->next)) { +		free(p); +		p = NULL; +	} + +	if (!p) { +		p = xmalloc(alloclen); + +		/* +		 * until we go to it next round, .next holds how many bytes we +		 * allocated (for faster realloc - we don't need copying old data). +		 */ +		p->next = (struct combine_diff_path *)(intptr_t)alloclen;  	} + +	last->next = p; + +	p->path = (char *)&(p->parent[nparent]); +	memcpy(p->path, base->buf, base->len); +	memcpy(p->path + base->len, path, pathlen); +	p->path[len] = 0; +	p->mode = mode; +	hashcpy(p->sha1, sha1 ? sha1 : null_sha1); + +	return p;  } -/* A file entry went away or appeared */ -static void show_entry(struct diff_options *opt, const char *prefix, -		       struct tree_desc *desc, struct strbuf *base) +/* + * new path should be added to combine diff + * + * 3 cases on how/when it should be called and behaves: + * + *	 t, !tp		-> path added, all parents lack it + *	!t,  tp		-> path removed from all parents + *	 t,  tp		-> path modified/added + *			   (M for tp[i]=tp[imin], A otherwise) + */ +static struct combine_diff_path *emit_path(struct combine_diff_path *p, +	struct strbuf *base, struct diff_options *opt, int nparent, +	struct tree_desc *t, struct tree_desc *tp, +	int imin)  {  	unsigned mode;  	const char *path; -	const unsigned char *sha1 = tree_entry_extract(desc, &path, &mode); -	int pathlen = tree_entry_len(&desc->entry); +	const unsigned char *sha1; +	int pathlen;  	int old_baselen = base->len; +	int i, isdir, recurse = 0, emitthis = 1; + +	/* at least something has to be valid */ +	assert(t || tp); -	strbuf_add(base, path, pathlen); -	if (DIFF_OPT_TST(opt, RECURSIVE) && S_ISDIR(mode)) { -		enum object_type type; -		struct tree_desc inner; -		void *tree; -		unsigned long size; +	if (t) { +		/* path present in resulting tree */ +		sha1 = tree_entry_extract(t, &path, &mode); +		pathlen = tree_entry_len(&t->entry); +		isdir = S_ISDIR(mode); +	} else { +		/* +		 * a path was removed - take path from imin parent. Also take +		 * mode from that parent, to decide on recursion(1). +		 * +		 * 1) all modes for tp[i]=tp[imin] should be the same wrt +		 *    S_ISDIR, thanks to base_name_compare(). +		 */ +		tree_entry_extract(&tp[imin], &path, &mode); +		pathlen = tree_entry_len(&tp[imin].entry); -		tree = read_sha1_file(sha1, &type, &size); -		if (!tree || type != OBJ_TREE) -			die("corrupt tree sha %s", sha1_to_hex(sha1)); +		isdir = S_ISDIR(mode); +		sha1 = NULL; +		mode = 0; +	} -		if (DIFF_OPT_TST(opt, TREE_IN_RECURSIVE)) -			opt->add_remove(opt, *prefix, mode, sha1, 1, base->buf, 0); +	if (DIFF_OPT_TST(opt, RECURSIVE) && isdir) { +		recurse = 1; +		emitthis = DIFF_OPT_TST(opt, TREE_IN_RECURSIVE); +	} -		strbuf_addch(base, '/'); +	if (emitthis) { +		int keep; +		struct combine_diff_path *pprev = p; +		p = path_appendnew(p, nparent, base, path, pathlen, mode, sha1); -		init_tree_desc(&inner, tree, size); -		show_tree(opt, prefix, &inner, base); -		free(tree); -	} else -		opt->add_remove(opt, prefix[0], mode, sha1, 1, base->buf, 0); +		for (i = 0; i < nparent; ++i) { +			/* +			 * tp[i] is valid, if present and if tp[i]==tp[imin] - +			 * otherwise, we should ignore it. +			 */ +			int tpi_valid = tp && !(tp[i].entry.mode & S_IFXMIN_NEQ); + +			const unsigned char *sha1_i; +			unsigned mode_i; + +			p->parent[i].status = +				!t ? DIFF_STATUS_DELETED : +					tpi_valid ? +						DIFF_STATUS_MODIFIED : +						DIFF_STATUS_ADDED; + +			if (tpi_valid) { +				sha1_i = tp[i].entry.sha1; +				mode_i = tp[i].entry.mode; +			} +			else { +				sha1_i = NULL; +				mode_i = 0; +			} + +			p->parent[i].mode = mode_i; +			hashcpy(p->parent[i].sha1, sha1_i ? sha1_i : null_sha1); +		} + +		keep = 1; +		if (opt->pathchange) +			keep = opt->pathchange(opt, p); + +		/* +		 * If a path was filtered or consumed - we don't need to add it +		 * to the list and can reuse its memory, leaving it as +		 * pre-allocated element on the tail. +		 * +		 * On the other hand, if path needs to be kept, we need to +		 * correct its .next to NULL, as it was pre-initialized to how +		 * much memory was allocated. +		 * +		 * see path_appendnew() for details. +		 */ +		if (!keep) +			p = pprev; +		else +			p->next = NULL; +	} + +	if (recurse) { +		const unsigned char **parents_sha1; + +		parents_sha1 = xalloca(nparent * sizeof(parents_sha1[0])); +		for (i = 0; i < nparent; ++i) { +			/* same rule as in emitthis */ +			int tpi_valid = tp && !(tp[i].entry.mode & S_IFXMIN_NEQ); + +			parents_sha1[i] = tpi_valid ? tp[i].entry.sha1 +						    : NULL; +		} + +		strbuf_add(base, path, pathlen); +		strbuf_addch(base, '/'); +		p = ll_diff_tree_paths(p, sha1, parents_sha1, nparent, base, opt); +		xalloca_free(parents_sha1); +	}  	strbuf_setlen(base, old_baselen); +	return p;  }  static void skip_uninteresting(struct tree_desc *t, struct strbuf *base, -			       struct diff_options *opt, -			       enum interesting *match) +			       struct diff_options *opt)  { +	enum interesting match; +  	while (t->size) { -		*match = tree_entry_interesting(&t->entry, base, 0, &opt->pathspec); -		if (*match) { -			if (*match == all_entries_not_interesting) +		match = tree_entry_interesting(&t->entry, base, 0, &opt->pathspec); +		if (match) { +			if (match == all_entries_not_interesting)  				t->size = 0;  			break;  		} @@ -128,55 +300,260 @@ static void skip_uninteresting(struct tree_desc *t, struct strbuf *base,  	}  } -int diff_tree(struct tree_desc *t1, struct tree_desc *t2, -	      const char *base_str, struct diff_options *opt) + +/* + * generate paths for combined diff D(sha1,parents_sha1[]) + * + * Resulting paths are appended to combine_diff_path linked list, and also, are + * emitted on the go via opt->pathchange() callback, so it is possible to + * process the result as batch or incrementally. + * + * The paths are generated scanning new tree and all parents trees + * simultaneously, similarly to what diff_tree() was doing for 2 trees. + * The theory behind such scan is as follows: + * + * + * D(T,P1...Pn) calculation scheme + * ------------------------------- + * + * D(T,P1...Pn) = D(T,P1) ^ ... ^ D(T,Pn)	(regarding resulting paths set) + * + *	D(T,Pj)		- diff between T..Pj + *	D(T,P1...Pn)	- combined diff from T to parents P1,...,Pn + * + * + * We start from all trees, which are sorted, and compare their entries in + * lock-step: + * + *	 T     P1       Pn + *	 -     -        - + *	|t|   |p1|     |pn| + *	|-|   |--| ... |--|      imin = argmin(p1...pn) + *	| |   |  |     |  | + *	|-|   |--|     |--| + *	|.|   |. |     |. | + *	 .     .        . + *	 .     .        . + * + * at any time there could be 3 cases: + * + *	1)  t < p[imin]; + *	2)  t > p[imin]; + *	3)  t = p[imin]. + * + * Schematic deduction of what every case means, and what to do, follows: + * + * 1)  t < p[imin]  ->  ∀j t ∉ Pj  ->  "+t" ∈ D(T,Pj)  ->  D += "+t";  t↓ + * + * 2)  t > p[imin] + * + *     2.1) ∃j: pj > p[imin]  ->  "-p[imin]" ∉ D(T,Pj)  ->  D += ø;  ∀ pi=p[imin]  pi↓ + *     2.2) ∀i  pi = p[imin]  ->  pi ∉ T  ->  "-pi" ∈ D(T,Pi)  ->  D += "-p[imin]";  ∀i pi↓ + * + * 3)  t = p[imin] + * + *     3.1) ∃j: pj > p[imin]  ->  "+t" ∈ D(T,Pj)  ->  only pi=p[imin] remains to investigate + *     3.2) pi = p[imin]  ->  investigate δ(t,pi) + *      | + *      | + *      v + * + *     3.1+3.2) looking at δ(t,pi) ∀i: pi=p[imin] - if all != ø  -> + * + *                       ⎧δ(t,pi)  - if pi=p[imin] + *              ->  D += ⎨ + *                       ⎩"+t"     - if pi>p[imin] + * + * + *     in any case t↓  ∀ pi=p[imin]  pi↓ + * + * + * ~~~~~~~~ + * + * NOTE + * + *	Usual diff D(A,B) is by definition the same as combined diff D(A,[B]), + *	so this diff paths generator can, and is used, for plain diffs + *	generation too. + * + *	Please keep attention to the common D(A,[B]) case when working on the + *	code, in order not to slow it down. + * + * NOTE + *	nparent must be > 0. + */ + + +/* ∀ pi=p[imin]  pi↓ */ +static inline void update_tp_entries(struct tree_desc *tp, int nparent)  { -	struct strbuf base; -	int baselen = strlen(base_str); -	enum interesting t1_match = entry_not_interesting; -	enum interesting t2_match = entry_not_interesting; +	int i; +	for (i = 0; i < nparent; ++i) +		if (!(tp[i].entry.mode & S_IFXMIN_NEQ)) +			update_tree_entry(&tp[i]); +} + +static struct combine_diff_path *ll_diff_tree_paths( +	struct combine_diff_path *p, const unsigned char *sha1, +	const unsigned char **parents_sha1, int nparent, +	struct strbuf *base, struct diff_options *opt) +{ +	struct tree_desc t, *tp; +	void *ttree, **tptree; +	int i; + +	tp     = xalloca(nparent * sizeof(tp[0])); +	tptree = xalloca(nparent * sizeof(tptree[0])); + +	/* +	 * load parents first, as they are probably already cached. +	 * +	 * ( log_tree_diff() parses commit->parent before calling here via +	 *   diff_tree_sha1(parent, commit) ) +	 */ +	for (i = 0; i < nparent; ++i) +		tptree[i] = fill_tree_descriptor(&tp[i], parents_sha1[i]); +	ttree = fill_tree_descriptor(&t, sha1);  	/* Enable recursion indefinitely */  	opt->pathspec.recursive = DIFF_OPT_TST(opt, RECURSIVE); -	strbuf_init(&base, PATH_MAX); -	strbuf_add(&base, base_str, baselen); -  	for (;;) { +		int imin, cmp; +  		if (diff_can_quit_early(opt))  			break; +  		if (opt->pathspec.nr) { -			skip_uninteresting(t1, &base, opt, &t1_match); -			skip_uninteresting(t2, &base, opt, &t2_match); +			skip_uninteresting(&t, base, opt); +			for (i = 0; i < nparent; i++) +				skip_uninteresting(&tp[i], base, opt);  		} -		if (!t1->size) { -			if (!t2->size) + +		/* comparing is finished when all trees are done */ +		if (!t.size) { +			int done = 1; +			for (i = 0; i < nparent; ++i) +				if (tp[i].size) { +					done = 0; +					break; +				} +			if (done)  				break; -			show_entry(opt, "+", t2, &base); -			update_tree_entry(t2); -			continue;  		} -		if (!t2->size) { -			show_entry(opt, "-", t1, &base); -			update_tree_entry(t1); -			continue; + +		/* +		 * lookup imin = argmin(p1...pn), +		 * mark entries whether they =p[imin] along the way +		 */ +		imin = 0; +		tp[0].entry.mode &= ~S_IFXMIN_NEQ; + +		for (i = 1; i < nparent; ++i) { +			cmp = tree_entry_pathcmp(&tp[i], &tp[imin]); +			if (cmp < 0) { +				imin = i; +				tp[i].entry.mode &= ~S_IFXMIN_NEQ; +			} +			else if (cmp == 0) { +				tp[i].entry.mode &= ~S_IFXMIN_NEQ; +			} +			else { +				tp[i].entry.mode |= S_IFXMIN_NEQ; +			} +		} + +		/* fixup markings for entries before imin */ +		for (i = 0; i < imin; ++i) +			tp[i].entry.mode |= S_IFXMIN_NEQ;	/* pi > p[imin] */ + + + +		/* compare t vs p[imin] */ +		cmp = tree_entry_pathcmp(&t, &tp[imin]); + +		/* t = p[imin] */ +		if (cmp == 0) { +			/* are either pi > p[imin] or diff(t,pi) != ø ? */ +			if (!DIFF_OPT_TST(opt, FIND_COPIES_HARDER)) { +				for (i = 0; i < nparent; ++i) { +					/* p[i] > p[imin] */ +					if (tp[i].entry.mode & S_IFXMIN_NEQ) +						continue; + +					/* diff(t,pi) != ø */ +					if (hashcmp(t.entry.sha1, tp[i].entry.sha1) || +					    (t.entry.mode != tp[i].entry.mode)) +						continue; + +					goto skip_emit_t_tp; +				} +			} + +			/* D += {δ(t,pi) if pi=p[imin];  "+a" if pi > p[imin]} */ +			p = emit_path(p, base, opt, nparent, +					&t, tp, imin); + +		skip_emit_t_tp: +			/* t↓,  ∀ pi=p[imin]  pi↓ */ +			update_tree_entry(&t); +			update_tp_entries(tp, nparent); +		} + +		/* t < p[imin] */ +		else if (cmp < 0) { +			/* D += "+t" */ +			p = emit_path(p, base, opt, nparent, +					&t, /*tp=*/NULL, -1); + +			/* t↓ */ +			update_tree_entry(&t);  		} -		switch (compare_tree_entry(t1, t2, &base, opt)) { -		case -1: -			update_tree_entry(t1); -			continue; -		case 0: -			update_tree_entry(t1); -			/* Fallthrough */ -		case 1: -			update_tree_entry(t2); -			continue; + +		/* t > p[imin] */ +		else { +			/* ∀i pi=p[imin] -> D += "-p[imin]" */ +			if (!DIFF_OPT_TST(opt, FIND_COPIES_HARDER)) { +				for (i = 0; i < nparent; ++i) +					if (tp[i].entry.mode & S_IFXMIN_NEQ) +						goto skip_emit_tp; +			} + +			p = emit_path(p, base, opt, nparent, +					/*t=*/NULL, tp, imin); + +		skip_emit_tp: +			/* ∀ pi=p[imin]  pi↓ */ +			update_tp_entries(tp, nparent);  		} -		die("git diff-tree: internal error");  	} -	strbuf_release(&base); -	return 0; +	free(ttree); +	for (i = nparent-1; i >= 0; i--) +		free(tptree[i]); +	xalloca_free(tptree); +	xalloca_free(tp); + +	return p; +} + +struct combine_diff_path *diff_tree_paths( +	struct combine_diff_path *p, const unsigned char *sha1, +	const unsigned char **parents_sha1, int nparent, +	struct strbuf *base, struct diff_options *opt) +{ +	p = ll_diff_tree_paths(p, sha1, parents_sha1, nparent, base, opt); + +	/* +	 * free pre-allocated last element, if any +	 * (see path_appendnew() for details about why) +	 */ +	if (p->next) { +		free(p->next); +		p->next = NULL; +	} + +	return p;  }  /* @@ -190,7 +567,7 @@ static inline int diff_might_be_rename(void)  		!DIFF_FILE_VALID(diff_queued_diff.queue[0]->one);  } -static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, const char *base, struct diff_options *opt) +static void try_to_follow_renames(const unsigned char *old, const unsigned char *new, struct strbuf *base, struct diff_options *opt)  {  	struct diff_options diff_opts;  	struct diff_queue_struct *q = &diff_queued_diff; @@ -228,7 +605,7 @@ static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, co  	diff_opts.break_opt = opt->break_opt;  	diff_opts.rename_score = opt->rename_score;  	diff_setup_done(&diff_opts); -	diff_tree(t1, t2, base, &diff_opts); +	ll_diff_tree_sha1(old, new, base, &diff_opts);  	diffcore_std(&diff_opts);  	free_pathspec(&diff_opts.pathspec); @@ -287,25 +664,40 @@ static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, co  	q->nr = 1;  } -int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const char *base, struct diff_options *opt) +static int ll_diff_tree_sha1(const unsigned char *old, const unsigned char *new, +			     struct strbuf *base, struct diff_options *opt)  { -	void *tree1, *tree2; -	struct tree_desc t1, t2; -	unsigned long size1, size2; -	int retval; +	struct combine_diff_path phead, *p; +	pathchange_fn_t pathchange_old = opt->pathchange; -	tree1 = fill_tree_descriptor(&t1, old); -	tree2 = fill_tree_descriptor(&t2, new); -	size1 = t1.size; -	size2 = t2.size; -	retval = diff_tree(&t1, &t2, base, opt); -	if (!*base && DIFF_OPT_TST(opt, FOLLOW_RENAMES) && diff_might_be_rename()) { -		init_tree_desc(&t1, tree1, size1); -		init_tree_desc(&t2, tree2, size2); -		try_to_follow_renames(&t1, &t2, base, opt); +	phead.next = NULL; +	opt->pathchange = emit_diff_first_parent_only; +	diff_tree_paths(&phead, new, &old, 1, base, opt); + +	for (p = phead.next; p;) { +		struct combine_diff_path *pprev = p; +		p = p->next; +		free(pprev);  	} -	free(tree1); -	free(tree2); + +	opt->pathchange = pathchange_old; +	return 0; +} + +int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const char *base_str, struct diff_options *opt) +{ +	struct strbuf base; +	int retval; + +	strbuf_init(&base, PATH_MAX); +	strbuf_addstr(&base, base_str); + +	retval = ll_diff_tree_sha1(old, new, &base, opt); +	if (!*base_str && DIFF_OPT_TST(opt, FOLLOW_RENAMES) && diff_might_be_rename()) +		try_to_follow_renames(old, new, &base, opt); + +	strbuf_release(&base); +  	return retval;  } | 
