summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <junkio@cox.net>2006-02-28 20:16:41 -0800
committerJunio C Hamano <junkio@cox.net>2006-02-28 20:20:04 -0800
commite29e1147e485654d90a0ea0fd5fb7151bb194265 (patch)
tree33cb94f2c55681744f6429259b56622abb016f87
parent65416758cd83772f2f3c69f1bd1f501608e64745 (diff)
downloadgit-e29e1147e485654d90a0ea0fd5fb7151bb194265.tar.gz
diffcore-delta: stop using deltifier for packing.
This switches the change estimation logic used by break, rename and copy detection from delta packing code to a more line oriented one. This way, thee performance-density tradeoff by delta packing code can be made without worrying about breaking the rename detection. Signed-off-by: Junio C Hamano <junkio@cox.net>
-rw-r--r--diffcore-delta.c141
1 files changed, 113 insertions, 28 deletions
diff --git a/diffcore-delta.c b/diffcore-delta.c
index 1e6a6911ec..d03787be65 100644
--- a/diffcore-delta.c
+++ b/diffcore-delta.c
@@ -1,43 +1,128 @@
#include "cache.h"
#include "diff.h"
#include "diffcore.h"
-#include "delta.h"
-#include "count-delta.h"
-
-static int diffcore_count_changes_1(void *src, unsigned long src_size,
- void *dst, unsigned long dst_size,
- unsigned long delta_limit,
- unsigned long *src_copied,
- unsigned long *literal_added)
-{
- void *delta;
- unsigned long delta_size;
-
- delta = diff_delta(src, src_size,
- dst, dst_size,
- &delta_size, delta_limit);
- if (!delta)
- /* If delta_limit is exceeded, we have too much differences */
- return -1;
- /* Estimate the edit size by interpreting delta. */
- if (count_delta(delta, delta_size, src_copied, literal_added)) {
- free(delta);
- return -1;
+struct linehash {
+ unsigned long bytes;
+ unsigned long hash;
+};
+
+static unsigned long hash_extended_line(const unsigned char **buf_p,
+ unsigned long left)
+{
+ /* An extended line is zero or more whitespace letters (including LF)
+ * followed by one non whitespace letter followed by zero or more
+ * non LF, and terminated with by a LF (or EOF).
+ */
+ const unsigned char *bol = *buf_p;
+ const unsigned char *buf = bol;
+ unsigned long hashval = 0;
+ while (left) {
+ unsigned c = *buf++;
+ if (!c)
+ goto binary;
+ left--;
+ if (' ' < c) {
+ hashval = c;
+ break;
+ }
+ }
+ while (left) {
+ unsigned c = *buf++;
+ if (!c)
+ goto binary;
+ left--;
+ if (c == '\n')
+ break;
+ if (' ' < c)
+ hashval = hashval * 11 + c;
}
- free(delta);
+ *buf_p = buf;
+ return hashval;
+
+ binary:
+ *buf_p = NULL;
+ return 0;
+}
+
+static int linehash_compare(const void *a_, const void *b_)
+{
+ struct linehash *a = (struct linehash *) a_;
+ struct linehash *b = (struct linehash *) b_;
+ if (a->hash < b->hash) return -1;
+ if (a->hash > b->hash) return 1;
return 0;
}
+static struct linehash *hash_lines(const unsigned char *buf,
+ unsigned long size)
+{
+ const unsigned char *eobuf = buf + size;
+ struct linehash *line = NULL;
+ int alloc = 0, used = 0;
+
+ while (buf < eobuf) {
+ const unsigned char *ptr = buf;
+ unsigned long hash = hash_extended_line(&buf, eobuf-ptr);
+ if (!buf) {
+ free(line);
+ return NULL;
+ }
+ if (alloc <= used) {
+ alloc = alloc_nr(alloc);
+ line = xrealloc(line, sizeof(*line) * alloc);
+ }
+ line[used].bytes = buf - ptr;
+ line[used].hash = hash;
+ used++;
+ }
+ qsort(line, used, sizeof(*line), linehash_compare);
+
+ /* Terminate the list */
+ if (alloc <= used)
+ line = xrealloc(line, sizeof(*line) * (used+1));
+ line[used].bytes = line[used].hash = 0;
+ return line;
+}
+
int diffcore_count_changes(void *src, unsigned long src_size,
void *dst, unsigned long dst_size,
unsigned long delta_limit,
unsigned long *src_copied,
unsigned long *literal_added)
{
- return diffcore_count_changes_1(src, src_size,
- dst, dst_size,
- delta_limit,
- src_copied,
- literal_added);
+ struct linehash *src_lines, *dst_lines;
+ unsigned long sc, la;
+
+ src_lines = hash_lines(src, src_size);
+ if (!src_lines)
+ return -1;
+ dst_lines = hash_lines(dst, dst_size);
+ if (!dst_lines) {
+ free(src_lines);
+ return -1;
+ }
+ sc = la = 0;
+ while (src_lines->bytes && dst_lines->bytes) {
+ int cmp = linehash_compare(src_lines, dst_lines);
+ if (!cmp) {
+ sc += src_lines->bytes;
+ src_lines++;
+ dst_lines++;
+ continue;
+ }
+ if (cmp < 0) {
+ src_lines++;
+ continue;
+ }
+ la += dst_lines->bytes;
+ dst_lines++;
+ }
+ while (dst_lines->bytes) {
+ la += dst_lines->bytes;
+ dst_lines++;
+ }
+ *src_copied = sc;
+ *literal_added = la;
+ return 0;
}