summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWayne Davison <wayned@samba.org>2013-08-03 09:44:13 -0700
committerWayne Davison <wayned@samba.org>2013-08-03 09:59:38 -0700
commitde94193353864221280be9fbb6193d92eb133000 (patch)
tree76bee964a0f7424bc8eb2b259e5168850d63a674
parent05fce6582a9192c58b2107153ec00056fd120d14 (diff)
downloadrsync-de94193353864221280be9fbb6193d92eb133000.tar.gz
Remove bypassed checksums in --inplace to improve speed.
When checking a checksum that refers to a part of an --inplace file that has been overwritten w/o getting SUMFLG_SAME_OFFSET set, we remove the checksum from the list. This will speed up files that have a lot of identical checksum blocks (e.g. sequences of zeros) that we can't use due to them not getting marked as being the same. Patch provided by Michael Chapman.
-rw-r--r--NEWS3
-rw-r--r--match.c26
2 files changed, 20 insertions, 9 deletions
diff --git a/NEWS b/NEWS
index 040ac2d7..eec631d3 100644
--- a/NEWS
+++ b/NEWS
@@ -154,6 +154,9 @@ Changes since 3.0.9:
file for one way to package the resulting files. (Suggestions for
how to make this even easier to install & use are welcomed.)
+ - Improved the speed of some --inplace updates when there are lots of
+ identical checksum blocks that end up being unusable.
+
- Added the --outbuf=N|L|B option for choosing the output buffering.
- Repeating the --fuzzy option now causes the code to look for fuzzy matches
diff --git a/match.c b/match.c
index bafab9f3..a8bd1f30 100644
--- a/match.c
+++ b/match.c
@@ -178,7 +178,8 @@ static void hash_search(int f,struct sum_struct *s,
do {
int done_csum2 = 0;
- int32 i;
+ uint32 hash_entry;
+ int32 i, *prev;
if (DEBUG_GTE(DELTASUM, 4)) {
rprintf(FINFO, "offset=%s sum=%04x%04x\n",
@@ -186,19 +187,32 @@ static void hash_search(int f,struct sum_struct *s,
}
if (tablesize == TRADITIONAL_TABLESIZE) {
- if ((i = hash_table[SUM2HASH2(s1,s2)]) < 0)
+ hash_entry = SUM2HASH2(s1,s2);
+ if ((i = hash_table[hash_entry]) < 0)
goto null_hash;
sum = (s1 & 0xffff) | (s2 << 16);
} else {
sum = (s1 & 0xffff) | (s2 << 16);
- if ((i = hash_table[BIG_SUM2HASH(sum)]) < 0)
+ hash_entry = BIG_SUM2HASH(sum);
+ if ((i = hash_table[hash_entry]) < 0)
goto null_hash;
}
+ prev = &hash_table[hash_entry];
hash_hits++;
do {
int32 l;
+ /* When updating in-place, the chunk's offset must be
+ * either >= our offset or identical data at that offset.
+ * Remove any bypassed entries that we can never use. */
+ if (updating_basis_file && s->sums[i].offset < offset
+ && !(s->sums[i].flags & SUMFLG_SAME_OFFSET)) {
+ *prev = s->sums[i].chain;
+ continue;
+ }
+ prev = &s->sums[i].chain;
+
if (sum != s->sums[i].sum1)
continue;
@@ -207,12 +221,6 @@ static void hash_search(int f,struct sum_struct *s,
if (l != s->sums[i].len)
continue;
- /* in-place: ensure chunk's offset is either >= our
- * offset or that the data didn't move. */
- if (updating_basis_file && s->sums[i].offset < offset
- && !(s->sums[i].flags & SUMFLG_SAME_OFFSET))
- continue;
-
if (DEBUG_GTE(DELTASUM, 3)) {
rprintf(FINFO,
"potential match at %s i=%ld sum=%08x\n",