diff options
author | Wayne Davison <wayned@samba.org> | 2013-08-03 09:44:13 -0700 |
---|---|---|
committer | Wayne Davison <wayned@samba.org> | 2013-08-03 09:59:38 -0700 |
commit | de94193353864221280be9fbb6193d92eb133000 (patch) | |
tree | 76bee964a0f7424bc8eb2b259e5168850d63a674 | |
parent | 05fce6582a9192c58b2107153ec00056fd120d14 (diff) | |
download | rsync-de94193353864221280be9fbb6193d92eb133000.tar.gz |
Remove bypassed checksums in --inplace to improve speed.
When checking a checksum that refers to a part of an --inplace file that
has been overwritten without getting SUMFLG_SAME_OFFSET set, we remove the
checksum from the list. This speeds up the processing of files that have a
lot of identical checksum blocks (e.g. sequences of zeros) that we can't use
because they were never marked as being at the same offset. Patch provided by
Michael Chapman.
-rw-r--r-- | NEWS | 3 | ||||
-rw-r--r-- | match.c | 26 |
2 files changed, 20 insertions, 9 deletions
@@ -154,6 +154,9 @@ Changes since 3.0.9: file for one way to package the resulting files. (Suggestions for how to make this even easier to install & use are welcomed.) + - Improved the speed of some --inplace updates when there are lots of + identical checksum blocks that end up being unusable. + - Added the --outbuf=N|L|B option for choosing the output buffering. - Repeating the --fuzzy option now causes the code to look for fuzzy matches @@ -178,7 +178,8 @@ static void hash_search(int f,struct sum_struct *s, do { int done_csum2 = 0; - int32 i; + uint32 hash_entry; + int32 i, *prev; if (DEBUG_GTE(DELTASUM, 4)) { rprintf(FINFO, "offset=%s sum=%04x%04x\n", @@ -186,19 +187,32 @@ static void hash_search(int f,struct sum_struct *s, } if (tablesize == TRADITIONAL_TABLESIZE) { - if ((i = hash_table[SUM2HASH2(s1,s2)]) < 0) + hash_entry = SUM2HASH2(s1,s2); + if ((i = hash_table[hash_entry]) < 0) goto null_hash; sum = (s1 & 0xffff) | (s2 << 16); } else { sum = (s1 & 0xffff) | (s2 << 16); - if ((i = hash_table[BIG_SUM2HASH(sum)]) < 0) + hash_entry = BIG_SUM2HASH(sum); + if ((i = hash_table[hash_entry]) < 0) goto null_hash; } + prev = &hash_table[hash_entry]; hash_hits++; do { int32 l; + /* When updating in-place, the chunk's offset must be + * either >= our offset or identical data at that offset. + * Remove any bypassed entries that we can never use. */ + if (updating_basis_file && s->sums[i].offset < offset + && !(s->sums[i].flags & SUMFLG_SAME_OFFSET)) { + *prev = s->sums[i].chain; + continue; + } + prev = &s->sums[i].chain; + if (sum != s->sums[i].sum1) continue; @@ -207,12 +221,6 @@ static void hash_search(int f,struct sum_struct *s, if (l != s->sums[i].len) continue; - /* in-place: ensure chunk's offset is either >= our - * offset or that the data didn't move. */ - if (updating_basis_file && s->sums[i].offset < offset - && !(s->sums[i].flags & SUMFLG_SAME_OFFSET)) - continue; - if (DEBUG_GTE(DELTASUM, 3)) { rprintf(FINFO, "potential match at %s i=%ld sum=%08x\n", |