diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-08-05 16:22:51 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-08-05 16:22:51 +0000 |
commit | cf46733632c7279a9fd0fe6ce26f9185a4ae82a9 (patch) | |
tree | da27775a2161723ef342e91af41a8b51fedef405 /subversion/libsvn_delta | |
parent | bb0ef45f7c46b0ae221b26265ef98a768c33f820 (diff) | |
download | subversion-tarball-master.tar.gz |
subversion-1.9.7 HEAD subversion-1.9.7 master
Diffstat (limited to 'subversion/libsvn_delta')
-rw-r--r-- | subversion/libsvn_delta/compat.c | 95 | ||||
-rw-r--r-- | subversion/libsvn_delta/compose_delta.c | 48 | ||||
-rw-r--r-- | subversion/libsvn_delta/debug_editor.c | 33 | ||||
-rw-r--r-- | subversion/libsvn_delta/debug_editor.h | 7 | ||||
-rw-r--r-- | subversion/libsvn_delta/editor.c | 77 | ||||
-rw-r--r-- | subversion/libsvn_delta/libsvn_delta.pc.in | 12 | ||||
-rw-r--r-- | subversion/libsvn_delta/path_driver.c | 11 | ||||
-rw-r--r-- | subversion/libsvn_delta/svndiff.c | 393 | ||||
-rw-r--r-- | subversion/libsvn_delta/text_delta.c | 114 | ||||
-rw-r--r-- | subversion/libsvn_delta/xdelta.c | 142 |
10 files changed, 342 insertions, 590 deletions
diff --git a/subversion/libsvn_delta/compat.c b/subversion/libsvn_delta/compat.c index 470efa2..dfa9743 100644 --- a/subversion/libsvn_delta/compat.c +++ b/subversion/libsvn_delta/compat.c @@ -36,6 +36,8 @@ #include "svn_private_config.h" #include "private/svn_delta_private.h" +#include "private/svn_sorts_private.h" +#include "svn_private_config.h" struct file_rev_handler_wrapper_baton { @@ -188,6 +190,7 @@ struct change_node apr_hash_t *props; /* new/final set of props to apply */ + svn_boolean_t contents_changed; /* the file contents changed */ const char *contents_abspath; /* file containing new fulltext */ svn_checksum_t *checksum; /* checksum of new fulltext */ @@ -292,7 +295,7 @@ get_children(struct ev2_edit_baton *eb, for (hi = apr_hash_first(pool, eb->changes); hi; hi = apr_hash_next(hi)) { - const char *repos_relpath = svn__apr_hash_index_key(hi); + const char *repos_relpath = apr_hash_this_key(hi); const char *child; /* Find potential children. */ @@ -347,17 +350,26 @@ process_actions(struct ev2_edit_baton *eb, return SVN_NO_ERROR; } - if (change->contents_abspath != NULL) + if (change->contents_changed) { /* We can only set text on files. */ /* ### validate we aren't overwriting KIND? 
*/ kind = svn_node_file; - /* ### the checksum might be in CHANGE->CHECKSUM */ - SVN_ERR(svn_io_file_checksum2(&checksum, change->contents_abspath, - svn_checksum_sha1, scratch_pool)); - SVN_ERR(svn_stream_open_readonly(&contents, change->contents_abspath, - scratch_pool, scratch_pool)); + if (change->contents_abspath) + { + /* ### the checksum might be in CHANGE->CHECKSUM */ + SVN_ERR(svn_io_file_checksum2(&checksum, change->contents_abspath, + svn_checksum_sha1, scratch_pool)); + SVN_ERR(svn_stream_open_readonly(&contents, change->contents_abspath, + scratch_pool, scratch_pool)); + } + else + { + contents = svn_stream_empty(scratch_pool); + checksum = svn_checksum_empty_checksum(svn_checksum_sha1, + scratch_pool); + } } if (change->props != NULL) @@ -399,7 +411,7 @@ process_actions(struct ev2_edit_baton *eb, else { /* If this file was added, but apply_txdelta() was not - called (ie. no CONTENTS_ABSPATH), then we're adding + called (i.e., CONTENTS_CHANGED is FALSE), then we're adding an empty file. */ if (change->contents_abspath == NULL) { @@ -440,8 +452,8 @@ process_actions(struct ev2_edit_baton *eb, change->changing, NULL, props)); else SVN_ERR(svn_editor_alter_file(eb->editor, repos_relpath, - change->changing, props, - checksum, contents)); + change->changing, + checksum, contents, props)); } return SVN_NO_ERROR; @@ -789,6 +801,26 @@ window_handler(svn_txdelta_window_t *window, void *baton) return svn_error_trace(err); } +/* Lazy-open handler for getting a read-only stream of the delta base. */ +static svn_error_t * +open_delta_base(svn_stream_t **stream, void *baton, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + const char *const delta_base = baton; + return svn_stream_open_readonly(stream, delta_base, + result_pool, scratch_pool); +} + +/* Lazy-open handler for opening a stream for the delta result. 
*/ +static svn_error_t * +open_delta_target(svn_stream_t **stream, void *baton, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + const char **delta_target = baton; + return svn_stream_open_unique(stream, delta_target, NULL, + svn_io_file_del_on_pool_cleanup, + result_pool, scratch_pool); +} static svn_error_t * ev2_apply_textdelta(void *file_baton, @@ -802,10 +834,9 @@ ev2_apply_textdelta(void *file_baton, struct handler_baton *hb = apr_pcalloc(handler_pool, sizeof(*hb)); struct change_node *change; svn_stream_t *target; - /* ### fix this. for now, we know this has a "short" lifetime. */ - apr_pool_t *scratch_pool = handler_pool; change = locate_change(fb->eb, fb->path); + SVN_ERR_ASSERT(!change->contents_changed); SVN_ERR_ASSERT(change->contents_abspath == NULL); SVN_ERR_ASSERT(!SVN_IS_VALID_REVNUM(change->changing) || change->changing == fb->base_revision); @@ -814,12 +845,14 @@ ev2_apply_textdelta(void *file_baton, if (! fb->delta_base) hb->source = svn_stream_empty(handler_pool); else - SVN_ERR(svn_stream_open_readonly(&hb->source, fb->delta_base, handler_pool, - scratch_pool)); + hb->source = svn_stream_lazyopen_create(open_delta_base, + (char*)fb->delta_base, + FALSE, handler_pool); - SVN_ERR(svn_stream_open_unique(&target, &change->contents_abspath, NULL, - svn_io_file_del_on_pool_cleanup, - fb->eb->edit_pool, scratch_pool)); + change->contents_changed = TRUE; + target = svn_stream_lazyopen_create(open_delta_target, + &change->contents_abspath, + FALSE, fb->eb->edit_pool); svn_txdelta_apply(hb->source, target, NULL, NULL, @@ -1106,6 +1139,7 @@ add_file_cb(void *baton, change->kind = svn_node_file; change->deleting = replaces_rev; change->props = svn_prop_hash_dup(props, eb->edit_pool); + change->contents_changed = TRUE; change->contents_abspath = tmp_filename; change->checksum = svn_checksum_dup(md5_checksum, eb->edit_pool); @@ -1183,9 +1217,9 @@ static svn_error_t * alter_file_cb(void *baton, const char *relpath, svn_revnum_t revision, - apr_hash_t 
*props, const svn_checksum_t *checksum, svn_stream_t *contents, + apr_hash_t *props, apr_pool_t *scratch_pool) { struct editor_baton *eb = baton; @@ -1199,12 +1233,12 @@ alter_file_cb(void *baton, if (contents) { /* We may need to re-checksum these contents */ - if (!(checksum && checksum->kind == svn_checksum_md5)) + if (checksum && checksum->kind == svn_checksum_md5) + md5_checksum = (svn_checksum_t *)checksum; + else contents = svn_stream_checksummed2(contents, &md5_checksum, NULL, svn_checksum_md5, TRUE, scratch_pool); - else - md5_checksum = (svn_checksum_t *)checksum; /* Spool the contents to a tempfile, and provide that to the driver. */ SVN_ERR(svn_stream_open_unique(&tmp_stream, &tmp_filename, NULL, @@ -1223,6 +1257,7 @@ alter_file_cb(void *baton, change->props = svn_prop_hash_dup(props, eb->edit_pool); if (contents != NULL) { + change->contents_changed = TRUE; change->contents_abspath = tmp_filename; change->checksum = svn_checksum_dup(md5_checksum, eb->edit_pool); } @@ -1235,8 +1270,8 @@ static svn_error_t * alter_symlink_cb(void *baton, const char *relpath, svn_revnum_t revision, - apr_hash_t *props, const char *target, + apr_hash_t *props, apr_pool_t *scratch_pool) { /* ### should we verify the kind is truly a symlink? 
*/ @@ -1331,17 +1366,6 @@ move_cb(void *baton, return SVN_NO_ERROR; } -/* This implements svn_editor_cb_rotate_t */ -static svn_error_t * -rotate_cb(void *baton, - const apr_array_header_t *relpaths, - const apr_array_header_t *revisions, - apr_pool_t *scratch_pool) -{ - SVN__NOT_IMPLEMENTED(); -} - - static int count_components(const char *relpath) { @@ -1640,7 +1664,7 @@ apply_change(void **dir_baton, /* Make this an FS path by prepending "/" */ if (copyfrom_url[0] != '/') copyfrom_url = apr_pstrcat(scratch_pool, "/", - copyfrom_url, NULL); + copyfrom_url, SVN_VA_NULL); } copyfrom_rev = change->copyfrom_rev; @@ -1673,7 +1697,7 @@ apply_change(void **dir_baton, else SVN_ERR(drive_ev1_props(eb, relpath, change, file_baton, scratch_pool)); - if (change->contents_abspath) + if (change->contents_changed && change->contents_abspath) { svn_txdelta_window_handler_t handler; void *handler_baton; @@ -1889,7 +1913,6 @@ svn_delta__editor_from_delta(svn_editor_t **editor_p, delete_cb, copy_cb, move_cb, - rotate_cb, complete_cb, abort_cb }; diff --git a/subversion/libsvn_delta/compose_delta.c b/subversion/libsvn_delta/compose_delta.c index 7b96438..6d757f2 100644 --- a/subversion/libsvn_delta/compose_delta.c +++ b/subversion/libsvn_delta/compose_delta.c @@ -648,15 +648,18 @@ copy_source_ops(apr_size_t offset, apr_size_t limit, { const svn_txdelta_op_t *const op = &window->ops[op_ndx]; const apr_size_t *const off = &ndx->offs[op_ndx]; - apr_size_t fix_offset; - apr_size_t fix_limit; - + const apr_size_t fix_offset = (offset > off[0] ? offset - off[0] : 0); + const apr_size_t fix_limit = (off[0] >= limit ? 0 + : (off[1] > limit ? off[1] - limit : 0)); + + /* Ideally, we'd do this check before assigning fix_offset and + fix_limit; but then we couldn't make them const whilst still + adhering to C90 rules. Instead, we're going to assume that a + smart optimizing compiler will reorder this check before the + local variable initialization. 
*/ if (off[0] >= limit) break; - fix_offset = (offset > off[0] ? offset - off[0] : 0); - fix_limit = (off[1] > limit ? off[1] - limit : 0); - /* It would be extremely weird if the fixed-up op had zero length. */ assert(fix_offset + fix_limit < op->length); @@ -701,23 +704,22 @@ copy_source_ops(apr_size_t offset, apr_size_t limit, apr_size_t tgt_off = target_offset; assert(ptn_length > ptn_overlap); - /* ### FIXME: ptn_overlap is unsigned, so the if() condition - below is always true! Either it should be '> 0', or the - code block should be unconditional. See also r842362. */ - if (ptn_overlap >= 0) - { - /* Issue second subrange in the pattern. */ - const apr_size_t length = - MIN(op->length - fix_off - fix_limit, - ptn_length - ptn_overlap); - copy_source_ops(op->offset + ptn_overlap, - op->offset + ptn_overlap + length, - tgt_off, - op_ndx, - build_baton, window, ndx, pool); - fix_off += length; - tgt_off += length; - } + /* Unconditionally issue the second subrange of the + pattern. This is always correct, since the outer + condition already verifies that there is an overlap + in the target copy. 
*/ + { + const apr_size_t length = + MIN(op->length - fix_off - fix_limit, + ptn_length - ptn_overlap); + copy_source_ops(op->offset + ptn_overlap, + op->offset + ptn_overlap + length, + tgt_off, + op_ndx, + build_baton, window, ndx, pool); + fix_off += length; + tgt_off += length; + } assert(fix_off + fix_limit <= op->length); if (ptn_overlap > 0 diff --git a/subversion/libsvn_delta/debug_editor.c b/subversion/libsvn_delta/debug_editor.c index 7c2cdec..8ca7b20 100644 --- a/subversion/libsvn_delta/debug_editor.c +++ b/subversion/libsvn_delta/debug_editor.c @@ -33,6 +33,7 @@ struct edit_baton int indent_level; svn_stream_t *out; + const char *prefix; }; struct dir_baton @@ -52,8 +53,7 @@ write_indent(struct edit_baton *eb, apr_pool_t *pool) { int i; - /* This is DBG_FLAG from ../libsvn_subr/debug.c */ - SVN_ERR(svn_stream_puts(eb->out, "DBG:")); + SVN_ERR(svn_stream_puts(eb->out, eb->prefix)); for (i = 0; i < eb->indent_level; ++i) SVN_ERR(svn_stream_puts(eb->out, " ")); @@ -346,8 +346,8 @@ change_file_prop(void *file_baton, struct edit_baton *eb = fb->edit_baton; SVN_ERR(write_indent(eb, pool)); - SVN_ERR(svn_stream_printf(eb->out, pool, "change_file_prop : %s\n", - name)); + SVN_ERR(svn_stream_printf(eb->out, pool, "change_file_prop : %s -> %s\n", + name, value ? value->data : "<deleted>")); SVN_ERR(eb->wrapped_editor->change_file_prop(fb->wrapped_file_baton, name, @@ -367,7 +367,8 @@ change_dir_prop(void *dir_baton, struct edit_baton *eb = db->edit_baton; SVN_ERR(write_indent(eb, pool)); - SVN_ERR(svn_stream_printf(eb->out, pool, "change_dir_prop : %s\n", name)); + SVN_ERR(svn_stream_printf(eb->out, pool, "change_dir_prop : %s -> %s\n", + name, value ? 
value->data : "<deleted>")); SVN_ERR(eb->wrapped_editor->change_dir_prop(db->wrapped_dir_baton, name, @@ -391,19 +392,34 @@ close_edit(void *edit_baton, return SVN_NO_ERROR; } +static svn_error_t * +abort_edit(void *edit_baton, + apr_pool_t *pool) +{ + struct edit_baton *eb = edit_baton; + + SVN_ERR(write_indent(eb, pool)); + SVN_ERR(svn_stream_printf(eb->out, pool, "abort_edit\n")); + + SVN_ERR(eb->wrapped_editor->abort_edit(eb->wrapped_edit_baton, pool)); + + return SVN_NO_ERROR; +} + svn_error_t * svn_delta__get_debug_editor(const svn_delta_editor_t **editor, void **edit_baton, const svn_delta_editor_t *wrapped_editor, void *wrapped_edit_baton, + const char *prefix, apr_pool_t *pool) { - svn_delta_editor_t *tree_editor = svn_delta_default_editor(pool); + svn_delta_editor_t *tree_editor = apr_palloc(pool, sizeof(*tree_editor)); struct edit_baton *eb = apr_palloc(pool, sizeof(*eb)); apr_file_t *errfp; svn_stream_t *out; - apr_status_t apr_err = apr_file_open_stderr(&errfp, pool); + apr_status_t apr_err = apr_file_open_stdout(&errfp, pool); if (apr_err) return svn_error_wrap_apr(apr_err, "Problem opening stderr"); @@ -424,11 +440,14 @@ svn_delta__get_debug_editor(const svn_delta_editor_t **editor, tree_editor->close_file = close_file; tree_editor->absent_file = absent_file; tree_editor->close_edit = close_edit; + tree_editor->abort_edit = abort_edit; eb->wrapped_editor = wrapped_editor; eb->wrapped_edit_baton = wrapped_edit_baton; eb->out = out; eb->indent_level = 0; + /* This is DBG_FLAG from ../libsvn_subr/debug.c */ + eb->prefix = apr_pstrcat(pool, "DBG: ", prefix, SVN_VA_NULL); *editor = tree_editor; *edit_baton = eb; diff --git a/subversion/libsvn_delta/debug_editor.h b/subversion/libsvn_delta/debug_editor.h index 2b031af..63c90d3 100644 --- a/subversion/libsvn_delta/debug_editor.h +++ b/subversion/libsvn_delta/debug_editor.h @@ -32,14 +32,19 @@ extern "C" { /* Return a debug editor that wraps @a wrapped_editor. 
* * The debug editor simply prints an indication of what callbacks are being - * called to @c stderr, and is only intended for use in debugging subversion + * called to @c stdout, and is only intended for use in debugging subversion * editors. + * + * @a prefix, if non-null, is printed between "DBG: " and each indication. + * + * Note: Our test suite generally ignores stdout lines starting with "DBG:". */ svn_error_t * svn_delta__get_debug_editor(const svn_delta_editor_t **editor, void **edit_baton, const svn_delta_editor_t *wrapped_editor, void *wrapped_baton, + const char *prefix, apr_pool_t *pool); #ifdef __cplusplus diff --git a/subversion/libsvn_delta/editor.c b/subversion/libsvn_delta/editor.c index 1dc94b2..1c5e298 100644 --- a/subversion/libsvn_delta/editor.c +++ b/subversion/libsvn_delta/editor.c @@ -391,16 +391,6 @@ svn_editor_setcb_move(svn_editor_t *editor, svn_error_t * -svn_editor_setcb_rotate(svn_editor_t *editor, - svn_editor_cb_rotate_t callback, - apr_pool_t *scratch_pool) -{ - editor->funcs.cb_rotate = callback; - return SVN_NO_ERROR; -} - - -svn_error_t * svn_editor_setcb_complete(svn_editor_t *editor, svn_editor_cb_complete_t callback, apr_pool_t *scratch_pool) @@ -437,7 +427,6 @@ svn_editor_setcb_many(svn_editor_t *editor, COPY_CALLBACK(cb_delete); COPY_CALLBACK(cb_copy); COPY_CALLBACK(cb_move); - COPY_CALLBACK(cb_rotate); COPY_CALLBACK(cb_complete); COPY_CALLBACK(cb_abort); @@ -683,9 +672,9 @@ svn_error_t * svn_editor_alter_file(svn_editor_t *editor, const char *relpath, svn_revnum_t revision, - apr_hash_t *props, const svn_checksum_t *checksum, - svn_stream_t *contents) + svn_stream_t *contents, + apr_hash_t *props) { svn_error_t *err = SVN_NO_ERROR; @@ -705,8 +694,8 @@ svn_editor_alter_file(svn_editor_t *editor, { START_CALLBACK(editor); err = editor->funcs.cb_alter_file(editor->baton, - relpath, revision, props, - checksum, contents, + relpath, revision, + checksum, contents, props, editor->scratch_pool); END_CALLBACK(editor); } @@ -723,8 
+712,8 @@ svn_error_t * svn_editor_alter_symlink(svn_editor_t *editor, const char *relpath, svn_revnum_t revision, - apr_hash_t *props, - const char *target) + const char *target, + apr_hash_t *props) { svn_error_t *err = SVN_NO_ERROR; @@ -740,8 +729,8 @@ svn_editor_alter_symlink(svn_editor_t *editor, { START_CALLBACK(editor); err = editor->funcs.cb_alter_symlink(editor->baton, - relpath, revision, props, - target, + relpath, revision, + target, props, editor->scratch_pool); END_CALLBACK(editor); } @@ -862,56 +851,6 @@ svn_editor_move(svn_editor_t *editor, svn_error_t * -svn_editor_rotate(svn_editor_t *editor, - const apr_array_header_t *relpaths, - const apr_array_header_t *revisions) -{ - svn_error_t *err = SVN_NO_ERROR; - - SHOULD_NOT_BE_FINISHED(editor); -#ifdef ENABLE_ORDERING_CHECK - { - int i; - for (i = 0; i < relpaths->nelts; i++) - { - const char *relpath = APR_ARRAY_IDX(relpaths, i, const char *); - - SVN_ERR_ASSERT(svn_relpath_is_canonical(relpath)); - SHOULD_NOT_BE_COMPLETED(editor, relpath); - VERIFY_PARENT_MAY_EXIST(editor, relpath); - CHILD_DELETIONS_ALLOWED(editor, relpath); - } - } -#endif - - SVN_ERR(check_cancel(editor)); - - if (editor->funcs.cb_rotate) - { - START_CALLBACK(editor); - err = editor->funcs.cb_rotate(editor->baton, relpaths, revisions, - editor->scratch_pool); - END_CALLBACK(editor); - } - -#ifdef ENABLE_ORDERING_CHECK - { - int i; - for (i = 0; i < relpaths->nelts; i++) - { - const char *relpath = APR_ARRAY_IDX(relpaths, i, const char *); - MARK_ALLOW_ALTER(editor, relpath); - MARK_PARENT_STABLE(editor, relpath); - } - } -#endif - - svn_pool_clear(editor->scratch_pool); - return svn_error_trace(err); -} - - -svn_error_t * svn_editor_complete(svn_editor_t *editor) { svn_error_t *err = SVN_NO_ERROR; diff --git a/subversion/libsvn_delta/libsvn_delta.pc.in b/subversion/libsvn_delta/libsvn_delta.pc.in new file mode 100644 index 0000000..b96e6ab --- /dev/null +++ b/subversion/libsvn_delta/libsvn_delta.pc.in @@ -0,0 +1,12 @@ 
+prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libsvn_delta +Description: Subversion Delta Library +Version: @PACKAGE_VERSION@ +Requires: apr-util-@SVN_APR_MAJOR_VERSION@ apr-@SVN_APR_MAJOR_VERSION@ +Requires.private: libsvn_subr +Libs: -L${libdir} -lsvn_delta @SVN_ZLIB_LIBS@ +Cflags: -I${includedir} diff --git a/subversion/libsvn_delta/path_driver.c b/subversion/libsvn_delta/path_driver.c index 62e703a..c1f3e07 100644 --- a/subversion/libsvn_delta/path_driver.c +++ b/subversion/libsvn_delta/path_driver.c @@ -32,6 +32,7 @@ #include "svn_path.h" #include "svn_sorts.h" #include "private/svn_fspath.h" +#include "private/svn_sorts_private.h" /*** Helper functions. ***/ @@ -157,8 +158,7 @@ svn_delta_path_driver2(const svn_delta_editor_t *editor, if (sort_paths && paths->nelts > 1) { apr_array_header_t *sorted = apr_array_copy(subpool, paths); - qsort(sorted->elts, sorted->nelts, sorted->elt_size, - svn_sort_compare_paths); + svn_sort__array(sorted, svn_sort_compare_paths); paths = sorted; } @@ -187,7 +187,7 @@ svn_delta_path_driver2(const svn_delta_editor_t *editor, driving the editor. */ for (; i < paths->nelts; i++) { - const char *pdir, *bname; + const char *pdir; const char *common = ""; size_t common_len; @@ -224,9 +224,10 @@ svn_delta_path_driver2(const svn_delta_editor_t *editor, /*** Step C - Open any directories between the common ancestor and the parent of the current path. 
***/ if (*path == '/') - svn_fspath__split(&pdir, &bname, path, iterpool); + pdir = svn_fspath__dirname(path, iterpool); else - svn_relpath_split(&pdir, &bname, path, iterpool); + pdir = svn_relpath_dirname(path, iterpool); + if (strlen(pdir) > common_len) { const char *piece = pdir + common_len + 1; diff --git a/subversion/libsvn_delta/svndiff.c b/subversion/libsvn_delta/svndiff.c index b9cb285..070c638 100644 --- a/subversion/libsvn_delta/svndiff.c +++ b/subversion/libsvn_delta/svndiff.c @@ -29,21 +29,12 @@ #include "delta.h" #include "svn_pools.h" #include "svn_private_config.h" -#include <zlib.h> #include "private/svn_error_private.h" #include "private/svn_delta_private.h" - -/* The zlib compressBound function was not exported until 1.2.0. */ -#if ZLIB_VERNUM >= 0x1200 -#define svnCompressBound(LEN) compressBound(LEN) -#else -#define svnCompressBound(LEN) ((LEN) + ((LEN) >> 12) + ((LEN) >> 14) + 11) -#endif - -/* For svndiff1, address/instruction/new data under this size will not - be compressed using zlib as a secondary compressor. */ -#define MIN_COMPRESS_SIZE 512 +#include "private/svn_subr_private.h" +#include "private/svn_string_private.h" +#include "private/svn_dep_compat.h" /* ----- Text delta to svndiff ----- */ @@ -58,139 +49,31 @@ struct encoder_baton { apr_pool_t *pool; }; -/* This is at least as big as the largest size of an integer that - encode_int can generate; it is sufficient for creating buffers for - it to write into. This assumes that integers are at most 64 bits, - and so 10 bytes (with 7 bits of information each) are sufficient to - represent them. */ -#define MAX_ENCODED_INT_LEN 10 /* This is at least as big as the largest size for a single instruction. 
*/ -#define MAX_INSTRUCTION_LEN (2*MAX_ENCODED_INT_LEN+1) +#define MAX_INSTRUCTION_LEN (2*SVN__MAX_ENCODED_UINT_LEN+1) /* This is at least as big as the largest possible instructions section: in theory, the instructions could be SVN_DELTA_WINDOW_SIZE 1-byte copy-from-source instructions (though this is very unlikely). */ #define MAX_INSTRUCTION_SECTION_LEN (SVN_DELTA_WINDOW_SIZE*MAX_INSTRUCTION_LEN) -/* Encode VAL into the buffer P using the variable-length svndiff - integer format. Return the incremented value of P after the - encoded bytes have been written. P must point to a buffer of size - at least MAX_ENCODED_INT_LEN. - - This encoding uses the high bit of each byte as a continuation bit - and the other seven bits as data bits. High-order data bits are - encoded first, followed by lower-order bits, so the value can be - reconstructed by concatenating the data bits from left to right and - interpreting the result as a binary number. Examples (brackets - denote byte boundaries, spaces are for clarity only): - - 1 encodes as [0 0000001] - 33 encodes as [0 0100001] - 129 encodes as [1 0000001] [0 0000001] - 2000 encodes as [1 0001111] [0 1010000] -*/ -static unsigned char * -encode_int(unsigned char *p, svn_filesize_t val) -{ - int n; - svn_filesize_t v; - unsigned char cont; - - SVN_ERR_ASSERT_NO_RETURN(val >= 0); - - /* Figure out how many bytes we'll need. */ - v = val >> 7; - n = 1; - while (v > 0) - { - v = v >> 7; - n++; - } - - SVN_ERR_ASSERT_NO_RETURN(n <= MAX_ENCODED_INT_LEN); - - /* Encode the remaining bytes; n is always the number of bytes - coming after the one we're encoding. */ - while (--n >= 0) - { - cont = ((n > 0) ? 0x1 : 0x0) << 7; - *p++ = (unsigned char)(((val >> (n * 7)) & 0x7f) | cont); - } - - return p; -} - /* Append an encoded integer to a string. 
*/ static void append_encoded_int(svn_stringbuf_t *header, svn_filesize_t val) { - unsigned char buf[MAX_ENCODED_INT_LEN], *p; + unsigned char buf[SVN__MAX_ENCODED_UINT_LEN], *p; - p = encode_int(buf, val); + SVN_ERR_ASSERT_NO_RETURN(val >= 0); + p = svn__encode_uint(buf, (apr_uint64_t)val); svn_stringbuf_appendbytes(header, (const char *)buf, p - buf); } -/* If IN is a string that is >= MIN_COMPRESS_SIZE and the COMPRESSION_LEVEL - is not SVN_DELTA_COMPRESSION_LEVEL_NONE, zlib compress it and places the - result in OUT, with an integer prepended specifying the original size. - If IN is < MIN_COMPRESS_SIZE, or if the compressed version of IN was no - smaller than the original IN, OUT will be a copy of IN with the size - prepended as an integer. */ -static svn_error_t * -zlib_encode(const char *data, - apr_size_t len, - svn_stringbuf_t *out, - int compression_level) -{ - unsigned long endlen; - apr_size_t intlen; - - svn_stringbuf_setempty(out); - append_encoded_int(out, len); - intlen = out->len; - - /* Compression initialization overhead is considered to large for - short buffers. Also, if we don't actually want to compress data, - ZLIB will produce an output no shorter than the input. Hence, - the DATA would directly appended to OUT, so we can do that directly - without calling ZLIB before. 
*/ - if ( (len < MIN_COMPRESS_SIZE) - || (compression_level == SVN_DELTA_COMPRESSION_LEVEL_NONE)) - { - svn_stringbuf_appendbytes(out, data, len); - } - else - { - int zerr; - - svn_stringbuf_ensure(out, svnCompressBound(len) + intlen); - endlen = out->blocksize; - - zerr = compress2((unsigned char *)out->data + intlen, &endlen, - (const unsigned char *)data, len, - compression_level); - if (zerr != Z_OK) - return svn_error_trace(svn_error__wrap_zlib( - zerr, "compress2", - _("Compression of svndiff data failed"))); - - /* Compression didn't help :(, just append the original text */ - if (endlen >= len) - { - svn_stringbuf_appendbytes(out, data, len); - return SVN_NO_ERROR; - } - out->len = endlen + intlen; - out->data[out->len] = 0; - } - return SVN_NO_ERROR; -} - static svn_error_t * send_simple_insertion_window(svn_txdelta_window_t *window, struct encoder_baton *eb) { - unsigned char headers[4 + 5 * MAX_ENCODED_INT_LEN + MAX_INSTRUCTION_LEN]; + unsigned char headers[4 + 5 * SVN__MAX_ENCODED_UINT_LEN + + MAX_INSTRUCTION_LEN]; unsigned char ibuf[MAX_INSTRUCTION_LEN]; unsigned char *header_current; apr_size_t header_len; @@ -226,16 +109,17 @@ send_simple_insertion_window(svn_txdelta_window_t *window, else { ibuf[0] = (0x2 << 6); - ip_len = encode_int(ibuf + 1, window->tview_len) - ibuf; + ip_len = svn__encode_uint(ibuf + 1, window->tview_len) - ibuf; } /* encode the window header. Please note that the source window may * have content despite not being used for deltification. 
*/ - header_current = encode_int(header_current, window->sview_offset); - header_current = encode_int(header_current, window->sview_len); - header_current = encode_int(header_current, window->tview_len); + header_current = svn__encode_uint(header_current, + (apr_uint64_t)window->sview_offset); + header_current = svn__encode_uint(header_current, window->sview_len); + header_current = svn__encode_uint(header_current, window->tview_len); header_current[0] = (unsigned char)ip_len; /* 1 instruction */ - header_current = encode_int(&header_current[1], len); + header_current = svn__encode_uint(&header_current[1], len); /* append instructions (1 to a handful of bytes) */ for (i = 0; i < ip_len; ++i) @@ -319,9 +203,9 @@ window_handler(svn_txdelta_window_t *window, void *baton) if (op->length >> 6 == 0) *ip++ |= (unsigned char)op->length; else - ip = encode_int(ip + 1, op->length); + ip = svn__encode_uint(ip + 1, op->length); if (op->action_code != svn_txdelta_new) - ip = encode_int(ip, op->offset); + ip = svn__encode_uint(ip, op->offset); svn_stringbuf_appendbytes(instructions, (const char *)ibuf, ip - ibuf); } @@ -331,20 +215,20 @@ window_handler(svn_txdelta_window_t *window, void *baton) append_encoded_int(header, window->tview_len); if (eb->version == 1) { - SVN_ERR(zlib_encode(instructions->data, instructions->len, - i1, eb->compression_level)); + SVN_ERR(svn__compress(instructions, i1, eb->compression_level)); instructions = i1; } append_encoded_int(header, instructions->len); if (eb->version == 1) { - svn_stringbuf_t *temp = svn_stringbuf_create_empty(pool); - svn_string_t *tempstr = svn_string_create_empty(pool); - SVN_ERR(zlib_encode(window->new_data->data, window->new_data->len, - temp, eb->compression_level)); - tempstr->data = temp->data; - tempstr->len = temp->len; - newdata = tempstr; + svn_stringbuf_t *compressed = svn_stringbuf_create_empty(pool); + svn_stringbuf_t *original = svn_stringbuf_create_empty(pool); + original->data = (char 
*)window->new_data->data; /* won't be modified */ + original->len = window->new_data->len; + original->blocksize = window->new_data->len + 1; + + SVN_ERR(svn__compress(original, compressed, eb->compression_level)); + newdata = svn_stringbuf__morph_into_string(compressed); } else newdata = window->new_data; @@ -453,128 +337,32 @@ struct decode_baton }; -/* Decode an svndiff-encoded integer into *VAL and return a pointer to - the byte after the integer. The bytes to be decoded live in the - range [P..END-1]. If these bytes do not contain a whole encoded - integer, return NULL; in this case *VAL is undefined. - - See the comment for encode_int() earlier in this file for more detail on - the encoding format. */ +/* Wrapper aroung svn__deencode_uint taking a file size as *VAL. */ static const unsigned char * decode_file_offset(svn_filesize_t *val, const unsigned char *p, const unsigned char *end) { - svn_filesize_t temp = 0; - - if (p + MAX_ENCODED_INT_LEN < end) - end = p + MAX_ENCODED_INT_LEN; - /* Decode bytes until we're done. */ - while (p < end) - { - /* Don't use svn_filesize_t here, because this might be 64 bits - * on 32 bit targets. Optimizing compilers may or may not be - * able to reduce that to the effective code below. */ - unsigned int c = *p++; - - temp = (temp << 7) | (c & 0x7f); - if (c < 0x80) - { - *val = temp; - return p; - } - } + apr_uint64_t temp = 0; + const unsigned char *result = svn__decode_uint(&temp, p, end); + *val = (svn_filesize_t)temp; - return NULL; + return result; } - /* Same as above, only decode into a size variable. */ static const unsigned char * decode_size(apr_size_t *val, const unsigned char *p, const unsigned char *end) { - apr_size_t temp = 0; - - if (p + MAX_ENCODED_INT_LEN < end) - end = p + MAX_ENCODED_INT_LEN; - /* Decode bytes until we're done. 
*/ - while (p < end) - { - apr_size_t c = *p++; - - temp = (temp << 7) | (c & 0x7f); - if (c < 0x80) - { - *val = temp; - return p; - } - } - - return NULL; -} - -/* Decode the possibly-zlib compressed string of length INLEN that is in - IN, into OUT. We expect an integer is prepended to IN that specifies - the original size, and that if encoded size == original size, that the - remaining data is not compressed. - In that case, we will simply return pointer into IN as data pointer for - OUT, COPYLESS_ALLOWED has been set. The, the caller is expected not to - modify the contents of OUT. - An error is returned if the decoded length exceeds the given LIMIT. - */ -static svn_error_t * -zlib_decode(const unsigned char *in, apr_size_t inLen, svn_stringbuf_t *out, - apr_size_t limit) -{ - apr_size_t len; - const unsigned char *oldplace = in; - - /* First thing in the string is the original length. */ - in = decode_size(&len, in, in + inLen); - if (in == NULL) - return svn_error_create(SVN_ERR_SVNDIFF_INVALID_COMPRESSED_DATA, NULL, - _("Decompression of svndiff data failed: no size")); - if (len > limit) - return svn_error_create(SVN_ERR_SVNDIFF_INVALID_COMPRESSED_DATA, NULL, - _("Decompression of svndiff data failed: " - "size too large")); - /* We need to subtract the size of the encoded original length off the - * still remaining input length. 
*/ - inLen -= (in - oldplace); - if (inLen == len) - { - svn_stringbuf_ensure(out, len); - memcpy(out->data, in, len); - out->data[len] = 0; - out->len = len; + apr_uint64_t temp = 0; + const unsigned char *result = svn__decode_uint(&temp, p, end); + if (temp > APR_SIZE_MAX) + return NULL; - return SVN_NO_ERROR; - } - else - { - unsigned long zlen = len; - int zerr; - - svn_stringbuf_ensure(out, len); - zerr = uncompress((unsigned char *)out->data, &zlen, in, inLen); - if (zerr != Z_OK) - return svn_error_trace(svn_error__wrap_zlib( - zerr, "uncompress", - _("Decompression of svndiff data failed"))); - - /* Zlib should not produce something that has a different size than the - original length we stored. */ - if (zlen != len) - return svn_error_create(SVN_ERR_SVNDIFF_INVALID_COMPRESSED_DATA, - NULL, - _("Size of uncompressed data " - "does not match stored original length")); - out->data[zlen] = 0; - out->len = zlen; - } - return SVN_NO_ERROR; + *val = (apr_size_t)temp; + return result; } /* Decode an instruction into OP, returning a pointer to the text @@ -695,6 +483,21 @@ count_and_verify_instructions(int *ninst, return SVN_NO_ERROR; } +static svn_error_t * +zlib_decode(const unsigned char *in, apr_size_t inLen, svn_stringbuf_t *out, + apr_size_t limit) +{ + /* construct a fake string buffer as parameter to svn__decompress. + This is fine as that function never writes to it. */ + svn_stringbuf_t compressed; + compressed.pool = NULL; + compressed.data = (char *)in; + compressed.len = inLen; + compressed.blocksize = inLen + 1; + + return svn__decompress(&compressed, out, limit); +} + /* Given the five integer fields of a window header and a pointer to the remainder of the window contents, fill in a delta window structure *WINDOW. 
New allocations will be performed in POOL; @@ -775,6 +578,10 @@ decode_window(svn_txdelta_window_t *window, svn_filesize_t sview_offset, return SVN_NO_ERROR; } +static const char SVNDIFF_V0[] = { 'S', 'V', 'N', 0 }; +static const char SVNDIFF_V1[] = { 'S', 'V', 'N', 1 }; +#define SVNDIFF_HEADER_SIZE (sizeof(SVNDIFF_V0)) + static svn_error_t * write_handler(void *baton, const char *buffer, @@ -787,14 +594,14 @@ write_handler(void *baton, apr_size_t buflen = *len; /* Chew up four bytes at the beginning for the header. */ - if (db->header_bytes < 4) + if (db->header_bytes < SVNDIFF_HEADER_SIZE) { - apr_size_t nheader = 4 - db->header_bytes; + apr_size_t nheader = SVNDIFF_HEADER_SIZE - db->header_bytes; if (nheader > buflen) nheader = buflen; - if (memcmp(buffer, "SVN\0" + db->header_bytes, nheader) == 0) + if (memcmp(buffer, SVNDIFF_V0 + db->header_bytes, nheader) == 0) db->version = 0; - else if (memcmp(buffer, "SVN\1" + db->header_bytes, nheader) == 0) + else if (memcmp(buffer, SVNDIFF_V1 + db->header_bytes, nheader) == 0) db->version = 1; else return svn_error_create(SVN_ERR_SVNDIFF_INVALID_HEADER, NULL, @@ -830,28 +637,28 @@ write_handler(void *baton, p = decode_file_offset(&sview_offset, p, end); if (p == NULL) - return SVN_NO_ERROR; + break; p = decode_size(&sview_len, p, end); if (p == NULL) - return SVN_NO_ERROR; + break; p = decode_size(&tview_len, p, end); if (p == NULL) - return SVN_NO_ERROR; + break; p = decode_size(&inslen, p, end); if (p == NULL) - return SVN_NO_ERROR; + break; p = decode_size(&newlen, p, end); if (p == NULL) - return SVN_NO_ERROR; + break; if (tview_len > SVN_DELTA_WINDOW_SIZE || sview_len > SVN_DELTA_WINDOW_SIZE || /* for svndiff1, newlen includes the original length */ - newlen > SVN_DELTA_WINDOW_SIZE + MAX_ENCODED_INT_LEN || + newlen > SVN_DELTA_WINDOW_SIZE + SVN__MAX_ENCODED_UINT_LEN || inslen > MAX_INSTRUCTION_SECTION_LEN) return svn_error_create(SVN_ERR_SVNDIFF_CORRUPT_WINDOW, NULL, _("Svndiff contains a too-large window")); @@ 
-904,7 +711,15 @@ write_handler(void *baton, db->subpool = newpool; } - /* NOTREACHED */ + /* At this point we processed all integral windows and DB->BUFFER is empty + or contains partially read window header. + Check that unprocessed data is not larger that theoretical maximum + window header size. */ + if (db->buffer->len > 5 * SVN__MAX_ENCODED_UINT_LEN) + return svn_error_create(SVN_ERR_SVNDIFF_CORRUPT_WINDOW, NULL, + _("Svndiff contains a too-large window header")); + + return SVN_NO_ERROR; } /* Minimal svn_stream_t write handler, doing nothing */ @@ -981,7 +796,7 @@ read_one_byte(unsigned char *byte, svn_stream_t *stream) char c; apr_size_t len = 1; - SVN_ERR(svn_stream_read(stream, &c, &len)); + SVN_ERR(svn_stream_read_full(stream, &c, &len)); if (len == 0) return svn_error_create(SVN_ERR_SVNDIFF_UNEXPECTED_END, NULL, _("Unexpected end of svndiff input")); @@ -989,9 +804,12 @@ read_one_byte(unsigned char *byte, svn_stream_t *stream) return SVN_NO_ERROR; } -/* Read and decode one integer from STREAM into *SIZE. */ +/* Read and decode one integer from STREAM into *SIZE. + Increment *BYTE_COUNTER by the number of chars we have read. */ static svn_error_t * -read_one_size(apr_size_t *size, svn_stream_t *stream) +read_one_size(apr_size_t *size, + apr_size_t *byte_counter, + svn_stream_t *stream) { unsigned char c; @@ -999,6 +817,7 @@ read_one_size(apr_size_t *size, svn_stream_t *stream) while (1) { SVN_ERR(read_one_byte(&c, stream)); + ++*byte_counter; *size = (*size << 7) | (c & 0x7f); if (!(c & 0x80)) break; @@ -1010,30 +829,33 @@ read_one_size(apr_size_t *size, svn_stream_t *stream) static svn_error_t * read_window_header(svn_stream_t *stream, svn_filesize_t *sview_offset, apr_size_t *sview_len, apr_size_t *tview_len, - apr_size_t *inslen, apr_size_t *newlen) + apr_size_t *inslen, apr_size_t *newlen, + apr_size_t *header_len) { unsigned char c; /* Read the source view offset by hand, since it's not an apr_size_t. 
*/ + *header_len = 0; *sview_offset = 0; while (1) { SVN_ERR(read_one_byte(&c, stream)); + ++*header_len; *sview_offset = (*sview_offset << 7) | (c & 0x7f); if (!(c & 0x80)) break; } /* Read the four size fields. */ - SVN_ERR(read_one_size(sview_len, stream)); - SVN_ERR(read_one_size(tview_len, stream)); - SVN_ERR(read_one_size(inslen, stream)); - SVN_ERR(read_one_size(newlen, stream)); + SVN_ERR(read_one_size(sview_len, header_len, stream)); + SVN_ERR(read_one_size(tview_len, header_len, stream)); + SVN_ERR(read_one_size(inslen, header_len, stream)); + SVN_ERR(read_one_size(newlen, header_len, stream)); if (*tview_len > SVN_DELTA_WINDOW_SIZE || *sview_len > SVN_DELTA_WINDOW_SIZE || /* for svndiff1, newlen includes the original length */ - *newlen > SVN_DELTA_WINDOW_SIZE + MAX_ENCODED_INT_LEN || + *newlen > SVN_DELTA_WINDOW_SIZE + SVN__MAX_ENCODED_UINT_LEN || *inslen > MAX_INSTRUCTION_SECTION_LEN) return svn_error_create(SVN_ERR_SVNDIFF_CORRUPT_WINDOW, NULL, _("Svndiff contains a too-large window")); @@ -1055,14 +877,14 @@ svn_txdelta_read_svndiff_window(svn_txdelta_window_t **window, apr_pool_t *pool) { svn_filesize_t sview_offset; - apr_size_t sview_len, tview_len, inslen, newlen, len; + apr_size_t sview_len, tview_len, inslen, newlen, len, header_len; unsigned char *buf; SVN_ERR(read_window_header(stream, &sview_offset, &sview_len, &tview_len, - &inslen, &newlen)); + &inslen, &newlen, &header_len)); len = inslen + newlen; buf = apr_palloc(pool, len); - SVN_ERR(svn_stream_read(stream, (char*)buf, &len)); + SVN_ERR(svn_stream_read_full(stream, (char*)buf, &len)); if (len < inslen + newlen) return svn_error_create(SVN_ERR_SVNDIFF_UNEXPECTED_END, NULL, _("Unexpected end of svndiff input")); @@ -1079,29 +901,28 @@ svn_txdelta_skip_svndiff_window(apr_file_t *file, { svn_stream_t *stream = svn_stream_from_aprfile2(file, TRUE, pool); svn_filesize_t sview_offset; - apr_size_t sview_len, tview_len, inslen, newlen; + apr_size_t sview_len, tview_len, inslen, newlen, 
header_len; apr_off_t offset; SVN_ERR(read_window_header(stream, &sview_offset, &sview_len, &tview_len, - &inslen, &newlen)); + &inslen, &newlen, &header_len)); offset = inslen + newlen; return svn_io_file_seek(file, APR_CUR, &offset, pool); } - svn_error_t * -svn__compress(svn_string_t *in, - svn_stringbuf_t *out, - int compression_level) +svn_txdelta__read_raw_window_len(apr_size_t *window_len, + svn_stream_t *stream, + apr_pool_t *pool) { - return zlib_encode(in->data, in->len, out, compression_level); -} + svn_filesize_t sview_offset; + apr_size_t sview_len, tview_len, inslen, newlen, header_len; -svn_error_t * -svn__decompress(svn_string_t *in, - svn_stringbuf_t *out, - apr_size_t limit) -{ - return zlib_decode((const unsigned char*)in->data, in->len, out, limit); + SVN_ERR(read_window_header(stream, &sview_offset, &sview_len, &tview_len, + &inslen, &newlen, &header_len)); + + *window_len = inslen + newlen + header_len; + return SVN_NO_ERROR; } + diff --git a/subversion/libsvn_delta/text_delta.c b/subversion/libsvn_delta/text_delta.c index be2c434..04eca8a 100644 --- a/subversion/libsvn_delta/text_delta.c +++ b/subversion/libsvn_delta/text_delta.c @@ -366,14 +366,14 @@ txdelta_next_window(svn_txdelta_window_t **window, /* Read the source stream. */ if (b->more_source) { - SVN_ERR(svn_stream_read(b->source, b->buf, &source_len)); + SVN_ERR(svn_stream_read_full(b->source, b->buf, &source_len)); b->more_source = (source_len == SVN_DELTA_WINDOW_SIZE); } else source_len = 0; /* Read the target stream. 
*/ - SVN_ERR(svn_stream_read(b->target, b->buf + source_len, &target_len)); + SVN_ERR(svn_stream_read_full(b->target, b->buf + source_len, &target_len)); b->pos += source_len; if (target_len == 0) @@ -522,7 +522,7 @@ tpush_write_handler(void *baton, const char *data, apr_size_t *len) if (tb->source_len == 0 && !tb->source_done) { tb->source_len = SVN_DELTA_WINDOW_SIZE; - SVN_ERR(svn_stream_read(tb->source, tb->buf, &tb->source_len)); + SVN_ERR(svn_stream_read_full(tb->source, tb->buf, &tb->source_len)); if (tb->source_len < SVN_DELTA_WINDOW_SIZE) tb->source_done = TRUE; } @@ -623,68 +623,31 @@ size_buffer(char **buf, apr_size_t *buf_size, return SVN_NO_ERROR; } -/* Copy LEN bytes from SOURCE to TARGET, optimizing for the case where LEN - * is often very small. Return a pointer to the first byte after the copied - * target range, unlike standard memcpy(), as a potential further - * optimization for the caller. - * - * memcpy() is hard to tune for a wide range of buffer lengths. Therefore, - * it is often tuned for high throughput on large buffers and relatively - * low latency for mid-sized buffers (tens of bytes). However, the overhead - * for very small buffers (<10 bytes) is still high. Even passing the - * parameters, for instance, may take as long as copying 3 bytes. - * - * Because short copy sequences seem to be a common case, at least in - * "format 2" FSFS repositories, we copy them directly. Larger buffer sizes - * aren't hurt measurably by the exta 'if' clause. */ -static APR_INLINE char * -fast_memcpy(char *target, const char *source, apr_size_t len) -{ - if (len > 7) - { - memcpy(target, source, len); - target += len; - } - else - { - /* memcpy is not exactly fast for small block sizes. - * Since they are common, let's run optimized code for them. */ - const char *end = source + len; - for (; source != end; source++) - *(target++) = *source; - } - - return target; -} - /* Copy LEN bytes from SOURCE to TARGET. 
Unlike memmove() or memcpy(), * create repeating patterns if the source and target ranges overlap. * Return a pointer to the first byte after the copied target range. */ static APR_INLINE char * patterning_copy(char *target, const char *source, apr_size_t len) { - const char *end = source + len; - - /* On many machines, we can do "chunky" copies. */ - -#if SVN_UNALIGNED_ACCESS_IS_OK - - if (end + sizeof(apr_uint32_t) <= target) + /* If the source and target overlap, repeat the overlapping pattern + in the target buffer. Always copy from the source buffer because + presumably it will be in the L1 cache after the first iteration + and doing this should avoid pipeline stalls due to write/read + dependencies. */ + const apr_size_t overlap = target - source; + while (len > overlap) { - /* Source and target are at least 4 bytes apart, so we can copy in - * 4-byte chunks. */ - for (; source + sizeof(apr_uint32_t) <= end; - source += sizeof(apr_uint32_t), - target += sizeof(apr_uint32_t)) - *(apr_uint32_t *)(target) = *(apr_uint32_t *)(source); + memcpy(target, source, overlap); + target += overlap; + len -= overlap; } -#endif - - /* fall through to byte-wise copy (either for the below-chunk-size tail - * or the whole copy) */ - for (; source != end; source++) - *(target++) = *source; + /* Copy any remaining source pattern. */ + if (len) + { + memcpy(target, source, len); + target += len; + } return target; } @@ -697,6 +660,11 @@ svn_txdelta_apply_instructions(svn_txdelta_window_t *window, const svn_txdelta_op_t *op; apr_size_t tpos = 0; + /* Nothing to do for empty buffers. + * This check allows for NULL TBUF in that case. */ + if (*tlen == 0) + return; + for (op = window->ops; op < window->ops + window->num_ops; op++) { const apr_size_t buf_len = (op->length < *tlen - tpos @@ -711,7 +679,7 @@ svn_txdelta_apply_instructions(svn_txdelta_window_t *window, /* Copy from source area. 
*/ assert(sbuf); assert(op->offset + op->length <= window->sview_len); - fast_memcpy(tbuf + tpos, sbuf + op->offset, buf_len); + memcpy(tbuf + tpos, sbuf + op->offset, buf_len); break; case svn_txdelta_target: @@ -728,9 +696,9 @@ svn_txdelta_apply_instructions(svn_txdelta_window_t *window, case svn_txdelta_new: /* Copy from window new area. */ assert(op->offset + op->length <= window->new_data->len); - fast_memcpy(tbuf + tpos, - window->new_data->data + op->offset, - buf_len); + memcpy(tbuf + tpos, + window->new_data->data + op->offset, + buf_len); break; default: @@ -747,20 +715,6 @@ svn_txdelta_apply_instructions(svn_txdelta_window_t *window, *tlen = tpos; } -/* This is a private interlibrary compatibility wrapper. */ -void -svn_txdelta__apply_instructions(svn_txdelta_window_t *window, - const char *sbuf, char *tbuf, - apr_size_t *tlen); -void -svn_txdelta__apply_instructions(svn_txdelta_window_t *window, - const char *sbuf, char *tbuf, - apr_size_t *tlen) -{ - svn_txdelta_apply_instructions(window, sbuf, tbuf, tlen); -} - - /* Apply WINDOW to the streams given by APPL. */ static svn_error_t * apply_window(svn_txdelta_window_t *window, void *baton) @@ -819,7 +773,7 @@ apply_window(svn_txdelta_window_t *window, void *baton) if (ab->sbuf_len < window->sview_len) { len = window->sview_len - ab->sbuf_len; - err = svn_stream_read(ab->source, ab->sbuf + ab->sbuf_len, &len); + err = svn_stream_read_full(ab->source, ab->sbuf + ab->sbuf_len, &len); if (err == SVN_NO_ERROR && len != window->sview_len - ab->sbuf_len) err = svn_error_create(SVN_ERR_INCOMPLETE_DATA, NULL, "Delta source ended unexpectedly"); @@ -836,13 +790,7 @@ apply_window(svn_txdelta_window_t *window, void *baton) /* Write out the output. */ - /* ### We've also considered just adding two (optionally null) - arguments to svn_stream_create(): read_checksum and - write_checksum. 
Then instead of every caller updating an md5 - context when it calls svn_stream_write() or svn_stream_read(), - streams would do it automatically, and verify the checksum in - svn_stream_closed(). But this might be overkill for issue #689; - so for now we just update the context here. */ + /* Just update the context here. */ if (ab->result_digest) apr_md5_update(&(ab->md5_context), ab->tbuf, len); @@ -936,7 +884,7 @@ svn_error_t *svn_txdelta_send_stream(svn_stream_t *stream, { apr_size_t read_len = SVN__STREAM_CHUNK_SIZE; - SVN_ERR(svn_stream_read(stream, read_buf, &read_len)); + SVN_ERR(svn_stream_read_full(stream, read_buf, &read_len)); if (read_len == 0) break; diff --git a/subversion/libsvn_delta/xdelta.c b/subversion/libsvn_delta/xdelta.c index 2075a51..2e5bb26 100644 --- a/subversion/libsvn_delta/xdelta.c +++ b/subversion/libsvn_delta/xdelta.c @@ -29,6 +29,7 @@ #include "svn_hash.h" #include "svn_delta.h" +#include "private/svn_string_private.h" #include "delta.h" /* This is pseudo-adler32. It is adler32 without the prime modulus. @@ -43,6 +44,15 @@ */ #define MATCH_BLOCKSIZE 64 +/* Size of the checksum presence FLAGS array in BLOCKS_T. With standard + MATCH_BLOCKSIZE and SVN_DELTA_WINDOW_SIZE, 32k entries is about 20x + the number of checksums that actually occur, i.e. we expect a >95% + probability that non-matching checksums get already detected by checking + against the FLAGS array. + Must be a power of 2. + */ +#define FLAGS_COUNT (32 * 1024) + /* "no" / "invalid" / "unused" value for positions within the delta windows */ #define NO_POSITION ((apr_uint32_t)-1) @@ -104,7 +114,7 @@ struct block (our delta window size much much smaller then 4GB). That reduces the hash table size by 50% from 32to 16KB and makes it easier to fit into the CPU's L1 cache. */ - apr_uint32_t pos; /* NO_POSITION -> block is not used */ + apr_uint32_t pos; /* NO_POSITION -> block is not used */ }; /* A hash table, using open addressing, of the blocks of the source. 
*/ @@ -117,8 +127,19 @@ struct blocks hte same width as the block position index, (struct block).pos. */ apr_uint32_t max; + /* Source buffer that the positions in SLOTS refer to. */ const char* data; + + /* Bit array indicating whether there may be a matching slot for a given + adler32 checksum. Since FLAGS has much more entries than SLOTS, this + will indicate most cases of non-matching checksums with a "0" bit, i.e. + as "known not to have a match". + The mapping of adler32 checksum bits is [0..2][16..27] (LSB -> MSB), + i.e. address the byte by the multiplicative part of adler32 and address + the bits in that byte by the additive part of adler32. */ + char flags[FLAGS_COUNT / 8]; + /* The vector of blocks. A pos value of NO_POSITION represents an unused slot. */ struct block *slots; @@ -135,6 +156,15 @@ static apr_uint32_t hash_func(apr_uint32_t sum) return sum ^ (sum >> 12); } +/* Return the offset in BLOCKS.FLAGS for the adler32 SUM. */ +static apr_uint32_t hash_flags(apr_uint32_t sum) +{ + /* The upper half of SUM has a wider value range than the lower 16 bit. + Also, we want to a different folding than HASH_FUNC to minimize + correlation between different hash levels. */ + return (sum >> 16) & ((FLAGS_COUNT / 8) - 1); +} + /* Insert a block with the checksum ADLERSUM at position POS in the source data into the table BLOCKS. Ignore true duplicates, i.e. blocks with actually the same content. */ @@ -152,6 +182,7 @@ add_block(struct blocks *blocks, apr_uint32_t adlersum, apr_uint32_t pos) blocks->slots[h].adlersum = adlersum; blocks->slots[h].pos = pos; + blocks->flags[hash_flags(adlersum)] |= 1 << (adlersum & 7); } /* Find a block in BLOCKS with the checksum ADLERSUM and matching the content @@ -216,6 +247,9 @@ init_blocks_table(const char *data, blocks->slots[i].pos = NO_POSITION; } + /* No checksum entries in SLOTS, yet => reset all checksum flags. 
*/ + memset(blocks->flags, 0, sizeof(blocks->flags)); + /* If there is an odd block at the end of the buffer, we will not use that shorter block for deltification (only indirectly as an extension of some previous block). */ @@ -223,73 +257,6 @@ init_blocks_table(const char *data, add_block(blocks, init_adler32(data + i), i); } -/* Return the lowest position at which A and B differ. If no difference - * can be found in the first MAX_LEN characters, MAX_LEN will be returned. - */ -static apr_size_t -match_length(const char *a, const char *b, apr_size_t max_len) -{ - apr_size_t pos = 0; - -#if SVN_UNALIGNED_ACCESS_IS_OK - - /* Chunky processing is so much faster ... - * - * We can't make this work on architectures that require aligned access - * because A and B will probably have different alignment. So, skipping - * the first few chars until alignment is reached is not an option. - */ - for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t)) - if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos)) - break; - -#endif - - for (; pos < max_len; ++pos) - if (a[pos] != b[pos]) - break; - - return pos; -} - -/* Return the number of bytes before A and B that don't differ. If no - * difference can be found in the first MAX_LEN characters, MAX_LEN will - * be returned. Please note that A-MAX_LEN and B-MAX_LEN must both be - * valid addresses. - */ -static apr_size_t -reverse_match_length(const char *a, const char *b, apr_size_t max_len) -{ - apr_size_t pos = 0; - -#if SVN_UNALIGNED_ACCESS_IS_OK - - /* Chunky processing is so much faster ... - * - * We can't make this work on architectures that require aligned access - * because A and B will probably have different alignment. So, skipping - * the first few chars until alignment is reached is not an option. 
- */ - for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t)) - if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos)) - break; - - pos -= sizeof(apr_size_t); - -#endif - - /* If we find a mismatch at -pos, pos-1 characters matched. - */ - while (++pos <= max_len) - if (a[0-pos] != b[0-pos]) - return pos - 1; - - /* No mismatch found -> at least MAX_LEN matching chars. - */ - return max_len; -} - - /* Try to find a match for the target data B in BLOCKS, and then extend the match as long as data in A and B at the match position continues to match. We set the position in A we ended up in (in @@ -323,9 +290,9 @@ find_match(const struct blocks *blocks, max_delta = asize - apos - MATCH_BLOCKSIZE < bsize - bpos - MATCH_BLOCKSIZE ? asize - apos - MATCH_BLOCKSIZE : bsize - bpos - MATCH_BLOCKSIZE; - delta = match_length(a + apos + MATCH_BLOCKSIZE, - b + bpos + MATCH_BLOCKSIZE, - max_delta); + delta = svn_cstring__match_length(a + apos + MATCH_BLOCKSIZE, + b + bpos + MATCH_BLOCKSIZE, + max_delta); /* See if we can extend backwards (max MATCH_BLOCKSIZE-1 steps because A's content has been sampled only every MATCH_BLOCKSIZE positions). */ @@ -362,7 +329,8 @@ store_delta_trailer(svn_txdelta__ops_baton_t *build_baton, if (max_len == 0) return; - end_match = reverse_match_length(a + asize, b + bsize, max_len); + end_match = svn_cstring__reverse_match_length(a + asize, b + bsize, + max_len); if (end_match <= 4) end_match = 0; @@ -409,12 +377,12 @@ compute_delta(svn_txdelta__ops_baton_t *build_baton, { struct blocks blocks; apr_uint32_t rolling; - apr_size_t lo = 0, pending_insert_start = 0; + apr_size_t lo = 0, pending_insert_start = 0, upper; /* Optimization: directly compare window starts. If more than 4 * bytes match, we can immediately create a matching windows. * Shorter sequences result in a net data increase. */ - lo = match_length(a, b, asize > bsize ? bsize : asize); + lo = svn_cstring__match_length(a, b, asize > bsize ? 
bsize : asize); if ((lo > 4) || (lo == bsize)) { svn_txdelta__insert_op(build_baton, svn_txdelta_source, @@ -432,19 +400,32 @@ compute_delta(svn_txdelta__ops_baton_t *build_baton, return; } + upper = bsize - MATCH_BLOCKSIZE; /* this is now known to be >= LO */ + /* Initialize the matches table. */ init_blocks_table(a, asize, &blocks, pool); /* Initialize our rolling checksum. */ rolling = init_adler32(b + lo); - while (lo < bsize) + while (lo < upper) { - apr_size_t matchlen = 0; + apr_size_t matchlen; apr_size_t apos; - if (lo + MATCH_BLOCKSIZE <= bsize) - matchlen = find_match(&blocks, rolling, a, asize, b, bsize, - &lo, &apos, pending_insert_start); + /* Quickly skip positions whose respective ROLLING checksums + definitely do not match any SLOT in BLOCKS. */ + while (!(blocks.flags[hash_flags(rolling)] & (1 << (rolling & 7))) + && lo < upper) + { + rolling = adler32_replace(rolling, b[lo], b[lo+MATCH_BLOCKSIZE]); + lo++; + } + + /* LO is still <= UPPER, i.e. the following lookup is legal: + Closely check whether we've got a match for the current location. + Due to the above pre-filter, chances are that we find one. */ + matchlen = find_match(&blocks, rolling, a, asize, b, bsize, + &lo, &apos, pending_insert_start); /* If we didn't find a real match, insert the byte at the target position into the pending insert. */ @@ -468,7 +449,8 @@ compute_delta(svn_txdelta__ops_baton_t *build_baton, { /* the match borders on the previous op. Maybe, we found a * match that is better than / overlapping the previous one. */ - apr_size_t len = reverse_match_length(a + apos, b + lo, apos < lo ? apos : lo); + apr_size_t len = svn_cstring__reverse_match_length + (a + apos, b + lo, apos < lo ? apos : lo); if (len > 0) { len = svn_txdelta__remove_copy(build_baton, len); |