diff options
author | unknown <tomas@poseidon.ndb.mysql.com> | 2005-07-12 20:01:22 +0200 |
---|---|---|
committer | unknown <tomas@poseidon.ndb.mysql.com> | 2005-07-12 20:01:22 +0200 |
commit | e4be45da93b3b64ede35053a7fa3989a40a2766e (patch) | |
tree | 8fd7b30e9e4cce3fdfc60e700fdb2d668f2f6449 /storage | |
parent | f9027b7a050186c504b9a5ab7e164c0cbaacb8f3 (diff) | |
parent | ff7db9bfd9f0c50d75f3085c6554ea8ecb9b906f (diff) | |
download | mariadb-git-e4be45da93b3b64ede35053a7fa3989a40a2766e.tar.gz |
Merge
BitKeeper/etc/logging_ok:
auto-union
BUILD/autorun.sh:
Auto merged
BitKeeper/etc/config:
Auto merged
Makefile.am:
Auto merged
include/my_bitmap.h:
Auto merged
libmysqld/Makefile.am:
Auto merged
mysql-test/mysql-test-run.pl:
Auto merged
mysql-test/mysql-test-run.sh:
Auto merged
mysql-test/r/grant.result:
Auto merged
mysql-test/r/ps_6bdb.result:
Auto merged
mysql-test/r/ps_7ndb.result:
Auto merged
mysys/Makefile.am:
Auto merged
mysys/default.c:
Auto merged
scripts/mysql_create_system_tables.sh:
Auto merged
scripts/mysql_fix_privilege_tables.sql:
Auto merged
sql/Makefile.am:
Auto merged
sql/field.cc:
Auto merged
sql/field.h:
Auto merged
sql/ha_ndbcluster.h:
Auto merged
sql/handler.cc:
Auto merged
sql/handler.h:
Auto merged
sql/item.cc:
Auto merged
sql/log_event.cc:
Auto merged
sql/mysql_priv.h:
Auto merged
sql/opt_range.cc:
Auto merged
sql/sql_acl.cc:
Auto merged
sql/sql_acl.h:
Auto merged
sql/sql_base.cc:
Auto merged
sql/sql_cache.cc:
Auto merged
sql/sql_class.h:
Auto merged
sql/sql_delete.cc:
Auto merged
sql/sql_lex.cc:
Auto merged
sql/sql_parse.cc:
Auto merged
sql/sql_table.cc:
Auto merged
sql/sql_udf.cc:
Auto merged
sql/sql_yacc.yy:
Auto merged
storage/heap/Makefile.am:
Auto merged
storage/heap/hp_hash.c:
Auto merged
storage/innobase/btr/btr0btr.c:
Auto merged
storage/innobase/btr/btr0cur.c:
Auto merged
storage/innobase/configure.in:
Auto merged
storage/innobase/fil/fil0fil.c:
Auto merged
storage/innobase/ibuf/ibuf0ibuf.c:
Auto merged
storage/innobase/include/os0file.h:
Auto merged
storage/innobase/include/page0cur.h:
Auto merged
storage/innobase/include/row0mysql.h:
Auto merged
storage/innobase/include/srv0srv.h:
Auto merged
storage/innobase/include/trx0trx.h:
Auto merged
storage/innobase/include/trx0trx.ic:
Auto merged
storage/innobase/lock/lock0lock.c:
Auto merged
storage/innobase/log/log0recv.c:
Auto merged
storage/innobase/os/os0file.c:
Auto merged
storage/innobase/page/page0cur.c:
Auto merged
storage/innobase/page/page0page.c:
Auto merged
storage/innobase/rem/rem0rec.c:
Auto merged
storage/innobase/row/row0mysql.c:
Auto merged
storage/innobase/row/row0sel.c:
Auto merged
storage/innobase/row/row0upd.c:
Auto merged
storage/innobase/srv/srv0srv.c:
Auto merged
storage/innobase/trx/trx0trx.c:
Auto merged
storage/innobase/trx/trx0undo.c:
Auto merged
storage/myisam/Makefile.am:
Auto merged
storage/myisam/mi_create.c:
Auto merged
storage/myisam/mi_open.c:
Auto merged
storage/myisam/mi_packrec.c:
Auto merged
storage/myisam/mi_unique.c:
Auto merged
storage/myisam/myisampack.c:
Auto merged
storage/myisammrg/Makefile.am:
Auto merged
storage/ndb/include/mgmcommon/ConfigRetriever.hpp:
Auto merged
storage/ndb/include/transporter/TransporterDefinitions.hpp:
Auto merged
storage/ndb/src/common/mgmcommon/ConfigRetriever.cpp:
Auto merged
storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp:
Auto merged
storage/ndb/src/mgmsrv/main.cpp:
Auto merged
storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp:
Auto merged
storage/ndb/test/ndbapi/create_tab.cpp:
Auto merged
storage/ndb/test/ndbapi/testBlobs.cpp:
Auto merged
strings/ctype-big5.c:
Auto merged
strings/ctype-ucs2.c:
Auto merged
support-files/mysql.spec.sh:
Auto merged
configure.in:
merge
mysql-test/t/disabled.def:
merge
mysys/my_bitmap.c:
SCCS merged
sql/ha_federated.cc:
merge
sql/ha_innodb.cc:
merge
sql/ha_ndbcluster.cc:
merge
sql/mysqld.cc:
e
merge
sql/sql_insert.cc:
merge
sql/sql_lex.h:
merge
sql/sql_load.cc:
merge
sql/sql_select.cc:
merge
e
C
sql/sql_update.cc:
merge
sql/table.cc:
merge
Diffstat (limited to 'storage')
35 files changed, 1714 insertions, 398 deletions
diff --git a/storage/heap/Makefile.am b/storage/heap/Makefile.am index f9f0ba775ff..2890dc6ee23 100644 --- a/storage/heap/Makefile.am +++ b/storage/heap/Makefile.am @@ -14,7 +14,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -INCLUDES = -I$(top_srcdir)/include +INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include LDADD = libheap.a $(top_srcdir)/mysys/libmysys.a \ $(top_srcdir)/dbug/libdbug.a \ $(top_srcdir)/strings/libmystrings.a diff --git a/storage/heap/hp_hash.c b/storage/heap/hp_hash.c index 9e4636ebdc0..d643f776731 100644 --- a/storage/heap/hp_hash.c +++ b/storage/heap/hp_hash.c @@ -552,9 +552,9 @@ int hp_rec_key_cmp(HP_KEYDEF *keydef, const byte *rec1, const byte *rec2, if (cs->mbmaxlen > 1) { uint char_length= seg->length / cs->mbmaxlen; - char_length1= my_charpos(cs, pos1, pos1 + char_length1, char_length); + char_length1= my_charpos(cs, pos1, pos1 + char_length1, char_length1); set_if_smaller(char_length1, seg->length); - char_length2= my_charpos(cs, pos2, pos2 + char_length2, char_length); + char_length2= my_charpos(cs, pos2, pos2 + char_length2, char_length2); set_if_smaller(char_length2, seg->length); } diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c index 2d84586216a..c27fb73ff8d 100644 --- a/storage/innobase/btr/btr0btr.c +++ b/storage/innobase/btr/btr0btr.c @@ -143,7 +143,7 @@ btr_root_get( root_page_no = dict_tree_get_page(tree); root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr); - ut_a(!!page_is_comp(root) == + ut_a((ibool)!!page_is_comp(root) == UT_LIST_GET_FIRST(tree->tree_indexes)->table->comp); return(root); @@ -2014,7 +2014,7 @@ btr_compress( page = btr_cur_get_page(cursor); tree = btr_cur_get_tree(cursor); comp = page_is_comp(page); - ut_a(!!comp == cursor->index->table->comp); + ut_a((ibool)!!comp == cursor->index->table->comp); ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree), MTR_MEMO_X_LOCK)); @@ -2508,7 +2508,7 @@ btr_index_rec_validate( return(TRUE); } - if (UNIV_UNLIKELY(!!page_is_comp(page) != index->table->comp)) { + if (UNIV_UNLIKELY((ibool)!!page_is_comp(page) != index->table->comp)) { btr_index_rec_validate_report(page, rec, index); fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n", (ulong) !!page_is_comp(page), diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index 4ae27f007d6..f81cce5b8e9 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -316,7 +316,9 @@ btr_cur_search_to_nth_level( if (btr_search_latch.writer == RW_LOCK_NOT_LOCKED && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ && !estimate +#ifdef PAGE_CUR_LE_OR_EXTENDS && mode != PAGE_CUR_LE_OR_EXTENDS +#endif /* PAGE_CUR_LE_OR_EXTENDS */ && srv_use_adaptive_hash_indexes && btr_search_guess_on_hash(index, info, tuple, mode, latch_mode, cursor, @@ -390,9 +392,12 @@ btr_cur_search_to_nth_level( page_mode = PAGE_CUR_LE; break; default: - ut_ad(mode == PAGE_CUR_L - || mode == PAGE_CUR_LE +#ifdef PAGE_CUR_LE_OR_EXTENDS + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE || mode == PAGE_CUR_LE_OR_EXTENDS); +#else /* PAGE_CUR_LE_OR_EXTENDS */ + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE); +#endif /* PAGE_CUR_LE_OR_EXTENDS */ page_mode = mode; break; } @@ -507,7 +512,7 @@ retry_page_get: /* x-latch the page */ page = btr_page_get(space, page_no, RW_X_LATCH, mtr); - ut_a(!!page_is_comp(page) + ut_a((ibool)!!page_is_comp(page) == index->table->comp); } @@ -1385,7 +1390,7 @@ btr_cur_parse_update_in_place( goto func_exit; } - ut_a(!!page_is_comp(page) == index->table->comp); + ut_a((ibool)!!page_is_comp(page) == index->table->comp); rec = page + rec_offset; /* We do not need to reserve btr_search_latch, as the page is only diff --git a/storage/innobase/configure.in b/storage/innobase/configure.in index baf11272ab9..c56bd8274c4 100644 --- a/storage/innobase/configure.in +++ b/storage/innobase/configure.in @@ -117,6 +117,13 @@ case "$target" in CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";; esac +# must go in pair with AR as set by MYSQL_CHECK_AR +if test -z "$ARFLAGS" +then + ARFLAGS="cru" +fi +AC_SUBST(ARFLAGS) + AC_OUTPUT(Makefile os/Makefile ut/Makefile btr/Makefile dnl buf/Makefile data/Makefile dnl dict/Makefile dyn/Makefile dnl diff --git a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c index 299b55f3d2b..20f522c1a60 100644 --- a/storage/innobase/fil/fil0fil.c +++ b/storage/innobase/fil/fil0fil.c @@ -3410,9 +3410,9 @@ fil_extend_space_to_desired_size( fil_space_t* space; byte* buf2; byte* buf; + ulint buf_size; ulint start_page_no; ulint file_start_page_no; - ulint n_pages; ulint offset_high; ulint offset_low; ibool success = TRUE; @@ -3437,22 +3437,20 @@ fil_extend_space_to_desired_size( fil_node_prepare_for_io(node, system, space); - /* Extend 1 MB at a time */ - - buf2 = mem_alloc(1024 * 1024 + UNIV_PAGE_SIZE); - buf = ut_align(buf2, UNIV_PAGE_SIZE); - - memset(buf, '\0', 1024 * 1024); - start_page_no = space->size; file_start_page_no = space->size - node->size; - while (start_page_no < size_after_extend) { - n_pages = size_after_extend - start_page_no; + /* Extend at most 64 pages at a time */ + buf_size = ut_min(64, size_after_extend - start_page_no) + * UNIV_PAGE_SIZE; + buf2 = mem_alloc(buf_size + UNIV_PAGE_SIZE); + buf = ut_align(buf2, UNIV_PAGE_SIZE); - if (n_pages > (1024 * 1024) / UNIV_PAGE_SIZE) { - n_pages = (1024 * 1024) / UNIV_PAGE_SIZE; - } + memset(buf, 0, buf_size); + + while (start_page_no < size_after_extend) { + ulint n_pages = ut_min(buf_size / UNIV_PAGE_SIZE, + size_after_extend - start_page_no); offset_high = (start_page_no - file_start_page_no) / (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE)); @@ -4034,7 +4032,7 @@ fil_aio_wait( if (os_aio_use_native_aio) { srv_set_io_thread_op_info(segment, "native aio handle"); #ifdef WIN_ASYNC_IO - ret = os_aio_windows_handle(segment, 0, (void**) &fil_node, + ret = os_aio_windows_handle(segment, 0, &fil_node, &message, &type); #elif defined(POSIX_ASYNC_IO) ret = os_aio_posix_handle(segment, &fil_node, &message); diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c index 712d43f916c..d7fa48b6e66 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.c +++ b/storage/innobase/ibuf/ibuf0ibuf.c @@ -2810,7 +2810,7 @@ ibuf_insert_to_index_page( ut_ad(ibuf_inside()); ut_ad(dtuple_check_typed(entry)); - if (UNIV_UNLIKELY(index->table->comp != !!page_is_comp(page))) { + if (UNIV_UNLIKELY(index->table->comp != (ibool)!!page_is_comp(page))) { fputs( "InnoDB: Trying to insert a record from the insert buffer to an index page\n" "InnoDB: but the 'compact' flag does not match!\n", stderr); diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index e75281dd93c..362e3552411 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -373,7 +373,7 @@ os_file_get_size_as_iblonglong( /* out: size in bytes, -1 if error */ os_file_t file); /* in: handle to a file */ /*************************************************************************** -Sets a file size. This function can be used to extend or truncate a file. */ +Write the specified number of zeros to a newly created file. */ ibool os_file_set_size( diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h index e89e740e775..b03302b0e77 100644 --- a/storage/innobase/include/page0cur.h +++ b/storage/innobase/include/page0cur.h @@ -26,11 +26,13 @@ Created 10/4/1994 Heikki Tuuri #define PAGE_CUR_GE 2 #define PAGE_CUR_L 3 #define PAGE_CUR_LE 4 -#define PAGE_CUR_LE_OR_EXTENDS 5 /* This is a search mode used in +/*#define PAGE_CUR_LE_OR_EXTENDS 5*/ /* This is a search mode used in "column LIKE 'abc%' ORDER BY column DESC"; we have to find strings which are <= 'abc' or which extend it */ -#define PAGE_CUR_DBG 6 +#ifdef UNIV_SEARCH_DEBUG +# define PAGE_CUR_DBG 6 /* As PAGE_CUR_LE, but skips search shortcut */ +#endif /* UNIV_SEARCH_DEBUG */ #ifdef PAGE_CUR_ADAPT # ifdef UNIV_SEARCH_PERF_STAT diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h index 4bb9fa63cd1..4e6ff73b0f8 100644 --- a/storage/innobase/include/row0mysql.h +++ b/storage/innobase/include/row0mysql.h @@ -243,17 +243,27 @@ row_update_for_mysql( the MySQL format */ row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL handle */ - /************************************************************************* -Does an unlock of a row for MySQL. */ +This can only be used when srv_locks_unsafe_for_binlog is TRUE. Before +calling this function we must use trx_reset_new_rec_lock_info() and +trx_register_new_rec_lock() to store the information which new record locks +really were set. This function removes a newly set lock under prebuilt->pcur, +and also under prebuilt->clust_pcur. Currently, this is only used and tested +in the case of an UPDATE or a DELETE statement, where the row lock is of the +LOCK_X type. +Thus, this implements a 'mini-rollback' that releases the latest record +locks we set. */ int row_unlock_for_mysql( /*=================*/ /* out: error code or DB_SUCCESS */ - row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL + row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL handle */ - + ibool has_latches_on_recs);/* TRUE if called so that we have + the latches on the records under pcur + and clust_pcur, and we do not need to + reposition the cursors. */ /************************************************************************* Creates an query graph node of 'update' type to be used in the MySQL interface. */ diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 6e4241965c1..116ae7b6438 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -182,6 +182,7 @@ extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs, #define kernel_mutex (*kernel_mutex_temp) #define SRV_MAX_N_IO_THREADS 100 +#define SRV_CONCURRENCY_THRESHOLD 20 /* Array of English strings describing the current state of an i/o handler thread */ diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 8df50d6703d..146730d46f8 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -16,10 +16,39 @@ Created 3/26/1996 Heikki Tuuri #include "que0types.h" #include "mem0mem.h" #include "read0types.h" +#include "dict0types.h" #include "trx0xa.h" extern ulint trx_n_mysql_transactions; +/***************************************************************** +Resets the new record lock info in a transaction struct. */ +UNIV_INLINE +void +trx_reset_new_rec_lock_info( +/*========================*/ + trx_t* trx); /* in: transaction struct */ +/***************************************************************** +Registers that we have set a new record lock on an index. We only have space +to store 2 indexes! If this is called to store more than 2 indexes after +trx_reset_new_rec_lock_info(), then this function does nothing. */ +UNIV_INLINE +void +trx_register_new_rec_lock( +/*======================*/ + trx_t* trx, /* in: transaction struct */ + dict_index_t* index); /* in: trx sets a new record lock on this + index */ +/***************************************************************** +Checks if trx has set a new record lock on an index. */ +UNIV_INLINE +ibool +trx_new_rec_locks_contain( +/*======================*/ + /* out: TRUE if trx has set a new record lock + on index */ + trx_t* trx, /* in: transaction struct */ + dict_index_t* index); /* in: index */ /************************************************************************ Releases the search latch if trx has reserved it. */ @@ -495,8 +524,18 @@ struct trx_struct{ lock_t* auto_inc_lock; /* possible auto-inc lock reserved by the transaction; note that it is also in the lock list trx_locks */ - ibool trx_create_lock;/* this is TRUE if we have created a - new lock for a record accessed */ + dict_index_t* new_rec_locks[2];/* these are normally NULL; if + srv_locks_unsafe_for_binlog is TRUE, + in a cursor search, if we set a new + record lock on an index, this is set + to point to the index; this is + used in releasing the locks under the + cursors if we are performing an UPDATE + and we determine after retrieving + the row that it does not need to be + locked; thus, these can be used to + implement a 'mini-rollback' that + releases the latest record locks */ UT_LIST_NODE_T(trx_t) trx_list; /* list of transactions */ UT_LIST_NODE_T(trx_t) diff --git a/storage/innobase/include/trx0trx.ic b/storage/innobase/include/trx0trx.ic index 78e5acda148..54cf2ff331f 100644 --- a/storage/innobase/include/trx0trx.ic +++ b/storage/innobase/include/trx0trx.ic @@ -39,4 +39,60 @@ trx_start_if_not_started_low( } } +/***************************************************************** +Resets the new record lock info in a transaction struct. */ +UNIV_INLINE +void +trx_reset_new_rec_lock_info( +/*========================*/ + trx_t* trx) /* in: transaction struct */ +{ + trx->new_rec_locks[0] = NULL; + trx->new_rec_locks[1] = NULL; +} + +/***************************************************************** +Registers that we have set a new record lock on an index. We only have space +to store 2 indexes! If this is called to store more than 2 indexes after +trx_reset_new_rec_lock_info(), then this function does nothing. */ +UNIV_INLINE +void +trx_register_new_rec_lock( +/*======================*/ + trx_t* trx, /* in: transaction struct */ + dict_index_t* index) /* in: trx sets a new record lock on this + index */ +{ + if (trx->new_rec_locks[0] == NULL) { + trx->new_rec_locks[0] = index; + + return; + } + + if (trx->new_rec_locks[0] == index) { + return; + } + + if (trx->new_rec_locks[1] != NULL) { + + return; + } + + trx->new_rec_locks[1] = index; +} + +/***************************************************************** +Checks if trx has set a new record lock on an index. */ +UNIV_INLINE +ibool +trx_new_rec_locks_contain( +/*======================*/ + /* out: TRUE if trx has set a new record lock + on index */ + trx_t* trx, /* in: transaction struct */ + dict_index_t* index) /* in: index */ +{ + return(trx->new_rec_locks[0] == index + || trx->new_rec_locks[1] == index); +} diff --git a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c index 48db7ced0cb..280c4871ee9 100644 --- a/storage/innobase/lock/lock0lock.c +++ b/storage/innobase/lock/lock0lock.c @@ -956,7 +956,7 @@ lock_rec_has_to_wait( cause waits */ if ((lock_is_on_supremum || (type_mode & LOCK_GAP)) - && !(type_mode & LOCK_INSERT_INTENTION)) { + && !(type_mode & LOCK_INSERT_INTENTION)) { /* Gap type locks without LOCK_INSERT_INTENTION flag do not need to wait for anything. This is because @@ -1765,10 +1765,7 @@ lock_rec_create( lock_rec_set_nth_bit(lock, heap_no); HASH_INSERT(lock_t, hash, lock_sys->rec_hash, - lock_rec_fold(space, page_no), lock); - /* Note that we have create a new lock */ - trx->trx_create_lock = TRUE; - + lock_rec_fold(space, page_no), lock); if (type_mode & LOCK_WAIT) { lock_set_lock_and_trx_wait(lock, trx); @@ -1945,15 +1942,6 @@ lock_rec_add_to_queue( if (similar_lock && !somebody_waits && !(type_mode & LOCK_WAIT)) { - /* If the nth bit of a record lock is already set then we - do not set a new lock bit, otherwice we set */ - - if (lock_rec_get_nth_bit(similar_lock, heap_no)) { - trx->trx_create_lock = FALSE; - } else { - trx->trx_create_lock = TRUE; - } - lock_rec_set_nth_bit(similar_lock, heap_no); return(similar_lock); @@ -2005,11 +1993,14 @@ lock_rec_lock_fast( lock = lock_rec_get_first_on_page(rec); trx = thr_get_trx(thr); - trx->trx_create_lock = FALSE; if (lock == NULL) { if (!impl) { lock_rec_create(mode, rec, index, trx); + + if (srv_locks_unsafe_for_binlog) { + trx_register_new_rec_lock(trx, index); + } } return(TRUE); @@ -2021,23 +2012,22 @@ lock_rec_lock_fast( } if (lock->trx != trx - || lock->type_mode != (mode | LOCK_REC) - || lock_rec_get_n_bits(lock) <= heap_no) { + || lock->type_mode != (mode | LOCK_REC) + || lock_rec_get_n_bits(lock) <= heap_no) { + return(FALSE); } if (!impl) { + /* If the nth bit of the record lock is already set then we + do not set a new lock bit, otherwise we do set */ - /* If the nth bit of a record lock is already set then we - do not set a new lock bit, otherwice we set */ - - if (lock_rec_get_nth_bit(lock, heap_no)) { - trx->trx_create_lock = FALSE; - } else { - trx->trx_create_lock = TRUE; + if (!lock_rec_get_nth_bit(lock, heap_no)) { + lock_rec_set_nth_bit(lock, heap_no); + if (srv_locks_unsafe_for_binlog) { + trx_register_new_rec_lock(trx, index); + } } - - lock_rec_set_nth_bit(lock, heap_no); } return(TRUE); @@ -2093,12 +2083,19 @@ lock_rec_lock_slow( enough already granted on the record, we have to wait. */ err = lock_rec_enqueue_waiting(mode, rec, index, thr); + + if (srv_locks_unsafe_for_binlog) { + trx_register_new_rec_lock(trx, index); + } } else { if (!impl) { /* Set the requested lock on the record */ lock_rec_add_to_queue(LOCK_REC | mode, rec, index, trx); + if (srv_locks_unsafe_for_binlog) { + trx_register_new_rec_lock(trx, index); + } } err = DB_SUCCESS; @@ -2436,8 +2433,15 @@ lock_rec_inherit_to_gap( lock = lock_rec_get_first(rec); + /* If srv_locks_unsafe_for_binlog is TRUE, we do not want locks set + by an UPDATE or a DELETE to be inherited as gap type locks. But we + DO want S-locks set by a consistency constraint to be inherited also + then. */ + while (lock != NULL) { - if (!lock_rec_get_insert_intention(lock)) { + if (!lock_rec_get_insert_intention(lock) + && !(srv_locks_unsafe_for_binlog + && lock_get_mode(lock) == LOCK_X)) { lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock) | LOCK_GAP, @@ -3069,7 +3073,7 @@ lock_update_insert( lock_rec_inherit_to_gap_if_gap_lock(rec, page_rec_get_next(rec)); lock_mutex_exit_kernel(); -} +} /***************************************************************** Updates the lock table when a record is removed. */ diff --git a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c index 8d9780bfbda..42e854398ba 100644 --- a/storage/innobase/log/log0recv.c +++ b/storage/innobase/log/log0recv.c @@ -768,7 +768,8 @@ recv_parse_or_apply_log_rec_body( case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT: if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, type == MLOG_COMP_REC_INSERT, &index))) { - ut_a(!page||!!page_is_comp(page)==index->table->comp); + ut_a(!page + || (ibool)!!page_is_comp(page)==index->table->comp); ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr, index, page, mtr); } @@ -776,7 +777,8 @@ recv_parse_or_apply_log_rec_body( case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK: if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, type == MLOG_COMP_REC_CLUST_DELETE_MARK, &index))) { - ut_a(!page||!!page_is_comp(page)==index->table->comp); + ut_a(!page + || (ibool)!!page_is_comp(page)==index->table->comp); ptr = btr_cur_parse_del_mark_set_clust_rec(ptr, end_ptr, index, page); } @@ -796,7 +798,8 @@ recv_parse_or_apply_log_rec_body( case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE: if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, type == MLOG_COMP_REC_UPDATE_IN_PLACE, &index))) { - ut_a(!page||!!page_is_comp(page)==index->table->comp); + ut_a(!page + || (ibool)!!page_is_comp(page)==index->table->comp); ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page, index); } @@ -806,7 +809,8 @@ recv_parse_or_apply_log_rec_body( if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, type == MLOG_COMP_LIST_END_DELETE || type == MLOG_COMP_LIST_START_DELETE, &index))) { - ut_a(!page||!!page_is_comp(page)==index->table->comp); + ut_a(!page + || (ibool)!!page_is_comp(page)==index->table->comp); ptr = page_parse_delete_rec_list(type, ptr, end_ptr, index, page, mtr); } @@ -814,7 +818,8 @@ recv_parse_or_apply_log_rec_body( case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED: if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, type == MLOG_COMP_LIST_END_COPY_CREATED, &index))) { - ut_a(!page||!!page_is_comp(page)==index->table->comp); + ut_a(!page + || (ibool)!!page_is_comp(page)==index->table->comp); ptr = page_parse_copy_rec_list_to_created_page(ptr, end_ptr, index, page, mtr); } @@ -822,7 +827,8 @@ recv_parse_or_apply_log_rec_body( case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE: if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, type == MLOG_COMP_PAGE_REORGANIZE, &index))) { - ut_a(!page||!!page_is_comp(page)==index->table->comp); + ut_a(!page + || (ibool)!!page_is_comp(page)==index->table->comp); ptr = btr_parse_page_reorganize(ptr, end_ptr, index, page, mtr); } @@ -855,7 +861,8 @@ recv_parse_or_apply_log_rec_body( case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE: if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, type == MLOG_COMP_REC_DELETE, &index))) { - ut_a(!page||!!page_is_comp(page)==index->table->comp); + ut_a(!page + || (ibool)!!page_is_comp(page)==index->table->comp); ptr = page_cur_parse_delete_rec(ptr, end_ptr, index, page, mtr); } diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c index c68f5738798..48dc808e36c 100644 --- a/storage/innobase/os/os0file.c +++ b/storage/innobase/os/os0file.c @@ -1653,7 +1653,7 @@ os_file_get_size_as_iblonglong( } /*************************************************************************** -Sets a file size. This function can be used to extend or truncate a file. */ +Write the specified number of zeros to a newly created file. */ ibool os_file_set_size( @@ -1666,44 +1666,46 @@ os_file_set_size( size */ ulint size_high)/* in: most significant 32 bits of size */ { - ib_longlong offset; - ib_longlong low; - ulint n_bytes; + ib_longlong current_size; + ib_longlong desired_size; ibool ret; byte* buf; byte* buf2; + ulint buf_size; ut_a(size == (size & 0xFFFFFFFF)); - /* We use a very big 8 MB buffer in writing because Linux may be - extremely slow in fsync on 1 MB writes */ + current_size = 0; + desired_size = (ib_longlong)size + (((ib_longlong)size_high) << 32); - buf2 = ut_malloc(UNIV_PAGE_SIZE * 513); + /* Write up to 1 megabyte at a time. */ + buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE)) + * UNIV_PAGE_SIZE; + buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE); /* Align the buffer for possible raw i/o */ buf = ut_align(buf2, UNIV_PAGE_SIZE); /* Write buffer full of zeros */ - memset(buf, 0, UNIV_PAGE_SIZE * 512); + memset(buf, 0, buf_size); - offset = 0; - low = (ib_longlong)size + (((ib_longlong)size_high) << 32); - - if (low >= (ib_longlong)(100 * 1024 * 1024)) { + if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) { fprintf(stderr, "InnoDB: Progress in MB:"); } - while (offset < low) { - if (low - offset < UNIV_PAGE_SIZE * 512) { - n_bytes = (ulint)(low - offset); - } else { - n_bytes = UNIV_PAGE_SIZE * 512; - } - + while (current_size < desired_size) { + ulint n_bytes; + + if (desired_size - current_size < (ib_longlong) buf_size) { + n_bytes = (ulint) (desired_size - current_size); + } else { + n_bytes = buf_size; + } + ret = os_file_write(name, file, buf, - (ulint)(offset & 0xFFFFFFFF), - (ulint)(offset >> 32), + (ulint)(current_size & 0xFFFFFFFF), + (ulint)(current_size >> 32), n_bytes); if (!ret) { ut_free(buf2); @@ -1711,18 +1713,18 @@ os_file_set_size( } /* Print about progress for each 100 MB written */ - if ((ib_longlong) (offset + n_bytes) / (ib_longlong)(100 * 1024 * 1024) - != offset / (ib_longlong)(100 * 1024 * 1024)) { + if ((current_size + n_bytes) / (ib_longlong)(100 * 1024 * 1024) + != current_size / (ib_longlong)(100 * 1024 * 1024)) { fprintf(stderr, " %lu00", - (ulong) ((offset + n_bytes) + (ulong) ((current_size + n_bytes) / (ib_longlong)(100 * 1024 * 1024))); } - offset += n_bytes; + current_size += n_bytes; } - if (low >= (ib_longlong)(100 * 1024 * 1024)) { + if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) { fprintf(stderr, "\n"); } @@ -3296,7 +3298,7 @@ os_aio( ibool retval; BOOL ret = TRUE; DWORD len = (DWORD) n; - void* dummy_mess1; + struct fil_node_struct * dummy_mess1; void* dummy_mess2; ulint dummy_type; #endif diff --git a/storage/innobase/page/page0cur.c b/storage/innobase/page/page0cur.c index df6d898d4ac..d0b89e81787 100644 --- a/storage/innobase/page/page0cur.c +++ b/storage/innobase/page/page0cur.c @@ -47,7 +47,6 @@ page_cur_try_search_shortcut( not yet completely matched */ page_cur_t* cursor) /* out: page cursor */ { - int cmp; rec_t* rec; rec_t* next_rec; ulint low_match; @@ -79,9 +78,8 @@ page_cur_try_search_shortcut( up_match = low_match; up_bytes = low_bytes; - cmp = page_cmp_dtuple_rec_with_match(tuple, rec, offsets, &low_match, - &low_bytes); - if (cmp == -1) { + if (page_cmp_dtuple_rec_with_match(tuple, rec, offsets, + &low_match, &low_bytes) < 0) { goto exit_func; } @@ -89,9 +87,8 @@ page_cur_try_search_shortcut( offsets = rec_get_offsets(next_rec, index, offsets, dtuple_get_n_fields(tuple), &heap); - cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets, - &up_match, &up_bytes); - if (cmp != -1) { + if (page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets, + &up_match, &up_bytes) >= 0) { goto exit_func; } @@ -115,7 +112,7 @@ page_cur_try_search_shortcut( ut_a(*ilow_matched_fields == low_match); ut_a(*ilow_matched_bytes == low_bytes); #endif - if (next_rec != page_get_supremum_rec(page)) { + if (!page_rec_is_supremum(next_rec)) { *iup_matched_fields = up_match; *iup_matched_bytes = up_bytes; @@ -137,6 +134,7 @@ exit_func: #endif +#ifdef PAGE_CUR_LE_OR_EXTENDS /******************************************************************** Checks if the nth field in a record is a character type field which extends the nth field in tuple, i.e., the field is longer or equal in length and has @@ -185,6 +183,7 @@ page_cur_rec_field_extends( return(FALSE); } +#endif /* PAGE_CUR_LE_OR_EXTENDS */ /******************************************************************** Searches the right position for a page cursor. */ @@ -239,10 +238,17 @@ page_cur_search_with_match( && ilow_matched_fields && ilow_matched_bytes && cursor); ut_ad(dtuple_validate(tuple)); ut_ad(dtuple_check_typed(tuple)); +#ifdef UNIV_DEBUG +# ifdef PAGE_CUR_DBG + if (mode != PAGE_CUR_DBG) +# endif /* PAGE_CUR_DBG */ +# ifdef PAGE_CUR_LE_OR_EXTENDS + if (mode != PAGE_CUR_LE_OR_EXTENDS) +# endif /* PAGE_CUR_LE_OR_EXTENDS */ ut_ad((mode == PAGE_CUR_L) || (mode == PAGE_CUR_LE) - || (mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE) - || (mode == PAGE_CUR_LE_OR_EXTENDS) || (mode == PAGE_CUR_DBG)); - + || (mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)); +#endif /* UNIV_DEBUG */ + page_check_dir(page); #ifdef PAGE_CUR_ADAPT @@ -261,16 +267,18 @@ page_cur_search_with_match( return; } } -/*#ifdef UNIV_SEARCH_DEBUG */ +# ifdef PAGE_CUR_DBG if (mode == PAGE_CUR_DBG) { mode = PAGE_CUR_LE; } -/*#endif */ +# endif #endif /* The following flag does not work for non-latin1 char sets because cmp_full_field does not tell how many bytes matched */ +#ifdef PAGE_CUR_LE_OR_EXTENDS ut_a(mode != PAGE_CUR_LE_OR_EXTENDS); +#endif /* PAGE_CUR_LE_OR_EXTENDS */ /* If mode PAGE_CUR_G is specified, we are trying to position the cursor to answer a query of the form "tuple < X", where tuple is @@ -308,33 +316,36 @@ page_cur_search_with_match( cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets, &cur_matched_fields, &cur_matched_bytes); - if (cmp == 1) { + if (UNIV_LIKELY(cmp > 0)) { +low_slot_match: low = mid; low_matched_fields = cur_matched_fields; low_matched_bytes = cur_matched_bytes; - } else if (cmp == -1) { + } else if (UNIV_LIKELY(cmp /* == -1 */)) { +#ifdef PAGE_CUR_LE_OR_EXTENDS if (mode == PAGE_CUR_LE_OR_EXTENDS && page_cur_rec_field_extends(tuple, mid_rec, offsets, cur_matched_fields)) { - low = mid; - low_matched_fields = cur_matched_fields; - low_matched_bytes = cur_matched_bytes; - } else { - up = mid; - up_matched_fields = cur_matched_fields; - up_matched_bytes = cur_matched_bytes; - } - } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE - || mode == PAGE_CUR_LE_OR_EXTENDS) { - low = mid; - low_matched_fields = cur_matched_fields; - low_matched_bytes = cur_matched_bytes; - } else { + goto low_slot_match; + } +#endif /* PAGE_CUR_LE_OR_EXTENDS */ +up_slot_match: up = mid; up_matched_fields = cur_matched_fields; up_matched_bytes = cur_matched_bytes; + + } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE +#ifdef PAGE_CUR_LE_OR_EXTENDS + || mode == PAGE_CUR_LE_OR_EXTENDS +#endif /* PAGE_CUR_LE_OR_EXTENDS */ + ) { + + goto low_slot_match; + } else { + + goto up_slot_match; } } @@ -360,32 +371,35 @@ page_cur_search_with_match( cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets, &cur_matched_fields, &cur_matched_bytes); - if (cmp == 1) { + if (UNIV_LIKELY(cmp > 0)) { +low_rec_match: low_rec = mid_rec; low_matched_fields = cur_matched_fields; low_matched_bytes = cur_matched_bytes; - } else if (cmp == -1) { + } else if (UNIV_LIKELY(cmp /* == -1 */)) { +#ifdef PAGE_CUR_LE_OR_EXTENDS if (mode == PAGE_CUR_LE_OR_EXTENDS && page_cur_rec_field_extends(tuple, mid_rec, offsets, cur_matched_fields)) { - low_rec = mid_rec; - low_matched_fields = cur_matched_fields; - low_matched_bytes = cur_matched_bytes; - } else { - up_rec = mid_rec; - up_matched_fields = cur_matched_fields; - up_matched_bytes = cur_matched_bytes; + + goto low_rec_match; } - } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE - || mode == PAGE_CUR_LE_OR_EXTENDS) { - low_rec = mid_rec; - low_matched_fields = cur_matched_fields; - low_matched_bytes = cur_matched_bytes; - } else { +#endif /* PAGE_CUR_LE_OR_EXTENDS */ +up_rec_match: up_rec = mid_rec; up_matched_fields = cur_matched_fields; up_matched_bytes = cur_matched_bytes; + } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE +#ifdef PAGE_CUR_LE_OR_EXTENDS + || mode == PAGE_CUR_LE_OR_EXTENDS +#endif /* PAGE_CUR_LE_OR_EXTENDS */ + ) { + + goto low_rec_match; + } else { + + goto up_rec_match; } } diff --git a/storage/innobase/page/page0page.c b/storage/innobase/page/page0page.c index 1fe7f1d9356..7e09cdf073e 100644 --- a/storage/innobase/page/page0page.c +++ b/storage/innobase/page/page0page.c @@ -483,7 +483,7 @@ page_copy_rec_list_end_no_locks( page_cur_move_to_next(&cur1); } - ut_a(!!page_is_comp(new_page) == index->table->comp); + ut_a((ibool)!!page_is_comp(new_page) == index->table->comp); ut_a(page_is_comp(new_page) == page_is_comp(page)); ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint) (page_is_comp(new_page) @@ -1347,7 +1347,7 @@ page_print_list( ulint* offsets = offsets_; *offsets_ = (sizeof offsets_) / sizeof *offsets_; - ut_a(!!page_is_comp(page) == index->table->comp); + ut_a((ibool)!!page_is_comp(page) == index->table->comp); fprintf(stderr, "--------------------------------\n" @@ -1741,7 +1741,7 @@ page_validate( ulint* offsets = NULL; ulint* old_offsets = NULL; - if (!!comp != index->table->comp) { + if ((ibool)!!comp != index->table->comp) { fputs("InnoDB: 'compact format' flag mismatch\n", stderr); goto func_exit2; } diff --git a/storage/innobase/rem/rem0rec.c b/storage/innobase/rem/rem0rec.c index 580a7bfe509..fbc33aea669 100644 --- a/storage/innobase/rem/rem0rec.c +++ b/storage/innobase/rem/rem0rec.c @@ -601,30 +601,38 @@ rec_set_nth_field_extern_bit_new( /* read the lengths of fields 0..n */ for (i = 0; i < n_fields; i++) { - ibool is_null; - ulint len; field = dict_index_get_nth_field(index, i); type = dict_col_get_type(dict_field_get_col(field)); - is_null = !(dtype_get_prtype(type) & DATA_NOT_NULL); - if (is_null) { - /* nullable field => read the null flag */ - is_null = !!(*nulls & null_mask); + if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { + if (UNIV_UNLIKELY(!(byte) null_mask)) { + nulls--; + null_mask = 1; + } + + if (*nulls & null_mask) { + null_mask <<= 1; + /* NULL fields cannot be external. */ + ut_ad(i != ith); + continue; + } + null_mask <<= 1; - if (null_mask == 0x100) - nulls--, null_mask = 1; } - if (is_null || field->fixed_len) { - /* No length (or extern bit) is stored for - fields that are NULL or fixed-length. */ + if (field->fixed_len) { + /* fixed-length fields cannot be external + (Fixed-length fields longer than + DICT_MAX_COL_PREFIX_LEN will be treated as + variable-length ones in dict_index_add_col().) */ ut_ad(i != ith); continue; } - len = *lens--; + lens--; if (dtype_get_len(type) > 255 || dtype_get_mtype(type) == DATA_BLOB) { + ulint len = lens[1]; if (len & 0x80) { /* 1exxxxxx: 2-byte length */ if (i == ith) { - if (!val == !(len & 0x20)) { + if (!val == !(len & 0x40)) { return; /* no change */ } /* toggle the extern bit */ @@ -823,6 +831,7 @@ rec_convert_dtuple_to_rec_new( byte* lens; ulint len; ulint i; + ulint n_node_ptr_field; ulint fixed_len; ulint null_mask = 1; const ulint n_fields = dtuple_get_n_fields(dtuple); @@ -831,16 +840,26 @@ rec_convert_dtuple_to_rec_new( ut_ad(index->table->comp); ut_ad(n_fields > 0); - switch (status) { + + /* Try to ensure that the memset() between the for() loops + completes fast. The address is not exact, but UNIV_PREFETCH + should never generate a memory fault. */ + UNIV_PREFETCH_RW(rec - REC_N_NEW_EXTRA_BYTES - n_fields); + UNIV_PREFETCH_RW(rec); + + switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) { case REC_STATUS_ORDINARY: ut_ad(n_fields <= dict_index_get_n_fields(index)); + n_node_ptr_field = ULINT_UNDEFINED; break; case REC_STATUS_NODE_PTR: ut_ad(n_fields == dict_index_get_n_unique_in_tree(index) + 1); + n_node_ptr_field = n_fields - 1; break; case REC_STATUS_INFIMUM: case REC_STATUS_SUPREMUM: ut_ad(n_fields == 1); + n_node_ptr_field = ULINT_UNDEFINED; goto init; default: ut_a(0); @@ -852,15 +871,18 @@ rec_convert_dtuple_to_rec_new( rec += (index->n_nullable + 7) / 8; for (i = 0; i < n_fields; i++) { + if (UNIV_UNLIKELY(i == n_node_ptr_field)) { +#ifdef UNIV_DEBUG + field = dtuple_get_nth_field(dtuple, i); + type = dfield_get_type(field); + ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL); + ut_ad(dfield_get_len(field) == 4); +#endif /* UNIV_DEBUG */ + goto init; + } field = dtuple_get_nth_field(dtuple, i); type = dfield_get_type(field); len = dfield_get_len(field); - if (status == REC_STATUS_NODE_PTR && i == n_fields - 1) { - fixed_len = 4; - ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL); - ut_ad(len == 4); - continue; - } fixed_len = dict_index_get_nth_field(index, i)->fixed_len; if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { @@ -902,27 +924,33 @@ init: type = dfield_get_type(field); len = dfield_get_len(field); - if (status == REC_STATUS_NODE_PTR && i == n_fields - 1) { - fixed_len = 4; + if (UNIV_UNLIKELY(i == n_node_ptr_field)) { ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL); ut_ad(len == 4); - goto copy; + memcpy(end, dfield_get_data(field), len); + break; } fixed_len = dict_index_get_nth_field(index, i)->fixed_len; if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { /* nullable field */ ut_ad(index->n_nullable > 0); + + if (UNIV_UNLIKELY(!(byte) null_mask)) { + nulls--; + null_mask = 1; + } + ut_ad(*nulls < null_mask); + /* set the null flag if necessary */ if (len == UNIV_SQL_NULL) { *nulls |= null_mask; + null_mask <<= 1; + continue; } + null_mask <<= 1; - if (null_mask == 0x100) - nulls--, null_mask = 1; - if (len == UNIV_SQL_NULL) - continue; } /* only nullable fields can be null */ ut_ad(len != UNIV_SQL_NULL); @@ -942,7 +970,7 @@ init: *lens-- = (byte) len; } } - copy: + memcpy(end, dfield_get_data(field), len); end += len; } @@ -1105,7 +1133,6 @@ rec_copy_prefix_to_buf( dtype_t* type; ulint i; ulint prefix_len; - ibool is_null; ulint null_mask; ulint status; @@ -1146,20 +1173,22 @@ rec_copy_prefix_to_buf( for (i = 0; i < n_fields; i++) { field = dict_index_get_nth_field(index, i); type = dict_col_get_type(dict_field_get_col(field)); - is_null = !(dtype_get_prtype(type) & DATA_NOT_NULL); - if (is_null) { + if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { /* nullable field => read the null flag */ - is_null = !!(*nulls & null_mask); - null_mask <<= 1; - if (null_mask == 0x100) { - --nulls; - UNIV_PREFETCH_R(nulls); + if (UNIV_UNLIKELY(!(byte) null_mask)) { + nulls--; null_mask = 1; } + + if (*nulls & null_mask) { + null_mask <<= 1; + continue; + } + + null_mask <<= 1; } - if (is_null) { - } else if (field->fixed_len) { + if (field->fixed_len) { prefix_len += field->fixed_len; } else { ulint len = *lens--; diff --git a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c index eb50b83a4d5..2ac0824b331 100644 --- a/storage/innobase/row/row0mysql.c +++ b/storage/innobase/row/row0mysql.c @@ -1429,51 +1429,106 @@ run_again: } /************************************************************************* -Does an unlock of a row for MySQL. */ +This can only be used when srv_locks_unsafe_for_binlog is TRUE. Before +calling this function we must use trx_reset_new_rec_lock_info() and +trx_register_new_rec_lock() to store the information which new record locks +really were set. This function removes a newly set lock under prebuilt->pcur, +and also under prebuilt->clust_pcur. Currently, this is only used and tested +in the case of an UPDATE or a DELETE statement, where the row lock is of the +LOCK_X type. +Thus, this implements a 'mini-rollback' that releases the latest record +locks we set. */ int row_unlock_for_mysql( /*=================*/ /* out: error code or DB_SUCCESS */ - row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL + row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL handle */ + ibool has_latches_on_recs)/* TRUE if called so that we have + the latches on the records under pcur + and clust_pcur, and we do not need to + reposition the cursors. */ { - rec_t* rec; - btr_pcur_t* cur = prebuilt->pcur; + dict_index_t* index; + btr_pcur_t* pcur = prebuilt->pcur; + btr_pcur_t* clust_pcur = prebuilt->clust_pcur; trx_t* trx = prebuilt->trx; + rec_t* rec; mtr_t mtr; ut_ad(prebuilt && trx); ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); - + + if (!srv_locks_unsafe_for_binlog) { + + fprintf(stderr, +"InnoDB: Error: calling row_unlock_for_mysql though\n" +"InnoDB: srv_locks_unsafe_for_binlog is FALSE.\n"); + + return(DB_SUCCESS); + } + trx->op_info = "unlock_row"; - - if (srv_locks_unsafe_for_binlog) { - if (trx->trx_create_lock == TRUE) { - mtr_start(&mtr); + index = btr_pcur_get_btr_cur(pcur)->index; + + if (index != NULL && trx_new_rec_locks_contain(trx, index)) { + + mtr_start(&mtr); - /* Restore a cursor position and find a record */ - btr_pcur_restore_position(BTR_SEARCH_LEAF, cur, &mtr); - rec = btr_pcur_get_rec(cur); + /* Restore the cursor position and find the record */ + + if (!has_latches_on_recs) { + btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, &mtr); + } - if (rec) { + rec = btr_pcur_get_rec(pcur); - lock_rec_reset_and_release_wait(rec); - } else { - fputs("InnoDB: Error: " - "Record for the lock not found\n", - stderr); - mem_analyze_corruption((byte*) trx); - ut_error; - } + mutex_enter(&kernel_mutex); - trx->trx_create_lock = FALSE; - mtr_commit(&mtr); + lock_rec_reset_and_release_wait(rec); + + mutex_exit(&kernel_mutex); + + mtr_commit(&mtr); + + /* If the search was done through the clustered index, then + we have not used clust_pcur at all, and we must NOT try to + reset locks on clust_pcur. The values in clust_pcur may be + garbage! */ + + if (index->type & DICT_CLUSTERED) { + + goto func_exit; } - + } + + index = btr_pcur_get_btr_cur(clust_pcur)->index; + + if (index != NULL && trx_new_rec_locks_contain(trx, index)) { + + mtr_start(&mtr); + + /* Restore the cursor position and find the record */ + + if (!has_latches_on_recs) { + btr_pcur_restore_position(BTR_SEARCH_LEAF, clust_pcur, + &mtr); + } + + rec = btr_pcur_get_rec(clust_pcur); + + mutex_enter(&kernel_mutex); + + lock_rec_reset_and_release_wait(rec); + + mutex_exit(&kernel_mutex); + + mtr_commit(&mtr); } +func_exit: trx->op_info = ""; return(DB_SUCCESS); diff --git a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c index c7a548fe448..15439bed7e7 100644 --- a/storage/innobase/row/row0sel.c +++ b/storage/innobase/row/row0sel.c @@ -2784,6 +2784,10 @@ sel_restore_position_for_mysql( process the record the cursor is now positioned on (i.e. we should not go to the next record yet) */ + ibool* same_user_rec, /* out: TRUE if we were able to restore + the cursor on a user record with the + same ordering prefix in in the + B-tree index */ ulint latch_mode, /* in: latch mode wished in restoration */ btr_pcur_t* pcur, /* in: cursor whose position @@ -2800,6 +2804,8 @@ sel_restore_position_for_mysql( success = btr_pcur_restore_position(latch_mode, pcur, mtr); + *same_user_rec = success; + if (relative_position == BTR_PCUR_ON) { if (success) { return(FALSE); @@ -3064,10 +3070,12 @@ row_search_for_mysql( ulint cnt = 0; #endif /* UNIV_SEARCH_DEBUG */ ulint next_offs; + ibool same_user_rec; mtr_t mtr; mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; + *offsets_ = (sizeof offsets_) / sizeof *offsets_; ut_ad(index && pcur && search_tuple); @@ -3138,6 +3146,16 @@ row_search_for_mysql( trx->search_latch_timeout = BTR_SEA_TIMEOUT; } + /* Reset the new record lock info if we srv_locks_unsafe_for_binlog + is set. Then we are able to remove the record locks set here on an + individual row. */ + + if (srv_locks_unsafe_for_binlog + && prebuilt->select_lock_type != LOCK_NONE) { + + trx_reset_new_rec_lock_info(trx); + } + /*-------------------------------------------------------------*/ /* PHASE 1: Try to pop the row from the prefetch cache */ @@ -3396,8 +3414,9 @@ shortcut_fails_too_big_rec: clust_index = dict_table_get_first_index(index->table); if (UNIV_LIKELY(direction != 0)) { - if (!sel_restore_position_for_mysql(BTR_SEARCH_LEAF, pcur, - moves_up, &mtr)) { + if (!sel_restore_position_for_mysql(&same_user_rec, + BTR_SEARCH_LEAF, + pcur, moves_up, &mtr)) { goto next_rec; } @@ -3659,7 +3678,7 @@ rec_loop: goto normal_return; } } - + /* We are ready to look at a possible new index entry in the result set: the cursor is now placed on a user record */ @@ -3679,6 +3698,7 @@ rec_loop: || srv_locks_unsafe_for_binlog || (unique_search && !UNIV_UNLIKELY(rec_get_deleted_flag( rec, page_rec_is_comp(rec))))) { + goto no_gap_lock; } else { lock_type = LOCK_ORDINARY; @@ -3701,7 +3721,7 @@ rec_loop: && dtuple_get_n_fields_cmp(search_tuple) == dict_index_get_n_unique(index) && 0 == cmp_dtuple_rec(search_tuple, rec, offsets)) { - no_gap_lock: +no_gap_lock: lock_type = LOCK_REC_NOT_GAP; } @@ -3764,6 +3784,7 @@ rec_loop: /* Get the clustered index record if needed */ index_rec = rec; ut_ad(index != clust_index); + goto requires_clust_rec; } } @@ -3773,6 +3794,17 @@ rec_loop: /* The record is delete-marked: we can skip it if this is not a consistent read which might see an earlier version of a non-clustered index record */ + + if (srv_locks_unsafe_for_binlog + && prebuilt->select_lock_type != LOCK_NONE) { + + /* No need to keep a lock on a delete-marked record + if we do not want to use next-key locking. */ + + row_unlock_for_mysql(prebuilt, TRUE); + + trx_reset_new_rec_lock_info(trx); + } goto next_rec; } @@ -3783,7 +3815,8 @@ rec_loop: index_rec = rec; if (index != clust_index && prebuilt->need_to_access_clustered) { - requires_clust_rec: + +requires_clust_rec: /* Before and after this "if" block, "offsets" will be related to "rec", which may be in a secondary index "index" or the clustered index ("clust_index"). However, after this @@ -3816,6 +3849,18 @@ rec_loop: /* The record is delete marked: we can skip it */ + if (srv_locks_unsafe_for_binlog + && prebuilt->select_lock_type != LOCK_NONE) { + + /* No need to keep a lock on a delete-marked + record if we do not want to use next-key + locking. */ + + row_unlock_for_mysql(prebuilt, TRUE); + + trx_reset_new_rec_lock_info(trx); + } + goto next_rec; } @@ -3908,7 +3953,7 @@ got_row: next_rec: /*-------------------------------------------------------------*/ /* PHASE 5: Move the cursor to the next index record */ - + if (UNIV_UNLIKELY(mtr_has_extra_clust_latch)) { /* We must commit mtr if we are moving to the next non-clustered index record, because we could break the @@ -3921,8 +3966,9 @@ next_rec: mtr_has_extra_clust_latch = FALSE; mtr_start(&mtr); - if (sel_restore_position_for_mysql(BTR_SEARCH_LEAF, pcur, - moves_up, &mtr)) { + if (sel_restore_position_for_mysql(&same_user_rec, + BTR_SEARCH_LEAF, + pcur, moves_up, &mtr)) { #ifdef UNIV_SEARCH_DEBUG cnt++; #endif /* UNIV_SEARCH_DEBUG */ @@ -3973,11 +4019,34 @@ lock_wait_or_error: thr->lock_state = QUE_THR_LOCK_ROW; if (row_mysql_handle_errors(&err, trx, thr, NULL)) { + /* It was a lock wait, and it ended */ + thr->lock_state = QUE_THR_LOCK_NOLOCK; mtr_start(&mtr); - sel_restore_position_for_mysql(BTR_SEARCH_LEAF, pcur, - moves_up, &mtr); + sel_restore_position_for_mysql(&same_user_rec, + BTR_SEARCH_LEAF, pcur, + moves_up, &mtr); + if (srv_locks_unsafe_for_binlog && !same_user_rec) { + /* Since we were not able to restore the cursor + on the same user record, we cannot use + row_unlock_for_mysql() to unlock any records, and + we must thus reset the new rec lock info. Since + in lock0lock.c we have blocked the inheriting of gap + X-locks, we actually do not have any new record locks + set in this case. + + Note that if we were able to restore on the 'same' + user record, it is still possible that we were actually + waiting on a delete-marked record, and meanwhile + it was removed by purge and inserted again by some + other user. But that is no problem, because in + rec_loop we will again try to set a lock, and + new_rec_lock_info in trx will be right at the end. */ + + trx_reset_new_rec_lock_info(trx); + } + mode = pcur->search_mode; goto rec_loop; diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c index cf2b8db5d32..514fb6bd577 100644 --- a/storage/innobase/row/row0upd.c +++ b/storage/innobase/row/row0upd.c @@ -818,7 +818,7 @@ row_upd_build_difference_binary( extern_bit = upd_ext_vec_contains(ext_vec, n_ext_vec, i); if (UNIV_UNLIKELY(extern_bit == - !rec_offs_nth_extern(offsets, i)) + (ibool)!rec_offs_nth_extern(offsets, i)) || !dfield_data_is_binary_equal(dfield, len, data)) { upd_field = upd_get_nth_field(update, n_diff); diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index f901425a5f9..837c5be2bb6 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -260,7 +260,7 @@ semaphore contention and convoy problems can occur withput this restriction. Value 10 should be good if there are less than 4 processors + 4 disks in the computer. Bigger computers need bigger values. */ -ulong srv_thread_concurrency = 8; +ulong srv_thread_concurrency = SRV_CONCURRENCY_THRESHOLD; os_fast_mutex_t srv_conc_mutex; /* this mutex protects srv_conc data structures */ @@ -983,12 +983,6 @@ srv_conc_enter_innodb( srv_conc_slot_t* slot = NULL; ulint i; - if (srv_thread_concurrency >= 500) { - /* Disable the concurrency check */ - - return; - } - /* If trx has 'free tickets' to enter the engine left, then use one such ticket */ @@ -1134,7 +1128,7 @@ srv_conc_force_enter_innodb( trx_t* trx) /* in: transaction object associated with the thread */ { - if (srv_thread_concurrency >= 500) { + if (srv_thread_concurrency >= SRV_CONCURRENCY_THRESHOLD) { return; } @@ -1160,7 +1154,7 @@ srv_conc_force_exit_innodb( { srv_conc_slot_t* slot = NULL; - if (srv_thread_concurrency >= 500) { + if (srv_thread_concurrency >= SRV_CONCURRENCY_THRESHOLD) { return; } @@ -1212,11 +1206,6 @@ srv_conc_exit_innodb( trx_t* trx) /* in: transaction object associated with the thread */ { - if (srv_thread_concurrency >= 500) { - - return; - } - if (trx->n_tickets_to_enter_innodb > 0) { /* We will pretend the thread is still inside InnoDB though it now leaves the InnoDB engine. In this way we save diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c index 9e155ee1de0..10fbf3468c0 100644 --- a/storage/innobase/trx/trx0trx.c +++ b/storage/innobase/trx/trx0trx.c @@ -166,6 +166,8 @@ trx_create( memset(&trx->xid, 0, sizeof(trx->xid)); trx->xid.formatID = -1; + trx_reset_new_rec_lock_info(trx); + return(trx); } diff --git a/storage/innobase/trx/trx0undo.c b/storage/innobase/trx/trx0undo.c index c14e4a1f3ab..7441dd3f152 100644 --- a/storage/innobase/trx/trx0undo.c +++ b/storage/innobase/trx/trx0undo.c @@ -559,14 +559,14 @@ trx_undo_write_xid( const XID* xid, /* in: X/Open XA Transaction Identification */ mtr_t* mtr) /* in: mtr */ { - mlog_write_ulint(log_hdr + TRX_UNDO_XA_FORMAT, xid->formatID, - MLOG_4BYTES, mtr); + mlog_write_ulint(log_hdr + TRX_UNDO_XA_FORMAT, + (ulint)xid->formatID, MLOG_4BYTES, mtr); - mlog_write_ulint(log_hdr + TRX_UNDO_XA_TRID_LEN, xid->gtrid_length, - MLOG_4BYTES, mtr); + mlog_write_ulint(log_hdr + TRX_UNDO_XA_TRID_LEN, + (ulint)xid->gtrid_length, MLOG_4BYTES, mtr); - mlog_write_ulint(log_hdr + TRX_UNDO_XA_BQUAL_LEN, xid->bqual_length, - MLOG_4BYTES, mtr); + mlog_write_ulint(log_hdr + TRX_UNDO_XA_BQUAL_LEN, + (ulint)xid->bqual_length, MLOG_4BYTES, mtr); mlog_write_string(log_hdr + TRX_UNDO_XA_XID, (const byte*) xid->data, XIDDATASIZE, mtr); @@ -581,18 +581,14 @@ trx_undo_read_xid( trx_ulogf_t* log_hdr,/* in: undo log header */ XID* xid) /* out: X/Open XA Transaction Identification */ { - ulint i; - - xid->formatID = mach_read_from_4(log_hdr + TRX_UNDO_XA_FORMAT); + xid->formatID = (long)mach_read_from_4(log_hdr + TRX_UNDO_XA_FORMAT); - xid->gtrid_length = mach_read_from_4(log_hdr + TRX_UNDO_XA_TRID_LEN); - - xid->bqual_length = mach_read_from_4(log_hdr + TRX_UNDO_XA_BQUAL_LEN); + xid->gtrid_length = + (long)mach_read_from_4(log_hdr + TRX_UNDO_XA_TRID_LEN); + xid->bqual_length = + (long)mach_read_from_4(log_hdr + TRX_UNDO_XA_BQUAL_LEN); - for (i = 0; i < XIDDATASIZE; i++) { - xid->data[i] = (char)mach_read_from_1(log_hdr + - TRX_UNDO_XA_XID + i); - } + memcpy(xid->data, log_hdr + TRX_UNDO_XA_XID, XIDDATASIZE); } /******************************************************************* diff --git a/storage/myisam/Makefile.am b/storage/myisam/Makefile.am index c61647b25f7..e4327070997 100644 --- a/storage/myisam/Makefile.am +++ b/storage/myisam/Makefile.am @@ -17,7 +17,7 @@ EXTRA_DIST = mi_test_all.sh mi_test_all.res pkgdata_DATA = mi_test_all mi_test_all.res -INCLUDES = -I$(top_srcdir)/include +INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include LDADD = @CLIENT_EXTRA_LDFLAGS@ libmyisam.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ diff --git a/storage/myisam/mi_create.c b/storage/myisam/mi_create.c index 47332b02a72..33b344405ec 100644 --- a/storage/myisam/mi_create.c +++ b/storage/myisam/mi_create.c @@ -770,10 +770,13 @@ err: uint mi_get_pointer_length(ulonglong file_length, uint def) { + DBUG_ASSERT(def >= 2 && def <= 7); if (file_length) /* If not default */ { +#ifdef NOT_YET_READY_FOR_8_BYTE_POINTERS if (file_length >= (longlong) 1 << 56) def=8; +#endif if (file_length >= (longlong) 1 << 48) def=7; if (file_length >= (longlong) 1 << 40) diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c index 456b87b7799..74b97a65609 100644 --- a/storage/myisam/mi_open.c +++ b/storage/myisam/mi_open.c @@ -78,7 +78,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) int lock_error,kfile,open_mode,save_errno,have_rtree=0; uint i,j,len,errpos,head_length,base_pos,offset,info_length,keys, key_parts,unique_key_parts,fulltext_keys,uniques; - char name_buff[FN_REFLEN], org_name [FN_REFLEN], index_name[FN_REFLEN], + char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN], data_name[FN_REFLEN]; char *disk_cache, *disk_pos, *end_pos; MI_INFO info,*m_info,*old_info; diff --git a/storage/myisam/mi_packrec.c b/storage/myisam/mi_packrec.c index 4b512dd89dd..c251e4dda4a 100644 --- a/storage/myisam/mi_packrec.c +++ b/storage/myisam/mi_packrec.c @@ -416,8 +416,19 @@ static uint find_longest_bitstream(uint16 *table, uint16 *end) } - /* Read record from datafile */ - /* Returns length of packed record, -1 if error */ +/* + Read record from datafile. + + SYNOPSIS + _mi_read_pack_record() + info A pointer to MI_INFO. + filepos File offset of the record. + buf RETURN The buffer to receive the record. + + RETURN + 0 on success + HA_ERR_WRONG_IN_RECORD or -1 on error +*/ int _mi_read_pack_record(MI_INFO *info, my_off_t filepos, byte *buf) { diff --git a/storage/myisam/mi_unique.c b/storage/myisam/mi_unique.c index f2d5f01be25..34f5f595f30 100644 --- a/storage/myisam/mi_unique.c +++ b/storage/myisam/mi_unique.c @@ -64,7 +64,12 @@ my_bool mi_check_unique(MI_INFO *info, MI_UNIQUEDEF *def, byte *record, } -/* Calculate a hash for a row */ +/* + Calculate a hash for a row + + TODO + Add support for bit fields +*/ ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const byte *record) { @@ -126,9 +131,17 @@ ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const byte *record) return crc; } - /* - Returns 0 if both rows have equal unique value - */ + +/* + compare unique key for two rows + + TODO + Add support for bit fields + + RETURN + 0 if both rows have equal unique value + # Rows are different +*/ int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b, my_bool null_are_equal) diff --git a/storage/myisam/myisampack.c b/storage/myisam/myisampack.c index 74bb541b220..daf2bbdf85c 100644 --- a/storage/myisam/myisampack.c +++ b/storage/myisam/myisampack.c @@ -33,10 +33,10 @@ #include <my_getopt.h> #include <assert.h> -#if INT_MAX > 32767 -#define BITS_SAVED 32 +#if SIZEOF_LONG_LONG > 4 +#define BITS_SAVED 64 #else -#define BITS_SAVED 16 +#define BITS_SAVED 32 #endif #define IS_OFFSET ((uint) 32768) /* Bit if offset or char in tree */ @@ -49,10 +49,10 @@ struct st_file_buffer { File file; - char *buffer,*pos,*end; + uchar *buffer,*pos,*end; my_off_t pos_in_file; int bits; - uint current_byte; + ulonglong bitbucket; }; struct st_huff_tree; @@ -69,13 +69,17 @@ typedef struct st_huff_counts { my_off_t end_space[8]; my_off_t pre_space[8]; my_off_t tot_end_space,tot_pre_space,zero_fields,empty_fields,bytes_packed; - TREE int_tree; - byte *tree_buff; - byte *tree_pos; + TREE int_tree; /* Tree for detecting distinct column values. */ + byte *tree_buff; /* Column values, 'field_length' each. */ + byte *tree_pos; /* Points to end of column values in 'tree_buff'. */ } HUFF_COUNTS; typedef struct st_huff_element HUFF_ELEMENT; +/* + WARNING: It is crucial for the optimizations in calc_packed_length() + that 'count' is the first element of 'HUFF_ELEMENT'. +*/ struct st_huff_element { my_off_t count; union un_element { @@ -98,7 +102,7 @@ typedef struct st_huff_tree { my_off_t bytes_packed; uint tree_pack_length; uint min_chr,max_chr,char_bits,offset_bits,max_offset,height; - ulong *code; + ulonglong *code; uchar *code_len; } HUFF_TREE; @@ -146,7 +150,7 @@ static uint join_same_trees(HUFF_COUNTS *huff_counts,uint trees); static int make_huff_decode_table(HUFF_TREE *huff_tree,uint trees); static void make_traverse_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element,uint size, - ulong code); + ulonglong code); static int write_header(PACK_MRG_INFO *isam_file, uint header_length,uint trees, my_off_t tot_elements,my_off_t filelength); static void write_field_info(HUFF_COUNTS *counts, uint fields,uint trees); @@ -161,7 +165,7 @@ static char *make_old_name(char *new_name,char *old_name); static void init_file_buffer(File file,pbool read_buffer); static int flush_buffer(ulong neaded_length); static void end_file_buffer(void); -static void write_bits(ulong value,uint bits); +static void write_bits(ulonglong value, uint bits); static void flush_bits(void); static int save_state(MI_INFO *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length, ha_checksum crc); @@ -170,13 +174,23 @@ static int save_state_mrg(File file,PACK_MRG_INFO *isam_file,my_off_t new_length static int mrg_close(PACK_MRG_INFO *mrg); static int mrg_rrnd(PACK_MRG_INFO *info,byte *buf); static void mrg_reset(PACK_MRG_INFO *mrg); +#if !defined(DBUG_OFF) +static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count); +static int fakecmp(my_off_t **count1, my_off_t **count2); +#endif static int error_on_write=0,test_only=0,verbose=0,silent=0, write_loop=0,force_pack=0, isamchk_neaded=0; static int tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL; static my_bool backup, opt_wait; -static uint tree_buff_length=8196-MALLOC_OVERHEAD; +/* + tree_buff_length is somewhat arbitrary. The bigger it is the better + the chance to win in terms of compression factor. On the other hand, + this table becomes part of the compressed file header. And its length + is coded with 16 bits in the header. Hence the limit is 2**16 - 1. +*/ +static uint tree_buff_length= 65536 - MALLOC_OVERHEAD; static char tmp_dir[FN_REFLEN]={0},*join_table; static my_off_t intervall_length; static ha_checksum glob_crc; @@ -225,7 +239,8 @@ int main(int argc, char **argv) } if (ok && isamchk_neaded && !silent) puts("Remember to run myisamchk -rq on compressed tables"); - VOID(fflush(stdout)); VOID(fflush(stderr)); + VOID(fflush(stdout)); + VOID(fflush(stderr)); free_defaults(default_argv); my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR); exit(error ? 2 : 0); @@ -260,7 +275,7 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"test", 't', "Don't pack table, only test packing it.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"verbose", 'v', "Write info about progress and packing result.", + {"verbose", 'v', "Write info about progress and packing result. Use many -v for more verbosity!", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"version", 'V', "Output version information and exit.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, @@ -273,7 +288,8 @@ static struct my_option my_long_options[] = static void print_version(void) { - printf("%s Ver 1.22 for %s on %s\n", my_progname, SYSTEM_TYPE, MACHINE_TYPE); + VOID(printf("%s Ver 1.23 for %s on %s\n", + my_progname, SYSTEM_TYPE, MACHINE_TYPE)); NETWARE_SET_SCREEN_MODE(1); } @@ -290,7 +306,7 @@ static void usage(void) puts("afterwards to update the keys."); puts("You should give the .MYI file as the filename argument."); - printf("\nUsage: %s [OPTIONS] filename...\n", my_progname); + VOID(printf("\nUsage: %s [OPTIONS] filename...\n", my_progname)); my_print_help(my_long_options); print_defaults("my", load_default_groups); my_print_variables(my_long_options); @@ -314,7 +330,10 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), silent= 1; break; case 't': - test_only= verbose= 1; + test_only= 1; + /* Avoid to reset 'verbose' if it was already set > 1. */ + if (! verbose) + verbose= 1; break; case 'T': length= (uint) (strmov(tmp_dir, argument) - tmp_dir); @@ -325,7 +344,7 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), } break; case 'v': - verbose= 1; + verbose++; /* Allow for selecting the level of verbosity. */ silent= 0; break; case '#': @@ -380,7 +399,7 @@ static MI_INFO *open_isam_file(char *name,int mode) (opt_wait ? HA_OPEN_WAIT_IF_LOCKED : HA_OPEN_ABORT_IF_LOCKED)))) { - VOID(fprintf(stderr,"%s gave error %d on open\n",name,my_errno)); + VOID(fprintf(stderr, "%s gave error %d on open\n", name, my_errno)); DBUG_RETURN(0); } share=isam_file->s; @@ -388,7 +407,7 @@ static MI_INFO *open_isam_file(char *name,int mode) { if (!force_pack) { - VOID(fprintf(stderr,"%s is already compressed\n",name)); + VOID(fprintf(stderr, "%s is already compressed\n", name)); VOID(mi_close(isam_file)); DBUG_RETURN(0); } @@ -400,7 +419,7 @@ static MI_INFO *open_isam_file(char *name,int mode) (share->state.state.records <= 1 || share->state.state.data_file_length < 1024)) { - VOID(fprintf(stderr,"%s is too small to compress\n",name)); + VOID(fprintf(stderr, "%s is too small to compress\n", name)); VOID(mi_close(isam_file)); DBUG_RETURN(0); } @@ -446,8 +465,8 @@ static bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count) return 0; diff_file: - fprintf(stderr,"%s: Tables '%s' and '%s' are not identical\n", - my_progname,names[j],names[j+1]); + VOID(fprintf(stderr, "%s: Tables '%s' and '%s' are not identical\n", + my_progname, names[j], names[j+1])); error: while (i--) mi_close(mrg->file[i]); @@ -518,16 +537,25 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table) mrg->records=0; for (i=0 ; i < mrg->count ; i++) mrg->records+=mrg->file[i]->s->state.state.records; + + DBUG_PRINT("info", ("Compressing %s: (%lu records)", + result_table ? new_name : org_name, + (ulong) mrg->records)); if (write_loop || verbose) { - printf("Compressing %s: (%lu records)\n", - result_table ? new_name : org_name,(ulong) mrg->records); + VOID(printf("Compressing %s: (%lu records)\n", + result_table ? new_name : org_name, (ulong) mrg->records)); } trees=fields=share->base.fields; huff_counts=init_huff_count(isam_file,mrg->records); QUICK_SAFEMALLOC; + + /* + Read the whole data file(s) for statistics. + */ + DBUG_PRINT("info", ("- Calculating statistics")); if (write_loop || verbose) - printf("- Calculating statistics\n"); + VOID(printf("- Calculating statistics\n")); if (get_statistic(mrg,huff_counts)) goto err; NORMAL_SAFEMALLOC; @@ -536,29 +564,74 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table) old_length+= (mrg->file[i]->s->state.state.data_file_length - mrg->file[i]->s->state.state.empty); + /* + Create a global priority queue in preparation for making + temporary Huffman trees. + */ if (init_queue(&queue,256,0,0,compare_huff_elements,0)) goto err; + + /* + Check each column if we should use pre-space-compress, end-space- + compress, empty-field-compress or zero-field-compress. + */ check_counts(huff_counts,fields,mrg->records); + + /* + Build a Huffman tree for each column. + */ huff_trees=make_huff_trees(huff_counts,trees); + + /* + If the packed lengths of combined columns is less then the sum of + the non-combined columns, then create common Huffman trees for them. + We do this only for byte compressed columns, not for distinct values + compressed columns. + */ if ((int) (used_trees=join_same_trees(huff_counts,trees)) < 0) goto err; + + /* + Assign codes to all byte or column values. + */ if (make_huff_decode_table(huff_trees,fields)) goto err; + /* Prepare a file buffer. */ init_file_buffer(new_file,0); + + /* + Reserve space in the target file for the fixed compressed file header. + */ file_buffer.pos_in_file=HEAD_LENGTH; if (! test_only) VOID(my_seek(new_file,file_buffer.pos_in_file,MY_SEEK_SET,MYF(0))); + /* + Write field infos: field type, pack type, length bits, tree number. + */ write_field_info(huff_counts,fields,used_trees); + + /* + Write decode trees. + */ if (!(tot_elements=write_huff_tree(huff_trees,trees))) goto err; + + /* + Calculate the total length of the compression info header. + This includes the fixed compressed file header, the column compression + type descriptions, and the decode trees. + */ header_length=(uint) file_buffer.pos_in_file+ (uint) (file_buffer.pos-file_buffer.buffer); - /* Compress file */ + /* + Compress the source file into the target file. + */ + DBUG_PRINT("info", ("- Compressing file")); if (write_loop || verbose) - printf("- Compressing file\n"); + VOID(printf("- Compressing file\n")); error=compress_isam_file(mrg,huff_counts); new_length=file_buffer.pos_in_file; if (!error && !test_only) @@ -568,16 +641,28 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table) error=my_write(file_buffer.file,buff,sizeof(buff), MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0; } + + /* + Write the fixed compressed file header. + */ if (!error) error=write_header(mrg,header_length,used_trees,tot_elements, new_length); + + /* Flush the file buffer. */ end_file_buffer(); + /* Display statistics. */ + DBUG_PRINT("info", ("Min record length: %6d Max length: %6d " + "Mean total length: %6ld\n", + mrg->min_pack_length, mrg->max_pack_length, + (ulong) (mrg->records ? (new_length/mrg->records) : 0))); if (verbose && mrg->records) - printf("Min record length: %6d Max length: %6d Mean total length: %6ld\n", - mrg->min_pack_length,mrg->max_pack_length, - (ulong) (new_length/mrg->records)); + VOID(printf("Min record length: %6d Max length: %6d " + "Mean total length: %6ld\n", mrg->min_pack_length, + mrg->max_pack_length, (ulong) (new_length/mrg->records))); + /* Close source and target file. */ if (!test_only) { error|=my_close(new_file,MYF(MY_WME)); @@ -588,6 +673,7 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table) } } + /* Cleanup. */ free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields); if (! test_only && ! error) { @@ -629,15 +715,16 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table) error|=my_close(join_isam_file,MYF(MY_WME)); if (error) { - VOID(fprintf(stderr,"Aborting: %s is not compressed\n",org_name)); + VOID(fprintf(stderr, "Aborting: %s is not compressed\n", org_name)); VOID(my_delete(new_name,MYF(MY_WME))); DBUG_RETURN(-1); } if (write_loop || verbose) { if (old_length) - printf("%.4g%% \n", (((longlong) (old_length -new_length))*100.0/ - (longlong) old_length)); + VOID(printf("%.4g%% \n", + (((longlong) (old_length - new_length)) * 100.0 / + (longlong) old_length))); else puts("Empty file saved in compressed format"); } @@ -650,7 +737,7 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table) if (join_isam_file >= 0) VOID(my_close(join_isam_file,MYF(0))); mrg_close(mrg); - VOID(fprintf(stderr,"Aborted: %s is not compressed\n",org_name)); + VOID(fprintf(stderr, "Aborted: %s is not compressed\n", org_name)); DBUG_RETURN(-1); } @@ -677,6 +764,12 @@ static HUFF_COUNTS *init_huff_count(MI_INFO *info,my_off_t records) (type == FIELD_NORMAL || type == FIELD_SKIP_ZERO)) count[i].max_zero_fill= count[i].field_length; + /* + For every column initialize a tree, which is used to detect distinct + column values. 'int_tree' works together with 'tree_buff' and + 'tree_pos'. It's keys are implemented by pointers into 'tree_buff'. + This is accomplished by '-1' as the element size. + */ init_tree(&count[i].int_tree,0,0,-1,(qsort_cmp2) compare_tree,0, NULL, NULL); if (records && type != FIELD_BLOB && type != FIELD_VARCHAR) @@ -762,10 +855,13 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) ulong tot_blob_length=0; if (! error) { + /* glob_crc is a checksum over all bytes of all records. */ if (static_row_size) glob_crc+=mi_static_checksum(mrg->file[0],record); else glob_crc+=mi_checksum(mrg->file[0],record); + + /* Count the incidence of values separately for every column. */ for (pos=record,count=huff_counts ; count < end_count ; count++, @@ -773,15 +869,48 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) { next_pos=end_pos=(start_pos=pos)+count->field_length; - /* Put value in tree if there is room for it */ + /* + Put the whole column value in a tree if there is room for it. + 'int_tree' is used to quickly check for duplicate values. + 'tree_buff' collects as many distinct column values as + possible. If the field length is > 1, it is tree_buff_length, + else 2 bytes. Each value is 'field_length' bytes big. If there + are more distinct column values than fit into the buffer, we + give up with this tree. BLOBs and VARCHARs do not have a + tree_buff as it can only be used with fixed length columns. + For the special case of field length == 1, we handle only the + case that there is only one distinct value in the table(s). + Otherwise, we can have a maximum of 256 distinct values. This + is then handled by the normal Huffman tree build. + + Another limit for collecting distinct column values is the + number of values itself. Since we would need to build a + Huffman tree for the values, we are limited by the 'IS_OFFSET' + constant. This constant expresses a bit which is used to + determine if a tree element holds a final value or an offset + to a child element. Hence, all values and offsets need to be + smaller than 'IS_OFFSET'. A tree element is implemented with + two integer values, one for the left branch and one for the + right branch. For the extreme case that the first element + points to the last element, the number of integers in the tree + must be less or equal to IS_OFFSET. So the number of elements + must be less or equal to IS_OFFSET / 2. + + WARNING: At first, we insert a pointer into the record buffer + as the key for the tree. If we got a new distinct value, which + is really inserted into the tree, instead of being counted + only, we will copy the column value from the record buffer to + 'tree_buff' and adjust the key pointer of the tree accordingly. + */ if (count->tree_buff) { global_count=count; if (!(element=tree_insert(&count->int_tree,pos, 0, count->int_tree.custom_arg)) || (element->count == 1 && - count->tree_buff + tree_buff_length < - count->tree_pos + count->field_length) || + (count->tree_buff + tree_buff_length < + count->tree_pos + count->field_length)) || + (count->int_tree.elements_in_tree > IS_OFFSET / 2) || (count->field_length == 1 && count->int_tree.elements_in_tree > 1)) { @@ -791,10 +920,17 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) } else { + /* + If tree_insert() succeeds, it either creates a new element + or increments the counter of an existing element. + */ if (element->count == 1) - { /* New element */ + { + /* Copy the new column value into 'tree_buff'. */ memcpy(count->tree_pos,pos,(size_t) count->field_length); + /* Adjust the key pointer in the tree. */ tree_set_pointer(element,count->tree_pos); + /* Point behind the last column value so far. */ count->tree_pos+=count->field_length; } } @@ -804,15 +940,21 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) if (count->field_type == FIELD_NORMAL || count->field_type == FIELD_SKIP_ENDSPACE) { + /* Ignore trailing space. */ for ( ; end_pos > pos ; end_pos--) if (end_pos[-1] != ' ') break; + /* Empty fields are just counted. Go to the next record. */ if (end_pos == pos) { count->empty_fields++; count->max_zero_fill=0; continue; } + /* + Count the total of all trailing spaces and the number of + short trailing spaces. Remember the longest trailing space. + */ length= (uint) (next_pos-end_pos); count->tot_end_space+=length; if (length < 8) @@ -820,18 +962,25 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) if (count->max_end_space < length) count->max_end_space = length; } + if (count->field_type == FIELD_NORMAL || count->field_type == FIELD_SKIP_PRESPACE) { + /* Ignore leading space. */ for (pos=start_pos; pos < end_pos ; pos++) if (pos[0] != ' ') break; + /* Empty fields are just counted. Go to the next record. */ if (end_pos == pos) { count->empty_fields++; count->max_zero_fill=0; continue; } + /* + Count the total of all leading spaces and the number of + short leading spaces. Remember the longest leading space. + */ length= (uint) (pos-start_pos); count->tot_pre_space+=length; if (length < 8) @@ -839,6 +988,8 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) if (count->max_pre_space < length) count->max_pre_space = length; } + + /* Calculate pos, end_pos, and max_length for variable length fields. */ if (count->field_type == FIELD_BLOB) { uint field_length=count->field_length -mi_portable_sizeof_char_ptr; @@ -857,45 +1008,121 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) end_pos= pos+length; set_if_bigger(count->max_length,length); } + + /* Evaluate 'max_zero_fill' for short fields. */ if (count->field_length <= 8 && (count->field_type == FIELD_NORMAL || count->field_type == FIELD_SKIP_ZERO)) { uint i; + /* Zero fields are just counted. Go to the next record. */ if (!memcmp((byte*) start_pos,zero_string,count->field_length)) { count->zero_fields++; continue; } + /* + max_zero_fill starts with field_length. It is decreased every + time a shorter "zero trailer" is found. It is set to zero when + an empty field is found (see above). This suggests that the + variable should be called 'min_zero_fill'. + */ for (i =0 ; i < count->max_zero_fill && ! end_pos[-1 - (int) i] ; i++) ; if (i < count->max_zero_fill) count->max_zero_fill=i; } + + /* Ignore zero fields and check fields. */ if (count->field_type == FIELD_ZERO || count->field_type == FIELD_CHECK) continue; + + /* + Count the incidence of every byte value in the + significant field value. + */ for ( ; pos < end_pos ; pos++) count->counts[(uchar) *pos]++; + + /* Step to next field. */ } + if (tot_blob_length > max_blob_length) max_blob_length=tot_blob_length; record_count++; if (write_loop && record_count % WRITE_COUNT == 0) { - printf("%lu\r",(ulong) record_count); VOID(fflush(stdout)); + VOID(printf("%lu\r", (ulong) record_count)); + VOID(fflush(stdout)); } } else if (error != HA_ERR_RECORD_DELETED) { - fprintf(stderr,"Got error %d while reading rows",error); + VOID(fprintf(stderr, "Got error %d while reading rows", error)); break; } + + /* Step to next record. */ } if (write_loop) { - printf(" \r"); VOID(fflush(stdout)); + VOID(printf(" \r")); + VOID(fflush(stdout)); } + + /* + If --debug=d,fakebigcodes is set, fake the counts to get big Huffman + codes. + */ + DBUG_EXECUTE_IF("fakebigcodes", fakebigcodes(huff_counts, end_count);); + + DBUG_PRINT("info", ("Found the following number of incidents " + "of the byte codes:")); + if (verbose >= 2) + VOID(printf("Found the following number of incidents " + "of the byte codes:\n")); + for (count= huff_counts ; count < end_count; count++) + { + uint idx; + my_off_t total_count; + char llbuf[32]; + + DBUG_PRINT("info", ("column: %3u", count - huff_counts + 1)); + if (verbose >= 2) + VOID(printf("column: %3u\n", count - huff_counts + 1)); + if (count->tree_buff) + { + DBUG_PRINT("info", ("number of distinct values: %u", + (count->tree_pos - count->tree_buff) / + count->field_length)); + if (verbose >= 2) + VOID(printf("number of distinct values: %u\n", + (count->tree_pos - count->tree_buff) / + count->field_length)); + } + total_count= 0; + for (idx= 0; idx < 256; idx++) + { + if (count->counts[idx]) + { + total_count+= count->counts[idx]; + DBUG_PRINT("info", ("counts[0x%02x]: %12s", idx, + llstr((longlong) count->counts[idx], llbuf))); + if (verbose >= 2) + VOID(printf("counts[0x%02x]: %12s\n", idx, + llstr((longlong) count->counts[idx], llbuf))); + } + } + DBUG_PRINT("info", ("total: %12s", llstr((longlong) total_count, + llbuf))); + if ((verbose >= 2) && total_count) + { + VOID(printf("total: %12s\n", + llstr((longlong) total_count, llbuf))); + } + } + mrg->records=record_count; mrg->max_blob_length=max_blob_length; my_afree((gptr) record); @@ -944,9 +1171,14 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees, huff_counts->field_type=FIELD_NORMAL; huff_counts->pack_type=0; + /* Check for zero-filled records (in this column), or zero records. */ if (huff_counts->zero_fields || ! records) { my_off_t old_space_count; + /* + If there are only zero filled records (in this column), + or no records at all, we are done. + */ if (huff_counts->zero_fields == records) { huff_counts->field_type= FIELD_ZERO; @@ -954,14 +1186,22 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees, huff_counts->counts[0]=0; goto found_pack; } + /* Remeber the number of significant spaces. */ old_space_count=huff_counts->counts[' ']; - huff_counts->counts[' ']+=huff_counts->tot_end_space+ - huff_counts->tot_pre_space + - huff_counts->empty_fields * huff_counts->field_length; + /* Add all leading and trailing spaces. */ + huff_counts->counts[' ']+= (huff_counts->tot_end_space + + huff_counts->tot_pre_space + + huff_counts->empty_fields * + huff_counts->field_length); + /* Check, what the compressed length of this would be. */ old_length=calc_packed_length(huff_counts,0)+records/8; + /* Get the number of zero bytes. */ length=huff_counts->zero_fields*huff_counts->field_length; + /* Add it to the counts. */ huff_counts->counts[0]+=length; + /* Check, what the compressed length of this would be. */ new_length=calc_packed_length(huff_counts,0); + /* If the compression without the zeroes would be shorter, we are done. */ if (old_length < new_length && huff_counts->field_length > 1) { huff_counts->field_type=FIELD_SKIP_ZERO; @@ -969,9 +1209,16 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees, huff_counts->bytes_packed=old_length- records/8; goto found_pack; } + /* Remove the insignificant spaces, but keep the zeroes. */ huff_counts->counts[' ']=old_space_count; } + /* Check, what the compressed length of this column would be. */ huff_counts->bytes_packed=calc_packed_length(huff_counts,0); + + /* + If there are enough empty records (in this column), + treating them specially may pay off. + */ if (huff_counts->empty_fields) { if (huff_counts->field_length > 2 && @@ -1003,6 +1250,11 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees, } } } + + /* + If there are enough trailing spaces (in this column), + treating them specially may pay off. + */ if (huff_counts->tot_end_space) { huff_counts->counts[' ']+=huff_counts->tot_pre_space; @@ -1012,6 +1264,11 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees, goto found_pack; huff_counts->counts[' ']-=huff_counts->tot_pre_space; } + + /* + If there are enough leading spaces (in this column), + treating them specially may pay off. + */ if (huff_counts->tot_pre_space) { if (test_space_compress(huff_counts,records,huff_counts->max_pre_space, @@ -1041,6 +1298,8 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees, { HUFF_TREE tree; + DBUG_EXECUTE_IF("forceintervall", + huff_counts->bytes_packed= ~ (my_off_t) 0;); tree.element_buffer=0; if (!make_huff_tree(&tree,huff_counts) && tree.bytes_packed+tree.tree_pack_length < huff_counts->bytes_packed) @@ -1066,14 +1325,27 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees, fill_zero_fields++; field_count[huff_counts->field_type]++; } + DBUG_PRINT("info", ("normal: %3d empty-space: %3d " + "empty-zero: %3d empty-fill: %3d", + field_count[FIELD_NORMAL],space_fields, + field_count[FIELD_SKIP_ZERO],fill_zero_fields)); + DBUG_PRINT("info", ("pre-space: %3d end-space: %3d " + "intervall-fields: %3d zero: %3d", + field_count[FIELD_SKIP_PRESPACE], + field_count[FIELD_SKIP_ENDSPACE], + field_count[FIELD_INTERVALL], + field_count[FIELD_ZERO])); if (verbose) - printf("\nnormal: %3d empty-space: %3d empty-zero: %3d empty-fill: %3d\npre-space: %3d end-space: %3d intervall-fields: %3d zero: %3d\n", - field_count[FIELD_NORMAL],space_fields, - field_count[FIELD_SKIP_ZERO],fill_zero_fields, - field_count[FIELD_SKIP_PRESPACE], - field_count[FIELD_SKIP_ENDSPACE], - field_count[FIELD_INTERVALL], - field_count[FIELD_ZERO]); + VOID(printf("\nnormal: %3d empty-space: %3d " + "empty-zero: %3d empty-fill: %3d\n" + "pre-space: %3d end-space: %3d " + "intervall-fields: %3d zero: %3d\n", + field_count[FIELD_NORMAL],space_fields, + field_count[FIELD_SKIP_ZERO],fill_zero_fields, + field_count[FIELD_SKIP_PRESPACE], + field_count[FIELD_SKIP_ENDSPACE], + field_count[FIELD_INTERVALL], + field_count[FIELD_ZERO])); DBUG_VOID_RETURN; } @@ -1170,8 +1442,24 @@ static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts, uint trees) DBUG_RETURN(huff_tree); } - /* Update huff_tree according to huff_counts->counts or - huff_counts->tree_buff */ +/* + Build a Huffman tree. + + SYNOPSIS + make_huff_tree() + huff_tree The Huffman tree. + huff_counts The counts. + + DESCRIPTION + Build a Huffman tree according to huff_counts->counts or + huff_counts->tree_buff. tree_buff, if non-NULL contains up to + tree_buff_length of distinct column values. In that case, whole + values can be Huffman encoded instead of single bytes. + + RETURN + 0 OK + != 0 Error +*/ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) { @@ -1182,12 +1470,14 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) first=last=0; if (huff_counts->tree_buff) { + /* Calculate the number of distinct values in tree_buff. */ found= (uint) (huff_counts->tree_pos - huff_counts->tree_buff) / huff_counts->field_length; first=0; last=found-1; } else { + /* Count the number of byte codes found in the column. */ for (i=found=0 ; i < 256 ; i++) { if (huff_counts->counts[i]) @@ -1201,6 +1491,7 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) found=2; } + /* When using 'tree_buff' we can have more that 256 values. */ if (queue.max_elements < found) { delete_queue(&queue); @@ -1208,6 +1499,7 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) return -1; } + /* Allocate or reallocate an element buffer for the Huffman tree. */ if (!huff_tree->element_buffer) { if (!(huff_tree->element_buffer= @@ -1235,15 +1527,25 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) if (huff_counts->tree_buff) { huff_tree->elements=0; - tree_walk(&huff_counts->int_tree, - (int (*)(void*, element_count,void*)) save_counts_in_queue, - (gptr) huff_tree, left_root_right); huff_tree->tree_pack_length=(1+15+16+5+5+ (huff_tree->char_bits+1)*found+ (huff_tree->offset_bits+1)* (found-2)+7)/8 + (uint) (huff_tree->counts->tree_pos- huff_tree->counts->tree_buff); + /* + Put a HUFF_ELEMENT into the queue for every distinct column value. + + tree_walk() calls save_counts_in_queue() for every element in + 'int_tree'. This takes elements from the target trees element + buffer and places references to them into the buffer of the + priority queue. We insert in column value order, but the order is + in fact irrelevant here. We will establish the correct order + later. + */ + tree_walk(&huff_counts->int_tree, + (int (*)(void*, element_count,void*)) save_counts_in_queue, + (gptr) huff_tree, left_root_right); } else { @@ -1252,7 +1554,15 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) (huff_tree->char_bits+1)*found+ (huff_tree->offset_bits+1)* (found-2)+7)/8; + /* + Put a HUFF_ELEMENT into the queue for every byte code found in the column. + The elements are taken from the target trees element buffer. + Instead of using queue_insert(), we just place references to the + elements into the buffer of the priority queue. We insert in byte + value order, but the order is in fact irrelevant here. We will + establish the correct order later. + */ for (i=first, found=0 ; i <= last ; i++) { if (huff_counts->counts[i]) @@ -1264,8 +1574,13 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) queue.root[found]=(byte*) new_huff_el; } } + /* + If there is only a single byte value in this field in all records, + add a second element with zero incidence. This is required to enter + the loop, which builds the Huffman tree. + */ while (found < 2) - { /* Our huff_trees request at least 2 elements */ + { new_huff_el=huff_tree->element_buffer+(found++); new_huff_el->count=0; new_huff_el->a.leaf.null=0; @@ -1276,21 +1591,53 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) queue.root[found]=(byte*) new_huff_el; } } + + /* Make a queue from the queue buffer. */ queue.elements=found; + /* + Make a priority queue from the queue. Construct its index so that we + have a partially ordered tree. + */ for (i=found/2 ; i > 0 ; i--) _downheap(&queue,i); + + /* The Huffman algorithm. */ bytes_packed=0; bits_packed=0; for (i=1 ; i < found ; i++) { + /* + Pop the top element from the queue (the one with the least incidence). + Popping from a priority queue includes a re-ordering of the queue, + to get the next least incidence element to the top. + */ a=(HUFF_ELEMENT*) queue_remove(&queue,0); + /* + Copy the next least incidence element. The queue implementation + reserves root[0] for temporary purposes. root[1] is the top. + */ b=(HUFF_ELEMENT*) queue.root[1]; + /* Get a new element from the element buffer. */ new_huff_el=huff_tree->element_buffer+found+i; + /* The new element gets the sum of the two least incidence elements. */ new_huff_el->count=a->count+b->count; + /* + The Huffman algorithm assigns another bit to the code for a byte + every time that bytes incidence is combined (directly or indirectly) + to a new element as one of the two least incidence elements. + This means that one more bit per incidence of that byte is required + in the resulting file. So we add the new combined incidence as the + number of bits by which the result grows. + */ bits_packed+=(uint) (new_huff_el->count & 7); bytes_packed+=new_huff_el->count/8; - new_huff_el->a.nod.left=a; /* lesser in left */ + /* The new element points to its children, lesser in left. */ + new_huff_el->a.nod.left=a; new_huff_el->a.nod.right=b; + /* + Replace the copied top element by the new element and re-order the + queue. + */ queue.root[1]=(byte*) new_huff_el; queue_replaced(&queue); } @@ -1309,7 +1656,26 @@ static int compare_tree(void* cmp_arg __attribute__((unused)), return 0; } - /* Used by make_huff_tree to save intervall-counts in queue */ +/* + Organize distinct column values and their incidences into a priority queue. + + SYNOPSIS + save_counts_in_queue() + key The column value. + count The incidence of this value. + tree The Huffman tree to be built later. + + DESCRIPTION + We use the element buffer of the targeted tree. The distinct column + values are organized in a priority queue first. The Huffman + algorithm will later organize the elements into a Huffman tree. For + the time being, we just place references to the elements into the + queue buffer. The buffer will later be organized into a priority + queue. + + RETURN + 0 + */ static int save_counts_in_queue(byte *key, element_count count, HUFF_TREE *tree) @@ -1326,8 +1692,23 @@ static int save_counts_in_queue(byte *key, element_count count, } - /* Calculate length of file if given counts should be used */ - /* Its actually a faster version of make_huff_tree */ +/* + Calculate length of file if given counts should be used. + + SYNOPSIS + calc_packed_length() + huff_counts The counts for a column of the table(s). + add_tree_lenght If the decode tree length should be added. + + DESCRIPTION + We need to follow the Huffman algorithm until we know, how many bits + are required for each byte code. But we do not need the resulting + Huffman tree. Hence, we can leave out some steps which are essential + in make_huff_tree(). + + RETURN + Number of bytes required to compress this table column. +*/ static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts, uint add_tree_lenght) @@ -1337,6 +1718,23 @@ static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts, HUFF_ELEMENT element_buffer[256]; DBUG_ENTER("calc_packed_length"); + /* + WARNING: We use a small hack for efficiency: Instead of placing + references to HUFF_ELEMENTs into the queue, we just insert + references to the counts of the byte codes which appeared in this + table column. During the Huffman algorithm they are successively + replaced by references to HUFF_ELEMENTs. This works, because + HUFF_ELEMENTs have the incidence count at their beginning. + Regardless, wether the queue array contains references to counts of + type my_off_t or references to HUFF_ELEMENTs which have the count of + type my_off_t at their beginning, it always points to a count of the + same type. + + Instead of using queue_insert(), we just copy the references into + the buffer of the priority queue. We insert in byte value order, but + the order is in fact irrelevant here. We will establish the correct + order later. + */ first=last=0; for (i=found=0 ; i < 256 ; i++) { @@ -1345,31 +1743,73 @@ static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts, if (! found++) first=i; last=i; + /* We start with root[1], which is the queues top element. */ queue.root[found]=(byte*) &huff_counts->counts[i]; } } if (!found) DBUG_RETURN(0); /* Empty tree */ + /* + If there is only a single byte value in this field in all records, + add a second element with zero incidence. This is required to enter + the loop, which follows the Huffman algorithm. + */ if (found < 2) queue.root[++found]=(byte*) &huff_counts->counts[last ? 0 : 1]; + /* Make a queue from the queue buffer. */ queue.elements=found; bytes_packed=0; bits_packed=0; + /* Add the length of the coding table, which would become part of the file. */ if (add_tree_lenght) bytes_packed=(8+9+5+5+(max_bit(last-first)+1)*found+ (max_bit(found-1)+1+1)*(found-2) +7)/8; + + /* + Make a priority queue from the queue. Construct its index so that we + have a partially ordered tree. + */ for (i=(found+1)/2 ; i > 0 ; i--) _downheap(&queue,i); + + /* The Huffman algorithm. */ for (i=0 ; i < found-1 ; i++) { - HUFF_ELEMENT *a,*b,*new_huff_el; - a=(HUFF_ELEMENT*) queue_remove(&queue,0); - b=(HUFF_ELEMENT*) queue.root[1]; - new_huff_el=element_buffer+i; - new_huff_el->count=a->count+b->count; + my_off_t *a; + my_off_t *b; + HUFF_ELEMENT *new_huff_el; + + /* + Pop the top element from the queue (the one with the least + incidence). Popping from a priority queue includes a re-ordering + of the queue, to get the next least incidence element to the top. + */ + a= (my_off_t*) queue_remove(&queue, 0); + /* + Copy the next least incidence element. The queue implementation + reserves root[0] for temporary purposes. root[1] is the top. + */ + b= (my_off_t*) queue.root[1]; + /* Create a new element in a local (automatic) buffer. */ + new_huff_el= element_buffer + i; + /* The new element gets the sum of the two least incidence elements. */ + new_huff_el->count= *a + *b; + /* + The Huffman algorithm assigns another bit to the code for a byte + every time that bytes incidence is combined (directly or indirectly) + to a new element as one of the two least incidence elements. + This means that one more bit per incidence of that byte is required + in the resulting file. So we add the new combined incidence as the + number of bits by which the result grows. + */ bits_packed+=(uint) (new_huff_el->count & 7); bytes_packed+=new_huff_el->count/8; + /* + Replace the copied top element by the new element and re-order the + queue. This successively replaces the references to counts by + references to HUFF_ELEMENTs. + */ queue.root[1]=(byte*) new_huff_el; queue_replaced(&queue); } @@ -1417,13 +1857,26 @@ static uint join_same_trees(HUFF_COUNTS *huff_counts, uint trees) } } } + DBUG_PRINT("info", ("Original trees: %d After join: %d", + trees, tree_number)); if (verbose) - printf("Original trees: %d After join: %d\n",trees,tree_number); + VOID(printf("Original trees: %d After join: %d\n", trees, tree_number)); return tree_number; /* Return trees left */ } - /* Fill in huff_tree decode tables */ +/* + Fill in huff_tree encode tables. + + SYNOPSIS + make_huff_decode_table() + huff_tree An array of HUFF_TREE which are to be encoded. + trees The number of HUFF_TREE in the array. + + RETURN + 0 success + != 0 error +*/ static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees) { @@ -1434,12 +1887,13 @@ static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees) { elements=huff_tree->counts->tree_buff ? huff_tree->elements : 256; if (!(huff_tree->code = - (ulong*) my_malloc(elements* - (sizeof(ulong)+sizeof(uchar)), - MYF(MY_WME | MY_ZEROFILL)))) + (ulonglong*) my_malloc(elements* + (sizeof(ulonglong) + sizeof(uchar)), + MYF(MY_WME | MY_ZEROFILL)))) return 1; huff_tree->code_len=(uchar*) (huff_tree->code+elements); - make_traverse_code_tree(huff_tree,huff_tree->root,32,0); + make_traverse_code_tree(huff_tree, huff_tree->root, + 8 * sizeof(ulonglong), LL(0)); } } return 0; @@ -1448,28 +1902,90 @@ static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees) static void make_traverse_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element, - uint size, ulong code) + uint size, ulonglong code) { uint chr; if (!element->a.leaf.null) { chr=element->a.leaf.element_nr; - huff_tree->code_len[chr]=(uchar) (32-size); - huff_tree->code[chr]= (code >> size); - if (huff_tree->height < 32-size) - huff_tree->height= 32-size; + huff_tree->code_len[chr]= (uchar) (8 * sizeof(ulonglong) - size); + huff_tree->code[chr]= (code >> size); + if (huff_tree->height < 8 * sizeof(ulonglong) - size) + huff_tree->height= 8 * sizeof(ulonglong) - size; } else { size--; make_traverse_code_tree(huff_tree,element->a.nod.left,size,code); - make_traverse_code_tree(huff_tree,element->a.nod.right,size, - code+((ulong) 1L << size)); + make_traverse_code_tree(huff_tree, element->a.nod.right, size, + code + (((ulonglong) 1) << size)); } return; } +/* + Convert a value into binary digits. + + SYNOPSIS + bindigits() + value The value. + length The number of low order bits to convert. + + NOTE + The result string is in static storage. It is reused on every call. + So you cannot use it twice in one expression. + + RETURN + A pointer to a static NUL-terminated string. + */ + +static char *bindigits(ulonglong value, uint bits) +{ + static char digits[72]; + char *ptr= digits; + uint idx= bits; + + DBUG_ASSERT(idx < sizeof(digits)); + while (idx) + *(ptr++)= '0' + ((value >> (--idx)) & 1); + *ptr= '\0'; + return digits; +} + + +/* + Convert a value into hexadecimal digits. + + SYNOPSIS + hexdigits() + value The value. + + NOTE + The result string is in static storage. It is reused on every call. + So you cannot use it twice in one expression. + + RETURN + A pointer to a static NUL-terminated string. + */ + +static char *hexdigits(ulonglong value) +{ + static char digits[20]; + char *ptr= digits; + uint idx= 2 * sizeof(value); /* Two hex digits per byte. */ + + DBUG_ASSERT(idx < sizeof(digits)); + while (idx) + { + if ((*(ptr++)= '0' + ((value >> (4 * (--idx))) & 0xf)) > '9') + *(ptr - 1)+= 'a' - '9' - 1; + } + *ptr= '\0'; + return digits; +} + + /* Write header to new packed data file */ static int write_header(PACK_MRG_INFO *mrg,uint head_length,uint trees, @@ -1503,15 +2019,64 @@ static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees) uint huff_tree_bits; huff_tree_bits=max_bit(trees ? trees-1 : 0); + DBUG_PRINT("info", ("")); + DBUG_PRINT("info", ("column types:")); + DBUG_PRINT("info", ("FIELD_NORMAL 0")); + DBUG_PRINT("info", ("FIELD_SKIP_ENDSPACE 1")); + DBUG_PRINT("info", ("FIELD_SKIP_PRESPACE 2")); + DBUG_PRINT("info", ("FIELD_SKIP_ZERO 3")); + DBUG_PRINT("info", ("FIELD_BLOB 4")); + DBUG_PRINT("info", ("FIELD_CONSTANT 5")); + DBUG_PRINT("info", ("FIELD_INTERVALL 6")); + DBUG_PRINT("info", ("FIELD_ZERO 7")); + DBUG_PRINT("info", ("FIELD_VARCHAR 8")); + DBUG_PRINT("info", ("FIELD_CHECK 9")); + DBUG_PRINT("info", ("")); + DBUG_PRINT("info", ("pack type as a set of flags:")); + DBUG_PRINT("info", ("PACK_TYPE_SELECTED 1")); + DBUG_PRINT("info", ("PACK_TYPE_SPACE_FIELDS 2")); + DBUG_PRINT("info", ("PACK_TYPE_ZERO_FILL 4")); + DBUG_PRINT("info", ("")); + if (verbose >= 2) + { + VOID(printf("\n")); + VOID(printf("column types:\n")); + VOID(printf("FIELD_NORMAL 0\n")); + VOID(printf("FIELD_SKIP_ENDSPACE 1\n")); + VOID(printf("FIELD_SKIP_PRESPACE 2\n")); + VOID(printf("FIELD_SKIP_ZERO 3\n")); + VOID(printf("FIELD_BLOB 4\n")); + VOID(printf("FIELD_CONSTANT 5\n")); + VOID(printf("FIELD_INTERVALL 6\n")); + VOID(printf("FIELD_ZERO 7\n")); + VOID(printf("FIELD_VARCHAR 8\n")); + VOID(printf("FIELD_CHECK 9\n")); + VOID(printf("\n")); + VOID(printf("pack type as a set of flags:\n")); + VOID(printf("PACK_TYPE_SELECTED 1\n")); + VOID(printf("PACK_TYPE_SPACE_FIELDS 2\n")); + VOID(printf("PACK_TYPE_ZERO_FILL 4\n")); + VOID(printf("\n")); + } for (i=0 ; i++ < fields ; counts++) { - write_bits((ulong) (int) counts->field_type,5); + write_bits((ulonglong) (int) counts->field_type, 5); write_bits(counts->pack_type,6); if (counts->pack_type & PACK_TYPE_ZERO_FILL) write_bits(counts->max_zero_fill,5); else write_bits(counts->length_bits,5); - write_bits((ulong) counts->tree->tree_number-1,huff_tree_bits); + write_bits((ulonglong) counts->tree->tree_number - 1, huff_tree_bits); + DBUG_PRINT("info", ("column: %3u type: %2u pack: %2u zero: %4u " + "lbits: %2u tree: %2u length: %4u", + i , counts->field_type, counts->pack_type, + counts->max_zero_fill, counts->length_bits, + counts->tree->tree_number, counts->field_length)); + if (verbose >= 2) + VOID(printf("column: %3u type: %2u pack: %2u zero: %4u lbits: %2u " + "tree: %2u length: %4u\n", i , counts->field_type, + counts->pack_type, counts->max_zero_fill, counts->length_bits, + counts->tree->tree_number, counts->field_length)); } flush_bits(); return; @@ -1524,45 +2089,72 @@ static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees) static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) { uint i,int_length; + uint tree_no; + uint codes; + uint errors= 0; uint *packed_tree,*offset,length; my_off_t elements; + /* Find the highest number of elements in the trees. */ for (i=length=0 ; i < trees ; i++) if (huff_tree[i].tree_number > 0 && huff_tree[i].elements > length) length=huff_tree[i].elements; + /* + Allocate a buffer for packing a decode tree. Two numbers per element + (left child and right child). + */ if (!(packed_tree=(uint*) my_alloca(sizeof(uint)*length*2))) { my_error(EE_OUTOFMEMORY,MYF(ME_BELL),sizeof(uint)*length*2); return 0; } + DBUG_PRINT("info", ("")); + if (verbose >= 2) + VOID(printf("\n")); + tree_no= 0; intervall_length=0; for (elements=0; trees-- ; huff_tree++) { + /* Skip columns that have been joined with other columns. */ if (huff_tree->tree_number == 0) continue; /* Deleted tree */ + tree_no++; + DBUG_PRINT("info", ("")); + if (verbose >= 3) + VOID(printf("\n")); + /* Count the total number of elements (byte codes or column values). */ elements+=huff_tree->elements; huff_tree->max_offset=2; + /* Build a tree of offsets and codes for decoding in 'packed_tree'. */ if (huff_tree->elements <= 1) offset=packed_tree; else offset=make_offset_code_tree(huff_tree,huff_tree->root,packed_tree); + + /* This should be the same as 'length' above. */ huff_tree->offset_bits=max_bit(huff_tree->max_offset); + + /* + Since we check this during collecting the distinct column values, + this should never happen. + */ if (huff_tree->max_offset >= IS_OFFSET) { /* This should be impossible */ - VOID(fprintf(stderr,"Tree offset got too big: %d, aborted\n", - huff_tree->max_offset)); + VOID(fprintf(stderr, "Tree offset got too big: %d, aborted\n", + huff_tree->max_offset)); my_afree((gptr) packed_tree); return 0; } -#ifdef EXTRA_DBUG - printf("pos: %d elements: %d tree-elements: %d char_bits: %d\n", - (uint) (file_buffer.pos-file_buffer.buffer), - huff_tree->elements, (offset-packed_tree),huff_tree->char_bits); -#endif + DBUG_PRINT("info", ("pos: %lu elements: %u tree-elements: %lu " + "char_bits: %u\n", + (ulong) (file_buffer.pos - file_buffer.buffer), + huff_tree->elements, (ulong) (offset - packed_tree), + huff_tree->char_bits)); if (!huff_tree->counts->tree_buff) { + /* We do a byte compression on this column. Mark with bit 0. */ write_bits(0,1); write_bits(huff_tree->min_chr,8); write_bits(huff_tree->elements,9); @@ -1574,6 +2166,7 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) { int_length=(uint) (huff_tree->counts->tree_pos - huff_tree->counts->tree_buff); + /* We have distinct column values for this column. Mark with bit 1. */ write_bits(1,1); write_bits(huff_tree->elements,15); write_bits(int_length,16); @@ -1581,10 +2174,29 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) write_bits(huff_tree->offset_bits,5); intervall_length+=int_length; } + DBUG_PRINT("info", ("tree: %2u elements: %4u char_bits: %2u " + "offset_bits: %2u %s: %5u codelen: %2u", + tree_no, huff_tree->elements, huff_tree->char_bits, + huff_tree->offset_bits, huff_tree->counts->tree_buff ? + "bufflen" : "min_chr", huff_tree->counts->tree_buff ? + int_length : huff_tree->min_chr, huff_tree->height)); + if (verbose >= 2) + VOID(printf("tree: %2u elements: %4u char_bits: %2u offset_bits: %2u " + "%s: %5u codelen: %2u\n", tree_no, huff_tree->elements, + huff_tree->char_bits, huff_tree->offset_bits, + huff_tree->counts->tree_buff ? "bufflen" : "min_chr", + huff_tree->counts->tree_buff ? int_length : + huff_tree->min_chr, huff_tree->height)); + + /* Check that the code tree length matches the element count. */ length=(uint) (offset-packed_tree); if (length != huff_tree->elements*2-2) - printf("error: Huff-tree-length: %d != calc_length: %d\n", - length,huff_tree->elements*2-2); + { + VOID(fprintf(stderr, "error: Huff-tree-length: %d != calc_length: %d\n", + length, huff_tree->elements * 2 - 2)); + errors++; + break; + } for (i=0 ; i < length ; i++) { @@ -1593,16 +2205,122 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) huff_tree->offset_bits+1); else write_bits(packed_tree[i]-huff_tree->min_chr,huff_tree->char_bits+1); + DBUG_PRINT("info", ("tree[0x%04x]: %s0x%04x", + i, (packed_tree[i] & IS_OFFSET) ? + " -> " : "", (packed_tree[i] & IS_OFFSET) ? + packed_tree[i] - IS_OFFSET + i : packed_tree[i])); + if (verbose >= 3) + VOID(printf("tree[0x%04x]: %s0x%04x\n", + i, (packed_tree[i] & IS_OFFSET) ? " -> " : "", + (packed_tree[i] & IS_OFFSET) ? + packed_tree[i] - IS_OFFSET + i : packed_tree[i])); } flush_bits(); + + /* + Display coding tables and check their correctness. + */ + codes= huff_tree->counts->tree_buff ? huff_tree->elements : 256; + for (i= 0; i < codes; i++) + { + ulonglong code; + uint bits; + uint len; + uint idx; + + if (! (len= huff_tree->code_len[i])) + continue; + DBUG_PRINT("info", ("code[0x%04x]: 0x%s bits: %2u bin: %s", i, + hexdigits(huff_tree->code[i]), huff_tree->code_len[i], + bindigits(huff_tree->code[i], + huff_tree->code_len[i]))); + if (verbose >= 3) + VOID(printf("code[0x%04x]: 0x%s bits: %2u bin: %s\n", i, + hexdigits(huff_tree->code[i]), huff_tree->code_len[i], + bindigits(huff_tree->code[i], huff_tree->code_len[i]))); + + /* Check that the encode table decodes correctly. */ + code= 0; + bits= 0; + idx= 0; + DBUG_EXECUTE_IF("forcechkerr1", len--;); + DBUG_EXECUTE_IF("forcechkerr2", bits= 8 * sizeof(code);); + DBUG_EXECUTE_IF("forcechkerr3", idx= length;); + for (;;) + { + if (! len) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: code 0x%s with %u bits not found\n", + hexdigits(huff_tree->code[i]), huff_tree->code_len[i])); + errors++; + break; + } + code<<= 1; + code|= (huff_tree->code[i] >> (--len)) & 1; + bits++; + if (bits > 8 * sizeof(code)) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: Huffman code too long: %u/%u\n", + bits, 8 * sizeof(code))); + errors++; + break; + } + idx+= code & 1; + if (idx >= length) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: illegal tree offset: %u/%u\n", + idx, length)); + errors++; + break; + } + if (packed_tree[idx] & IS_OFFSET) + idx+= packed_tree[idx] & ~IS_OFFSET; + else + break; /* Hit a leaf. This contains the result value. */ + } + if (errors) + break; + + DBUG_EXECUTE_IF("forcechkerr4", packed_tree[idx]++;); + if (packed_tree[idx] != i) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: decoded value 0x%04x should be: 0x%04x\n", + packed_tree[idx], i)); + errors++; + break; + } + } /*end for (codes)*/ + if (errors) + break; + + /* Write column values in case of distinct column value compression. */ if (huff_tree->counts->tree_buff) { for (i=0 ; i < int_length ; i++) - write_bits((uint) (uchar) huff_tree->counts->tree_buff[i],8); + { + write_bits((ulonglong) (uchar) huff_tree->counts->tree_buff[i], 8); + DBUG_PRINT("info", ("column_values[0x%04x]: 0x%02x", + i, (uchar) huff_tree->counts->tree_buff[i])); + if (verbose >= 3) + VOID(printf("column_values[0x%04x]: 0x%02x\n", + i, (uchar) huff_tree->counts->tree_buff[i])); + } } flush_bits(); } + DBUG_PRINT("info", ("")); + if (verbose >= 2) + VOID(printf("\n")); my_afree((gptr) packed_tree); + if (errors) + { + VOID(fprintf(stderr, "Error: Generated decode trees are corrupt. Stop.\n")); + return 0; + } return elements; } @@ -1613,23 +2331,43 @@ static uint *make_offset_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element, uint *prev_offset; prev_offset= offset; + /* + 'a.leaf.null' takes the same place as 'a.nod.left'. If this is null, + then there is no left child and, hence no right child either. This + is a property of a binary tree. An element is either a node with two + childs, or a leaf without childs. + + The current element is always a node with two childs. Go left first. + */ if (!element->a.nod.left->a.leaf.null) { - offset[0] =(uint) element->a.nod.left->a.leaf.element_nr; + /* Store the byte code or the index of the column value. */ + prev_offset[0] =(uint) element->a.nod.left->a.leaf.element_nr; offset+=2; } else { + /* + Recursively traverse the tree to the left. Mark it as an offset to + another tree node (in contrast to a byte code or column value index). + */ prev_offset[0]= IS_OFFSET+2; offset=make_offset_code_tree(huff_tree,element->a.nod.left,offset+2); } + + /* Now, check the right child. */ if (!element->a.nod.right->a.leaf.null) { + /* Store the byte code or the index of the column value. */ prev_offset[1]=element->a.nod.right->a.leaf.element_nr; return offset; } else { + /* + Recursively traverse the tree to the right. Mark it as an offset to + another tree node (in contrast to a byte code or column value index). + */ uint temp=(uint) (offset-prev_offset-1); prev_offset[1]= IS_OFFSET+ temp; if (huff_tree->max_offset < temp) @@ -1656,6 +2394,7 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) uint i,max_calc_length,pack_ref_length,min_record_length,max_record_length, intervall,field_length,max_pack_length,pack_blob_length; my_off_t record_count; + char llbuf[32]; ulong length,pack_length; byte *record,*pos,*end_pos,*record_pos,*start_pos; HUFF_COUNTS *count,*end_count; @@ -1663,12 +2402,23 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) MI_INFO *isam_file=mrg->file[0]; DBUG_ENTER("compress_isam_file"); + /* Allocate a buffer for the records (excluding blobs). */ if (!(record=(byte*) my_alloca(isam_file->s->base.reclength))) return -1; + end_count=huff_counts+isam_file->s->base.fields; min_record_length= (uint) ~0; max_record_length=0; + /* + Calculate the maximum number of bits required to pack the records. + Remember to understand 'max_zero_fill' as 'min_zero_fill'. + The tree height determines the maximum number of bits per value. + Some fields skip leading or trailing spaces or zeroes. The skipped + number of bytes is encoded by 'length_bits' bits. + Empty blobs and varchar are encoded with a single 1 bit. Other blobs + and varchar get a leading 0 bit. + */ for (i=max_calc_length=0 ; i < isam_file->s->base.fields ; i++) { if (!(huff_counts[i].pack_type & PACK_TYPE_ZERO_FILL)) @@ -1687,14 +2437,16 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) (huff_counts[i].field_length - huff_counts[i].max_zero_fill)* huff_counts[i].tree->height+huff_counts[i].length_bits; } - max_calc_length/=8; + max_calc_length= (max_calc_length + 7) / 8; if (max_calc_length < 254) pack_ref_length=1; else if (max_calc_length <= 65535) pack_ref_length=3; else pack_ref_length=4; + record_count=0; + /* 'max_blob_length' is the max length of all blobs of a record. */ pack_blob_length=0; if (isam_file->s->base.blobs) { @@ -1707,6 +2459,7 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) } max_pack_length=pack_ref_length+pack_blob_length; + DBUG_PRINT("fields", ("===")); mrg_reset(mrg); while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE) { @@ -1722,15 +2475,29 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) end_pos=start_pos+(field_length=count->field_length); tree=count->tree; + DBUG_PRINT("fields", ("column: %3lu type: %2u pack: %2u zero: %4u " + "lbits: %2u tree: %2u length: %4u", + (ulong) (count - huff_counts + 1), + count->field_type, + count->pack_type, count->max_zero_fill, + count->length_bits, count->tree->tree_number, + count->field_length)); + + /* Check if the column contains spaces only. */ if (count->pack_type & PACK_TYPE_SPACE_FIELDS) { for (pos=start_pos ; *pos == ' ' && pos < end_pos; pos++) ; if (pos == end_pos) { + DBUG_PRINT("fields", + ("PACK_TYPE_SPACE_FIELDS spaces only, bits: 1")); + DBUG_PRINT("fields", ("---")); write_bits(1,1); start_pos=end_pos; continue; } + DBUG_PRINT("fields", + ("PACK_TYPE_SPACE_FIELDS not only spaces, bits: 1")); write_bits(0,1); } end_pos-=count->max_zero_fill; @@ -1740,65 +2507,129 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) case FIELD_SKIP_ZERO: if (!memcmp((byte*) start_pos,zero_string,field_length)) { + DBUG_PRINT("fields", ("FIELD_SKIP_ZERO zeroes only, bits: 1")); write_bits(1,1); start_pos=end_pos; break; } + DBUG_PRINT("fields", ("FIELD_SKIP_ZERO not only zeroes, bits: 1")); write_bits(0,1); /* Fall through */ case FIELD_NORMAL: + DBUG_PRINT("fields", ("FIELD_NORMAL %lu bytes", + (ulong) (end_pos - start_pos))); for ( ; start_pos < end_pos ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]))); write_bits(tree->code[(uchar) *start_pos], (uint) tree->code_len[(uchar) *start_pos]); + } break; case FIELD_SKIP_ENDSPACE: for (pos=end_pos ; pos > start_pos && pos[-1] == ' ' ; pos--) ; - length=(uint) (end_pos-pos); + length= (ulong) (end_pos - pos); if (count->pack_type & PACK_TYPE_SELECTED) { if (length > count->min_space) { + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE more than min_space, bits: 1")); + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); write_bits(1,1); write_bits(length,count->length_bits); } else { + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE not more than min_space, " + "bits: 1")); write_bits(0,1); pos=end_pos; } } else + { + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); write_bits(length,count->length_bits); + } + /* Encode all significant bytes. */ + DBUG_PRINT("fields", ("FIELD_SKIP_ENDSPACE %lu bytes", + (ulong) (pos - start_pos))); for ( ; start_pos < pos ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]))); write_bits(tree->code[(uchar) *start_pos], (uint) tree->code_len[(uchar) *start_pos]); + } start_pos=end_pos; break; case FIELD_SKIP_PRESPACE: for (pos=start_pos ; pos < end_pos && pos[0] == ' ' ; pos++) ; - length=(uint) (pos-start_pos); + length= (ulong) (pos - start_pos); if (count->pack_type & PACK_TYPE_SELECTED) { if (length > count->min_space) { + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE more than min_space, bits: 1")); + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); write_bits(1,1); write_bits(length,count->length_bits); } else { + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE not more than min_space, " + "bits: 1")); pos=start_pos; write_bits(0,1); } } else + { + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); write_bits(length,count->length_bits); + } + /* Encode all significant bytes. */ + DBUG_PRINT("fields", ("FIELD_SKIP_PRESPACE %lu bytes", + (ulong) (end_pos - start_pos))); for (start_pos=pos ; start_pos < end_pos ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]))); write_bits(tree->code[(uchar) *start_pos], (uint) tree->code_len[(uchar) *start_pos]); + } break; case FIELD_CONSTANT: case FIELD_ZERO: case FIELD_CHECK: + DBUG_PRINT("fields", ("FIELD_CONSTANT/ZERO/CHECK")); start_pos=end_pos; break; case FIELD_INTERVALL: @@ -1806,6 +2637,10 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) pos=(byte*) tree_search(&count->int_tree, start_pos, count->int_tree.custom_arg); intervall=(uint) (pos - count->tree_buff)/field_length; + DBUG_PRINT("fields", ("FIELD_INTERVALL")); + DBUG_PRINT("fields", ("index: %4u code: 0x%s bits: %2u", + intervall, hexdigits(tree->code[intervall]), + (uint) tree->code_len[intervall])); write_bits(tree->code[intervall],(uint) tree->code_len[intervall]); start_pos=end_pos; break; @@ -1814,21 +2649,36 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) ulong blob_length=_mi_calc_blob_length(field_length- mi_portable_sizeof_char_ptr, start_pos); + /* Empty blobs are encoded with a single 1 bit. */ if (!blob_length) { - write_bits(1,1); /* Empty blob */ + DBUG_PRINT("fields", ("FIELD_BLOB empty, bits: 1")); + write_bits(1,1); } else { byte *blob,*blob_end; + DBUG_PRINT("fields", ("FIELD_BLOB not empty, bits: 1")); write_bits(0,1); + /* Write the blob length. */ + DBUG_PRINT("fields", ("FIELD_BLOB %lu bytes, bits: %2u", + blob_length, count->length_bits)); write_bits(blob_length,count->length_bits); memcpy_fixed(&blob,end_pos-mi_portable_sizeof_char_ptr, sizeof(char*)); blob_end=blob+blob_length; + /* Encode the blob bytes. */ for ( ; blob < blob_end ; blob++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *blob, hexdigits(tree->code[(uchar) *blob]), + (uint) tree->code_len[(uchar) *blob], + bindigits(tree->code[(uchar) *start_pos], + (uint)tree->code_len[(uchar) *start_pos]))); write_bits(tree->code[(uchar) *blob], (uint) tree->code_len[(uchar) *blob]); + } tot_blob_length+=blob_length; } start_pos= end_pos; @@ -1839,18 +2689,34 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1); ulong col_length= (pack_length == 1 ? (uint) *(uchar*) start_pos : uint2korr(start_pos)); + /* Empty varchar are encoded with a single 1 bit. */ if (!col_length) { + DBUG_PRINT("fields", ("FIELD_VARCHAR empty, bits: 1")); write_bits(1,1); /* Empty varchar */ } else { byte *end=start_pos+pack_length+col_length; + DBUG_PRINT("fields", ("FIELD_VARCHAR not empty, bits: 1")); write_bits(0,1); + /* Write the varchar length. */ + DBUG_PRINT("fields", ("FIELD_VARCHAR %lu bytes, bits: %2u", + col_length, count->length_bits)); write_bits(col_length,count->length_bits); + /* Encode the varchar bytes. */ for (start_pos+=pack_length ; start_pos < end ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint)tree->code_len[(uchar) *start_pos]))); write_bits(tree->code[(uchar) *start_pos], (uint) tree->code_len[(uchar) *start_pos]); + } } start_pos= end_pos; break; @@ -1859,12 +2725,17 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) abort(); /* Impossible */ } start_pos+=count->max_zero_fill; + DBUG_PRINT("fields", ("---")); } flush_bits(); - length=(ulong) (file_buffer.pos-record_pos)-max_pack_length; + length=(ulong) ((byte*) file_buffer.pos - record_pos) - max_pack_length; pack_length=save_pack_length(record_pos,length); if (pack_blob_length) pack_length+=save_pack_length(record_pos+pack_length,tot_blob_length); + DBUG_PRINT("fields", ("record: %lu length: %lu blob-length: %lu " + "length-bytes: %lu", (ulong) record_count, length, + tot_blob_length, pack_length)); + DBUG_PRINT("fields", ("===")); /* Correct file buffer if the header was smaller */ if (pack_length != max_pack_length) @@ -1876,9 +2747,11 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) min_record_length=(uint) length; if (length > (ulong) max_record_length) max_record_length=(uint) length; - if (write_loop && ++record_count % WRITE_COUNT == 0) + record_count++; + if (write_loop && record_count % WRITE_COUNT == 0) { - printf("%lu\r",(ulong) record_count); VOID(fflush(stdout)); + VOID(printf("%lu\r", (ulong) record_count)); + VOID(fflush(stdout)); } } else if (error != HA_ERR_RECORD_DELETED) @@ -1888,8 +2761,11 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) error=0; else { - fprintf(stderr,"%s: Got error %d reading records\n",my_progname,error); + VOID(fprintf(stderr, "%s: Got error %d reading records\n", + my_progname, error)); } + if (verbose >= 2) + VOID(printf("wrote %s records.\n", llstr((longlong) record_count, llbuf))); my_afree((gptr) record); mrg->ref_length=max_pack_length; @@ -1929,7 +2805,7 @@ static void init_file_buffer(File file, pbool read_buffer) file_buffer.pos=file_buffer.buffer; file_buffer.bits=BITS_SAVED; } - file_buffer.current_byte=0; + file_buffer.bitbucket= 0; } @@ -1972,7 +2848,8 @@ static int flush_buffer(ulong neaded_length) tmp=my_realloc(file_buffer.buffer, neaded_length,MYF(MY_WME)); if (!tmp) return 1; - file_buffer.pos= tmp + (ulong) (file_buffer.pos - file_buffer.buffer); + file_buffer.pos= ((uchar*) tmp + + (ulong) (file_buffer.pos - file_buffer.buffer)); file_buffer.buffer=tmp; file_buffer.end=tmp+neaded_length-8; } @@ -1987,68 +2864,59 @@ static void end_file_buffer(void) /* output `bits` low bits of `value' */ -static void write_bits (register ulong value, register uint bits) +static void write_bits(register ulonglong value, register uint bits) { - if ((file_buffer.bits-=(int) bits) >= 0) + DBUG_ASSERT(((bits < 8 * sizeof(value)) && ! (value >> bits)) || + (bits == 8 * sizeof(value))); + + if ((file_buffer.bits-= (int) bits) >= 0) { - file_buffer.current_byte|=value << file_buffer.bits; + file_buffer.bitbucket|= value << file_buffer.bits; } else { - reg3 uint byte_buff; + reg3 ulonglong bit_buffer; bits= (uint) -file_buffer.bits; - DBUG_ASSERT(bits <= 8 * sizeof(value)); - byte_buff= (file_buffer.current_byte | - ((bits != 8 * sizeof(value)) ? (uint) (value >> bits) : 0)); -#if BITS_SAVED == 32 - *file_buffer.pos++= (byte) (byte_buff >> 24) ; - *file_buffer.pos++= (byte) (byte_buff >> 16) ; + bit_buffer= (file_buffer.bitbucket | + ((bits != 8 * sizeof(value)) ? (value >> bits) : 0)); +#if BITS_SAVED == 64 + *file_buffer.pos++= (uchar) (bit_buffer >> 56); + *file_buffer.pos++= (uchar) (bit_buffer >> 48); + *file_buffer.pos++= (uchar) (bit_buffer >> 40); + *file_buffer.pos++= (uchar) (bit_buffer >> 32); #endif - *file_buffer.pos++= (byte) (byte_buff >> 8) ; - *file_buffer.pos++= (byte) byte_buff; + *file_buffer.pos++= (uchar) (bit_buffer >> 24); + *file_buffer.pos++= (uchar) (bit_buffer >> 16); + *file_buffer.pos++= (uchar) (bit_buffer >> 8); + *file_buffer.pos++= (uchar) (bit_buffer); - DBUG_ASSERT(bits <= 8 * sizeof(ulong)); if (bits != 8 * sizeof(value)) - value&= (((ulong) 1) << bits) - 1; -#if BITS_SAVED == 16 - if (bits >= sizeof(uint)) - { - bits-=8; - *file_buffer.pos++= (uchar) (value >> bits); - value&= (1 << bits)-1; - if (bits >= sizeof(uint)) - { - bits-=8; - *file_buffer.pos++= (uchar) (value >> bits); - value&= (1 << bits)-1; - } - } -#endif + value&= (((ulonglong) 1) << bits) - 1; if (file_buffer.pos >= file_buffer.end) VOID(flush_buffer(~ (ulong) 0)); file_buffer.bits=(int) (BITS_SAVED - bits); - file_buffer.current_byte=(uint) (value << (BITS_SAVED - bits)); + file_buffer.bitbucket= value << (BITS_SAVED - bits); } return; } /* Flush bits in bit_buffer to buffer */ -static void flush_bits (void) +static void flush_bits(void) { - uint bits,byte_buff; + int bits; + ulonglong bit_buffer; - bits=(file_buffer.bits) & ~7; - byte_buff = file_buffer.current_byte >> bits; - bits=BITS_SAVED - bits; + bits= file_buffer.bits & ~7; + bit_buffer= file_buffer.bitbucket >> bits; + bits= BITS_SAVED - bits; while (bits > 0) { - bits-=8; - *file_buffer.pos++= (byte) (uchar) (byte_buff >> bits) ; + bits-= 8; + *file_buffer.pos++= (uchar) (bit_buffer >> bits); } - file_buffer.bits=BITS_SAVED; - file_buffer.current_byte=0; - return; + file_buffer.bits= BITS_SAVED; + file_buffer.bitbucket= 0; } @@ -2196,3 +3064,131 @@ static int mrg_close(PACK_MRG_INFO *mrg) my_free((gptr) mrg->file,MYF(0)); return error; } + + +#if !defined(DBUG_OFF) +/* + Fake the counts to get big Huffman codes. + + SYNOPSIS + fakebigcodes() + huff_counts A pointer to the counts array. + end_count A pointer past the counts array. + + DESCRIPTION + + Huffman coding works by removing the two least frequent values from + the list of values and add a new value with the sum of their + incidences in a loop until only one value is left. Every time a + value is reused for a new value, it gets one more bit for its + encoding. Hence, the least frequent values get the longest codes. + + To get a maximum code length for a value, two of the values must + have an incidence of 1. As their sum is 2, the next infrequent value + must have at least an incidence of 2, then 4, 8, 16 and so on. This + means that one needs 2**n bytes (values) for a code length of n + bits. However, using more distinct values forces the use of longer + codes, or reaching the code length with less total bytes (values). + + To get 64(32)-bit codes, I sort the counts by decreasing incidence. + I assign counts of 1 to the two most frequent values, a count of 2 + for the next one, then 4, 8, and so on until 2**64-1(2**30-1). All + the remaining values get 1. That way every possible byte has an + assigned code, though not all codes are used if not all byte values + are present in the column. + + This strategy would work with distinct column values too, but + requires that at least 64(32) values are present. To make things + easier here, I cancel all distinct column values and force byte + compression for all columns. + + RETURN + void +*/ + +static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count) +{ + HUFF_COUNTS *count; + my_off_t *cur_count_p; + my_off_t *end_count_p; + my_off_t **cur_sort_p; + my_off_t **end_sort_p; + my_off_t *sort_counts[256]; + my_off_t total; + DBUG_ENTER("fakebigcodes"); + + for (count= huff_counts; count < end_count; count++) + { + /* + Remove distinct column values. + */ + if (huff_counts->tree_buff) + { + my_free((gptr) huff_counts->tree_buff, MYF(0)); + delete_tree(&huff_counts->int_tree); + huff_counts->tree_buff= NULL; + DBUG_PRINT("fakebigcodes", ("freed distinct column values")); + } + + /* + Sort counts by decreasing incidence. + */ + cur_count_p= count->counts; + end_count_p= cur_count_p + 256; + cur_sort_p= sort_counts; + while (cur_count_p < end_count_p) + *(cur_sort_p++)= cur_count_p++; + (void) qsort(sort_counts, 256, sizeof(my_off_t*), (qsort_cmp) fakecmp); + + /* + Assign faked counts. + */ + cur_sort_p= sort_counts; +#if SIZEOF_LONG_LONG > 4 + end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 1; +#else + end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 2; +#endif + /* Most frequent value gets a faked count of 1. */ + **(cur_sort_p++)= 1; + total= 1; + while (cur_sort_p < end_sort_p) + { + **(cur_sort_p++)= total; + total<<= 1; + } + /* Set the last value. */ + **(cur_sort_p++)= --total; + /* + Set the remaining counts. + */ + end_sort_p= sort_counts + 256; + while (cur_sort_p < end_sort_p) + **(cur_sort_p++)= 1; + } + DBUG_VOID_RETURN; +} + + +/* + Compare two counts for reverse sorting. + + SYNOPSIS + fakecmp() + count1 One count. + count2 Another count. + + RETURN + 1 count1 < count2 + 0 count1 == count2 + -1 count1 > count2 +*/ + +static int fakecmp(my_off_t **count1, my_off_t **count2) +{ + return ((**count1 < **count2) ? 1 : + (**count1 > **count2) ? -1 : 0); +} +#endif + + diff --git a/storage/myisammrg/Makefile.am b/storage/myisammrg/Makefile.am index 6975fab6770..14e3295c1ae 100644 --- a/storage/myisammrg/Makefile.am +++ b/storage/myisammrg/Makefile.am @@ -14,7 +14,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -INCLUDES = -I$(top_srcdir)/include +INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include pkglib_LIBRARIES = libmyisammrg.a noinst_HEADERS = myrg_def.h libmyisammrg_a_SOURCES = myrg_open.c myrg_extra.c myrg_info.c myrg_locking.c \ diff --git a/storage/ndb/include/mgmcommon/ConfigRetriever.hpp b/storage/ndb/include/mgmcommon/ConfigRetriever.hpp index 95d257dea23..c0b877af07d 100644 --- a/storage/ndb/include/mgmcommon/ConfigRetriever.hpp +++ b/storage/ndb/include/mgmcommon/ConfigRetriever.hpp @@ -32,6 +32,7 @@ public: ~ConfigRetriever(); int do_connect(int no_retries, int retry_delay_in_seconds, int verbose); + int disconnect(); /** * Get configuration for current node. diff --git a/storage/ndb/src/common/mgmcommon/ConfigRetriever.cpp b/storage/ndb/src/common/mgmcommon/ConfigRetriever.cpp index eca886c8586..0e9b2a83e2f 100644 --- a/storage/ndb/src/common/mgmcommon/ConfigRetriever.cpp +++ b/storage/ndb/src/common/mgmcommon/ConfigRetriever.cpp @@ -107,6 +107,12 @@ ConfigRetriever::do_connect(int no_retries, 0 : -1; } +int +ConfigRetriever::disconnect() +{ + return ndb_mgm_disconnect(m_handle); +} + //**************************************************************************** //**************************************************************************** //**************************************************************************** diff --git a/storage/ndb/src/mgmsrv/main.cpp b/storage/ndb/src/mgmsrv/main.cpp index 3335fdc827c..c7315c61ba1 100644 --- a/storage/ndb/src/mgmsrv/main.cpp +++ b/storage/ndb/src/mgmsrv/main.cpp @@ -353,7 +353,8 @@ int main(int argc, char** argv) g_eventLogger.info("Shutting down server..."); glob.socketServer->stopServer(); - glob.socketServer->stopSessions(); + glob.mgmObject->get_config_retriever()->disconnect(); + glob.socketServer->stopSessions(true); g_eventLogger.info("Shutdown complete"); return 0; error_end: |