summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
authorunknown <tomas@poseidon.ndb.mysql.com>2005-07-12 20:01:22 +0200
committerunknown <tomas@poseidon.ndb.mysql.com>2005-07-12 20:01:22 +0200
commite4be45da93b3b64ede35053a7fa3989a40a2766e (patch)
tree8fd7b30e9e4cce3fdfc60e700fdb2d668f2f6449 /storage
parentf9027b7a050186c504b9a5ab7e164c0cbaacb8f3 (diff)
parentff7db9bfd9f0c50d75f3085c6554ea8ecb9b906f (diff)
downloadmariadb-git-e4be45da93b3b64ede35053a7fa3989a40a2766e.tar.gz
Merge
BitKeeper/etc/logging_ok: auto-union BUILD/autorun.sh: Auto merged BitKeeper/etc/config: Auto merged Makefile.am: Auto merged include/my_bitmap.h: Auto merged libmysqld/Makefile.am: Auto merged mysql-test/mysql-test-run.pl: Auto merged mysql-test/mysql-test-run.sh: Auto merged mysql-test/r/grant.result: Auto merged mysql-test/r/ps_6bdb.result: Auto merged mysql-test/r/ps_7ndb.result: Auto merged mysys/Makefile.am: Auto merged mysys/default.c: Auto merged scripts/mysql_create_system_tables.sh: Auto merged scripts/mysql_fix_privilege_tables.sql: Auto merged sql/Makefile.am: Auto merged sql/field.cc: Auto merged sql/field.h: Auto merged sql/ha_ndbcluster.h: Auto merged sql/handler.cc: Auto merged sql/handler.h: Auto merged sql/item.cc: Auto merged sql/log_event.cc: Auto merged sql/mysql_priv.h: Auto merged sql/opt_range.cc: Auto merged sql/sql_acl.cc: Auto merged sql/sql_acl.h: Auto merged sql/sql_base.cc: Auto merged sql/sql_cache.cc: Auto merged sql/sql_class.h: Auto merged sql/sql_delete.cc: Auto merged sql/sql_lex.cc: Auto merged sql/sql_parse.cc: Auto merged sql/sql_table.cc: Auto merged sql/sql_udf.cc: Auto merged sql/sql_yacc.yy: Auto merged storage/heap/Makefile.am: Auto merged storage/heap/hp_hash.c: Auto merged storage/innobase/btr/btr0btr.c: Auto merged storage/innobase/btr/btr0cur.c: Auto merged storage/innobase/configure.in: Auto merged storage/innobase/fil/fil0fil.c: Auto merged storage/innobase/ibuf/ibuf0ibuf.c: Auto merged storage/innobase/include/os0file.h: Auto merged storage/innobase/include/page0cur.h: Auto merged storage/innobase/include/row0mysql.h: Auto merged storage/innobase/include/srv0srv.h: Auto merged storage/innobase/include/trx0trx.h: Auto merged storage/innobase/include/trx0trx.ic: Auto merged storage/innobase/lock/lock0lock.c: Auto merged storage/innobase/log/log0recv.c: Auto merged storage/innobase/os/os0file.c: Auto merged storage/innobase/page/page0cur.c: Auto merged storage/innobase/page/page0page.c: Auto merged storage/innobase/rem/rem0rec.c: Auto merged storage/innobase/row/row0mysql.c: Auto merged storage/innobase/row/row0sel.c: Auto merged storage/innobase/row/row0upd.c: Auto merged storage/innobase/srv/srv0srv.c: Auto merged storage/innobase/trx/trx0trx.c: Auto merged storage/innobase/trx/trx0undo.c: Auto merged storage/myisam/Makefile.am: Auto merged storage/myisam/mi_create.c: Auto merged storage/myisam/mi_open.c: Auto merged storage/myisam/mi_packrec.c: Auto merged storage/myisam/mi_unique.c: Auto merged storage/myisam/myisampack.c: Auto merged storage/myisammrg/Makefile.am: Auto merged storage/ndb/include/mgmcommon/ConfigRetriever.hpp: Auto merged storage/ndb/include/transporter/TransporterDefinitions.hpp: Auto merged storage/ndb/src/common/mgmcommon/ConfigRetriever.cpp: Auto merged storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp: Auto merged storage/ndb/src/mgmsrv/main.cpp: Auto merged storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp: Auto merged storage/ndb/test/ndbapi/create_tab.cpp: Auto merged storage/ndb/test/ndbapi/testBlobs.cpp: Auto merged strings/ctype-big5.c: Auto merged strings/ctype-ucs2.c: Auto merged support-files/mysql.spec.sh: Auto merged configure.in: merge mysql-test/t/disabled.def: merge mysys/my_bitmap.c: SCCS merged sql/ha_federated.cc: merge sql/ha_innodb.cc: merge sql/ha_ndbcluster.cc: merge sql/mysqld.cc: e merge sql/sql_insert.cc: merge sql/sql_lex.h: merge sql/sql_load.cc: merge sql/sql_select.cc: merge e C sql/sql_update.cc: merge sql/table.cc: merge
Diffstat (limited to 'storage')
-rw-r--r--storage/heap/Makefile.am2
-rw-r--r--storage/heap/hp_hash.c4
-rw-r--r--storage/innobase/btr/btr0btr.c6
-rw-r--r--storage/innobase/btr/btr0cur.c13
-rw-r--r--storage/innobase/configure.in7
-rw-r--r--storage/innobase/fil/fil0fil.c26
-rw-r--r--storage/innobase/ibuf/ibuf0ibuf.c2
-rw-r--r--storage/innobase/include/os0file.h2
-rw-r--r--storage/innobase/include/page0cur.h6
-rw-r--r--storage/innobase/include/row0mysql.h18
-rw-r--r--storage/innobase/include/srv0srv.h1
-rw-r--r--storage/innobase/include/trx0trx.h43
-rw-r--r--storage/innobase/include/trx0trx.ic56
-rw-r--r--storage/innobase/lock/lock0lock.c60
-rw-r--r--storage/innobase/log/log0recv.c21
-rw-r--r--storage/innobase/os/os0file.c56
-rw-r--r--storage/innobase/page/page0cur.c102
-rw-r--r--storage/innobase/page/page0page.c6
-rw-r--r--storage/innobase/rem/rem0rec.c105
-rw-r--r--storage/innobase/row/row0mysql.c103
-rw-r--r--storage/innobase/row/row0sel.c89
-rw-r--r--storage/innobase/row/row0upd.c2
-rw-r--r--storage/innobase/srv/srv0srv.c17
-rw-r--r--storage/innobase/trx/trx0trx.c2
-rw-r--r--storage/innobase/trx/trx0undo.c28
-rw-r--r--storage/myisam/Makefile.am2
-rw-r--r--storage/myisam/mi_create.c3
-rw-r--r--storage/myisam/mi_open.c2
-rw-r--r--storage/myisam/mi_packrec.c15
-rw-r--r--storage/myisam/mi_unique.c21
-rw-r--r--storage/myisam/myisampack.c1280
-rw-r--r--storage/myisammrg/Makefile.am2
-rw-r--r--storage/ndb/include/mgmcommon/ConfigRetriever.hpp1
-rw-r--r--storage/ndb/src/common/mgmcommon/ConfigRetriever.cpp6
-rw-r--r--storage/ndb/src/mgmsrv/main.cpp3
35 files changed, 1714 insertions, 398 deletions
diff --git a/storage/heap/Makefile.am b/storage/heap/Makefile.am
index f9f0ba775ff..2890dc6ee23 100644
--- a/storage/heap/Makefile.am
+++ b/storage/heap/Makefile.am
@@ -14,7 +14,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-INCLUDES = -I$(top_srcdir)/include
+INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include
LDADD = libheap.a $(top_srcdir)/mysys/libmysys.a \
$(top_srcdir)/dbug/libdbug.a \
$(top_srcdir)/strings/libmystrings.a
diff --git a/storage/heap/hp_hash.c b/storage/heap/hp_hash.c
index 9e4636ebdc0..d643f776731 100644
--- a/storage/heap/hp_hash.c
+++ b/storage/heap/hp_hash.c
@@ -552,9 +552,9 @@ int hp_rec_key_cmp(HP_KEYDEF *keydef, const byte *rec1, const byte *rec2,
if (cs->mbmaxlen > 1)
{
uint char_length= seg->length / cs->mbmaxlen;
- char_length1= my_charpos(cs, pos1, pos1 + char_length1, char_length);
+ char_length1= my_charpos(cs, pos1, pos1 + char_length1, char_length1);
set_if_smaller(char_length1, seg->length);
- char_length2= my_charpos(cs, pos2, pos2 + char_length2, char_length);
+ char_length2= my_charpos(cs, pos2, pos2 + char_length2, char_length2);
set_if_smaller(char_length2, seg->length);
}
diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c
index 2d84586216a..c27fb73ff8d 100644
--- a/storage/innobase/btr/btr0btr.c
+++ b/storage/innobase/btr/btr0btr.c
@@ -143,7 +143,7 @@ btr_root_get(
root_page_no = dict_tree_get_page(tree);
root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr);
- ut_a(!!page_is_comp(root) ==
+ ut_a((ibool)!!page_is_comp(root) ==
UT_LIST_GET_FIRST(tree->tree_indexes)->table->comp);
return(root);
@@ -2014,7 +2014,7 @@ btr_compress(
page = btr_cur_get_page(cursor);
tree = btr_cur_get_tree(cursor);
comp = page_is_comp(page);
- ut_a(!!comp == cursor->index->table->comp);
+ ut_a((ibool)!!comp == cursor->index->table->comp);
ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
MTR_MEMO_X_LOCK));
@@ -2508,7 +2508,7 @@ btr_index_rec_validate(
return(TRUE);
}
- if (UNIV_UNLIKELY(!!page_is_comp(page) != index->table->comp)) {
+ if (UNIV_UNLIKELY((ibool)!!page_is_comp(page) != index->table->comp)) {
btr_index_rec_validate_report(page, rec, index);
fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n",
(ulong) !!page_is_comp(page),
diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
index 4ae27f007d6..f81cce5b8e9 100644
--- a/storage/innobase/btr/btr0cur.c
+++ b/storage/innobase/btr/btr0cur.c
@@ -316,7 +316,9 @@ btr_cur_search_to_nth_level(
if (btr_search_latch.writer == RW_LOCK_NOT_LOCKED
&& latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
&& !estimate
+#ifdef PAGE_CUR_LE_OR_EXTENDS
&& mode != PAGE_CUR_LE_OR_EXTENDS
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
&& srv_use_adaptive_hash_indexes
&& btr_search_guess_on_hash(index, info, tuple, mode,
latch_mode, cursor,
@@ -390,9 +392,12 @@ btr_cur_search_to_nth_level(
page_mode = PAGE_CUR_LE;
break;
default:
- ut_ad(mode == PAGE_CUR_L
- || mode == PAGE_CUR_LE
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+ ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
|| mode == PAGE_CUR_LE_OR_EXTENDS);
+#else /* PAGE_CUR_LE_OR_EXTENDS */
+ ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE);
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
page_mode = mode;
break;
}
@@ -507,7 +512,7 @@ retry_page_get:
/* x-latch the page */
page = btr_page_get(space,
page_no, RW_X_LATCH, mtr);
- ut_a(!!page_is_comp(page)
+ ut_a((ibool)!!page_is_comp(page)
== index->table->comp);
}
@@ -1385,7 +1390,7 @@ btr_cur_parse_update_in_place(
goto func_exit;
}
- ut_a(!!page_is_comp(page) == index->table->comp);
+ ut_a((ibool)!!page_is_comp(page) == index->table->comp);
rec = page + rec_offset;
/* We do not need to reserve btr_search_latch, as the page is only
diff --git a/storage/innobase/configure.in b/storage/innobase/configure.in
index baf11272ab9..c56bd8274c4 100644
--- a/storage/innobase/configure.in
+++ b/storage/innobase/configure.in
@@ -117,6 +117,13 @@ case "$target" in
CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";;
esac
+# must go in pair with AR as set by MYSQL_CHECK_AR
+if test -z "$ARFLAGS"
+then
+ ARFLAGS="cru"
+fi
+AC_SUBST(ARFLAGS)
+
AC_OUTPUT(Makefile os/Makefile ut/Makefile btr/Makefile dnl
buf/Makefile data/Makefile dnl
dict/Makefile dyn/Makefile dnl
diff --git a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
index 299b55f3d2b..20f522c1a60 100644
--- a/storage/innobase/fil/fil0fil.c
+++ b/storage/innobase/fil/fil0fil.c
@@ -3410,9 +3410,9 @@ fil_extend_space_to_desired_size(
fil_space_t* space;
byte* buf2;
byte* buf;
+ ulint buf_size;
ulint start_page_no;
ulint file_start_page_no;
- ulint n_pages;
ulint offset_high;
ulint offset_low;
ibool success = TRUE;
@@ -3437,22 +3437,20 @@ fil_extend_space_to_desired_size(
fil_node_prepare_for_io(node, system, space);
- /* Extend 1 MB at a time */
-
- buf2 = mem_alloc(1024 * 1024 + UNIV_PAGE_SIZE);
- buf = ut_align(buf2, UNIV_PAGE_SIZE);
-
- memset(buf, '\0', 1024 * 1024);
-
start_page_no = space->size;
file_start_page_no = space->size - node->size;
- while (start_page_no < size_after_extend) {
- n_pages = size_after_extend - start_page_no;
+ /* Extend at most 64 pages at a time */
+ buf_size = ut_min(64, size_after_extend - start_page_no)
+ * UNIV_PAGE_SIZE;
+ buf2 = mem_alloc(buf_size + UNIV_PAGE_SIZE);
+ buf = ut_align(buf2, UNIV_PAGE_SIZE);
- if (n_pages > (1024 * 1024) / UNIV_PAGE_SIZE) {
- n_pages = (1024 * 1024) / UNIV_PAGE_SIZE;
- }
+ memset(buf, 0, buf_size);
+
+ while (start_page_no < size_after_extend) {
+ ulint n_pages = ut_min(buf_size / UNIV_PAGE_SIZE,
+ size_after_extend - start_page_no);
offset_high = (start_page_no - file_start_page_no)
/ (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE));
@@ -4034,7 +4032,7 @@ fil_aio_wait(
if (os_aio_use_native_aio) {
srv_set_io_thread_op_info(segment, "native aio handle");
#ifdef WIN_ASYNC_IO
- ret = os_aio_windows_handle(segment, 0, (void**) &fil_node,
+ ret = os_aio_windows_handle(segment, 0, &fil_node,
&message, &type);
#elif defined(POSIX_ASYNC_IO)
ret = os_aio_posix_handle(segment, &fil_node, &message);
diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
index 712d43f916c..d7fa48b6e66 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.c
+++ b/storage/innobase/ibuf/ibuf0ibuf.c
@@ -2810,7 +2810,7 @@ ibuf_insert_to_index_page(
ut_ad(ibuf_inside());
ut_ad(dtuple_check_typed(entry));
- if (UNIV_UNLIKELY(index->table->comp != !!page_is_comp(page))) {
+ if (UNIV_UNLIKELY(index->table->comp != (ibool)!!page_is_comp(page))) {
fputs(
"InnoDB: Trying to insert a record from the insert buffer to an index page\n"
"InnoDB: but the 'compact' flag does not match!\n", stderr);
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index e75281dd93c..362e3552411 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -373,7 +373,7 @@ os_file_get_size_as_iblonglong(
/* out: size in bytes, -1 if error */
os_file_t file); /* in: handle to a file */
/***************************************************************************
-Sets a file size. This function can be used to extend or truncate a file. */
+Write the specified number of zeros to a newly created file. */
ibool
os_file_set_size(
diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h
index e89e740e775..b03302b0e77 100644
--- a/storage/innobase/include/page0cur.h
+++ b/storage/innobase/include/page0cur.h
@@ -26,11 +26,13 @@ Created 10/4/1994 Heikki Tuuri
#define PAGE_CUR_GE 2
#define PAGE_CUR_L 3
#define PAGE_CUR_LE 4
-#define PAGE_CUR_LE_OR_EXTENDS 5 /* This is a search mode used in
+/*#define PAGE_CUR_LE_OR_EXTENDS 5*/ /* This is a search mode used in
"column LIKE 'abc%' ORDER BY column DESC";
we have to find strings which are <= 'abc' or
which extend it */
-#define PAGE_CUR_DBG 6
+#ifdef UNIV_SEARCH_DEBUG
+# define PAGE_CUR_DBG 6 /* As PAGE_CUR_LE, but skips search shortcut */
+#endif /* UNIV_SEARCH_DEBUG */
#ifdef PAGE_CUR_ADAPT
# ifdef UNIV_SEARCH_PERF_STAT
diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
index 4bb9fa63cd1..4e6ff73b0f8 100644
--- a/storage/innobase/include/row0mysql.h
+++ b/storage/innobase/include/row0mysql.h
@@ -243,17 +243,27 @@ row_update_for_mysql(
the MySQL format */
row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
handle */
-
/*************************************************************************
-Does an unlock of a row for MySQL. */
+This can only be used when srv_locks_unsafe_for_binlog is TRUE. Before
+calling this function we must use trx_reset_new_rec_lock_info() and
+trx_register_new_rec_lock() to store the information which new record locks
+really were set. This function removes a newly set lock under prebuilt->pcur,
+and also under prebuilt->clust_pcur. Currently, this is only used and tested
+in the case of an UPDATE or a DELETE statement, where the row lock is of the
+LOCK_X type.
+Thus, this implements a 'mini-rollback' that releases the latest record
+locks we set. */
int
row_unlock_for_mysql(
/*=================*/
/* out: error code or DB_SUCCESS */
- row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL
handle */
-
+ ibool has_latches_on_recs);/* TRUE if called so that we have
+ the latches on the records under pcur
+ and clust_pcur, and we do not need to
+ reposition the cursors. */
/*************************************************************************
Creates an query graph node of 'update' type to be used in the MySQL
interface. */
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 6e4241965c1..116ae7b6438 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -182,6 +182,7 @@ extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs,
#define kernel_mutex (*kernel_mutex_temp)
#define SRV_MAX_N_IO_THREADS 100
+#define SRV_CONCURRENCY_THRESHOLD 20
/* Array of English strings describing the current state of an
i/o handler thread */
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index 8df50d6703d..146730d46f8 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -16,10 +16,39 @@ Created 3/26/1996 Heikki Tuuri
#include "que0types.h"
#include "mem0mem.h"
#include "read0types.h"
+#include "dict0types.h"
#include "trx0xa.h"
extern ulint trx_n_mysql_transactions;
+/*****************************************************************
+Resets the new record lock info in a transaction struct. */
+UNIV_INLINE
+void
+trx_reset_new_rec_lock_info(
+/*========================*/
+ trx_t* trx); /* in: transaction struct */
+/*****************************************************************
+Registers that we have set a new record lock on an index. We only have space
+to store 2 indexes! If this is called to store more than 2 indexes after
+trx_reset_new_rec_lock_info(), then this function does nothing. */
+UNIV_INLINE
+void
+trx_register_new_rec_lock(
+/*======================*/
+ trx_t* trx, /* in: transaction struct */
+ dict_index_t* index); /* in: trx sets a new record lock on this
+ index */
+/*****************************************************************
+Checks if trx has set a new record lock on an index. */
+UNIV_INLINE
+ibool
+trx_new_rec_locks_contain(
+/*======================*/
+ /* out: TRUE if trx has set a new record lock
+ on index */
+ trx_t* trx, /* in: transaction struct */
+ dict_index_t* index); /* in: index */
/************************************************************************
Releases the search latch if trx has reserved it. */
@@ -495,8 +524,18 @@ struct trx_struct{
lock_t* auto_inc_lock; /* possible auto-inc lock reserved by
the transaction; note that it is also
in the lock list trx_locks */
- ibool trx_create_lock;/* this is TRUE if we have created a
- new lock for a record accessed */
+ dict_index_t* new_rec_locks[2];/* these are normally NULL; if
+ srv_locks_unsafe_for_binlog is TRUE,
+ in a cursor search, if we set a new
+ record lock on an index, this is set
+ to point to the index; this is
+ used in releasing the locks under the
+ cursors if we are performing an UPDATE
+ and we determine after retrieving
+ the row that it does not need to be
+ locked; thus, these can be used to
+ implement a 'mini-rollback' that
+ releases the latest record locks */
UT_LIST_NODE_T(trx_t)
trx_list; /* list of transactions */
UT_LIST_NODE_T(trx_t)
diff --git a/storage/innobase/include/trx0trx.ic b/storage/innobase/include/trx0trx.ic
index 78e5acda148..54cf2ff331f 100644
--- a/storage/innobase/include/trx0trx.ic
+++ b/storage/innobase/include/trx0trx.ic
@@ -39,4 +39,60 @@ trx_start_if_not_started_low(
}
}
+/*****************************************************************
+Resets the new record lock info in a transaction struct. */
+UNIV_INLINE
+void
+trx_reset_new_rec_lock_info(
+/*========================*/
+ trx_t* trx) /* in: transaction struct */
+{
+ trx->new_rec_locks[0] = NULL;
+ trx->new_rec_locks[1] = NULL;
+}
+
+/*****************************************************************
+Registers that we have set a new record lock on an index. We only have space
+to store 2 indexes! If this is called to store more than 2 indexes after
+trx_reset_new_rec_lock_info(), then this function does nothing. */
+UNIV_INLINE
+void
+trx_register_new_rec_lock(
+/*======================*/
+ trx_t* trx, /* in: transaction struct */
+ dict_index_t* index) /* in: trx sets a new record lock on this
+ index */
+{
+ if (trx->new_rec_locks[0] == NULL) {
+ trx->new_rec_locks[0] = index;
+
+ return;
+ }
+
+ if (trx->new_rec_locks[0] == index) {
+ return;
+ }
+
+ if (trx->new_rec_locks[1] != NULL) {
+
+ return;
+ }
+
+ trx->new_rec_locks[1] = index;
+}
+
+/*****************************************************************
+Checks if trx has set a new record lock on an index. */
+UNIV_INLINE
+ibool
+trx_new_rec_locks_contain(
+/*======================*/
+ /* out: TRUE if trx has set a new record lock
+ on index */
+ trx_t* trx, /* in: transaction struct */
+ dict_index_t* index) /* in: index */
+{
+ return(trx->new_rec_locks[0] == index
+ || trx->new_rec_locks[1] == index);
+}
diff --git a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c
index 48db7ced0cb..280c4871ee9 100644
--- a/storage/innobase/lock/lock0lock.c
+++ b/storage/innobase/lock/lock0lock.c
@@ -956,7 +956,7 @@ lock_rec_has_to_wait(
cause waits */
if ((lock_is_on_supremum || (type_mode & LOCK_GAP))
- && !(type_mode & LOCK_INSERT_INTENTION)) {
+ && !(type_mode & LOCK_INSERT_INTENTION)) {
/* Gap type locks without LOCK_INSERT_INTENTION flag
do not need to wait for anything. This is because
@@ -1765,10 +1765,7 @@ lock_rec_create(
lock_rec_set_nth_bit(lock, heap_no);
HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
- lock_rec_fold(space, page_no), lock);
- /* Note that we have create a new lock */
- trx->trx_create_lock = TRUE;
-
+ lock_rec_fold(space, page_no), lock);
if (type_mode & LOCK_WAIT) {
lock_set_lock_and_trx_wait(lock, trx);
@@ -1945,15 +1942,6 @@ lock_rec_add_to_queue(
if (similar_lock && !somebody_waits && !(type_mode & LOCK_WAIT)) {
- /* If the nth bit of a record lock is already set then we
- do not set a new lock bit, otherwice we set */
-
- if (lock_rec_get_nth_bit(similar_lock, heap_no)) {
- trx->trx_create_lock = FALSE;
- } else {
- trx->trx_create_lock = TRUE;
- }
-
lock_rec_set_nth_bit(similar_lock, heap_no);
return(similar_lock);
@@ -2005,11 +1993,14 @@ lock_rec_lock_fast(
lock = lock_rec_get_first_on_page(rec);
trx = thr_get_trx(thr);
- trx->trx_create_lock = FALSE;
if (lock == NULL) {
if (!impl) {
lock_rec_create(mode, rec, index, trx);
+
+ if (srv_locks_unsafe_for_binlog) {
+ trx_register_new_rec_lock(trx, index);
+ }
}
return(TRUE);
@@ -2021,23 +2012,22 @@ lock_rec_lock_fast(
}
if (lock->trx != trx
- || lock->type_mode != (mode | LOCK_REC)
- || lock_rec_get_n_bits(lock) <= heap_no) {
+ || lock->type_mode != (mode | LOCK_REC)
+ || lock_rec_get_n_bits(lock) <= heap_no) {
+
return(FALSE);
}
if (!impl) {
+ /* If the nth bit of the record lock is already set then we
+ do not set a new lock bit, otherwise we do set */
- /* If the nth bit of a record lock is already set then we
- do not set a new lock bit, otherwice we set */
-
- if (lock_rec_get_nth_bit(lock, heap_no)) {
- trx->trx_create_lock = FALSE;
- } else {
- trx->trx_create_lock = TRUE;
+ if (!lock_rec_get_nth_bit(lock, heap_no)) {
+ lock_rec_set_nth_bit(lock, heap_no);
+ if (srv_locks_unsafe_for_binlog) {
+ trx_register_new_rec_lock(trx, index);
+ }
}
-
- lock_rec_set_nth_bit(lock, heap_no);
}
return(TRUE);
@@ -2093,12 +2083,19 @@ lock_rec_lock_slow(
enough already granted on the record, we have to wait. */
err = lock_rec_enqueue_waiting(mode, rec, index, thr);
+
+ if (srv_locks_unsafe_for_binlog) {
+ trx_register_new_rec_lock(trx, index);
+ }
} else {
if (!impl) {
/* Set the requested lock on the record */
lock_rec_add_to_queue(LOCK_REC | mode, rec, index,
trx);
+ if (srv_locks_unsafe_for_binlog) {
+ trx_register_new_rec_lock(trx, index);
+ }
}
err = DB_SUCCESS;
@@ -2436,8 +2433,15 @@ lock_rec_inherit_to_gap(
lock = lock_rec_get_first(rec);
+ /* If srv_locks_unsafe_for_binlog is TRUE, we do not want locks set
+ by an UPDATE or a DELETE to be inherited as gap type locks. But we
+ DO want S-locks set by a consistency constraint to be inherited also
+ then. */
+
while (lock != NULL) {
- if (!lock_rec_get_insert_intention(lock)) {
+ if (!lock_rec_get_insert_intention(lock)
+ && !(srv_locks_unsafe_for_binlog
+ && lock_get_mode(lock) == LOCK_X)) {
lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock)
| LOCK_GAP,
@@ -3069,7 +3073,7 @@ lock_update_insert(
lock_rec_inherit_to_gap_if_gap_lock(rec, page_rec_get_next(rec));
lock_mutex_exit_kernel();
-}
+}
/*****************************************************************
Updates the lock table when a record is removed. */
diff --git a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
index 8d9780bfbda..42e854398ba 100644
--- a/storage/innobase/log/log0recv.c
+++ b/storage/innobase/log/log0recv.c
@@ -768,7 +768,8 @@ recv_parse_or_apply_log_rec_body(
case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
if (NULL != (ptr = mlog_parse_index(ptr, end_ptr,
type == MLOG_COMP_REC_INSERT, &index))) {
- ut_a(!page||!!page_is_comp(page)==index->table->comp);
+ ut_a(!page
+ || (ibool)!!page_is_comp(page)==index->table->comp);
ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
index, page, mtr);
}
@@ -776,7 +777,8 @@ recv_parse_or_apply_log_rec_body(
case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
if (NULL != (ptr = mlog_parse_index(ptr, end_ptr,
type == MLOG_COMP_REC_CLUST_DELETE_MARK, &index))) {
- ut_a(!page||!!page_is_comp(page)==index->table->comp);
+ ut_a(!page
+ || (ibool)!!page_is_comp(page)==index->table->comp);
ptr = btr_cur_parse_del_mark_set_clust_rec(ptr,
end_ptr, index, page);
}
@@ -796,7 +798,8 @@ recv_parse_or_apply_log_rec_body(
case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
if (NULL != (ptr = mlog_parse_index(ptr, end_ptr,
type == MLOG_COMP_REC_UPDATE_IN_PLACE, &index))) {
- ut_a(!page||!!page_is_comp(page)==index->table->comp);
+ ut_a(!page
+ || (ibool)!!page_is_comp(page)==index->table->comp);
ptr = btr_cur_parse_update_in_place(ptr, end_ptr,
page, index);
}
@@ -806,7 +809,8 @@ recv_parse_or_apply_log_rec_body(
if (NULL != (ptr = mlog_parse_index(ptr, end_ptr,
type == MLOG_COMP_LIST_END_DELETE
|| type == MLOG_COMP_LIST_START_DELETE, &index))) {
- ut_a(!page||!!page_is_comp(page)==index->table->comp);
+ ut_a(!page
+ || (ibool)!!page_is_comp(page)==index->table->comp);
ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
index, page, mtr);
}
@@ -814,7 +818,8 @@ recv_parse_or_apply_log_rec_body(
case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
if (NULL != (ptr = mlog_parse_index(ptr, end_ptr,
type == MLOG_COMP_LIST_END_COPY_CREATED, &index))) {
- ut_a(!page||!!page_is_comp(page)==index->table->comp);
+ ut_a(!page
+ || (ibool)!!page_is_comp(page)==index->table->comp);
ptr = page_parse_copy_rec_list_to_created_page(ptr,
end_ptr, index, page, mtr);
}
@@ -822,7 +827,8 @@ recv_parse_or_apply_log_rec_body(
case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
if (NULL != (ptr = mlog_parse_index(ptr, end_ptr,
type == MLOG_COMP_PAGE_REORGANIZE, &index))) {
- ut_a(!page||!!page_is_comp(page)==index->table->comp);
+ ut_a(!page
+ || (ibool)!!page_is_comp(page)==index->table->comp);
ptr = btr_parse_page_reorganize(ptr, end_ptr, index,
page, mtr);
}
@@ -855,7 +861,8 @@ recv_parse_or_apply_log_rec_body(
case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
if (NULL != (ptr = mlog_parse_index(ptr, end_ptr,
type == MLOG_COMP_REC_DELETE, &index))) {
- ut_a(!page||!!page_is_comp(page)==index->table->comp);
+ ut_a(!page
+ || (ibool)!!page_is_comp(page)==index->table->comp);
ptr = page_cur_parse_delete_rec(ptr, end_ptr,
index, page, mtr);
}
diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
index c68f5738798..48dc808e36c 100644
--- a/storage/innobase/os/os0file.c
+++ b/storage/innobase/os/os0file.c
@@ -1653,7 +1653,7 @@ os_file_get_size_as_iblonglong(
}
/***************************************************************************
-Sets a file size. This function can be used to extend or truncate a file. */
+Write the specified number of zeros to a newly created file. */
ibool
os_file_set_size(
@@ -1666,44 +1666,46 @@ os_file_set_size(
size */
ulint size_high)/* in: most significant 32 bits of size */
{
- ib_longlong offset;
- ib_longlong low;
- ulint n_bytes;
+ ib_longlong current_size;
+ ib_longlong desired_size;
ibool ret;
byte* buf;
byte* buf2;
+ ulint buf_size;
ut_a(size == (size & 0xFFFFFFFF));
- /* We use a very big 8 MB buffer in writing because Linux may be
- extremely slow in fsync on 1 MB writes */
+ current_size = 0;
+ desired_size = (ib_longlong)size + (((ib_longlong)size_high) << 32);
- buf2 = ut_malloc(UNIV_PAGE_SIZE * 513);
+ /* Write up to 1 megabyte at a time. */
+ buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
+ * UNIV_PAGE_SIZE;
+ buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE);
/* Align the buffer for possible raw i/o */
buf = ut_align(buf2, UNIV_PAGE_SIZE);
/* Write buffer full of zeros */
- memset(buf, 0, UNIV_PAGE_SIZE * 512);
+ memset(buf, 0, buf_size);
- offset = 0;
- low = (ib_longlong)size + (((ib_longlong)size_high) << 32);
-
- if (low >= (ib_longlong)(100 * 1024 * 1024)) {
+ if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) {
fprintf(stderr, "InnoDB: Progress in MB:");
}
- while (offset < low) {
- if (low - offset < UNIV_PAGE_SIZE * 512) {
- n_bytes = (ulint)(low - offset);
- } else {
- n_bytes = UNIV_PAGE_SIZE * 512;
- }
-
+ while (current_size < desired_size) {
+ ulint n_bytes;
+
+ if (desired_size - current_size < (ib_longlong) buf_size) {
+ n_bytes = (ulint) (desired_size - current_size);
+ } else {
+ n_bytes = buf_size;
+ }
+
ret = os_file_write(name, file, buf,
- (ulint)(offset & 0xFFFFFFFF),
- (ulint)(offset >> 32),
+ (ulint)(current_size & 0xFFFFFFFF),
+ (ulint)(current_size >> 32),
n_bytes);
if (!ret) {
ut_free(buf2);
@@ -1711,18 +1713,18 @@ os_file_set_size(
}
/* Print about progress for each 100 MB written */
- if ((ib_longlong) (offset + n_bytes) / (ib_longlong)(100 * 1024 * 1024)
- != offset / (ib_longlong)(100 * 1024 * 1024)) {
+ if ((current_size + n_bytes) / (ib_longlong)(100 * 1024 * 1024)
+ != current_size / (ib_longlong)(100 * 1024 * 1024)) {
fprintf(stderr, " %lu00",
- (ulong) ((offset + n_bytes)
+ (ulong) ((current_size + n_bytes)
/ (ib_longlong)(100 * 1024 * 1024)));
}
- offset += n_bytes;
+ current_size += n_bytes;
}
- if (low >= (ib_longlong)(100 * 1024 * 1024)) {
+ if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) {
fprintf(stderr, "\n");
}
@@ -3296,7 +3298,7 @@ os_aio(
ibool retval;
BOOL ret = TRUE;
DWORD len = (DWORD) n;
- void* dummy_mess1;
+ struct fil_node_struct * dummy_mess1;
void* dummy_mess2;
ulint dummy_type;
#endif
diff --git a/storage/innobase/page/page0cur.c b/storage/innobase/page/page0cur.c
index df6d898d4ac..d0b89e81787 100644
--- a/storage/innobase/page/page0cur.c
+++ b/storage/innobase/page/page0cur.c
@@ -47,7 +47,6 @@ page_cur_try_search_shortcut(
not yet completely matched */
page_cur_t* cursor) /* out: page cursor */
{
- int cmp;
rec_t* rec;
rec_t* next_rec;
ulint low_match;
@@ -79,9 +78,8 @@ page_cur_try_search_shortcut(
up_match = low_match;
up_bytes = low_bytes;
- cmp = page_cmp_dtuple_rec_with_match(tuple, rec, offsets, &low_match,
- &low_bytes);
- if (cmp == -1) {
+ if (page_cmp_dtuple_rec_with_match(tuple, rec, offsets,
+ &low_match, &low_bytes) < 0) {
goto exit_func;
}
@@ -89,9 +87,8 @@ page_cur_try_search_shortcut(
offsets = rec_get_offsets(next_rec, index, offsets,
dtuple_get_n_fields(tuple), &heap);
- cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets,
- &up_match, &up_bytes);
- if (cmp != -1) {
+ if (page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets,
+ &up_match, &up_bytes) >= 0) {
goto exit_func;
}
@@ -115,7 +112,7 @@ page_cur_try_search_shortcut(
ut_a(*ilow_matched_fields == low_match);
ut_a(*ilow_matched_bytes == low_bytes);
#endif
- if (next_rec != page_get_supremum_rec(page)) {
+ if (!page_rec_is_supremum(next_rec)) {
*iup_matched_fields = up_match;
*iup_matched_bytes = up_bytes;
@@ -137,6 +134,7 @@ exit_func:
#endif
+#ifdef PAGE_CUR_LE_OR_EXTENDS
/********************************************************************
Checks if the nth field in a record is a character type field which extends
the nth field in tuple, i.e., the field is longer or equal in length and has
@@ -185,6 +183,7 @@ page_cur_rec_field_extends(
return(FALSE);
}
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
/********************************************************************
Searches the right position for a page cursor. */
@@ -239,10 +238,17 @@ page_cur_search_with_match(
&& ilow_matched_fields && ilow_matched_bytes && cursor);
ut_ad(dtuple_validate(tuple));
ut_ad(dtuple_check_typed(tuple));
+#ifdef UNIV_DEBUG
+# ifdef PAGE_CUR_DBG
+ if (mode != PAGE_CUR_DBG)
+# endif /* PAGE_CUR_DBG */
+# ifdef PAGE_CUR_LE_OR_EXTENDS
+ if (mode != PAGE_CUR_LE_OR_EXTENDS)
+# endif /* PAGE_CUR_LE_OR_EXTENDS */
ut_ad((mode == PAGE_CUR_L) || (mode == PAGE_CUR_LE)
- || (mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)
- || (mode == PAGE_CUR_LE_OR_EXTENDS) || (mode == PAGE_CUR_DBG));
-
+ || (mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE));
+#endif /* UNIV_DEBUG */
+
page_check_dir(page);
#ifdef PAGE_CUR_ADAPT
@@ -261,16 +267,18 @@ page_cur_search_with_match(
return;
}
}
-/*#ifdef UNIV_SEARCH_DEBUG */
+# ifdef PAGE_CUR_DBG
if (mode == PAGE_CUR_DBG) {
mode = PAGE_CUR_LE;
}
-/*#endif */
+# endif
#endif
/* The following flag does not work for non-latin1 char sets because
cmp_full_field does not tell how many bytes matched */
+#ifdef PAGE_CUR_LE_OR_EXTENDS
ut_a(mode != PAGE_CUR_LE_OR_EXTENDS);
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
/* If mode PAGE_CUR_G is specified, we are trying to position the
cursor to answer a query of the form "tuple < X", where tuple is
@@ -308,33 +316,36 @@ page_cur_search_with_match(
cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
&cur_matched_fields,
&cur_matched_bytes);
- if (cmp == 1) {
+ if (UNIV_LIKELY(cmp > 0)) {
+low_slot_match:
low = mid;
low_matched_fields = cur_matched_fields;
low_matched_bytes = cur_matched_bytes;
- } else if (cmp == -1) {
+ } else if (UNIV_LIKELY(cmp /* == -1 */)) {
+#ifdef PAGE_CUR_LE_OR_EXTENDS
if (mode == PAGE_CUR_LE_OR_EXTENDS
&& page_cur_rec_field_extends(tuple, mid_rec,
offsets, cur_matched_fields)) {
- low = mid;
- low_matched_fields = cur_matched_fields;
- low_matched_bytes = cur_matched_bytes;
- } else {
- up = mid;
- up_matched_fields = cur_matched_fields;
- up_matched_bytes = cur_matched_bytes;
- }
- } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
- || mode == PAGE_CUR_LE_OR_EXTENDS) {
- low = mid;
- low_matched_fields = cur_matched_fields;
- low_matched_bytes = cur_matched_bytes;
- } else {
+ goto low_slot_match;
+ }
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+up_slot_match:
up = mid;
up_matched_fields = cur_matched_fields;
up_matched_bytes = cur_matched_bytes;
+
+ } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+ || mode == PAGE_CUR_LE_OR_EXTENDS
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+ ) {
+
+ goto low_slot_match;
+ } else {
+
+ goto up_slot_match;
}
}
@@ -360,32 +371,35 @@ page_cur_search_with_match(
cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
&cur_matched_fields,
&cur_matched_bytes);
- if (cmp == 1) {
+ if (UNIV_LIKELY(cmp > 0)) {
+low_rec_match:
low_rec = mid_rec;
low_matched_fields = cur_matched_fields;
low_matched_bytes = cur_matched_bytes;
- } else if (cmp == -1) {
+ } else if (UNIV_LIKELY(cmp /* == -1 */)) {
+#ifdef PAGE_CUR_LE_OR_EXTENDS
if (mode == PAGE_CUR_LE_OR_EXTENDS
&& page_cur_rec_field_extends(tuple, mid_rec,
offsets, cur_matched_fields)) {
- low_rec = mid_rec;
- low_matched_fields = cur_matched_fields;
- low_matched_bytes = cur_matched_bytes;
- } else {
- up_rec = mid_rec;
- up_matched_fields = cur_matched_fields;
- up_matched_bytes = cur_matched_bytes;
+
+ goto low_rec_match;
}
- } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
- || mode == PAGE_CUR_LE_OR_EXTENDS) {
- low_rec = mid_rec;
- low_matched_fields = cur_matched_fields;
- low_matched_bytes = cur_matched_bytes;
- } else {
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+up_rec_match:
up_rec = mid_rec;
up_matched_fields = cur_matched_fields;
up_matched_bytes = cur_matched_bytes;
+ } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+ || mode == PAGE_CUR_LE_OR_EXTENDS
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+ ) {
+
+ goto low_rec_match;
+ } else {
+
+ goto up_rec_match;
}
}
diff --git a/storage/innobase/page/page0page.c b/storage/innobase/page/page0page.c
index 1fe7f1d9356..7e09cdf073e 100644
--- a/storage/innobase/page/page0page.c
+++ b/storage/innobase/page/page0page.c
@@ -483,7 +483,7 @@ page_copy_rec_list_end_no_locks(
page_cur_move_to_next(&cur1);
}
- ut_a(!!page_is_comp(new_page) == index->table->comp);
+ ut_a((ibool)!!page_is_comp(new_page) == index->table->comp);
ut_a(page_is_comp(new_page) == page_is_comp(page));
ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
(page_is_comp(new_page)
@@ -1347,7 +1347,7 @@ page_print_list(
ulint* offsets = offsets_;
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
- ut_a(!!page_is_comp(page) == index->table->comp);
+ ut_a((ibool)!!page_is_comp(page) == index->table->comp);
fprintf(stderr,
"--------------------------------\n"
@@ -1741,7 +1741,7 @@ page_validate(
ulint* offsets = NULL;
ulint* old_offsets = NULL;
- if (!!comp != index->table->comp) {
+ if ((ibool)!!comp != index->table->comp) {
fputs("InnoDB: 'compact format' flag mismatch\n", stderr);
goto func_exit2;
}
diff --git a/storage/innobase/rem/rem0rec.c b/storage/innobase/rem/rem0rec.c
index 580a7bfe509..fbc33aea669 100644
--- a/storage/innobase/rem/rem0rec.c
+++ b/storage/innobase/rem/rem0rec.c
@@ -601,30 +601,38 @@ rec_set_nth_field_extern_bit_new(
/* read the lengths of fields 0..n */
for (i = 0; i < n_fields; i++) {
- ibool is_null;
- ulint len;
field = dict_index_get_nth_field(index, i);
type = dict_col_get_type(dict_field_get_col(field));
- is_null = !(dtype_get_prtype(type) & DATA_NOT_NULL);
- if (is_null) {
- /* nullable field => read the null flag */
- is_null = !!(*nulls & null_mask);
+ if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
+ if (UNIV_UNLIKELY(!(byte) null_mask)) {
+ nulls--;
+ null_mask = 1;
+ }
+
+ if (*nulls & null_mask) {
+ null_mask <<= 1;
+ /* NULL fields cannot be external. */
+ ut_ad(i != ith);
+ continue;
+ }
+
null_mask <<= 1;
- if (null_mask == 0x100)
- nulls--, null_mask = 1;
}
- if (is_null || field->fixed_len) {
- /* No length (or extern bit) is stored for
- fields that are NULL or fixed-length. */
+ if (field->fixed_len) {
+ /* fixed-length fields cannot be external
+ (Fixed-length fields longer than
+ DICT_MAX_COL_PREFIX_LEN will be treated as
+ variable-length ones in dict_index_add_col().) */
ut_ad(i != ith);
continue;
}
- len = *lens--;
+ lens--;
if (dtype_get_len(type) > 255
|| dtype_get_mtype(type) == DATA_BLOB) {
+ ulint len = lens[1];
if (len & 0x80) { /* 1exxxxxx: 2-byte length */
if (i == ith) {
- if (!val == !(len & 0x20)) {
+ if (!val == !(len & 0x40)) {
return; /* no change */
}
/* toggle the extern bit */
@@ -823,6 +831,7 @@ rec_convert_dtuple_to_rec_new(
byte* lens;
ulint len;
ulint i;
+ ulint n_node_ptr_field;
ulint fixed_len;
ulint null_mask = 1;
const ulint n_fields = dtuple_get_n_fields(dtuple);
@@ -831,16 +840,26 @@ rec_convert_dtuple_to_rec_new(
ut_ad(index->table->comp);
ut_ad(n_fields > 0);
- switch (status) {
+
+ /* Try to ensure that the memset() between the for() loops
+ completes fast. The address is not exact, but UNIV_PREFETCH
+ should never generate a memory fault. */
+ UNIV_PREFETCH_RW(rec - REC_N_NEW_EXTRA_BYTES - n_fields);
+ UNIV_PREFETCH_RW(rec);
+
+ switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
case REC_STATUS_ORDINARY:
ut_ad(n_fields <= dict_index_get_n_fields(index));
+ n_node_ptr_field = ULINT_UNDEFINED;
break;
case REC_STATUS_NODE_PTR:
ut_ad(n_fields == dict_index_get_n_unique_in_tree(index) + 1);
+ n_node_ptr_field = n_fields - 1;
break;
case REC_STATUS_INFIMUM:
case REC_STATUS_SUPREMUM:
ut_ad(n_fields == 1);
+ n_node_ptr_field = ULINT_UNDEFINED;
goto init;
default:
ut_a(0);
@@ -852,15 +871,18 @@ rec_convert_dtuple_to_rec_new(
rec += (index->n_nullable + 7) / 8;
for (i = 0; i < n_fields; i++) {
+ if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
+#ifdef UNIV_DEBUG
+ field = dtuple_get_nth_field(dtuple, i);
+ type = dfield_get_type(field);
+ ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
+ ut_ad(dfield_get_len(field) == 4);
+#endif /* UNIV_DEBUG */
+ goto init;
+ }
field = dtuple_get_nth_field(dtuple, i);
type = dfield_get_type(field);
len = dfield_get_len(field);
- if (status == REC_STATUS_NODE_PTR && i == n_fields - 1) {
- fixed_len = 4;
- ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
- ut_ad(len == 4);
- continue;
- }
fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
@@ -902,27 +924,33 @@ init:
type = dfield_get_type(field);
len = dfield_get_len(field);
- if (status == REC_STATUS_NODE_PTR && i == n_fields - 1) {
- fixed_len = 4;
+ if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
ut_ad(len == 4);
- goto copy;
+ memcpy(end, dfield_get_data(field), len);
+ break;
}
fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
/* nullable field */
ut_ad(index->n_nullable > 0);
+
+ if (UNIV_UNLIKELY(!(byte) null_mask)) {
+ nulls--;
+ null_mask = 1;
+ }
+
ut_ad(*nulls < null_mask);
+
/* set the null flag if necessary */
if (len == UNIV_SQL_NULL) {
*nulls |= null_mask;
+ null_mask <<= 1;
+ continue;
}
+
null_mask <<= 1;
- if (null_mask == 0x100)
- nulls--, null_mask = 1;
- if (len == UNIV_SQL_NULL)
- continue;
}
/* only nullable fields can be null */
ut_ad(len != UNIV_SQL_NULL);
@@ -942,7 +970,7 @@ init:
*lens-- = (byte) len;
}
}
- copy:
+
memcpy(end, dfield_get_data(field), len);
end += len;
}
@@ -1105,7 +1133,6 @@ rec_copy_prefix_to_buf(
dtype_t* type;
ulint i;
ulint prefix_len;
- ibool is_null;
ulint null_mask;
ulint status;
@@ -1146,20 +1173,22 @@ rec_copy_prefix_to_buf(
for (i = 0; i < n_fields; i++) {
field = dict_index_get_nth_field(index, i);
type = dict_col_get_type(dict_field_get_col(field));
- is_null = !(dtype_get_prtype(type) & DATA_NOT_NULL);
- if (is_null) {
+ if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
/* nullable field => read the null flag */
- is_null = !!(*nulls & null_mask);
- null_mask <<= 1;
- if (null_mask == 0x100) {
- --nulls;
- UNIV_PREFETCH_R(nulls);
+ if (UNIV_UNLIKELY(!(byte) null_mask)) {
+ nulls--;
null_mask = 1;
}
+
+ if (*nulls & null_mask) {
+ null_mask <<= 1;
+ continue;
+ }
+
+ null_mask <<= 1;
}
- if (is_null) {
- } else if (field->fixed_len) {
+ if (field->fixed_len) {
prefix_len += field->fixed_len;
} else {
ulint len = *lens--;
diff --git a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c
index eb50b83a4d5..2ac0824b331 100644
--- a/storage/innobase/row/row0mysql.c
+++ b/storage/innobase/row/row0mysql.c
@@ -1429,51 +1429,106 @@ run_again:
}
/*************************************************************************
-Does an unlock of a row for MySQL. */
+This can only be used when srv_locks_unsafe_for_binlog is TRUE. Before
+calling this function we must use trx_reset_new_rec_lock_info() and
+trx_register_new_rec_lock() to store the information which new record locks
+really were set. This function removes a newly set lock under prebuilt->pcur,
+and also under prebuilt->clust_pcur. Currently, this is only used and tested
+in the case of an UPDATE or a DELETE statement, where the row lock is of the
+LOCK_X type.
+Thus, this implements a 'mini-rollback' that releases the latest record
+locks we set. */
int
row_unlock_for_mysql(
/*=================*/
/* out: error code or DB_SUCCESS */
- row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL
handle */
+ ibool has_latches_on_recs)/* TRUE if called so that we have
+ the latches on the records under pcur
+ and clust_pcur, and we do not need to
+ reposition the cursors. */
{
- rec_t* rec;
- btr_pcur_t* cur = prebuilt->pcur;
+ dict_index_t* index;
+ btr_pcur_t* pcur = prebuilt->pcur;
+ btr_pcur_t* clust_pcur = prebuilt->clust_pcur;
trx_t* trx = prebuilt->trx;
+ rec_t* rec;
mtr_t mtr;
ut_ad(prebuilt && trx);
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-
+
+ if (!srv_locks_unsafe_for_binlog) {
+
+ fprintf(stderr,
+"InnoDB: Error: calling row_unlock_for_mysql though\n"
+"InnoDB: srv_locks_unsafe_for_binlog is FALSE.\n");
+
+ return(DB_SUCCESS);
+ }
+
trx->op_info = "unlock_row";
-
- if (srv_locks_unsafe_for_binlog) {
- if (trx->trx_create_lock == TRUE) {
- mtr_start(&mtr);
+ index = btr_pcur_get_btr_cur(pcur)->index;
+
+ if (index != NULL && trx_new_rec_locks_contain(trx, index)) {
+
+ mtr_start(&mtr);
- /* Restore a cursor position and find a record */
- btr_pcur_restore_position(BTR_SEARCH_LEAF, cur, &mtr);
- rec = btr_pcur_get_rec(cur);
+ /* Restore the cursor position and find the record */
+
+ if (!has_latches_on_recs) {
+ btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, &mtr);
+ }
- if (rec) {
+ rec = btr_pcur_get_rec(pcur);
- lock_rec_reset_and_release_wait(rec);
- } else {
- fputs("InnoDB: Error: "
- "Record for the lock not found\n",
- stderr);
- mem_analyze_corruption((byte*) trx);
- ut_error;
- }
+ mutex_enter(&kernel_mutex);
- trx->trx_create_lock = FALSE;
- mtr_commit(&mtr);
+ lock_rec_reset_and_release_wait(rec);
+
+ mutex_exit(&kernel_mutex);
+
+ mtr_commit(&mtr);
+
+ /* If the search was done through the clustered index, then
+ we have not used clust_pcur at all, and we must NOT try to
+ reset locks on clust_pcur. The values in clust_pcur may be
+ garbage! */
+
+ if (index->type & DICT_CLUSTERED) {
+
+ goto func_exit;
}
-
+ }
+
+ index = btr_pcur_get_btr_cur(clust_pcur)->index;
+
+ if (index != NULL && trx_new_rec_locks_contain(trx, index)) {
+
+ mtr_start(&mtr);
+
+ /* Restore the cursor position and find the record */
+
+ if (!has_latches_on_recs) {
+ btr_pcur_restore_position(BTR_SEARCH_LEAF, clust_pcur,
+ &mtr);
+ }
+
+ rec = btr_pcur_get_rec(clust_pcur);
+
+ mutex_enter(&kernel_mutex);
+
+ lock_rec_reset_and_release_wait(rec);
+
+ mutex_exit(&kernel_mutex);
+
+ mtr_commit(&mtr);
}
+func_exit:
trx->op_info = "";
return(DB_SUCCESS);
diff --git a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c
index c7a548fe448..15439bed7e7 100644
--- a/storage/innobase/row/row0sel.c
+++ b/storage/innobase/row/row0sel.c
@@ -2784,6 +2784,10 @@ sel_restore_position_for_mysql(
process the record the cursor is
now positioned on (i.e. we should
not go to the next record yet) */
+ ibool* same_user_rec, /* out: TRUE if we were able to restore
+ the cursor on a user record with the
+ same ordering prefix in in the
+ B-tree index */
ulint latch_mode, /* in: latch mode wished in
restoration */
btr_pcur_t* pcur, /* in: cursor whose position
@@ -2800,6 +2804,8 @@ sel_restore_position_for_mysql(
success = btr_pcur_restore_position(latch_mode, pcur, mtr);
+ *same_user_rec = success;
+
if (relative_position == BTR_PCUR_ON) {
if (success) {
return(FALSE);
@@ -3064,10 +3070,12 @@ row_search_for_mysql(
ulint cnt = 0;
#endif /* UNIV_SEARCH_DEBUG */
ulint next_offs;
+ ibool same_user_rec;
mtr_t mtr;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
+
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
ut_ad(index && pcur && search_tuple);
@@ -3138,6 +3146,16 @@ row_search_for_mysql(
trx->search_latch_timeout = BTR_SEA_TIMEOUT;
}
+ /* Reset the new record lock info if we srv_locks_unsafe_for_binlog
+ is set. Then we are able to remove the record locks set here on an
+ individual row. */
+
+ if (srv_locks_unsafe_for_binlog
+ && prebuilt->select_lock_type != LOCK_NONE) {
+
+ trx_reset_new_rec_lock_info(trx);
+ }
+
/*-------------------------------------------------------------*/
/* PHASE 1: Try to pop the row from the prefetch cache */
@@ -3396,8 +3414,9 @@ shortcut_fails_too_big_rec:
clust_index = dict_table_get_first_index(index->table);
if (UNIV_LIKELY(direction != 0)) {
- if (!sel_restore_position_for_mysql(BTR_SEARCH_LEAF, pcur,
- moves_up, &mtr)) {
+ if (!sel_restore_position_for_mysql(&same_user_rec,
+ BTR_SEARCH_LEAF,
+ pcur, moves_up, &mtr)) {
goto next_rec;
}
@@ -3659,7 +3678,7 @@ rec_loop:
goto normal_return;
}
}
-
+
/* We are ready to look at a possible new index entry in the result
set: the cursor is now placed on a user record */
@@ -3679,6 +3698,7 @@ rec_loop:
|| srv_locks_unsafe_for_binlog
|| (unique_search && !UNIV_UNLIKELY(rec_get_deleted_flag(
rec, page_rec_is_comp(rec))))) {
+
goto no_gap_lock;
} else {
lock_type = LOCK_ORDINARY;
@@ -3701,7 +3721,7 @@ rec_loop:
&& dtuple_get_n_fields_cmp(search_tuple)
== dict_index_get_n_unique(index)
&& 0 == cmp_dtuple_rec(search_tuple, rec, offsets)) {
- no_gap_lock:
+no_gap_lock:
lock_type = LOCK_REC_NOT_GAP;
}
@@ -3764,6 +3784,7 @@ rec_loop:
/* Get the clustered index record if needed */
index_rec = rec;
ut_ad(index != clust_index);
+
goto requires_clust_rec;
}
}
@@ -3773,6 +3794,17 @@ rec_loop:
/* The record is delete-marked: we can skip it if this is
not a consistent read which might see an earlier version
of a non-clustered index record */
+
+ if (srv_locks_unsafe_for_binlog
+ && prebuilt->select_lock_type != LOCK_NONE) {
+
+ /* No need to keep a lock on a delete-marked record
+ if we do not want to use next-key locking. */
+
+ row_unlock_for_mysql(prebuilt, TRUE);
+
+ trx_reset_new_rec_lock_info(trx);
+ }
goto next_rec;
}
@@ -3783,7 +3815,8 @@ rec_loop:
index_rec = rec;
if (index != clust_index && prebuilt->need_to_access_clustered) {
- requires_clust_rec:
+
+requires_clust_rec:
/* Before and after this "if" block, "offsets" will be
related to "rec", which may be in a secondary index "index" or
the clustered index ("clust_index"). However, after this
@@ -3816,6 +3849,18 @@ rec_loop:
/* The record is delete marked: we can skip it */
+ if (srv_locks_unsafe_for_binlog
+ && prebuilt->select_lock_type != LOCK_NONE) {
+
+ /* No need to keep a lock on a delete-marked
+ record if we do not want to use next-key
+ locking. */
+
+ row_unlock_for_mysql(prebuilt, TRUE);
+
+ trx_reset_new_rec_lock_info(trx);
+ }
+
goto next_rec;
}
@@ -3908,7 +3953,7 @@ got_row:
next_rec:
/*-------------------------------------------------------------*/
/* PHASE 5: Move the cursor to the next index record */
-
+
if (UNIV_UNLIKELY(mtr_has_extra_clust_latch)) {
/* We must commit mtr if we are moving to the next
non-clustered index record, because we could break the
@@ -3921,8 +3966,9 @@ next_rec:
mtr_has_extra_clust_latch = FALSE;
mtr_start(&mtr);
- if (sel_restore_position_for_mysql(BTR_SEARCH_LEAF, pcur,
- moves_up, &mtr)) {
+ if (sel_restore_position_for_mysql(&same_user_rec,
+ BTR_SEARCH_LEAF,
+ pcur, moves_up, &mtr)) {
#ifdef UNIV_SEARCH_DEBUG
cnt++;
#endif /* UNIV_SEARCH_DEBUG */
@@ -3973,11 +4019,34 @@ lock_wait_or_error:
thr->lock_state = QUE_THR_LOCK_ROW;
if (row_mysql_handle_errors(&err, trx, thr, NULL)) {
+ /* It was a lock wait, and it ended */
+
thr->lock_state = QUE_THR_LOCK_NOLOCK;
mtr_start(&mtr);
- sel_restore_position_for_mysql(BTR_SEARCH_LEAF, pcur,
- moves_up, &mtr);
+ sel_restore_position_for_mysql(&same_user_rec,
+ BTR_SEARCH_LEAF, pcur,
+ moves_up, &mtr);
+ if (srv_locks_unsafe_for_binlog && !same_user_rec) {
+ /* Since we were not able to restore the cursor
+ on the same user record, we cannot use
+ row_unlock_for_mysql() to unlock any records, and
+ we must thus reset the new rec lock info. Since
+ in lock0lock.c we have blocked the inheriting of gap
+ X-locks, we actually do not have any new record locks
+ set in this case.
+
+ Note that if we were able to restore on the 'same'
+ user record, it is still possible that we were actually
+ waiting on a delete-marked record, and meanwhile
+ it was removed by purge and inserted again by some
+ other user. But that is no problem, because in
+ rec_loop we will again try to set a lock, and
+ new_rec_lock_info in trx will be right at the end. */
+
+ trx_reset_new_rec_lock_info(trx);
+ }
+
mode = pcur->search_mode;
goto rec_loop;
diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c
index cf2b8db5d32..514fb6bd577 100644
--- a/storage/innobase/row/row0upd.c
+++ b/storage/innobase/row/row0upd.c
@@ -818,7 +818,7 @@ row_upd_build_difference_binary(
extern_bit = upd_ext_vec_contains(ext_vec, n_ext_vec, i);
if (UNIV_UNLIKELY(extern_bit ==
- !rec_offs_nth_extern(offsets, i))
+ (ibool)!rec_offs_nth_extern(offsets, i))
|| !dfield_data_is_binary_equal(dfield, len, data)) {
upd_field = upd_get_nth_field(update, n_diff);
diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
index f901425a5f9..837c5be2bb6 100644
--- a/storage/innobase/srv/srv0srv.c
+++ b/storage/innobase/srv/srv0srv.c
@@ -260,7 +260,7 @@ semaphore contention and convoy problems can occur withput this restriction.
Value 10 should be good if there are less than 4 processors + 4 disks in the
computer. Bigger computers need bigger values. */
-ulong srv_thread_concurrency = 8;
+ulong srv_thread_concurrency = SRV_CONCURRENCY_THRESHOLD;
os_fast_mutex_t srv_conc_mutex; /* this mutex protects srv_conc data
structures */
@@ -983,12 +983,6 @@ srv_conc_enter_innodb(
srv_conc_slot_t* slot = NULL;
ulint i;
- if (srv_thread_concurrency >= 500) {
- /* Disable the concurrency check */
-
- return;
- }
-
/* If trx has 'free tickets' to enter the engine left, then use one
such ticket */
@@ -1134,7 +1128,7 @@ srv_conc_force_enter_innodb(
trx_t* trx) /* in: transaction object associated with the
thread */
{
- if (srv_thread_concurrency >= 500) {
+ if (srv_thread_concurrency >= SRV_CONCURRENCY_THRESHOLD) {
return;
}
@@ -1160,7 +1154,7 @@ srv_conc_force_exit_innodb(
{
srv_conc_slot_t* slot = NULL;
- if (srv_thread_concurrency >= 500) {
+ if (srv_thread_concurrency >= SRV_CONCURRENCY_THRESHOLD) {
return;
}
@@ -1212,11 +1206,6 @@ srv_conc_exit_innodb(
trx_t* trx) /* in: transaction object associated with the
thread */
{
- if (srv_thread_concurrency >= 500) {
-
- return;
- }
-
if (trx->n_tickets_to_enter_innodb > 0) {
/* We will pretend the thread is still inside InnoDB though it
now leaves the InnoDB engine. In this way we save
diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
index 9e155ee1de0..10fbf3468c0 100644
--- a/storage/innobase/trx/trx0trx.c
+++ b/storage/innobase/trx/trx0trx.c
@@ -166,6 +166,8 @@ trx_create(
memset(&trx->xid, 0, sizeof(trx->xid));
trx->xid.formatID = -1;
+ trx_reset_new_rec_lock_info(trx);
+
return(trx);
}
diff --git a/storage/innobase/trx/trx0undo.c b/storage/innobase/trx/trx0undo.c
index c14e4a1f3ab..7441dd3f152 100644
--- a/storage/innobase/trx/trx0undo.c
+++ b/storage/innobase/trx/trx0undo.c
@@ -559,14 +559,14 @@ trx_undo_write_xid(
const XID* xid, /* in: X/Open XA Transaction Identification */
mtr_t* mtr) /* in: mtr */
{
- mlog_write_ulint(log_hdr + TRX_UNDO_XA_FORMAT, xid->formatID,
- MLOG_4BYTES, mtr);
+ mlog_write_ulint(log_hdr + TRX_UNDO_XA_FORMAT,
+ (ulint)xid->formatID, MLOG_4BYTES, mtr);
- mlog_write_ulint(log_hdr + TRX_UNDO_XA_TRID_LEN, xid->gtrid_length,
- MLOG_4BYTES, mtr);
+ mlog_write_ulint(log_hdr + TRX_UNDO_XA_TRID_LEN,
+ (ulint)xid->gtrid_length, MLOG_4BYTES, mtr);
- mlog_write_ulint(log_hdr + TRX_UNDO_XA_BQUAL_LEN, xid->bqual_length,
- MLOG_4BYTES, mtr);
+ mlog_write_ulint(log_hdr + TRX_UNDO_XA_BQUAL_LEN,
+ (ulint)xid->bqual_length, MLOG_4BYTES, mtr);
mlog_write_string(log_hdr + TRX_UNDO_XA_XID, (const byte*) xid->data,
XIDDATASIZE, mtr);
@@ -581,18 +581,14 @@ trx_undo_read_xid(
trx_ulogf_t* log_hdr,/* in: undo log header */
XID* xid) /* out: X/Open XA Transaction Identification */
{
- ulint i;
-
- xid->formatID = mach_read_from_4(log_hdr + TRX_UNDO_XA_FORMAT);
+ xid->formatID = (long)mach_read_from_4(log_hdr + TRX_UNDO_XA_FORMAT);
- xid->gtrid_length = mach_read_from_4(log_hdr + TRX_UNDO_XA_TRID_LEN);
-
- xid->bqual_length = mach_read_from_4(log_hdr + TRX_UNDO_XA_BQUAL_LEN);
+ xid->gtrid_length =
+ (long)mach_read_from_4(log_hdr + TRX_UNDO_XA_TRID_LEN);
+ xid->bqual_length =
+ (long)mach_read_from_4(log_hdr + TRX_UNDO_XA_BQUAL_LEN);
- for (i = 0; i < XIDDATASIZE; i++) {
- xid->data[i] = (char)mach_read_from_1(log_hdr +
- TRX_UNDO_XA_XID + i);
- }
+ memcpy(xid->data, log_hdr + TRX_UNDO_XA_XID, XIDDATASIZE);
}
/*******************************************************************
diff --git a/storage/myisam/Makefile.am b/storage/myisam/Makefile.am
index c61647b25f7..e4327070997 100644
--- a/storage/myisam/Makefile.am
+++ b/storage/myisam/Makefile.am
@@ -17,7 +17,7 @@
EXTRA_DIST = mi_test_all.sh mi_test_all.res
pkgdata_DATA = mi_test_all mi_test_all.res
-INCLUDES = -I$(top_srcdir)/include
+INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include
LDADD = @CLIENT_EXTRA_LDFLAGS@ libmyisam.a \
$(top_builddir)/mysys/libmysys.a \
$(top_builddir)/dbug/libdbug.a \
diff --git a/storage/myisam/mi_create.c b/storage/myisam/mi_create.c
index 47332b02a72..33b344405ec 100644
--- a/storage/myisam/mi_create.c
+++ b/storage/myisam/mi_create.c
@@ -770,10 +770,13 @@ err:
uint mi_get_pointer_length(ulonglong file_length, uint def)
{
+ DBUG_ASSERT(def >= 2 && def <= 7);
if (file_length) /* If not default */
{
+#ifdef NOT_YET_READY_FOR_8_BYTE_POINTERS
if (file_length >= (longlong) 1 << 56)
def=8;
+#endif
if (file_length >= (longlong) 1 << 48)
def=7;
if (file_length >= (longlong) 1 << 40)
diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c
index 456b87b7799..74b97a65609 100644
--- a/storage/myisam/mi_open.c
+++ b/storage/myisam/mi_open.c
@@ -78,7 +78,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
int lock_error,kfile,open_mode,save_errno,have_rtree=0;
uint i,j,len,errpos,head_length,base_pos,offset,info_length,keys,
key_parts,unique_key_parts,fulltext_keys,uniques;
- char name_buff[FN_REFLEN], org_name [FN_REFLEN], index_name[FN_REFLEN],
+ char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN],
data_name[FN_REFLEN];
char *disk_cache, *disk_pos, *end_pos;
MI_INFO info,*m_info,*old_info;
diff --git a/storage/myisam/mi_packrec.c b/storage/myisam/mi_packrec.c
index 4b512dd89dd..c251e4dda4a 100644
--- a/storage/myisam/mi_packrec.c
+++ b/storage/myisam/mi_packrec.c
@@ -416,8 +416,19 @@ static uint find_longest_bitstream(uint16 *table, uint16 *end)
}
- /* Read record from datafile */
- /* Returns length of packed record, -1 if error */
+/*
+ Read record from datafile.
+
+ SYNOPSIS
+ _mi_read_pack_record()
+ info A pointer to MI_INFO.
+ filepos File offset of the record.
+ buf RETURN The buffer to receive the record.
+
+ RETURN
+ 0 on success
+ HA_ERR_WRONG_IN_RECORD or -1 on error
+*/
int _mi_read_pack_record(MI_INFO *info, my_off_t filepos, byte *buf)
{
diff --git a/storage/myisam/mi_unique.c b/storage/myisam/mi_unique.c
index f2d5f01be25..34f5f595f30 100644
--- a/storage/myisam/mi_unique.c
+++ b/storage/myisam/mi_unique.c
@@ -64,7 +64,12 @@ my_bool mi_check_unique(MI_INFO *info, MI_UNIQUEDEF *def, byte *record,
}
-/* Calculate a hash for a row */
+/*
+ Calculate a hash for a row
+
+ TODO
+ Add support for bit fields
+*/
ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const byte *record)
{
@@ -126,9 +131,17 @@ ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const byte *record)
return crc;
}
- /*
- Returns 0 if both rows have equal unique value
- */
+
+/*
+ compare unique key for two rows
+
+ TODO
+ Add support for bit fields
+
+ RETURN
+ 0 if both rows have equal unique value
+ # Rows are different
+*/
int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b,
my_bool null_are_equal)
diff --git a/storage/myisam/myisampack.c b/storage/myisam/myisampack.c
index 74bb541b220..daf2bbdf85c 100644
--- a/storage/myisam/myisampack.c
+++ b/storage/myisam/myisampack.c
@@ -33,10 +33,10 @@
#include <my_getopt.h>
#include <assert.h>
-#if INT_MAX > 32767
-#define BITS_SAVED 32
+#if SIZEOF_LONG_LONG > 4
+#define BITS_SAVED 64
#else
-#define BITS_SAVED 16
+#define BITS_SAVED 32
#endif
#define IS_OFFSET ((uint) 32768) /* Bit if offset or char in tree */
@@ -49,10 +49,10 @@
struct st_file_buffer {
File file;
- char *buffer,*pos,*end;
+ uchar *buffer,*pos,*end;
my_off_t pos_in_file;
int bits;
- uint current_byte;
+ ulonglong bitbucket;
};
struct st_huff_tree;
@@ -69,13 +69,17 @@ typedef struct st_huff_counts {
my_off_t end_space[8];
my_off_t pre_space[8];
my_off_t tot_end_space,tot_pre_space,zero_fields,empty_fields,bytes_packed;
- TREE int_tree;
- byte *tree_buff;
- byte *tree_pos;
+ TREE int_tree; /* Tree for detecting distinct column values. */
+ byte *tree_buff; /* Column values, 'field_length' each. */
+ byte *tree_pos; /* Points to end of column values in 'tree_buff'. */
} HUFF_COUNTS;
typedef struct st_huff_element HUFF_ELEMENT;
+/*
+ WARNING: It is crucial for the optimizations in calc_packed_length()
+ that 'count' is the first element of 'HUFF_ELEMENT'.
+*/
struct st_huff_element {
my_off_t count;
union un_element {
@@ -98,7 +102,7 @@ typedef struct st_huff_tree {
my_off_t bytes_packed;
uint tree_pack_length;
uint min_chr,max_chr,char_bits,offset_bits,max_offset,height;
- ulong *code;
+ ulonglong *code;
uchar *code_len;
} HUFF_TREE;
@@ -146,7 +150,7 @@ static uint join_same_trees(HUFF_COUNTS *huff_counts,uint trees);
static int make_huff_decode_table(HUFF_TREE *huff_tree,uint trees);
static void make_traverse_code_tree(HUFF_TREE *huff_tree,
HUFF_ELEMENT *element,uint size,
- ulong code);
+ ulonglong code);
static int write_header(PACK_MRG_INFO *isam_file, uint header_length,uint trees,
my_off_t tot_elements,my_off_t filelength);
static void write_field_info(HUFF_COUNTS *counts, uint fields,uint trees);
@@ -161,7 +165,7 @@ static char *make_old_name(char *new_name,char *old_name);
static void init_file_buffer(File file,pbool read_buffer);
static int flush_buffer(ulong neaded_length);
static void end_file_buffer(void);
-static void write_bits(ulong value,uint bits);
+static void write_bits(ulonglong value, uint bits);
static void flush_bits(void);
static int save_state(MI_INFO *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length,
ha_checksum crc);
@@ -170,13 +174,23 @@ static int save_state_mrg(File file,PACK_MRG_INFO *isam_file,my_off_t new_length
static int mrg_close(PACK_MRG_INFO *mrg);
static int mrg_rrnd(PACK_MRG_INFO *info,byte *buf);
static void mrg_reset(PACK_MRG_INFO *mrg);
+#if !defined(DBUG_OFF)
+static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count);
+static int fakecmp(my_off_t **count1, my_off_t **count2);
+#endif
static int error_on_write=0,test_only=0,verbose=0,silent=0,
write_loop=0,force_pack=0, isamchk_neaded=0;
static int tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
static my_bool backup, opt_wait;
-static uint tree_buff_length=8196-MALLOC_OVERHEAD;
+/*
+ tree_buff_length is somewhat arbitrary. The bigger it is the better
+ the chance to win in terms of compression factor. On the other hand,
+ this table becomes part of the compressed file header. And its length
+ is coded with 16 bits in the header. Hence the limit is 2**16 - 1.
+*/
+static uint tree_buff_length= 65536 - MALLOC_OVERHEAD;
static char tmp_dir[FN_REFLEN]={0},*join_table;
static my_off_t intervall_length;
static ha_checksum glob_crc;
@@ -225,7 +239,8 @@ int main(int argc, char **argv)
}
if (ok && isamchk_neaded && !silent)
puts("Remember to run myisamchk -rq on compressed tables");
- VOID(fflush(stdout)); VOID(fflush(stderr));
+ VOID(fflush(stdout));
+ VOID(fflush(stderr));
free_defaults(default_argv);
my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
exit(error ? 2 : 0);
@@ -260,7 +275,7 @@ static struct my_option my_long_options[] =
0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"test", 't', "Don't pack table, only test packing it.",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
- {"verbose", 'v', "Write info about progress and packing result.",
+ {"verbose", 'v', "Write info about progress and packing result. Use many -v for more verbosity!",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"version", 'V', "Output version information and exit.",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
@@ -273,7 +288,8 @@ static struct my_option my_long_options[] =
static void print_version(void)
{
- printf("%s Ver 1.22 for %s on %s\n", my_progname, SYSTEM_TYPE, MACHINE_TYPE);
+ VOID(printf("%s Ver 1.23 for %s on %s\n",
+ my_progname, SYSTEM_TYPE, MACHINE_TYPE));
NETWARE_SET_SCREEN_MODE(1);
}
@@ -290,7 +306,7 @@ static void usage(void)
puts("afterwards to update the keys.");
puts("You should give the .MYI file as the filename argument.");
- printf("\nUsage: %s [OPTIONS] filename...\n", my_progname);
+ VOID(printf("\nUsage: %s [OPTIONS] filename...\n", my_progname));
my_print_help(my_long_options);
print_defaults("my", load_default_groups);
my_print_variables(my_long_options);
@@ -314,7 +330,10 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
silent= 1;
break;
case 't':
- test_only= verbose= 1;
+ test_only= 1;
+ /* Avoid to reset 'verbose' if it was already set > 1. */
+ if (! verbose)
+ verbose= 1;
break;
case 'T':
length= (uint) (strmov(tmp_dir, argument) - tmp_dir);
@@ -325,7 +344,7 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
}
break;
case 'v':
- verbose= 1;
+ verbose++; /* Allow for selecting the level of verbosity. */
silent= 0;
break;
case '#':
@@ -380,7 +399,7 @@ static MI_INFO *open_isam_file(char *name,int mode)
(opt_wait ? HA_OPEN_WAIT_IF_LOCKED :
HA_OPEN_ABORT_IF_LOCKED))))
{
- VOID(fprintf(stderr,"%s gave error %d on open\n",name,my_errno));
+ VOID(fprintf(stderr, "%s gave error %d on open\n", name, my_errno));
DBUG_RETURN(0);
}
share=isam_file->s;
@@ -388,7 +407,7 @@ static MI_INFO *open_isam_file(char *name,int mode)
{
if (!force_pack)
{
- VOID(fprintf(stderr,"%s is already compressed\n",name));
+ VOID(fprintf(stderr, "%s is already compressed\n", name));
VOID(mi_close(isam_file));
DBUG_RETURN(0);
}
@@ -400,7 +419,7 @@ static MI_INFO *open_isam_file(char *name,int mode)
(share->state.state.records <= 1 ||
share->state.state.data_file_length < 1024))
{
- VOID(fprintf(stderr,"%s is too small to compress\n",name));
+ VOID(fprintf(stderr, "%s is too small to compress\n", name));
VOID(mi_close(isam_file));
DBUG_RETURN(0);
}
@@ -446,8 +465,8 @@ static bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count)
return 0;
diff_file:
- fprintf(stderr,"%s: Tables '%s' and '%s' are not identical\n",
- my_progname,names[j],names[j+1]);
+ VOID(fprintf(stderr, "%s: Tables '%s' and '%s' are not identical\n",
+ my_progname, names[j], names[j+1]));
error:
while (i--)
mi_close(mrg->file[i]);
@@ -518,16 +537,25 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
mrg->records=0;
for (i=0 ; i < mrg->count ; i++)
mrg->records+=mrg->file[i]->s->state.state.records;
+
+ DBUG_PRINT("info", ("Compressing %s: (%lu records)",
+ result_table ? new_name : org_name,
+ (ulong) mrg->records));
if (write_loop || verbose)
{
- printf("Compressing %s: (%lu records)\n",
- result_table ? new_name : org_name,(ulong) mrg->records);
+ VOID(printf("Compressing %s: (%lu records)\n",
+ result_table ? new_name : org_name, (ulong) mrg->records));
}
trees=fields=share->base.fields;
huff_counts=init_huff_count(isam_file,mrg->records);
QUICK_SAFEMALLOC;
+
+ /*
+ Read the whole data file(s) for statistics.
+ */
+ DBUG_PRINT("info", ("- Calculating statistics"));
if (write_loop || verbose)
- printf("- Calculating statistics\n");
+ VOID(printf("- Calculating statistics\n"));
if (get_statistic(mrg,huff_counts))
goto err;
NORMAL_SAFEMALLOC;
@@ -536,29 +564,74 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
old_length+= (mrg->file[i]->s->state.state.data_file_length -
mrg->file[i]->s->state.state.empty);
+ /*
+ Create a global priority queue in preparation for making
+ temporary Huffman trees.
+ */
if (init_queue(&queue,256,0,0,compare_huff_elements,0))
goto err;
+
+ /*
+ Check each column if we should use pre-space-compress, end-space-
+ compress, empty-field-compress or zero-field-compress.
+ */
check_counts(huff_counts,fields,mrg->records);
+
+ /*
+ Build a Huffman tree for each column.
+ */
huff_trees=make_huff_trees(huff_counts,trees);
+
+ /*
+ If the packed lengths of combined columns is less then the sum of
+ the non-combined columns, then create common Huffman trees for them.
+ We do this only for byte compressed columns, not for distinct values
+ compressed columns.
+ */
if ((int) (used_trees=join_same_trees(huff_counts,trees)) < 0)
goto err;
+
+ /*
+ Assign codes to all byte or column values.
+ */
if (make_huff_decode_table(huff_trees,fields))
goto err;
+ /* Prepare a file buffer. */
init_file_buffer(new_file,0);
+
+ /*
+ Reserve space in the target file for the fixed compressed file header.
+ */
file_buffer.pos_in_file=HEAD_LENGTH;
if (! test_only)
VOID(my_seek(new_file,file_buffer.pos_in_file,MY_SEEK_SET,MYF(0)));
+ /*
+ Write field infos: field type, pack type, length bits, tree number.
+ */
write_field_info(huff_counts,fields,used_trees);
+
+ /*
+ Write decode trees.
+ */
if (!(tot_elements=write_huff_tree(huff_trees,trees)))
goto err;
+
+ /*
+ Calculate the total length of the compression info header.
+ This includes the fixed compressed file header, the column compression
+ type descriptions, and the decode trees.
+ */
header_length=(uint) file_buffer.pos_in_file+
(uint) (file_buffer.pos-file_buffer.buffer);
- /* Compress file */
+ /*
+ Compress the source file into the target file.
+ */
+ DBUG_PRINT("info", ("- Compressing file"));
if (write_loop || verbose)
- printf("- Compressing file\n");
+ VOID(printf("- Compressing file\n"));
error=compress_isam_file(mrg,huff_counts);
new_length=file_buffer.pos_in_file;
if (!error && !test_only)
@@ -568,16 +641,28 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
error=my_write(file_buffer.file,buff,sizeof(buff),
MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0;
}
+
+ /*
+ Write the fixed compressed file header.
+ */
if (!error)
error=write_header(mrg,header_length,used_trees,tot_elements,
new_length);
+
+ /* Flush the file buffer. */
end_file_buffer();
+ /* Display statistics. */
+ DBUG_PRINT("info", ("Min record length: %6d Max length: %6d "
+ "Mean total length: %6ld\n",
+ mrg->min_pack_length, mrg->max_pack_length,
+ (ulong) (mrg->records ? (new_length/mrg->records) : 0)));
if (verbose && mrg->records)
- printf("Min record length: %6d Max length: %6d Mean total length: %6ld\n",
- mrg->min_pack_length,mrg->max_pack_length,
- (ulong) (new_length/mrg->records));
+ VOID(printf("Min record length: %6d Max length: %6d "
+ "Mean total length: %6ld\n", mrg->min_pack_length,
+ mrg->max_pack_length, (ulong) (new_length/mrg->records)));
+ /* Close source and target file. */
if (!test_only)
{
error|=my_close(new_file,MYF(MY_WME));
@@ -588,6 +673,7 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
}
}
+ /* Cleanup. */
free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
if (! test_only && ! error)
{
@@ -629,15 +715,16 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
error|=my_close(join_isam_file,MYF(MY_WME));
if (error)
{
- VOID(fprintf(stderr,"Aborting: %s is not compressed\n",org_name));
+ VOID(fprintf(stderr, "Aborting: %s is not compressed\n", org_name));
VOID(my_delete(new_name,MYF(MY_WME)));
DBUG_RETURN(-1);
}
if (write_loop || verbose)
{
if (old_length)
- printf("%.4g%% \n", (((longlong) (old_length -new_length))*100.0/
- (longlong) old_length));
+ VOID(printf("%.4g%% \n",
+ (((longlong) (old_length - new_length)) * 100.0 /
+ (longlong) old_length)));
else
puts("Empty file saved in compressed format");
}
@@ -650,7 +737,7 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
if (join_isam_file >= 0)
VOID(my_close(join_isam_file,MYF(0)));
mrg_close(mrg);
- VOID(fprintf(stderr,"Aborted: %s is not compressed\n",org_name));
+ VOID(fprintf(stderr, "Aborted: %s is not compressed\n", org_name));
DBUG_RETURN(-1);
}
@@ -677,6 +764,12 @@ static HUFF_COUNTS *init_huff_count(MI_INFO *info,my_off_t records)
(type == FIELD_NORMAL ||
type == FIELD_SKIP_ZERO))
count[i].max_zero_fill= count[i].field_length;
+ /*
+ For every column initialize a tree, which is used to detect distinct
+ column values. 'int_tree' works together with 'tree_buff' and
+ 'tree_pos'. It's keys are implemented by pointers into 'tree_buff'.
+ This is accomplished by '-1' as the element size.
+ */
init_tree(&count[i].int_tree,0,0,-1,(qsort_cmp2) compare_tree,0, NULL,
NULL);
if (records && type != FIELD_BLOB && type != FIELD_VARCHAR)
@@ -762,10 +855,13 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
ulong tot_blob_length=0;
if (! error)
{
+ /* glob_crc is a checksum over all bytes of all records. */
if (static_row_size)
glob_crc+=mi_static_checksum(mrg->file[0],record);
else
glob_crc+=mi_checksum(mrg->file[0],record);
+
+ /* Count the incidence of values separately for every column. */
for (pos=record,count=huff_counts ;
count < end_count ;
count++,
@@ -773,15 +869,48 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
{
next_pos=end_pos=(start_pos=pos)+count->field_length;
- /* Put value in tree if there is room for it */
+ /*
+ Put the whole column value in a tree if there is room for it.
+ 'int_tree' is used to quickly check for duplicate values.
+ 'tree_buff' collects as many distinct column values as
+ possible. If the field length is > 1, it is tree_buff_length,
+ else 2 bytes. Each value is 'field_length' bytes big. If there
+ are more distinct column values than fit into the buffer, we
+ give up with this tree. BLOBs and VARCHARs do not have a
+ tree_buff as it can only be used with fixed length columns.
+ For the special case of field length == 1, we handle only the
+ case that there is only one distinct value in the table(s).
+ Otherwise, we can have a maximum of 256 distinct values. This
+ is then handled by the normal Huffman tree build.
+
+ Another limit for collecting distinct column values is the
+ number of values itself. Since we would need to build a
+ Huffman tree for the values, we are limited by the 'IS_OFFSET'
+ constant. This constant expresses a bit which is used to
+ determine if a tree element holds a final value or an offset
+ to a child element. Hence, all values and offsets need to be
+ smaller than 'IS_OFFSET'. A tree element is implemented with
+ two integer values, one for the left branch and one for the
+ right branch. For the extreme case that the first element
+ points to the last element, the number of integers in the tree
+ must be less or equal to IS_OFFSET. So the number of elements
+ must be less or equal to IS_OFFSET / 2.
+
+ WARNING: At first, we insert a pointer into the record buffer
+ as the key for the tree. If we got a new distinct value, which
+ is really inserted into the tree, instead of being counted
+ only, we will copy the column value from the record buffer to
+ 'tree_buff' and adjust the key pointer of the tree accordingly.
+ */
if (count->tree_buff)
{
global_count=count;
if (!(element=tree_insert(&count->int_tree,pos, 0,
count->int_tree.custom_arg)) ||
(element->count == 1 &&
- count->tree_buff + tree_buff_length <
- count->tree_pos + count->field_length) ||
+ (count->tree_buff + tree_buff_length <
+ count->tree_pos + count->field_length)) ||
+ (count->int_tree.elements_in_tree > IS_OFFSET / 2) ||
(count->field_length == 1 &&
count->int_tree.elements_in_tree > 1))
{
@@ -791,10 +920,17 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
}
else
{
+ /*
+ If tree_insert() succeeds, it either creates a new element
+ or increments the counter of an existing element.
+ */
if (element->count == 1)
- { /* New element */
+ {
+ /* Copy the new column value into 'tree_buff'. */
memcpy(count->tree_pos,pos,(size_t) count->field_length);
+ /* Adjust the key pointer in the tree. */
tree_set_pointer(element,count->tree_pos);
+ /* Point behind the last column value so far. */
count->tree_pos+=count->field_length;
}
}
@@ -804,15 +940,21 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
if (count->field_type == FIELD_NORMAL ||
count->field_type == FIELD_SKIP_ENDSPACE)
{
+ /* Ignore trailing space. */
for ( ; end_pos > pos ; end_pos--)
if (end_pos[-1] != ' ')
break;
+ /* Empty fields are just counted. Go to the next record. */
if (end_pos == pos)
{
count->empty_fields++;
count->max_zero_fill=0;
continue;
}
+ /*
+ Count the total of all trailing spaces and the number of
+ short trailing spaces. Remember the longest trailing space.
+ */
length= (uint) (next_pos-end_pos);
count->tot_end_space+=length;
if (length < 8)
@@ -820,18 +962,25 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
if (count->max_end_space < length)
count->max_end_space = length;
}
+
if (count->field_type == FIELD_NORMAL ||
count->field_type == FIELD_SKIP_PRESPACE)
{
+ /* Ignore leading space. */
for (pos=start_pos; pos < end_pos ; pos++)
if (pos[0] != ' ')
break;
+ /* Empty fields are just counted. Go to the next record. */
if (end_pos == pos)
{
count->empty_fields++;
count->max_zero_fill=0;
continue;
}
+ /*
+ Count the total of all leading spaces and the number of
+ short leading spaces. Remember the longest leading space.
+ */
length= (uint) (pos-start_pos);
count->tot_pre_space+=length;
if (length < 8)
@@ -839,6 +988,8 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
if (count->max_pre_space < length)
count->max_pre_space = length;
}
+
+ /* Calculate pos, end_pos, and max_length for variable length fields. */
if (count->field_type == FIELD_BLOB)
{
uint field_length=count->field_length -mi_portable_sizeof_char_ptr;
@@ -857,45 +1008,121 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
end_pos= pos+length;
set_if_bigger(count->max_length,length);
}
+
+ /* Evaluate 'max_zero_fill' for short fields. */
if (count->field_length <= 8 &&
(count->field_type == FIELD_NORMAL ||
count->field_type == FIELD_SKIP_ZERO))
{
uint i;
+ /* Zero fields are just counted. Go to the next record. */
if (!memcmp((byte*) start_pos,zero_string,count->field_length))
{
count->zero_fields++;
continue;
}
+ /*
+ max_zero_fill starts with field_length. It is decreased every
+ time a shorter "zero trailer" is found. It is set to zero when
+ an empty field is found (see above). This suggests that the
+ variable should be called 'min_zero_fill'.
+ */
for (i =0 ; i < count->max_zero_fill && ! end_pos[-1 - (int) i] ;
i++) ;
if (i < count->max_zero_fill)
count->max_zero_fill=i;
}
+
+ /* Ignore zero fields and check fields. */
if (count->field_type == FIELD_ZERO ||
count->field_type == FIELD_CHECK)
continue;
+
+ /*
+ Count the incidence of every byte value in the
+ significant field value.
+ */
for ( ; pos < end_pos ; pos++)
count->counts[(uchar) *pos]++;
+
+ /* Step to next field. */
}
+
if (tot_blob_length > max_blob_length)
max_blob_length=tot_blob_length;
record_count++;
if (write_loop && record_count % WRITE_COUNT == 0)
{
- printf("%lu\r",(ulong) record_count); VOID(fflush(stdout));
+ VOID(printf("%lu\r", (ulong) record_count));
+ VOID(fflush(stdout));
}
}
else if (error != HA_ERR_RECORD_DELETED)
{
- fprintf(stderr,"Got error %d while reading rows",error);
+ VOID(fprintf(stderr, "Got error %d while reading rows", error));
break;
}
+
+ /* Step to next record. */
}
if (write_loop)
{
- printf(" \r"); VOID(fflush(stdout));
+ VOID(printf(" \r"));
+ VOID(fflush(stdout));
}
+
+ /*
+ If --debug=d,fakebigcodes is set, fake the counts to get big Huffman
+ codes.
+ */
+ DBUG_EXECUTE_IF("fakebigcodes", fakebigcodes(huff_counts, end_count););
+
+ DBUG_PRINT("info", ("Found the following number of incidents "
+ "of the byte codes:"));
+ if (verbose >= 2)
+ VOID(printf("Found the following number of incidents "
+ "of the byte codes:\n"));
+ for (count= huff_counts ; count < end_count; count++)
+ {
+ uint idx;
+ my_off_t total_count;
+ char llbuf[32];
+
+ DBUG_PRINT("info", ("column: %3u", count - huff_counts + 1));
+ if (verbose >= 2)
+ VOID(printf("column: %3u\n", count - huff_counts + 1));
+ if (count->tree_buff)
+ {
+ DBUG_PRINT("info", ("number of distinct values: %u",
+ (count->tree_pos - count->tree_buff) /
+ count->field_length));
+ if (verbose >= 2)
+ VOID(printf("number of distinct values: %u\n",
+ (count->tree_pos - count->tree_buff) /
+ count->field_length));
+ }
+ total_count= 0;
+ for (idx= 0; idx < 256; idx++)
+ {
+ if (count->counts[idx])
+ {
+ total_count+= count->counts[idx];
+ DBUG_PRINT("info", ("counts[0x%02x]: %12s", idx,
+ llstr((longlong) count->counts[idx], llbuf)));
+ if (verbose >= 2)
+ VOID(printf("counts[0x%02x]: %12s\n", idx,
+ llstr((longlong) count->counts[idx], llbuf)));
+ }
+ }
+ DBUG_PRINT("info", ("total: %12s", llstr((longlong) total_count,
+ llbuf)));
+ if ((verbose >= 2) && total_count)
+ {
+ VOID(printf("total: %12s\n",
+ llstr((longlong) total_count, llbuf)));
+ }
+ }
+
mrg->records=record_count;
mrg->max_blob_length=max_blob_length;
my_afree((gptr) record);
@@ -944,9 +1171,14 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
huff_counts->field_type=FIELD_NORMAL;
huff_counts->pack_type=0;
+ /* Check for zero-filled records (in this column), or zero records. */
if (huff_counts->zero_fields || ! records)
{
my_off_t old_space_count;
+ /*
+ If there are only zero filled records (in this column),
+ or no records at all, we are done.
+ */
if (huff_counts->zero_fields == records)
{
huff_counts->field_type= FIELD_ZERO;
@@ -954,14 +1186,22 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
huff_counts->counts[0]=0;
goto found_pack;
}
+ /* Remeber the number of significant spaces. */
old_space_count=huff_counts->counts[' '];
- huff_counts->counts[' ']+=huff_counts->tot_end_space+
- huff_counts->tot_pre_space +
- huff_counts->empty_fields * huff_counts->field_length;
+ /* Add all leading and trailing spaces. */
+ huff_counts->counts[' ']+= (huff_counts->tot_end_space +
+ huff_counts->tot_pre_space +
+ huff_counts->empty_fields *
+ huff_counts->field_length);
+ /* Check, what the compressed length of this would be. */
old_length=calc_packed_length(huff_counts,0)+records/8;
+ /* Get the number of zero bytes. */
length=huff_counts->zero_fields*huff_counts->field_length;
+ /* Add it to the counts. */
huff_counts->counts[0]+=length;
+ /* Check, what the compressed length of this would be. */
new_length=calc_packed_length(huff_counts,0);
+ /* If the compression without the zeroes would be shorter, we are done. */
if (old_length < new_length && huff_counts->field_length > 1)
{
huff_counts->field_type=FIELD_SKIP_ZERO;
@@ -969,9 +1209,16 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
huff_counts->bytes_packed=old_length- records/8;
goto found_pack;
}
+ /* Remove the insignificant spaces, but keep the zeroes. */
huff_counts->counts[' ']=old_space_count;
}
+ /* Check, what the compressed length of this column would be. */
huff_counts->bytes_packed=calc_packed_length(huff_counts,0);
+
+ /*
+ If there are enough empty records (in this column),
+ treating them specially may pay off.
+ */
if (huff_counts->empty_fields)
{
if (huff_counts->field_length > 2 &&
@@ -1003,6 +1250,11 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
}
}
}
+
+ /*
+ If there are enough trailing spaces (in this column),
+ treating them specially may pay off.
+ */
if (huff_counts->tot_end_space)
{
huff_counts->counts[' ']+=huff_counts->tot_pre_space;
@@ -1012,6 +1264,11 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
goto found_pack;
huff_counts->counts[' ']-=huff_counts->tot_pre_space;
}
+
+ /*
+ If there are enough leading spaces (in this column),
+ treating them specially may pay off.
+ */
if (huff_counts->tot_pre_space)
{
if (test_space_compress(huff_counts,records,huff_counts->max_pre_space,
@@ -1041,6 +1298,8 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
{
HUFF_TREE tree;
+ DBUG_EXECUTE_IF("forceintervall",
+ huff_counts->bytes_packed= ~ (my_off_t) 0;);
tree.element_buffer=0;
if (!make_huff_tree(&tree,huff_counts) &&
tree.bytes_packed+tree.tree_pack_length < huff_counts->bytes_packed)
@@ -1066,14 +1325,27 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
fill_zero_fields++;
field_count[huff_counts->field_type]++;
}
+ DBUG_PRINT("info", ("normal: %3d empty-space: %3d "
+ "empty-zero: %3d empty-fill: %3d",
+ field_count[FIELD_NORMAL],space_fields,
+ field_count[FIELD_SKIP_ZERO],fill_zero_fields));
+ DBUG_PRINT("info", ("pre-space: %3d end-space: %3d "
+ "intervall-fields: %3d zero: %3d",
+ field_count[FIELD_SKIP_PRESPACE],
+ field_count[FIELD_SKIP_ENDSPACE],
+ field_count[FIELD_INTERVALL],
+ field_count[FIELD_ZERO]));
if (verbose)
- printf("\nnormal: %3d empty-space: %3d empty-zero: %3d empty-fill: %3d\npre-space: %3d end-space: %3d intervall-fields: %3d zero: %3d\n",
- field_count[FIELD_NORMAL],space_fields,
- field_count[FIELD_SKIP_ZERO],fill_zero_fields,
- field_count[FIELD_SKIP_PRESPACE],
- field_count[FIELD_SKIP_ENDSPACE],
- field_count[FIELD_INTERVALL],
- field_count[FIELD_ZERO]);
+ VOID(printf("\nnormal: %3d empty-space: %3d "
+ "empty-zero: %3d empty-fill: %3d\n"
+ "pre-space: %3d end-space: %3d "
+ "intervall-fields: %3d zero: %3d\n",
+ field_count[FIELD_NORMAL],space_fields,
+ field_count[FIELD_SKIP_ZERO],fill_zero_fields,
+ field_count[FIELD_SKIP_PRESPACE],
+ field_count[FIELD_SKIP_ENDSPACE],
+ field_count[FIELD_INTERVALL],
+ field_count[FIELD_ZERO]));
DBUG_VOID_RETURN;
}
@@ -1170,8 +1442,24 @@ static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts, uint trees)
DBUG_RETURN(huff_tree);
}
- /* Update huff_tree according to huff_counts->counts or
- huff_counts->tree_buff */
+/*
+ Build a Huffman tree.
+
+ SYNOPSIS
+ make_huff_tree()
+ huff_tree The Huffman tree.
+ huff_counts The counts.
+
+ DESCRIPTION
+ Build a Huffman tree according to huff_counts->counts or
+ huff_counts->tree_buff. tree_buff, if non-NULL contains up to
+ tree_buff_length of distinct column values. In that case, whole
+ values can be Huffman encoded instead of single bytes.
+
+ RETURN
+ 0 OK
+ != 0 Error
+*/
static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
{
@@ -1182,12 +1470,14 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
first=last=0;
if (huff_counts->tree_buff)
{
+ /* Calculate the number of distinct values in tree_buff. */
found= (uint) (huff_counts->tree_pos - huff_counts->tree_buff) /
huff_counts->field_length;
first=0; last=found-1;
}
else
{
+ /* Count the number of byte codes found in the column. */
for (i=found=0 ; i < 256 ; i++)
{
if (huff_counts->counts[i])
@@ -1201,6 +1491,7 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
found=2;
}
+ /* When using 'tree_buff' we can have more that 256 values. */
if (queue.max_elements < found)
{
delete_queue(&queue);
@@ -1208,6 +1499,7 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
return -1;
}
+ /* Allocate or reallocate an element buffer for the Huffman tree. */
if (!huff_tree->element_buffer)
{
if (!(huff_tree->element_buffer=
@@ -1235,15 +1527,25 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
if (huff_counts->tree_buff)
{
huff_tree->elements=0;
- tree_walk(&huff_counts->int_tree,
- (int (*)(void*, element_count,void*)) save_counts_in_queue,
- (gptr) huff_tree, left_root_right);
huff_tree->tree_pack_length=(1+15+16+5+5+
(huff_tree->char_bits+1)*found+
(huff_tree->offset_bits+1)*
(found-2)+7)/8 +
(uint) (huff_tree->counts->tree_pos-
huff_tree->counts->tree_buff);
+ /*
+ Put a HUFF_ELEMENT into the queue for every distinct column value.
+
+ tree_walk() calls save_counts_in_queue() for every element in
+ 'int_tree'. This takes elements from the target trees element
+ buffer and places references to them into the buffer of the
+ priority queue. We insert in column value order, but the order is
+ in fact irrelevant here. We will establish the correct order
+ later.
+ */
+ tree_walk(&huff_counts->int_tree,
+ (int (*)(void*, element_count,void*)) save_counts_in_queue,
+ (gptr) huff_tree, left_root_right);
}
else
{
@@ -1252,7 +1554,15 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
(huff_tree->char_bits+1)*found+
(huff_tree->offset_bits+1)*
(found-2)+7)/8;
+ /*
+ Put a HUFF_ELEMENT into the queue for every byte code found in the column.
+ The elements are taken from the target trees element buffer.
+ Instead of using queue_insert(), we just place references to the
+ elements into the buffer of the priority queue. We insert in byte
+ value order, but the order is in fact irrelevant here. We will
+ establish the correct order later.
+ */
for (i=first, found=0 ; i <= last ; i++)
{
if (huff_counts->counts[i])
@@ -1264,8 +1574,13 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
queue.root[found]=(byte*) new_huff_el;
}
}
+ /*
+ If there is only a single byte value in this field in all records,
+ add a second element with zero incidence. This is required to enter
+ the loop, which builds the Huffman tree.
+ */
while (found < 2)
- { /* Our huff_trees request at least 2 elements */
+ {
new_huff_el=huff_tree->element_buffer+(found++);
new_huff_el->count=0;
new_huff_el->a.leaf.null=0;
@@ -1276,21 +1591,53 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
queue.root[found]=(byte*) new_huff_el;
}
}
+
+ /* Make a queue from the queue buffer. */
queue.elements=found;
+ /*
+ Make a priority queue from the queue. Construct its index so that we
+ have a partially ordered tree.
+ */
for (i=found/2 ; i > 0 ; i--)
_downheap(&queue,i);
+
+ /* The Huffman algorithm. */
bytes_packed=0; bits_packed=0;
for (i=1 ; i < found ; i++)
{
+ /*
+ Pop the top element from the queue (the one with the least incidence).
+ Popping from a priority queue includes a re-ordering of the queue,
+ to get the next least incidence element to the top.
+ */
a=(HUFF_ELEMENT*) queue_remove(&queue,0);
+ /*
+ Copy the next least incidence element. The queue implementation
+ reserves root[0] for temporary purposes. root[1] is the top.
+ */
b=(HUFF_ELEMENT*) queue.root[1];
+ /* Get a new element from the element buffer. */
new_huff_el=huff_tree->element_buffer+found+i;
+ /* The new element gets the sum of the two least incidence elements. */
new_huff_el->count=a->count+b->count;
+ /*
+ The Huffman algorithm assigns another bit to the code for a byte
+ every time that bytes incidence is combined (directly or indirectly)
+ to a new element as one of the two least incidence elements.
+ This means that one more bit per incidence of that byte is required
+ in the resulting file. So we add the new combined incidence as the
+ number of bits by which the result grows.
+ */
bits_packed+=(uint) (new_huff_el->count & 7);
bytes_packed+=new_huff_el->count/8;
- new_huff_el->a.nod.left=a; /* lesser in left */
+ /* The new element points to its children, lesser in left. */
+ new_huff_el->a.nod.left=a;
new_huff_el->a.nod.right=b;
+ /*
+ Replace the copied top element by the new element and re-order the
+ queue.
+ */
queue.root[1]=(byte*) new_huff_el;
queue_replaced(&queue);
}
@@ -1309,7 +1656,26 @@ static int compare_tree(void* cmp_arg __attribute__((unused)),
return 0;
}
- /* Used by make_huff_tree to save intervall-counts in queue */
+/*
+ Organize distinct column values and their incidences into a priority queue.
+
+ SYNOPSIS
+ save_counts_in_queue()
+ key The column value.
+ count The incidence of this value.
+ tree The Huffman tree to be built later.
+
+ DESCRIPTION
+ We use the element buffer of the targeted tree. The distinct column
+ values are organized in a priority queue first. The Huffman
+ algorithm will later organize the elements into a Huffman tree. For
+ the time being, we just place references to the elements into the
+ queue buffer. The buffer will later be organized into a priority
+ queue.
+
+ RETURN
+ 0
+ */
static int save_counts_in_queue(byte *key, element_count count,
HUFF_TREE *tree)
@@ -1326,8 +1692,23 @@ static int save_counts_in_queue(byte *key, element_count count,
}
- /* Calculate length of file if given counts should be used */
- /* Its actually a faster version of make_huff_tree */
+/*
+ Calculate length of file if given counts should be used.
+
+ SYNOPSIS
+ calc_packed_length()
+ huff_counts The counts for a column of the table(s).
+ add_tree_lenght If the decode tree length should be added.
+
+ DESCRIPTION
+ We need to follow the Huffman algorithm until we know, how many bits
+ are required for each byte code. But we do not need the resulting
+ Huffman tree. Hence, we can leave out some steps which are essential
+ in make_huff_tree().
+
+ RETURN
+ Number of bytes required to compress this table column.
+*/
static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
uint add_tree_lenght)
@@ -1337,6 +1718,23 @@ static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
HUFF_ELEMENT element_buffer[256];
DBUG_ENTER("calc_packed_length");
+ /*
+ WARNING: We use a small hack for efficiency: Instead of placing
+ references to HUFF_ELEMENTs into the queue, we just insert
+ references to the counts of the byte codes which appeared in this
+ table column. During the Huffman algorithm they are successively
+ replaced by references to HUFF_ELEMENTs. This works, because
+ HUFF_ELEMENTs have the incidence count at their beginning.
+ Regardless, wether the queue array contains references to counts of
+ type my_off_t or references to HUFF_ELEMENTs which have the count of
+ type my_off_t at their beginning, it always points to a count of the
+ same type.
+
+ Instead of using queue_insert(), we just copy the references into
+ the buffer of the priority queue. We insert in byte value order, but
+ the order is in fact irrelevant here. We will establish the correct
+ order later.
+ */
first=last=0;
for (i=found=0 ; i < 256 ; i++)
{
@@ -1345,31 +1743,73 @@ static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
if (! found++)
first=i;
last=i;
+ /* We start with root[1], which is the queues top element. */
queue.root[found]=(byte*) &huff_counts->counts[i];
}
}
if (!found)
DBUG_RETURN(0); /* Empty tree */
+ /*
+ If there is only a single byte value in this field in all records,
+ add a second element with zero incidence. This is required to enter
+ the loop, which follows the Huffman algorithm.
+ */
if (found < 2)
queue.root[++found]=(byte*) &huff_counts->counts[last ? 0 : 1];
+ /* Make a queue from the queue buffer. */
queue.elements=found;
bytes_packed=0; bits_packed=0;
+ /* Add the length of the coding table, which would become part of the file. */
if (add_tree_lenght)
bytes_packed=(8+9+5+5+(max_bit(last-first)+1)*found+
(max_bit(found-1)+1+1)*(found-2) +7)/8;
+
+ /*
+ Make a priority queue from the queue. Construct its index so that we
+ have a partially ordered tree.
+ */
for (i=(found+1)/2 ; i > 0 ; i--)
_downheap(&queue,i);
+
+ /* The Huffman algorithm. */
for (i=0 ; i < found-1 ; i++)
{
- HUFF_ELEMENT *a,*b,*new_huff_el;
- a=(HUFF_ELEMENT*) queue_remove(&queue,0);
- b=(HUFF_ELEMENT*) queue.root[1];
- new_huff_el=element_buffer+i;
- new_huff_el->count=a->count+b->count;
+ my_off_t *a;
+ my_off_t *b;
+ HUFF_ELEMENT *new_huff_el;
+
+ /*
+ Pop the top element from the queue (the one with the least
+ incidence). Popping from a priority queue includes a re-ordering
+ of the queue, to get the next least incidence element to the top.
+ */
+ a= (my_off_t*) queue_remove(&queue, 0);
+ /*
+ Copy the next least incidence element. The queue implementation
+ reserves root[0] for temporary purposes. root[1] is the top.
+ */
+ b= (my_off_t*) queue.root[1];
+ /* Create a new element in a local (automatic) buffer. */
+ new_huff_el= element_buffer + i;
+ /* The new element gets the sum of the two least incidence elements. */
+ new_huff_el->count= *a + *b;
+ /*
+ The Huffman algorithm assigns another bit to the code for a byte
+ every time that bytes incidence is combined (directly or indirectly)
+ to a new element as one of the two least incidence elements.
+ This means that one more bit per incidence of that byte is required
+ in the resulting file. So we add the new combined incidence as the
+ number of bits by which the result grows.
+ */
bits_packed+=(uint) (new_huff_el->count & 7);
bytes_packed+=new_huff_el->count/8;
+ /*
+ Replace the copied top element by the new element and re-order the
+ queue. This successively replaces the references to counts by
+ references to HUFF_ELEMENTs.
+ */
queue.root[1]=(byte*) new_huff_el;
queue_replaced(&queue);
}
@@ -1417,13 +1857,26 @@ static uint join_same_trees(HUFF_COUNTS *huff_counts, uint trees)
}
}
}
+ DBUG_PRINT("info", ("Original trees: %d After join: %d",
+ trees, tree_number));
if (verbose)
- printf("Original trees: %d After join: %d\n",trees,tree_number);
+ VOID(printf("Original trees: %d After join: %d\n", trees, tree_number));
return tree_number; /* Return trees left */
}
- /* Fill in huff_tree decode tables */
+/*
+ Fill in huff_tree encode tables.
+
+ SYNOPSIS
+ make_huff_decode_table()
+ huff_tree An array of HUFF_TREE which are to be encoded.
+ trees The number of HUFF_TREE in the array.
+
+ RETURN
+ 0 success
+ != 0 error
+*/
static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees)
{
@@ -1434,12 +1887,13 @@ static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees)
{
elements=huff_tree->counts->tree_buff ? huff_tree->elements : 256;
if (!(huff_tree->code =
- (ulong*) my_malloc(elements*
- (sizeof(ulong)+sizeof(uchar)),
- MYF(MY_WME | MY_ZEROFILL))))
+ (ulonglong*) my_malloc(elements*
+ (sizeof(ulonglong) + sizeof(uchar)),
+ MYF(MY_WME | MY_ZEROFILL))))
return 1;
huff_tree->code_len=(uchar*) (huff_tree->code+elements);
- make_traverse_code_tree(huff_tree,huff_tree->root,32,0);
+ make_traverse_code_tree(huff_tree, huff_tree->root,
+ 8 * sizeof(ulonglong), LL(0));
}
}
return 0;
@@ -1448,28 +1902,90 @@ static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees)
static void make_traverse_code_tree(HUFF_TREE *huff_tree,
HUFF_ELEMENT *element,
- uint size, ulong code)
+ uint size, ulonglong code)
{
uint chr;
if (!element->a.leaf.null)
{
chr=element->a.leaf.element_nr;
- huff_tree->code_len[chr]=(uchar) (32-size);
- huff_tree->code[chr]= (code >> size);
- if (huff_tree->height < 32-size)
- huff_tree->height= 32-size;
+ huff_tree->code_len[chr]= (uchar) (8 * sizeof(ulonglong) - size);
+ huff_tree->code[chr]= (code >> size);
+ if (huff_tree->height < 8 * sizeof(ulonglong) - size)
+ huff_tree->height= 8 * sizeof(ulonglong) - size;
}
else
{
size--;
make_traverse_code_tree(huff_tree,element->a.nod.left,size,code);
- make_traverse_code_tree(huff_tree,element->a.nod.right,size,
- code+((ulong) 1L << size));
+ make_traverse_code_tree(huff_tree, element->a.nod.right, size,
+ code + (((ulonglong) 1) << size));
}
return;
}
+/*
+ Convert a value into binary digits.
+
+ SYNOPSIS
+ bindigits()
+ value The value.
+ length The number of low order bits to convert.
+
+ NOTE
+ The result string is in static storage. It is reused on every call.
+ So you cannot use it twice in one expression.
+
+ RETURN
+ A pointer to a static NUL-terminated string.
+ */
+
+static char *bindigits(ulonglong value, uint bits)
+{
+ static char digits[72];
+ char *ptr= digits;
+ uint idx= bits;
+
+ DBUG_ASSERT(idx < sizeof(digits));
+ while (idx)
+ *(ptr++)= '0' + ((value >> (--idx)) & 1);
+ *ptr= '\0';
+ return digits;
+}
+
+
+/*
+ Convert a value into hexadecimal digits.
+
+ SYNOPSIS
+ hexdigits()
+ value The value.
+
+ NOTE
+ The result string is in static storage. It is reused on every call.
+ So you cannot use it twice in one expression.
+
+ RETURN
+ A pointer to a static NUL-terminated string.
+ */
+
+static char *hexdigits(ulonglong value)
+{
+ static char digits[20];
+ char *ptr= digits;
+ uint idx= 2 * sizeof(value); /* Two hex digits per byte. */
+
+ DBUG_ASSERT(idx < sizeof(digits));
+ while (idx)
+ {
+ if ((*(ptr++)= '0' + ((value >> (4 * (--idx))) & 0xf)) > '9')
+ *(ptr - 1)+= 'a' - '9' - 1;
+ }
+ *ptr= '\0';
+ return digits;
+}
+
+
/* Write header to new packed data file */
static int write_header(PACK_MRG_INFO *mrg,uint head_length,uint trees,
@@ -1503,15 +2019,64 @@ static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees)
uint huff_tree_bits;
huff_tree_bits=max_bit(trees ? trees-1 : 0);
+ DBUG_PRINT("info", (""));
+ DBUG_PRINT("info", ("column types:"));
+ DBUG_PRINT("info", ("FIELD_NORMAL 0"));
+ DBUG_PRINT("info", ("FIELD_SKIP_ENDSPACE 1"));
+ DBUG_PRINT("info", ("FIELD_SKIP_PRESPACE 2"));
+ DBUG_PRINT("info", ("FIELD_SKIP_ZERO 3"));
+ DBUG_PRINT("info", ("FIELD_BLOB 4"));
+ DBUG_PRINT("info", ("FIELD_CONSTANT 5"));
+ DBUG_PRINT("info", ("FIELD_INTERVALL 6"));
+ DBUG_PRINT("info", ("FIELD_ZERO 7"));
+ DBUG_PRINT("info", ("FIELD_VARCHAR 8"));
+ DBUG_PRINT("info", ("FIELD_CHECK 9"));
+ DBUG_PRINT("info", (""));
+ DBUG_PRINT("info", ("pack type as a set of flags:"));
+ DBUG_PRINT("info", ("PACK_TYPE_SELECTED 1"));
+ DBUG_PRINT("info", ("PACK_TYPE_SPACE_FIELDS 2"));
+ DBUG_PRINT("info", ("PACK_TYPE_ZERO_FILL 4"));
+ DBUG_PRINT("info", (""));
+ if (verbose >= 2)
+ {
+ VOID(printf("\n"));
+ VOID(printf("column types:\n"));
+ VOID(printf("FIELD_NORMAL 0\n"));
+ VOID(printf("FIELD_SKIP_ENDSPACE 1\n"));
+ VOID(printf("FIELD_SKIP_PRESPACE 2\n"));
+ VOID(printf("FIELD_SKIP_ZERO 3\n"));
+ VOID(printf("FIELD_BLOB 4\n"));
+ VOID(printf("FIELD_CONSTANT 5\n"));
+ VOID(printf("FIELD_INTERVALL 6\n"));
+ VOID(printf("FIELD_ZERO 7\n"));
+ VOID(printf("FIELD_VARCHAR 8\n"));
+ VOID(printf("FIELD_CHECK 9\n"));
+ VOID(printf("\n"));
+ VOID(printf("pack type as a set of flags:\n"));
+ VOID(printf("PACK_TYPE_SELECTED 1\n"));
+ VOID(printf("PACK_TYPE_SPACE_FIELDS 2\n"));
+ VOID(printf("PACK_TYPE_ZERO_FILL 4\n"));
+ VOID(printf("\n"));
+ }
for (i=0 ; i++ < fields ; counts++)
{
- write_bits((ulong) (int) counts->field_type,5);
+ write_bits((ulonglong) (int) counts->field_type, 5);
write_bits(counts->pack_type,6);
if (counts->pack_type & PACK_TYPE_ZERO_FILL)
write_bits(counts->max_zero_fill,5);
else
write_bits(counts->length_bits,5);
- write_bits((ulong) counts->tree->tree_number-1,huff_tree_bits);
+ write_bits((ulonglong) counts->tree->tree_number - 1, huff_tree_bits);
+ DBUG_PRINT("info", ("column: %3u type: %2u pack: %2u zero: %4u "
+ "lbits: %2u tree: %2u length: %4u",
+ i , counts->field_type, counts->pack_type,
+ counts->max_zero_fill, counts->length_bits,
+ counts->tree->tree_number, counts->field_length));
+ if (verbose >= 2)
+ VOID(printf("column: %3u type: %2u pack: %2u zero: %4u lbits: %2u "
+ "tree: %2u length: %4u\n", i , counts->field_type,
+ counts->pack_type, counts->max_zero_fill, counts->length_bits,
+ counts->tree->tree_number, counts->field_length));
}
flush_bits();
return;
@@ -1524,45 +2089,72 @@ static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees)
static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees)
{
uint i,int_length;
+ uint tree_no;
+ uint codes;
+ uint errors= 0;
uint *packed_tree,*offset,length;
my_off_t elements;
+ /* Find the highest number of elements in the trees. */
for (i=length=0 ; i < trees ; i++)
if (huff_tree[i].tree_number > 0 && huff_tree[i].elements > length)
length=huff_tree[i].elements;
+ /*
+ Allocate a buffer for packing a decode tree. Two numbers per element
+ (left child and right child).
+ */
if (!(packed_tree=(uint*) my_alloca(sizeof(uint)*length*2)))
{
my_error(EE_OUTOFMEMORY,MYF(ME_BELL),sizeof(uint)*length*2);
return 0;
}
+ DBUG_PRINT("info", (""));
+ if (verbose >= 2)
+ VOID(printf("\n"));
+ tree_no= 0;
intervall_length=0;
for (elements=0; trees-- ; huff_tree++)
{
+ /* Skip columns that have been joined with other columns. */
if (huff_tree->tree_number == 0)
continue; /* Deleted tree */
+ tree_no++;
+ DBUG_PRINT("info", (""));
+ if (verbose >= 3)
+ VOID(printf("\n"));
+ /* Count the total number of elements (byte codes or column values). */
elements+=huff_tree->elements;
huff_tree->max_offset=2;
+ /* Build a tree of offsets and codes for decoding in 'packed_tree'. */
if (huff_tree->elements <= 1)
offset=packed_tree;
else
offset=make_offset_code_tree(huff_tree,huff_tree->root,packed_tree);
+
+ /* This should be the same as 'length' above. */
huff_tree->offset_bits=max_bit(huff_tree->max_offset);
+
+ /*
+ Since we check this during collecting the distinct column values,
+ this should never happen.
+ */
if (huff_tree->max_offset >= IS_OFFSET)
{ /* This should be impossible */
- VOID(fprintf(stderr,"Tree offset got too big: %d, aborted\n",
- huff_tree->max_offset));
+ VOID(fprintf(stderr, "Tree offset got too big: %d, aborted\n",
+ huff_tree->max_offset));
my_afree((gptr) packed_tree);
return 0;
}
-#ifdef EXTRA_DBUG
- printf("pos: %d elements: %d tree-elements: %d char_bits: %d\n",
- (uint) (file_buffer.pos-file_buffer.buffer),
- huff_tree->elements, (offset-packed_tree),huff_tree->char_bits);
-#endif
+ DBUG_PRINT("info", ("pos: %lu elements: %u tree-elements: %lu "
+ "char_bits: %u\n",
+ (ulong) (file_buffer.pos - file_buffer.buffer),
+ huff_tree->elements, (ulong) (offset - packed_tree),
+ huff_tree->char_bits));
if (!huff_tree->counts->tree_buff)
{
+ /* We do a byte compression on this column. Mark with bit 0. */
write_bits(0,1);
write_bits(huff_tree->min_chr,8);
write_bits(huff_tree->elements,9);
@@ -1574,6 +2166,7 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees)
{
int_length=(uint) (huff_tree->counts->tree_pos -
huff_tree->counts->tree_buff);
+ /* We have distinct column values for this column. Mark with bit 1. */
write_bits(1,1);
write_bits(huff_tree->elements,15);
write_bits(int_length,16);
@@ -1581,10 +2174,29 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees)
write_bits(huff_tree->offset_bits,5);
intervall_length+=int_length;
}
+ DBUG_PRINT("info", ("tree: %2u elements: %4u char_bits: %2u "
+ "offset_bits: %2u %s: %5u codelen: %2u",
+ tree_no, huff_tree->elements, huff_tree->char_bits,
+ huff_tree->offset_bits, huff_tree->counts->tree_buff ?
+ "bufflen" : "min_chr", huff_tree->counts->tree_buff ?
+ int_length : huff_tree->min_chr, huff_tree->height));
+ if (verbose >= 2)
+ VOID(printf("tree: %2u elements: %4u char_bits: %2u offset_bits: %2u "
+ "%s: %5u codelen: %2u\n", tree_no, huff_tree->elements,
+ huff_tree->char_bits, huff_tree->offset_bits,
+ huff_tree->counts->tree_buff ? "bufflen" : "min_chr",
+ huff_tree->counts->tree_buff ? int_length :
+ huff_tree->min_chr, huff_tree->height));
+
+ /* Check that the code tree length matches the element count. */
length=(uint) (offset-packed_tree);
if (length != huff_tree->elements*2-2)
- printf("error: Huff-tree-length: %d != calc_length: %d\n",
- length,huff_tree->elements*2-2);
+ {
+ VOID(fprintf(stderr, "error: Huff-tree-length: %d != calc_length: %d\n",
+ length, huff_tree->elements * 2 - 2));
+ errors++;
+ break;
+ }
for (i=0 ; i < length ; i++)
{
@@ -1593,16 +2205,122 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees)
huff_tree->offset_bits+1);
else
write_bits(packed_tree[i]-huff_tree->min_chr,huff_tree->char_bits+1);
+ DBUG_PRINT("info", ("tree[0x%04x]: %s0x%04x",
+ i, (packed_tree[i] & IS_OFFSET) ?
+ " -> " : "", (packed_tree[i] & IS_OFFSET) ?
+ packed_tree[i] - IS_OFFSET + i : packed_tree[i]));
+ if (verbose >= 3)
+ VOID(printf("tree[0x%04x]: %s0x%04x\n",
+ i, (packed_tree[i] & IS_OFFSET) ? " -> " : "",
+ (packed_tree[i] & IS_OFFSET) ?
+ packed_tree[i] - IS_OFFSET + i : packed_tree[i]));
}
flush_bits();
+
+ /*
+ Display coding tables and check their correctness.
+ */
+ codes= huff_tree->counts->tree_buff ? huff_tree->elements : 256;
+ for (i= 0; i < codes; i++)
+ {
+ ulonglong code;
+ uint bits;
+ uint len;
+ uint idx;
+
+ if (! (len= huff_tree->code_len[i]))
+ continue;
+ DBUG_PRINT("info", ("code[0x%04x]: 0x%s bits: %2u bin: %s", i,
+ hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
+ bindigits(huff_tree->code[i],
+ huff_tree->code_len[i])));
+ if (verbose >= 3)
+ VOID(printf("code[0x%04x]: 0x%s bits: %2u bin: %s\n", i,
+ hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
+ bindigits(huff_tree->code[i], huff_tree->code_len[i])));
+
+ /* Check that the encode table decodes correctly. */
+ code= 0;
+ bits= 0;
+ idx= 0;
+ DBUG_EXECUTE_IF("forcechkerr1", len--;);
+ DBUG_EXECUTE_IF("forcechkerr2", bits= 8 * sizeof(code););
+ DBUG_EXECUTE_IF("forcechkerr3", idx= length;);
+ for (;;)
+ {
+ if (! len)
+ {
+ VOID(fflush(stdout));
+ VOID(fprintf(stderr, "error: code 0x%s with %u bits not found\n",
+ hexdigits(huff_tree->code[i]), huff_tree->code_len[i]));
+ errors++;
+ break;
+ }
+ code<<= 1;
+ code|= (huff_tree->code[i] >> (--len)) & 1;
+ bits++;
+ if (bits > 8 * sizeof(code))
+ {
+ VOID(fflush(stdout));
+ VOID(fprintf(stderr, "error: Huffman code too long: %u/%u\n",
+ bits, 8 * sizeof(code)));
+ errors++;
+ break;
+ }
+ idx+= code & 1;
+ if (idx >= length)
+ {
+ VOID(fflush(stdout));
+ VOID(fprintf(stderr, "error: illegal tree offset: %u/%u\n",
+ idx, length));
+ errors++;
+ break;
+ }
+ if (packed_tree[idx] & IS_OFFSET)
+ idx+= packed_tree[idx] & ~IS_OFFSET;
+ else
+ break; /* Hit a leaf. This contains the result value. */
+ }
+ if (errors)
+ break;
+
+ DBUG_EXECUTE_IF("forcechkerr4", packed_tree[idx]++;);
+ if (packed_tree[idx] != i)
+ {
+ VOID(fflush(stdout));
+ VOID(fprintf(stderr, "error: decoded value 0x%04x should be: 0x%04x\n",
+ packed_tree[idx], i));
+ errors++;
+ break;
+ }
+ } /*end for (codes)*/
+ if (errors)
+ break;
+
+ /* Write column values in case of distinct column value compression. */
if (huff_tree->counts->tree_buff)
{
for (i=0 ; i < int_length ; i++)
- write_bits((uint) (uchar) huff_tree->counts->tree_buff[i],8);
+ {
+ write_bits((ulonglong) (uchar) huff_tree->counts->tree_buff[i], 8);
+ DBUG_PRINT("info", ("column_values[0x%04x]: 0x%02x",
+ i, (uchar) huff_tree->counts->tree_buff[i]));
+ if (verbose >= 3)
+ VOID(printf("column_values[0x%04x]: 0x%02x\n",
+ i, (uchar) huff_tree->counts->tree_buff[i]));
+ }
}
flush_bits();
}
+ DBUG_PRINT("info", (""));
+ if (verbose >= 2)
+ VOID(printf("\n"));
my_afree((gptr) packed_tree);
+ if (errors)
+ {
+ VOID(fprintf(stderr, "Error: Generated decode trees are corrupt. Stop.\n"));
+ return 0;
+ }
return elements;
}
@@ -1613,23 +2331,43 @@ static uint *make_offset_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element,
uint *prev_offset;
prev_offset= offset;
+ /*
+ 'a.leaf.null' takes the same place as 'a.nod.left'. If this is null,
+ then there is no left child and, hence no right child either. This
+ is a property of a binary tree. An element is either a node with two
+ childs, or a leaf without childs.
+
+ The current element is always a node with two childs. Go left first.
+ */
if (!element->a.nod.left->a.leaf.null)
{
- offset[0] =(uint) element->a.nod.left->a.leaf.element_nr;
+ /* Store the byte code or the index of the column value. */
+ prev_offset[0] =(uint) element->a.nod.left->a.leaf.element_nr;
offset+=2;
}
else
{
+ /*
+ Recursively traverse the tree to the left. Mark it as an offset to
+ another tree node (in contrast to a byte code or column value index).
+ */
prev_offset[0]= IS_OFFSET+2;
offset=make_offset_code_tree(huff_tree,element->a.nod.left,offset+2);
}
+
+ /* Now, check the right child. */
if (!element->a.nod.right->a.leaf.null)
{
+ /* Store the byte code or the index of the column value. */
prev_offset[1]=element->a.nod.right->a.leaf.element_nr;
return offset;
}
else
{
+ /*
+ Recursively traverse the tree to the right. Mark it as an offset to
+ another tree node (in contrast to a byte code or column value index).
+ */
uint temp=(uint) (offset-prev_offset-1);
prev_offset[1]= IS_OFFSET+ temp;
if (huff_tree->max_offset < temp)
@@ -1656,6 +2394,7 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
uint i,max_calc_length,pack_ref_length,min_record_length,max_record_length,
intervall,field_length,max_pack_length,pack_blob_length;
my_off_t record_count;
+ char llbuf[32];
ulong length,pack_length;
byte *record,*pos,*end_pos,*record_pos,*start_pos;
HUFF_COUNTS *count,*end_count;
@@ -1663,12 +2402,23 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
MI_INFO *isam_file=mrg->file[0];
DBUG_ENTER("compress_isam_file");
+ /* Allocate a buffer for the records (excluding blobs). */
if (!(record=(byte*) my_alloca(isam_file->s->base.reclength)))
return -1;
+
end_count=huff_counts+isam_file->s->base.fields;
min_record_length= (uint) ~0;
max_record_length=0;
+ /*
+ Calculate the maximum number of bits required to pack the records.
+ Remember to understand 'max_zero_fill' as 'min_zero_fill'.
+ The tree height determines the maximum number of bits per value.
+ Some fields skip leading or trailing spaces or zeroes. The skipped
+ number of bytes is encoded by 'length_bits' bits.
+ Empty blobs and varchar are encoded with a single 1 bit. Other blobs
+ and varchar get a leading 0 bit.
+ */
for (i=max_calc_length=0 ; i < isam_file->s->base.fields ; i++)
{
if (!(huff_counts[i].pack_type & PACK_TYPE_ZERO_FILL))
@@ -1687,14 +2437,16 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
(huff_counts[i].field_length - huff_counts[i].max_zero_fill)*
huff_counts[i].tree->height+huff_counts[i].length_bits;
}
- max_calc_length/=8;
+ max_calc_length= (max_calc_length + 7) / 8;
if (max_calc_length < 254)
pack_ref_length=1;
else if (max_calc_length <= 65535)
pack_ref_length=3;
else
pack_ref_length=4;
+
record_count=0;
+ /* 'max_blob_length' is the max length of all blobs of a record. */
pack_blob_length=0;
if (isam_file->s->base.blobs)
{
@@ -1707,6 +2459,7 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
}
max_pack_length=pack_ref_length+pack_blob_length;
+ DBUG_PRINT("fields", ("==="));
mrg_reset(mrg);
while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE)
{
@@ -1722,15 +2475,29 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
end_pos=start_pos+(field_length=count->field_length);
tree=count->tree;
+ DBUG_PRINT("fields", ("column: %3lu type: %2u pack: %2u zero: %4u "
+ "lbits: %2u tree: %2u length: %4u",
+ (ulong) (count - huff_counts + 1),
+ count->field_type,
+ count->pack_type, count->max_zero_fill,
+ count->length_bits, count->tree->tree_number,
+ count->field_length));
+
+ /* Check if the column contains spaces only. */
if (count->pack_type & PACK_TYPE_SPACE_FIELDS)
{
for (pos=start_pos ; *pos == ' ' && pos < end_pos; pos++) ;
if (pos == end_pos)
{
+ DBUG_PRINT("fields",
+ ("PACK_TYPE_SPACE_FIELDS spaces only, bits: 1"));
+ DBUG_PRINT("fields", ("---"));
write_bits(1,1);
start_pos=end_pos;
continue;
}
+ DBUG_PRINT("fields",
+ ("PACK_TYPE_SPACE_FIELDS not only spaces, bits: 1"));
write_bits(0,1);
}
end_pos-=count->max_zero_fill;
@@ -1740,65 +2507,129 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
case FIELD_SKIP_ZERO:
if (!memcmp((byte*) start_pos,zero_string,field_length))
{
+ DBUG_PRINT("fields", ("FIELD_SKIP_ZERO zeroes only, bits: 1"));
write_bits(1,1);
start_pos=end_pos;
break;
}
+ DBUG_PRINT("fields", ("FIELD_SKIP_ZERO not only zeroes, bits: 1"));
write_bits(0,1);
/* Fall through */
case FIELD_NORMAL:
+ DBUG_PRINT("fields", ("FIELD_NORMAL %lu bytes",
+ (ulong) (end_pos - start_pos)));
for ( ; start_pos < end_pos ; start_pos++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *start_pos,
+ hexdigits(tree->code[(uchar) *start_pos]),
+ (uint) tree->code_len[(uchar) *start_pos],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos])));
write_bits(tree->code[(uchar) *start_pos],
(uint) tree->code_len[(uchar) *start_pos]);
+ }
break;
case FIELD_SKIP_ENDSPACE:
for (pos=end_pos ; pos > start_pos && pos[-1] == ' ' ; pos--) ;
- length=(uint) (end_pos-pos);
+ length= (ulong) (end_pos - pos);
if (count->pack_type & PACK_TYPE_SELECTED)
{
if (length > count->min_space)
{
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_ENDSPACE more than min_space, bits: 1"));
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
+ length, field_length, count->length_bits));
write_bits(1,1);
write_bits(length,count->length_bits);
}
else
{
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_ENDSPACE not more than min_space, "
+ "bits: 1"));
write_bits(0,1);
pos=end_pos;
}
}
else
+ {
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
+ length, field_length, count->length_bits));
write_bits(length,count->length_bits);
+ }
+ /* Encode all significant bytes. */
+ DBUG_PRINT("fields", ("FIELD_SKIP_ENDSPACE %lu bytes",
+ (ulong) (pos - start_pos)));
for ( ; start_pos < pos ; start_pos++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *start_pos,
+ hexdigits(tree->code[(uchar) *start_pos]),
+ (uint) tree->code_len[(uchar) *start_pos],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos])));
write_bits(tree->code[(uchar) *start_pos],
(uint) tree->code_len[(uchar) *start_pos]);
+ }
start_pos=end_pos;
break;
case FIELD_SKIP_PRESPACE:
for (pos=start_pos ; pos < end_pos && pos[0] == ' ' ; pos++) ;
- length=(uint) (pos-start_pos);
+ length= (ulong) (pos - start_pos);
if (count->pack_type & PACK_TYPE_SELECTED)
{
if (length > count->min_space)
{
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_PRESPACE more than min_space, bits: 1"));
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
+ length, field_length, count->length_bits));
write_bits(1,1);
write_bits(length,count->length_bits);
}
else
{
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_PRESPACE not more than min_space, "
+ "bits: 1"));
pos=start_pos;
write_bits(0,1);
}
}
else
+ {
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
+ length, field_length, count->length_bits));
write_bits(length,count->length_bits);
+ }
+ /* Encode all significant bytes. */
+ DBUG_PRINT("fields", ("FIELD_SKIP_PRESPACE %lu bytes",
+ (ulong) (end_pos - start_pos)));
for (start_pos=pos ; start_pos < end_pos ; start_pos++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *start_pos,
+ hexdigits(tree->code[(uchar) *start_pos]),
+ (uint) tree->code_len[(uchar) *start_pos],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos])));
write_bits(tree->code[(uchar) *start_pos],
(uint) tree->code_len[(uchar) *start_pos]);
+ }
break;
case FIELD_CONSTANT:
case FIELD_ZERO:
case FIELD_CHECK:
+ DBUG_PRINT("fields", ("FIELD_CONSTANT/ZERO/CHECK"));
start_pos=end_pos;
break;
case FIELD_INTERVALL:
@@ -1806,6 +2637,10 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
pos=(byte*) tree_search(&count->int_tree, start_pos,
count->int_tree.custom_arg);
intervall=(uint) (pos - count->tree_buff)/field_length;
+ DBUG_PRINT("fields", ("FIELD_INTERVALL"));
+ DBUG_PRINT("fields", ("index: %4u code: 0x%s bits: %2u",
+ intervall, hexdigits(tree->code[intervall]),
+ (uint) tree->code_len[intervall]));
write_bits(tree->code[intervall],(uint) tree->code_len[intervall]);
start_pos=end_pos;
break;
@@ -1814,21 +2649,36 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
ulong blob_length=_mi_calc_blob_length(field_length-
mi_portable_sizeof_char_ptr,
start_pos);
+ /* Empty blobs are encoded with a single 1 bit. */
if (!blob_length)
{
- write_bits(1,1); /* Empty blob */
+ DBUG_PRINT("fields", ("FIELD_BLOB empty, bits: 1"));
+ write_bits(1,1);
}
else
{
byte *blob,*blob_end;
+ DBUG_PRINT("fields", ("FIELD_BLOB not empty, bits: 1"));
write_bits(0,1);
+ /* Write the blob length. */
+ DBUG_PRINT("fields", ("FIELD_BLOB %lu bytes, bits: %2u",
+ blob_length, count->length_bits));
write_bits(blob_length,count->length_bits);
memcpy_fixed(&blob,end_pos-mi_portable_sizeof_char_ptr,
sizeof(char*));
blob_end=blob+blob_length;
+ /* Encode the blob bytes. */
for ( ; blob < blob_end ; blob++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *blob, hexdigits(tree->code[(uchar) *blob]),
+ (uint) tree->code_len[(uchar) *blob],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint)tree->code_len[(uchar) *start_pos])));
write_bits(tree->code[(uchar) *blob],
(uint) tree->code_len[(uchar) *blob]);
+ }
tot_blob_length+=blob_length;
}
start_pos= end_pos;
@@ -1839,18 +2689,34 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
ulong col_length= (pack_length == 1 ? (uint) *(uchar*) start_pos :
uint2korr(start_pos));
+ /* Empty varchar are encoded with a single 1 bit. */
if (!col_length)
{
+ DBUG_PRINT("fields", ("FIELD_VARCHAR empty, bits: 1"));
write_bits(1,1); /* Empty varchar */
}
else
{
byte *end=start_pos+pack_length+col_length;
+ DBUG_PRINT("fields", ("FIELD_VARCHAR not empty, bits: 1"));
write_bits(0,1);
+ /* Write the varchar length. */
+ DBUG_PRINT("fields", ("FIELD_VARCHAR %lu bytes, bits: %2u",
+ col_length, count->length_bits));
write_bits(col_length,count->length_bits);
+ /* Encode the varchar bytes. */
for (start_pos+=pack_length ; start_pos < end ; start_pos++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *start_pos,
+ hexdigits(tree->code[(uchar) *start_pos]),
+ (uint) tree->code_len[(uchar) *start_pos],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint)tree->code_len[(uchar) *start_pos])));
write_bits(tree->code[(uchar) *start_pos],
(uint) tree->code_len[(uchar) *start_pos]);
+ }
}
start_pos= end_pos;
break;
@@ -1859,12 +2725,17 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
abort(); /* Impossible */
}
start_pos+=count->max_zero_fill;
+ DBUG_PRINT("fields", ("---"));
}
flush_bits();
- length=(ulong) (file_buffer.pos-record_pos)-max_pack_length;
+ length=(ulong) ((byte*) file_buffer.pos - record_pos) - max_pack_length;
pack_length=save_pack_length(record_pos,length);
if (pack_blob_length)
pack_length+=save_pack_length(record_pos+pack_length,tot_blob_length);
+ DBUG_PRINT("fields", ("record: %lu length: %lu blob-length: %lu "
+ "length-bytes: %lu", (ulong) record_count, length,
+ tot_blob_length, pack_length));
+ DBUG_PRINT("fields", ("==="));
/* Correct file buffer if the header was smaller */
if (pack_length != max_pack_length)
@@ -1876,9 +2747,11 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
min_record_length=(uint) length;
if (length > (ulong) max_record_length)
max_record_length=(uint) length;
- if (write_loop && ++record_count % WRITE_COUNT == 0)
+ record_count++;
+ if (write_loop && record_count % WRITE_COUNT == 0)
{
- printf("%lu\r",(ulong) record_count); VOID(fflush(stdout));
+ VOID(printf("%lu\r", (ulong) record_count));
+ VOID(fflush(stdout));
}
}
else if (error != HA_ERR_RECORD_DELETED)
@@ -1888,8 +2761,11 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
error=0;
else
{
- fprintf(stderr,"%s: Got error %d reading records\n",my_progname,error);
+ VOID(fprintf(stderr, "%s: Got error %d reading records\n",
+ my_progname, error));
}
+ if (verbose >= 2)
+ VOID(printf("wrote %s records.\n", llstr((longlong) record_count, llbuf)));
my_afree((gptr) record);
mrg->ref_length=max_pack_length;
@@ -1929,7 +2805,7 @@ static void init_file_buffer(File file, pbool read_buffer)
file_buffer.pos=file_buffer.buffer;
file_buffer.bits=BITS_SAVED;
}
- file_buffer.current_byte=0;
+ file_buffer.bitbucket= 0;
}
@@ -1972,7 +2848,8 @@ static int flush_buffer(ulong neaded_length)
tmp=my_realloc(file_buffer.buffer, neaded_length,MYF(MY_WME));
if (!tmp)
return 1;
- file_buffer.pos= tmp + (ulong) (file_buffer.pos - file_buffer.buffer);
+ file_buffer.pos= ((uchar*) tmp +
+ (ulong) (file_buffer.pos - file_buffer.buffer));
file_buffer.buffer=tmp;
file_buffer.end=tmp+neaded_length-8;
}
@@ -1987,68 +2864,59 @@ static void end_file_buffer(void)
/* output `bits` low bits of `value' */
-static void write_bits (register ulong value, register uint bits)
+static void write_bits(register ulonglong value, register uint bits)
{
- if ((file_buffer.bits-=(int) bits) >= 0)
+ DBUG_ASSERT(((bits < 8 * sizeof(value)) && ! (value >> bits)) ||
+ (bits == 8 * sizeof(value)));
+
+ if ((file_buffer.bits-= (int) bits) >= 0)
{
- file_buffer.current_byte|=value << file_buffer.bits;
+ file_buffer.bitbucket|= value << file_buffer.bits;
}
else
{
- reg3 uint byte_buff;
+ reg3 ulonglong bit_buffer;
bits= (uint) -file_buffer.bits;
- DBUG_ASSERT(bits <= 8 * sizeof(value));
- byte_buff= (file_buffer.current_byte |
- ((bits != 8 * sizeof(value)) ? (uint) (value >> bits) : 0));
-#if BITS_SAVED == 32
- *file_buffer.pos++= (byte) (byte_buff >> 24) ;
- *file_buffer.pos++= (byte) (byte_buff >> 16) ;
+ bit_buffer= (file_buffer.bitbucket |
+ ((bits != 8 * sizeof(value)) ? (value >> bits) : 0));
+#if BITS_SAVED == 64
+ *file_buffer.pos++= (uchar) (bit_buffer >> 56);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 48);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 40);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 32);
#endif
- *file_buffer.pos++= (byte) (byte_buff >> 8) ;
- *file_buffer.pos++= (byte) byte_buff;
+ *file_buffer.pos++= (uchar) (bit_buffer >> 24);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 16);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 8);
+ *file_buffer.pos++= (uchar) (bit_buffer);
- DBUG_ASSERT(bits <= 8 * sizeof(ulong));
if (bits != 8 * sizeof(value))
- value&= (((ulong) 1) << bits) - 1;
-#if BITS_SAVED == 16
- if (bits >= sizeof(uint))
- {
- bits-=8;
- *file_buffer.pos++= (uchar) (value >> bits);
- value&= (1 << bits)-1;
- if (bits >= sizeof(uint))
- {
- bits-=8;
- *file_buffer.pos++= (uchar) (value >> bits);
- value&= (1 << bits)-1;
- }
- }
-#endif
+ value&= (((ulonglong) 1) << bits) - 1;
if (file_buffer.pos >= file_buffer.end)
VOID(flush_buffer(~ (ulong) 0));
file_buffer.bits=(int) (BITS_SAVED - bits);
- file_buffer.current_byte=(uint) (value << (BITS_SAVED - bits));
+ file_buffer.bitbucket= value << (BITS_SAVED - bits);
}
return;
}
/* Flush bits in bit_buffer to buffer */
-static void flush_bits (void)
+static void flush_bits(void)
{
- uint bits,byte_buff;
+ int bits;
+ ulonglong bit_buffer;
- bits=(file_buffer.bits) & ~7;
- byte_buff = file_buffer.current_byte >> bits;
- bits=BITS_SAVED - bits;
+ bits= file_buffer.bits & ~7;
+ bit_buffer= file_buffer.bitbucket >> bits;
+ bits= BITS_SAVED - bits;
while (bits > 0)
{
- bits-=8;
- *file_buffer.pos++= (byte) (uchar) (byte_buff >> bits) ;
+ bits-= 8;
+ *file_buffer.pos++= (uchar) (bit_buffer >> bits);
}
- file_buffer.bits=BITS_SAVED;
- file_buffer.current_byte=0;
- return;
+ file_buffer.bits= BITS_SAVED;
+ file_buffer.bitbucket= 0;
}
@@ -2196,3 +3064,131 @@ static int mrg_close(PACK_MRG_INFO *mrg)
my_free((gptr) mrg->file,MYF(0));
return error;
}
+
+
+#if !defined(DBUG_OFF)
+/*
+ Fake the counts to get big Huffman codes.
+
+ SYNOPSIS
+ fakebigcodes()
+ huff_counts A pointer to the counts array.
+ end_count A pointer past the counts array.
+
+ DESCRIPTION
+
+ Huffman coding works by removing the two least frequent values from
+ the list of values and add a new value with the sum of their
+ incidences in a loop until only one value is left. Every time a
+ value is reused for a new value, it gets one more bit for its
+ encoding. Hence, the least frequent values get the longest codes.
+
+ To get a maximum code length for a value, two of the values must
+ have an incidence of 1. As their sum is 2, the next infrequent value
+ must have at least an incidence of 2, then 4, 8, 16 and so on. This
+ means that one needs 2**n bytes (values) for a code length of n
+ bits. However, using more distinct values forces the use of longer
+ codes, or reaching the code length with less total bytes (values).
+
+ To get 64(32)-bit codes, I sort the counts by decreasing incidence.
+ I assign counts of 1 to the two most frequent values, a count of 2
+ for the next one, then 4, 8, and so on until 2**64-1(2**30-1). All
+ the remaining values get 1. That way every possible byte has an
+ assigned code, though not all codes are used if not all byte values
+ are present in the column.
+
+ This strategy would work with distinct column values too, but
+ requires that at least 64(32) values are present. To make things
+ easier here, I cancel all distinct column values and force byte
+ compression for all columns.
+
+ RETURN
+ void
+*/
+
+static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count)
+{
+ HUFF_COUNTS *count;
+ my_off_t *cur_count_p;
+ my_off_t *end_count_p;
+ my_off_t **cur_sort_p;
+ my_off_t **end_sort_p;
+ my_off_t *sort_counts[256];
+ my_off_t total;
+ DBUG_ENTER("fakebigcodes");
+
+ for (count= huff_counts; count < end_count; count++)
+ {
+ /*
+ Remove distinct column values.
+ */
+ if (huff_counts->tree_buff)
+ {
+ my_free((gptr) huff_counts->tree_buff, MYF(0));
+ delete_tree(&huff_counts->int_tree);
+ huff_counts->tree_buff= NULL;
+ DBUG_PRINT("fakebigcodes", ("freed distinct column values"));
+ }
+
+ /*
+ Sort counts by decreasing incidence.
+ */
+ cur_count_p= count->counts;
+ end_count_p= cur_count_p + 256;
+ cur_sort_p= sort_counts;
+ while (cur_count_p < end_count_p)
+ *(cur_sort_p++)= cur_count_p++;
+ (void) qsort(sort_counts, 256, sizeof(my_off_t*), (qsort_cmp) fakecmp);
+
+ /*
+ Assign faked counts.
+ */
+ cur_sort_p= sort_counts;
+#if SIZEOF_LONG_LONG > 4
+ end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 1;
+#else
+ end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 2;
+#endif
+ /* Most frequent value gets a faked count of 1. */
+ **(cur_sort_p++)= 1;
+ total= 1;
+ while (cur_sort_p < end_sort_p)
+ {
+ **(cur_sort_p++)= total;
+ total<<= 1;
+ }
+ /* Set the last value. */
+ **(cur_sort_p++)= --total;
+ /*
+ Set the remaining counts.
+ */
+ end_sort_p= sort_counts + 256;
+ while (cur_sort_p < end_sort_p)
+ **(cur_sort_p++)= 1;
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Compare two counts for reverse sorting.
+
+ SYNOPSIS
+ fakecmp()
+ count1 One count.
+ count2 Another count.
+
+ RETURN
+ 1 count1 < count2
+ 0 count1 == count2
+ -1 count1 > count2
+*/
+
+static int fakecmp(my_off_t **count1, my_off_t **count2)
+{
+ return ((**count1 < **count2) ? 1 :
+ (**count1 > **count2) ? -1 : 0);
+}
+#endif
+
+
diff --git a/storage/myisammrg/Makefile.am b/storage/myisammrg/Makefile.am
index 6975fab6770..14e3295c1ae 100644
--- a/storage/myisammrg/Makefile.am
+++ b/storage/myisammrg/Makefile.am
@@ -14,7 +14,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-INCLUDES = -I$(top_srcdir)/include
+INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include
pkglib_LIBRARIES = libmyisammrg.a
noinst_HEADERS = myrg_def.h
libmyisammrg_a_SOURCES = myrg_open.c myrg_extra.c myrg_info.c myrg_locking.c \
diff --git a/storage/ndb/include/mgmcommon/ConfigRetriever.hpp b/storage/ndb/include/mgmcommon/ConfigRetriever.hpp
index 95d257dea23..c0b877af07d 100644
--- a/storage/ndb/include/mgmcommon/ConfigRetriever.hpp
+++ b/storage/ndb/include/mgmcommon/ConfigRetriever.hpp
@@ -32,6 +32,7 @@ public:
~ConfigRetriever();
int do_connect(int no_retries, int retry_delay_in_seconds, int verbose);
+ int disconnect();
/**
* Get configuration for current node.
diff --git a/storage/ndb/src/common/mgmcommon/ConfigRetriever.cpp b/storage/ndb/src/common/mgmcommon/ConfigRetriever.cpp
index eca886c8586..0e9b2a83e2f 100644
--- a/storage/ndb/src/common/mgmcommon/ConfigRetriever.cpp
+++ b/storage/ndb/src/common/mgmcommon/ConfigRetriever.cpp
@@ -107,6 +107,12 @@ ConfigRetriever::do_connect(int no_retries,
0 : -1;
}
+int
+ConfigRetriever::disconnect()
+{
+ return ndb_mgm_disconnect(m_handle);
+}
+
//****************************************************************************
//****************************************************************************
//****************************************************************************
diff --git a/storage/ndb/src/mgmsrv/main.cpp b/storage/ndb/src/mgmsrv/main.cpp
index 3335fdc827c..c7315c61ba1 100644
--- a/storage/ndb/src/mgmsrv/main.cpp
+++ b/storage/ndb/src/mgmsrv/main.cpp
@@ -353,7 +353,8 @@ int main(int argc, char** argv)
g_eventLogger.info("Shutting down server...");
glob.socketServer->stopServer();
- glob.socketServer->stopSessions();
+ glob.mgmObject->get_config_retriever()->disconnect();
+ glob.socketServer->stopSessions(true);
g_eventLogger.info("Shutdown complete");
return 0;
error_end: