summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--configure.in2
-rw-r--r--include/Makefile.am2
-rw-r--r--include/lf.h268
-rw-r--r--include/my_pthread.h13
-rw-r--r--mysys/Makefile.am3
-rw-r--r--mysys/lf_alloc-pin.c534
-rw-r--r--mysys/lf_dynarray.c207
-rw-r--r--mysys/lf_hash.c505
-rw-r--r--mysys/my_thr_init.c3
-rw-r--r--unittest/mysys/Makefile.am2
-rw-r--r--unittest/mysys/lf-t.c178
11 files changed, 1713 insertions, 4 deletions
diff --git a/configure.in b/configure.in
index dd2c53a2b52..db86eeaa779 100644
--- a/configure.in
+++ b/configure.in
@@ -2114,7 +2114,7 @@ AC_CHECK_FUNCS(alarm bcmp bfill bmove bsearch bzero \
pthread_setprio_np pthread_setschedparam pthread_sigmask readlink \
realpath rename rint rwlock_init setupterm \
shmget shmat shmdt shmctl sigaction sigemptyset sigaddset \
- sighold sigset sigthreadmask port_create sleep \
+ sighold sigset sigthreadmask port_create sleep thr_yield \
snprintf socket stpcpy strcasecmp strerror strsignal strnlen strpbrk strstr \
strtol strtoll strtoul strtoull tell tempnam thr_setconcurrency vidattr \
posix_fallocate backtrace backtrace_symbols backtrace_symbols_fd)
diff --git a/include/Makefile.am b/include/Makefile.am
index e712ef18722..83a22f1beec 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -30,7 +30,7 @@ pkginclude_HEADERS = $(HEADERS_ABI) my_dbug.h m_string.h my_sys.h \
m_ctype.h my_attribute.h $(HEADERS_GEN_CONFIGURE) \
$(HEADERS_GEN_MAKE) probes_mysql.h probes_mysql_nodtrace.h
-noinst_HEADERS = config-win.h config-netware.h my_bit.h \
+noinst_HEADERS = config-win.h config-netware.h lf.h my_bit.h \
heap.h my_bitmap.h my_uctype.h \
myisam.h myisampack.h myisammrg.h ft_global.h\
mysys_err.h my_base.h help_start.h help_end.h \
diff --git a/include/lf.h b/include/lf.h
new file mode 100644
index 00000000000..7e8f05f4ada
--- /dev/null
+++ b/include/lf.h
@@ -0,0 +1,268 @@
+/* Copyright (C) 2007-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _lf_h
+#define _lf_h
+
+#include <my_atomic.h>
+
+C_MODE_START
+
+/*
+ Helpers to define both func() and _func(), where
+ func() is a _func() protected by my_atomic_rwlock_wrlock()
+*/
+
+#define lock_wrap(f, t, proto_args, args, lock) \
+t _ ## f proto_args; \
+static inline t f proto_args \
+{ \
+ t ret; \
+ my_atomic_rwlock_wrlock(lock); \
+ ret= _ ## f args; \
+ my_atomic_rwlock_wrunlock(lock); \
+ return ret; \
+}
+
+#define lock_wrap_void(f, proto_args, args, lock) \
+void _ ## f proto_args; \
+static inline void f proto_args \
+{ \
+ my_atomic_rwlock_wrlock(lock); \
+ _ ## f args; \
+ my_atomic_rwlock_wrunlock(lock); \
+}
+
+#define nolock_wrap(f, t, proto_args, args) \
+t _ ## f proto_args; \
+static inline t f proto_args \
+{ \
+ return _ ## f args; \
+}
+
+#define nolock_wrap_void(f, proto_args, args) \
+void _ ## f proto_args; \
+static inline void f proto_args \
+{ \
+ _ ## f args; \
+}
+
+/*
+ wait-free dynamic array, see lf_dynarray.c
+
+ 4 levels of 256 elements each mean 4311810304 elements in an array - it
+ should be enough for a while
+*/
+#define LF_DYNARRAY_LEVEL_LENGTH 256
+#define LF_DYNARRAY_LEVELS 4
+
+typedef struct {
+ void * volatile level[LF_DYNARRAY_LEVELS];
+ uint size_of_element;
+ my_atomic_rwlock_t lock;
+} LF_DYNARRAY;
+
+typedef int (*lf_dynarray_func)(void *, void *);
+
+void lf_dynarray_init(LF_DYNARRAY *array, uint element_size);
+void lf_dynarray_destroy(LF_DYNARRAY *array);
+
+nolock_wrap(lf_dynarray_value, void *,
+ (LF_DYNARRAY *array, uint idx),
+ (array, idx))
+lock_wrap(lf_dynarray_lvalue, void *,
+ (LF_DYNARRAY *array, uint idx),
+ (array, idx),
+ &array->lock)
+nolock_wrap(lf_dynarray_iterate, int,
+ (LF_DYNARRAY *array, lf_dynarray_func func, void *arg),
+ (array, func, arg))
+
+/*
+ pin manager for memory allocator, lf_alloc-pin.c
+*/
+
+#define LF_PINBOX_PINS 4
+#define LF_PURGATORY_SIZE 10
+
+typedef void lf_pinbox_free_func(void *, void *, void*);
+
+typedef struct {
+ LF_DYNARRAY pinarray;
+ lf_pinbox_free_func *free_func;
+ void *free_func_arg;
+ uint free_ptr_offset;
+ uint32 volatile pinstack_top_ver; /* this is a versioned pointer */
+ uint32 volatile pins_in_array; /* number of elements in array */
+} LF_PINBOX;
+
+typedef struct {
+ void * volatile pin[LF_PINBOX_PINS];
+ LF_PINBOX *pinbox;
+ void **stack_ends_here;
+ void *purgatory;
+ uint32 purgatory_count;
+ uint32 volatile link;
+/* we want sizeof(LF_PINS) to be 64 to avoid false sharing */
+#if SIZEOF_INT*2+SIZEOF_CHARP*(LF_PINBOX_PINS+3) != 64
+ char pad[64-sizeof(uint32)*2-sizeof(void*)*(LF_PINBOX_PINS+3)];
+#endif
+} LF_PINS;
+
+/*
+ shortcut macros to do an atomic_wrlock on a structure that uses pins
+ (e.g. lf_hash).
+*/
+#define lf_rwlock_by_pins(PINS) \
+ my_atomic_rwlock_wrlock(&(PINS)->pinbox->pinarray.lock)
+#define lf_rwunlock_by_pins(PINS) \
+ my_atomic_rwlock_wrunlock(&(PINS)->pinbox->pinarray.lock)
+
+/*
+ compile-time assert, to require "no less than N" pins
+ it's enough if it'll fail on at least one compiler, so
+ we'll enable it on GCC only, which supports zero-length arrays.
+*/
+#if defined(__GNUC__) && defined(MY_LF_EXTRA_DEBUG)
+#define LF_REQUIRE_PINS(N) \
+ static const char require_pins[LF_PINBOX_PINS-N] \
+ __attribute__ ((unused)); \
+ static const int LF_NUM_PINS_IN_THIS_FILE= N;
+#define _lf_pin(PINS, PIN, ADDR) \
+ ( \
+ assert(PIN < LF_NUM_PINS_IN_THIS_FILE), \
+ my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR)) \
+ )
+#else
+#define LF_REQUIRE_PINS(N)
+#define _lf_pin(PINS, PIN, ADDR) my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR))
+#endif
+
+#define _lf_unpin(PINS, PIN) _lf_pin(PINS, PIN, NULL)
+#define lf_pin(PINS, PIN, ADDR) \
+ do { \
+ lf_rwlock_by_pins(PINS); \
+ _lf_pin(PINS, PIN, ADDR); \
+ lf_rwunlock_by_pins(PINS); \
+ } while (0)
+#define lf_unpin(PINS, PIN) lf_pin(PINS, PIN, NULL)
+#define _lf_assert_pin(PINS, PIN) assert((PINS)->pin[PIN] != 0)
+#define _lf_assert_unpin(PINS, PIN) assert((PINS)->pin[PIN] == 0)
+
+void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset,
+ lf_pinbox_free_func *free_func, void * free_func_arg);
+void lf_pinbox_destroy(LF_PINBOX *pinbox);
+
+lock_wrap(lf_pinbox_get_pins, LF_PINS *,
+ (LF_PINBOX *pinbox),
+ (pinbox),
+ &pinbox->pinarray.lock)
+lock_wrap_void(lf_pinbox_put_pins,
+ (LF_PINS *pins),
+ (pins),
+ &pins->pinbox->pinarray.lock)
+lock_wrap_void(lf_pinbox_free,
+ (LF_PINS *pins, void *addr),
+ (pins, addr),
+ &pins->pinbox->pinarray.lock)
+
+/*
+ memory allocator, lf_alloc-pin.c
+*/
+
+typedef struct st_lf_allocator {
+ LF_PINBOX pinbox;
+ uchar * volatile top;
+ uint element_size;
+ uint32 volatile mallocs;
+ void (*constructor)(uchar *); /* called, when an object is malloc()'ed */
+ void (*destructor)(uchar *); /* called, when an object is free()'d */
+} LF_ALLOCATOR;
+
+void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset);
+void lf_alloc_destroy(LF_ALLOCATOR *allocator);
+uint lf_alloc_pool_count(LF_ALLOCATOR *allocator);
+/*
+ shortcut macros to access underlying pinbox functions from an LF_ALLOCATOR
+ see _lf_pinbox_get_pins() and _lf_pinbox_put_pins()
+*/
+#define _lf_alloc_free(PINS, PTR) _lf_pinbox_free((PINS), (PTR))
+#define lf_alloc_free(PINS, PTR) lf_pinbox_free((PINS), (PTR))
+#define _lf_alloc_get_pins(A) _lf_pinbox_get_pins(&(A)->pinbox)
+#define lf_alloc_get_pins(A) lf_pinbox_get_pins(&(A)->pinbox)
+#define _lf_alloc_put_pins(PINS) _lf_pinbox_put_pins(PINS)
+#define lf_alloc_put_pins(PINS) lf_pinbox_put_pins(PINS)
+#define lf_alloc_direct_free(ALLOC, ADDR) my_free((uchar*)(ADDR), MYF(0))
+
+lock_wrap(lf_alloc_new, void *,
+ (LF_PINS *pins),
+ (pins),
+ &pins->pinbox->pinarray.lock)
+
+C_MODE_END
+
+/*
+ extendible hash, lf_hash.c
+*/
+#include <hash.h>
+
+C_MODE_START
+
+#define LF_HASH_UNIQUE 1
+
+/* lf_hash overhead per element (that is, sizeof(LF_SLIST) */
+extern const int LF_HASH_OVERHEAD;
+
+typedef struct {
+ LF_DYNARRAY array; /* hash itself */
+ LF_ALLOCATOR alloc; /* allocator for elements */
+ my_hash_get_key get_key; /* see HASH */
+ CHARSET_INFO *charset; /* see HASH */
+ uint key_offset, key_length; /* see HASH */
+ uint element_size; /* size of memcpy'ed area on insert */
+ uint flags; /* LF_HASH_UNIQUE, etc */
+ int32 volatile size; /* size of array */
+ int32 volatile count; /* number of elements in the hash */
+} LF_HASH;
+
+void lf_hash_init(LF_HASH *hash, uint element_size, uint flags,
+ uint key_offset, uint key_length, my_hash_get_key get_key,
+ CHARSET_INFO *charset);
+void lf_hash_destroy(LF_HASH *hash);
+int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data);
+void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen);
+int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen);
+/*
+ shortcut macros to access underlying pinbox functions from an LF_HASH
+ see _lf_pinbox_get_pins() and _lf_pinbox_put_pins()
+*/
+#define _lf_hash_get_pins(HASH) _lf_alloc_get_pins(&(HASH)->alloc)
+#define lf_hash_get_pins(HASH) lf_alloc_get_pins(&(HASH)->alloc)
+#define _lf_hash_put_pins(PINS) _lf_pinbox_put_pins(PINS)
+#define lf_hash_put_pins(PINS) lf_pinbox_put_pins(PINS)
+#define lf_hash_search_unpin(PINS) lf_unpin((PINS), 2)
+/*
+ cleanup
+*/
+
+#undef lock_wrap_void
+#undef lock_wrap
+#undef nolock_wrap_void
+#undef nolock_wrap
+
+C_MODE_END
+
+#endif
+
diff --git a/include/my_pthread.h b/include/my_pthread.h
index b6d9feae067..3e23c0fa410 100644
--- a/include/my_pthread.h
+++ b/include/my_pthread.h
@@ -152,6 +152,7 @@ int pthread_join(pthread_t thread, void **value_ptr);
#define pthread_detach_this_thread()
#define pthread_condattr_init(A)
#define pthread_condattr_destroy(A)
+#define pthread_yield() SwitchToThread()
#define my_pthread_getprio(thread_id) pthread_dummy(0)
@@ -385,6 +386,17 @@ void my_pthread_attr_getstacksize(pthread_attr_t *attrib, size_t *size);
int my_pthread_mutex_trylock(pthread_mutex_t *mutex);
#endif
+#if !defined(HAVE_PTHREAD_YIELD_ONE_ARG) && !defined(HAVE_PTHREAD_YIELD_ZERO_ARG)
+/* no pthread_yield() available */
+#ifdef HAVE_SCHED_YIELD
+#define pthread_yield() sched_yield()
+#elif defined(HAVE_PTHREAD_YIELD_NP) /* can be Mac OS X */
+#define pthread_yield() pthread_yield_np()
+#elif defined(HAVE_THR_YIELD)
+#define pthread_yield() thr_yield()
+#endif
+#endif
+
/*
The defines set_timespec and set_timespec_nsec should be used
for calculating an absolute time at which
@@ -663,6 +675,7 @@ struct st_my_thread_var
my_bool init;
struct st_my_thread_var *next,**prev;
void *opt_info;
+ void *stack_ends_here;
#ifndef DBUG_OFF
void *dbug;
char name[THREAD_NAME_SIZE+1];
diff --git a/mysys/Makefile.am b/mysys/Makefile.am
index 68fde34ee07..3b53fc1dd59 100644
--- a/mysys/Makefile.am
+++ b/mysys/Makefile.am
@@ -30,7 +30,8 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \
mf_tempdir.c my_lock.c mf_brkhant.c my_alarm.c \
my_malloc.c my_realloc.c my_once.c mulalloc.c \
my_alloc.c safemalloc.c my_new.cc \
- my_vle.c my_atomic.c \
+ my_vle.c my_atomic.c lf_hash.c \
+ lf_dynarray.c lf_alloc-pin.c \
my_fopen.c my_fstream.c my_getsystime.c \
my_error.c errors.c my_div.c my_messnc.c \
mf_format.c mf_same.c mf_dirname.c mf_fn_ext.c \
diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c
new file mode 100644
index 00000000000..fda9b97791d
--- /dev/null
+++ b/mysys/lf_alloc-pin.c
@@ -0,0 +1,534 @@
+/* QQ: TODO multi-pinbox */
+/* Copyright (C) 2006-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ wait-free concurrent allocator based on pinning addresses
+
+ It works as follows: every thread (strictly speaking - every CPU, but
+ it's too difficult to do) has a small array of pointers. They're called
+ "pins". Before using an object its address must be stored in this array
+ (pinned). When an object is no longer necessary its address must be
+ removed from this array (unpinned). When a thread wants to free() an
+ object it scans all pins of all threads to see if somebody has this
+ object pinned. If yes - the object is not freed (but stored in a
+ "purgatory"). To reduce the cost of a single free() pins are not scanned
+ on every free() but only added to (thread-local) purgatory. On every
+ LF_PURGATORY_SIZE free() purgatory is scanned and all unpinned objects
+ are freed.
+
+ Pins are used to solve ABA problem. To use pins one must obey
+ a pinning protocol:
+
+ 1. Let's assume that PTR is a shared pointer to an object. Shared means
+ that any thread may modify it anytime to point to a different object
+ and free the old object. Later the freed object may be potentially
+ allocated by another thread. If we're unlucky that other thread may
+ set PTR to point to this object again. This is ABA problem.
+ 2. Create a local pointer LOCAL_PTR.
+ 3. Pin the PTR in a loop:
+ do
+ {
+ LOCAL_PTR= PTR;
+ pin(PTR, PIN_NUMBER);
+ } while (LOCAL_PTR != PTR)
+ 4. It is guaranteed that after the loop has ended, LOCAL_PTR
+ points to an object (or NULL, if PTR may be NULL), that
+ will never be freed. It is not guaranteed though
+ that LOCAL_PTR == PTR (as PTR can change any time)
+ 5. When done working with the object, remove the pin:
+ unpin(PIN_NUMBER)
+ 6. When copying pins (as in the list traversing loop:
+ pin(CUR, 1);
+ while ()
+ {
+ do // standard
+ { // pinning
+ NEXT=CUR->next; // loop
+ pin(NEXT, 0); // see #3
+ } while (NEXT != CUR->next); // above
+ ...
+ ...
+ CUR=NEXT;
+ pin(CUR, 1); // copy pin[0] to pin[1]
+ }
+ which keeps CUR address constantly pinned), note than pins may be
+ copied only upwards (!!!), that is pin[N] to pin[M], M > N.
+ 7. Don't keep the object pinned longer than necessary - the number of
+ pins you have is limited (and small), keeping an object pinned
+ prevents its reuse and cause unnecessary mallocs.
+
+ Explanations:
+
+ 3. The loop is important. The following can occur:
+ thread1> LOCAL_PTR= PTR
+ thread2> free(PTR); PTR=0;
+ thread1> pin(PTR, PIN_NUMBER);
+ now thread1 cannot access LOCAL_PTR, even if it's pinned,
+ because it points to a freed memory. That is, it *must*
+ verify that it has indeed pinned PTR, the shared pointer.
+
+ 6. When a thread wants to free some LOCAL_PTR, and it scans
+ all lists of pins to see whether it's pinned, it does it
+ upwards, from low pin numbers to high. Thus another thread
+ must copy an address from one pin to another in the same
+ direction - upwards, otherwise the scanning thread may
+ miss it.
+
+ Implementation details:
+
+ Pins are given away from a "pinbox". Pinbox is stack-based allocator.
+ It used dynarray for storing pins, new elements are allocated by dynarray
+ as necessary, old are pushed in the stack for reuse. ABA is solved by
+ versioning a pointer - because we use an array, a pointer to pins is 16 bit,
+ upper 16 bits are used for a version.
+
+ It is assumed that pins belong to a THD and are not transferable
+ between THD's (LF_PINS::stack_ends_here being a primary reason
+ for this limitation).
+*/
+#include <my_global.h>
+#include <my_sys.h>
+#include <lf.h>
+
+#define LF_PINBOX_MAX_PINS 65536
+
+static void _lf_pinbox_real_free(LF_PINS *pins);
+
+/*
+ Initialize a pinbox. Normally called from lf_alloc_init.
+ See the latter for details.
+*/
+void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset,
+ lf_pinbox_free_func *free_func, void *free_func_arg)
+{
+ DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0);
+ compile_time_assert(sizeof(LF_PINS) == 64);
+ lf_dynarray_init(&pinbox->pinarray, sizeof(LF_PINS));
+ pinbox->pinstack_top_ver= 0;
+ pinbox->pins_in_array= 0;
+ pinbox->free_ptr_offset= free_ptr_offset;
+ pinbox->free_func= free_func;
+ pinbox->free_func_arg= free_func_arg;
+}
+
+void lf_pinbox_destroy(LF_PINBOX *pinbox)
+{
+ lf_dynarray_destroy(&pinbox->pinarray);
+}
+
+/*
+ Get pins from a pinbox. Usually called via lf_alloc_get_pins() or
+ lf_hash_get_pins().
+
+ SYNOPSYS
+ pinbox -
+
+ DESCRIPTION
+ get a new LF_PINS structure from a stack of unused pins,
+ or allocate a new one out of dynarray.
+
+ NOTE
+ It is assumed that pins belong to a thread and are not transferable
+ between threads.
+*/
+LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox)
+{
+ uint32 pins, next, top_ver;
+ LF_PINS *el;
+ /*
+ We have an array of max. 64k elements.
+ The highest index currently allocated is pinbox->pins_in_array.
+ Freed elements are in a lifo stack, pinstack_top_ver.
+ pinstack_top_ver is 32 bits; 16 low bits are the index in the
+ array, to the first element of the list. 16 high bits are a version
+ (every time the 16 low bits are updated, the 16 high bits are
+ incremented). Versioniong prevents the ABA problem.
+ */
+ top_ver= pinbox->pinstack_top_ver;
+ do
+ {
+ if (!(pins= top_ver % LF_PINBOX_MAX_PINS))
+ {
+ /* the stack of free elements is empty */
+ pins= my_atomic_add32((int32 volatile*) &pinbox->pins_in_array, 1)+1;
+ if (unlikely(pins >= LF_PINBOX_MAX_PINS))
+ return 0;
+ /*
+ note that the first allocated element has index 1 (pins==1).
+ index 0 is reserved to mean "NULL pointer"
+ */
+ el= (LF_PINS *)_lf_dynarray_lvalue(&pinbox->pinarray, pins);
+ if (unlikely(!el))
+ return 0;
+ break;
+ }
+ el= (LF_PINS *)_lf_dynarray_value(&pinbox->pinarray, pins);
+ next= el->link;
+ } while (!my_atomic_cas32((int32 volatile*) &pinbox->pinstack_top_ver,
+ (int32*) &top_ver,
+ top_ver-pins+next+LF_PINBOX_MAX_PINS));
+ /*
+ set el->link to the index of el in the dynarray (el->link has two usages:
+ - if element is allocated, it's its own index
+ - if element is free, it's its next element in the free stack
+ */
+ el->link= pins;
+ el->purgatory_count= 0;
+ el->pinbox= pinbox;
+ el->stack_ends_here= & my_thread_var->stack_ends_here;
+ return el;
+}
+
+/*
+ Put pins back to a pinbox. Usually called via lf_alloc_put_pins() or
+ lf_hash_put_pins().
+
+ DESCRIPTION
+ empty the purgatory (XXX deadlock warning below!),
+ push LF_PINS structure to a stack
+*/
+void _lf_pinbox_put_pins(LF_PINS *pins)
+{
+ LF_PINBOX *pinbox= pins->pinbox;
+ uint32 top_ver, nr;
+ nr= pins->link;
+#ifdef MY_LF_EXTRA_DEBUG
+ {
+ int i;
+ for (i= 0; i < LF_PINBOX_PINS; i++)
+ DBUG_ASSERT(pins->pin[i] == 0);
+ }
+#endif
+ /*
+ XXX this will deadlock if other threads will wait for
+ the caller to do something after _lf_pinbox_put_pins(),
+ and they would have pinned addresses that the caller wants to free.
+ Thus: only free pins when all work is done and nobody can wait for you!!!
+ */
+ while (pins->purgatory_count)
+ {
+ _lf_pinbox_real_free(pins);
+ if (pins->purgatory_count)
+ {
+ my_atomic_rwlock_wrunlock(&pins->pinbox->pinarray.lock);
+ pthread_yield();
+ my_atomic_rwlock_wrlock(&pins->pinbox->pinarray.lock);
+ }
+ }
+ top_ver= pinbox->pinstack_top_ver;
+ do
+ {
+ pins->link= top_ver % LF_PINBOX_MAX_PINS;
+ } while (!my_atomic_cas32((int32 volatile*) &pinbox->pinstack_top_ver,
+ (int32*) &top_ver,
+ top_ver-pins->link+nr+LF_PINBOX_MAX_PINS));
+ return;
+}
+
+static int ptr_cmp(void **a, void **b)
+{
+ return *a < *b ? -1 : *a == *b ? 0 : 1;
+}
+
+#define add_to_purgatory(PINS, ADDR) \
+ do \
+ { \
+ *(void **)((char *)(ADDR)+(PINS)->pinbox->free_ptr_offset)= \
+ (PINS)->purgatory; \
+ (PINS)->purgatory= (ADDR); \
+ (PINS)->purgatory_count++; \
+ } while (0)
+
+/*
+ Free an object allocated via pinbox allocator
+
+ DESCRIPTION
+ add an object to purgatory. if necessary, call _lf_pinbox_real_free()
+ to actually free something.
+*/
+void _lf_pinbox_free(LF_PINS *pins, void *addr)
+{
+ add_to_purgatory(pins, addr);
+ if (pins->purgatory_count % LF_PURGATORY_SIZE)
+ _lf_pinbox_real_free(pins);
+}
+
+struct st_harvester {
+ void **granary;
+ int npins;
+};
+
+/*
+ callback for _lf_dynarray_iterate:
+ scan all pins of all threads and accumulate all pins
+*/
+static int harvest_pins(LF_PINS *el, struct st_harvester *hv)
+{
+ int i;
+ LF_PINS *el_end= el+min(hv->npins, LF_DYNARRAY_LEVEL_LENGTH);
+ for (; el < el_end; el++)
+ {
+ for (i= 0; i < LF_PINBOX_PINS; i++)
+ {
+ void *p= el->pin[i];
+ if (p)
+ *hv->granary++= p;
+ }
+ }
+ /*
+ hv->npins may become negative below, but it means that
+ we're on the last dynarray page and harvest_pins() won't be
+ called again. We don't bother to make hv->npins() correct
+ (that is 0) in this case.
+ */
+ hv->npins-= LF_DYNARRAY_LEVEL_LENGTH;
+ return 0;
+}
+
+/*
+ callback for _lf_dynarray_iterate:
+ scan all pins of all threads and see if addr is present there
+*/
+static int match_pins(LF_PINS *el, void *addr)
+{
+ int i;
+ LF_PINS *el_end= el+LF_DYNARRAY_LEVEL_LENGTH;
+ for (; el < el_end; el++)
+ for (i= 0; i < LF_PINBOX_PINS; i++)
+ if (el->pin[i] == addr)
+ return 1;
+ return 0;
+}
+
+#if STACK_DIRECTION < 0
+#define available_stack_size(CUR,END) (long) ((char*)(CUR) - (char*)(END))
+#else
+#define available_stack_size(CUR,END) (long) ((char*)(END) - (char*)(CUR))
+#endif
+
+#define next_node(P, X) (*((uchar * volatile *)(((uchar *)(X)) + (P)->free_ptr_offset)))
+#define anext_node(X) next_node(&allocator->pinbox, (X))
+
+/*
+ Scan the purgatory and free everything that can be freed
+*/
+static void _lf_pinbox_real_free(LF_PINS *pins)
+{
+ int npins, alloca_size;
+ void *list, **addr;
+ void *first, *last= NULL;
+ LF_PINBOX *pinbox= pins->pinbox;
+
+ LINT_INIT(first);
+ npins= pinbox->pins_in_array+1;
+
+#ifdef HAVE_ALLOCA
+ alloca_size= sizeof(void *)*LF_PINBOX_PINS*npins;
+ /* create a sorted list of pinned addresses, to speed up searches */
+ if (available_stack_size(&pinbox, *pins->stack_ends_here) > alloca_size)
+ {
+ struct st_harvester hv;
+ addr= (void **) alloca(alloca_size);
+ hv.granary= addr;
+ hv.npins= npins;
+ /* scan the dynarray and accumulate all pinned addresses */
+ _lf_dynarray_iterate(&pinbox->pinarray,
+ (lf_dynarray_func)harvest_pins, &hv);
+
+ npins= hv.granary-addr;
+ /* and sort them */
+ if (npins)
+ qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp);
+ }
+ else
+#endif
+ addr= 0;
+
+ list= pins->purgatory;
+ pins->purgatory= 0;
+ pins->purgatory_count= 0;
+ while (list)
+ {
+ void *cur= list;
+ list= *(void **)((char *)cur+pinbox->free_ptr_offset);
+ if (npins)
+ {
+ if (addr) /* use binary search */
+ {
+ void **a, **b, **c;
+ for (a= addr, b= addr+npins-1, c= a+(b-a)/2; (b-a) > 1; c= a+(b-a)/2)
+ if (cur == *c)
+ a= b= c;
+ else if (cur > *c)
+ a= c;
+ else
+ b= c;
+ if (cur == *a || cur == *b)
+ goto found;
+ }
+ else /* no alloca - no cookie. linear search here */
+ {
+ if (_lf_dynarray_iterate(&pinbox->pinarray,
+ (lf_dynarray_func)match_pins, cur))
+ goto found;
+ }
+ }
+ /* not pinned - freeing */
+ if (last)
+ last= next_node(pinbox, last)= (uchar *)cur;
+ else
+ first= last= (uchar *)cur;
+ continue;
+found:
+ /* pinned - keeping */
+ add_to_purgatory(pins, cur);
+ }
+ if (last)
+ pinbox->free_func(first, last, pinbox->free_func_arg);
+}
+
+/* lock-free memory allocator for fixed-size objects */
+
+LF_REQUIRE_PINS(1)
+
+/*
+ callback for _lf_pinbox_real_free to free a list of unpinned objects -
+ add it back to the allocator stack
+
+ DESCRIPTION
+ 'first' and 'last' are the ends of the linked list of nodes:
+ first->el->el->....->el->last. Use first==last to free only one element.
+*/
+static void alloc_free(uchar *first,
+ uchar volatile *last,
+ LF_ALLOCATOR *allocator)
+{
+ /*
+ we need a union here to access type-punned pointer reliably.
+ otherwise gcc -fstrict-aliasing will not see 'tmp' changed in the loop
+ */
+ union { uchar * node; void *ptr; } tmp;
+ tmp.node= allocator->top;
+ do
+ {
+ anext_node(last)= tmp.node;
+ } while (!my_atomic_casptr((void **)(char *)&allocator->top,
+ (void **)&tmp.ptr, first) && LF_BACKOFF);
+}
+
+/*
+ initialize lock-free allocator
+
+ SYNOPSYS
+ allocator -
+ size a size of an object to allocate
+ free_ptr_offset an offset inside the object to a sizeof(void *)
+ memory that is guaranteed to be unused after
+ the object is put in the purgatory. Unused by ANY
+ thread, not only the purgatory owner.
+ This memory will be used to link waiting-to-be-freed
+ objects in a purgatory list.
+*/
+void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset)
+{
+ lf_pinbox_init(&allocator->pinbox, free_ptr_offset,
+ (lf_pinbox_free_func *)alloc_free, allocator);
+ allocator->top= 0;
+ allocator->mallocs= 0;
+ allocator->element_size= size;
+ allocator->constructor= 0;
+ allocator->destructor= 0;
+ DBUG_ASSERT(size >= sizeof(void*) + free_ptr_offset);
+}
+
+/*
+ destroy the allocator, free everything that's in it
+
+ NOTE
+ As every other init/destroy function here and elsewhere it
+ is not thread safe. No, this function is no different, ensure
+ that no thread needs the allocator before destroying it.
+ We are not responsible for any damage that may be caused by
+ accessing the allocator when it is being or has been destroyed.
+ Oh yes, and don't put your cat in a microwave.
+*/
+void lf_alloc_destroy(LF_ALLOCATOR *allocator)
+{
+ uchar *node= allocator->top;
+ while (node)
+ {
+ uchar *tmp= anext_node(node);
+ if (allocator->destructor)
+ allocator->destructor(node);
+ my_free((void *)node, MYF(0));
+ node= tmp;
+ }
+ lf_pinbox_destroy(&allocator->pinbox);
+ allocator->top= 0;
+}
+
+/*
+ Allocate and return an new object.
+
+ DESCRIPTION
+ Pop an unused object from the stack or malloc it is the stack is empty.
+ pin[0] is used, it's removed on return.
+*/
+void *_lf_alloc_new(LF_PINS *pins)
+{
+ LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg);
+ uchar *node;
+ for (;;)
+ {
+ do
+ {
+ node= allocator->top;
+ _lf_pin(pins, 0, node);
+ } while (node != allocator->top && LF_BACKOFF);
+ if (!node)
+ {
+ node= (void *)my_malloc(allocator->element_size, MYF(MY_WME));
+ if (allocator->constructor)
+ allocator->constructor(node);
+#ifdef MY_LF_EXTRA_DEBUG
+ if (likely(node != 0))
+ my_atomic_add32(&allocator->mallocs, 1);
+#endif
+ break;
+ }
+ if (my_atomic_casptr((void **)(char *)&allocator->top,
+ (void *)&node, anext_node(node)))
+ break;
+ }
+ _lf_unpin(pins, 0);
+ return node;
+}
+
+/*
+ count the number of objects in a pool.
+
+ NOTE
+ This is NOT thread-safe !!!
+*/
+uint lf_alloc_pool_count(LF_ALLOCATOR *allocator)
+{
+ uint i;
+ uchar *node;
+ for (node= allocator->top, i= 0; node; node= anext_node(node), i++)
+ /* no op */;
+ return i;
+}
+
diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c
new file mode 100644
index 00000000000..b1cdce698a9
--- /dev/null
+++ b/mysys/lf_dynarray.c
@@ -0,0 +1,207 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Analog of DYNAMIC_ARRAY that never reallocs
+ (so no pointer into the array may ever become invalid).
+
+ Memory is allocated in non-contiguous chunks.
+ This data structure is not space efficient for sparse arrays.
+
+ Every element is aligned to sizeof(element) boundary
+ (to avoid false sharing if element is big enough).
+
+ LF_DYNARRAY is a recursive structure. On the zero level
+ LF_DYNARRAY::level[0] it's an array of LF_DYNARRAY_LEVEL_LENGTH elements,
+ on the first level it's an array of LF_DYNARRAY_LEVEL_LENGTH pointers
+ to arrays of elements, on the second level it's an array of pointers
+ to arrays of pointers to arrays of elements. And so on.
+
+ With four levels the number of elements is limited to 4311810304
+ (but as in all functions index is uint, the real limit is 2^32-1)
+
+ Actually, it's wait-free, not lock-free ;-)
+*/
+
+#include <my_global.h>
+#include <m_string.h>
+#include <my_sys.h>
+#include <lf.h>
+
+void lf_dynarray_init(LF_DYNARRAY *array, uint element_size)
+{
+ bzero(array, sizeof(*array));
+ array->size_of_element= element_size;
+ my_atomic_rwlock_init(&array->lock);
+}
+
+static void recursive_free(void **alloc, int level)
+{
+ if (!alloc)
+ return;
+
+ if (level)
+ {
+ int i;
+ for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++)
+ recursive_free(alloc[i], level-1);
+ my_free((void *)alloc, MYF(0));
+ }
+ else
+ my_free(alloc[-1], MYF(0));
+}
+
+void lf_dynarray_destroy(LF_DYNARRAY *array)
+{
+ int i;
+ for (i= 0; i < LF_DYNARRAY_LEVELS; i++)
+ recursive_free(array->level[i], i);
+ my_atomic_rwlock_destroy(&array->lock);
+}
+
+static const ulong dynarray_idxes_in_prev_levels[LF_DYNARRAY_LEVELS]=
+{
+ 0, /* +1 here to to avoid -1's below */
+ LF_DYNARRAY_LEVEL_LENGTH,
+ LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH +
+ LF_DYNARRAY_LEVEL_LENGTH,
+ LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH *
+ LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH *
+ LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH
+};
+
+static const ulong dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]=
+{
+ 0, /* +1 here to to avoid -1's below */
+ LF_DYNARRAY_LEVEL_LENGTH,
+ LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH,
+ LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH *
+ LF_DYNARRAY_LEVEL_LENGTH,
+};
+
+/*
+ Returns a valid lvalue pointer to the element number 'idx'.
+ Allocates memory if necessary.
+*/
+void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx)
+{
+ void * ptr, * volatile * ptr_ptr= 0;
+ int i;
+
+ for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--)
+ /* no-op */;
+ ptr_ptr= &array->level[i];
+ idx-= dynarray_idxes_in_prev_levels[i];
+ for (; i > 0; i--)
+ {
+ if (!(ptr= *ptr_ptr))
+ {
+ void *alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * sizeof(void *),
+ MYF(MY_WME|MY_ZEROFILL));
+ if (unlikely(!alloc))
+ return(NULL);
+ if (my_atomic_casptr(ptr_ptr, &ptr, alloc))
+ ptr= alloc;
+ else
+ my_free(alloc, MYF(0));
+ }
+ ptr_ptr= ((void **)ptr) + idx / dynarray_idxes_in_prev_level[i];
+ idx%= dynarray_idxes_in_prev_level[i];
+ }
+ if (!(ptr= *ptr_ptr))
+ {
+ uchar *alloc, *data;
+ alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element +
+ max(array->size_of_element, sizeof(void *)),
+ MYF(MY_WME|MY_ZEROFILL));
+ if (unlikely(!alloc))
+ return(NULL);
+ /* reserve the space for free() address */
+ data= alloc + sizeof(void *);
+ { /* alignment */
+ intptr mod= ((intptr)data) % array->size_of_element;
+ if (mod)
+ data+= array->size_of_element - mod;
+ }
+ ((void **)data)[-1]= alloc; /* free() will need the original pointer */
+ if (my_atomic_casptr(ptr_ptr, &ptr, data))
+ ptr= data;
+ else
+ my_free(alloc, MYF(0));
+ }
+ return ((uchar*)ptr) + array->size_of_element * idx;
+}
+
+/*
+ Returns a pointer to the element number 'idx'
+ or NULL if an element does not exists
+*/
+void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx)
+{
+ void * ptr, * volatile * ptr_ptr= 0;
+ int i;
+
+ for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--)
+ /* no-op */;
+ ptr_ptr= &array->level[i];
+ idx-= dynarray_idxes_in_prev_levels[i];
+ for (; i > 0; i--)
+ {
+ if (!(ptr= *ptr_ptr))
+ return(NULL);
+ ptr_ptr= ((void **)ptr) + idx / dynarray_idxes_in_prev_level[i];
+ idx %= dynarray_idxes_in_prev_level[i];
+ }
+ if (!(ptr= *ptr_ptr))
+ return(NULL);
+ return ((uchar*)ptr) + array->size_of_element * idx;
+}
+
+static int recursive_iterate(LF_DYNARRAY *array, void *ptr, int level,
+ lf_dynarray_func func, void *arg)
+{
+ int res, i;
+ if (!ptr)
+ return 0;
+ if (!level)
+ return func(ptr, arg);
+ for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++)
+ if ((res= recursive_iterate(array, ((void **)ptr)[i], level-1, func, arg)))
+ return res;
+ return 0;
+}
+
+/*
+ Calls func(array, arg) on every array of LF_DYNARRAY_LEVEL_LENGTH elements
+ in lf_dynarray.
+
+ DESCRIPTION
+ lf_dynarray consists of a set of arrays, LF_DYNARRAY_LEVEL_LENGTH elements
+ each. _lf_dynarray_iterate() calls user-supplied function on every array
+ from the set. It is the fastest way to scan the array, faster than
+ for (i=0; i < N; i++) { func(_lf_dynarray_value(dynarray, i)); }
+
+ NOTE
+ if func() returns non-zero, the scan is aborted
+*/
+int _lf_dynarray_iterate(LF_DYNARRAY *array, lf_dynarray_func func, void *arg)
+{
+ int i, res;
+ for (i= 0; i < LF_DYNARRAY_LEVELS; i++)
+ if ((res= recursive_iterate(array, array->level[i], i, func, arg)))
+ return res;
+ return 0;
+}
+
diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c
new file mode 100644
index 00000000000..f478196c7c8
--- /dev/null
+++ b/mysys/lf_hash.c
@@ -0,0 +1,505 @@
+/* Copyright (C) 2006-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ extensible hash
+
+ TODO
+ try to get rid of dummy nodes ?
+ for non-unique hash, count only _distinct_ values
+ (but how to do it in lf_hash_delete ?)
+*/
+#include <my_global.h>
+#include <m_string.h>
+#include <my_sys.h>
+#include <my_bit.h>
+#include <lf.h>
+
+LF_REQUIRE_PINS(3)
+
+/* An element of the list */
+typedef struct {
+ intptr volatile link; /* a pointer to the next element in a listand a flag */
+ uint32 hashnr; /* reversed hash number, for sorting */
+ const uchar *key;
+ size_t keylen;
+ /*
+ data is stored here, directly after the keylen.
+ thus the pointer to data is (void*)(slist_element_ptr+1)
+ */
+} LF_SLIST;
+
+const int LF_HASH_OVERHEAD= sizeof(LF_SLIST);
+
+/*
+ a structure to pass the context (pointers two the three successive elements
+ in a list) from lfind to linsert/ldelete
+*/
+typedef struct {
+ intptr volatile *prev;
+ LF_SLIST *curr, *next;
+} CURSOR;
+
+/*
+ the last bit in LF_SLIST::link is a "deleted" flag.
+ the helper macros below convert it to a pure pointer or a pure flag
+*/
+#define PTR(V) (LF_SLIST *)((V) & (~(intptr)1))
+#define DELETED(V) ((V) & 1)
+
+/*
+ DESCRIPTION
+ Search for hashnr/key/keylen in the list starting from 'head' and
+ position the cursor. The list is ORDER BY hashnr, key
+
+ RETURN
+ 0 - not found
+ 1 - found
+
+ NOTE
+ cursor is positioned in either case
+ pins[0..2] are used, they are NOT removed on return
+*/
+static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
+ const uchar *key, uint keylen, CURSOR *cursor, LF_PINS *pins)
+{
+ uint32 cur_hashnr;
+ const uchar *cur_key;
+ uint cur_keylen;
+ intptr link;
+
+retry:
+ cursor->prev= (intptr *)head;
+ do { /* PTR() isn't necessary below, head is a dummy node */
+ cursor->curr= (LF_SLIST *)(*cursor->prev);
+ _lf_pin(pins, 1, cursor->curr);
+ } while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF);
+ for (;;)
+ {
+ if (unlikely(!cursor->curr))
+ return 0; /* end of the list */
+ do {
+ /* QQ: XXX or goto retry ? */
+ link= cursor->curr->link;
+ cursor->next= PTR(link);
+ _lf_pin(pins, 0, cursor->next);
+ } while (link != cursor->curr->link && LF_BACKOFF);
+ cur_hashnr= cursor->curr->hashnr;
+ cur_key= cursor->curr->key;
+ cur_keylen= cursor->curr->keylen;
+ if (*cursor->prev != (intptr)cursor->curr)
+ {
+ (void)LF_BACKOFF;
+ goto retry;
+ }
+ if (!DELETED(link))
+ {
+ if (cur_hashnr >= hashnr)
+ {
+ int r= 1;
+ if (cur_hashnr > hashnr ||
+ (r= my_strnncoll(cs, (uchar*) cur_key, cur_keylen, (uchar*) key,
+ keylen)) >= 0)
+ return !r;
+ }
+ cursor->prev= &(cursor->curr->link);
+ _lf_pin(pins, 2, cursor->curr);
+ }
+ else
+ {
+ /*
+ we found a deleted node - be nice, help the other thread
+ and remove this deleted node
+ */
+ if (my_atomic_casptr((void **)cursor->prev,
+ (void **)&cursor->curr, cursor->next))
+ _lf_alloc_free(pins, cursor->curr);
+ else
+ {
+ (void)LF_BACKOFF;
+ goto retry;
+ }
+ }
+ cursor->curr= cursor->next;
+ _lf_pin(pins, 1, cursor->curr);
+ }
+}
+
+/*
+ DESCRIPTION
+ insert a 'node' in the list that starts from 'head' in the correct
+ position (as found by lfind)
+
+ RETURN
+ 0 - inserted
+ not 0 - a pointer to a duplicate (not pinned and thus unusable)
+
+ NOTE
+ it uses pins[0..2], on return all pins are removed.
+ if there're nodes with the same key value, a new node is added before them.
+*/
+static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs,
+ LF_SLIST *node, LF_PINS *pins, uint flags)
+{
+ CURSOR cursor;
+ int res;
+
+ for (;;)
+ {
+ if (lfind(head, cs, node->hashnr, node->key, node->keylen,
+ &cursor, pins) &&
+ (flags & LF_HASH_UNIQUE))
+ {
+ res= 0; /* duplicate found */
+ break;
+ }
+ else
+ {
+ node->link= (intptr)cursor.curr;
+ DBUG_ASSERT(node->link != (intptr)node); /* no circular references */
+ DBUG_ASSERT(cursor.prev != &node->link); /* no circular references */
+ if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node))
+ {
+ res= 1; /* inserted ok */
+ break;
+ }
+ }
+ }
+ _lf_unpin(pins, 0);
+ _lf_unpin(pins, 1);
+ _lf_unpin(pins, 2);
+ /*
+ Note that cursor.curr is not pinned here and the pointer is unreliable,
+ the object may dissapear anytime. But if it points to a dummy node, the
+ pointer is safe, because dummy nodes are never freed - initialize_bucket()
+ uses this fact.
+ */
+ return res ? 0 : cursor.curr;
+}
+
+/*
+ DESCRIPTION
+ deletes a node as identified by hashnr/keey/keylen from the list
+ that starts from 'head'
+
+ RETURN
+ 0 - ok
+ 1 - not found
+
+ NOTE
+ it uses pins[0..2], on return all pins are removed.
+*/
+static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
+ const uchar *key, uint keylen, LF_PINS *pins)
+{
+ CURSOR cursor;
+ int res;
+
+ for (;;)
+ {
+ if (!lfind(head, cs, hashnr, key, keylen, &cursor, pins))
+ {
+ res= 1; /* not found */
+ break;
+ }
+ else
+ {
+ /* mark the node deleted */
+ if (my_atomic_casptr((void **)&(cursor.curr->link),
+ (void **)&cursor.next,
+ (void *)(((intptr)cursor.next) | 1)))
+ {
+ /* and remove it from the list */
+ if (my_atomic_casptr((void **)cursor.prev,
+ (void **)&cursor.curr, cursor.next))
+ _lf_alloc_free(pins, cursor.curr);
+ else
+ {
+ /*
+ somebody already "helped" us and removed the node ?
+ Let's check if we need to help that someone too!
+ (to ensure the number of "set DELETED flag" actions
+ is equal to the number of "remove from the list" actions)
+ */
+ lfind(head, cs, hashnr, key, keylen, &cursor, pins);
+ }
+ res= 0;
+ break;
+ }
+ }
+ }
+ _lf_unpin(pins, 0);
+ _lf_unpin(pins, 1);
+ _lf_unpin(pins, 2);
+ return res;
+}
+
+/*
+ DESCRIPTION
+ searches for a node as identified by hashnr/keey/keylen in the list
+ that starts from 'head'
+
+ RETURN
+ 0 - not found
+ node - found
+
+ NOTE
+ it uses pins[0..2], on return the pin[2] keeps the node found
+ all other pins are removed.
+*/
+static LF_SLIST *lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs,
+ uint32 hashnr, const uchar *key, uint keylen,
+ LF_PINS *pins)
+{
+ CURSOR cursor;
+ int res= lfind(head, cs, hashnr, key, keylen, &cursor, pins);
+ if (res)
+ _lf_pin(pins, 2, cursor.curr);
+ _lf_unpin(pins, 0);
+ _lf_unpin(pins, 1);
+ return res ? cursor.curr : 0;
+}
+
+static inline const uchar* hash_key(const LF_HASH *hash,
+ const uchar *record, size_t *length)
+{
+ if (hash->get_key)
+ return (*hash->get_key)(record, length, 0);
+ *length= hash->key_length;
+ return record + hash->key_offset;
+}
+
+/*
+ Compute the hash key value from the raw key.
+
+ @note, that the hash value is limited to 2^31, because we need one
+ bit to distinguish between normal and dummy nodes.
+*/
+static inline uint calc_hash(LF_HASH *hash, const uchar *key, uint keylen)
+{
+ ulong nr1= 1, nr2= 4;
+ hash->charset->coll->hash_sort(hash->charset, (uchar*) key, keylen,
+ &nr1, &nr2);
+ return nr1 & INT_MAX32;
+}
+
+#define MAX_LOAD 1.0 /* average number of elements in a bucket */
+
+static int initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *);
+
+/*
+ Initializes lf_hash, the arguments are compatible with hash_init
+
+ @note element_size sets both the size of allocated memory block for
+ lf_alloc and a size of memcpy'ed block size in lf_hash_insert. Typically
+ they are the same, indeed. But LF_HASH::element_size can be decreased
+ after lf_hash_init, and then lf_alloc will allocate larger block that
+ lf_hash_insert will copy over. It is desireable if part of the element
+ is expensive to initialize - for example if there is a mutex or
+ DYNAMIC_ARRAY. In this case they should be initialize in the
+ LF_ALLOCATOR::constructor, and lf_hash_insert should not overwrite them.
+ See wt_init() for example.
+*/
+void lf_hash_init(LF_HASH *hash, uint element_size, uint flags,
+ uint key_offset, uint key_length, my_hash_get_key get_key,
+ CHARSET_INFO *charset)
+{
+ lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size,
+ offsetof(LF_SLIST, key));
+ lf_dynarray_init(&hash->array, sizeof(LF_SLIST *));
+ hash->size= 1;
+ hash->count= 0;
+ hash->element_size= element_size;
+ hash->flags= flags;
+ hash->charset= charset ? charset : &my_charset_bin;
+ hash->key_offset= key_offset;
+ hash->key_length= key_length;
+ hash->get_key= get_key;
+ DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length);
+}
+
+void lf_hash_destroy(LF_HASH *hash)
+{
+ LF_SLIST *el, **head= (LF_SLIST **)_lf_dynarray_value(&hash->array, 0);
+
+ if (unlikely(!head))
+ return;
+ el= *head;
+
+ while (el)
+ {
+ intptr next= el->link;
+ if (el->hashnr & 1)
+ lf_alloc_direct_free(&hash->alloc, el); /* normal node */
+ else
+ my_free((void *)el, MYF(0)); /* dummy node */
+ el= (LF_SLIST *)next;
+ }
+ lf_alloc_destroy(&hash->alloc);
+ lf_dynarray_destroy(&hash->array);
+}
+
+/*
+ DESCRIPTION
+ inserts a new element to a hash. it will have a _copy_ of
+ data, not a pointer to it.
+
+ RETURN
+ 0 - inserted
+ 1 - didn't (unique key conflict)
+ -1 - out of memory
+
+ NOTE
+ see linsert() for pin usage notes
+*/
+int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data)
+{
+ int csize, bucket, hashnr;
+ LF_SLIST *node, * volatile *el;
+
+ lf_rwlock_by_pins(pins);
+ node= (LF_SLIST *)_lf_alloc_new(pins);
+ if (unlikely(!node))
+ return -1;
+ memcpy(node+1, data, hash->element_size);
+ node->key= hash_key(hash, (uchar *)(node+1), &node->keylen);
+ hashnr= calc_hash(hash, node->key, node->keylen);
+ bucket= hashnr % hash->size;
+ el= _lf_dynarray_lvalue(&hash->array, bucket);
+ if (unlikely(!el))
+ return -1;
+ if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
+ return -1;
+ node->hashnr= my_reverse_bits(hashnr) | 1; /* normal node */
+ if (linsert(el, hash->charset, node, pins, hash->flags))
+ {
+ _lf_alloc_free(pins, node);
+ lf_rwunlock_by_pins(pins);
+ return 1;
+ }
+ csize= hash->size;
+ if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD)
+ my_atomic_cas32(&hash->size, &csize, csize*2);
+ lf_rwunlock_by_pins(pins);
+ return 0;
+}
+
+/*
+ DESCRIPTION
+ deletes an element with the given key from the hash (if a hash is
+ not unique and there're many elements with this key - the "first"
+ matching element is deleted)
+ RETURN
+ 0 - deleted
+ 1 - didn't (not found)
+ -1 - out of memory
+ NOTE
+ see ldelete() for pin usage notes
+*/
+int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
+{
+ LF_SLIST * volatile *el;
+ uint bucket, hashnr= calc_hash(hash, (uchar *)key, keylen);
+
+ bucket= hashnr % hash->size;
+ lf_rwlock_by_pins(pins);
+ el= _lf_dynarray_lvalue(&hash->array, bucket);
+ if (unlikely(!el))
+ return -1;
+ /*
+ note that we still need to initialize_bucket here,
+ we cannot return "node not found", because an old bucket of that
+ node may've been split and the node was assigned to a new bucket
+ that was never accessed before and thus is not initialized.
+ */
+ if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
+ return -1;
+ if (ldelete(el, hash->charset, my_reverse_bits(hashnr) | 1,
+ (uchar *)key, keylen, pins))
+ {
+ lf_rwunlock_by_pins(pins);
+ return 1;
+ }
+ my_atomic_add32(&hash->count, -1);
+ lf_rwunlock_by_pins(pins);
+ return 0;
+}
+
+/*
+ RETURN
+ a pointer to an element with the given key (if a hash is not unique and
+ there're many elements with this key - the "first" matching element)
+ NULL if nothing is found
+ MY_ERRPTR if OOM
+
+ NOTE
+ see lsearch() for pin usage notes
+*/
+void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
+{
+ LF_SLIST * volatile *el, *found;
+ uint bucket, hashnr= calc_hash(hash, (uchar *)key, keylen);
+
+ bucket= hashnr % hash->size;
+ lf_rwlock_by_pins(pins);
+ el= _lf_dynarray_lvalue(&hash->array, bucket);
+ if (unlikely(!el))
+ return MY_ERRPTR;
+ if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
+ return MY_ERRPTR;
+ found= lsearch(el, hash->charset, my_reverse_bits(hashnr) | 1,
+ (uchar *)key, keylen, pins);
+ lf_rwunlock_by_pins(pins);
+ return found ? found+1 : 0;
+}
+
+static const uchar *dummy_key= (uchar*)"";
+
+/*
+ RETURN
+ 0 - ok
+ -1 - out of memory
+*/
+static int initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node,
+ uint bucket, LF_PINS *pins)
+{
+ uint parent= my_clear_highest_bit(bucket);
+ LF_SLIST *dummy= (LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME));
+ LF_SLIST **tmp= 0, *cur;
+ LF_SLIST * volatile *el= _lf_dynarray_lvalue(&hash->array, parent);
+ if (unlikely(!el || !dummy))
+ return -1;
+ if (*el == NULL && bucket &&
+ unlikely(initialize_bucket(hash, el, parent, pins)))
+ return -1;
+ dummy->hashnr= my_reverse_bits(bucket) | 0; /* dummy node */
+ dummy->key= dummy_key;
+ dummy->keylen= 0;
+ if ((cur= linsert(el, hash->charset, dummy, pins, LF_HASH_UNIQUE)))
+ {
+ my_free((void *)dummy, MYF(0));
+ dummy= cur;
+ }
+ my_atomic_casptr((void **)node, (void **)&tmp, dummy);
+ /*
+ note that if the CAS above failed (after linsert() succeeded),
+ it would mean that some other thread has executed linsert() for
+ the same dummy node, its linsert() failed, it picked up our
+ dummy node (in "dummy= cur") and executed the same CAS as above.
+ Which means that even if CAS above failed we don't need to retry,
+ and we should not free(dummy) - there's no memory leak here
+ */
+ return 0;
+}
diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c
index b2972faf0f8..ba59c483012 100644
--- a/mysys/my_thr_init.c
+++ b/mysys/my_thr_init.c
@@ -284,6 +284,9 @@ my_bool my_thread_init(void)
pthread_cond_init(&tmp->suspend, NULL);
tmp->init= 1;
+ tmp->stack_ends_here= (char*)&tmp +
+ STACK_DIRECTION * (long)my_thread_stack_size;
+
pthread_mutex_lock(&THR_LOCK_threads);
tmp->id= ++thread_id;
++THR_thread_count;
diff --git a/unittest/mysys/Makefile.am b/unittest/mysys/Makefile.am
index 6e8058c4d9b..d83b2909048 100644
--- a/unittest/mysys/Makefile.am
+++ b/unittest/mysys/Makefile.am
@@ -23,7 +23,7 @@ LDADD = $(top_builddir)/unittest/mytap/libmytap.a \
$(top_builddir)/dbug/libdbug.a \
$(top_builddir)/strings/libmystrings.a
-noinst_PROGRAMS = bitmap-t base64-t my_vsnprintf-t
+noinst_PROGRAMS = bitmap-t base64-t lf-t my_vsnprintf-t
if NEED_THREAD
# my_atomic-t is used to check thread functions, so it is safe to
diff --git a/unittest/mysys/lf-t.c b/unittest/mysys/lf-t.c
new file mode 100644
index 00000000000..61b7ae08cf5
--- /dev/null
+++ b/unittest/mysys/lf-t.c
@@ -0,0 +1,178 @@
+/* Copyright (C) 2008-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/**
+ @file
+
+ Unit tests for lock-free algorithms of mysys
+*/
+
+#include "thr_template.c"
+
+#include <lf.h>
+
+int32 inserts= 0, N;
+LF_ALLOCATOR lf_allocator;
+LF_HASH lf_hash;
+
+/*
+ pin allocator - alloc and release an element in a loop
+*/
+pthread_handler_t test_lf_pinbox(void *arg)
+{
+ int m= *(int *)arg;
+ int32 x= 0;
+ LF_PINS *pins;
+
+ my_thread_init();
+
+ pins= lf_pinbox_get_pins(&lf_allocator.pinbox);
+
+ for (x= ((int)(intptr)(&m)); m ; m--)
+ {
+ lf_pinbox_put_pins(pins);
+ pins= lf_pinbox_get_pins(&lf_allocator.pinbox);
+ }
+ lf_pinbox_put_pins(pins);
+ pthread_mutex_lock(&mutex);
+ if (!--running_threads) pthread_cond_signal(&cond);
+ pthread_mutex_unlock(&mutex);
+ my_thread_end();
+ return 0;
+}
+
+/*
+ thread local data area, allocated using lf_alloc.
+ union is required to enforce the minimum required element size (sizeof(ptr))
+*/
+typedef union {
+ int32 data;
+ void *not_used;
+} TLA;
+
+pthread_handler_t test_lf_alloc(void *arg)
+{
+ int m= (*(int *)arg)/2;
+ int32 x,y= 0;
+ LF_PINS *pins;
+
+ my_thread_init();
+
+ pins= lf_alloc_get_pins(&lf_allocator);
+
+ for (x= ((int)(intptr)(&m)); m ; m--)
+ {
+ TLA *node1, *node2;
+ x= (x*m+0x87654321) & INT_MAX32;
+ node1= (TLA *)lf_alloc_new(pins);
+ node1->data= x;
+ y+= node1->data;
+ node1->data= 0;
+ node2= (TLA *)lf_alloc_new(pins);
+ node2->data= x;
+ y-= node2->data;
+ node2->data= 0;
+ lf_alloc_free(pins, node1);
+ lf_alloc_free(pins, node2);
+ }
+ lf_alloc_put_pins(pins);
+ pthread_mutex_lock(&mutex);
+ bad+= y;
+
+ if (--N == 0)
+ {
+ diag("%d mallocs, %d pins in stack",
+ lf_allocator.mallocs, lf_allocator.pinbox.pins_in_array);
+#ifdef MY_LF_EXTRA_DEBUG
+ bad|= lf_allocator.mallocs - lf_alloc_pool_count(&lf_allocator);
+#endif
+ }
+ if (!--running_threads) pthread_cond_signal(&cond);
+ pthread_mutex_unlock(&mutex);
+ my_thread_end();
+ return 0;
+}
+
+#define N_TLH 1000
+pthread_handler_t test_lf_hash(void *arg)
+{
+ int m= (*(int *)arg)/(2*N_TLH);
+ int32 x,y,z,sum= 0, ins= 0;
+ LF_PINS *pins;
+
+ my_thread_init();
+
+ pins= lf_hash_get_pins(&lf_hash);
+
+ for (x= ((int)(intptr)(&m)); m ; m--)
+ {
+ int i;
+ y= x;
+ for (i= 0; i < N_TLH; i++)
+ {
+ x= (x*(m+i)+0x87654321) & INT_MAX32;
+ z= (x<0) ? -x : x;
+ if (lf_hash_insert(&lf_hash, pins, &z))
+ {
+ sum+= z;
+ ins++;
+ }
+ }
+ for (i= 0; i < N_TLH; i++)
+ {
+ y= (y*(m+i)+0x87654321) & INT_MAX32;
+ z= (y<0) ? -y : y;
+ if (lf_hash_delete(&lf_hash, pins, (uchar *)&z, sizeof(z)))
+ sum-= z;
+ }
+ }
+ lf_hash_put_pins(pins);
+ pthread_mutex_lock(&mutex);
+ bad+= sum;
+ inserts+= ins;
+
+ if (--N == 0)
+ {
+ diag("%d mallocs, %d pins in stack, %d hash size, %d inserts",
+ lf_hash.alloc.mallocs, lf_hash.alloc.pinbox.pins_in_array,
+ lf_hash.size, inserts);
+ bad|= lf_hash.count;
+ }
+ if (!--running_threads) pthread_cond_signal(&cond);
+ pthread_mutex_unlock(&mutex);
+ my_thread_end();
+ return 0;
+}
+
+
+void do_tests()
+{
+ plan(4);
+
+ lf_alloc_init(&lf_allocator, sizeof(TLA), offsetof(TLA, not_used));
+ lf_hash_init(&lf_hash, sizeof(int), LF_HASH_UNIQUE, 0, sizeof(int), 0,
+ &my_charset_bin);
+
+ bad= my_atomic_initialize();
+ ok(!bad, "my_atomic_initialize() returned %d", bad);
+
+ test_concurrently("lf_pinbox", test_lf_pinbox, N= THREADS, CYCLES);
+ test_concurrently("lf_alloc", test_lf_alloc, N= THREADS, CYCLES);
+ test_concurrently("lf_hash", test_lf_hash, N= THREADS, CYCLES/10);
+
+ lf_hash_destroy(&lf_hash);
+ lf_alloc_destroy(&lf_allocator);
+}
+