summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdhemerval Zanella <adhemerval.zanella@linaro.org>2017-08-31 20:24:15 -0300
committerAdhemerval Zanella <adhemerval.zanella@linaro.org>2018-01-18 10:30:55 -0200
commitde7326abda657b27142b445d0647722cc1713b99 (patch)
tree0a25b9103454dd2d210bfb35282c4d258903131b
parenta707334fdfcac4ce7b479be6ac8727c45893bdab (diff)
downloadglibc-azanella/qsort-smooth.tar.gz
stdlib: Use smoothsort for qsort{_r}azanella/qsort-smooth
This patch changes the internal qsort implementation to use the smoothsort algorithm instead of a combination of quicksort/mergesort. Current implementation has some issues: * stdlib/Makefile (routines): Remove msort. (CFLAGS-msort.c): Remove rule. * stdlib/msort.c: Remove file. * stdlib/qsort.c (__qsort_r, qsort): Reimplement using smoothsort. * stdlib/qsort_common.c: New file.
-rw-r--r--stdlib/Makefile3
-rw-r--r--stdlib/msort.c310
-rw-r--r--stdlib/qsort.c428
-rw-r--r--stdlib/qsort_common.c169
4 files changed, 379 insertions, 531 deletions
diff --git a/stdlib/Makefile b/stdlib/Makefile
index 6ef20a70bd..a570c767b1 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -34,7 +34,7 @@ headers := stdlib.h bits/stdlib.h bits/stdlib-ldbl.h bits/stdlib-float.h \
routines := \
atof atoi atol atoll \
abort \
- bsearch qsort msort \
+ bsearch qsort \
getenv putenv setenv secure-getenv \
exit on_exit atexit cxa_atexit cxa_finalize old_atexit \
quick_exit at_quick_exit cxa_at_quick_exit cxa_thread_atexit_impl \
@@ -136,7 +136,6 @@ extra-test-objs += tst-putenvmod.os
generated += isomac isomac.out tst-putenvmod.so
CFLAGS-bsearch.c += $(uses-callbacks)
-CFLAGS-msort.c += $(uses-callbacks)
CFLAGS-qsort.c += $(uses-callbacks)
CFLAGS-system.c += -fexceptions
CFLAGS-system.os = -fomit-frame-pointer
diff --git a/stdlib/msort.c b/stdlib/msort.c
deleted file mode 100644
index 266c2538c0..0000000000
--- a/stdlib/msort.c
+++ /dev/null
@@ -1,310 +0,0 @@
-/* An alternative to qsort, with an identical interface.
- This file is part of the GNU C Library.
- Copyright (C) 1992-2018 Free Software Foundation, Inc.
- Written by Mike Haertel, September 1988.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <alloca.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <memcopy.h>
-#include <errno.h>
-#include <atomic.h>
-
-struct msort_param
-{
- size_t s;
- size_t var;
- __compar_d_fn_t cmp;
- void *arg;
- char *t;
-};
-static void msort_with_tmp (const struct msort_param *p, void *b, size_t n);
-
-static void
-msort_with_tmp (const struct msort_param *p, void *b, size_t n)
-{
- char *b1, *b2;
- size_t n1, n2;
-
- if (n <= 1)
- return;
-
- n1 = n / 2;
- n2 = n - n1;
- b1 = b;
- b2 = (char *) b + (n1 * p->s);
-
- msort_with_tmp (p, b1, n1);
- msort_with_tmp (p, b2, n2);
-
- char *tmp = p->t;
- const size_t s = p->s;
- __compar_d_fn_t cmp = p->cmp;
- void *arg = p->arg;
- switch (p->var)
- {
- case 0:
- while (n1 > 0 && n2 > 0)
- {
- if ((*cmp) (b1, b2, arg) <= 0)
- {
- *(uint32_t *) tmp = *(uint32_t *) b1;
- b1 += sizeof (uint32_t);
- --n1;
- }
- else
- {
- *(uint32_t *) tmp = *(uint32_t *) b2;
- b2 += sizeof (uint32_t);
- --n2;
- }
- tmp += sizeof (uint32_t);
- }
- break;
- case 1:
- while (n1 > 0 && n2 > 0)
- {
- if ((*cmp) (b1, b2, arg) <= 0)
- {
- *(uint64_t *) tmp = *(uint64_t *) b1;
- b1 += sizeof (uint64_t);
- --n1;
- }
- else
- {
- *(uint64_t *) tmp = *(uint64_t *) b2;
- b2 += sizeof (uint64_t);
- --n2;
- }
- tmp += sizeof (uint64_t);
- }
- break;
- case 2:
- while (n1 > 0 && n2 > 0)
- {
- unsigned long *tmpl = (unsigned long *) tmp;
- unsigned long *bl;
-
- tmp += s;
- if ((*cmp) (b1, b2, arg) <= 0)
- {
- bl = (unsigned long *) b1;
- b1 += s;
- --n1;
- }
- else
- {
- bl = (unsigned long *) b2;
- b2 += s;
- --n2;
- }
- while (tmpl < (unsigned long *) tmp)
- *tmpl++ = *bl++;
- }
- break;
- case 3:
- while (n1 > 0 && n2 > 0)
- {
- if ((*cmp) (*(const void **) b1, *(const void **) b2, arg) <= 0)
- {
- *(void **) tmp = *(void **) b1;
- b1 += sizeof (void *);
- --n1;
- }
- else
- {
- *(void **) tmp = *(void **) b2;
- b2 += sizeof (void *);
- --n2;
- }
- tmp += sizeof (void *);
- }
- break;
- default:
- while (n1 > 0 && n2 > 0)
- {
- if ((*cmp) (b1, b2, arg) <= 0)
- {
- tmp = (char *) __mempcpy (tmp, b1, s);
- b1 += s;
- --n1;
- }
- else
- {
- tmp = (char *) __mempcpy (tmp, b2, s);
- b2 += s;
- --n2;
- }
- }
- break;
- }
-
- if (n1 > 0)
- memcpy (tmp, b1, n1 * s);
- memcpy (b, p->t, (n - n2) * s);
-}
-
-
-void
-__qsort_r (void *b, size_t n, size_t s, __compar_d_fn_t cmp, void *arg)
-{
- size_t size = n * s;
- char *tmp = NULL;
- struct msort_param p;
-
- /* For large object sizes use indirect sorting. */
- if (s > 32)
- size = 2 * n * sizeof (void *) + s;
-
- if (size < 1024)
- /* The temporary array is small, so put it on the stack. */
- p.t = __alloca (size);
- else
- {
- /* We should avoid allocating too much memory since this might
- have to be backed up by swap space. */
- static long int phys_pages;
- static int pagesize;
-
- if (pagesize == 0)
- {
- phys_pages = __sysconf (_SC_PHYS_PAGES);
-
- if (phys_pages == -1)
- /* Error while determining the memory size. So let's
- assume there is enough memory. Otherwise the
- implementer should provide a complete implementation of
- the `sysconf' function. */
- phys_pages = (long int) (~0ul >> 1);
-
- /* The following determines that we will never use more than
- a quarter of the physical memory. */
- phys_pages /= 4;
-
- /* Make sure phys_pages is written to memory. */
- atomic_write_barrier ();
-
- pagesize = __sysconf (_SC_PAGESIZE);
- }
-
- /* Just a comment here. We cannot compute
- phys_pages * pagesize
- and compare the needed amount of memory against this value.
- The problem is that some systems might have more physical
- memory then can be represented with a `size_t' value (when
- measured in bytes. */
-
- /* If the memory requirements are too high don't allocate memory. */
- if (size / pagesize > (size_t) phys_pages)
- {
- _quicksort (b, n, s, cmp, arg);
- return;
- }
-
- /* It's somewhat large, so malloc it. */
- int save = errno;
- tmp = malloc (size);
- __set_errno (save);
- if (tmp == NULL)
- {
- /* Couldn't get space, so use the slower algorithm
- that doesn't need a temporary array. */
- _quicksort (b, n, s, cmp, arg);
- return;
- }
- p.t = tmp;
- }
-
- p.s = s;
- p.var = 4;
- p.cmp = cmp;
- p.arg = arg;
-
- if (s > 32)
- {
- /* Indirect sorting. */
- char *ip = (char *) b;
- void **tp = (void **) (p.t + n * sizeof (void *));
- void **t = tp;
- void *tmp_storage = (void *) (tp + n);
-
- while ((void *) t < tmp_storage)
- {
- *t++ = ip;
- ip += s;
- }
- p.s = sizeof (void *);
- p.var = 3;
- msort_with_tmp (&p, p.t + n * sizeof (void *), n);
-
- /* tp[0] .. tp[n - 1] is now sorted, copy around entries of
- the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */
- char *kp;
- size_t i;
- for (i = 0, ip = (char *) b; i < n; i++, ip += s)
- if ((kp = tp[i]) != ip)
- {
- size_t j = i;
- char *jp = ip;
- memcpy (tmp_storage, ip, s);
-
- do
- {
- size_t k = (kp - (char *) b) / s;
- tp[j] = jp;
- memcpy (jp, kp, s);
- j = k;
- jp = kp;
- kp = tp[k];
- }
- while (kp != ip);
-
- tp[j] = jp;
- memcpy (jp, tmp_storage, s);
- }
- }
- else
- {
- if ((s & (sizeof (uint32_t) - 1)) == 0
- && ((char *) b - (char *) 0) % __alignof__ (uint32_t) == 0)
- {
- if (s == sizeof (uint32_t))
- p.var = 0;
- else if (s == sizeof (uint64_t)
- && ((char *) b - (char *) 0) % __alignof__ (uint64_t) == 0)
- p.var = 1;
- else if ((s & (sizeof (unsigned long) - 1)) == 0
- && ((char *) b - (char *) 0)
- % __alignof__ (unsigned long) == 0)
- p.var = 2;
- }
- msort_with_tmp (&p, b, n);
- }
- free (tmp);
-}
-libc_hidden_def (__qsort_r)
-weak_alias (__qsort_r, qsort_r)
-
-
-void
-qsort (void *b, size_t n, size_t s, __compar_fn_t cmp)
-{
- return __qsort_r (b, n, s, (__compar_d_fn_t) cmp, NULL);
-}
-libc_hidden_def (qsort)
diff --git a/stdlib/qsort.c b/stdlib/qsort.c
index 264a06b8a9..f14b469989 100644
--- a/stdlib/qsort.c
+++ b/stdlib/qsort.c
@@ -1,10 +1,10 @@
-/* Copyright (C) 1991-2018 Free Software Foundation, Inc.
+/* qsort, sort an array.
This file is part of the GNU C Library.
- Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
+ Copyright (C) 2018 Free Software Foundation, Inc.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
+ GLicense as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
@@ -16,234 +16,224 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-/* If you consider tuning this algorithm, you should consult first:
- Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
- Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
+/* The qsort{_r} implementation is based on Smoothsort by Dijkstra [1]
+ with the optimization to used O(1) extra space (implicit Leonardo
+ Heaps). A extensive analysis on algorithm details was written by
+ Keith Schwarz [2]. This implementation is based on paper [3].
+
+ The Smoothsort is used because:
+
+ 1. follows the comparison sort with an average-case lower bound of
+ O(n * log (n)).
+
+ 2. It uses O(1) extra memory (the implicit Leonardo heap plus stack
+ function usage). It allows the function to be as-safe and also
+ to avoid extra latency (due a possible malloc or extra function
+ calls on a mergesort for instance).
+
+ 3. It has a the advantage over heapsort to approximate of O(n) for
+ inputs already sorted in some degree.
+
+ [1] http://www.cs.utexas.edu/users/EWD/ewd07xx/EWD796a.PDF
+ [2] http://www.keithschwarz.com/smoothsort/
+ [3] http://www.enterag.ch/hartwig/order/smoothsort.pdf
+ */
#include <alloca.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
+#include <stdbool.h>
-/* Byte-wise swap two items of size SIZE. */
-#define SWAP(a, b, size) \
- do \
- { \
- size_t __size = (size); \
- char *__a = (a), *__b = (b); \
- do \
- { \
- char __tmp = *__a; \
- *__a++ = *__b; \
- *__b++ = __tmp; \
- } while (--__size > 0); \
- } while (0)
-
-/* Discontinue quicksort algorithm when partition gets below this size.
- This particular magic number was chosen to work best on a Sun 4/260. */
-#define MAX_THRESH 4
-
-/* Stack node declarations used to store unfulfilled partition obligations. */
-typedef struct
- {
- char *lo;
- char *hi;
- } stack_node;
-
-/* The next 4 #defines implement a very fast in-line stack abstraction. */
-/* The stack needs log (total_elements) entries (we could even subtract
- log(MAX_THRESH)). Since total_elements has type size_t, we get as
- upper bound for log (total_elements):
- bits per byte (CHAR_BIT) * sizeof(size_t). */
-#define STACK_SIZE (CHAR_BIT * sizeof(size_t))
-#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
-#define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
-#define STACK_NOT_EMPTY (stack < top)
-
-
-/* Order size using quicksort. This implementation incorporates
- four optimizations discussed in Sedgewick:
-
- 1. Non-recursive, using an explicit stack of pointer that store the
- next array partition to sort. To save time, this maximum amount
- of space required to store an array of SIZE_MAX is allocated on the
- stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
- only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
- Pretty cheap, actually.
-
- 2. Chose the pivot element using a median-of-three decision tree.
- This reduces the probability of selecting a bad pivot value and
- eliminates certain extraneous comparisons.
-
- 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
- insertion sort to order the MAX_THRESH items within each partition.
- This is a big win, since insertion sort is faster for small, mostly
- sorted array segments.
-
- 4. The larger of the two sub-partitions is always pushed onto the
- stack first, with the algorithm then concentrating on the
- smaller partition. This *guarantees* no more than log (total_elems)
- stack size is needed (actually O(1) in this case)! */
+/* Helper to get the address of BASE access as an array of elements of
+ size SIZE. */
+static inline void *
+arr (void *base, size_t idx, size_t size)
+{
+ return (void*)((uintptr_t)base + (idx * size));
+}
-void
-_quicksort (void *const pbase, size_t total_elems, size_t size,
- __compar_d_fn_t cmp, void *arg)
+/* Bitvector to encode the used Leonardo Heaps. We need at most 1.7i bits
+ to encode all the Leonardo numbers that can fit in a machine with word
+ of size 2^i. */
+typedef size_t lnbitvector_t[2];
+
+static inline void
+incr (lnbitvector_t p)
{
- char *base_ptr = (char *) pbase;
+ size_t r = p[0] + 1;
+ p[1] += ((p[0] ^ r) & p[0]) >> (sizeof(size_t) * 8 - 1);
+ p[0] = r;
+}
- const size_t max_thresh = MAX_THRESH * size;
+static inline void
+decr (lnbitvector_t p)
+{
+ size_t r = p[0] - 1;
+ p[1] -= ((r ^ p[0]) & r) >> (sizeof(size_t) * 8 - 1);
+ p[0] = r;
+}
- if (total_elems == 0)
- /* Avoid lossage with unsigned arithmetic below. */
- return;
+static inline void
+shr (lnbitvector_t p)
+{
+ p[0] >>= 1;
+ p[0] |= p[1] << (sizeof(size_t) * 8 - 1);
+ p[1] >>= 1;
+}
+
+static inline void
+shl (lnbitvector_t p)
+{
+ p[1] <<= 1;
+ p[1] |= p[0] >> (sizeof(size_t) * 8 - 1);
+ p[0] <<= 1;
+}
- if (total_elems > MAX_THRESH)
+
+/* Swap SIZE bytes between addresses A and B. Helper to generic types
+ are provided as an optimization. */
+
+typedef void (*swap_t)(void *, void *, size_t);
+
+static inline bool
+check_alignment (const void *base, size_t align)
+{
+ return _STRING_ARCH_unaligned || ((uintptr_t)base % (align - 1)) == 0;
+}
+
+static void
+swap_u32 (void *a, void *b, size_t size)
+{
+ uint32_t tmp = *(uint32_t*) a;
+ *(uint32_t*) a = *(uint32_t*) b;
+ *(uint32_t*) b = tmp;
+}
+
+static void
+swap_u64 (void *a, void *b, size_t size)
+{
+ uint64_t tmp = *(uint64_t*) a;
+ *(uint64_t*) a = *(uint64_t*) b;
+ *(uint64_t*) b = tmp;
+}
+
+static inline void
+swap_generic (void *a, void *b, size_t size)
+{
+ unsigned char tmp[256];
+ do
{
- char *lo = base_ptr;
- char *hi = &lo[size * (total_elems - 1)];
- stack_node stack[STACK_SIZE];
- stack_node *top = stack;
-
- PUSH (NULL, NULL);
-
- while (STACK_NOT_EMPTY)
- {
- char *left_ptr;
- char *right_ptr;
-
- /* Select median value from among LO, MID, and HI. Rearrange
- LO and HI so the three values are sorted. This lowers the
- probability of picking a pathological pivot value and
- skips a comparison for both the LEFT_PTR and RIGHT_PTR in
- the while loops. */
-
- char *mid = lo + size * ((hi - lo) / size >> 1);
-
- if ((*cmp) ((void *) mid, (void *) lo, arg) < 0)
- SWAP (mid, lo, size);
- if ((*cmp) ((void *) hi, (void *) mid, arg) < 0)
- SWAP (mid, hi, size);
- else
- goto jump_over;
- if ((*cmp) ((void *) mid, (void *) lo, arg) < 0)
- SWAP (mid, lo, size);
- jump_over:;
-
- left_ptr = lo + size;
- right_ptr = hi - size;
-
- /* Here's the famous ``collapse the walls'' section of quicksort.
- Gotta like those tight inner loops! They are the main reason
- that this algorithm runs much faster than others. */
- do
- {
- while ((*cmp) ((void *) left_ptr, (void *) mid, arg) < 0)
- left_ptr += size;
-
- while ((*cmp) ((void *) mid, (void *) right_ptr, arg) < 0)
- right_ptr -= size;
-
- if (left_ptr < right_ptr)
- {
- SWAP (left_ptr, right_ptr, size);
- if (mid == left_ptr)
- mid = right_ptr;
- else if (mid == right_ptr)
- mid = left_ptr;
- left_ptr += size;
- right_ptr -= size;
- }
- else if (left_ptr == right_ptr)
- {
- left_ptr += size;
- right_ptr -= size;
- break;
- }
- }
- while (left_ptr <= right_ptr);
-
- /* Set up pointers for next iteration. First determine whether
- left and right partitions are below the threshold size. If so,
- ignore one or both. Otherwise, push the larger partition's
- bounds on the stack and continue sorting the smaller one. */
-
- if ((size_t) (right_ptr - lo) <= max_thresh)
- {
- if ((size_t) (hi - left_ptr) <= max_thresh)
- /* Ignore both small partitions. */
- POP (lo, hi);
- else
- /* Ignore small left partition. */
- lo = left_ptr;
- }
- else if ((size_t) (hi - left_ptr) <= max_thresh)
- /* Ignore small right partition. */
- hi = right_ptr;
- else if ((right_ptr - lo) > (hi - left_ptr))
- {
- /* Push larger left partition indices. */
- PUSH (lo, right_ptr);
- lo = left_ptr;
- }
- else
- {
- /* Push larger right partition indices. */
- PUSH (left_ptr, hi);
- hi = right_ptr;
- }
- }
+ size_t s = size > sizeof (tmp) ? sizeof (tmp) : size;
+ memcpy (tmp, a, s);
+ a = __mempcpy (a, b, s);
+ b = __mempcpy (b, tmp, s);
+ size -= s;
}
+ while (size > 0);
+}
- /* Once the BASE_PTR array is partially sorted by quicksort the rest
- is completely sorted using insertion sort, since this is efficient
- for partitions below MAX_THRESH size. BASE_PTR points to the beginning
- of the array to sort, and END_PTR points at the very last element in
- the array (*not* one beyond it!). */
-
-#define min(x, y) ((x) < (y) ? (x) : (y))
-
- {
- char *const end_ptr = &base_ptr[size * (total_elems - 1)];
- char *tmp_ptr = base_ptr;
- char *thresh = min(end_ptr, base_ptr + max_thresh);
- char *run_ptr;
-
- /* Find smallest element in first threshold and place it at the
- array's beginning. This is the smallest array element,
- and the operation speeds up insertion sort's inner loop. */
-
- for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
- if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)
- tmp_ptr = run_ptr;
-
- if (tmp_ptr != base_ptr)
- SWAP (tmp_ptr, base_ptr, size);
-
- /* Insertion sort, running from left-hand-side up to right-hand-side. */
-
- run_ptr = base_ptr + size;
- while ((run_ptr += size) <= end_ptr)
- {
- tmp_ptr = run_ptr - size;
- while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)
- tmp_ptr -= size;
-
- tmp_ptr += size;
- if (tmp_ptr != run_ptr)
- {
- char *trav;
-
- trav = run_ptr + size;
- while (--trav >= run_ptr)
- {
- char c = *trav;
- char *hi, *lo;
-
- for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
- *hi = *lo;
- *hi = c;
- }
- }
- }
- }
+static inline swap_t
+select_swap_func (const void *base, size_t size)
+{
+ if (size == 4 && check_alignment (base, 4))
+ return swap_u32;
+ else if (size == 8 && check_alignment (base, 8))
+ return swap_u64;
+ return swap_generic;
}
+
+
+struct state_t
+{
+ size_t q; /* Length of the unsorted prefix (1 <= q <= n). */
+ size_t n; /* Total number of elements. */
+ size_t r;
+ size_t b; /* Leonardo number. */
+ size_t c; /* Next Leonardo number. */
+ lnbitvector_t p; /* Bitvector to track the Leonardo heaps. */
+};
+
+static inline size_t
+up (struct state_t *s)
+{
+ shr (s->p);
+
+ size_t next = s->b + s->c + 1;
+ s->c = s->b;
+ s->b = next;
+ return next;
+}
+
+static inline size_t
+down (struct state_t *s, size_t bit)
+{
+ shl (s->p);
+
+ s->p[0] |= bit;
+
+ size_t prev = s->c;
+ s->c = s->b - s->c - 1;
+ s->b = prev;
+ return prev;
+}
+
+/* Invariant arguments used generic functions. */
+struct args_t
+{
+ void *m; /* Base pointer to array. */
+ size_t s; /* Element size. */
+ __compar_fn_t cmp; /* Comparison function. */
+ swap_t swap; /* Swap function. */
+};
+
+#include "qsort_common.c"
+
+void
+qsort (void *base, size_t nmemb, size_t size, __compar_fn_t cmp)
+{
+ struct args_t args = { base, size, cmp, select_swap_func (base, size) };
+ struct state_t s = { 1, nmemb, 0, 1, 1, { 1, 0 } };
+
+ if (nmemb <= 0)
+ return;
+
+ buildtree (&args, &s);
+
+ trinkle (&args, s.r, s);
+
+ buildsorted (&args, &s);
+}
+libc_hidden_def (qsort)
+
+struct args_t_r
+{
+ void *m;
+ size_t s;
+ __compar_d_fn_t cmp;
+ void *a;
+ swap_t swap;
+};
+
+#define R_VERSION
+#include "qsort_common.c"
+
+void
+__qsort_r (void *base, size_t nmemb, size_t size, __compar_d_fn_t cmp,
+ void *arg)
+{
+ struct args_t_r args = { base, size, cmp, arg, select_swap_func (base, size) };
+ struct state_t s = { 1, nmemb, 0, 1, 1, { 1, 0 } };
+
+ if (nmemb <= 1)
+ return;
+
+ buildtree_r (&args, &s);
+
+ trinkle_r (&args, s.r, s);
+
+ buildsorted_r (&args, &s);
+}
+
+libc_hidden_def (__qsort_r)
+weak_alias (__qsort_r, qsort_r)
diff --git a/stdlib/qsort_common.c b/stdlib/qsort_common.c
new file mode 100644
index 0000000000..20cae9b784
--- /dev/null
+++ b/stdlib/qsort_common.c
@@ -0,0 +1,169 @@
+/* Common implementation for qsort and qsort_r.
+ This file is part of the GNU C Library.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ GLicense as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef R_VERSION
+# define R_NAME(func) func ## _r
+# define R_CMP_TYPE __compar_fn_t
+# define R_CMP(args, p1, p2) (args)->cmp (p1, p2, args->a)
+#else
+# define R_NAME(func) func
+# define R_CMP_TYPE __compar_d_fn_t
+# define R_CMP(args, p1, p2) (args)->cmp (p1, p2)
+#endif
+
+static void
+R_NAME (sift) (const struct R_NAME(args_t) *args, size_t r, struct state_t s)
+{
+ while (s.b >= 3)
+ {
+ size_t r2 = r - s.b + s.c;
+ if (R_CMP (args, arr (args->m, r - 1, args->s),
+ arr (args->m, r2, args->s)) >= 0)
+ {
+ r2 = r - 1;
+ down (&s, 0);
+ }
+ if (R_CMP (args, arr (args->m, r2, args->s),
+ arr (args->m, r, args->s)) < 0)
+ break;
+ args->swap (arr (args->m, r, args->s),
+ arr (args->m, r2, args->s),
+ args->s);
+ r = r2;
+ down (&s, 0);
+ }
+}
+
+static void
+R_NAME (trinkle) (const struct R_NAME(args_t) *args, size_t r,
+ struct state_t s)
+{
+ while (s.p[0] != 0)
+ {
+ while ((s.p[0] & 1) == 0)
+ up (&s);
+
+ if (s.p[0] == 1)
+ break;
+ size_t r3 = r - s.b;
+ if (R_CMP (args, arr (args->m, r3, args->s),
+ arr (args->m, r, args->s)) < 0)
+ break;
+ decr (s.p);
+ if (s.b < 3)
+ {
+ args->swap (arr (args->m, r, args->s),
+ arr (args->m, r3, args->s), args->s);
+ r = r3;
+ continue;
+ }
+ size_t r2 = r - s.b + s.c;
+ if (R_CMP (args, arr (args->m, r2, args->s),
+ arr (args->m, r - 1, args->s)) < 0)
+ {
+ r2 = r - 1;
+ down (&s, 0);
+ }
+
+ if (R_CMP (args, arr (args->m, r2, args->s),
+ arr (args->m, r3, args->s)) < 0)
+ {
+ args->swap (arr (args->m, r, args->s),
+ arr (args->m, r3, args->s),
+ args->s);
+ r = r3;
+ continue;
+ }
+
+ args->swap (arr (args->m, r, args->s),
+ arr (args->m, r2, args->s),
+ args->s);
+ r = r2;
+ down (&s, 0);
+ break;
+ }
+
+ R_NAME (sift) (args, r, s);
+}
+
+static void
+R_NAME (semitrinkle) (const struct R_NAME (args_t) *args, size_t r,
+ struct state_t s)
+{
+ size_t r1 = r - s.c;
+ if (R_CMP (args, arr (args->m, r, args->s), arr (args->m, r1, args->s)) < 0)
+ {
+ args->swap (arr (args->m, r, args->s),
+ arr (args->m, r1, args->s),
+ args->s);
+ R_NAME (trinkle) (args, r1, s);
+ }
+}
+
+/* Construct the implicit Leonardo heap based on state S and argumetn ARGS.
+ The state is expected to set Q to 1 (initial state), N to total number
+ of elements and R to 0. */
+static inline void
+R_NAME (buildtree) (const struct R_NAME (args_t) *args, struct state_t *s)
+{
+ for (; s->q != s->n; s->q++, s->r++, incr (s->p))
+ {
+ if ((s->p[0] & 7) == 3)
+ {
+ R_NAME (sift) (args, s->r, *s);
+ up (s);
+ up (s);
+ }
+ else /* if ((s.p[0] & 3) == 1) */
+ {
+ if (s->q + s->c < s->n)
+ R_NAME (sift) (args, s->r, *s);
+ else
+ R_NAME (trinkle) (args, s->r, *s);
+ while (down (s, 0) > 1);
+ }
+ }
+}
+
+static inline void
+R_NAME (buildsorted) (const struct R_NAME (args_t) *args, struct state_t *s)
+{
+ for (; s->q > 1; s->q--)
+ {
+ decr (s->p);
+ if (s->b <= 1)
+ {
+ while ((s->p[0] & 1) == 0)
+ up (s);
+ --s->r;
+ }
+ else /* if s.b >= 3 */
+ {
+ if (s->p[0])
+ R_NAME (semitrinkle) (args, s->r - (s->b - s->c), *s);
+ down (s, 1);
+ R_NAME (semitrinkle) (args, --s->r, *s);
+ down (s, 1);
+ }
+ }
+}
+
+#undef R_NAME
+#undef R_CMP
+#undef R_CMP_TYPE
+#undef R_VERSION