diff options
Diffstat (limited to 'stdlib')
-rw-r--r-- | stdlib/Makefile | 3 | ||||
-rw-r--r-- | stdlib/msort.c | 310 | ||||
-rw-r--r-- | stdlib/qsort.c | 428 | ||||
-rw-r--r-- | stdlib/qsort_common.c | 169 |
4 files changed, 379 insertions, 531 deletions
diff --git a/stdlib/Makefile b/stdlib/Makefile index 6ef20a70bd..a570c767b1 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -34,7 +34,7 @@ headers := stdlib.h bits/stdlib.h bits/stdlib-ldbl.h bits/stdlib-float.h \ routines := \ atof atoi atol atoll \ abort \ - bsearch qsort msort \ + bsearch qsort \ getenv putenv setenv secure-getenv \ exit on_exit atexit cxa_atexit cxa_finalize old_atexit \ quick_exit at_quick_exit cxa_at_quick_exit cxa_thread_atexit_impl \ @@ -136,7 +136,6 @@ extra-test-objs += tst-putenvmod.os generated += isomac isomac.out tst-putenvmod.so CFLAGS-bsearch.c += $(uses-callbacks) -CFLAGS-msort.c += $(uses-callbacks) CFLAGS-qsort.c += $(uses-callbacks) CFLAGS-system.c += -fexceptions CFLAGS-system.os = -fomit-frame-pointer diff --git a/stdlib/msort.c b/stdlib/msort.c deleted file mode 100644 index 266c2538c0..0000000000 --- a/stdlib/msort.c +++ /dev/null @@ -1,310 +0,0 @@ -/* An alternative to qsort, with an identical interface. - This file is part of the GNU C Library. - Copyright (C) 1992-2018 Free Software Foundation, Inc. - Written by Mike Haertel, September 1988. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <alloca.h> -#include <stdint.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <memcopy.h> -#include <errno.h> -#include <atomic.h> - -struct msort_param -{ - size_t s; - size_t var; - __compar_d_fn_t cmp; - void *arg; - char *t; -}; -static void msort_with_tmp (const struct msort_param *p, void *b, size_t n); - -static void -msort_with_tmp (const struct msort_param *p, void *b, size_t n) -{ - char *b1, *b2; - size_t n1, n2; - - if (n <= 1) - return; - - n1 = n / 2; - n2 = n - n1; - b1 = b; - b2 = (char *) b + (n1 * p->s); - - msort_with_tmp (p, b1, n1); - msort_with_tmp (p, b2, n2); - - char *tmp = p->t; - const size_t s = p->s; - __compar_d_fn_t cmp = p->cmp; - void *arg = p->arg; - switch (p->var) - { - case 0: - while (n1 > 0 && n2 > 0) - { - if ((*cmp) (b1, b2, arg) <= 0) - { - *(uint32_t *) tmp = *(uint32_t *) b1; - b1 += sizeof (uint32_t); - --n1; - } - else - { - *(uint32_t *) tmp = *(uint32_t *) b2; - b2 += sizeof (uint32_t); - --n2; - } - tmp += sizeof (uint32_t); - } - break; - case 1: - while (n1 > 0 && n2 > 0) - { - if ((*cmp) (b1, b2, arg) <= 0) - { - *(uint64_t *) tmp = *(uint64_t *) b1; - b1 += sizeof (uint64_t); - --n1; - } - else - { - *(uint64_t *) tmp = *(uint64_t *) b2; - b2 += sizeof (uint64_t); - --n2; - } - tmp += sizeof (uint64_t); - } - break; - case 2: - while (n1 > 0 && n2 > 0) - { - unsigned long *tmpl = (unsigned long *) tmp; - unsigned long *bl; - - tmp += s; - if ((*cmp) (b1, b2, arg) <= 0) - { - bl = (unsigned long *) b1; - b1 += s; - --n1; - } - else - { - bl = (unsigned long *) b2; - b2 += s; - --n2; - } - while (tmpl < (unsigned long *) tmp) - *tmpl++ = *bl++; - } - break; - case 3: - while (n1 > 0 && n2 > 0) - { - if ((*cmp) (*(const void **) b1, *(const void **) b2, arg) <= 0) - { - *(void **) tmp = *(void **) b1; - b1 += sizeof (void *); - --n1; - } - else - { - *(void **) tmp = *(void **) b2; - b2 += sizeof (void *); - --n2; - } - tmp += sizeof (void *); - } - break; - default: - while (n1 > 0 && n2 > 0) - { - if ((*cmp) (b1, b2, arg) <= 0) - { - tmp = (char *) __mempcpy (tmp, b1, s); - b1 += s; - --n1; - } - else - { - tmp = (char *) __mempcpy (tmp, b2, s); - b2 += s; - --n2; - } - } - break; - } - - if (n1 > 0) - memcpy (tmp, b1, n1 * s); - memcpy (b, p->t, (n - n2) * s); -} - - -void -__qsort_r (void *b, size_t n, size_t s, __compar_d_fn_t cmp, void *arg) -{ - size_t size = n * s; - char *tmp = NULL; - struct msort_param p; - - /* For large object sizes use indirect sorting. */ - if (s > 32) - size = 2 * n * sizeof (void *) + s; - - if (size < 1024) - /* The temporary array is small, so put it on the stack. */ - p.t = __alloca (size); - else - { - /* We should avoid allocating too much memory since this might - have to be backed up by swap space. */ - static long int phys_pages; - static int pagesize; - - if (pagesize == 0) - { - phys_pages = __sysconf (_SC_PHYS_PAGES); - - if (phys_pages == -1) - /* Error while determining the memory size. So let's - assume there is enough memory. Otherwise the - implementer should provide a complete implementation of - the `sysconf' function. */ - phys_pages = (long int) (~0ul >> 1); - - /* The following determines that we will never use more than - a quarter of the physical memory. */ - phys_pages /= 4; - - /* Make sure phys_pages is written to memory. */ - atomic_write_barrier (); - - pagesize = __sysconf (_SC_PAGESIZE); - } - - /* Just a comment here. We cannot compute - phys_pages * pagesize - and compare the needed amount of memory against this value. - The problem is that some systems might have more physical - memory then can be represented with a `size_t' value (when - measured in bytes. */ - - /* If the memory requirements are too high don't allocate memory. */ - if (size / pagesize > (size_t) phys_pages) - { - _quicksort (b, n, s, cmp, arg); - return; - } - - /* It's somewhat large, so malloc it. */ - int save = errno; - tmp = malloc (size); - __set_errno (save); - if (tmp == NULL) - { - /* Couldn't get space, so use the slower algorithm - that doesn't need a temporary array. */ - _quicksort (b, n, s, cmp, arg); - return; - } - p.t = tmp; - } - - p.s = s; - p.var = 4; - p.cmp = cmp; - p.arg = arg; - - if (s > 32) - { - /* Indirect sorting. */ - char *ip = (char *) b; - void **tp = (void **) (p.t + n * sizeof (void *)); - void **t = tp; - void *tmp_storage = (void *) (tp + n); - - while ((void *) t < tmp_storage) - { - *t++ = ip; - ip += s; - } - p.s = sizeof (void *); - p.var = 3; - msort_with_tmp (&p, p.t + n * sizeof (void *), n); - - /* tp[0] .. tp[n - 1] is now sorted, copy around entries of - the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */ - char *kp; - size_t i; - for (i = 0, ip = (char *) b; i < n; i++, ip += s) - if ((kp = tp[i]) != ip) - { - size_t j = i; - char *jp = ip; - memcpy (tmp_storage, ip, s); - - do - { - size_t k = (kp - (char *) b) / s; - tp[j] = jp; - memcpy (jp, kp, s); - j = k; - jp = kp; - kp = tp[k]; - } - while (kp != ip); - - tp[j] = jp; - memcpy (jp, tmp_storage, s); - } - } - else - { - if ((s & (sizeof (uint32_t) - 1)) == 0 - && ((char *) b - (char *) 0) % __alignof__ (uint32_t) == 0) - { - if (s == sizeof (uint32_t)) - p.var = 0; - else if (s == sizeof (uint64_t) - && ((char *) b - (char *) 0) % __alignof__ (uint64_t) == 0) - p.var = 1; - else if ((s & (sizeof (unsigned long) - 1)) == 0 - && ((char *) b - (char *) 0) - % __alignof__ (unsigned long) == 0) - p.var = 2; - } - msort_with_tmp (&p, b, n); - } - free (tmp); -} -libc_hidden_def (__qsort_r) -weak_alias (__qsort_r, qsort_r) - - -void -qsort (void *b, size_t n, size_t s, __compar_fn_t cmp) -{ - return __qsort_r (b, n, s, (__compar_d_fn_t) cmp, NULL); -} -libc_hidden_def (qsort) diff --git a/stdlib/qsort.c b/stdlib/qsort.c index 264a06b8a9..f14b469989 100644 --- a/stdlib/qsort.c +++ b/stdlib/qsort.c @@ -1,10 +1,10 @@ -/* Copyright (C) 1991-2018 Free Software Foundation, Inc. +/* qsort, sort an array. This file is part of the GNU C Library. - Written by Douglas C. Schmidt (schmidt@ics.uci.edu). + Copyright (C) 2018 Free Software Foundation, Inc. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either + GLicense as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, @@ -16,234 +16,224 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -/* If you consider tuning this algorithm, you should consult first: - Engineering a sort function; Jon Bentley and M. Douglas McIlroy; - Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */ +/* The qsort{_r} implementation is based on Smoothsort by Dijkstra [1] + with the optimization to used O(1) extra space (implicit Leonardo + Heaps). A extensive analysis on algorithm details was written by + Keith Schwarz [2]. This implementation is based on paper [3]. + + The Smoothsort is used because: + + 1. follows the comparison sort with an average-case lower bound of + O(n * log (n)). + + 2. It uses O(1) extra memory (the implicit Leonardo heap plus stack + function usage). It allows the function to be as-safe and also + to avoid extra latency (due a possible malloc or extra function + calls on a mergesort for instance). + + 3. It has a the advantage over heapsort to approximate of O(n) for + inputs already sorted in some degree. + + [1] http://www.cs.utexas.edu/users/EWD/ewd07xx/EWD796a.PDF + [2] http://www.keithschwarz.com/smoothsort/ + [3] http://www.enterag.ch/hartwig/order/smoothsort.pdf + */ #include <alloca.h> #include <limits.h> #include <stdlib.h> #include <string.h> +#include <stdbool.h> -/* Byte-wise swap two items of size SIZE. */ -#define SWAP(a, b, size) \ - do \ - { \ - size_t __size = (size); \ - char *__a = (a), *__b = (b); \ - do \ - { \ - char __tmp = *__a; \ - *__a++ = *__b; \ - *__b++ = __tmp; \ - } while (--__size > 0); \ - } while (0) - -/* Discontinue quicksort algorithm when partition gets below this size. - This particular magic number was chosen to work best on a Sun 4/260. */ -#define MAX_THRESH 4 - -/* Stack node declarations used to store unfulfilled partition obligations. */ -typedef struct - { - char *lo; - char *hi; - } stack_node; - -/* The next 4 #defines implement a very fast in-line stack abstraction. */ -/* The stack needs log (total_elements) entries (we could even subtract - log(MAX_THRESH)). Since total_elements has type size_t, we get as - upper bound for log (total_elements): - bits per byte (CHAR_BIT) * sizeof(size_t). */ -#define STACK_SIZE (CHAR_BIT * sizeof(size_t)) -#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top)) -#define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi))) -#define STACK_NOT_EMPTY (stack < top) - - -/* Order size using quicksort. This implementation incorporates - four optimizations discussed in Sedgewick: - - 1. Non-recursive, using an explicit stack of pointer that store the - next array partition to sort. To save time, this maximum amount - of space required to store an array of SIZE_MAX is allocated on the - stack. Assuming a 32-bit (64 bit) integer for size_t, this needs - only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes). - Pretty cheap, actually. - - 2. Chose the pivot element using a median-of-three decision tree. - This reduces the probability of selecting a bad pivot value and - eliminates certain extraneous comparisons. - - 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving - insertion sort to order the MAX_THRESH items within each partition. - This is a big win, since insertion sort is faster for small, mostly - sorted array segments. - - 4. The larger of the two sub-partitions is always pushed onto the - stack first, with the algorithm then concentrating on the - smaller partition. This *guarantees* no more than log (total_elems) - stack size is needed (actually O(1) in this case)! */ +/* Helper to get the address of BASE access as an array of elements of + size SIZE. */ +static inline void * +arr (void *base, size_t idx, size_t size) +{ + return (void*)((uintptr_t)base + (idx * size)); +} -void -_quicksort (void *const pbase, size_t total_elems, size_t size, - __compar_d_fn_t cmp, void *arg) +/* Bitvector to encode the used Leonardo Heaps. We need at most 1.7i bits + to encode all the Leonardo numbers that can fit in a machine with word + of size 2^i. */ +typedef size_t lnbitvector_t[2]; + +static inline void +incr (lnbitvector_t p) { - char *base_ptr = (char *) pbase; + size_t r = p[0] + 1; + p[1] += ((p[0] ^ r) & p[0]) >> (sizeof(size_t) * 8 - 1); + p[0] = r; +} - const size_t max_thresh = MAX_THRESH * size; +static inline void +decr (lnbitvector_t p) +{ + size_t r = p[0] - 1; + p[1] -= ((r ^ p[0]) & r) >> (sizeof(size_t) * 8 - 1); + p[0] = r; +} - if (total_elems == 0) - /* Avoid lossage with unsigned arithmetic below. */ - return; +static inline void +shr (lnbitvector_t p) +{ + p[0] >>= 1; + p[0] |= p[1] << (sizeof(size_t) * 8 - 1); + p[1] >>= 1; +} + +static inline void +shl (lnbitvector_t p) +{ + p[1] <<= 1; + p[1] |= p[0] >> (sizeof(size_t) * 8 - 1); + p[0] <<= 1; +} - if (total_elems > MAX_THRESH) + +/* Swap SIZE bytes between addresses A and B. Helper to generic types + are provided as an optimization. */ + +typedef void (*swap_t)(void *, void *, size_t); + +static inline bool +check_alignment (const void *base, size_t align) +{ + return _STRING_ARCH_unaligned || ((uintptr_t)base % (align - 1)) == 0; +} + +static void +swap_u32 (void *a, void *b, size_t size) +{ + uint32_t tmp = *(uint32_t*) a; + *(uint32_t*) a = *(uint32_t*) b; + *(uint32_t*) b = tmp; +} + +static void +swap_u64 (void *a, void *b, size_t size) +{ + uint64_t tmp = *(uint64_t*) a; + *(uint64_t*) a = *(uint64_t*) b; + *(uint64_t*) b = tmp; +} + +static inline void +swap_generic (void *a, void *b, size_t size) +{ + unsigned char tmp[256]; + do { - char *lo = base_ptr; - char *hi = &lo[size * (total_elems - 1)]; - stack_node stack[STACK_SIZE]; - stack_node *top = stack; - - PUSH (NULL, NULL); - - while (STACK_NOT_EMPTY) - { - char *left_ptr; - char *right_ptr; - - /* Select median value from among LO, MID, and HI. Rearrange - LO and HI so the three values are sorted. This lowers the - probability of picking a pathological pivot value and - skips a comparison for both the LEFT_PTR and RIGHT_PTR in - the while loops. */ - - char *mid = lo + size * ((hi - lo) / size >> 1); - - if ((*cmp) ((void *) mid, (void *) lo, arg) < 0) - SWAP (mid, lo, size); - if ((*cmp) ((void *) hi, (void *) mid, arg) < 0) - SWAP (mid, hi, size); - else - goto jump_over; - if ((*cmp) ((void *) mid, (void *) lo, arg) < 0) - SWAP (mid, lo, size); - jump_over:; - - left_ptr = lo + size; - right_ptr = hi - size; - - /* Here's the famous ``collapse the walls'' section of quicksort. - Gotta like those tight inner loops! They are the main reason - that this algorithm runs much faster than others. */ - do - { - while ((*cmp) ((void *) left_ptr, (void *) mid, arg) < 0) - left_ptr += size; - - while ((*cmp) ((void *) mid, (void *) right_ptr, arg) < 0) - right_ptr -= size; - - if (left_ptr < right_ptr) - { - SWAP (left_ptr, right_ptr, size); - if (mid == left_ptr) - mid = right_ptr; - else if (mid == right_ptr) - mid = left_ptr; - left_ptr += size; - right_ptr -= size; - } - else if (left_ptr == right_ptr) - { - left_ptr += size; - right_ptr -= size; - break; - } - } - while (left_ptr <= right_ptr); - - /* Set up pointers for next iteration. First determine whether - left and right partitions are below the threshold size. If so, - ignore one or both. Otherwise, push the larger partition's - bounds on the stack and continue sorting the smaller one. */ - - if ((size_t) (right_ptr - lo) <= max_thresh) - { - if ((size_t) (hi - left_ptr) <= max_thresh) - /* Ignore both small partitions. */ - POP (lo, hi); - else - /* Ignore small left partition. */ - lo = left_ptr; - } - else if ((size_t) (hi - left_ptr) <= max_thresh) - /* Ignore small right partition. */ - hi = right_ptr; - else if ((right_ptr - lo) > (hi - left_ptr)) - { - /* Push larger left partition indices. */ - PUSH (lo, right_ptr); - lo = left_ptr; - } - else - { - /* Push larger right partition indices. */ - PUSH (left_ptr, hi); - hi = right_ptr; - } - } + size_t s = size > sizeof (tmp) ? sizeof (tmp) : size; + memcpy (tmp, a, s); + a = __mempcpy (a, b, s); + b = __mempcpy (b, tmp, s); + size -= s; } + while (size > 0); +} - /* Once the BASE_PTR array is partially sorted by quicksort the rest - is completely sorted using insertion sort, since this is efficient - for partitions below MAX_THRESH size. BASE_PTR points to the beginning - of the array to sort, and END_PTR points at the very last element in - the array (*not* one beyond it!). */ - -#define min(x, y) ((x) < (y) ? (x) : (y)) - - { - char *const end_ptr = &base_ptr[size * (total_elems - 1)]; - char *tmp_ptr = base_ptr; - char *thresh = min(end_ptr, base_ptr + max_thresh); - char *run_ptr; - - /* Find smallest element in first threshold and place it at the - array's beginning. This is the smallest array element, - and the operation speeds up insertion sort's inner loop. */ - - for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size) - if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0) - tmp_ptr = run_ptr; - - if (tmp_ptr != base_ptr) - SWAP (tmp_ptr, base_ptr, size); - - /* Insertion sort, running from left-hand-side up to right-hand-side. */ - - run_ptr = base_ptr + size; - while ((run_ptr += size) <= end_ptr) - { - tmp_ptr = run_ptr - size; - while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0) - tmp_ptr -= size; - - tmp_ptr += size; - if (tmp_ptr != run_ptr) - { - char *trav; - - trav = run_ptr + size; - while (--trav >= run_ptr) - { - char c = *trav; - char *hi, *lo; - - for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo) - *hi = *lo; - *hi = c; - } - } - } - } +static inline swap_t +select_swap_func (const void *base, size_t size) +{ + if (size == 4 && check_alignment (base, 4)) + return swap_u32; + else if (size == 8 && check_alignment (base, 8)) + return swap_u64; + return swap_generic; } + + +struct state_t +{ + size_t q; /* Length of the unsorted prefix (1 <= q <= n). */ + size_t n; /* Total number of elements. */ + size_t r; + size_t b; /* Leonardo number. */ + size_t c; /* Next Leonardo number. */ + lnbitvector_t p; /* Bitvector to track the Leonardo heaps. */ +}; + +static inline size_t +up (struct state_t *s) +{ + shr (s->p); + + size_t next = s->b + s->c + 1; + s->c = s->b; + s->b = next; + return next; +} + +static inline size_t +down (struct state_t *s, size_t bit) +{ + shl (s->p); + + s->p[0] |= bit; + + size_t prev = s->c; + s->c = s->b - s->c - 1; + s->b = prev; + return prev; +} + +/* Invariant arguments used generic functions. */ +struct args_t +{ + void *m; /* Base pointer to array. */ + size_t s; /* Element size. */ + __compar_fn_t cmp; /* Comparison function. */ + swap_t swap; /* Swap function. */ +}; + +#include "qsort_common.c" + +void +qsort (void *base, size_t nmemb, size_t size, __compar_fn_t cmp) +{ + struct args_t args = { base, size, cmp, select_swap_func (base, size) }; + struct state_t s = { 1, nmemb, 0, 1, 1, { 1, 0 } }; + + if (nmemb <= 0) + return; + + buildtree (&args, &s); + + trinkle (&args, s.r, s); + + buildsorted (&args, &s); +} +libc_hidden_def (qsort) + +struct args_t_r +{ + void *m; + size_t s; + __compar_d_fn_t cmp; + void *a; + swap_t swap; +}; + +#define R_VERSION +#include "qsort_common.c" + +void +__qsort_r (void *base, size_t nmemb, size_t size, __compar_d_fn_t cmp, + void *arg) +{ + struct args_t_r args = { base, size, cmp, arg, select_swap_func (base, size) }; + struct state_t s = { 1, nmemb, 0, 1, 1, { 1, 0 } }; + + if (nmemb <= 1) + return; + + buildtree_r (&args, &s); + + trinkle_r (&args, s.r, s); + + buildsorted_r (&args, &s); +} + +libc_hidden_def (__qsort_r) +weak_alias (__qsort_r, qsort_r) diff --git a/stdlib/qsort_common.c b/stdlib/qsort_common.c new file mode 100644 index 0000000000..20cae9b784 --- /dev/null +++ b/stdlib/qsort_common.c @@ -0,0 +1,169 @@ +/* Common implementation for qsort and qsort_r. + This file is part of the GNU C Library. + Copyright (C) 2017 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + GLicense as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef R_VERSION +# define R_NAME(func) func ## _r +# define R_CMP_TYPE __compar_fn_t +# define R_CMP(args, p1, p2) (args)->cmp (p1, p2, args->a) +#else +# define R_NAME(func) func +# define R_CMP_TYPE __compar_d_fn_t +# define R_CMP(args, p1, p2) (args)->cmp (p1, p2) +#endif + +static void +R_NAME (sift) (const struct R_NAME(args_t) *args, size_t r, struct state_t s) +{ + while (s.b >= 3) + { + size_t r2 = r - s.b + s.c; + if (R_CMP (args, arr (args->m, r - 1, args->s), + arr (args->m, r2, args->s)) >= 0) + { + r2 = r - 1; + down (&s, 0); + } + if (R_CMP (args, arr (args->m, r2, args->s), + arr (args->m, r, args->s)) < 0) + break; + args->swap (arr (args->m, r, args->s), + arr (args->m, r2, args->s), + args->s); + r = r2; + down (&s, 0); + } +} + +static void +R_NAME (trinkle) (const struct R_NAME(args_t) *args, size_t r, + struct state_t s) +{ + while (s.p[0] != 0) + { + while ((s.p[0] & 1) == 0) + up (&s); + + if (s.p[0] == 1) + break; + size_t r3 = r - s.b; + if (R_CMP (args, arr (args->m, r3, args->s), + arr (args->m, r, args->s)) < 0) + break; + decr (s.p); + if (s.b < 3) + { + args->swap (arr (args->m, r, args->s), + arr (args->m, r3, args->s), args->s); + r = r3; + continue; + } + size_t r2 = r - s.b + s.c; + if (R_CMP (args, arr (args->m, r2, args->s), + arr (args->m, r - 1, args->s)) < 0) + { + r2 = r - 1; + down (&s, 0); + } + + if (R_CMP (args, arr (args->m, r2, args->s), + arr (args->m, r3, args->s)) < 0) + { + args->swap (arr (args->m, r, args->s), + arr (args->m, r3, args->s), + args->s); + r = r3; + continue; + } + + args->swap (arr (args->m, r, args->s), + arr (args->m, r2, args->s), + args->s); + r = r2; + down (&s, 0); + break; + } + + R_NAME (sift) (args, r, s); +} + +static void +R_NAME (semitrinkle) (const struct R_NAME (args_t) *args, size_t r, + struct state_t s) +{ + size_t r1 = r - s.c; + if (R_CMP (args, arr (args->m, r, args->s), arr (args->m, r1, args->s)) < 0) + { + args->swap (arr (args->m, r, args->s), + arr (args->m, r1, args->s), + args->s); + R_NAME (trinkle) (args, r1, s); + } +} + +/* Construct the implicit Leonardo heap based on state S and argumetn ARGS. + The state is expected to set Q to 1 (initial state), N to total number + of elements and R to 0. */ +static inline void +R_NAME (buildtree) (const struct R_NAME (args_t) *args, struct state_t *s) +{ + for (; s->q != s->n; s->q++, s->r++, incr (s->p)) + { + if ((s->p[0] & 7) == 3) + { + R_NAME (sift) (args, s->r, *s); + up (s); + up (s); + } + else /* if ((s.p[0] & 3) == 1) */ + { + if (s->q + s->c < s->n) + R_NAME (sift) (args, s->r, *s); + else + R_NAME (trinkle) (args, s->r, *s); + while (down (s, 0) > 1); + } + } +} + +static inline void +R_NAME (buildsorted) (const struct R_NAME (args_t) *args, struct state_t *s) +{ + for (; s->q > 1; s->q--) + { + decr (s->p); + if (s->b <= 1) + { + while ((s->p[0] & 1) == 0) + up (s); + --s->r; + } + else /* if s.b >= 3 */ + { + if (s->p[0]) + R_NAME (semitrinkle) (args, s->r - (s->b - s->c), *s); + down (s, 1); + R_NAME (semitrinkle) (args, --s->r, *s); + down (s, 1); + } + } +} + +#undef R_NAME +#undef R_CMP +#undef R_CMP_TYPE +#undef R_VERSION |