diff options
Diffstat (limited to 'Modules/_decimal/libmpdec/transpose.c')
-rw-r--r-- | Modules/_decimal/libmpdec/transpose.c | 276 |
1 files changed, 276 insertions, 0 deletions
diff --git a/Modules/_decimal/libmpdec/transpose.c b/Modules/_decimal/libmpdec/transpose.c new file mode 100644 index 0000000000..55d6d89922 --- /dev/null +++ b/Modules/_decimal/libmpdec/transpose.c @@ -0,0 +1,276 @@ +/* + * Copyright (c) 2008-2016 Stefan Krah. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + +#include "mpdecimal.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <assert.h> +#include "bits.h" +#include "constants.h" +#include "typearith.h" +#include "transpose.h" + + +#define BUFSIZE 4096 +#define SIDE 128 + + +/* Bignum: The transpose functions are used for very large transforms + in sixstep.c and fourstep.c. */ + + +/* Definition of the matrix transpose */ +void +std_trans(mpd_uint_t dest[], mpd_uint_t src[], mpd_size_t rows, mpd_size_t cols) +{ + mpd_size_t idest, isrc; + mpd_size_t r, c; + + for (r = 0; r < rows; r++) { + isrc = r * cols; + idest = r; + for (c = 0; c < cols; c++) { + dest[idest] = src[isrc]; + isrc += 1; + idest += rows; + } + } +} + +/* + * Swap half-rows of 2^n * (2*2^n) matrix. + * FORWARD_CYCLE: even/odd permutation of the halfrows. + * BACKWARD_CYCLE: reverse the even/odd permutation. + */ +static int +swap_halfrows_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols, int dir) +{ + mpd_uint_t buf1[BUFSIZE]; + mpd_uint_t buf2[BUFSIZE]; + mpd_uint_t *readbuf, *writebuf, *hp; + mpd_size_t *done, dbits; + mpd_size_t b = BUFSIZE, stride; + mpd_size_t hn, hmax; /* halfrow number */ + mpd_size_t m, r=0; + mpd_size_t offset; + mpd_size_t next; + + + assert(cols == mul_size_t(2, rows)); + + if (dir == FORWARD_CYCLE) { + r = rows; + } + else if (dir == BACKWARD_CYCLE) { + r = 2; + } + else { + abort(); /* GCOV_NOT_REACHED */ + } + + m = cols - 1; + hmax = rows; /* cycles start at odd halfrows */ + dbits = 8 * sizeof *done; + if ((done = mpd_calloc(hmax/(sizeof *done) + 1, sizeof *done)) == NULL) { + return 0; + } + + for (hn = 1; hn <= hmax; hn += 2) { + + if (done[hn/dbits] & mpd_bits[hn%dbits]) { + continue; + } + + readbuf = buf1; writebuf = buf2; + + for (offset = 0; offset < cols/2; offset += b) { + + stride = (offset + b < cols/2) ? b : cols/2-offset; + + hp = matrix + hn*cols/2; + memcpy(readbuf, hp+offset, stride*(sizeof *readbuf)); + pointerswap(&readbuf, &writebuf); + + next = mulmod_size_t(hn, r, m); + hp = matrix + next*cols/2; + + while (next != hn) { + + memcpy(readbuf, hp+offset, stride*(sizeof *readbuf)); + memcpy(hp+offset, writebuf, stride*(sizeof *writebuf)); + pointerswap(&readbuf, &writebuf); + + done[next/dbits] |= mpd_bits[next%dbits]; + + next = mulmod_size_t(next, r, m); + hp = matrix + next*cols/2; + + } + + memcpy(hp+offset, writebuf, stride*(sizeof *writebuf)); + + done[hn/dbits] |= mpd_bits[hn%dbits]; + } + } + + mpd_free(done); + return 1; +} + +/* In-place transpose of a square matrix */ +static inline void +squaretrans(mpd_uint_t *buf, mpd_size_t cols) +{ + mpd_uint_t tmp; + mpd_size_t idest, isrc; + mpd_size_t r, c; + + for (r = 0; r < cols; r++) { + c = r+1; + isrc = r*cols + c; + idest = c*cols + r; + for (c = r+1; c < cols; c++) { + tmp = buf[isrc]; + buf[isrc] = buf[idest]; + buf[idest] = tmp; + isrc += 1; + idest += cols; + } + } +} + +/* + * Transpose 2^n * 2^n matrix. For cache efficiency, the matrix is split into + * square blocks with side length 'SIDE'. First, the blocks are transposed, + * then a square transposition is done on each individual block. + */ +static void +squaretrans_pow2(mpd_uint_t *matrix, mpd_size_t size) +{ + mpd_uint_t buf1[SIDE*SIDE]; + mpd_uint_t buf2[SIDE*SIDE]; + mpd_uint_t *to, *from; + mpd_size_t b = size; + mpd_size_t r, c; + mpd_size_t i; + + while (b > SIDE) b >>= 1; + + for (r = 0; r < size; r += b) { + + for (c = r; c < size; c += b) { + + from = matrix + r*size + c; + to = buf1; + for (i = 0; i < b; i++) { + memcpy(to, from, b*(sizeof *to)); + from += size; + to += b; + } + squaretrans(buf1, b); + + if (r == c) { + to = matrix + r*size + c; + from = buf1; + for (i = 0; i < b; i++) { + memcpy(to, from, b*(sizeof *to)); + from += b; + to += size; + } + continue; + } + else { + from = matrix + c*size + r; + to = buf2; + for (i = 0; i < b; i++) { + memcpy(to, from, b*(sizeof *to)); + from += size; + to += b; + } + squaretrans(buf2, b); + + to = matrix + c*size + r; + from = buf1; + for (i = 0; i < b; i++) { + memcpy(to, from, b*(sizeof *to)); + from += b; + to += size; + } + + to = matrix + r*size + c; + from = buf2; + for (i = 0; i < b; i++) { + memcpy(to, from, b*(sizeof *to)); + from += b; + to += size; + } + } + } + } + +} + +/* + * In-place transposition of a 2^n x 2^n or a 2^n x (2*2^n) + * or a (2*2^n) x 2^n matrix. + */ +int +transpose_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols) +{ + mpd_size_t size = mul_size_t(rows, cols); + + assert(ispower2(rows)); + assert(ispower2(cols)); + + if (cols == rows) { + squaretrans_pow2(matrix, rows); + } + else if (cols == mul_size_t(2, rows)) { + if (!swap_halfrows_pow2(matrix, rows, cols, FORWARD_CYCLE)) { + return 0; + } + squaretrans_pow2(matrix, rows); + squaretrans_pow2(matrix+(size/2), rows); + } + else if (rows == mul_size_t(2, cols)) { + squaretrans_pow2(matrix, cols); + squaretrans_pow2(matrix+(size/2), cols); + if (!swap_halfrows_pow2(matrix, cols, rows, BACKWARD_CYCLE)) { + return 0; + } + } + else { + abort(); /* GCOV_NOT_REACHED */ + } + + return 1; +} + + |