diff options
Diffstat (limited to 'src/util/fast_idiv_by_const.h')
-rw-r--r-- | src/util/fast_idiv_by_const.h | 137 |
1 files changed, 137 insertions, 0 deletions
diff --git a/src/util/fast_idiv_by_const.h b/src/util/fast_idiv_by_const.h new file mode 100644 index 00000000000..ac10cf79ba8 --- /dev/null +++ b/src/util/fast_idiv_by_const.h @@ -0,0 +1,137 @@ +/* + * Copyright © 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef FAST_IDIV_BY_CONST_H +#define FAST_IDIV_BY_CONST_H + +/* Imported from: + * https://raw.githubusercontent.com/ridiculousfish/libdivide/master/divide_by_constants_codegen_reference.c + */ + +#include <inttypes.h> +#include <limits.h> +#include <assert.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* You can set these to different types to get different precision. + * sint_t is the operand type taken by util_compute_fast_sdiv_info() and + * uint_t is the operand type taken by util_compute_fast_udiv_info(). */ +typedef int32_t sint_t; +typedef uint32_t uint_t; + +/* Computes "magic info" for performing signed division by a fixed integer D. 
+ * The type 'sint_t' is assumed to be defined as a signed integer type large + * enough to hold both the dividend and the divisor. + * Here >> is arithmetic (signed) shift, and >>> is logical shift. + * + * To emit code for n/D, rounding towards zero, use the following sequence: + * + * m = util_compute_fast_sdiv_info(D) + * emit("result = (m.multiplier * n) >> SINT_BITS"); + * if D > 0 and m.multiplier < 0: emit("result += n") + * if D < 0 and m.multiplier > 0: emit("result -= n") + * if m.shift > 0: emit("result >>= m.shift") + * emit("result += (result < 0)") + * + * The shifts by SINT_BITS may be "free" if the high half of the full multiply + * is put in a separate register. + * + * NOTE(review): SINT_BITS is not defined in this header; presumably it is the + * width of sint_t in bits -- confirm against the implementation. + * + * The final add can of course be implemented via the sign bit, e.g. + * result += (result >>> (SINT_BITS - 1)) + * or + * result -= (result >> (SINT_BITS - 1)) + * + * This code is heavily indebted to Hacker's Delight by Henry Warren. + * See http://www.hackersdelight.org/HDcode/magic.c.txt + * Used with permission from http://www.hackersdelight.org/permissions.htm + */ + +struct util_fast_sdiv_info { + sint_t multiplier; /* the "magic number" multiplier */ + unsigned shift; /* arithmetic right shift applied to the result after multiplying */ +}; + +struct util_fast_sdiv_info +util_compute_fast_sdiv_info(sint_t D); + +/* Computes "magic info" for performing unsigned division by a fixed positive + * integer D.  The type 'uint_t' is assumed to be defined as an unsigned + * integer type large enough to hold both the dividend and the divisor. + * num_bits can be set appropriately if n is known to be smaller than + * the largest uint_t; if this is not known then pass + * "(sizeof(uint_t) * CHAR_BIT)" for num_bits. + * + * Assume we have a hardware register of width UINT_BITS, a known constant D + * which is not zero and not a power of 2, and a variable n of width num_bits + * (which may be up to UINT_BITS). 
To emit code for n/D, use one of the two + * following sequences (here >>> refers to a logical bitshift): + * + * m = util_compute_fast_udiv_info(D, num_bits) + * if m.pre_shift > 0: emit("n >>>= m.pre_shift") + * if m.increment: emit("n = saturated_increment(n)") + * emit("result = (m.multiplier * n) >>> UINT_BITS") + * if m.post_shift > 0: emit("result >>>= m.post_shift") + * + * or + * + * m = util_compute_fast_udiv_info(D, num_bits) + * if m.pre_shift > 0: emit("n >>>= m.pre_shift") + * emit("result = m.multiplier * n") + * if m.increment: emit("result = result + m.multiplier") + * emit("result >>>= UINT_BITS") + * if m.post_shift > 0: emit("result >>>= m.post_shift") + * + * The shifts by UINT_BITS may be "free" if the high half of the full multiply + * is put in a separate register. + * + * saturated_increment(n) means "increment n unless it would wrap to 0," i.e. + * if n == (1 << UINT_BITS)-1: result = n + * else: result = n+1 + * A common way to implement this is with the carry bit. For example, on x86: + * add 1 + * sbb 0 + * + * Some invariants: + * 1: At least one of pre_shift and increment is zero + * 2: multiplier is never zero + * + * This code incorporates the "round down" optimization per ridiculous_fish. + */ + +struct util_fast_udiv_info { + uint_t multiplier; /* the "magic number" multiplier */ + unsigned pre_shift; /* logical right shift applied to n before multiplying */ + unsigned post_shift; /* logical right shift applied to the result after multiplying */ + int increment; /* 0 or 1; if set then increment the numerator, using one of + the two strategies shown in the sequences above */ +}; + +struct util_fast_udiv_info +util_compute_fast_udiv_info(uint_t D, unsigned num_bits); + +#ifdef __cplusplus +} /* extern C */ +#endif + +#endif /* FAST_IDIV_BY_CONST_H */ |