1 files changed, 137 insertions, 0 deletions
diff --git a/src/util/fast_idiv_by_const.h b/src/util/fast_idiv_by_const.h
new file mode 100644
index 00000000000..ac10cf79ba8
--- /dev/null
+++ b/src/util/fast_idiv_by_const.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright © 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef FAST_IDIV_BY_CONST_H
+#define FAST_IDIV_BY_CONST_H
+
+/* Imported from:
+ *   https://raw.githubusercontent.com/ridiculousfish/libdivide/master/divide_by_constants_codegen_reference.c
+ */
+
+#include <inttypes.h>
+#include <limits.h>
+#include <assert.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* You can set these to different types to get different precision. */
+typedef int32_t sint_t;  /* signed dividend/divisor type */
+typedef uint32_t uint_t; /* unsigned dividend/divisor type */
+
+/* Computes "magic info" for performing signed division by a fixed integer D.
+ * The type 'sint_t' is assumed to be defined as a signed integer type large
+ * enough to hold both the dividend and the divisor.
+ * Here >> is arithmetic (signed) shift, and >>> is logical shift.
+ *
+ * To emit code for n/D, rounding towards zero, use the following sequence:
+ *
+ *   m = util_compute_fast_sdiv_info(D)
+ *   emit("result = (m.multiplier * n) >> SINT_BITS");
+ *   if D > 0 and m.multiplier < 0: emit("result += n")
+ *   if D < 0 and m.multiplier > 0: emit("result -= n")
+ *   if m.shift > 0: emit("result >>= m.shift")
+ *   emit("result += (result < 0)")
+ *
+ * The shifts by SINT_BITS may be "free" if the high half of the full multiply
+ * is put in a separate register.
+ *
+ * The final add can of course be implemented via the sign bit, e.g.
+ *    result += (result >>> (SINT_BITS - 1))
+ * or
+ *    result -= (result >> (SINT_BITS - 1))
+ *
+ * This code is heavily indebted to Hacker's Delight by Henry Warren.
+ * See http://www.hackersdelight.org/HDcode/magic.c.txt
+ * Used with permission from http://www.hackersdelight.org/permissions.htm
+ */
+
+struct util_fast_sdiv_info { /* result of util_compute_fast_sdiv_info() */
+   sint_t multiplier; /* the "magic number" multiplier; may be negative */
+   unsigned shift; /* arithmetic right shift applied after the multiply */
+};
+
+struct util_fast_sdiv_info /* magic multiplier and shift for dividing by D */
+util_compute_fast_sdiv_info(sint_t D);
+
+/* Computes "magic info" for performing unsigned division by a fixed positive
+ * integer D. The type 'uint_t' is assumed to be defined as an unsigned
+ * integer type large enough to hold both the dividend and the divisor.
+ * num_bits can be set appropriately if n is known to be smaller than
+ * the largest uint_t; if this is not known then pass
+ * "(sizeof(uint_t) * CHAR_BIT)" for num_bits.
+ *
+ * Assume we have a hardware register of width UINT_BITS, a known constant D
+ * which is not zero and not a power of 2, and a variable n of width num_bits
+ * (which may be up to UINT_BITS). To emit code for n/D, use one of the two
+ * following sequences (here >>> refers to a logical bitshift):
+ *
+ *   m = util_compute_fast_udiv_info(D, num_bits)
+ *   if m.pre_shift > 0: emit("n >>>= m.pre_shift")
+ *   if m.increment: emit("n = saturated_increment(n)")
+ *   emit("result = (m.multiplier * n) >>> UINT_BITS")
+ *   if m.post_shift > 0: emit("result >>>= m.post_shift")
+ *
+ * or
+ *
+ *   m = util_compute_fast_udiv_info(D, num_bits)
+ *   if m.pre_shift > 0: emit("n >>>= m.pre_shift")
+ *   emit("result = m.multiplier * n")
+ *   if m.increment: emit("result = result + m.multiplier")
+ *   emit("result >>>= UINT_BITS")
+ *   if m.post_shift > 0: emit("result >>>= m.post_shift")
+ *
+ * The shifts by UINT_BITS may be "free" if the high half of the full multiply
+ * is put in a separate register.
+ *
+ * saturated_increment(n) means "increment n unless it would wrap to 0," i.e.
+ *   if n == (1 << UINT_BITS)-1: result = n
+ *   else: result = n+1
+ * A common way to implement this is with the carry bit. For example, on x86:
+ *   add 1
+ *   sbb 0
+ *
+ * Some invariants:
+ *   1: At least one of pre_shift and increment is zero
+ *   2: multiplier is never zero
+ *
+ * This code incorporates the "round down" optimization per ridiculous_fish.
+ */
+
+struct util_fast_udiv_info { /* result of util_compute_fast_udiv_info() */
+   uint_t multiplier; /* the "magic number" multiplier; never zero */
+   unsigned pre_shift; /* logical shift of the dividend before multiplying */
+   unsigned post_shift; /* logical shift of the result after multiplying */
+   int increment; /* 0 or 1; if set, either saturating-increment the dividend
+                     or add multiplier to the product (never with pre_shift) */
+};
+
+struct util_fast_udiv_info /* D != 0, not a power of 2; n has num_bits bits */
+util_compute_fast_udiv_info(uint_t D, unsigned num_bits);
+
+#ifdef __cplusplus
+} /* extern C */
+#endif
+
+#endif /* FAST_IDIV_BY_CONST_H */