//===-- Utility class to manipulate wide floats. ----------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "FPBits.h" #include "NormalFloat.h" #include "src/__support/UInt.h" #include namespace __llvm_libc { namespace fputil { // Store and manipulate positive double precision numbers at |Precision| bits. template class XFloat { static constexpr uint64_t OneMask = (uint64_t(1) << 63); UInt man; static constexpr uint64_t WORDCOUNT = Precision / 64; int exp; size_t bit_width(uint64_t x) { if (x == 0) return 0; size_t shift = 0; while ((OneMask & x) == 0) { ++shift; x <<= 1; } return 64 - shift; } public: XFloat() : exp(0) { for (int i = 0; i < WORDCOUNT; ++i) man[i] = 0; } XFloat(const XFloat &other) : exp(other.exp) { for (int i = 0; i < WORDCOUNT; ++i) man[i] = other.man[i]; } explicit XFloat(double x) { auto xn = NormalFloat(x); exp = xn.exponent; man[WORDCOUNT - 1] = xn.mantissa << 11; for (int i = 0; i < WORDCOUNT - 1; ++i) man[i] = 0; } XFloat(int e, const UInt &bits) : exp(e) { for (size_t i = 0; i < WORDCOUNT; ++i) man[i] = bits[i]; } // Multiply this number with x and store the result in this number. void mul(double x) { auto xn = NormalFloat(x); exp += xn.exponent; uint64_t carry = man.mul(xn.mantissa << 11); size_t carry_width = bit_width(carry); carry_width = (carry_width == 64 ? 64 : 63); man.shift_right(carry_width); man[WORDCOUNT - 1] = man[WORDCOUNT - 1] + (carry << (64 - carry_width)); exp += carry_width == 64 ? 1 : 0; normalize(); } void drop_int() { if (exp < 0) return; if (exp > int(Precision - 1)) { for (size_t i = 0; i < WORDCOUNT; ++i) man[i] = 0; return; } man.shift_left(exp + 1); man.shift_right(exp + 1); normalize(); } double mul(const XFloat &other) { constexpr size_t row_words = 2 * WORDCOUNT + 1; constexpr size_t row_precision = row_words * 64; constexpr size_t result_bits = 2 * Precision; UInt rows[WORDCOUNT]; for (size_t r = 0; r < WORDCOUNT; ++r) { for (size_t i = 0; i < row_words; ++i) { if (i < WORDCOUNT) rows[r][i] = man[i]; else rows[r][i] = 0; } rows[r].mul(other.man[r]); rows[r].shift_left(r * 64); } for (size_t r = 1; r < WORDCOUNT; ++r) { rows[0].add(rows[r]); } int result_exp = exp + other.exp; uint64_t carry = rows[0][row_words - 1]; if (carry) { size_t carry_width = bit_width(carry); rows[0].shift_right(carry_width); rows[0][row_words - 2] = rows[0][row_words - 2] + (carry << (64 - carry_width)); result_exp += carry_width; } if (rows[0][row_words - 2] & OneMask) { ++result_exp; } else { rows[0].shift_left(1); } UInt result_man; for (size_t i = 0; i < result_bits / 64; ++i) result_man[i] = rows[0][i]; XFloat result(result_exp, result_man); result.normalize(); return double(result); } explicit operator double() { normalize(); constexpr uint64_t one = uint64_t(1) << 10; constexpr uint64_t excess_mask = (one << 1) - 1; uint64_t excess = man[WORDCOUNT - 1] & excess_mask; uint64_t new_man = man[WORDCOUNT - 1] >> 11; if (excess > one) { // We have to round up. ++new_man; } else if (excess == one) { bool greater_than_one = false; for (size_t i = 0; i < WORDCOUNT - 1; ++i) { greater_than_one = (man[i] != 0); if (greater_than_one) break; } if (greater_than_one || (new_man & 1) != 0) { ++new_man; } } if (new_man == (uint64_t(1) << 53)) ++exp; // We use NormalFloat as it can produce subnormal numbers or underflow to 0 // if necessary. NormalFloat d(exp, new_man, 0); return double(d); } // Normalizes this number. void normalize() { uint64_t man_bits = 0; for (size_t i = 0; i < WORDCOUNT; ++i) man_bits |= man[i]; if (man_bits == 0) return; while ((man[WORDCOUNT - 1] & OneMask) == 0) { man.shift_left(1); --exp; } } }; } // namespace fputil } // namespace __llvm_libc