libstdc++-v3/src/c++17/uint128_t.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297

// A relatively minimal unsigned 128-bit integer class type, used by the
// floating-point std::to_chars implementation on targets that lack __int128.

// Copyright (C) 2021-2023 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

// Unsigned 128-bit integer emulated with two 64-bit halves.
//
// Only the operations defined below are available.  In particular,
// operator/ handles only the divisors 5 and 10, and operator% only the
// divisors 2, 5 and 10.  All arithmetic wraps modulo 2^128.
struct uint128_t
{
  // The low half is stored first on little-endian targets and the high
  // half first otherwise.
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  uint64_t lo, hi;
#else
  uint64_t hi, lo;
#endif

  // Defaulted default constructor; it does not assign a value to the halves.
  uint128_t() = default;

  // Constructs the value hi*2^64 + lo.  The constructor is not explicit, so
  // integer operands convert implicitly; the operators below rely on this
  // for expressions such as `y < 128` and `*this != 0`.
  // The mem-initializers are listed in the same order as the members are
  // declared above.
  constexpr
  uint128_t(uint64_t lo, uint64_t hi = 0)
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    : lo(lo), hi(hi)
#else
    : hi(hi), lo(lo)
#endif
  { }

  // True if and only if the value is nonzero.
  constexpr explicit
  operator bool() const
  { return *this != 0; }

  // Explicit conversion to an integral type no wider than 64 bits.  The
  // result is the low half converted to T; the high half is discarded.
  template<typename T, typename = std::enable_if_t<std::is_integral_v<T>>>
    constexpr explicit
    operator T() const
    {
      static_assert(sizeof(T) <= sizeof(uint64_t));
      return static_cast<T>(lo);
    }

  // Bitwise AND, applied to each half independently.
  friend constexpr uint128_t
  operator&(uint128_t x, const uint128_t y)
  {
    x.lo &= y.lo;
    x.hi &= y.hi;
    return x;
  }

  // Bitwise OR, applied to each half independently.
  friend constexpr uint128_t
  operator|(uint128_t x, const uint128_t y)
  {
    x.lo |= y.lo;
    x.hi |= y.hi;
    return x;
  }

  // Left shift of x by y bits.  The shift count must be less than 128
  // (asserted); only y.lo is consulted after that.
  friend constexpr uint128_t
  operator<<(uint128_t x, const uint128_t y)
  {
    __glibcxx_assert(y < 128);
    // TODO: Convince GCC to use shldq on x86 here.
    if (y.lo >= 64)
      {
	// Every bit of the low half moves into the high half.
	x.hi = x.lo << (y.lo - 64);
	x.lo = 0;
      }
    else if (y.lo != 0)
      {
	// A zero count is excluded because `64 - y.lo` would then be a
	// shift of a 64-bit value by 64 bits.
	x.hi <<= y.lo;
	x.hi |= x.lo >> (64 - y.lo);
	x.lo <<= y.lo;
      }
    return x;
  }

  // Logical right shift of x by y bits.  The shift count must be less than
  // 128 (asserted); only y.lo is consulted after that.
  friend constexpr uint128_t
  operator>>(uint128_t x, const uint128_t y)
  {
    __glibcxx_assert(y < 128);
    // TODO: Convince GCC to use shrdq on x86 here.
    if (y.lo >= 64)
      {
	// Every bit of the high half moves into the low half.
	x.lo = x.hi >> (y.lo - 64);
	x.hi = 0;
      }
    else if (y.lo != 0)
      {
	// A zero count is excluded because `64 - y.lo` would then be a
	// shift of a 64-bit value by 64 bits.
	x.lo >>= y.lo;
	x.lo |= x.hi << (64 - y.lo);
	x.hi >>= y.lo;
      }
    return x;
  }

  // Bitwise complement of both halves.
  constexpr uint128_t
  operator~() const
  { return {~lo, ~hi}; }

  // Two's-complement negation: ~x + 1.
  constexpr uint128_t
  operator-() const
  { return operator~() + 1; }

  // Addition modulo 2^128.
  friend constexpr uint128_t
  operator+(uint128_t x, const uint128_t y)
  {
    // The builtin stores the wrapped low sum in x.lo and returns whether it
    // overflowed; that carry is propagated into the high half.
    x.hi += __builtin_add_overflow(x.lo, y.lo, &x.lo);
    x.hi += y.hi;
    return x;
  }

  // Subtraction modulo 2^128.
  friend constexpr uint128_t
  operator-(uint128_t x, const uint128_t y)
  {
    // The builtin stores the wrapped low difference in x.lo and returns
    // whether it overflowed; that borrow is taken from the high half.
    x.hi -= __builtin_sub_overflow(x.lo, y.lo, &x.lo);
    x.hi -= y.hi;
    return x;
  }

  // Full 64x64 -> 128 bit product of x and y, computed from the four
  // 32x32 -> 64 bit partial products.
  static constexpr uint128_t
  umul64_64_128(const uint64_t x, const uint64_t y)
  {
    const uint64_t xl = x & 0xffffffff;
    const uint64_t xh = x >> 32;
    const uint64_t yl = y & 0xffffffff;
    const uint64_t yh = y >> 32;
    const uint64_t ll = xl * yl;
    const uint64_t lh = xl * yh;
    const uint64_t hl = xh * yl;
    const uint64_t hh = xh * yh;
    // Middle column: the carry out of ll plus both cross terms' contribution
    // to bits 32..63.  This cannot overflow 64 bits, since lh is at most
    // (2^32-1)^2 and each of the other two terms is below 2^32.
    const uint64_t m = (ll >> 32) + lh + (hl & 0xffffffff);
    const uint64_t l = (ll & 0xffffffff ) | (m << 32);
    const uint64_t h = (m >> 32) + (hl >> 32) + hh;
    return {l, h};
  }

  // Multiplication modulo 2^128.
  friend constexpr uint128_t
  operator*(const uint128_t x, const uint128_t y)
  {
    uint128_t z = umul64_64_128(x.lo, y.lo);
    // The cross terms contribute only to the high half (wrapping); the
    // x.hi*y.hi term is a multiple of 2^128 and therefore vanishes.
    z.hi += x.lo * y.hi + x.hi * y.lo;
    return z;
  }

  // Division of x by y, where y must be 5 or 10 (asserted).
  friend constexpr uint128_t
  operator/(const uint128_t x, const uint128_t y)
  {
    // Ryu performs 128-bit division only by 5 and 10, so that's what we
    // implement.  The strategy here is to relate division of x with that of
    // x.hi and x.lo separately.
    __glibcxx_assert(y == 5 || y == 10);
    // The following implements division by 5 and 10.  In either case, we
    // first compute division by 5:
    //   x/5 = (x.hi*2^64 + x.lo)/5
    //       = (x.hi*(2^64-1) + x.hi + x.lo)/5
    //       = x.hi*((2^64-1)/5) + (x.hi + x.lo)/5 since CST=(2^64-1)/5 is exact
    //       = x.hi*CST + x.hi/5 + x.lo/5 + ((x.lo%5) + (x.hi%5) >= 5)
    // We go a step further and replace the last adjustment term with a
    // lookup table, which we encode as a binary literal.  This seems to
    // yield smaller code on x86 at least.
    constexpr auto cst = ~uint64_t(0) / 5;
    uint128_t q = uint128_t{x.hi}*cst + uint128_t{x.hi/5 + x.lo/5};
    // Bit i of the table is 1 exactly when i >= 5, for i in [0, 8].
    constexpr auto lookup = 0b111100000u;
    q += (lookup >> ((x.hi % 5) + (x.lo % 5))) & 1;
    // x/10 is obtained by halving x/5.
    if (y == 10)
      q >>= 1;
    return q;
  }

  // Remainder of x divided by y, where y must be 2, 5 or 10 (asserted for
  // the latter two once 2 has been handled).
  friend constexpr uint128_t
  operator%(const uint128_t x, const uint128_t y)
  {
    // Ryu performs 128-bit modulus only by 2, 5 and 10, so that's what we
    // implement.  The strategy here is to relate modulus of x with that of
    // x.hi and x.lo separately.
    if (y == 2)
      return x & 1;
    __glibcxx_assert(y == 5 || y == 10);
    // The following implements modulus by 5 and 10.  In either case,
    // we first compute modulus by 5:
    //   x (mod 5) = x.hi*2^64 + x.lo (mod 5)
    //             = x.hi + x.lo (mod 5) since 2^64 ≡ 1 (mod 5)
    // So the straightforward implementation would be
    //   ((x.hi % 5) + (x.lo % 5)) % 5
    // But we go a step further and replace the outermost % with a
    // lookup table:
    //             = {0,1,2,3,4,0,1,2,3}[(x.hi % 5) + (x.lo % 5)] (mod 5)
    // which we encode as an octal literal.
    constexpr auto lookup = 0321043210u;
    // Each table entry occupies one octal digit, i.e. three bits.
    auto r = (lookup >> 3*((x.hi % 5) + (x.lo % 5))) & 7;
    if (y == 10)
      // x % 10 = (x % 5)      if x / 5 is even
      //          (x % 5) + 5  if x / 5 is odd
      // The compiler should be able to CSE the below computation of x/5 and
      // the above modulus operations with a nearby inlined computation of x/10.
      r += 5 * ((x/5).lo & 1);
    return r;
  }

  // Equality: both halves match.
  friend constexpr bool
  operator==(const uint128_t x, const uint128_t y)
  { return x.hi == y.hi && x.lo == y.lo; }

  // Ordering: compare the high halves first, then the low halves.
  friend constexpr bool
  operator<(const uint128_t x, const uint128_t y)
  { return x.hi < y.hi || (x.hi == y.hi && x.lo < y.lo); }

  // Bit width of x: 64 plus the bit width of the high half when that is
  // nonzero, otherwise the bit width of the low half.
  friend constexpr auto
  __bit_width(const uint128_t x)
  {
    if (auto w = std::__bit_width(x.hi))
      return w + 64;
    else
      return std::__bit_width(x.lo);
  }

  // Number of trailing zero bits of x.  When the low half reports 64 (it is
  // all zeros) the count continues into the high half.
  friend constexpr auto
  __countr_zero(const uint128_t x)
  {
    auto c = std::__countr_zero(x.lo);
    if (c == 64)
      return 64 + std::__countr_zero(x.hi);
    else
      return c;
  }

  // Pre-decrement and pre-increment.
  constexpr uint128_t&
  operator--()
  { return *this -= 1; }

  constexpr uint128_t&
  operator++()
  { return *this += 1; }

  // Compound assignments; each one forwards to the corresponding binary
  // operator above and therefore shares its preconditions.
  constexpr uint128_t&
  operator+=(const uint128_t y)
  { return *this = *this + y; }

  constexpr uint128_t&
  operator-=(const uint128_t y)
  { return *this = *this - y; }

  constexpr uint128_t&
  operator*=(const uint128_t y)
  { return *this = *this * y; }

  constexpr uint128_t&
  operator<<=(const uint128_t y)
  { return *this = *this << y; }

  constexpr uint128_t&
  operator>>=(const uint128_t y)
  { return *this = *this >> y; }

  constexpr uint128_t&
  operator|=(const uint128_t y)
  { return *this = *this | y; }

  constexpr uint128_t&
  operator&=(const uint128_t y)
  { return *this = *this & y; }

  constexpr uint128_t&
  operator%=(const uint128_t y)
  { return *this = *this % y; }

  constexpr uint128_t&
  operator/=(const uint128_t y)
  { return *this = *this / y; }

  // Remaining comparisons, all expressed through operator== and operator<.
  friend constexpr bool
  operator!=(const uint128_t x, const uint128_t y)
  { return !(x == y); }

  friend constexpr bool
  operator>(const uint128_t x, const uint128_t y)
  { return y < x; }

  friend constexpr bool
  operator>=(const uint128_t x, const uint128_t y)
  { return !(x < y); }

  // Completes the relational operator set; x <= y is the negation of y < x.
  friend constexpr bool
  operator<=(const uint128_t x, const uint128_t y)
  { return !(y < x); }
};