10#if defined(__SIZEOF_INT128__) && !defined(_MSC_VER)
11#if defined(__clang__) || defined(__GNUC__)
12#pragma GCC diagnostic push
13#pragma GCC diagnostic ignored "-Wpedantic"
21#if defined(__GNUC__) || defined(__clang__)
22 return 64u - (size_t)__builtin_clzll(value);
34#if defined(__SIZEOF_INT128__) && !defined(_MSC_VER)
35 unsigned __int128 value = (
unsigned __int128)lhs * (
unsigned __int128)rhs;
36 *hi = (uint64_t)(value >> 64);
37 return (uint64_t)value;
39 const uint64_t mask32 = 0xffffffffULL;
40 uint64_t lhs_lo = lhs & mask32;
41 uint64_t lhs_hi = lhs >> 32;
42 uint64_t rhs_lo = rhs & mask32;
43 uint64_t rhs_hi = rhs >> 32;
44 uint64_t lo_lo = lhs_lo * rhs_lo;
45 uint64_t hi_lo = lhs_hi * rhs_lo;
46 uint64_t lo_hi = lhs_lo * rhs_hi;
47 uint64_t hi_hi = lhs_hi * rhs_hi;
48 uint64_t cross = (lo_lo >> 32) + (hi_lo & mask32) + (lo_hi & mask32);
49 *hi = hi_hi + (hi_lo >> 32) + (lo_hi >> 32) + (cross >> 32);
50 return (lo_lo & mask32) | (cross << 32);
55#if defined(__SIZEOF_INT128__) && !defined(_MSC_VER)
56 unsigned __int128 accum = (
unsigned __int128)value + (
unsigned __int128)addend + ((
unsigned __int128)(*hi) << 64);
57 *hi = (uint64_t)(accum >> 64);
58 return (uint64_t)accum;
60 uint64_t out = value + addend;
61 *hi += out < value ? 1u : 0u;
67 return (uint64_t)0 - (uint64_t)(flag != 0);
73 for (i = 0; i < 8u; ++i) {
80 uint64_t carry =
bit & 1u;
82 for (i = 0; i < 8u; ++i) {
83 uint64_t next = value[i] >> 63;
84 value[i] = (value[i] << 1) | carry;
92 for (i = 0; i < 8u; ++i) {
93 uint64_t rhs_word = rhs[i] + borrow;
94 uint64_t rhs_overflow = rhs_word < rhs[i] ? 1u : 0u;
95 uint64_t diff = lhs[i] - rhs_word;
96 uint64_t needs_borrow = lhs[i] < rhs_word ? 1u : 0u;
98 borrow = rhs_overflow | needs_borrow;
105 for (i = 0; i < 8u; ++i) {
106 dst[i] ^= mask & (dst[i] ^ src[i]);
111#if defined(__SIZEOF_INT128__) && !defined(_MSC_VER)
112 unsigned __int128 value = ((
unsigned __int128)hi << 64) | lo;
113 uint64_t quotient = (uint64_t)(value / divisor);
114 *rem_out = (uint32_t)(value % divisor);
117 const uint64_t mask32 = 0xffffffffULL;
125 cur = (rem << 32) | (lo >> 32);
126 q1 = (uint32_t)(cur / divisor);
128 cur = (rem << 32) | (lo & mask32);
129 q0 = (uint32_t)(cur / divisor);
133 q2 = (uint32_t)(hi & mask32);
136 *rem_out = (uint32_t)rem;
137 return ((uint64_t)q1 << 32) | q0;
141#if defined(__SIZEOF_INT128__) && !defined(_MSC_VER)
142#if defined(__clang__) || defined(__GNUC__)
143#pragma GCC diagnostic pop
147#define PURIFY_UINT_FN(name) purify_u256_##name
148#define PURIFY_UINT_WORDS 4
150#undef PURIFY_UINT_WORDS
153#define PURIFY_UINT_FN(name) purify_u320_##name
154#define PURIFY_UINT_WORDS 5
156#undef PURIFY_UINT_WORDS
159#define PURIFY_UINT_FN(name) purify_u512_##name
160#define PURIFY_UINT_WORDS 8
162#undef PURIFY_UINT_WORDS
166 purify_u320_set_zero(out);
167 memcpy(out, value, 4 *
sizeof(uint64_t));
171 purify_u512_set_zero(out);
172 memcpy(out, value, 4 *
sizeof(uint64_t));
179 memcpy(out, value, 4 *
sizeof(uint64_t));
185 for (i = 4; i < 8; ++i) {
190 memcpy(out, value, 4 *
sizeof(uint64_t));
195 const uint64_t numerator[8],
const uint64_t denominator[8]) {
202 if (purify_u512_is_zero(denominator)) {
206 memcpy(remainder, numerator, 8 *
sizeof(uint64_t));
207 purify_u512_set_zero(quotient);
208 n_bits = purify_u512_bit_length(remainder);
209 d_bits = purify_u512_bit_length(denominator);
210 if (n_bits < d_bits) {
214 shift = n_bits - d_bits;
215 purify_u512_shifted_left(shifted, denominator, shift);
216 for (i = shift + 1u; i != 0; --i) {
218 if (purify_u512_compare(remainder, shifted) >= 0) {
219 int sub_ok = purify_u512_try_sub(remainder, shifted);
220 int bit_ok = purify_u512_try_set_bit(quotient, idx);
223 if (sub_ok == 0 || bit_ok == 0) {
228 purify_u512_shift_right_one(shifted);
236 const uint64_t numerator[8],
const uint64_t denominator[8]) {
237 uint64_t candidate[8];
241 purify_u512_set_zero(quotient);
242 purify_u512_set_zero(remainder);
243 for (bit_index = 512u; bit_index != 0; --bit_index) {
244 const size_t idx = bit_index - 1u;
245 const size_t word = idx / 64u;
246 const size_t shift = idx % 64u;
247 const uint64_t quotient_bit = (uint64_t)1u << shift;
248 const uint64_t next_bit = (numerator[word] >> shift) & 1u;
256 quotient[word] |= ge_mask & quotient_bit;
260 return denominator_mask != 0;
265 purify_u512_set_zero(out);
266 for (i = 0; i < 4; ++i) {
269 for (j = 0; j < 4; ++j) {
int purify_u256_try_narrow_u512(uint64_t out[4], const uint64_t value[8])
static size_t purify_uint_bit_length_u64(uint64_t value)
static uint64_t purify_uint_add_u64_carry(uint64_t value, uint64_t addend, uint64_t *hi)
static uint64_t purify_u512_sub_with_borrow_ct(uint64_t out[8], const uint64_t lhs[8], const uint64_t rhs[8])
static int purify_u512_is_nonzero_ct(const uint64_t value[8])
static uint64_t purify_uint_divmod_u32(uint64_t hi, uint64_t lo, uint32_t divisor, uint32_t *rem_out)
void purify_u512_widen_u256(uint64_t out[8], const uint64_t value[4])
void purify_u320_widen_u256(uint64_t out[5], const uint64_t value[4])
static uint64_t purify_uint_mul_u64(uint64_t lhs, uint64_t rhs, uint64_t *hi)
static void purify_u512_cmov_words(uint64_t dst[8], const uint64_t src[8], uint64_t mask)
void purify_u512_multiply_u256(uint64_t out[8], const uint64_t lhs[4], const uint64_t rhs[4])
int purify_u512_try_divmod_same(uint64_t quotient[8], uint64_t remainder[8], const uint64_t numerator[8], const uint64_t denominator[8])
int purify_u256_try_narrow_u320(uint64_t out[4], const uint64_t value[5])
int purify_u512_try_divmod_same_consttime(uint64_t quotient[8], uint64_t remainder[8], const uint64_t numerator[8], const uint64_t denominator[8])
static uint64_t purify_uint_mask_u64(int flag)
static void purify_u512_shift_left_one_or_bit(uint64_t value[8], uint64_t bit)
Fixed-width unsigned integer helpers implemented in C for Purify.
int PURIFY_UINT_FN() bit(const uint64_t value[PURIFY_UINT_WORDS], size_t index)