parent
c894f3ec1d
commit
3ab23f7ebf
|
@ -11,9 +11,11 @@
|
|||
#ifndef LIBDIVIDE_H
|
||||
#define LIBDIVIDE_H
|
||||
|
||||
#define LIBDIVIDE_VERSION "5.1"
|
||||
// *** Version numbers are auto generated - do not edit ***
|
||||
#define LIBDIVIDE_VERSION "5.2.0"
|
||||
#define LIBDIVIDE_VERSION_MAJOR 5
|
||||
#define LIBDIVIDE_VERSION_MINOR 1
|
||||
#define LIBDIVIDE_VERSION_MINOR 2
|
||||
#define LIBDIVIDE_VERSION_PATCH 0
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
@ -34,8 +36,15 @@
|
|||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
// Clang-cl prior to Visual Studio 2022 doesn't include __umulh/__mulh intrinsics
|
||||
#if defined(_MSC_VER) && defined(LIBDIVIDE_X86_64) && (!defined(__clang__) || _MSC_VER>1930)
|
||||
#define LIBDIVIDE_X64_INTRINSICS
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if defined(LIBDIVIDE_X64_INTRINSICS)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#pragma warning(push)
|
||||
// disable warning C4146: unary minus operator applied
|
||||
// to unsigned type, result still unsigned
|
||||
|
@ -238,18 +247,28 @@ static LIBDIVIDE_INLINE struct libdivide_u32_branchfree_t libdivide_u32_branchfr
|
|||
static LIBDIVIDE_INLINE struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d);
|
||||
static LIBDIVIDE_INLINE struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d);
|
||||
|
||||
static LIBDIVIDE_INLINE int16_t libdivide_s16_do_raw(int16_t numer, int16_t magic, uint8_t more);
|
||||
static LIBDIVIDE_INLINE int16_t libdivide_s16_do_raw(
|
||||
int16_t numer, int16_t magic, uint8_t more);
|
||||
static LIBDIVIDE_INLINE int16_t libdivide_s16_do(
|
||||
int16_t numer, const struct libdivide_s16_t *denom);
|
||||
static LIBDIVIDE_INLINE uint16_t libdivide_u16_do_raw(uint16_t numer, uint16_t magic, uint8_t more);
|
||||
static LIBDIVIDE_INLINE uint16_t libdivide_u16_do_raw(
|
||||
uint16_t numer, uint16_t magic, uint8_t more);
|
||||
static LIBDIVIDE_INLINE uint16_t libdivide_u16_do(
|
||||
uint16_t numer, const struct libdivide_u16_t *denom);
|
||||
static LIBDIVIDE_INLINE int32_t libdivide_s32_do_raw(
|
||||
int32_t numer, int32_t magic, uint8_t more);
|
||||
static LIBDIVIDE_INLINE int32_t libdivide_s32_do(
|
||||
int32_t numer, const struct libdivide_s32_t *denom);
|
||||
static LIBDIVIDE_INLINE uint32_t libdivide_u32_do_raw(
|
||||
uint32_t numer, uint32_t magic, uint8_t more);
|
||||
static LIBDIVIDE_INLINE uint32_t libdivide_u32_do(
|
||||
uint32_t numer, const struct libdivide_u32_t *denom);
|
||||
static LIBDIVIDE_INLINE int64_t libdivide_s64_do_raw(
|
||||
int64_t numer, int64_t magic, uint8_t more);
|
||||
static LIBDIVIDE_INLINE int64_t libdivide_s64_do(
|
||||
int64_t numer, const struct libdivide_s64_t *denom);
|
||||
static LIBDIVIDE_INLINE uint64_t libdivide_u64_do_raw(
|
||||
uint64_t numer, uint64_t magic, uint8_t more);
|
||||
static LIBDIVIDE_INLINE uint64_t libdivide_u64_do(
|
||||
uint64_t numer, const struct libdivide_u64_t *denom);
|
||||
|
||||
|
@ -315,7 +334,7 @@ static LIBDIVIDE_INLINE int32_t libdivide_mullhi_s32(int32_t x, int32_t y) {
|
|||
}
|
||||
|
||||
static LIBDIVIDE_INLINE uint64_t libdivide_mullhi_u64(uint64_t x, uint64_t y) {
|
||||
#if defined(LIBDIVIDE_VC) && defined(LIBDIVIDE_X86_64)
|
||||
#if defined(LIBDIVIDE_X64_INTRINSICS)
|
||||
return __umulh(x, y);
|
||||
#elif defined(HAS_INT128_T)
|
||||
__uint128_t xl = x, yl = y;
|
||||
|
@ -341,7 +360,7 @@ static LIBDIVIDE_INLINE uint64_t libdivide_mullhi_u64(uint64_t x, uint64_t y) {
|
|||
}
|
||||
|
||||
static LIBDIVIDE_INLINE int64_t libdivide_mullhi_s64(int64_t x, int64_t y) {
|
||||
#if defined(LIBDIVIDE_VC) && defined(LIBDIVIDE_X86_64)
|
||||
#if defined(LIBDIVIDE_X64_INTRINSICS)
|
||||
return __mulh(x, y);
|
||||
#elif defined(HAS_INT128_T)
|
||||
__int128_t xl = x, yl = y;
|
||||
|
@ -914,12 +933,11 @@ struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) {
|
||||
uint8_t more = denom->more;
|
||||
if (!denom->magic) {
|
||||
uint32_t libdivide_u32_do_raw(uint32_t numer, uint32_t magic, uint8_t more) {
|
||||
if (!magic) {
|
||||
return numer >> more;
|
||||
} else {
|
||||
uint32_t q = libdivide_mullhi_u32(denom->magic, numer);
|
||||
uint32_t q = libdivide_mullhi_u32(magic, numer);
|
||||
if (more & LIBDIVIDE_ADD_MARKER) {
|
||||
uint32_t t = ((numer - q) >> 1) + q;
|
||||
return t >> (more & LIBDIVIDE_32_SHIFT_MASK);
|
||||
|
@ -931,6 +949,10 @@ uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) {
|
|||
}
|
||||
}
|
||||
|
||||
uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) {
|
||||
return libdivide_u32_do_raw(numer, denom->magic, denom->more);
|
||||
}
|
||||
|
||||
uint32_t libdivide_u32_branchfree_do(
|
||||
uint32_t numer, const struct libdivide_u32_branchfree_t *denom) {
|
||||
uint32_t q = libdivide_mullhi_u32(denom->magic, numer);
|
||||
|
@ -1074,12 +1096,11 @@ struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) {
|
||||
uint8_t more = denom->more;
|
||||
if (!denom->magic) {
|
||||
uint64_t libdivide_u64_do_raw(uint64_t numer, uint64_t magic, uint8_t more) {
|
||||
if (!magic) {
|
||||
return numer >> more;
|
||||
} else {
|
||||
uint64_t q = libdivide_mullhi_u64(denom->magic, numer);
|
||||
uint64_t q = libdivide_mullhi_u64(magic, numer);
|
||||
if (more & LIBDIVIDE_ADD_MARKER) {
|
||||
uint64_t t = ((numer - q) >> 1) + q;
|
||||
return t >> (more & LIBDIVIDE_64_SHIFT_MASK);
|
||||
|
@ -1091,6 +1112,10 @@ uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) {
|
|||
}
|
||||
}
|
||||
|
||||
uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) {
|
||||
return libdivide_u64_do_raw(numer, denom->magic, denom->more);
|
||||
}
|
||||
|
||||
uint64_t libdivide_u64_branchfree_do(
|
||||
uint64_t numer, const struct libdivide_u64_branchfree_t *denom) {
|
||||
uint64_t q = libdivide_mullhi_u64(denom->magic, numer);
|
||||
|
@ -1430,11 +1455,10 @@ struct libdivide_s32_branchfree_t libdivide_s32_branchfree_gen(int32_t d) {
|
|||
return result;
|
||||
}
|
||||
|
||||
int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) {
|
||||
uint8_t more = denom->more;
|
||||
int32_t libdivide_s32_do_raw(int32_t numer, int32_t magic, uint8_t more) {
|
||||
uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
|
||||
|
||||
if (!denom->magic) {
|
||||
if (!magic) {
|
||||
uint32_t sign = (int8_t)more >> 7;
|
||||
uint32_t mask = ((uint32_t)1 << shift) - 1;
|
||||
uint32_t uq = numer + ((numer >> 31) & mask);
|
||||
|
@ -1443,7 +1467,7 @@ int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) {
|
|||
q = (q ^ sign) - sign;
|
||||
return q;
|
||||
} else {
|
||||
uint32_t uq = (uint32_t)libdivide_mullhi_s32(denom->magic, numer);
|
||||
uint32_t uq = (uint32_t)libdivide_mullhi_s32(magic, numer);
|
||||
if (more & LIBDIVIDE_ADD_MARKER) {
|
||||
// must be arithmetic shift and then sign extend
|
||||
int32_t sign = (int8_t)more >> 7;
|
||||
|
@ -1458,6 +1482,10 @@ int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) {
|
|||
}
|
||||
}
|
||||
|
||||
int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) {
|
||||
return libdivide_s32_do_raw(numer, denom->magic, denom->more);
|
||||
}
|
||||
|
||||
int32_t libdivide_s32_branchfree_do(int32_t numer, const struct libdivide_s32_branchfree_t *denom) {
|
||||
uint8_t more = denom->more;
|
||||
uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
|
||||
|
@ -1599,11 +1627,10 @@ struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) {
|
||||
uint8_t more = denom->more;
|
||||
int64_t libdivide_s64_do_raw(int64_t numer, int64_t magic, uint8_t more) {
|
||||
uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
|
||||
|
||||
if (!denom->magic) { // shift path
|
||||
if (!magic) { // shift path
|
||||
uint64_t mask = ((uint64_t)1 << shift) - 1;
|
||||
uint64_t uq = numer + ((numer >> 63) & mask);
|
||||
int64_t q = (int64_t)uq;
|
||||
|
@ -1613,7 +1640,7 @@ int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) {
|
|||
q = (q ^ sign) - sign;
|
||||
return q;
|
||||
} else {
|
||||
uint64_t uq = (uint64_t)libdivide_mullhi_s64(denom->magic, numer);
|
||||
uint64_t uq = (uint64_t)libdivide_mullhi_s64(magic, numer);
|
||||
if (more & LIBDIVIDE_ADD_MARKER) {
|
||||
// must be arithmetic shift and then sign extend
|
||||
int64_t sign = (int8_t)more >> 7;
|
||||
|
@ -1628,6 +1655,10 @@ int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) {
|
|||
}
|
||||
}
|
||||
|
||||
int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) {
|
||||
return libdivide_s64_do_raw(numer, denom->magic, denom->more);
|
||||
}
|
||||
|
||||
int64_t libdivide_s64_branchfree_do(int64_t numer, const struct libdivide_s64_branchfree_t *denom) {
|
||||
uint8_t more = denom->more;
|
||||
uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
|
||||
|
|
Loading…
Reference in New Issue