47#ifndef CPL_FLOAT_H_INCLUDED
48#define CPL_FLOAT_H_INCLUDED
58#ifdef HAVE_STD_FLOAT16_T
72GUInt16 CPL_DLL CPLConvertFloatToHalf(
float fFloat32);
73float CPL_DLL CPLConvertHalfToFloat(
GUInt16 nHalf);
77#ifndef GDALCopyXMMToInt16_defined
78#define GDALCopyXMMToInt16_defined
80static inline void GDALCopyXMMToInt16(
const __m128i xmm,
void *pDest)
82 GInt16 i =
static_cast<GInt16>(_mm_extract_epi16(xmm, 0));
100#ifndef HAVE_STD_FLOAT16_T
109 struct make_from_bits_and_value
116 using repr = _Float16;
119 using compute = _Float16;
125 constexpr Float16(make_from_bits_and_value,
CPL_UNUSED std::uint16_t bits,
127 : rValue(repr(fValue))
131 static constexpr repr computeToRepr(compute fValue)
136 static constexpr compute reprToCompute(repr rValue)
141 template <
typename T>
static constexpr repr toRepr(T fValue)
143 return static_cast<repr
>(fValue);
146 template <
typename T>
static constexpr T fromRepr(repr rValue)
148 return static_cast<T
>(rValue);
154 using repr = std::uint16_t;
157 using compute = float;
163 constexpr Float16(make_from_bits_and_value, std::uint16_t bits,
169 static unsigned float2unsigned(
float f)
172 std::memcpy(&u, &f, 4);
176 static float unsigned2float(
unsigned u)
179 std::memcpy(&f, &u, 4);
184 static std::uint16_t computeToRepr(
float fFloat32)
186 std::uint32_t iFloat32 = float2unsigned(fFloat32);
188 std::uint32_t iSign = (iFloat32 >> 31) & 0x00000001;
189 std::uint32_t iExponent = (iFloat32 >> 23) & 0x000000ff;
190 std::uint32_t iMantissa = iFloat32 & 0x007fffff;
192 if (iExponent == 255)
197 return static_cast<std::int16_t
>((iSign << 15) | 0x7C00);
202 return static_cast<std::int16_t
>((iSign << 15) | 0x7C00 |
204 return static_cast<std::int16_t
>((iSign << 15) | 0x7E00);
207 if (iExponent <= 127 - 15)
211 if (13 + 1 + 127 - 15 - iExponent >= 32)
212 return static_cast<std::int16_t
>(iSign << 15);
215 return static_cast<std::int16_t
>(
217 ((iMantissa | 0x00800000) >> (13 + 1 + 127 - 15 - iExponent)));
220 if (iExponent - (127 - 15) >= 31)
222 return static_cast<std::int16_t
>((iSign << 15) |
227 iExponent = iExponent - (127 - 15);
228 iMantissa = iMantissa >> 13;
232 return static_cast<std::int16_t
>((iSign << 15) | (iExponent << 10) |
237 static float reprToCompute(std::uint16_t iHalf)
239 std::uint32_t iSign = (iHalf >> 15) & 0x00000001;
240 int iExponent = (iHalf >> 10) & 0x0000001f;
241 std::uint32_t iMantissa = iHalf & 0x000003ff;
248 return unsigned2float((iSign << 31) | 0x7f800000);
252 return unsigned2float((iSign << 31) | 0x7f800000 |
261 return unsigned2float(iSign << 31);
265 while (!(iMantissa & 0x00000400))
272 iMantissa &= ~0x00000400U;
276 iExponent = iExponent + (127 - 15);
277 iMantissa = iMantissa << 13;
281 return unsigned2float((iSign << 31) |
282 (
static_cast<std::uint32_t
>(iExponent) << 23) |
286 template <
typename T>
static repr toRepr(T value)
289 float fValue =
static_cast<float>(value);
290 __m128 xmm_float = _mm_load_ss(&fValue);
291 const __m128i xmm_hfloat =
292 _mm_cvtps_ph(xmm_float, _MM_FROUND_TO_NEAREST_INT);
294 GDALCopyXMMToInt16(xmm_hfloat, &hfValueOut);
297 return computeToRepr(
static_cast<compute
>(value));
301 template <
typename T>
static T fromRepr(repr rValue)
305 memcpy(&xmm, &rValue,
sizeof(repr));
307 _mm_store_ss(&fValueOut, _mm_cvtph_ps(xmm));
308 return static_cast<T
>(fValueOut);
310 return static_cast<T
>(reprToCompute(rValue));
322 return reprToCompute(rValue);
327 Float16(
const Float16 &) =
default;
328 Float16(Float16 &&) =
default;
329 Float16 &operator=(
const Float16 &) =
default;
330 Float16 &operator=(Float16 &&) =
default;
336 constexpr Float16(_Float16 hfValue) : rValue(hfValue)
340 constexpr operator _Float16()
const
347#define GDAL_DEFINE_CONVERSION(TYPE) \
349 Float16(TYPE fValue) : rValue(toRepr(fValue)) \
353 operator TYPE() const \
355 return fromRepr<TYPE>(rValue); \
358 GDAL_DEFINE_CONVERSION(
float)
359 GDAL_DEFINE_CONVERSION(
double)
360 GDAL_DEFINE_CONVERSION(
char)
361 GDAL_DEFINE_CONVERSION(
signed char)
362 GDAL_DEFINE_CONVERSION(
short)
363 GDAL_DEFINE_CONVERSION(
int)
364 GDAL_DEFINE_CONVERSION(
long)
365 GDAL_DEFINE_CONVERSION(
long long)
366 GDAL_DEFINE_CONVERSION(
unsigned char)
367 GDAL_DEFINE_CONVERSION(
unsigned short)
368 GDAL_DEFINE_CONVERSION(
unsigned int)
369 GDAL_DEFINE_CONVERSION(
unsigned long)
370 GDAL_DEFINE_CONVERSION(
unsigned long long)
372#undef GDAL_DEFINE_CONVERSION
376 friend Float16 operator+(Float16 x)
381 friend Float16 operator-(Float16 x)
386#define GDAL_DEFINE_ARITHOP(OP) \
388 friend Float16 operator OP(Float16 x, Float16 y) \
390 return x.get() OP y.get(); \
393 friend double operator OP(double x, Float16 y) \
395 return x OP static_cast<double>(y.get()); \
398 friend float operator OP(float x, Float16 y) \
400 return x OP static_cast<float>(y.get()); \
403 friend Float16 operator OP(int x, Float16 y) \
405 return x OP static_cast<float>(y.get()); \
408 friend double operator OP(Float16 x, double y) \
410 return static_cast<double>(x.get()) OP y; \
413 friend float operator OP(Float16 x, float y) \
415 return static_cast<float>(x.get()) OP y; \
418 friend Float16 operator OP(Float16 x, int y) \
420 return static_cast<float>(x.get()) OP y; \
423 GDAL_DEFINE_ARITHOP(+)
424 GDAL_DEFINE_ARITHOP(-)
425 GDAL_DEFINE_ARITHOP(*)
426 GDAL_DEFINE_ARITHOP(/)
428#undef GDAL_DEFINE_ARITHOP
432#define GDAL_DEFINE_COMPARISON(OP) \
434 friend bool operator OP(Float16 x, Float16 y) \
436 return x.get() OP y.get(); \
439 friend bool operator OP(float x, Float16 y) \
441 return x OP static_cast<float>(y.get()); \
444 friend bool operator OP(double x, Float16 y) \
446 return x OP static_cast<double>(y.get()); \
449 friend bool operator OP(int x, Float16 y) \
451 return x OP static_cast<float>(y.get()); \
454 friend bool operator OP(Float16 x, float y) \
456 return static_cast<float>(x.get()) OP y; \
459 friend bool operator OP(Float16 x, double y) \
461 return static_cast<double>(x.get()) OP y; \
464 friend bool operator OP(Float16 x, int y) \
466 return static_cast<float>(x.get()) OP y; \
469 GDAL_DEFINE_COMPARISON(==)
470 GDAL_DEFINE_COMPARISON(!=)
471 GDAL_DEFINE_COMPARISON(<)
472 GDAL_DEFINE_COMPARISON(>)
473 GDAL_DEFINE_COMPARISON(<=)
474 GDAL_DEFINE_COMPARISON(>=)
476#undef GDAL_DEFINE_COMPARISON
480 friend bool isfinite(Float16 x)
483 return isfinite(
float(x));
486 friend bool isinf(Float16 x)
489 return isinf(
float(x));
492 friend bool isnan(Float16 x)
495 return isnan(
float(x));
498 friend bool isnormal(Float16 x)
501 return isnormal(
float(x));
504 friend bool signbit(Float16 x)
507 return signbit(
float(x));
510 friend Float16 abs(Float16 x)
513 return Float16(abs(
float(x)));
516 friend Float16 cbrt(Float16 x)
519 return Float16(cbrt(
float(x)));
522 friend Float16 ceil(Float16 x)
525 return Float16(ceil(
float(x)));
528 friend Float16 copysign(Float16 x, Float16 y)
531 return Float16(copysign(
float(x),
float(y)));
534 friend Float16 fabs(Float16 x)
537 return Float16(fabs(
float(x)));
540 friend Float16 floor(Float16 x)
543 return Float16(floor(
float(x)));
546 friend Float16 fmax(Float16 x, Float16 y)
549 return Float16(fmax(
float(x),
float(y)));
552 friend Float16 fmin(Float16 x, Float16 y)
555 return Float16(fmin(
float(x),
float(y)));
558 friend Float16 hypot(Float16 x, Float16 y)
561 return Float16(hypot(
float(x),
float(y)));
564 friend Float16 max(Float16 x, Float16 y)
567 return Float16(max(
float(x),
float(y)));
570 friend Float16 min(Float16 x, Float16 y)
573 return Float16(min(
float(x),
float(y)));
577 friend Float16 nextafter(Float16 x, Float16 y)
589 std::memcpy(&bits, &x.rValue, 2);
590 if ((x < y) == (x > Float16(0)))
597 bits = (signbit(y) << 15) | 0x0001;
601 std::memcpy(&r.rValue, &bits, 2);
606 friend Float16 pow(Float16 x, Float16 y)
609 return Float16(pow(
float(x),
float(y)));
612 friend Float16 pow(Float16 x,
int n)
615 return Float16(pow(
float(x), n));
618 friend Float16 round(Float16 x)
621 return Float16(round(
float(x)));
624 friend Float16 sqrt(Float16 x)
627 return Float16(sqrt(
float(x)));
633 static constexpr bool is_specialized =
true;
634 static constexpr bool is_signed =
true;
635 static constexpr bool is_integer =
false;
636 static constexpr bool is_exact =
false;
637 static constexpr bool has_infinity =
true;
638 static constexpr bool has_quiet_NaN =
true;
639 static constexpr bool has_signaling_NaN =
true;
640 static constexpr bool has_denorm =
true;
641 static constexpr bool is_iec559 =
true;
643 static constexpr int digits = 11;
644 static constexpr int digits10 = 3;
645 static constexpr int max_digits10 = 5;
646 static constexpr int radix = 2;
648 static constexpr Float16 epsilon()
650 return Float16(Float16::make_from_bits_and_value{}, 0x1400, 0.000977f);
653 static constexpr Float16 min()
655 return Float16(Float16::make_from_bits_and_value{}, 0x0001, 6.0e-8f);
658 static constexpr Float16 lowest()
660 return Float16(Float16::make_from_bits_and_value{}, 0xfbff, -65504.0f);
663 static constexpr Float16 max()
665 return Float16(Float16::make_from_bits_and_value{}, 0x7bff, +65504.0f);
668 static constexpr Float16 infinity()
670 return Float16(Float16::make_from_bits_and_value{}, 0x7c00,
671 std::numeric_limits<float>::infinity());
674 static constexpr Float16 quiet_NaN()
676 return Float16(Float16::make_from_bits_and_value{}, 0x7e00,
677 std::numeric_limits<float>::quiet_NaN());
680 static constexpr Float16 signaling_NaN()
682 return Float16(Float16::make_from_bits_and_value{}, 0xfe00,
683 std::numeric_limits<float>::signaling_NaN());
693#ifdef HAVE_STD_FLOAT16_T
694using GFloat16 = std::float16_t;
696using GFloat16 = cpl::Float16;
703template <
typename T>
constexpr int CPLIsNan(T x)
714template <
typename T>
constexpr int CPLIsInf(T x)
720template <
typename T>
constexpr int CPLIsFinite(T x)
728double CPL_DLL CPLGreatestCommonDivisor(
double x,
double y);
Core portability definitions for CPL.
short GInt16
Int16 type.
Definition cpl_port.h:171
#define CPL_C_END
Macro to end a block of C symbols.
Definition cpl_port.h:289
#define CPL_UNUSED
Qualifier for an argument that is unused.
Definition cpl_port.h:879
#define CPL_C_START
Macro to start a block of C symbols.
Definition cpl_port.h:285
unsigned int GUInt32
Unsigned int32 type.
Definition cpl_port.h:167
unsigned short GUInt16
Unsigned int16 type.
Definition cpl_port.h:173
Definition cpl_float.h:97