diff --git a/dace/runtime/include/dace/types.h b/dace/runtime/include/dace/types.h index 5a3ca5f62f..675ab6980e 100644 --- a/dace/runtime/include/dace/types.h +++ b/dace/runtime/include/dace/types.h @@ -96,17 +96,60 @@ namespace dace typedef std::complex complex64; typedef std::complex complex128; struct half { - // source: https://stackoverflow.com/a/26779139/15853075 + half() {} + + // IEEE-754 binary32 -> binary16, round-to-nearest-even. + // Based on (https://gist.github.com/rygorous/2156668) half(float f) { - union { float fval; uint32_t x; } u; - u.fval = f; - h = ((u.x>>16)&0x8000)|((((u.x&0x7f800000)-0x38000000)>>13)&0x7c00)|((u.x>>13)&0x03ff); + union { float f; uint32_t u; } in; + in.f = f; + uint32_t sign = in.u & 0x80000000u; + in.u ^= sign; // work on the magnitude + + if (in.u >= 0x47800000u) { + // Inf or NaN (exponent overflows half range): NaN -> qNaN, else Inf. + h = (in.u > 0x7f800000u) ? 0x7e00 : 0x7c00; + } else if (in.u < 0x38800000u) { + // Subnormal half or zero. Adding this magic constant and relying on + // round-to-nearest-even FP addition aligns the mantissa correctly. + union { uint32_t u; float f; } magic; + magic.u = 0x3f000000u; // 126 << 23 + in.f += magic.f; + h = (uint16_t)(in.u - magic.u); + } else { + // Normal half: rebias exponent (127 -> 15) and round mantissa to even. + uint32_t mant_odd = (in.u >> 13) & 1; + in.u += 0xc8000000u + 0xfffu; // ((15 - 127) << 23) + rounding bias + in.u += mant_odd; + h = (uint16_t)(in.u >> 13); + } + h |= (uint16_t)(sign >> 16); } - operator float() { - union { float f; uint32_t tmp; } u; - u.tmp = ((h&0x8000)<<16) | (((h&0x7c00)+0x1C000)<<13) | ((h&0x03FF)<<13); - return u.f; + + // IEEE-754 binary16 -> binary32 (exact; every binary16 fits in binary32). + operator float() const { + uint32_t sign = (uint32_t)(h & 0x8000) << 16; + uint32_t exp = (h >> 10) & 0x1f; + uint32_t mant = h & 0x3ff; + union { uint32_t u; float f; } out; + if (exp == 0) { + if (mant == 0) { + out.u = sign; // signed zero + } else { + // Subnormal half: normalize into a float normal. + exp = 1; + do { exp--; mant <<= 1; } while ((mant & 0x400) == 0); + mant &= 0x3ff; + out.u = sign | ((exp + (127 - 15)) << 23) | (mant << 13); + } + } else if (exp == 0x1f) { + out.u = sign | 0x7f800000u | (mant << 13); // Inf or NaN + } else { + out.u = sign | ((exp + (127 - 15)) << 23) | (mant << 13); // normal + } + return out.f; } + uint16_t h; }; typedef half float16; diff --git a/tests/half_cpu_test.py b/tests/half_cpu_test.py new file mode 100644 index 0000000000..6e0175cf7d --- /dev/null +++ b/tests/half_cpu_test.py @@ -0,0 +1,87 @@ +# Copyright 2019-2026 ETH Zurich and the DaCe authors. All rights reserved. + +import dace +import numpy as np + +N = dace.symbol("N") + + +def _roundtrip_program(): + """A program that stores float32 input as float16 and reads it back as float32. + + This forces both conversion directions of the host ``dace::half``: + ``half(float)`` on the store and ``operator float()`` on the load. + """ + + @dace.program + def f32_to_f16_to_f32(inp: dace.float32[N], out: dace.float32[N]): + tmp = np.ndarray([N], dace.float16) + for i in dace.map[0:N]: + with dace.tasklet: + a << inp[i] + t >> tmp[i] + t = a # float32 -> float16 + for i in dace.map[0:N]: + with dace.tasklet: + t << tmp[i] + o >> out[i] + o = t # float16 -> float32 + + return f32_to_f16_to_f32 + + +def test_float16_cpu_roundtrip_matches_numpy(): + """Host float32<->float16 conversion must agree with IEEE-754 (NumPy float16). + + Covers zero, exact-rounding, small/subnormal magnitudes, inf, and NaN. + """ + prog = _roundtrip_program() + inp = np.array( + [ + 0.0, + -0.0, + 1.0, + -1.0, + 2.0, + 3.0, + 0.5, + -0.5, + 1.25, + -2.5, + 100.0, + 1024.0, + 65504.0, + 1e-3, + 6e-5, + 6e-8, + -6e-8, + np.inf, + -np.inf, + np.nan, + ], + dtype=np.float32, + ) + out = np.full(inp.shape, np.nan, dtype=np.float32) + prog(inp=inp, out=out, N=inp.size) + + # NumPy's float16 is IEEE-754 round-to-nearest-even -- the reference. + expected = inp.astype(np.float16).astype(np.float32) + np.testing.assert_array_equal(out, expected) + + +def test_float16_cpu_random_roundtrip_matches_numpy(): + """Randomized sweep of the host conversion against NumPy float16.""" + rng = np.random.default_rng(0) + inp = (rng.standard_normal(4096) * 10.0).astype(np.float32) + out = np.full(inp.shape, np.nan, dtype=np.float32) + + prog = _roundtrip_program() + prog(inp=inp, out=out, N=inp.size) + + expected = inp.astype(np.float16).astype(np.float32) + np.testing.assert_array_equal(out, expected) + + +if __name__ == "__main__": + test_float16_cpu_roundtrip_matches_numpy() + test_float16_cpu_random_roundtrip_matches_numpy()