Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 51 additions & 8 deletions dace/runtime/include/dace/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,17 +96,60 @@ namespace dace
typedef std::complex<float> complex64;
typedef std::complex<double> complex128;
struct half {
// source: https://stackoverflow.com/a/26779139/15853075
half() {}

// IEEE-754 binary32 -> binary16, round-to-nearest-even.
// Based on (https://gist.github.com/rygorous/2156668)
half(float f) {
union { float fval; uint32_t x; } u;
u.fval = f;
h = ((u.x>>16)&0x8000)|((((u.x&0x7f800000)-0x38000000)>>13)&0x7c00)|((u.x>>13)&0x03ff);
union { float f; uint32_t u; } in;
in.f = f;
uint32_t sign = in.u & 0x80000000u;
in.u ^= sign; // work on the magnitude

if (in.u >= 0x47800000u) {
// Inf or NaN (exponent overflows half range): NaN -> qNaN, else Inf.
h = (in.u > 0x7f800000u) ? 0x7e00 : 0x7c00;
} else if (in.u < 0x38800000u) {
// Subnormal half or zero. Adding this magic constant and relying on
// round-to-nearest-even FP addition aligns the mantissa correctly.
union { uint32_t u; float f; } magic;
magic.u = 0x3f000000u; // 126 << 23
in.f += magic.f;
h = (uint16_t)(in.u - magic.u);
} else {
// Normal half: rebias exponent (127 -> 15) and round mantissa to even.
uint32_t mant_odd = (in.u >> 13) & 1;
in.u += 0xc8000000u + 0xfffu; // ((15 - 127) << 23) + rounding bias
in.u += mant_odd;
h = (uint16_t)(in.u >> 13);
}
h |= (uint16_t)(sign >> 16);
}
operator float() {
union { float f; uint32_t tmp; } u;
u.tmp = ((h&0x8000)<<16) | (((h&0x7c00)+0x1C000)<<13) | ((h&0x03FF)<<13);
return u.f;

// IEEE-754 binary16 -> binary32 (exact; every binary16 fits in binary32).
operator float() const {
uint32_t sign = (uint32_t)(h & 0x8000) << 16;
uint32_t exp = (h >> 10) & 0x1f;
uint32_t mant = h & 0x3ff;
union { uint32_t u; float f; } out;
if (exp == 0) {
if (mant == 0) {
out.u = sign; // signed zero
} else {
// Subnormal half: normalize into a float normal.
exp = 1;
do { exp--; mant <<= 1; } while ((mant & 0x400) == 0);
mant &= 0x3ff;
out.u = sign | ((exp + (127 - 15)) << 23) | (mant << 13);
}
} else if (exp == 0x1f) {
out.u = sign | 0x7f800000u | (mant << 13); // Inf or NaN
} else {
out.u = sign | ((exp + (127 - 15)) << 23) | (mant << 13); // normal
}
return out.f;
}

uint16_t h;
};
typedef half float16;
Expand Down
87 changes: 87 additions & 0 deletions tests/half_cpu_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Copyright 2019-2026 ETH Zurich and the DaCe authors. All rights reserved.

import dace
import numpy as np

N = dace.symbol("N")


def _roundtrip_program():
"""A program that stores float32 input as float16 and reads it back as float32.

This forces both conversion directions of the host ``dace::half``:
``half(float)`` on the store and ``operator float()`` on the load.
"""

@dace.program
def f32_to_f16_to_f32(inp: dace.float32[N], out: dace.float32[N]):
tmp = np.ndarray([N], dace.float16)
for i in dace.map[0:N]:
with dace.tasklet:
a << inp[i]
t >> tmp[i]
t = a # float32 -> float16
for i in dace.map[0:N]:
with dace.tasklet:
t << tmp[i]
o >> out[i]
o = t # float16 -> float32

return f32_to_f16_to_f32


def test_float16_cpu_roundtrip_matches_numpy():
"""Host float32<->float16 conversion must agree with IEEE-754 (NumPy float16).

Covers zero, exact-rounding, small/subnormal magnitudes, inf, and NaN.
"""
prog = _roundtrip_program()
inp = np.array(
[
0.0,
-0.0,
1.0,
-1.0,
2.0,
3.0,
0.5,
-0.5,
1.25,
-2.5,
100.0,
1024.0,
65504.0,
1e-3,
6e-5,
6e-8,
-6e-8,

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a subnormal number to your tests?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added inf and nan tests

np.inf,
-np.inf,
np.nan,
],
dtype=np.float32,
)
out = np.full(inp.shape, np.nan, dtype=np.float32)
prog(inp=inp, out=out, N=inp.size)

# NumPy's float16 is IEEE-754 round-to-nearest-even -- the reference.
expected = inp.astype(np.float16).astype(np.float32)
np.testing.assert_array_equal(out, expected)


def test_float16_cpu_random_roundtrip_matches_numpy():
"""Randomized sweep of the host conversion against NumPy float16."""
rng = np.random.default_rng(0)
inp = (rng.standard_normal(4096) * 10.0).astype(np.float32)
out = np.full(inp.shape, np.nan, dtype=np.float32)

prog = _roundtrip_program()
prog(inp=inp, out=out, N=inp.size)

expected = inp.astype(np.float16).astype(np.float32)
np.testing.assert_array_equal(out, expected)


if __name__ == "__main__":
test_float16_cpu_roundtrip_matches_numpy()
test_float16_cpu_random_roundtrip_matches_numpy()