Halide 19.0.0
Halide compiler and libraries
Loading...
Searching...
No Matches
Float16.h
Go to the documentation of this file.
1#ifndef HALIDE_FLOAT16_H
2#define HALIDE_FLOAT16_H
3
5#include <cstdint>
6#include <string>
7
8namespace Halide {
9
10/** Class that provides a type that implements half precision
11 * floating point (IEEE754 2008 binary16) in software.
12 *
13 * This type is enforced to be 16-bits wide and maintains no state
14 * other than the raw IEEE754 binary16 bits so that it can be passed
15 * to code that checks a type's size and used for halide_buffer_t allocation.
16 * */
17struct float16_t {
18
19 static const int mantissa_bits = 10;
20 static const uint16_t sign_mask = 0x8000;
21 static const uint16_t exponent_mask = 0x7c00;
22 static const uint16_t mantissa_mask = 0x03ff;
23
24 /// \name Constructors
25 /// @{
26
27 /** Construct from a float, double, or int using
28 * round-to-nearest-ties-to-even. Out-of-range values become +/-
29 * infinity.
30 */
31 // @{
32 explicit float16_t(float value);
33 explicit float16_t(double value);
34 explicit float16_t(int value);
35 // @}
36
37 /** Construct a float16_t with the bits initialised to 0. This represents
38 * positive zero.*/
39 float16_t() = default;
40
41#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
42 /** Construct a float16_t from compiler's built-in _Float16 type. */
43 explicit float16_t(_Float16 value) {
44 memcpy(&data, &value, sizeof(_Float16));
45 }
46#endif
47
48 /// @}
49
50 // Use explicit to avoid accidentally raising the precision
51 /** Cast to float */
52 explicit operator float() const;
53 /** Cast to double */
54 explicit operator double() const;
55 /** Cast to int */
56 explicit operator int() const;
57
58#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
59 /** Cast to compiler's built-in _Float16 type. */
60 explicit operator _Float16() const {
61 _Float16 result;
62 memcpy(&result, &data, sizeof(_Float16));
63 return result;
64 }
65#endif
66
67 /** Get a new float16_t that represents a special value */
68 // @{
74 // @}
75
76 /** Get a new float16_t with the given raw bits
77 *
78 * \param bits The bits conformant to IEEE754 binary16
79 */
81
82 /** Return a new float16_t with a negated sign bit*/
84
85 /** Arithmetic operators. */
86 // @{
92 return (*this = *this + rhs);
93 }
95 return (*this = *this - rhs);
96 }
98 return (*this = *this * rhs);
99 }
101 return (*this = *this / rhs);
102 }
103 // @}
104
105 /** Comparison operators */
106 // @{
107 bool operator==(float16_t rhs) const;
108 bool operator!=(float16_t rhs) const {
109 return !(*this == rhs);
110 }
111 bool operator>(float16_t rhs) const;
112 bool operator<(float16_t rhs) const;
113 bool operator>=(float16_t rhs) const {
114 return (*this > rhs) || (*this == rhs);
115 }
116 bool operator<=(float16_t rhs) const {
117 return (*this < rhs) || (*this == rhs);
118 }
119 // @}
120
121 /** Properties */
122 // @{
123 bool is_nan() const;
124 bool is_infinity() const;
125 bool is_negative() const;
126 bool is_zero() const;
127 // @}
128
129 /** Returns the bits that represent this float16_t.
130 *
131 * An alternative method to access the bits is to cast a pointer
132 * to this instance as a pointer to a uint16_t.
133 **/
135
136private:
137 // The raw bits.
138 uint16_t data = 0;
139};
140
141static_assert(sizeof(float16_t) == 2, "float16_t should occupy two bytes");
142
143} // namespace Halide
144
145template<>
149
150namespace Halide {
151
152/** Class that provides a type that implements half precision
153 * floating point using the bfloat16 format.
154 *
155 * This type is enforced to be 16-bits wide and maintains no state
156 * other than the raw bits so that it can be passed to code that checks
157 * a type's size and used for halide_buffer_t allocation. */
159
160 static const int mantissa_bits = 7;
161 static const uint16_t sign_mask = 0x8000;
162 static const uint16_t exponent_mask = 0x7f80;
163 static const uint16_t mantissa_mask = 0x007f;
164
166
167 /// \name Constructors
168 /// @{
169
170 /** Construct from a float, double, or int using
171 * round-to-nearest-ties-to-even. Out-of-range values become +/-
172 * infinity.
173 */
174 // @{
175 explicit bfloat16_t(float value);
176 explicit bfloat16_t(double value);
177 explicit bfloat16_t(int value);
178 // @}
179
180 /** Construct a bfloat16_t with the bits initialised to 0. This represents
181 * positive zero.*/
182 bfloat16_t() = default;
183
184 /// @}
185
186 // Use explicit to avoid accidentally raising the precision
187 /** Cast to float */
188 explicit operator float() const;
189 /** Cast to double */
190 explicit operator double() const;
191 /** Cast to int */
192 explicit operator int() const;
193
194 /** Get a new bfloat16_t that represents a special value */
195 // @{
201 // @}
202
203 /** Get a new bfloat16_t with the given raw bits
204 *
205 * \param bits The bits conformant to the bfloat16 format
206 */
208
209 /** Return a new bfloat16_t with a negated sign bit*/
211
212 /** Arithmetic operators. */
213 // @{
219 return (*this = *this + rhs);
220 }
222 return (*this = *this - rhs);
223 }
225 return (*this = *this * rhs);
226 }
228 return (*this = *this / rhs);
229 }
230 // @}
231
232 /** Comparison operators */
233 // @{
234 bool operator==(bfloat16_t rhs) const;
235 bool operator!=(bfloat16_t rhs) const {
236 return !(*this == rhs);
237 }
238 bool operator>(bfloat16_t rhs) const;
239 bool operator<(bfloat16_t rhs) const;
240 bool operator>=(bfloat16_t rhs) const {
241 return (*this > rhs) || (*this == rhs);
242 }
243 bool operator<=(bfloat16_t rhs) const {
244 return (*this < rhs) || (*this == rhs);
245 }
246 // @}
247
248 /** Properties */
249 // @{
250 bool is_nan() const;
251 bool is_infinity() const;
252 bool is_negative() const;
253 bool is_zero() const;
254 // @}
255
256 /** Returns the bits that represent this bfloat16_t.
257 *
258 * An alternative method to access the bits is to cast a pointer
259 * to this instance as a pointer to a uint16_t.
260 **/
262
263private:
264 // The raw bits.
265 uint16_t data = 0;
266};
267
268static_assert(sizeof(bfloat16_t) == 2, "bfloat16_t should occupy two bytes");
269
270} // namespace Halide
271
272template<>
276
277#endif
HALIDE_ALWAYS_INLINE constexpr halide_type_t halide_type_of< Halide::bfloat16_t >()
Definition Float16.h:273
HALIDE_ALWAYS_INLINE constexpr halide_type_t halide_type_of< Halide::float16_t >()
Definition Float16.h:146
This file declares the routines used by Halide internally in its runtime.
@ halide_type_float
IEEE floating point numbers.
@ halide_type_bfloat
floating point numbers in the bfloat format
#define HALIDE_ALWAYS_INLINE
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
unsigned __INT16_TYPE__ uint16_t
void * memcpy(void *s1, const void *s2, size_t n)
Class that provides a type that implements half precision floating point using the bfloat16 format.
Definition Float16.h:158
bfloat16_t operator-=(bfloat16_t rhs)
Definition Float16.h:221
bool operator>(bfloat16_t rhs) const
bool operator<=(bfloat16_t rhs) const
Definition Float16.h:243
static const bfloat16_t zero
Definition Float16.h:165
bfloat16_t operator-() const
Return a new bfloat16_t with a negated sign bit.
bool operator<(bfloat16_t rhs) const
static const uint16_t exponent_mask
Definition Float16.h:162
bfloat16_t operator*(bfloat16_t rhs) const
bool is_infinity() const
static const uint16_t sign_mask
Definition Float16.h:161
uint16_t to_bits() const
Returns the bits that represent this bfloat16_t.
static const bfloat16_t infinity
Definition Float16.h:165
static const uint16_t mantissa_mask
Definition Float16.h:163
bool operator==(bfloat16_t rhs) const
Comparison operators.
static const bfloat16_t nan
Definition Float16.h:165
bfloat16_t operator+(bfloat16_t rhs) const
Arithmetic operators.
bfloat16_t operator*=(bfloat16_t rhs)
Definition Float16.h:224
static const bfloat16_t negative_zero
Definition Float16.h:165
bfloat16_t operator-(bfloat16_t rhs) const
bfloat16_t operator/=(bfloat16_t rhs)
Definition Float16.h:227
bool is_negative() const
bfloat16_t()=default
Construct a bfloat16_t with the bits initialised to 0.
bfloat16_t operator/(bfloat16_t rhs) const
static bfloat16_t make_zero()
Get a new bfloat16_t that represents a special value.
static bfloat16_t make_negative_zero()
static const int mantissa_bits
Definition Float16.h:160
static bfloat16_t make_infinity()
bfloat16_t(float value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
static bfloat16_t make_from_bits(uint16_t bits)
Get a new bfloat16_t with the given raw bits.
bool is_zero() const
bfloat16_t(int value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
bool is_nan() const
Properties.
static bfloat16_t make_negative_infinity()
bfloat16_t(double value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
bool operator!=(bfloat16_t rhs) const
Definition Float16.h:235
static const bfloat16_t negative_infinity
Definition Float16.h:165
bfloat16_t operator+=(bfloat16_t rhs)
Definition Float16.h:218
bool operator>=(bfloat16_t rhs) const
Definition Float16.h:240
static bfloat16_t make_nan()
Class that provides a type that implements half precision floating point (IEEE754 2008 binary16) in software.
Definition Float16.h:17
static float16_t make_infinity()
float16_t operator/(float16_t rhs) const
bool is_negative() const
float16_t(double value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
static const uint16_t sign_mask
Definition Float16.h:20
float16_t operator+(float16_t rhs) const
Arithmetic operators.
bool is_zero() const
float16_t(int value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
bool operator>=(float16_t rhs) const
Definition Float16.h:113
static float16_t make_zero()
Get a new float16_t that represents a special value.
uint16_t to_bits() const
Returns the bits that represent this float16_t.
bool operator<(float16_t rhs) const
static const uint16_t mantissa_mask
Definition Float16.h:22
bool operator==(float16_t rhs) const
Comparison operators.
static float16_t make_negative_zero()
static float16_t make_from_bits(uint16_t bits)
Get a new float16_t with the given raw bits.
float16_t operator/=(float16_t rhs)
Definition Float16.h:100
static float16_t make_nan()
float16_t(float value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
float16_t()=default
Construct a float16_t with the bits initialised to 0.
static const uint16_t exponent_mask
Definition Float16.h:21
float16_t operator-(float16_t rhs) const
bool is_nan() const
Properties.
static float16_t make_negative_infinity()
static const int mantissa_bits
Definition Float16.h:19
bool is_infinity() const
float16_t operator-=(float16_t rhs)
Definition Float16.h:94
float16_t operator*(float16_t rhs) const
float16_t operator-() const
Return a new float16_t with a negated sign bit.
bool operator!=(float16_t rhs) const
Definition Float16.h:108
bool operator<=(float16_t rhs) const
Definition Float16.h:116
bool operator>(float16_t rhs) const
float16_t operator*=(float16_t rhs)
Definition Float16.h:97
float16_t operator+=(float16_t rhs)
Definition Float16.h:91
A runtime tag for a type in the halide type system.