Halide
Float16.h
Go to the documentation of this file.
1 #ifndef HALIDE_FLOAT16_H
2 #define HALIDE_FLOAT16_H
3 
5 #include <cstdint>
6 #include <string>
7 
8 namespace Halide {
9 
/** Class that provides a type that implements half precision
 *  floating point (IEEE754 2008 binary16) in software.
 *
 *  This type is enforced to be 16-bits wide and maintains no state
 *  other than the raw IEEE754 binary16 bits so that it can be passed
 *  to code that checks a type's size and used for halide_buffer_t allocation.
 * */
struct float16_t {

    /** Layout of the 16 raw bits: width of the mantissa field and the
     *  masks selecting the sign, exponent and mantissa fields. */
    // @{
    static const int mantissa_bits = 10;
    static const uint16_t sign_mask = 0x8000;
    static const uint16_t exponent_mask = 0x7c00;
    static const uint16_t mantissa_mask = 0x03ff;
    // @}

    /// \name Constructors
    /// @{

    /** Construct from a float, double, or int using
     * round-to-nearest-ties-to-even. Out-of-range values become +/-
     * infinity.
     */
    // @{
    explicit float16_t(float value);
    explicit float16_t(double value);
    explicit float16_t(int value);
    // @}

    /** Construct a float16_t with the bits initialised to 0. This represents
     * positive zero.*/
    float16_t() = default;

#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
    /** Construct a float16_t from compiler's built-in _Float16 type.
     *  The bits are copied verbatim; no numeric conversion takes place. */
    explicit float16_t(_Float16 value) {
        memcpy(&data, &value, sizeof(_Float16));
    }
#endif

    /// @}

    // Use explicit to avoid accidentally raising the precision
    /** Cast to float */
    explicit operator float() const;
    /** Cast to double */
    explicit operator double() const;
    /** Cast to int */
    explicit operator int() const;

#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
    /** Cast to compiler's built-in _Float16 type.
     *  The bits are copied verbatim; no numeric conversion takes place. */
    explicit operator _Float16() const {
        _Float16 result;
        memcpy(&result, &data, sizeof(_Float16));
        return result;
    }
#endif

    /** Get a new float16_t that represents a special value */
    // @{
    static float16_t make_zero();
    static float16_t make_negative_zero();
    static float16_t make_infinity();
    static float16_t make_negative_infinity();
    static float16_t make_nan();
    // @}

    /** Get a new float16_t with the given raw bits
     *
     * \param bits The bits conformant to IEEE754 binary16
     */
    static float16_t make_from_bits(uint16_t bits);

    /** Return a new float16_t with a negated sign bit*/
    float16_t operator-() const;

    /** Arithmetic operators.
     *
     *  The compound forms store the result of the matching binary
     *  operator in *this and return the updated value by value (not by
     *  reference). */
    // @{
    float16_t operator+(float16_t rhs) const;
    float16_t operator-(float16_t rhs) const;
    float16_t operator*(float16_t rhs) const;
    float16_t operator/(float16_t rhs) const;
    float16_t operator+=(float16_t rhs) {
        return (*this = *this + rhs);
    }
    float16_t operator-=(float16_t rhs) {
        return (*this = *this - rhs);
    }
    float16_t operator*=(float16_t rhs) {
        return (*this = *this * rhs);
    }
    float16_t operator/=(float16_t rhs) {
        return (*this = *this / rhs);
    }
    // @}

    /** Comparison operators.
     *
     *  != is the negation of ==, while >= and <= are composed from the
     *  strict comparison and ==. */
    // @{
    bool operator==(float16_t rhs) const;
    bool operator!=(float16_t rhs) const {
        return !(*this == rhs);
    }
    bool operator>(float16_t rhs) const;
    bool operator<(float16_t rhs) const;
    bool operator>=(float16_t rhs) const {
        return (*this > rhs) || (*this == rhs);
    }
    bool operator<=(float16_t rhs) const {
        return (*this < rhs) || (*this == rhs);
    }
    // @}

    /** Properties */
    // @{
    bool is_nan() const;
    bool is_infinity() const;
    bool is_negative() const;
    bool is_zero() const;
    // @}

    /** Returns the bits that represent this float16_t.
     *
     *  An alternative method to access the bits is to cast a pointer
     *  to this instance as a pointer to a uint16_t.
     **/
    uint16_t to_bits() const;

private:
    // The raw bits.
    uint16_t data = 0;
};

// Guards the "exactly 16 bits wide" promise made in the class comment.
static_assert(sizeof(float16_t) == 2, "float16_t should occupy two bytes");
142 
143 } // namespace Halide
144 
145 template<>
146 HALIDE_ALWAYS_INLINE constexpr halide_type_t halide_type_of<Halide::float16_t>() {
147  return halide_type_t(halide_type_float, 16);
148 }
149 
150 namespace Halide {
151 
/** Class that provides a type that implements half precision
 *  floating point using the bfloat16 format.
 *
 *  This type is enforced to be 16-bits wide and maintains no state
 *  other than the raw bits so that it can be passed to code that checks
 *  a type's size and used for halide_buffer_t allocation. */
struct bfloat16_t {

    /** Layout of the 16 raw bits: width of the mantissa field and the
     *  masks selecting the sign, exponent and mantissa fields. */
    // @{
    static const int mantissa_bits = 7;
    static const uint16_t sign_mask = 0x8000;
    static const uint16_t exponent_mask = 0x7f80;
    static const uint16_t mantissa_mask = 0x007f;
    // @}

    /** Named constants for the special values.
     *  NOTE(review): only declared here; the definitions are presumably
     *  provided out of line with the other members - confirm. */
    static const bfloat16_t zero, negative_zero, infinity, negative_infinity, nan;

    /// \name Constructors
    /// @{

    /** Construct from a float, double, or int using
     * round-to-nearest-ties-to-even. Out-of-range values become +/-
     * infinity.
     */
    // @{
    explicit bfloat16_t(float value);
    explicit bfloat16_t(double value);
    explicit bfloat16_t(int value);
    // @}

    /** Construct a bfloat16_t with the bits initialised to 0. This represents
     * positive zero.*/
    bfloat16_t() = default;

    /// @}

    // Use explicit to avoid accidentally raising the precision
    /** Cast to float */
    explicit operator float() const;
    /** Cast to double */
    explicit operator double() const;
    /** Cast to int */
    explicit operator int() const;

    /** Get a new bfloat16_t that represents a special value */
    // @{
    static bfloat16_t make_zero();
    static bfloat16_t make_negative_zero();
    static bfloat16_t make_infinity();
    static bfloat16_t make_negative_infinity();
    static bfloat16_t make_nan();
    // @}

    /** Get a new bfloat16_t with the given raw bits
     *
     * \param bits The bits conformant to the bfloat16 format
     */
    static bfloat16_t make_from_bits(uint16_t bits);

    /** Return a new bfloat16_t with a negated sign bit*/
    bfloat16_t operator-() const;

    /** Arithmetic operators.
     *
     *  The compound forms store the result of the matching binary
     *  operator in *this and return the updated value by value (not by
     *  reference). */
    // @{
    bfloat16_t operator+(bfloat16_t rhs) const;
    bfloat16_t operator-(bfloat16_t rhs) const;
    bfloat16_t operator*(bfloat16_t rhs) const;
    bfloat16_t operator/(bfloat16_t rhs) const;
    bfloat16_t operator+=(bfloat16_t rhs) {
        return (*this = *this + rhs);
    }
    bfloat16_t operator-=(bfloat16_t rhs) {
        return (*this = *this - rhs);
    }
    bfloat16_t operator*=(bfloat16_t rhs) {
        return (*this = *this * rhs);
    }
    bfloat16_t operator/=(bfloat16_t rhs) {
        return (*this = *this / rhs);
    }
    // @}

    /** Comparison operators.
     *
     *  != is the negation of ==, while >= and <= are composed from the
     *  strict comparison and ==. */
    // @{
    bool operator==(bfloat16_t rhs) const;
    bool operator!=(bfloat16_t rhs) const {
        return !(*this == rhs);
    }
    bool operator>(bfloat16_t rhs) const;
    bool operator<(bfloat16_t rhs) const;
    bool operator>=(bfloat16_t rhs) const {
        return (*this > rhs) || (*this == rhs);
    }
    bool operator<=(bfloat16_t rhs) const {
        return (*this < rhs) || (*this == rhs);
    }
    // @}

    /** Properties */
    // @{
    bool is_nan() const;
    bool is_infinity() const;
    bool is_negative() const;
    bool is_zero() const;
    // @}

    /** Returns the bits that represent this bfloat16_t.
     *
     *  An alternative method to access the bits is to cast a pointer
     *  to this instance as a pointer to a uint16_t.
     **/
    uint16_t to_bits() const;

private:
    // The raw bits.
    uint16_t data = 0;
};

// Guards the "exactly 16 bits wide" promise made in the class comment.
static_assert(sizeof(bfloat16_t) == 2, "bfloat16_t should occupy two bytes");
269 
270 } // namespace Halide
271 
272 template<>
273 HALIDE_ALWAYS_INLINE constexpr halide_type_t halide_type_of<Halide::bfloat16_t>() {
274  return halide_type_t(halide_type_bfloat, 16);
275 }
276 
277 #endif
Halide::float16_t::operator-=
float16_t operator-=(float16_t rhs)
Definition: Float16.h:94
Halide::bfloat16_t::operator/
bfloat16_t operator/(bfloat16_t rhs) const
Halide::float16_t::sign_mask
static const uint16_t sign_mask
Definition: Float16.h:20
Halide::bfloat16_t::is_negative
bool is_negative() const
Halide::bfloat16_t::operator==
bool operator==(bfloat16_t rhs) const
Comparison operators.
halide_type_bfloat
@ halide_type_bfloat
floating point numbers in the bfloat format
Definition: HalideRuntime.h:458
Halide::float16_t::operator+
float16_t operator+(float16_t rhs) const
Arithmetic operators.
Halide::float16_t::is_zero
bool is_zero() const
uint16_t
unsigned __INT16_TYPE__ uint16_t
Definition: runtime_internal.h:27
Halide::float16_t::make_nan
static float16_t make_nan()
Halide::bfloat16_t::negative_zero
static const bfloat16_t negative_zero
Definition: Float16.h:165
halide_type_float
@ halide_type_float
IEEE floating point numbers.
Definition: HalideRuntime.h:456
Halide::bfloat16_t::operator>=
bool operator>=(bfloat16_t rhs) const
Definition: Float16.h:240
Halide::bfloat16_t::is_zero
bool is_zero() const
Halide::bfloat16_t::operator-=
bfloat16_t operator-=(bfloat16_t rhs)
Definition: Float16.h:221
Halide::bfloat16_t::zero
static const bfloat16_t zero
Definition: Float16.h:165
Halide::bfloat16_t::make_infinity
static bfloat16_t make_infinity()
Halide::bfloat16_t::exponent_mask
static const uint16_t exponent_mask
Definition: Float16.h:162
Halide::float16_t::make_zero
static float16_t make_zero()
Get a new float16_t that represents a special value.
Halide::float16_t::operator-
float16_t operator-() const
Return a new float16_t with a negated sign bit.
Halide::float16_t::make_negative_zero
static float16_t make_negative_zero()
halide_type_t
A runtime tag for a type in the halide type system.
Definition: HalideRuntime.h:476
Halide::float16_t::operator==
bool operator==(float16_t rhs) const
Comparison operators.
Halide::bfloat16_t::operator<=
bool operator<=(bfloat16_t rhs) const
Definition: Float16.h:243
Halide::bfloat16_t::sign_mask
static const uint16_t sign_mask
Definition: Float16.h:161
Halide::float16_t::mantissa_mask
static const uint16_t mantissa_mask
Definition: Float16.h:22
Halide::bfloat16_t::operator!=
bool operator!=(bfloat16_t rhs) const
Definition: Float16.h:235
Halide::float16_t::is_infinity
bool is_infinity() const
Halide
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
Definition: AbstractGenerator.h:19
Halide::bfloat16_t::operator<
bool operator<(bfloat16_t rhs) const
Halide::bfloat16_t::infinity
static const bfloat16_t infinity
Definition: Float16.h:165
HALIDE_ALWAYS_INLINE
#define HALIDE_ALWAYS_INLINE
Definition: HalideRuntime.h:40
Halide::float16_t::is_nan
bool is_nan() const
Properties.
Halide::bfloat16_t::is_infinity
bool is_infinity() const
Halide::bfloat16_t::make_from_bits
static bfloat16_t make_from_bits(uint16_t bits)
Get a new bfloat16_t with the given raw bits.
Halide::bfloat16_t::operator+=
bfloat16_t operator+=(bfloat16_t rhs)
Definition: Float16.h:218
Halide::bfloat16_t::operator*
bfloat16_t operator*(bfloat16_t rhs) const
Halide::float16_t::float16_t
float16_t()=default
Construct a float16_t with the bits initialised to 0.
Halide::bfloat16_t::bfloat16_t
bfloat16_t()=default
Construct a bfloat16_t with the bits initialised to 0.
Halide::bfloat16_t::operator>
bool operator>(bfloat16_t rhs) const
Halide::bfloat16_t::nan
static const bfloat16_t nan
Definition: Float16.h:165
Halide::float16_t::operator+=
float16_t operator+=(float16_t rhs)
Definition: Float16.h:91
Halide::float16_t::operator/=
float16_t operator/=(float16_t rhs)
Definition: Float16.h:100
Halide::float16_t::make_from_bits
static float16_t make_from_bits(uint16_t bits)
Get a new float16_t with the given raw bits.
Halide::bfloat16_t::operator*=
bfloat16_t operator*=(bfloat16_t rhs)
Definition: Float16.h:224
Halide::float16_t::operator*
float16_t operator*(float16_t rhs) const
Halide::float16_t::make_infinity
static float16_t make_infinity()
Halide::bfloat16_t
Class that provides a type that implements half precision floating point using the bfloat16 format.
Definition: Float16.h:158
Halide::bfloat16_t::make_negative_zero
static bfloat16_t make_negative_zero()
Halide::bfloat16_t::operator/=
bfloat16_t operator/=(bfloat16_t rhs)
Definition: Float16.h:227
Halide::float16_t::operator>=
bool operator>=(float16_t rhs) const
Definition: Float16.h:113
Halide::float16_t::operator<=
bool operator<=(float16_t rhs) const
Definition: Float16.h:116
Halide::float16_t::to_bits
uint16_t to_bits() const
Returns the bits that represent this float16_t.
HalideRuntime.h
memcpy
void * memcpy(void *s1, const void *s2, size_t n)
Halide::bfloat16_t::is_nan
bool is_nan() const
Properties.
Halide::bfloat16_t::negative_infinity
static const bfloat16_t negative_infinity
Definition: Float16.h:165
Halide::bfloat16_t::mantissa_mask
static const uint16_t mantissa_mask
Definition: Float16.h:163
Halide::float16_t::operator*=
float16_t operator*=(float16_t rhs)
Definition: Float16.h:97
Halide::float16_t::operator/
float16_t operator/(float16_t rhs) const
Halide::float16_t::is_negative
bool is_negative() const
Halide::float16_t::exponent_mask
static const uint16_t exponent_mask
Definition: Float16.h:21
Halide::float16_t::mantissa_bits
static const int mantissa_bits
Definition: Float16.h:19
Halide::bfloat16_t::operator-
bfloat16_t operator-() const
Return a new bfloat16_t with a negated sign bit.
Halide::float16_t::make_negative_infinity
static float16_t make_negative_infinity()
Halide::bfloat16_t::make_negative_infinity
static bfloat16_t make_negative_infinity()
Halide::float16_t
Class that provides a type that implements half precision floating point (IEEE754 2008 binary16) in s...
Definition: Float16.h:17
Halide::float16_t::operator>
bool operator>(float16_t rhs) const
Halide::bfloat16_t::to_bits
uint16_t to_bits() const
Returns the bits that represent this bfloat16_t.
Halide::float16_t::operator<
bool operator<(float16_t rhs) const
Halide::bfloat16_t::operator+
bfloat16_t operator+(bfloat16_t rhs) const
Arithmetic operators.
Halide::bfloat16_t::make_nan
static bfloat16_t make_nan()
Halide::bfloat16_t::mantissa_bits
static const int mantissa_bits
Definition: Float16.h:160
Halide::bfloat16_t::make_zero
static bfloat16_t make_zero()
Get a new bfloat16_t that represents a special value.
Halide::float16_t::operator!=
bool operator!=(float16_t rhs) const
Definition: Float16.h:108