Halide 21.0.0
Halide compiler and libraries
Loading...
Searching...
No Matches
IROperator.h
Go to the documentation of this file.
1#ifndef HALIDE_IR_OPERATOR_H
2#define HALIDE_IR_OPERATOR_H
3
4/** \file
5 *
6 * Defines various operator overloads and utility functions that make
7 * it more pleasant to work with Halide expressions.
8 */
9
10#include <cmath>
11#include <map>
12#include <optional>
13
14#include "ConstantInterval.h"
15#include "Expr.h"
16#include "Scope.h"
17#include "Target.h"
18#include "Tuple.h"
19
20namespace Halide {
21
22namespace Internal {
23/** Is the expression either an IntImm, a FloatImm, a StringImm, or a
24 * Cast of the same, or a Ramp or Broadcast of the same. Doesn't do
25 * any constant folding. */
26bool is_const(const Expr &e);
27
28/** Is the expression an IntImm, FloatImm of a particular value, or a
29 * Cast, or Broadcast of the same. */
30bool is_const(const Expr &e, int64_t v);
31
32/** If an expression is an IntImm or a Broadcast of an IntImm, return
33 * its value. Otherwise returns std::nullopt. */
34std::optional<int64_t> as_const_int(const Expr &e);
35
36/** If an expression is a UIntImm or a Broadcast of a UIntImm, return
37 * its value. Otherwise returns std::nullopt. */
38std::optional<uint64_t> as_const_uint(const Expr &e);
39
40/** If an expression is a FloatImm or a Broadcast of a FloatImm,
41 * return its value. Otherwise returns std::nullopt. */
42std::optional<double> as_const_float(const Expr &e);
43
44/** Is the expression a constant integer power of two. Returns log base two of
45 * the expression if it is, or std::nullopt if not. Also returns std::nullopt
46 * for non-integer types. */
47// @{
48std::optional<int> is_const_power_of_two_integer(const Expr &e);
51// @}
52
53/** Is the expression a const (as defined by is_const), and also
54 * strictly greater than zero (in all lanes, if a vector expression) */
55bool is_positive_const(const Expr &e);
56
57/** Is the expression a const (as defined by is_const), and also
58 * strictly less than zero (in all lanes, if a vector expression) */
59bool is_negative_const(const Expr &e);
60
61/** Is the expression an undef */
62bool is_undef(const Expr &e);
63
64/** Is the expression a const (as defined by is_const), and also equal
65 * to zero (in all lanes, if a vector expression) */
66bool is_const_zero(const Expr &e);
67
68/** Is the expression a const (as defined by is_const), and also equal
69 * to one (in all lanes, if a vector expression) */
70bool is_const_one(const Expr &e);
71
72/** Is the statement a no-op (which we represent as either an
73 * undefined Stmt, or as an Evaluate node of a constant) */
74bool is_no_op(const Stmt &s);
75
76/** Does the expression
77 * 1) Take on the same value no matter where it appears in a Stmt, and
78 * 2) Evaluating it has no side-effects
79 */
80bool is_pure(const Expr &e);
81
82/** Construct an immediate of the given type from any numeric C++ type. */
83// @{
86Expr make_const(Type t, double val);
87inline Expr make_const(Type t, int32_t val) {
88 return make_const(t, (int64_t)val);
89}
90inline Expr make_const(Type t, uint32_t val) {
91 return make_const(t, (uint64_t)val);
92}
93inline Expr make_const(Type t, int16_t val) {
94 return make_const(t, (int64_t)val);
95}
96inline Expr make_const(Type t, uint16_t val) {
97 return make_const(t, (uint64_t)val);
98}
99inline Expr make_const(Type t, int8_t val) {
100 return make_const(t, (int64_t)val);
101}
102inline Expr make_const(Type t, uint8_t val) {
103 return make_const(t, (uint64_t)val);
104}
105inline Expr make_const(Type t, bool val) {
106 return make_const(t, (uint64_t)val);
107}
108inline Expr make_const(Type t, float val) {
109 return make_const(t, (double)val);
110}
112 return make_const(t, (double)val);
113}
114// @}
115
116/** Construct a unique signed_integer_overflow Expr */
118
119/** Check if an expression is a signed_integer_overflow */
121
122/** Check if a constant value can be correctly represented as the given type. */
124
125/** Construct a boolean constant from a C++ boolean value.
126 * May also be a vector if width is given.
127 * It is not possible to coerce a C++ boolean to Expr because
128 * if we provide such a path then char objects can ambiguously
129 * be converted to Halide Expr or to std::string. The problem
130 * is that C++ does not have a real bool type - it is in fact
131 * close enough to char that C++ does not know how to distinguish them.
132 * make_bool is the explicit coercion. */
133Expr make_bool(bool val, int lanes = 1);
134
135/** Construct the representation of zero in the given type */
137
138/** Construct the representation of one in the given type */
140
141/** Construct the representation of two in the given type */
143
144/** Construct the constant boolean true. May also be a vector of
145 * trues, if a lanes argument is given. */
146Expr const_true(int lanes = 1);
147
148/** Construct the constant boolean false. May also be a vector of
149 * falses, if a lanes argument is given. */
150Expr const_false(int lanes = 1);
151
152/** Attempt to cast an expression to a smaller type while provably not losing
153 * information. If it can't be done, return an undefined Expr.
154 *
155 * Optionally accepts a scope giving the constant bounds of any variables, and a
156 * map that gives the constant bounds of exprs already analyzed to avoid redoing
157 * work across many calls to lossless_cast. It is not safe to use this optional
158 * map in contexts where the same Expr object may take on a different value. For
159 * example: (let x = 4 in some_expr_object) + (let x = 5 in
160 * the_same_expr_object)). It is safe to use it after uniquify_variable_names
161 * has been run. */
164 std::map<Expr, ConstantInterval, ExprCompare> *cache = nullptr);
165
166/** Attempt to negate x without introducing new IR and without overflow.
167 * If it can't be done, return an undefined Expr. */
169
170/** Coerce the two expressions to have the same type, using C-style
171 * casting rules. For the purposes of casting, a boolean type is
172 * UInt(1). We use the following procedure:
173 *
174 * If the types already match, do nothing.
175 *
176 * Then, if one type is a vector and the other is a scalar, the scalar
177 * is broadcast to match the vector width, and we continue.
178 *
179 * Then, if one type is floating-point and the other is not, the
180 * non-float is cast to the floating-point type, and we're done.
181 *
182 * Then, if both types are unsigned ints, the one with fewer bits is
183 * cast to match the one with more bits and we're done.
184 *
185 * Then, if both types are signed ints, the one with fewer bits is
186 * cast to match the one with more bits and we're done.
187 *
188 * Finally, if one type is an unsigned int and the other type is a signed
189 * int, both are cast to a signed int with the greater of the two
190 * bit-widths. For example, matching an Int(8) with a UInt(16) results
191 * in an Int(16).
192 *
193 */
194void match_types(Expr &a, Expr &b);
195
196/** Asserts that both expressions are integer types and are either
197 * both signed or both unsigned. If one argument is scalar and the
198 * other a vector, the scalar is broadcasted to have the same number
199 * of lanes as the vector. If one expression is of narrower type than
200 * the other, it is widened to the bit width of the wider. */
201void match_types_bitwise(Expr &a, Expr &b, const char *op_name);
202
203/** Halide's vectorizable transcendentals. */
204// @{
208// @}
209
210/** Raise an expression to an integer power by repeatedly multiplying
211 * it by itself. */
213
214/** Split a boolean condition into vector of ANDs. If 'cond' is undefined,
215 * return an empty vector. */
216void split_into_ands(const Expr &cond, std::vector<Expr> &result);
217
218/** A builder to help create Exprs representing halide_buffer_t
219 * structs (e.g. foo.buffer) via calls to halide_buffer_init. Fill out
220 * the fields and then call build. The resulting Expr will be a call
221 * to halide_buffer_init with the struct members as arguments. If the
222 * buffer_memory field is undefined, it uses a call to alloca to make
223 * some stack memory for the buffer. If the shape_memory field is
224 * undefined, it similarly uses stack memory for the shape. If the
225 * shape_memory field is null, it uses the dim field already in the
226 * buffer. Other uninitialized fields will take on a value of zero in
227 * the constructed buffer. */
237
238/** If e is a ramp expression with stride, default 1, return the base,
239 * otherwise undefined. */
240Expr strided_ramp_base(const Expr &e, int stride = 1);
241
242/** Implementations of division and mod that are specific to Halide.
243 * Use these implementations; do not use native C division or mod to
244 * simplify Halide expressions. Halide division and modulo satisfy
245 * the Euclidean definition of division for integers a and b:
246 *
247 \code
248 when b != 0, (a/b)*b + a%b = a
249 0 <= a%b < |b|
250 \endcode
251 *
252 * Additionally, mod by zero returns zero, and div by zero returns
253 * zero. This makes mod and div total functions.
254 */
255// @{
256template<typename T>
257inline T mod_imp(T a, T b) {
258 Type t = type_of<T>();
259 if (!t.is_float() && b == 0) {
260 return 0;
261 } else if (t.is_int()) {
262 int64_t ia = a;
263 int64_t ib = b;
264 int64_t a_neg = ia >> 63;
265 int64_t b_neg = ib >> 63;
266 int64_t b_zero = (ib == 0) ? -1 : 0;
267 ia -= a_neg;
268 int64_t r = ia % (ib | b_zero);
269 r += (a_neg & ((ib ^ b_neg) + ~b_neg));
270 r &= ~b_zero;
271 return r;
272 } else {
273 return a % b;
274 }
275}
276
277template<typename T>
278inline T div_imp(T a, T b) {
279 Type t = type_of<T>();
280 if (!t.is_float() && b == 0) {
281 return (T)0;
282 } else if (t.is_int()) {
283 // Do it as 64-bit
284 int64_t ia = a;
285 int64_t ib = b;
286 int64_t a_neg = ia >> 63;
287 int64_t b_neg = ib >> 63;
288 int64_t b_zero = (ib == 0) ? -1 : 0;
289 ib -= b_zero;
290 ia -= a_neg;
291 int64_t q = ia / ib;
292 q += a_neg & (~b_neg - b_neg);
293 q &= ~b_zero;
294 return (T)q;
295 } else {
296 return a / b;
297 }
298}
299// @}
300
301// Special cases for float, double.
302template<>
303inline float mod_imp<float>(float a, float b) {
304 float f = a - b * (floorf(a / b));
305 // The remainder has the same sign as b.
306 return f;
307}
308template<>
309inline double mod_imp<double>(double a, double b) {
310 double f = a - b * (std::floor(a / b));
311 return f;
312}
313
314template<>
315inline float div_imp<float>(float a, float b) {
316 return a / b;
317}
318template<>
319inline double div_imp<double>(double a, double b) {
320 return a / b;
321}
322
323/** Return an Expr that is identical to the input Expr, but with
324 * all calls to likely() and likely_if_innermost() removed. */
326
327/** Return a Stmt that is identical to the input Stmt, but with
328 * all calls to likely() and likely_if_innermost() removed. */
330
331/** Return an Expr that is identical to the input Expr, but with
332 * all calls to promise_clamped() and unsafe_promise_clamped() removed. */
334
335/** Return a Stmt that is identical to the input Stmt, but with
336 * all calls to promise_clamped() and unsafe_promise_clamped() removed. */
338
339/** If the expression is a tag helper call, remove it and return
340 * the tagged expression. If not, returns the expression. */
342
343template<typename T>
345 static constexpr bool value = std::is_convertible<T, const char *>::value ||
346 std::is_convertible<T, Halide::Expr>::value;
347};
348
349template<typename... Args>
350struct all_are_printable_args : meta_and<is_printable_arg<Args>...> {};
351
352// Secondary args to print can be Exprs or const char *
353inline HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector<Expr> &args) {
354}
355
356template<typename... Args>
357inline HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector<Expr> &args, const char *arg, Args &&...more_args) {
358 args.emplace_back(std::string(arg));
359 collect_print_args(args, std::forward<Args>(more_args)...);
360}
361
362template<typename... Args>
363inline HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector<Expr> &args, Expr arg, Args &&...more_args) {
364 args.push_back(std::move(arg));
365 collect_print_args(args, std::forward<Args>(more_args)...);
366}
367
368Expr requirement_failed_error(Expr condition, const std::vector<Expr> &args);
369
370Expr memoize_tag_helper(Expr result, const std::vector<Expr> &cache_key_values);
371
372/** Reset the counters used for random-number seeds in random_float/int/uint.
373 * (Note that the counters are incremented for each call, even if a seed is passed in.)
374 * This is used for multitarget compilation to ensure that each subtarget gets
375 * the same sequence of random numbers. */
377
378} // namespace Internal
379
380/** Cast an expression to the halide type corresponding to the C++ type T. */
381template<typename T>
382inline Expr cast(Expr a) {
383 return cast(type_of<T>(), std::move(a));
384}
385
386/** Cast an expression to a new type. */
388
389/** Return the sum of two expressions, doing any necessary type
390 * coercion using \ref Internal::match_types */
392
393/** Add an expression and a constant integer. Coerces the type of the
394 * integer to match the type of the expression. Errors if the integer
395 * cannot be represented in the type of the expression. */
396// @{
398
399/** Add a constant integer and an expression. Coerces the type of the
400 * integer to match the type of the expression. Errors if the integer
401 * cannot be represented in the type of the expression. */
403
404/** Modify the first expression to be the sum of two expressions,
405 * without changing its type. This casts the second argument to match
406 * the type of the first. */
408
409/** Return the difference of two expressions, doing any necessary type
410 * coercion using \ref Internal::match_types */
412
413/** Subtracts a constant integer from an expression. Coerces the type of the
414 * integer to match the type of the expression. Errors if the integer
415 * cannot be represented in the type of the expression. */
417
418/** Subtracts an expression from a constant integer. Coerces the type
419 * of the integer to match the type of the expression. Errors if the
420 * integer cannot be represented in the type of the expression. */
422
423/** Return the negative of the argument. Does no type casting, so more
424 * formally: return that number which when added to the original,
425 * yields zero of the same type. For unsigned integers the negative is
426 * still an unsigned integer. E.g. in UInt(8), the negative of 56 is
427 * 200, because 56 + 200 == 0 */
429
430/** Modify the first expression to be the difference of two expressions,
431 * without changing its type. This casts the second argument to match
432 * the type of the first. */
434
435/** Return the product of two expressions, doing any necessary type
436 * coercion using \ref Internal::match_types */
438
439/** Multiply an expression and a constant integer. Coerces the type of the
440 * integer to match the type of the expression. Errors if the integer
441 * cannot be represented in the type of the expression. */
443
444/** Multiply a constant integer and an expression. Coerces the type of
445 * the integer to match the type of the expression. Errors if the
446 * integer cannot be represented in the type of the expression. */
448
449/** Modify the first expression to be the product of two expressions,
450 * without changing its type. This casts the second argument to match
451 * the type of the first. */
453
454/** Return the ratio of two expressions, doing any necessary type
455 * coercion using \ref Internal::match_types. Note that integer
456 * division in Halide is not the same as integer division in C-like
457 * languages in two ways.
458 *
459 * First, signed integer division in Halide rounds according to the
460 * sign of the denominator. This means towards minus infinity for
461 * positive denominators, and towards positive infinity for negative
462 * denominators. This is unlike C, which rounds towards zero. This
463 * decision ensures that upsampling expressions like f(x/2, y/2) don't
464 * have funny discontinuities when x and y cross zero.
465 *
466 * Second, division by zero returns zero instead of faulting. For
467 * types where overflow is defined behavior, division of the largest
468 * negative signed integer by -1 returns the largest negative signed
469 * integer for the type (i.e. it wraps). This ensures that a division
470 * operation can never have a side-effect, which is helpful in Halide
471 * because scheduling directives can expand the domain of computation
472 * of a Func, potentially introducing new zero-division.
473 */
475
476/** Modify the first expression to be the ratio of two expressions,
477 * without changing its type. This casts the second argument to match
478 * the type of the first. Note that signed integer division in Halide
479 * rounds towards minus infinity, unlike C, which rounds towards
480 * zero. */
482
483/** Divides an expression by a constant integer. Coerces the type
484 * of the integer to match the type of the expression. Errors if the
485 * integer cannot be represented in the type of the expression. */
487
488/** Divides a constant integer by an expression. Coerces the type
489 * of the integer to match the type of the expression. Errors if the
490 * integer cannot be represented in the type of the expression. */
492
493/** Return the first argument reduced modulo the second, doing any
494 * necessary type coercion using \ref Internal::match_types. There are
495 * two key differences between C-like languages and Halide for the
496 * modulo operation, which complement the way division works.
497 *
498 * First, the result is never negative, so x % 2 is always zero or
499 * one, unlike in C-like languages. x % -2 is equivalent, and is also
500 * always zero or one. Second, mod by zero evaluates to zero (unlike
501 * in C, where it faults). This makes modulo, like division, a
502 * side-effect-free operation. */
504
505/** Mods an expression by a constant integer. Coerces the type
506 * of the integer to match the type of the expression. Errors if the
507 * integer cannot be represented in the type of the expression. */
509
510/** Mods a constant integer by an expression. Coerces the type
511 * of the integer to match the type of the expression. Errors if the
512 * integer cannot be represented in the type of the expression. */
514
515/** Return a boolean expression that tests whether the first argument
516 * is greater than the second, after doing any necessary type coercion
517 * using \ref Internal::match_types */
519
520/** Return a boolean expression that tests whether an expression is
521 * greater than a constant integer. Coerces the integer to the type of
522 * the expression. Errors if the integer is not representable in that
523 * type. */
525
526/** Return a boolean expression that tests whether a constant integer is
527 * greater than an expression. Coerces the integer to the type of
528 * the expression. Errors if the integer is not representable in that
529 * type. */
531
532/** Return a boolean expression that tests whether the first argument
533 * is less than the second, after doing any necessary type coercion
534 * using \ref Internal::match_types */
536
537/** Return a boolean expression that tests whether an expression is
538 * less than a constant integer. Coerces the integer to the type of
539 * the expression. Errors if the integer is not representable in that
540 * type. */
542
543/** Return a boolean expression that tests whether a constant integer is
544 * less than an expression. Coerces the integer to the type of
545 * the expression. Errors if the integer is not representable in that
546 * type. */
548
549/** Return a boolean expression that tests whether the first argument
550 * is less than or equal to the second, after doing any necessary type
551 * coercion using \ref Internal::match_types */
553
554/** Return a boolean expression that tests whether an expression is
555 * less than or equal to a constant integer. Coerces the integer to
556 * the type of the expression. Errors if the integer is not
557 * representable in that type. */
559
560/** Return a boolean expression that tests whether a constant integer
561 * is less than or equal to an expression. Coerces the integer to the
562 * type of the expression. Errors if the integer is not representable
563 * in that type. */
565
566/** Return a boolean expression that tests whether the first argument
567 * is greater than or equal to the second, after doing any necessary
568 * type coercion using \ref Internal::match_types */
570
571/** Return a boolean expression that tests whether an expression is
572 * greater than or equal to a constant integer. Coerces the integer to
573 * the type of the expression. Errors if the integer is not
574 * representable in that type. */
575Expr operator>=(const Expr &a, int b);
576
577/** Return a boolean expression that tests whether a constant integer
578 * is greater than or equal to an expression. Coerces the integer to the
579 * type of the expression. Errors if the integer is not representable
580 * in that type. */
581Expr operator>=(int a, const Expr &b);
582
583/** Return a boolean expression that tests whether the first argument
584 * is equal to the second, after doing any necessary type coercion
585 * using \ref Internal::match_types */
587
588/** Return a boolean expression that tests whether an expression is
589 * equal to a constant integer. Coerces the integer to the type of the
590 * expression. Errors if the integer is not representable in that
591 * type. */
593
594/** Return a boolean expression that tests whether a constant integer
595 * is equal to an expression. Coerces the integer to the type of the
596 * expression. Errors if the integer is not representable in that
597 * type. */
599
600/** Return a boolean expression that tests whether the first argument
601 * is not equal to the second, after doing any necessary type coercion
602 * using \ref Internal::match_types */
604
605/** Return a boolean expression that tests whether an expression is
606 * not equal to a constant integer. Coerces the integer to the type of
607 * the expression. Errors if the integer is not representable in that
608 * type. */
610
611/** Return a boolean expression that tests whether a constant integer
612 * is not equal to an expression. Coerces the integer to the type of
613 * the expression. Errors if the integer is not representable in that
614 * type. */
616
617/** Returns the logical and of the two arguments */
619
620/** Logical and of an Expr and a bool. Either returns the Expr or an
621 * Expr representing false, depending on the bool. */
622// @{
625// @}
626
627/** Returns the logical or of the two arguments */
629
630/** Logical or of an Expr and a bool. Either returns the Expr or an
631 * Expr representing true, depending on the bool. */
632// @{
635// @}
636
637/** Returns the logical not of the argument */
639
640/** Returns an expression representing the greater of the two
641 * arguments, after doing any necessary type coercion using
642 * \ref Internal::match_types. Vectorizes cleanly on most platforms
643 * (with the exception of integer types on x86 without SSE4). */
645
646/** Returns an expression representing the greater of an expression
647 * and a constant integer. The integer is coerced to the type of the
648 * expression. Errors if the integer is not representable as that
649 * type. Vectorizes cleanly on most platforms (with the exception of
650 * integer types on x86 without SSE4). */
651Expr max(Expr a, int b);
652
653/** Returns an expression representing the greater of a constant
654 * integer and an expression. The integer is coerced to the type of
655 * the expression. Errors if the integer is not representable as that
656 * type. Vectorizes cleanly on most platforms (with the exception of
657 * integer types on x86 without SSE4). */
658Expr max(int a, Expr b);
659
660inline Expr max(float a, Expr b) {
661 return max(Expr(a), std::move(b));
662}
663inline Expr max(Expr a, float b) {
664 return max(std::move(a), Expr(b));
665}
666
667/** Returns an expression representing the greatest of a list of
668 * expressions, after doing any necessary type coercion using
669 * \ref Internal::match_types. Vectorizes cleanly on most platforms
670 * (with the exception of integer types on x86 without SSE4).
671 * The expressions are folded from right ie. max(.., max(.., ..)).
672 * The arguments can be any mix of types but must all be convertible to Expr. */
673template<typename A, typename B, typename C, typename... Rest,
674 typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Rest...>::value>::type * = nullptr>
675inline Expr max(A &&a, B &&b, C &&c, Rest &&...rest) {
676 return max(std::forward<A>(a), max(std::forward<B>(b), std::forward<C>(c), std::forward<Rest>(rest)...));
677}
678
680
681/** Returns an expression representing the lesser of an expression
682 * and a constant integer. The integer is coerced to the type of the
683 * expression. Errors if the integer is not representable as that
684 * type. Vectorizes cleanly on most platforms (with the exception of
685 * integer types on x86 without SSE4). */
686Expr min(Expr a, int b);
687
688/** Returns an expression representing the lesser of a constant
689 * integer and an expression. The integer is coerced to the type of
690 * the expression. Errors if the integer is not representable as that
691 * type. Vectorizes cleanly on most platforms (with the exception of
692 * integer types on x86 without SSE4). */
693Expr min(int a, Expr b);
694
695inline Expr min(float a, Expr b) {
696 return min(Expr(a), std::move(b));
697}
698inline Expr min(Expr a, float b) {
699 return min(std::move(a), Expr(b));
700}
701
702/** Returns an expression representing the least of a list of
703 * expressions, after doing any necessary type coercion using
704 * \ref Internal::match_types. Vectorizes cleanly on most platforms
705 * (with the exception of integer types on x86 without SSE4).
706 * The expressions are folded from right ie. min(.., min(.., ..)).
707 * The arguments can be any mix of types but must all be convertible to Expr. */
708template<typename A, typename B, typename C, typename... Rest,
709 typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Rest...>::value>::type * = nullptr>
710inline Expr min(A &&a, B &&b, C &&c, Rest &&...rest) {
711 return min(std::forward<A>(a), min(std::forward<B>(b), std::forward<C>(c), std::forward<Rest>(rest)...));
712}
713
714/** Operators on floats treats those floats as Exprs. Making these
715 * explicit prevents implicit float->int casts that might otherwise
716 * occur. */
717// @{
718inline Expr operator+(Expr a, float b) {
719 return std::move(a) + Expr(b);
720}
721inline Expr operator+(float a, Expr b) {
722 return Expr(a) + std::move(b);
723}
724inline Expr operator-(Expr a, float b) {
725 return std::move(a) - Expr(b);
726}
727inline Expr operator-(float a, Expr b) {
728 return Expr(a) - std::move(b);
729}
730inline Expr operator*(Expr a, float b) {
731 return std::move(a) * Expr(b);
732}
733inline Expr operator*(float a, Expr b) {
734 return Expr(a) * std::move(b);
735}
736inline Expr operator/(Expr a, float b) {
737 return std::move(a) / Expr(b);
738}
739inline Expr operator/(float a, Expr b) {
740 return Expr(a) / std::move(b);
741}
742inline Expr operator%(Expr a, float b) {
743 return std::move(a) % Expr(b);
744}
745inline Expr operator%(float a, Expr b) {
746 return Expr(a) % std::move(b);
747}
748inline Expr operator>(Expr a, float b) {
749 return std::move(a) > Expr(b);
750}
751inline Expr operator>(float a, Expr b) {
752 return Expr(a) > std::move(b);
753}
754inline Expr operator<(Expr a, float b) {
755 return std::move(a) < Expr(b);
756}
757inline Expr operator<(float a, Expr b) {
758 return Expr(a) < std::move(b);
759}
760inline Expr operator>=(Expr a, float b) {
761 return std::move(a) >= Expr(b);
762}
763inline Expr operator>=(float a, Expr b) {
764 return Expr(a) >= std::move(b);
765}
766inline Expr operator<=(Expr a, float b) {
767 return std::move(a) <= Expr(b);
768}
769inline Expr operator<=(float a, Expr b) {
770 return Expr(a) <= std::move(b);
771}
772inline Expr operator==(Expr a, float b) {
773 return std::move(a) == Expr(b);
774}
775inline Expr operator==(float a, Expr b) {
776 return Expr(a) == std::move(b);
777}
778inline Expr operator!=(Expr a, float b) {
779 return std::move(a) != Expr(b);
780}
781inline Expr operator!=(float a, Expr b) {
782 return Expr(a) != std::move(b);
783}
784// @}
785
786/** Clamps an expression to lie within the given bounds. The bounds
787 * are type-cast to match the expression. Vectorizes as well as min/max. */
788Expr clamp(Expr a, const Expr &min_val, const Expr &max_val);
789
790/** Returns the absolute value of a signed integer or floating-point
791 * expression. Vectorizes cleanly. Unlike in C, abs of a signed
792 * integer returns an unsigned integer of the same bit width. This
793 * means that abs of the most negative integer doesn't overflow. */
795
796/** Return the absolute difference between two values. Vectorizes
797 * cleanly. Returns an unsigned value of the same bit width. There are
798 * various ways to write this yourself, but they contain numerous
799 * gotchas and don't always compile to good code, so use this
800 * instead. */
802
803/** Returns an expression similar to the ternary operator in C, except
804 * that it always evaluates all arguments. If the first argument is
805 * true, then return the second, else return the third. Typically
806 * vectorizes cleanly, but benefits from SSE4.1 or newer on x86. */
807Expr select(Expr condition, Expr true_value, Expr false_value);
808
809/** A multi-way variant of select similar to a switch statement in C,
810 * which can accept multiple conditions and values in pairs. Evaluates
811 * to the first value for which the condition is true. Returns the
812 * final value if all conditions are false. */
813template<typename... Args,
814 typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Args...>::value>::type * = nullptr>
815inline Expr select(Expr c0, Expr v0, Expr c1, Expr v1, Args &&...args) {
816 return select(std::move(c0), std::move(v0), select(std::move(c1), std::move(v1), std::forward<Args>(args)...));
817}
818
819/** Equivalent of ternary select(), but taking/returning tuples. If the condition is
820 * a Tuple, it must match the size of the true and false Tuples. */
821// @{
822Tuple select(const Tuple &condition, const Tuple &true_value, const Tuple &false_value);
823Tuple select(const Expr &condition, const Tuple &true_value, const Tuple &false_value);
824// @}
825
826/** Equivalent of multiway select(), but taking/returning tuples. If the condition is
827 * a Tuple, it must match the size of the true and false Tuples. */
828// @{
829template<typename... Args>
830inline Tuple select(const Tuple &c0, const Tuple &v0, const Tuple &c1, const Tuple &v1, Args &&...args) {
831 return select(c0, v0, select(c1, v1, std::forward<Args>(args)...));
832}
833template<typename... Args>
834inline Tuple select(const Expr &c0, const Tuple &v0, const Expr &c1, const Tuple &v1, Args &&...args) {
835 return select(c0, v0, select(c1, v1, std::forward<Args>(args)...));
836}
837// @}
838
839/** select applied to FuncRefs (e.g. select(x < 100, f(x), g(x))) is assumed to
840 * return an Expr. A runtime error is produced if this is applied to
841 * tuple-valued Funcs. In that case you should explicitly cast the second and
842 * third args to Tuple to remove the ambiguity. */
843// @{
844Expr select(const Expr &condition, const FuncRef &true_value, const FuncRef &false_value);
845template<typename... Args>
846inline Expr select(const Expr &c0, const FuncRef &v0, const Expr &c1, const FuncRef &v1, Args &&...args) {
847 return select(c0, v0, select(c1, v1, std::forward<Args>(args)...));
848}
849// @}
850
851/** Oftentimes we want to pack a list of expressions with the same type
852 * into a channel dimension, e.g.,
853 * img(x, y, c) = select(c == 0, 100, // Red
854 * c == 1, 50, // Green
855 * 25); // Blue
856 * This is tedious when the list is long. The following functions
857 * provide convenient syntax that allows one to write:
858 * img(x, y, c) = mux(c, {100, 50, 25});
859 *
860 * As with the select equivalent, if the first argument (the index) is
861 * out of range, the expression evaluates to the last value.
862 */
863// @{
864Expr mux(const Expr &id, const std::initializer_list<Expr> &values);
865Expr mux(const Expr &id, const std::vector<Expr> &values);
866Expr mux(const Expr &id, const Tuple &values);
867Expr mux(const Expr &id, const std::initializer_list<FuncRef> &values);
868Tuple mux(const Expr &id, const std::initializer_list<Tuple> &values);
869Tuple mux(const Expr &id, const std::vector<Tuple> &values);
870// @}
871
872/** Return the sine of a floating-point expression. If the argument is
873 * not floating-point, it is cast to Float(32). Does not vectorize
874 * well. */
876
877/** Return the arcsine of a floating-point expression. If the argument
878 * is not floating-point, it is cast to Float(32). Does not vectorize
879 * well. */
881
882/** Return the cosine of a floating-point expression. If the argument
883 * is not floating-point, it is cast to Float(32). Does not vectorize
884 * well. */
886
887/** Return the arccosine of a floating-point expression. If the
888 * argument is not floating-point, it is cast to Float(32). Does not
889 * vectorize well. */
891
892/** Return the tangent of a floating-point expression. If the argument
893 * is not floating-point, it is cast to Float(32). Does not vectorize
894 * well. */
896
897/** Return the arctangent of a floating-point expression. If the
898 * argument is not floating-point, it is cast to Float(32). Does not
899 * vectorize well. */
901
902/** Return the angle of a floating-point gradient. If the argument is
903 * not floating-point, it is cast to Float(32). Does not vectorize
904 * well. */
906
907/** Return the hyperbolic sine of a floating-point expression. If the
908 * argument is not floating-point, it is cast to Float(32). Does not
909 * vectorize well. */
911
912/** Return the hyperbolic arcsine of a floating-point expression. If
913 * the argument is not floating-point, it is cast to Float(32). Does
914 * not vectorize well. */
916
917/** Return the hyperbolic cosine of a floating-point expression. If
918 * the argument is not floating-point, it is cast to Float(32). Does
919 * not vectorize well. */
921
922/** Return the hyperbolic arccosine of a floating-point expression.
923 * If the argument is not floating-point, it is cast to
924 * Float(32). Does not vectorize well. */
926
927/** Return the hyperbolic tangent of a floating-point expression. If
928 * the argument is not floating-point, it is cast to Float(32). Does
929 * not vectorize well. */
931
932/** Return the hyperbolic arctangent of a floating-point expression.
933 * If the argument is not floating-point, it is cast to
934 * Float(32). Does not vectorize well. */
936
937/** Return the square root of a floating-point expression. If the
938 * argument is not floating-point, it is cast to Float(32). Typically
939 * vectorizes cleanly. */
941
942/** Return the square root of the sum of the squares of two
943 * floating-point expressions. If the argument is not floating-point,
944 * it is cast to Float(32). Vectorizes cleanly. */
945Expr hypot(const Expr &x, const Expr &y);
946
947/** Return the exponential of a floating-point expression. If the
948 * argument is not floating-point, it is cast to Float(32). For
949 * Float(64) arguments, this calls the system exp function, and does
950 * not vectorize well. For Float(32) arguments, this function is
951 * vectorizable, does the right thing for extremely small or extremely
952 * large inputs, and is accurate up to the last bit of the
953 * mantissa. Vectorizes cleanly. */
955
956/** Return the logarithm of a floating-point expression. If the
957 * argument is not floating-point, it is cast to Float(32). For
958 * Float(64) arguments, this calls the system log function, and does
959 * not vectorize well. For Float(32) arguments, this function is
960 * vectorizable, does the right thing for inputs <= 0 (returns -inf or
961 * nan), and is accurate up to the last bit of the
962 * mantissa. Vectorizes cleanly. */
964
965/** Return one floating point expression raised to the power of
966 * another. The type of the result is given by the type of the first
967 * argument. If the first argument is not a floating-point type, it is
968 * cast to Float(32). For Float(32), cleanly vectorizable, and
969 * accurate up to the last few bits of the mantissa. Gets worse when
970 * approaching overflow. Vectorizes cleanly. */
972
973/** Evaluate the error function erf. Only available for
974 * Float(32). Accurate up to the last three bits of the
975 * mantissa. Vectorizes cleanly. */
976Expr erf(const Expr &x);
977
978/** Fast vectorizable approximation to some trigonometric functions for
979 * Float(32). Absolute approximation error is less than 1e-5. Slow on x86 if
980 * you don't have at least sse 4.1. */
981// @{
984// @}
985
986/** Fast approximate cleanly vectorizable log for Float(32). Returns
987 * nonsense for x <= 0.0f. Accurate up to the last 5 bits of the
988 * mantissa. Vectorizes cleanly. Slow on x86 if you don't
989 * have at least sse 4.1. */
991
992/** Fast approximate cleanly vectorizable exp for Float(32). Returns
993 * nonsense for inputs that would overflow or underflow. Typically
994 * accurate up to the last 5 bits of the mantissa. Gets worse when
995 * approaching overflow. Vectorizes cleanly. Slow on x86 if you don't
996 * have at least sse 4.1. */
998
999/** Fast approximate cleanly vectorizable pow for Float(32). Returns
1000 * nonsense for x < 0.0f. Accurate up to the last 5 bits of the
1001 * mantissa for typical exponents. Gets worse when approaching
1002 * overflow. Vectorizes cleanly. Slow on x86 if you don't
1003 * have at least sse 4.1. */
1005
1006/** Fast approximate inverse for Float(32). Corresponds to the rcpps
1007 * instruction on x86, and the vrecpe instruction on ARM. Vectorizes
1008 * cleanly. Note that this can produce slightly different results
1009 * across different implementations of the same architecture (e.g. AMD vs Intel),
1010 * even when strict_float is enabled. */
1012
1013/** Fast approximate inverse square root for Float(32). Corresponds to
1014 * the rsqrtps instruction on x86, and the vrsqrte instruction on
1015 * ARM. Vectorizes cleanly. Note that this can produce slightly different results
1016 * across different implementations of the same architecture (e.g. AMD vs Intel),
1017 * even when strict_float is enabled. */
1019
1020/** Return the greatest whole number less than or equal to a
1021 * floating-point expression. If the argument is not floating-point,
1022 * it is cast to Float(32). The return value is still in floating
1023 * point, despite being a whole number. Vectorizes cleanly. */
1025
1026/** Return the least whole number greater than or equal to a
1027 * floating-point expression. If the argument is not floating-point,
1028 * it is cast to Float(32). The return value is still in floating
1029 * point, despite being a whole number. Vectorizes cleanly. */
1031
1032/** Return the whole number closest to a floating-point expression. If the
1033 * argument is not floating-point, it is cast to Float(32). The return value is
1034 * still in floating point, despite being a whole number. On ties, we round
1035 * towards the nearest even integer. Note that this is not the same as
1036 * std::round in C, which rounds away from zero. On platforms without a native
1037 * instruction for this, it is emulated, and may be more expensive than
1038 * cast<int>(x + 0.5f) or similar. */
1040
1041/** Return the integer part of a floating-point expression. If the argument is
1042 * not floating-point, it is cast to Float(32). The return value is still in
1043 * floating point, despite being a whole number. Vectorizes cleanly. */
1045
1046/** Returns true if the argument is a Not a Number (NaN). Requires a
1047 * floating point argument. Vectorizes cleanly.
1048 * Note that the Expr passed in will be evaluated in strict_float mode,
1049 * regardless of whether strict_float mode is enabled in the current Target. */
1051
1052/** Returns true if the argument is Inf or -Inf. Requires a
1053 * floating point argument. Vectorizes cleanly.
1054 * Note that the Expr passed in will be evaluated in strict_float mode,
1055 * regardless of whether strict_float mode is enabled in the current Target. */
1057
1058/** Returns true if the argument is a finite value (ie, neither NaN nor Inf).
1059 * Requires a floating point argument. Vectorizes cleanly.
1060 * Note that the Expr passed in will be evaluated in strict_float mode,
1061 * regardless of whether strict_float mode is enabled in the current Target. */
1063
1064/** Return the fractional part of a floating-point expression. If the argument
1065 * is not floating-point, it is cast to Float(32). The return value has the
1066 * same sign as the original expression. Vectorizes cleanly. */
1067Expr fract(const Expr &x);
1068
1069/** Reinterpret the bits of one value as another type. */
1071
1072template<typename T>
1074 return reinterpret(type_of<T>(), std::move(e));
1075}
1076
1077/** Return the bitwise and of two expressions (which need not have the
1078 * same type). The result type is the wider of the two expressions.
1079 * Only integral types are allowed and both expressions must be signed
1080 * or both must be unsigned. */
1082
1083/** Return the bitwise and of an expression and an integer. The type
1084 * of the result is the type of the expression argument. */
1085// @{
1088// @}
1089
1090/** Return the bitwise or of two expressions (which need not have the
1091 * same type). The result type is the wider of the two expressions.
1092 * Only integral types are allowed and both expressions must be signed
1093 * or both must be unsigned. */
1095
1096/** Return the bitwise or of an expression and an integer. The type of
1097 * the result is the type of the expression argument. */
1098// @{
1101// @}
1102
1103/** Return the bitwise xor of two expressions (which need not have the
1104 * same type). The result type is the wider of the two expressions.
1105 * Only integral types are allowed and both expressions must be signed
1106 * or both must be unsigned. */
1108
1109/** Return the bitwise xor of an expression and an integer. The type
1110 * of the result is the type of the expression argument. */
1111// @{
1114// @}
1115
1116/** Return the bitwise not of an expression. */
1118
1119/** Shift the bits of an integer value left. This is actually less
1120 * efficient than multiplying by 2^n, because Halide's optimization
1121 * passes understand multiplication, and will compile it to
1122 * shifting. This operator is only for if you really really need bit
1123 * shifting (e.g. because the exponent is a run-time parameter). The
1124 * type of the result is equal to the type of the first argument. Both
1125 * arguments must have integer type. */
1126// @{
1129// @}
1130
1131/** Shift the bits of an integer value right. Does sign extension for
1132 * signed integers. This is less efficient than dividing by a power of
1133 * two. Halide's definition of division (always round to negative
1134 * infinity) means that all divisions by powers of two get compiled to
1135 * bit-shifting, and Halide's optimization routines understand
1136 * division and can work with it. The type of the result is equal to
1137 * the type of the first argument. Both arguments must have integer
1138 * type. */
1139// @{
1142// @}
1143
1144/** Linear interpolate between the two values according to a weight.
1145 * \param zero_val The result when weight is 0
1146 * \param one_val The result when weight is 1
1147 * \param weight The interpolation amount
1148 *
1149 * Both zero_val and one_val must have the same type. All types are
1150 * supported, including bool.
1151 *
1152 * The weight is treated as its own type and must be float or an
1153 * unsigned integer type. It is scaled to the bit-size of the type of
1154 * x and y if they are integer, or converted to float if they are
1155 * float. Integer weights are converted to float via division by the
1156 * full-range value of the weight's type. Floating-point weights used
1157 * to interpolate between integer values must be between 0.0f and
1158 * 1.0f, and an error may be signaled if it is not provably so. (clamp
1159 * operators can be added to provide proof. Currently an error is only
1160 * signalled for constant weights.)
1161 *
1162 * For integer linear interpolation, out of range values cannot be
1163 * represented. In particular, weights that are conceptually less than
1164 * 0 or greater than 1.0 are not representable. As such the result is
1165 * always between x and y (inclusive of course). For lerp with
1166 * floating-point values and floating-point weight, the full range of
1167 * a float is valid, however underflow and overflow can still occur.
1168 *
1169 * Ordering is not required between zero_val and one_val:
1170 * lerp(42, 69, .5f) == lerp(69, 42, .5f) == 56
1171 *
1172 * Results for integer types are for exactly rounded arithmetic. As
1173 * such, there are cases where 16-bit and float differ because 32-bit
1174 * floating-point (float) does not have enough precision to produce
1175 * the exact result. (Likely true for 32-bit integer
1176 * vs. double-precision floating-point as well.)
1177 *
1178 * At present, double precision and 64-bit integers are not supported.
1179 *
1180 * Generally, lerp will vectorize as if it were an operation on a type
1181 * twice the bit size of the inferred type for x and y.
1182 *
1183 * Some examples:
1184 * \code
1185 *
1186 * // Since Halide does not have direct type declarations, casts
1187 * // below are used to indicate the types of the parameters.
1188 * // Such casts are not required or expected in actual code where types
1189 * // are inferred.
1190 *
1191 * lerp(cast<float>(x), cast<float>(y), cast<float>(w)) ->
1192 * x * (1.0f - w) + y * w
1193 *
1194 * lerp(cast<uint8_t>(x), cast<uint8_t>(y), cast<uint8_t>(w)) ->
1195 * cast<uint8_t>(cast<uint8_t>(x) * (1.0f - cast<uint8_t>(w) / 255.0f) +
1196 * cast<uint8_t>(y) * cast<uint8_t>(w) / 255.0f + .5f)
1197 *
1198 * // Note addition in Halide promoted uint8_t + int8_t to int16_t already,
1199 * // the outer cast is added for clarity.
1200 * lerp(cast<uint8_t>(x), cast<int8_t>(y), cast<uint8_t>(w)) ->
1201 * cast<int16_t>(cast<uint8_t>(x) * (1.0f - cast<uint8_t>(w) / 255.0f) +
1202 * cast<int8_t>(y) * cast<uint8_t>(w) / 255.0f + .5f)
1203 *
1204 * lerp(cast<int8_t>(x), cast<int8_t>(y), cast<float>(w)) ->
1205 * cast<int8_t>(cast<int8_t>(x) * (1.0f - cast<float>(w)) +
1206 * cast<int8_t>(y) * cast<float>(w))
1207 *
1208 * \endcode
1209 * */
1210Expr lerp(Expr zero_val, Expr one_val, Expr weight);
1211
1212/** Count the number of set bits in an expression. */
1214
1215/** Count the number of leading zero bits in an expression. If the expression is
1216 * zero, the result is the number of bits in the type. */
1218
1219/** Count the number of trailing zero bits in an expression. If the expression is
1220 * zero, the result is the number of bits in the type. */
1222
1223/** Divide two integers, rounding towards zero. This is the typical
1224 * behavior of most hardware architectures, which differs from
1225 * Halide's division operator, which is Euclidean (rounds towards
1226 * -infinity). Will throw a runtime error if y is zero, or if y is -1
1227 * and x is the minimum signed integer. */
1229
1230/** Compute the remainder of dividing two integers, when division is
1231 * rounding toward zero. This is the typical behavior of most hardware
1232 * architectures, which differs from Halide's mod operator, which is
1233 * Euclidean (produces the remainder when division rounds towards
1234 * -infinity). Will throw a runtime error if y is zero. */
1236
1237/** Return a random variable representing a uniformly distributed
1238 * float in the half-open interval [0.0f, 1.0f). For random numbers of
1239 * other types, use lerp with a random float as the last parameter.
1240 *
1241 * Optionally takes a seed.
1242 *
1243 * Note that:
1244 \code
1245 Expr x = random_float();
1246 Expr y = x + x;
1247 \endcode
1248 *
1249 * is very different to
1250 *
1251 \code
1252 Expr y = random_float() + random_float();
1253 \endcode
1254 *
1255 * The first doubles a random variable, and the second adds two
1256 * independent random variables.
1257 *
1258 * A given random variable takes on a unique value that depends
1259 * deterministically on the pure variables of the function they belong
1260 * to, the identity of the function itself, and which definition of
1261 * the function it is used in. They are, however, shared across tuple
1262 * elements.
1263 *
1264 * This function vectorizes cleanly.
1265 */
1267
1268/** Return a random variable representing a uniformly distributed
1269 * unsigned 32-bit integer. See \ref random_float. Vectorizes cleanly. */
1271
1272/** Return a random variable representing a uniformly distributed
1273 * 32-bit integer. See \ref random_float. Vectorizes cleanly. */
1275
1276/** Create an Expr that prints out its value whenever it is
1277 * evaluated. It also prints out everything else in the arguments
1278 * list, separated by spaces. This can include string literals. */
1279//@{
1280Expr print(const std::vector<Expr> &values);
1281
1282template<typename... Args>
1283inline HALIDE_NO_USER_CODE_INLINE Expr print(Expr a, Args &&...args) {
1284 std::vector<Expr> collected_args = {std::move(a)};
1285 Internal::collect_print_args(collected_args, std::forward<Args>(args)...);
1286 return print(collected_args);
1287}
1288//@}
1289
1290/** Create an Expr that prints whenever it is evaluated, provided that
1291 * the condition is true. */
1292// @{
1293Expr print_when(Expr condition, const std::vector<Expr> &values);
1294
1295template<typename... Args>
1296inline HALIDE_NO_USER_CODE_INLINE Expr print_when(Expr condition, Expr a, Args &&...args) {
1297 std::vector<Expr> collected_args = {std::move(a)};
1298 Internal::collect_print_args(collected_args, std::forward<Args>(args)...);
1299 return print_when(std::move(condition), collected_args);
1300}
1301
1302// @}
1303
1304/** Create an Expr that guarantees a precondition.
1305 * If 'condition' is true, the return value is equal to the first Expr.
1306 * If 'condition' is false, halide_error() is called, and the return value
1307 * is arbitrary. Any additional arguments after the first Expr are stringified
1308 * and passed as a user-facing message to halide_error(), similar to print().
1309 *
1310 * Note that this essentially *always* inserts a runtime check into the
1311 * generated code (except when the condition can be proven at compile time);
1312 * as such, it should be avoided inside inner loops, except for debugging
1313 * or testing purposes. Note also that it does not vectorize cleanly (vector
1314 * values will be scalarized for the check).
1315 *
1316 * However, using this to make assertions about (say) input values
1317 * can be useful, both in terms of correctness and (potentially) in terms
1318 * of code generation, e.g.
1319 \code
1320 Param<int> p;
1321 Expr y = require(p > 0, p);
1322 \endcode
1323 * will allow the optimizer to assume positive, nonzero values for y.
1324 */
1325// @{
1326Expr require(Expr condition, const std::vector<Expr> &values);
1327
1328template<typename... Args>
1329inline HALIDE_NO_USER_CODE_INLINE Expr require(Expr condition, Expr value, Args &&...args) {
1330 std::vector<Expr> collected_args = {std::move(value)};
1331 Internal::collect_print_args(collected_args, std::forward<Args>(args)...);
1332 return require(std::move(condition), collected_args);
1333}
1334// @}
1335
1336/** Return an undef value of the given type. Halide skips stores that
1337 * depend on undef values, so you can use this to mean "do not modify
1338 * this memory location". This is an escape hatch that can be used for
1339 * several things:
1340 *
1341 * You can define a reduction with no pure step, by setting the pure
1342 * step to undef. Do this only if you're confident that the update
1343 * steps are sufficient to correctly fill in the domain.
1344 *
1345 * For a tuple-valued reduction, you can write an update step that
1346 * only updates some tuple elements.
1347 *
1348 * You can define a single-stage pipeline that only has update steps,
1349 * and depends on the values already in the output buffer.
1350 *
1351 * Use this feature with great caution, as you can use it to load from
1352 * uninitialized memory.
1353 */
1355
1356template<typename T>
1357inline Expr undef() {
1358 return undef(type_of<T>());
1359}
1360
1361namespace Internal {
1362
1363/** Return an expression that should never be evaluated. Expressions
1364 * that depend on unreachable values are also unreachable, and
1365 * statements that execute unreachable expressions are also considered
1366 * unreachable. */
1368
1369template<typename T>
1371 return unreachable(type_of<T>());
1372}
1373
1374} // namespace Internal
1375
1376/** Control the values used in the memoization cache key for memoize.
1377 * Normally parameters and other external dependencies are
1378 * automatically inferred and added to the cache key. The memoize_tag
1379 * operator allows computing one expression and using either the
1380 * computed value, or one or more other expressions in the cache key
1381 * instead of the parameter dependencies of the computation. The
1382 * single argument version is completely safe in that the cache key
1383 * will use the actual computed value -- it is difficult or impossible
1384 * to produce erroneous caching this way. The more-than-one argument
1385 * version allows generating cache keys that do not uniquely identify
1386 * the computation and thus can result in caching errors.
1387 *
1388 * A potential use for the single argument version is to handle a
1389 * floating-point parameter that is quantized to a small
1390 * integer. Multiple values of the float will produce the same integer
1391 * and moving the caching to using the integer for the key is more
1392 * efficient.
1393 *
1394 * The main use for the more-than-one argument version is to provide
1395 * cache key information for Handles and ImageParams, which otherwise
1396 * are not allowed inside compute_cached operations. E.g. when passing
1397 * a group of parameters to an external array function via a Handle,
1398 * memoize_tag can be used to isolate the actual values used by that
1399 * computation. If an ImageParam is a constant image with a persistent
1400 * digest, memoize_tag can be used to key computations using that image
1401 * on the digest. */
1402// @{
1403template<typename... Args>
1404inline HALIDE_NO_USER_CODE_INLINE Expr memoize_tag(Expr result, Args &&...args) {
1405 std::vector<Expr> collected_args{std::forward<Args>(args)...};
1406 return Internal::memoize_tag_helper(std::move(result), collected_args);
1407}
1408// @}
1409
1410/** Expressions tagged with this intrinsic are considered to be part
1411 * of the steady state of some loop with a nasty beginning and end
1412 * (e.g. a boundary condition). When Halide encounters likely
1413 * intrinsics, it splits the containing loop body into three, and
1414 * tries to simplify down all conditions that lead to the likely. For
1415 * example, given the expression: select(x < 1, bar, x > 10, bar,
1416 * likely(foo)), Halide will split the loop over x into portions where
1417 * x < 1, 1 <= x <= 10, and x > 10.
1418 *
1419 * You're unlikely to want to call this directly. You probably want to
1420 * use the boundary condition helpers in the BoundaryConditions
1421 * namespace instead.
1422 */
1424
1425/** Equivalent to likely, but only triggers a loop partitioning if
1426 * found in an innermost loop. */
1428
1429/** Cast an expression to the halide type corresponding to the C++
1430 * type T. As part of the cast, clamp to the minimum and maximum
1431 * values of the result type. */
1432template<typename T>
1434 return saturating_cast(type_of<T>(), std::move(e));
1435}
1436
1437/** Cast an expression to a new type, clamping to the minimum and
1438 * maximum values of the result type. */
1440
1441/** Makes a best effort attempt to preserve IEEE floating-point
1442 * semantics in evaluating an expression. May not be implemented for
1443 * all backends. (E.g. it is difficult to do this for C++ code
1444 * generation as it depends on the compiler flags used to compile the
1445 * generated code.) */
1447
1448/** Create an Expr that promises another Expr is clamped but does
1449 * not generate code to check the assertion or modify the value. No
1450 * attempt is made to prove the bound at compile time. (If it is
1451 * proved false as a result of something else, an error might be
1452 * generated, but it is also possible the compiler will crash.) The
1453 * promised bound is used in bounds inference so it will allow
1454 * satisfying bounds checks as well as possibly aiding optimization.
1455 *
1456 * unsafe_promise_clamped returns its first argument, the Expr 'value'
1457 *
1458 * This is a very easy way to make Halide generate erroneous code if
1459 * the promised bound is not kept. Use sparingly when there is no
1460 * other way to convey the information to the compiler and it is
1461 * required for a valuable optimization.
1462 *
1463 * Unsafe promises can be checked by turning on
1464 * Target::CheckUnsafePromises. This is intended for debugging only.
1465 */
1466Expr unsafe_promise_clamped(const Expr &value, const Expr &min, const Expr &max);
1467
1468namespace Internal {
1469/**
1470 * FOR INTERNAL USE ONLY.
1471 *
1472 * An entirely unchecked version of unsafe_promise_clamped, used
1473 * inside the compiler as an annotation of the known bounds of an Expr
1474 * when it has proved something is bounded and wants to record that
1475 * fact for later passes (notably bounds inference) to exploit. This
1476 * gets introduced by GuardWithIf tail strategies, because the bounds
1477 * machinery has a hard time exploiting if statement conditions.
1478 *
1479 * Unlike unsafe_promise_clamped, this expression is
1480 * context-dependent, because 'value' might be statically bounded at
1481 * some point in the IR (e.g. due to a containing if statement), but
1482 * not elsewhere.
1483 *
1484 * This intrinsic always evaluates to its first argument. If this value is
1485 * used by a side-effecting operation and it is outside the range specified
1486 * by its second and third arguments, behavior is undefined. The compiler can
1487 * therefore assume that the value is within the range given and optimize
1488 * accordingly. Note that this permits promise_clamped to evaluate to
1489 * something outside of the range, provided that this value is not used.
1490 *
1491 * Note that this produces an intrinsic that is marked as 'pure' and thus is
1492 * allowed to be hoisted, etc.; thus, extra care must be taken with its use.
1493 **/
1494Expr promise_clamped(const Expr &value, const Expr &min, const Expr &max);
1495} // namespace Internal
1496
1497/** Scatter and gather are used for update definitions which must store
1498 * multiple values to distinct locations at the same time. The
1499 * multiple expressions on the right-hand-side are bundled together
1500 * into a "gather", which must match a "scatter" with the same number
1501 * of arguments on the left-hand-side. For example, to store the
1502 * values 1 and 2 to the locations (x, y, 3) and (x, y, 4),
1503 * respectively:
1504 *
1505\code
1506f(x, y, scatter(3, 4)) = gather(1, 2);
1507\endcode
1508 *
1509 * The result of gather or scatter can be treated as an
1510 * expression. Any containing operations on it can be assumed to
1511 * distribute over the elements. If two gather expressions are
1512 * combined with an arithmetic operator (e.g. added), they combine
1513 * element-wise. The following example stores the values 2 * x, 2 * y,
1514 * and 2 * c to the locations (x + 1, y, c), (x, y + 3, c), and (x, y,
1515 * c + 2) respectively:
1516 *
1517\code
1518f(x + scatter(1, 0, 0), y + scatter(0, 3, 0), c + scatter(0, 0, 2)) = 2 * gather(x, y, c);
1519\endcode
1520*
1521* Repeated values in the scatter cause multiple stores to the same
1522* location. The stores happen in order from left to right, so the
1523* rightmost value wins. The following code is equivalent to f(x) = 5
1524*
1525\code
1526f(scatter(x, x)) = gather(3, 5);
1527\endcode
1528*
1529* Gathers are most useful for algorithms which require in-place
1530* swapping or permutation of multiple elements, or other kinds of
1531* in-place mutations that require loading multiple inputs, doing some
1532* operations to them jointly, then storing them again. The following
1533* update definition swaps the values of f at locations 3 and 5 if an
1534* input parameter p is true:
1535*
1536\code
1537f(scatter(3, 5)) = f(select(p, gather(5, 3), gather(3, 5)));
1538\endcode
1539*
1540* For more examples of the use of scatter and gather, see
1541* test/correctness/multiple_scatter.cpp
1542*
1543* It is not currently possible to use scatter and gather to write an
1544* update definition in which the *number* of values loaded or stored
1545* varies, as the size of the scatter/gather packet must be fixed at
1546* compile-time. A workaround is to make the unwanted extra operations
1547* a redundant copy of the last operation, which will be
1548* dead-code-eliminated by the compiler. For example, the following
1549* update definition swaps the values at locations 3 and 5 when the
1550* parameter p is true, and rotates the values at locations 1, 2, and 3
1551* when it is false. The load from 3 and store to 5 will be redundantly
1552* repeated:
1553*
1554\code
1555f(select(p, scatter(3, 5, 5), scatter(1, 2, 3))) = f(select(p, gather(5, 3, 3), gather(2, 3, 1)));
1556\endcode
1557*
1558* Note that in the p == true case, we redundantly load from 3 and write
1559* to 5 twice.
1560*/
1561//@{
1562Expr scatter(const std::vector<Expr> &args);
1563Expr gather(const std::vector<Expr> &args);
1564
1565template<typename... Args>
1566Expr scatter(const Expr &e, Args &&...args) {
1567 return scatter({e, std::forward<Args>(args)...});
1568}
1569
1570template<typename... Args>
1571Expr gather(const Expr &e, Args &&...args) {
1572 return gather({e, std::forward<Args>(args)...});
1573}
1574// @}
1575
1576/** Extract a contiguous subsequence of the bits of 'e', starting at the bit
1577 * index given by 'lsb', where zero is the least-significant bit, returning a
1578 * value of type 't'. Any out-of-range bits requested are filled with zeros.
1579 *
1580 * extract_bits is especially useful when one wants to load a small vector of a
1581 * wide type, and treat it as a larger vector of a smaller type. For example,
1582 * loading a vector of 32 uint8 values from a uint32 Func can be done as
1583 * follows:
1584\code
1585f8(x) = extract_bits<uint8_t>(f32(x/4), 8*(x%4));
1586f8.align_bounds(x, 4).vectorize(x, 32);
1587\endcode
1588 * Note that the align_bounds call is critical so that the narrow Exprs are
1589 * aligned to the wider Exprs. This makes the x%4 term collapse to a
1590 * constant. If f8 is an output Func, then constraining the min value of x to be
1591 * a known multiple of four would also be sufficient, e.g. via:
1592\code
1593f8.output_buffer().dim(0).set_min(0);
1594\endcode
1595 *
1596 * See test/correctness/extract_concat_bits.cpp for a complete example. */
1597// @{
1598Expr extract_bits(Type t, const Expr &e, const Expr &lsb);
1599
1600template<typename T>  // T is the C++ type of the result; it is mapped to a Halide Type via type_of<T>().
1601Expr extract_bits(const Expr &e, const Expr &lsb) {
1602 return extract_bits(type_of<T>(), e, lsb);  // Delegate to the extract_bits(Type, e, lsb) overload declared above.
1603}
1604// @}
1605
1606/** Given a number of Exprs of the same type, concatenate their bits producing a
1607 * single Expr of the same type code of the input but with more bits. The
1608 * number of arguments must be a power of two.
1609 *
1610 * concat_bits is especially useful when one wants to treat a Func containing
1611 * values of a narrow type as a Func containing fewer values of a wider
1612 * type. For example, the following code reinterprets vectors of 32 uint8 values
1613 * as a vector of 8 uint32s:
1614 *
1615\code
1616f32(x) = concat_bits({f8(4*x), f8(4*x + 1), f8(4*x + 2), f8(4*x + 3)});
1617f32.vectorize(x, 8);
1618\endcode
1619 *
1620 * See test/correctness/extract_concat_bits.cpp for a complete example.
1621 */
1622Expr concat_bits(const std::vector<Expr> &e);
1623
1624/** Below is a collection of intrinsics for fixed-point programming. Most of
1625 * them can be expressed via other means, but this is more natural for some, as
1626 * it avoids ghost widened intermediates that don't (or shouldn't) actually show
1627 * up in codegen, and doesn't rely on pattern-matching inside the compiler to
1628 * succeed to get good instruction selection.
1629 *
1630 * The semantics of each call are defined in terms of a non-existent 'widen' and
1631 * 'narrow' operators, which stand in for casts that double or halve the
1632 * bit-width of a type respectively.
1633 */
1634
1635/** Compute a + widen(b). */
1637
1638/** Compute a * widen(b). */
1640
1641/** Compute a - widen(b). */
1643
1644/** Compute widen(a) + widen(b). */
1646
1647/** Compute widen(a) * widen(b). a and b may have different signedness, in which
1648 * case the result is signed. */
1650
1651/** Compute widen(a) - widen(b). The result is always signed. */
1653
1654/** Compute widen(a) << b. */
1655//@{
1658//@}
1659
1660/** Compute widen(a) >> b. */
1661//@{
1664//@}
1665
1666/** Compute saturating_narrow(widening_add(a, (1 >> min(b, 0)) / 2) << b).
1667 * When b is positive indicating a left shift, the rounding term is zero. */
1668//@{
1671//@}
1672
1673/** Compute saturating_narrow(widening_add(a, (1 << max(b, 0)) / 2) >> b).
1674 * When b is negative indicating a left shift, the rounding term is zero. */
1675//@{
1678//@}
1679
1680/** Compute saturating_narrow(widen(a) + widen(b)) */
1682
1683/** Compute saturating_narrow(widen(a) - widen(b)) */
1685
1686/** Compute narrow((widen(a) + widen(b)) / 2) */
1688
1689/** Compute narrow((widen(a) + widen(b) + 1) / 2) */
1691
1692/** Compute narrow((widen(a) - widen(b)) / 2) */
1694
1695/** Compute saturating_narrow(shift_right(widening_mul(a, b), q)) */
1696//@{
1699//@}
1700
1701/** Compute saturating_narrow(rounding_shift_right(widening_mul(a, b), q)) */
1702//@{
1705//@}
1706
1707/** Return a boolean Expr for the corresponding field of the Target
1708 * being used during lowering; they can be useful in writing library
1709 * code without having to plumb a Target through call sites, so that you
1710 * can do things like
1711 \code
1712 Expr e = select(target_arch_is(Target::ARM), something, something_else);
1713 \endcode
1714 * Note that this doesn't do any checking at runtime to verify that the Target
1715 * is valid for the current hardware configuration.
1716 */
1717//@{
1721//@}
1722
1723/** Return the bit width of the Target used during lowering; this can be useful
1724 * in writing library code without having to plumb a Target through call sites,
1725 * so that you can do things like
1726 \code
1727 Expr e = select(target_bits() == 32, something, something_else);
1728 \endcode
1729 * Note that this doesn't do any checking at runtime to verify that the Target
1730 * is valid for the current hardware configuration.
1731 */
1733
1734/** Return the natural vector width for the given Type for the Target
1735 * being used during lowering; this can be useful in writing library
1736 * code without having to plumb a Target through call sites, so that you
1737 * can do things like
1738 \code
1739 f.vectorize(x, target_natural_vector_size(Float(32)));
1740 \endcode
1741 * Note that this doesn't do any checking at runtime to verify that the Target
1742 * is valid for the current hardware configuration.
1743 */
1744//@{
1746template<typename data_t>
1750//@}
1751
1752} // namespace Halide
1753
1754#endif
Defines the ConstantInterval class, and operators on it.
Base classes for Halide expressions (Halide::Expr) and statements (Halide::Internal::Stmt)
Defines the Scope class, which is used for keeping track of names in a scope while traversing IR.
Defines the structure that describes a Halide target.
Defines Tuple - the front-end handle on small arrays of expressions.
#define HALIDE_NO_USER_CODE_INLINE
Definition Util.h:47
A fragment of front-end syntax of the form f(x, y, z), where x, y, z are Vars or Exprs.
Definition Func.h:494
A common pattern when traversing Halide IR is that you need to keep track of stuff when you find a Le...
Definition Scope.h:94
static const Scope< T > & empty_scope()
A const ref to an empty scope.
Definition Scope.h:120
Create a small array of Exprs for defining and calling functions with multiple outputs.
Definition Tuple.h:18
Expr make_one(Type t)
Construct the representation of one in the given type.
T div_imp(T a, T b)
Definition IROperator.h:278
bool is_const_zero(const Expr &e)
Is the expression a const (as defined by is_const), and also equal to zero (in all lanes,...
std::optional< int64_t > as_const_int(const Expr &e)
If an expression is an IntImm or a Broadcast of an IntImm, return its value.
Expr memoize_tag_helper(Expr result, const std::vector< Expr > &cache_key_values)
Expr make_zero(Type t)
Construct the representation of zero in the given type.
bool is_negative_const(const Expr &e)
Is the expression a const (as defined by is_const), and also strictly less than zero (in all lanes,...
bool is_undef(const Expr &e)
Is the expression an undef.
std::optional< uint64_t > as_const_uint(const Expr &e)
If an expression is a UIntImm or a Broadcast of a UIntImm, return its value.
Expr requirement_failed_error(Expr condition, const std::vector< Expr > &args)
Expr make_two(Type t)
Construct the representation of two in the given type.
void check_representable(Type t, int64_t val)
Check if a constant value can be correctly represented as the given type.
Expr halide_erf(const Expr &a)
bool is_const_one(const Expr &e)
Is the expression a const (as defined by is_const), and also equal to one (in all lanes,...
void match_types(Expr &a, Expr &b)
Coerce the two expressions to have the same type, using C-style casting rules.
double div_imp< double >(double a, double b)
Definition IROperator.h:319
ConstantInterval min(const ConstantInterval &a, const ConstantInterval &b)
std::optional< double > as_const_float(const Expr &e)
If an expression is a FloatImm or a Broadcast of a FloatImm, return its value.
Expr halide_exp(const Expr &a)
Expr make_const(Type t, int64_t val)
Construct an immediate of the given type from any numeric C++ type.
std::optional< int > is_const_power_of_two_integer(const Expr &e)
Is the expression a constant integer power of two.
bool is_positive_const(const Expr &e)
Is the expression a const (as defined by is_const), and also strictly greater than zero (in all lanes...
Expr const_true(int lanes=1)
Construct the constant boolean true.
ConstantInterval max(const ConstantInterval &a, const ConstantInterval &b)
bool is_signed_integer_overflow(const Expr &expr)
Check if an expression is a signed_integer_overflow.
T mod_imp(T a, T b)
Implementations of division and mod that are specific to Halide.
Definition IROperator.h:257
void reset_random_counters()
Reset the counters used for random-number seeds in random_float/int/uint.
Expr halide_log(const Expr &a)
Halide's vectorizable transcendentals.
bool is_pure(const Expr &e)
Does the expression 1) Take on the same value no matter where it appears in a Stmt,...
void split_into_ands(const Expr &cond, std::vector< Expr > &result)
Split a boolean condition into vector of ANDs.
Expr promise_clamped(const Expr &value, const Expr &min, const Expr &max)
FOR INTERNAL USE ONLY.
bool is_no_op(const Stmt &s)
Is the statement a no-op (which we represent as either an undefined Stmt, or as an Evaluate node of a...
Expr unwrap_tags(const Expr &e)
If the expression is a tag helper call, remove it and return the tagged expression.
float div_imp< float >(float a, float b)
Definition IROperator.h:315
Expr lossless_negate(const Expr &x)
Attempt to negate x without introducing new IR and without overflow.
Expr strided_ramp_base(const Expr &e, int stride=1)
If e is a ramp expression with stride, default 1, return the base, otherwise undefined.
Expr remove_promises(const Expr &e)
Return an Expr that is identical to the input Expr, but with all calls to promise_clamped() and unsaf...
Expr const_false(int lanes=1)
Construct the constant boolean false.
Expr lossless_cast(Type t, Expr e, const Scope< ConstantInterval > &scope=Scope< ConstantInterval >::empty_scope(), std::map< Expr, ConstantInterval, ExprCompare > *cache=nullptr)
Attempt to cast an expression to a smaller type while provably not losing information.
double mod_imp< double >(double a, double b)
Definition IROperator.h:309
Expr make_bool(bool val, int lanes=1)
Construct a boolean constant from a C++ boolean value.
HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector< Expr > &args)
Definition IROperator.h:353
void match_types_bitwise(Expr &a, Expr &b, const char *op_name)
Asserts that both expressions are integer types and are either both signed or both unsigned.
float mod_imp< float >(float a, float b)
Definition IROperator.h:303
Expr raise_to_integer_power(Expr a, int64_t b)
Raise an expression to an integer power by repeatedly multiplying it by itself.
Expr make_signed_integer_overflow(Type type)
Construct a unique signed_integer_overflow Expr.
bool is_const(const Expr &e)
Is the expression either an IntImm, a FloatImm, a StringImm, or a Cast of the same,...
Expr remove_likelies(const Expr &e)
Return an Expr that is identical to the input Expr, but with all calls to likely() and likely_if_inne...
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
auto operator>=(const Other &a, const GeneratorParam< T > &b) -> decltype(a >=(T) b)
Greater than or equal comparison between GeneratorParam<T> and any type that supports operator>= with...
Definition Generator.h:1104
Expr log(Expr x)
Return the logarithm of a floating-point expression.
Expr operator>>(Expr x, Expr y)
Shift the bits of an integer value right.
Expr ceil(Expr x)
Return the least whole number greater than or equal to a floating-point expression.
Expr widen_right_add(Expr a, Expr b)
Below is a collection of intrinsics for fixed-point programming.
Expr rounding_shift_right(Expr a, Expr b)
Compute saturating_narrow(widening_add(a, (1 << max(b, 0)) / 2) >> b).
Expr target_natural_vector_size()
HALIDE_NO_USER_CODE_INLINE Expr memoize_tag(Expr result, Args &&...args)
Control the values used in the memoization cache key for memoize.
Expr fast_log(const Expr &x)
Fast approximate cleanly vectorizable log for Float(32).
Expr count_leading_zeros(Expr x)
Count the number of leading zero bits in an expression.
Expr reinterpret(Type t, Expr e)
Reinterpret the bits of one value as another type.
Expr saturating_add(Expr a, Expr b)
Compute saturating_narrow(widen(a) + widen(b))
auto operator==(const Other &a, const GeneratorParam< T > &b) -> decltype(a==(T) b)
Equality comparison between GeneratorParam<T> and any type that supports operator== with T.
Definition Generator.h:1130
Expr fast_cos(const Expr &x)
Expr & operator*=(Expr &a, Expr b)
Modify the first expression to be the product of two expressions, without changing its type.
Expr random_uint(Expr seed=Expr())
Return a random variable representing a uniformly distributed unsigned 32-bit integer.
Expr fract(const Expr &x)
Return the fractional part of a floating-point expression.
Expr halving_add(Expr a, Expr b)
Compute narrow((widen(a) + widen(b)) / 2)
Expr & operator-=(Expr &a, Expr b)
Modify the first expression to be the difference of two expressions, without changing its type.
auto operator<(const Other &a, const GeneratorParam< T > &b) -> decltype(a<(T) b)
Less than comparison between GeneratorParam<T> and any type that supports operator< with T.
Definition Generator.h:1091
Expr widening_shift_right(Expr a, Expr b)
Compute widen(a) >> b.
Type type_of()
Construct the halide equivalent of a C type.
Definition Type.h:572
auto operator*(const Other &a, const GeneratorParam< T > &b) -> decltype(a *(T) b)
Multiplication between GeneratorParam<T> and any type that supports operator* with T.
Definition Generator.h:1039
Expr strict_float(const Expr &e)
Makes a best effort attempt to preserve IEEE floating-point semantics in evaluating an expression.
Expr trunc(Expr x)
Return the integer part of a floating-point expression.
Expr halving_sub(Expr a, Expr b)
Compute narrow((widen(a) - widen(b)) / 2)
auto operator||(const Other &a, const GeneratorParam< T > &b) -> decltype(a||(T) b)
Logical or between GeneratorParam<T> and any type that supports operator|| with T.
Definition Generator.h:1173
Expr acosh(Expr x)
Return the hyperbolic arccosine of a floating-point expression.
Expr fast_inverse(Expr x)
Fast approximate inverse for Float(32).
Expr target_arch_is(Target::Arch arch)
Return a boolean Expr for the corresponding field of the Target being used during lowering; they can ...
Expr asin(Expr x)
Return the arcsine of a floating-point expression.
Expr rounding_shift_left(Expr a, Expr b)
Compute saturating_narrow(widening_add(a, (1 >> min(b, 0)) / 2) << b).
auto operator-(const Other &a, const GeneratorParam< T > &b) -> decltype(a -(T) b)
Subtraction between GeneratorParam<T> and any type that supports operator- with T.
Definition Generator.h:1026
Expr clamp(Expr a, const Expr &min_val, const Expr &max_val)
Clamps an expression to lie within the given bounds.
Expr hypot(const Expr &x, const Expr &y)
Return the square root of the sum of the squares of two floating-point expressions.
Expr popcount(Expr x)
Count the number of set bits in an expression.
Expr saturating_sub(Expr a, Expr b)
Compute saturating_narrow(widen(a) - widen(b))
Expr gather(const std::vector< Expr > &args)
Expr print_when(Expr condition, const std::vector< Expr > &values)
Create an Expr that prints whenever it is evaluated, provided that the condition is true.
Expr widening_shift_left(Expr a, Expr b)
Compute widen(a) << b.
Expr pow(Expr x, Expr y)
Return one floating point expression raised to the power of another.
Expr operator&(Expr x, Expr y)
Return the bitwise and of two expressions (which need not have the same type).
Expr undef()
auto operator!(const GeneratorParam< T > &a) -> decltype(!(T) a)
Not operator for GeneratorParam.
Definition Generator.h:1245
Expr lerp(Expr zero_val, Expr one_val, Expr weight)
Linear interpolate between the two values according to a weight.
Expr atan2(Expr y, Expr x)
Return the angle of a floating-point gradient.
Expr random_float(Expr seed=Expr())
Return a random variable representing a uniformly distributed float in the half-open interval [0....
Expr sin(Expr x)
Return the sine of a floating-point expression.
Expr unsafe_promise_clamped(const Expr &value, const Expr &min, const Expr &max)
Create an Expr that promises another Expr is clamped but does not generate code to check the asser...
Expr rounding_halving_add(Expr a, Expr b)
Compute narrow((widen(a) + widen(b) + 1) / 2)
Expr extract_bits(Type t, const Expr &e, const Expr &lsb)
Extract a contiguous subsequence of the bits of 'e', starting at the bit index given by 'lsb',...
Expr concat_bits(const std::vector< Expr > &e)
Given a number of Exprs of the same type, concatenate their bits producing a single Expr of the same ...
Expr mux(const Expr &id, const std::initializer_list< Expr > &values)
Oftentimes we want to pack a list of expressions with the same type into a channel dimension,...
Expr cosh(Expr x)
Return the hyperbolic cosine of a floating-point expression.
std::ostream & operator<<(std::ostream &stream, const Expr &)
Emit an expression on an output stream (such as std::cout) in human-readable form.
Type Int(int bits, int lanes=1)
Constructing a signed integer type.
Definition Type.h:541
Expr acos(Expr x)
Return the arccosine of a floating-point expression.
Expr fast_exp(const Expr &x)
Fast approximate cleanly vectorizable exp for Float(32).
Expr widening_add(Expr a, Expr b)
Compute widen(a) + widen(b).
Expr target_os_is(Target::OS os)
Expr cos(Expr x)
Return the cosine of a floating-point expression.
auto operator+(const Other &a, const GeneratorParam< T > &b) -> decltype(a+(T) b)
Addition between GeneratorParam<T> and any type that supports operator+ with T.
Definition Generator.h:1013
Expr min(const FuncRef &a, const FuncRef &b)
Explicit overloads of min and max for FuncRef.
Definition Func.h:603
Expr exp(Expr x)
Return the exponential of a floating-point expression.
Expr widen_right_mul(Expr a, Expr b)
Compute a * widen(b).
Expr absd(Expr a, Expr b)
Return the absolute difference between two values.
auto operator&&(const Other &a, const GeneratorParam< T > &b) -> decltype(a &&(T) b)
Logical and between GeneratorParam<T> and any type that supports operator&& with T.
Definition Generator.h:1156
Expr fast_sin(const Expr &x)
Fast vectorizable approximation to some trigonometric functions for Float(32).
Expr fast_pow(Expr x, Expr y)
Fast approximate cleanly vectorizable pow for Float(32).
auto operator%(const Other &a, const GeneratorParam< T > &b) -> decltype(a %(T) b)
Modulo between GeneratorParam<T> and any type that supports operator% with T.
Definition Generator.h:1065
@ C
No name mangling.
Definition Function.h:28
Expr round(Expr x)
Return the whole number closest to a floating-point expression.
Expr select(Expr condition, Expr true_value, Expr false_value)
Returns an expression similar to the ternary operator in C, except that it always evaluates all argum...
Expr count_trailing_zeros(Expr x)
Count the number of trailing zero bits in an expression.
Expr scatter(const std::vector< Expr > &args)
Scatter and gather are used for update definitions that must store multiple values to distinct locati...
auto operator<=(const Other &a, const GeneratorParam< T > &b) -> decltype(a<=(T) b)
Less than or equal comparison between GeneratorParam<T> and any type that supports operator<= with T.
Definition Generator.h:1117
Expr rounding_mul_shift_right(Expr a, Expr b, Expr q)
Compute saturating_narrow(rounding_shift_right(widening_mul(a, b), q))
Expr random_int(Expr seed=Expr())
Return a random variable representing a uniformly distributed 32-bit integer.
Expr mod_round_to_zero(Expr x, Expr y)
Compute the remainder of dividing two integers, when division is rounding toward zero.
Expr & operator/=(Expr &a, Expr b)
Modify the first expression to be the ratio of two expressions, without changing its type.
Expr widening_mul(Expr a, Expr b)
Compute widen(a) * widen(b).
auto operator>(const Other &a, const GeneratorParam< T > &b) -> decltype(a >(T) b)
Greater than comparison between GeneratorParam<T> and any type that supports operator> with T.
Definition Generator.h:1078
Expr is_nan(Expr x)
Returns true if the argument is a Not a Number (NaN).
Expr asinh(Expr x)
Return the hyperbolic arcsine of a floating-point expression.
Expr sqrt(Expr x)
Return the square root of a floating-point expression.
Expr sinh(Expr x)
Return the hyperbolic sine of a floating-point expression.
Expr atan(Expr x)
Return the arctangent of a floating-point expression.
Expr operator|(Expr x, Expr y)
Return the bitwise or of two expressions (which need not have the same type).
auto operator!=(const Other &a, const GeneratorParam< T > &b) -> decltype(a !=(T) b)
Inequality comparison between GeneratorParam<T> and any type that supports operator!...
Definition Generator.h:1143
Expr target_bits()
Return the bit width of the Target used during lowering; this can be useful in writing library code w...
Internal::ConstantInterval cast(Type t, const Internal::ConstantInterval &a)
Cast operators for ConstantIntervals.
Expr require(Expr condition, const std::vector< Expr > &values)
Create an Expr that guarantees a precondition.
Expr is_inf(Expr x)
Returns true if the argument is Inf or -Inf.
Expr is_finite(Expr x)
Returns true if the argument is a finite value (ie, neither NaN nor Inf).
Expr tanh(Expr x)
Return the hyperbolic tangent of a floating-point expression.
Expr likely_if_innermost(Expr e)
Equivalent to likely, but only triggers a loop partitioning if found in an innermost loop.
Expr atanh(Expr x)
Return the hyperbolic arctangent of a floating-point expression.
Expr tan(Expr x)
Return the tangent of a floating-point expression.
Internal::ConstantInterval saturating_cast(Type t, const Internal::ConstantInterval &a)
Expr fast_inverse_sqrt(Expr x)
Fast approximate inverse square root for Float(32).
Expr print(const std::vector< Expr > &values)
Create an Expr that prints out its value whenever it is evaluated.
Expr mul_shift_right(Expr a, Expr b, Expr q)
Compute saturating_narrow(shift_right(widening_mul(a, b), q))
auto operator/(const Other &a, const GeneratorParam< T > &b) -> decltype(a/(T) b)
Division between GeneratorParam<T> and any type that supports operator/ with T.
Definition Generator.h:1052
Expr & operator+=(Expr &a, Expr b)
Modify the first expression to be the sum of two expressions, without changing its type.
Expr abs(Expr a)
Returns the absolute value of a signed integer or floating-point expression.
Expr widen_right_sub(Expr a, Expr b)
Compute a - widen(b).
Expr max(const FuncRef &a, const FuncRef &b)
Definition Func.h:606
Expr floor(Expr x)
Return the greatest whole number less than or equal to a floating-point expression.
Expr div_round_to_zero(Expr x, Expr y)
Divide two integers, rounding towards zero.
Expr widening_sub(Expr a, Expr b)
Compute widen(a) - widen(b).
Expr likely(Expr e)
Expressions tagged with this intrinsic are considered to be part of the steady state of some loop wit...
Expr operator~(Expr x)
Return the bitwise not of an expression.
Expr erf(const Expr &x)
Evaluate the error function erf.
Expr target_has_feature(Target::Feature feat)
Expr operator^(Expr x, Expr y)
Return the bitwise xor of two expressions (which need not have the same type).
unsigned __INT64_TYPE__ uint64_t
signed __INT64_TYPE__ int64_t
signed __INT32_TYPE__ int32_t
unsigned __INT8_TYPE__ uint8_t
unsigned __INT16_TYPE__ uint16_t
unsigned __INT32_TYPE__ uint32_t
signed __INT16_TYPE__ int16_t
signed __INT8_TYPE__ int8_t
A fragment of Halide syntax.
Definition Expr.h:258
A builder to help create Exprs representing halide_buffer_t structs (e.g.
Definition IROperator.h:228
std::vector< Expr > strides
Definition IROperator.h:233
std::vector< Expr > extents
Definition IROperator.h:233
A reference-counted handle to a statement node.
Definition Expr.h:427
static constexpr bool value
Definition IROperator.h:345
Feature
Optional features a target can have.
Definition Target.h:84
Arch
The architecture used by the target.
Definition Target.h:39
OS
The operating system used by the target.
Definition Target.h:23
Types in the halide type system.
Definition Type.h:283
HALIDE_ALWAYS_INLINE bool is_int() const
Is this type a signed integer type?
Definition Type.h:435
HALIDE_ALWAYS_INLINE bool is_float() const
Is this type a floating point type (float or double).
Definition Type.h:423
Class that provides a type that implements half precision floating point (IEEE754 2008 binary16) in s...
Definition Float16.h:17