CodeGen_LLVM.h
#ifndef HALIDE_CODEGEN_LLVM_H
#define HALIDE_CODEGEN_LLVM_H

/** \file
 *
 * Defines the base class for all architecture-specific code
 * generators that use llvm.
 */

namespace llvm {
class Value;
class Module;
class Function;
class FunctionType;
class IRBuilderDefaultInserter;
class ConstantFolder;
template<typename, typename>
class IRBuilder;
class LLVMContext;
class Type;
class PointerType;
class StructType;
class Instruction;
class CallInst;
class ExecutionEngine;
class AllocaInst;
class Constant;
class Triple;
class MDNode;
class NamedMDNode;
class DataLayout;
class BasicBlock;
class GlobalVariable;
} // namespace llvm

#include <map>
#include <memory>
#include <optional>
#include <string>
#include <variant>
#include <vector>

#include "IRVisitor.h"
#include "Module.h"
#include "Scope.h"
#include "Target.h"

namespace Halide {

struct ExternSignature;

namespace Internal {

/** A code generator abstract base class. Actual code generators
 * (e.g. CodeGen_X86) inherit from this. This class is responsible
 * for taking a Halide Stmt and producing llvm bitcode, machine
 * code in an object file, or machine code accessible through a
 * function pointer.
 */
class CodeGen_LLVM : public IRVisitor {
public:
    /** Create an instance of CodeGen_LLVM suitable for the target. */
    static std::unique_ptr<CodeGen_LLVM> new_for_target(const Target &target, llvm::LLVMContext &context);

    /** Takes a halide Module and compiles it to an llvm Module. */
    virtual std::unique_ptr<llvm::Module> compile(const Module &module);
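
    // Illustrative sketch (editor's addition, not part of the upstream
    // header): the typical top-level flow, assuming a lowered Halide
    // `Module` named `halide_module` and an `llvm::LLVMContext` named `ctx`:
    //
    //     CodeGen_LLVM::initialize_llvm();
    //     auto cg = CodeGen_LLVM::new_for_target(halide_module.target(), ctx);
    //     std::unique_ptr<llvm::Module> llvm_module = cg->compile(halide_module);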

    /** The target we're generating code for */
    const Target &get_target() const {
        return target;
    }

    /** Tell the code generator which LLVM context to use. */
    void set_context(llvm::LLVMContext &context);

    /** Initialize internal llvm state for the enabled targets. */
    static void initialize_llvm();

    static std::unique_ptr<llvm::Module> compile_trampolines(
        const Target &target,
        llvm::LLVMContext &context,
        const std::string &suffix,
        const std::vector<std::pair<std::string, ExternSignature>> &externs);

    size_t get_requested_alloca_total() const {
        return requested_alloca_total;
    }

protected:
    CodeGen_LLVM(const Target &t);

    /** Compile a specific halide declaration into the llvm Module. */
    // @{
    virtual void compile_func(const LoweredFunc &func, const std::string &simple_name, const std::string &extern_name);
    virtual void compile_buffer(const Buffer<> &buffer);
    // @}

    /** Helper functions for compiling Halide functions to llvm
     * functions. begin_func performs all the work necessary to begin
     * generating code for a function with a given argument list with
     * the IRBuilder. A call to begin_func should be followed by a
     * call to end_func with the same arguments, to generate the
     * appropriate cleanup code. */
    // @{
    virtual void begin_func(LinkageType linkage, const std::string &simple_name,
                            const std::string &extern_name, const std::vector<LoweredArgument> &args);
    virtual void end_func(const std::vector<LoweredArgument> &args);
    // @}

    /** What should be passed as -mcpu (warning: implies attrs!), -mattrs,
     * and related for compilation. The architecture-specific code generator
     * should define these.
     *
     * `mcpu_target()` - target this specific CPU, in the sense of the allowed
     * ISA sets *and* the CPU-specific tuning/assembly instruction scheduling.
     *
     * `mcpu_tune()` - expect that we will be running on this specific CPU,
     * so perform CPU-specific tuning/assembly instruction scheduling, but
     * DON'T sacrifice portability: keep supporting other CPUs by only
     * using the ISAs that are enabled by `mcpu_target()`+`mattrs()`.
     */
    // @{
    virtual std::string mcpu_target() const = 0;
    virtual std::string mcpu_tune() const = 0;
    virtual std::string mattrs() const = 0;
    virtual std::string mabi() const;
    virtual bool use_soft_float_abi() const = 0;
    virtual bool use_pic() const;
    // @}
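
    // Illustrative sketch (editor's addition, not part of the upstream
    // header): a hypothetical x86 backend might implement these hooks as:
    //
    //     std::string mcpu_target() const override { return "skylake"; }
    //     std::string mcpu_tune() const override { return "znver3"; }
    //     std::string mattrs() const override { return "+avx2,+fma"; }
    //
    // i.e. only Skylake-era ISAs may be emitted, but instruction
    // scheduling is tuned as if running on Zen 3.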

    /** Should indexing math be promoted to 64-bit on platforms with
     * 64-bit pointers? */
    virtual bool promote_indices() const {
        return true;
    }

    /** What's the natural vector bit-width to use for loads, stores, etc. */
    virtual int native_vector_bits() const = 0;

    /** Used to decide whether to break a vector up into multiple smaller
     * operations. This is the largest size the architecture supports. */
    virtual int maximum_vector_bits() const {
        return native_vector_bits();
    }
    /** For architectures that have vscale vectors, return the constant vscale to use.
     * Default of 0 means do not use vscale vectors. Generally will depend on
     * the target flags and vector_bits settings.
     */
    virtual int target_vscale() const {
        return 0;
    }

    /** Return the type in which arithmetic should be done for the
     * given storage type. */
    virtual Type upgrade_type_for_arithmetic(const Type &) const;

    /** Return the type that a given Halide type should be
     * stored/loaded from memory as. */
    virtual Type upgrade_type_for_storage(const Type &) const;

    /** Return the type that a Halide type should be passed in and out
     * of functions as. */
    virtual Type upgrade_type_for_argument_passing(const Type &) const;

    std::unique_ptr<llvm::Module> module;
    llvm::Function *function = nullptr;
    llvm::LLVMContext *context = nullptr;
    std::unique_ptr<llvm::IRBuilder<llvm::ConstantFolder, llvm::IRBuilderDefaultInserter>> builder;
    llvm::Value *value = nullptr;
    llvm::MDNode *very_likely_branch = nullptr;
    llvm::MDNode *fast_fp_math_md = nullptr;
    llvm::MDNode *strict_fp_math_md = nullptr;
    std::vector<LoweredArgument> current_function_args;

    bool in_strict_float = false;
    bool any_strict_float = false;

    /** Change floating-point math op emission to use fast flags. */
    void set_fast_fp_math();

    /** Change floating-point math op emission to use strict flags. */
    void set_strict_fp_math();

    /** If any_strict_float is true, sets fast math flags for the lifetime of
     * this object, then sets them to strict on destruction. If any_strict_float
     * is false, does nothing. Any call to an IRBuilder method that starts with
     * "CreateF" should probably be wrapped in one of these, but it's safe to
     * miss one - we just miss out on some optimizations. In this way codegen is
     * designed to fail safe. */
    struct ScopedFastMath {
        ScopedFastMath(CodeGen_LLVM *);
        ~ScopedFastMath();
        CodeGen_LLVM *codegen;
    };

    /** The target we're generating code for */
    Halide::Target target;

    /** Grab all the context specific internal state. */
    virtual void init_context();
    /** Initialize the CodeGen_LLVM internal state to compile a fresh
     * module. This allows reuse of one CodeGen_LLVM object to compile
     * multiple related modules (e.g. multiple device kernels). */
    virtual void init_module();

    /** Run all of llvm's optimization passes on the module. */
    void optimize_module();

    /** Add an entry to the symbol table, hiding previous entries with
     * the same name. Call this when new values come into scope. */
    void sym_push(const std::string &name, llvm::Value *value);

    /** Remove an entry from the symbol table, revealing any previous
     * entries with the same name. Call this when values go out of
     * scope. */
    void sym_pop(const std::string &name);

    /** Fetch an entry from the symbol table. If the symbol is not
     * found, it either errors out (if the second arg is true), or
     * returns nullptr. */
    llvm::Value *sym_get(const std::string &name,
                         bool must_succeed = true) const;

    /** Test if an item exists in the symbol table. */
    bool sym_exists(const std::string &name) const;
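
    // Illustrative sketch (editor's addition, not part of the upstream
    // header): the usual push/use/pop pattern while visiting a Let node,
    // assuming `op` is a `const Let *`:
    //
    //     llvm::Value *v = codegen(op->value);
    //     sym_push(op->name, v);
    //     value = codegen(op->body);  // body may reference op->name
    //     sym_pop(op->name);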

    /** Given a Halide ExternSignature, return the equivalent llvm::FunctionType. */
    llvm::FunctionType *signature_to_type(const ExternSignature &signature);

    /** Some useful llvm types */
    // @{
    llvm::Type *void_t = nullptr, *i1_t = nullptr, *i8_t = nullptr, *i16_t = nullptr, *i32_t = nullptr, *i64_t = nullptr, *f16_t = nullptr, *f32_t = nullptr, *f64_t = nullptr;
    llvm::PointerType *ptr_t = nullptr;
    llvm::StructType *halide_buffer_t_type = nullptr,
                     *type_t_type = nullptr,
                     *dimension_t_type = nullptr,
                     *metadata_t_type = nullptr,
                     *argument_t_type = nullptr,
                     *scalar_value_t_type = nullptr,
                     *device_interface_t_type = nullptr,
                     *pseudostack_slot_t_type = nullptr,
                     *semaphore_t_type = nullptr;

    // @}

    /** Some wildcard variables used for peephole optimizations in
     * subclasses */
    // @{
    Expr wild_u1x_, wild_i8x_, wild_u8x_, wild_i16x_, wild_u16x_;
    Expr wild_i32x_, wild_u32x_, wild_i64x_, wild_u64x_;
    Expr wild_f32x_, wild_f64x_;

    // Wildcards for scalars.
    Expr wild_u1_, wild_i8_, wild_u8_, wild_i16_, wild_u16_;
    Expr wild_i32_, wild_u32_, wild_i64_, wild_u64_;
    Expr wild_f32_, wild_f64_;
    // @}

    /** Emit code that evaluates an expression, and return the llvm
     * representation of the result of the expression. */
    llvm::Value *codegen(const Expr &);

    /** Emit code that runs a statement. */
    void codegen(const Stmt &);
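
    // Illustrative sketch (editor's addition, not part of the upstream
    // header): visit() overrides communicate through the `value` member
    // rather than a return value, so codegen(Expr) is conceptually:
    //
    //     value = nullptr;
    //     e.accept(this);  // dispatches to the matching visit() override
    //     return value;    // whatever that visit() stored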

    /** Codegen a vector Expr by codegenning each lane and combining. */
    void scalarize(const Expr &);

    /** Some destructors should always be called. Others should only
     * be called if the pipeline is exiting with an error code. */
    enum DestructorType { Always,
                          OnError,
                          OnSuccess };

    /* Call this at the location of object creation to register how an
     * object should be destroyed. This does three things:
     * 1) Emits code here that puts the object in a unique
     * null-initialized stack slot
     * 2) Adds an instruction to the destructor block that calls the
     * destructor on that stack slot if it's not null.
     * 3) Returns that stack slot, so you can neuter the destructor
     * (by storing null to the stack slot) or destroy the object early
     * (by calling trigger_destructor).
     */
    llvm::Value *register_destructor(llvm::Function *destructor_fn, llvm::Value *obj, DestructorType when);

    /** Call a destructor early. Pass in the value returned by register destructor. */
    void trigger_destructor(llvm::Function *destructor_fn, llvm::Value *stack_slot);
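
    // Illustrative sketch (editor's addition, not part of the upstream
    // header): registering cleanup for a hypothetical runtime object,
    // assuming `free_fn` is an llvm::Function* for its destructor:
    //
    //     llvm::Value *slot = register_destructor(free_fn, obj, OnError);
    //     ...
    //     // Destroy early on the happy path instead of at function exit:
    //     trigger_destructor(free_fn, slot);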

    /** Retrieves the block containing the error handling
     * code. Creates it if it doesn't already exist for this
     * function. */
    llvm::BasicBlock *get_destructor_block();

    /** Codegen an assertion. If false, returns the error code (if not
     * null), or evaluates and returns the message, which must be an
     * Int(32) expression. */
    // @{
    void create_assertion(llvm::Value *condition, const Expr &message, llvm::Value *error_code = nullptr);
    // @}

    /** Codegen a block of asserts with pure conditions */
    void codegen_asserts(const std::vector<const AssertStmt *> &asserts);

    /** Return from the pipeline with the given error code. Will run
     * the destructor block. */
    void return_with_error_code(llvm::Value *error_code);

    /** Put a string constant in the module as a global variable and return a pointer to it. */
    llvm::Constant *create_string_constant(const std::string &str);

    /** Put a binary blob in the module as a global variable and return a pointer to it. */
    llvm::Constant *create_binary_blob(const std::vector<char> &data, const std::string &name, bool constant = true);

    /** Widen an llvm scalar into an llvm vector with the given number of lanes. */
    llvm::Value *create_broadcast(llvm::Value *, int lanes);

    /** Generate a pointer into a named buffer at a given index, of a
     * given type. The index counts according to the scalar type of
     * the type passed in. */
    // @{
    llvm::Value *codegen_buffer_pointer(const std::string &buffer, Type type, llvm::Value *index);
    llvm::Value *codegen_buffer_pointer(const std::string &buffer, Type type, Expr index);
    llvm::Value *codegen_buffer_pointer(llvm::Value *base_address, Type type, Expr index);
    llvm::Value *codegen_buffer_pointer(llvm::Value *base_address, Type type, llvm::Value *index);
    // @}

    /** Return type string for LLVM type using LLVM IR intrinsic type mangling.
     * E.g. ".i32" or ".f32" for scalars, ".p0" for pointers,
     * ".nxv4i32" for a scalable vector of four 32-bit integers,
     * or ".v4f32" for a fixed vector of four 32-bit floats.
     * The dot is included in the result.
     */
    std::string mangle_llvm_type(llvm::Type *type);

    /** Turn a Halide Type into an llvm::Value representing a constant halide_type_t */
    llvm::Value *make_halide_type_t(const Type &);

    /** Mark a load or store with type-based-alias-analysis metadata
     * so that llvm knows it can reorder loads and stores across
     * different buffers */
    void add_tbaa_metadata(llvm::Instruction *inst, std::string buffer, const Expr &index);

    /** Get a unique name for the actual block of memory that an
     * allocate node uses. Used so that alias analysis understands
     * when multiple Allocate nodes share the same memory. */
    virtual std::string get_allocation_name(const std::string &n) {
        return n;
    }

    /** Add the appropriate function attribute to tell LLVM that the function
     * doesn't access memory. */
    void function_does_not_access_memory(llvm::Function *fn);

    using IRVisitor::visit;

    /** Generate code for various IR nodes. These can be overridden by
     * architecture-specific code to perform peephole
     * optimizations. The result of each is stored in \ref value */
    // @{
    void visit(const IntImm *) override;
    void visit(const UIntImm *) override;
    void visit(const FloatImm *) override;
    void visit(const StringImm *) override;
    void visit(const Cast *) override;
    void visit(const Reinterpret *) override;
    void visit(const Variable *) override;
    void visit(const Add *) override;
    void visit(const Sub *) override;
    void visit(const Mul *) override;
    void visit(const Div *) override;
    void visit(const Mod *) override;
    void visit(const Min *) override;
    void visit(const Max *) override;
    void visit(const EQ *) override;
    void visit(const NE *) override;
    void visit(const LT *) override;
    void visit(const LE *) override;
    void visit(const GT *) override;
    void visit(const GE *) override;
    void visit(const And *) override;
    void visit(const Or *) override;
    void visit(const Not *) override;
    void visit(const Select *) override;
    void visit(const Load *) override;
    void visit(const Ramp *) override;
    void visit(const Broadcast *) override;
    void visit(const Call *) override;
    void visit(const Let *) override;
    void visit(const LetStmt *) override;
    void visit(const AssertStmt *) override;
    void visit(const ProducerConsumer *) override;
    void visit(const For *) override;
    void visit(const Store *) override;
    void visit(const Block *) override;
    void visit(const IfThenElse *) override;
    void visit(const Evaluate *) override;
    void visit(const Shuffle *) override;
    void visit(const VectorReduce *) override;
    void visit(const Prefetch *) override;
    void visit(const Atomic *) override;
    // @}

    /** Generate code for an allocate node. It has no default
     * implementation - it must be handled in an architecture-specific
     * way. */
    void visit(const Allocate *) override = 0;

    /** Generate code for a free node. It has no default
     * implementation and must be handled in an architecture-specific
     * way. */
    void visit(const Free *) override = 0;

    /** These IR nodes should have been removed during
     * lowering. CodeGen_LLVM will error out if they are present */
    // @{
    void visit(const Provide *) override;
    void visit(const Realize *) override;
    // @}

    /** Get the llvm type equivalent to the given halide type in the
     * current context. */
    virtual llvm::Type *llvm_type_of(const Type &) const;

    /** Get the llvm type equivalent to a given halide type. If
     * effective_vscale is nonzero and the type is a vector type with lanes
     * a multiple of effective_vscale, a scalable vector type is generated
     * with total lanes divided by effective_vscale. That is a scalable
     * vector intended to be used with a fixed vscale of effective_vscale.
     */
    llvm::Type *llvm_type_of(llvm::LLVMContext *context, Halide::Type t,
                             int effective_vscale) const;
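
    // Illustrative example (editor's addition, not part of the upstream
    // header): with effective_vscale == 4, a Halide Float(32, 8) maps to
    // the scalable LLVM type <vscale x 2 x float> (8 lanes / vscale 4),
    // whereas effective_vscale == 0 yields the fixed type <8 x float>.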

    /** Perform an alloca at the function entrypoint. Will be cleaned
     * on function exit. */
    llvm::Value *create_alloca_at_entry(llvm::Type *type, int n,
                                        bool zero_initialize = false,
                                        const std::string &name = "");

    /** A (very) conservative guess at the size of all alloca() storage requested
     * (including alignment padding). It's currently meant only to be used as
     * a very coarse way to ensure there is enough stack space when testing
     * on the WebAssembly backend.
     *
     * It is *not* meant to be a useful proxy for "stack space needed", for a
     * number of reasons:
     * - allocas with non-overlapping lifetimes will share space
     * - on some backends, LLVM may promote register-sized allocas into registers
     * - while this accounts for alloca() calls we know about, it doesn't attempt
     * to account for stack spills, function call overhead, etc.
     */
    size_t requested_alloca_total = 0;

    /** The user_context argument. May be a constant null if the
     * function is being compiled without a user context. */
    llvm::Value *get_user_context() const;

    /** Implementation of the intrinsic call to
     * interleave_vectors. This implementation allows for interleaving
     * an arbitrary number of vectors.*/
    virtual llvm::Value *interleave_vectors(const std::vector<llvm::Value *> &);

    /** Description of an intrinsic function overload. Overloads are resolved
     * using both argument and return types. The scalar types of the arguments
     * and return type must match exactly for an overload resolution to succeed. */
    struct Intrinsic {
        Type result_type;
        std::vector<Type> arg_types;
        llvm::Function *impl;

        Intrinsic(Type result_type, std::vector<Type> arg_types, llvm::Function *impl)
            : result_type(result_type), arg_types(std::move(arg_types)), impl(impl) {
        }
    };
    /** Mapping of intrinsic functions to the various overloads implementing it. */
    std::map<std::string, std::vector<Intrinsic>> intrinsics;

    /** Get an LLVM intrinsic declaration. If it doesn't exist, it will be created. */
    llvm::Function *get_llvm_intrin(const Type &ret_type, const std::string &name, const std::vector<Type> &arg_types, bool scalars_are_vectors = false);
    llvm::Function *get_llvm_intrin(llvm::Type *ret_type, const std::string &name, const std::vector<llvm::Type *> &arg_types);
    /** Declare an intrinsic function that participates in overload resolution. */
    llvm::Function *declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector<Type> arg_types, bool scalars_are_vectors = false);
    void declare_intrin_overload(const std::string &name, const Type &ret_type, llvm::Function *impl, std::vector<Type> arg_types);
    /** Call an overloaded intrinsic function. Returns nullptr if no suitable overload is found. */
    llvm::Value *call_overloaded_intrin(const Type &result_type, const std::string &name, const std::vector<Expr> &args);
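
    // Illustrative sketch (editor's addition, not part of the upstream
    // header): a backend might register two widening-add overloads and
    // let the argument/return types pick the right one. The impl names
    // below are hypothetical:
    //
    //     declare_intrin_overload("widening_add", Int(16, 8),
    //                             "some.backend.sadd.wide",  // hypothetical
    //                             {Int(8, 8), Int(8, 8)});
    //     declare_intrin_overload("widening_add", UInt(16, 8),
    //                             "some.backend.uadd.wide",  // hypothetical
    //                             {UInt(8, 8), UInt(8, 8)});
    //     ...
    //     value = call_overloaded_intrin(op->type, "widening_add", op->args);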

    /** Generate a call to a vector intrinsic or runtime inlined
     * function. The arguments are sliced up into vectors of the width
     * given by 'intrin_lanes', the intrinsic is called on each
     * piece, then the results (if any) are concatenated back together
     * into the original type 't'. For the version that takes an
     * llvm::Type *, the type may be void, so the vector width of the
     * arguments must be specified explicitly as
     * 'intrin_lanes'. */
    // @{
    llvm::Value *call_intrin(const Type &t, int intrin_lanes,
                             const std::string &name, std::vector<Expr>);
    llvm::Value *call_intrin(const Type &t, int intrin_lanes,
                             llvm::Function *intrin, std::vector<Expr>);
    llvm::Value *call_intrin(const llvm::Type *t, int intrin_lanes,
                             const std::string &name, std::vector<llvm::Value *>,
                             bool scalable_vector_result = false, bool is_reduction = false);
    llvm::Value *call_intrin(const llvm::Type *t, int intrin_lanes,
                             llvm::Function *intrin, std::vector<llvm::Value *>,
                             bool is_reduction = false);
    // @}
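
    // Illustrative example (editor's addition, not part of the upstream
    // header): calling a 4-wide intrinsic on a 16-wide value slices the
    // argument into four v4 pieces, calls the intrinsic on each, and
    // concatenates the results back into one 16-wide vector:
    //
    //     value = call_intrin(UInt(8, 16), 4, "some.v4i8.op", {arg});
    //
    // (intrinsic name hypothetical)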

    /** Take a slice of lanes out of an llvm vector. Pads with undefs
     * if you ask for more lanes than the vector has. */
    virtual llvm::Value *slice_vector(llvm::Value *vec, int start, int extent);

    /** Concatenate a bunch of llvm vectors. Must be of the same type. */
    virtual llvm::Value *concat_vectors(const std::vector<llvm::Value *> &);

    /** Create an LLVM shuffle vectors instruction. Takes a combination of
     * fixed or scalable vectors as input, so long as the effective lengths match,
     * but always returns a fixed vector. */
    virtual llvm::Value *shuffle_vectors(llvm::Value *a, llvm::Value *b,
                                         const std::vector<int> &indices);
    /** Shorthand for shuffling a single vector. */
    llvm::Value *shuffle_vectors(llvm::Value *v, const std::vector<int> &indices);

    /** Go looking for a vector version of a runtime function. Will
     * return the best match. Matches in the following order:
     *
     * 1) The requested vector width.
     *
     * 2) The width which is the smallest power of two
     * greater than or equal to the vector width.
     *
     * 3) All the factors of 2) greater than one, in decreasing order.
     *
     * 4) The smallest power of two not yet tried.
     *
     * So for a 5-wide vector, it tries: 5, 8, 4, 2, 16.
     *
     * If there's no match, returns (nullptr, 0).
     */
    std::pair<llvm::Function *, int> find_vector_runtime_function(const std::string &name, int lanes);
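
    // Worked example (editor's addition, not part of the upstream
    // header): for a 12-wide request the candidate order is
    // 12 (requested), 16 (smallest power of two >= 12), then 16's
    // factors 8, 4, 2 in decreasing order, then 32 (smallest power of
    // two not yet tried).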

    virtual bool supports_atomic_add(const Type &t) const;

    /** Compile a horizontal reduction that starts with an explicit
     * initial value. There are lots of complex ways to peephole
     * optimize this pattern, especially with the proliferation of
     * dot-product instructions, and they can usefully share logic
     * across backends. */
    virtual void codegen_vector_reduce(const VectorReduce *op, const Expr &init);

    /** Are we inside an atomic node that uses mutex locks?
        This is used for detecting deadlocks from nested atomics & illegal vectorization. */
    bool inside_atomic_mutex_node = false;

    /** Emit atomic store instructions? */
    bool emit_atomic_stores = false;

    /** Can we call this operation with float16 type?
        This is used to avoid "emulated" equivalent code-gen when the target has the FP16 feature. */
    virtual bool supports_call_as_float16(const Call *op) const;

    /** call_intrin does far too much to be useful and generally breaks things
     * when one has carefully set things up for a specific architecture. This
     * just does the bare minimum. call_intrin should be refactored and could
     * call this, possibly with renaming of the methods. */
    llvm::Value *simple_call_intrin(const std::string &intrin,
                                    const std::vector<llvm::Value *> &args,
                                    llvm::Type *result_type);

    /** Ensure that a vector value is either a fixed or a vscale vector,
     * whichever matches desired_type. */
    llvm::Value *normalize_fixed_scalable_vector_type(llvm::Type *desired_type, llvm::Value *result);

    /** Convert between two LLVM vector types that may differ in size and in
     * being scalable or fixed. Used to handle converting to/from fixed vectors
     * that are smaller than the minimum size scalable vector. */
    llvm::Value *convert_fixed_or_scalable_vector_type(llvm::Value *arg,
                                                       llvm::Type *desired_type);

    /** Convert an LLVM fixed vector value to the corresponding vscale vector value. */
    llvm::Value *fixed_to_scalable_vector_type(llvm::Value *fixed);

    /** Convert an LLVM vscale vector value to the corresponding fixed vector value. */
    llvm::Value *scalable_to_fixed_vector_type(llvm::Value *scalable);

    /** Get number of vector elements, taking into account scalable vectors. Returns 1 for scalars. */
    int get_vector_num_elements(const llvm::Type *t);

    /** Interface to abstract vector code generation as LLVM is now
     * providing multiple options to express even simple vector
     * operations. Specifically traditional fixed length vectors, vscale
     * based variable length vectors, and the vector predicate based approach
     * where an explicit length is passed with each instruction.
     */
    // @{
    enum class VectorTypeConstraint {
        None,    /// Use default for current target.
        Fixed,   /// Force use of fixed size vectors.
        VScale,  /// Force use of scalable vectors.
    };
    llvm::Type *get_vector_type(llvm::Type *, int n,
                                VectorTypeConstraint type_constraint = VectorTypeConstraint::None) const;
    // @}

    llvm::Constant *get_splat(int lanes, llvm::Constant *value,
                              VectorTypeConstraint type_constraint = VectorTypeConstraint::None) const;
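
    // Illustrative sketch (editor's addition, not part of the upstream
    // header): with i32_t already initialized, a backend could build a
    // four-lane zero vector, forcing a fixed-width type even on a
    // vscale-capable target:
    //
    //     llvm::Type *v4i32 = get_vector_type(i32_t, 4, VectorTypeConstraint::Fixed);
    //     llvm::Constant *zero = get_splat(4, llvm::ConstantInt::get(i32_t, 0),
    //                                      VectorTypeConstraint::Fixed);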

    /** Make sure a value type has the same scalable/fixed vector type as a guide. */
    // @{
    llvm::Value *match_vector_type_scalable(llvm::Value *value, VectorTypeConstraint constraint);
    llvm::Value *match_vector_type_scalable(llvm::Value *value, llvm::Type *guide);
    llvm::Value *match_vector_type_scalable(llvm::Value *value, llvm::Value *guide);
    // @}

    /** Support for generating LLVM vector predication intrinsics
     * ("@llvm.vp.*" and "@llvm.experimental.vp.*")
     */
    // @{
    /** Struct to hold descriptor for an argument to a vector
     * predicated intrinsic. This includes the value, whether the
     * type of the argument should be mangled into the intrinsic name
     * and if so, where, and the alignment for pointer arguments. */
    struct VPArg {
        llvm::Value *value;
        // If provided, put argument's type into the intrinsic name via LLVM IR type mangling.
        std::optional<size_t> mangle_index;
        int32_t alignment;
        VPArg(llvm::Value *value, std::optional<size_t> mangle_index = std::nullopt, int32_t alignment = 0)
            : value(value), mangle_index(mangle_index), alignment(alignment) {
        }
    };

    /** Type indicating an intrinsic does not take a mask. */
    struct NoMask {
    };

    /** Type indicating mask to use is all true -- all lanes enabled. */
    struct AllEnabledMask {
    };

    /** Predication mask using the above two types for special cases
     * and an llvm::Value for the general one. */
    using MaskVariant = std::variant<NoMask, AllEnabledMask, llvm::Value *>;

    /** Generate a vector predicated comparison intrinsic call if
     * use_llvm_vp_intrinsics is true and result_type is a vector
     * type. If generated, assigns the result of the vp intrinsic to
     * value and returns true if an instruction is generated, otherwise
     * returns false. */
    bool try_vector_predication_comparison(const std::string &name, const Type &result_type,
                                           MaskVariant mask, llvm::Value *a, llvm::Value *b,
                                           const char *cmp_op);

    struct VPResultType {
        llvm::Type *type;
        std::optional<size_t> mangle_index;
        VPResultType(llvm::Type *type, std::optional<size_t> mangle_index = std::nullopt)
            : type(type), mangle_index(mangle_index) {
        }
    };

    /** Generate an intrinsic call if use_llvm_vp_intrinsics is true
     * and length is greater than 1. If generated, assigns the result
     * of the vp intrinsic to value and returns true if an instruction
     * is generated, otherwise returns false. */
    bool try_vector_predication_intrinsic(const std::string &name, VPResultType result_type,
                                          int32_t length, MaskVariant mask, std::vector<VPArg> args);
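
    // Illustrative sketch (editor's addition, not part of the upstream
    // header): emitting a vector-predicated fadd, mangling the result
    // type into the name (yielding e.g. "llvm.vp.fadd.v4f32"):
    //
    //     if (try_vector_predication_intrinsic("llvm.vp.fadd", VPResultType(f32x4, 0),
    //                                          4, AllEnabledMask(),
    //                                          {VPArg(a), VPArg(b)})) {
    //         // `value` now holds the result.
    //     }
    //
    // where `f32x4` is a hypothetical 4 x float llvm::Type*.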

    /** Controls use of vector predicated intrinsics for vector operations.
     * Will be set by certain backends (e.g. RISC V) to control codegen. */
    bool use_llvm_vp_intrinsics = false;
    // @}

    /** Generate a basic dense vector load, with an optional predicate and
     * control over whether or not we should slice the load into native
     * vectors. Used by CodeGen_ARM to help with vld2/3/4 emission. */
    llvm::Value *codegen_dense_vector_load(const Load *load, llvm::Value *vpred = nullptr, bool slice_to_native = true);

    /** Warning messages which we want to avoid displaying more than once */
    enum class WarningKind {
        EmulatedFloat16,
    };
    std::map<WarningKind, std::string> onetime_warnings;

private:
    /** All the values in scope at the current code location during
     * codegen. Use sym_push and sym_pop to access. */
    Scope<llvm::Value *> symbol_table;

    /** String constants already emitted to the module. Tracked to
     * prevent emitting the same string many times. */
    std::map<std::string, llvm::Constant *> string_constants;

    /** A basic block to branch to on error that triggers all
     * destructors. As destructors are registered, code gets added
     * to this block. */
    llvm::BasicBlock *destructor_block = nullptr;

    /** Turn off all unsafe math flags in scopes while this is set. */
    bool strict_float;

    /** Use the LLVM large code model when this is set. */
    bool llvm_large_code_model;

    /** Cache the result of target_vscale from the architecture-specific
     * implementation, as this is used on every Halide to LLVM type conversion.
     */
    int effective_vscale = 0;

    /** Assign a unique ID to each producer-consumer and for-loop node. The IDs
     * are printed as comments in assembly and used to link visualizations with
     * the generated assembly code within `StmtToViz`.
     */
    int producer_consumer_id = 0;
    int for_loop_id = 0;

    /** Embed an instance of halide_filter_metadata_t in the code, using
     * the given name (by convention, this should be ${FUNCTIONNAME}_metadata)
     * as extern "C" linkage. Note that the return value is a function-returning-
     * pointer-to-constant-data.
     */
    llvm::Function *embed_metadata_getter(const std::string &metadata_getter_name,
                                          const std::string &function_name, const std::vector<LoweredArgument> &args,
                                          const MetadataNameMap &metadata_name_map);

    /** Embed a constant expression as a global variable. */
    llvm::Constant *embed_constant_expr(Expr e, llvm::Type *t);
    llvm::Constant *embed_constant_scalar_value_t(const Expr &e);

    llvm::Function *add_argv_wrapper(llvm::Function *fn, const std::string &name,
                                     bool result_in_argv, std::vector<bool> &arg_is_buffer);

    llvm::Value *codegen_vector_load(const Type &type, const std::string &name, const Expr &base,
                                     const Buffer<> &image, const Parameter &param, const ModulusRemainder &alignment,
                                     llvm::Value *vpred = nullptr, bool slice_to_native = true, llvm::Value *stride = nullptr);

    virtual void codegen_predicated_load(const Load *op);
    virtual void codegen_predicated_store(const Store *op);

    void codegen_atomic_rmw(const Store *op);

    void init_codegen(const std::string &name);
    std::unique_ptr<llvm::Module> finish_codegen();

    /** A helper routine for generating folded vector reductions. */
    template<typename Op>
    bool try_to_fold_vector_reduce(const Expr &a, Expr b);

    /** Records the StructType for pointer values returned from
     * make_struct intrinsic. Required for opaque pointer support.
     * This map should never grow without bound as each entry
     * represents a unique struct type created by a closure or similar.
     */
    std::map<llvm::Value *, llvm::Type *> struct_type_recovery;
};

} // namespace Internal

/** Given a Halide module, generate an llvm::Module. */
std::unique_ptr<llvm::Module> codegen_llvm(const Module &module,
                                           llvm::LLVMContext &context);
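
// Illustrative usage (editor's addition, not part of the upstream
// header), assuming a lowered Halide `Module` named `m`:
//
//     llvm::LLVMContext ctx;
//     std::unique_ptr<llvm::Module> out = Halide::codegen_llvm(m, ctx);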

} // namespace Halide

#endif