Halide
CodeGen_LLVM.h
Go to the documentation of this file.
1 #ifndef HALIDE_CODEGEN_LLVM_H
2 #define HALIDE_CODEGEN_LLVM_H
3 
4 /** \file
5  *
6  * Defines the base-class for all architecture-specific code
7  * generators that use llvm.
8  */
9 
10 namespace llvm {
11 class Value;
12 class Module;
13 class Function;
14 class FunctionType;
15 class IRBuilderDefaultInserter;
16 class ConstantFolder;
17 template<typename, typename>
18 class IRBuilder;
19 class LLVMContext;
20 class Type;
21 class StructType;
22 class Instruction;
23 class CallInst;
24 class ExecutionEngine;
25 class AllocaInst;
26 class Constant;
27 class Triple;
28 class MDNode;
29 class NamedMDNode;
30 class DataLayout;
31 class BasicBlock;
32 class GlobalVariable;
33 } // namespace llvm
34 
35 #include <map>
36 #include <memory>
37 #include <optional>
38 #include <string>
39 #include <variant>
40 #include <vector>
41 
42 #include "IRVisitor.h"
43 #include "Module.h"
44 #include "Scope.h"
45 #include "Target.h"
46 
47 namespace Halide {
48 
49 struct ExternSignature;
50 
51 namespace Internal {
52 
53 /** A code generator abstract base class. Actual code generators
54  * (e.g. CodeGen_X86) inherit from this. This class is responsible
55  * for taking a Halide Stmt and producing llvm bitcode, machine
56  * code in an object file, or machine code accessible through a
57  * function pointer.
58  */
59 class CodeGen_LLVM : public IRVisitor {
60 public:
61  /** Create an instance of CodeGen_LLVM suitable for the target. */
62  static std::unique_ptr<CodeGen_LLVM> new_for_target(const Target &target, llvm::LLVMContext &context);
63 
64  /** Takes a halide Module and compiles it to an llvm Module. */
65  virtual std::unique_ptr<llvm::Module> compile(const Module &module);
66 
67  /** The target we're generating code for */
68  const Target &get_target() const {
69  return target;
70  }
71 
72  /** Tell the code generator which LLVM context to use. */
73  void set_context(llvm::LLVMContext &context);
74 
75  /** Initialize internal llvm state for the enabled targets. */
76  static void initialize_llvm();
77 
78  static std::unique_ptr<llvm::Module> compile_trampolines(
79  const Target &target,
80  llvm::LLVMContext &context,
81  const std::string &suffix,
82  const std::vector<std::pair<std::string, ExternSignature>> &externs);
83 
84  size_t get_requested_alloca_total() const {
86  }
87 
88 protected:
89  CodeGen_LLVM(const Target &t);
90 
91  /** Compile a specific halide declaration into the llvm Module. */
92  // @{
93  virtual void compile_func(const LoweredFunc &func, const std::string &simple_name, const std::string &extern_name);
94  virtual void compile_buffer(const Buffer<> &buffer);
95  // @}
96 
97  /** Helper functions for compiling Halide functions to llvm
98  * functions. begin_func performs all the work necessary to begin
99  * generating code for a function with a given argument list with
100  * the IRBuilder. A call to begin_func should be followed by a
101  * call to end_func with the same arguments, to generate the
102  * appropriate cleanup code. */
103  // @{
104  virtual void begin_func(LinkageType linkage, const std::string &simple_name,
105  const std::string &extern_name, const std::vector<LoweredArgument> &args);
106  virtual void end_func(const std::vector<LoweredArgument> &args);
107  // @}
108 
109  /** What should be passed as -mcpu (warning: implies attrs!), -mattrs,
110  * and related for compilation. The architecture-specific code generator
111  * should define these.
112  *
113  * `mcpu_target()` - target this specific CPU, in the sense of the allowed
114  * ISA sets *and* the CPU-specific tuning/assembly instruction scheduling.
115  *
116  * `mcpu_tune()` - expect that we will be running on this specific CPU,
117  * so perform CPU-specific tuning/assembly instruction scheduling, *but*
118  * DON'T sacrifice the portability, support running on other CPUs, only
119  * make use of the ISAs that are enabled by `mcpu_target()`+`mattrs()`.
120  */
121  // @{
122  virtual std::string mcpu_target() const = 0;
123  virtual std::string mcpu_tune() const = 0;
124  virtual std::string mattrs() const = 0;
125  virtual std::string mabi() const;
126  virtual bool use_soft_float_abi() const = 0;
127  virtual bool use_pic() const;
128  // @}
129 
130  /** Should indexing math be promoted to 64-bit on platforms with
131  * 64-bit pointers? */
132  virtual bool promote_indices() const {
133  return true;
134  }
135 
136  /** What's the natural vector bit-width to use for loads, stores, etc. */
137  virtual int native_vector_bits() const = 0;
138 
139  /** Used to decide whether to break a vector up into multiple smaller
140  * operations. This is the largest size the architecture supports. */
141  virtual int maximum_vector_bits() const {
142  return native_vector_bits();
143  }
144  /** For architectures that have vscale vectors, return the constant vscale to use.
145  * Default of 0 means do not use vscale vectors. Generally will depend on
146  * the target flags and vector_bits settings.
147  */
148  virtual int target_vscale() const {
149  return 0;
150  }
151 
152  /** Return the type in which arithmetic should be done for the
153  * given storage type. */
154  virtual Type upgrade_type_for_arithmetic(const Type &) const;
155 
156  /** Return the type that a given Halide type should be
157  * stored/loaded from memory as. */
158  virtual Type upgrade_type_for_storage(const Type &) const;
159 
160  /** Return the type that a Halide type should be passed in and out
161  * of functions as. */
162  virtual Type upgrade_type_for_argument_passing(const Type &) const;
163 
164  std::unique_ptr<llvm::Module> module;
165  llvm::Function *function;
166  llvm::LLVMContext *context;
167  std::unique_ptr<llvm::IRBuilder<llvm::ConstantFolder, llvm::IRBuilderDefaultInserter>> builder;
168  llvm::Value *value;
169  llvm::MDNode *very_likely_branch;
170  llvm::MDNode *default_fp_math_md;
171  llvm::MDNode *strict_fp_math_md;
172  std::vector<LoweredArgument> current_function_args;
173 
174  /** The target we're generating code for */
176 
177  /** Grab all the context specific internal state. */
178  virtual void init_context();
179  /** Initialize the CodeGen_LLVM internal state to compile a fresh
179  * module. This allows reuse of one CodeGen_LLVM object to compile
181  * multiple related modules (e.g. multiple device kernels). */
182  virtual void init_module();
183 
184  /** Run all of llvm's optimization passes on the module. */
185  void optimize_module();
186 
187  /** Add an entry to the symbol table, hiding previous entries with
188  * the same name. Call this when new values come into scope. */
189  void sym_push(const std::string &name, llvm::Value *value);
190 
190  /** Remove an entry from the symbol table, revealing any previous
192  * entries with the same name. Call this when values go out of
193  * scope. */
194  void sym_pop(const std::string &name);
195 
196  /** Fetch an entry from the symbol table. If the symbol is not
197  * found, it either errors out (if the second arg is true), or
198  * returns nullptr. */
199  llvm::Value *sym_get(const std::string &name,
200  bool must_succeed = true) const;
201 
202  /** Test if an item exists in the symbol table. */
203  bool sym_exists(const std::string &name) const;
204 
205  /** Given a Halide ExternSignature, return the equivalent llvm::FunctionType. */
206  llvm::FunctionType *signature_to_type(const ExternSignature &signature);
207 
208  /** Some useful llvm types */
209  // @{
210  llvm::Type *void_t, *i1_t, *i8_t, *i16_t, *i32_t, *i64_t, *f16_t, *f32_t, *f64_t;
211  llvm::StructType *halide_buffer_t_type,
220 
221  // @}
222 
223  /** Some wildcard variables used for peephole optimizations in
224  * subclasses */
225  // @{
229 
230  // Wildcards for scalars.
234  // @}
235 
236  /** Emit code that evaluates an expression, and return the llvm
237  * representation of the result of the expression. */
238  llvm::Value *codegen(const Expr &);
239 
240  /** Emit code that runs a statement. */
241  void codegen(const Stmt &);
242 
243  /** Codegen a vector Expr by codegenning each lane and combining. */
244  void scalarize(const Expr &);
245 
246  /** Some destructors should always be called. Others should only
247  * be called if the pipeline is exiting with an error code. */
251 
252  /* Call this at the location of object creation to register how an
253  * object should be destroyed. This does three things:
254  * 1) Emits code here that puts the object in a unique
255  * null-initialized stack slot
256  * 2) Adds an instruction to the destructor block that calls the
257  * destructor on that stack slot if it's not null.
258  * 3) Returns that stack slot, so you can neuter the destructor
259  * (by storing null to the stack slot) or destroy the object early
260  * (by calling trigger_destructor).
261  */
262  llvm::Value *register_destructor(llvm::Function *destructor_fn, llvm::Value *obj, DestructorType when);
263 
264  /** Call a destructor early. Pass in the value returned by register destructor. */
265  void trigger_destructor(llvm::Function *destructor_fn, llvm::Value *stack_slot);
266 
267  /** Retrieves the block containing the error handling
268  * code. Creates it if it doesn't already exist for this
269  * function. */
270  llvm::BasicBlock *get_destructor_block();
271 
272  /** Codegen an assertion. If false, returns the error code (if not
273  * null), or evaluates and returns the message, which must be an
274  * Int(32) expression. */
275  // @{
276  void create_assertion(llvm::Value *condition, const Expr &message, llvm::Value *error_code = nullptr);
277  // @}
278 
279  /** Codegen a block of asserts with pure conditions */
280  void codegen_asserts(const std::vector<const AssertStmt *> &asserts);
281 
282  /** Return from the pipeline with the given error code. Will run
283  * the destructor block. */
284  void return_with_error_code(llvm::Value *error_code);
285 
286  /** Put a string constant in the module as a global variable and return a pointer to it. */
287  llvm::Constant *create_string_constant(const std::string &str);
288 
289  /** Put a binary blob in the module as a global variable and return a pointer to it. */
290  llvm::Constant *create_binary_blob(const std::vector<char> &data, const std::string &name, bool constant = true);
291 
292  /** Widen an llvm scalar into an llvm vector with the given number of lanes. */
293  llvm::Value *create_broadcast(llvm::Value *, int lanes);
294 
295  /** Generate a pointer into a named buffer at a given index, of a
296  * given type. The index counts according to the scalar type of
297  * the type passed in. */
298  // @{
299  llvm::Value *codegen_buffer_pointer(const std::string &buffer, Type type, llvm::Value *index);
300  llvm::Value *codegen_buffer_pointer(const std::string &buffer, Type type, Expr index);
301  llvm::Value *codegen_buffer_pointer(llvm::Value *base_address, Type type, Expr index);
302  llvm::Value *codegen_buffer_pointer(llvm::Value *base_address, Type type, llvm::Value *index);
303  // @}
304 
305  /** Return type string for LLVM type using LLVM IR intrinsic type mangling.
306  * E.g. ".i32" or ".f32" for scalars, ".p0" for pointers,
307  * ".nxv4i32" for a scalable vector of four 32-bit integers,
308  * or ".v4f32" for a fixed vector of four 32-bit floats.
309  * The dot is included in the result.
310  */
311  std::string mangle_llvm_type(llvm::Type *type);
312 
313  /** Turn a Halide Type into an llvm::Value representing a constant halide_type_t */
314  llvm::Value *make_halide_type_t(const Type &);
315 
316  /** Mark a load or store with type-based-alias-analysis metadata
317  * so that llvm knows it can reorder loads and stores across
318  * different buffers */
319  void add_tbaa_metadata(llvm::Instruction *inst, std::string buffer, const Expr &index);
320 
321  /** Get a unique name for the actual block of memory that an
322  * allocate node uses. Used so that alias analysis understands
323  * when multiple Allocate nodes share the same memory. */
324  virtual std::string get_allocation_name(const std::string &n) {
325  return n;
326  }
327 
328  /** Add the appropriate function attribute to tell LLVM that the function
329  * doesn't access memory. */
330  void function_does_not_access_memory(llvm::Function *fn);
331 
332  using IRVisitor::visit;
333 
334  /** Generate code for various IR nodes. These can be overridden by
335  * architecture-specific code to perform peephole
336  * optimizations. The result of each is stored in \ref value */
337  // @{
338  void visit(const IntImm *) override;
339  void visit(const UIntImm *) override;
340  void visit(const FloatImm *) override;
341  void visit(const StringImm *) override;
342  void visit(const Cast *) override;
343  void visit(const Reinterpret *) override;
344  void visit(const Variable *) override;
345  void visit(const Add *) override;
346  void visit(const Sub *) override;
347  void visit(const Mul *) override;
348  void visit(const Div *) override;
349  void visit(const Mod *) override;
350  void visit(const Min *) override;
351  void visit(const Max *) override;
352  void visit(const EQ *) override;
353  void visit(const NE *) override;
354  void visit(const LT *) override;
355  void visit(const LE *) override;
356  void visit(const GT *) override;
357  void visit(const GE *) override;
358  void visit(const And *) override;
359  void visit(const Or *) override;
360  void visit(const Not *) override;
361  void visit(const Select *) override;
362  void visit(const Load *) override;
363  void visit(const Ramp *) override;
364  void visit(const Broadcast *) override;
365  void visit(const Call *) override;
366  void visit(const Let *) override;
367  void visit(const LetStmt *) override;
368  void visit(const AssertStmt *) override;
369  void visit(const ProducerConsumer *) override;
370  void visit(const For *) override;
371  void visit(const Store *) override;
372  void visit(const Block *) override;
373  void visit(const IfThenElse *) override;
374  void visit(const Evaluate *) override;
375  void visit(const Shuffle *) override;
376  void visit(const VectorReduce *) override;
377  void visit(const Prefetch *) override;
378  void visit(const Atomic *) override;
379  // @}
380 
381  /** Generate code for an allocate node. It has no default
382  * implementation - it must be handled in an architecture-specific
383  * way. */
384  void visit(const Allocate *) override = 0;
385 
386  /** Generate code for a free node. It has no default
387  * implementation and must be handled in an architecture-specific
388  * way. */
389  void visit(const Free *) override = 0;
390 
391  /** These IR nodes should have been removed during
392  * lowering. CodeGen_LLVM will error out if they are present */
393  // @{
394  void visit(const Provide *) override;
395  void visit(const Realize *) override;
396  // @}
397 
398  /** If we have to bail out of a pipeline midway, this should
399  * inject the appropriate target-specific cleanup code. */
400  virtual void prepare_for_early_exit() {
401  }
402 
403  /** Get the llvm type equivalent to the given halide type in the
404  * current context. */
405  virtual llvm::Type *llvm_type_of(const Type &) const;
406 
407  /** Get the llvm type equivalent to a given halide type. If
408  * effective_vscale is nonzero and the type is a vector type with lanes
409  * a multiple of effective_vscale, a scalable vector type is generated
410  * with total lanes divided by effective_vscale. That is a scalable
411  * vector intended to be used with a fixed vscale of effective_vscale.
412  */
413  llvm::Type *llvm_type_of(llvm::LLVMContext *context, Halide::Type t,
414  int effective_vscale) const;
415 
416  /** Perform an alloca at the function entrypoint. Will be cleaned
417  * on function exit. */
418  llvm::Value *create_alloca_at_entry(llvm::Type *type, int n,
419  bool zero_initialize = false,
420  const std::string &name = "");
421 
422  /** A (very) conservative guess at the size of all alloca() storage requested
423  * (including alignment padding). It's currently meant only to be used as
424  * a very coarse way to ensure there is enough stack space when testing
425  * on the WebAssembly backend.
426  *
427  * It is *not* meant to be a useful proxy for "stack space needed", for a
428  * number of reasons:
429  * - allocas with non-overlapping lifetimes will share space
430  * - on some backends, LLVM may promote register-sized allocas into registers
431  * - while this accounts for alloca() calls we know about, it doesn't attempt
432  * to account for stack spills, function call overhead, etc.
433  */
435 
436  /** The user_context argument. May be a constant null if the
437  * function is being compiled without a user context. */
438  llvm::Value *get_user_context() const;
439 
440  /** Implementation of the intrinsic call to
441  * interleave_vectors. This implementation allows for interleaving
442  * an arbitrary number of vectors.*/
443  virtual llvm::Value *interleave_vectors(const std::vector<llvm::Value *> &);
444 
445  /** Description of an intrinsic function overload. Overloads are resolved
446  * using both argument and return types. The scalar types of the arguments
447  * and return type must match exactly for an overload resolution to succeed. */
448  struct Intrinsic {
450  std::vector<Type> arg_types;
451  llvm::Function *impl;
452 
453  Intrinsic(Type result_type, std::vector<Type> arg_types, llvm::Function *impl)
454  : result_type(result_type), arg_types(std::move(arg_types)), impl(impl) {
455  }
456  };
457  /** Mapping of intrinsic functions to the various overloads implementing it. */
458  std::map<std::string, std::vector<Intrinsic>> intrinsics;
459 
460  /** Get an LLVM intrinsic declaration. If it doesn't exist, it will be created. */
461  llvm::Function *get_llvm_intrin(const Type &ret_type, const std::string &name, const std::vector<Type> &arg_types, bool scalars_are_vectors = false);
462  llvm::Function *get_llvm_intrin(llvm::Type *ret_type, const std::string &name, const std::vector<llvm::Type *> &arg_types);
463  /** Declare an intrinsic function that participates in overload resolution. */
464  llvm::Function *declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector<Type> arg_types, bool scalars_are_vectors = false);
465  void declare_intrin_overload(const std::string &name, const Type &ret_type, llvm::Function *impl, std::vector<Type> arg_types);
466  /** Call an overloaded intrinsic function. Returns nullptr if no suitable overload is found. */
467  llvm::Value *call_overloaded_intrin(const Type &result_type, const std::string &name, const std::vector<Expr> &args);
468 
469  /** Generate a call to a vector intrinsic or runtime inlined
470  * function. The arguments are sliced up into vectors of the width
471  * given by 'intrin_lanes', the intrinsic is called on each
472  * piece, then the results (if any) are concatenated back together
473  * into the original type 't'. For the version that takes an
474  * llvm::Type *, the type may be void, so the vector width of the
475  * arguments must be specified explicitly as
476  * 'called_lanes'. */
477  // @{
478  llvm::Value *call_intrin(const Type &t, int intrin_lanes,
479  const std::string &name, std::vector<Expr>);
480  llvm::Value *call_intrin(const Type &t, int intrin_lanes,
481  llvm::Function *intrin, std::vector<Expr>);
482  llvm::Value *call_intrin(const llvm::Type *t, int intrin_lanes,
483  const std::string &name, std::vector<llvm::Value *>,
484  bool scalable_vector_result = false, bool is_reduction = false);
485  llvm::Value *call_intrin(const llvm::Type *t, int intrin_lanes,
486  llvm::Function *intrin, std::vector<llvm::Value *>,
487  bool is_reduction = false);
488  // @}
489 
490  /** Take a slice of lanes out of an llvm vector. Pads with undefs
491  * if you ask for more lanes than the vector has. */
492  virtual llvm::Value *slice_vector(llvm::Value *vec, int start, int extent);
493 
494  /** Concatenate a bunch of llvm vectors. Must be of the same type. */
495  virtual llvm::Value *concat_vectors(const std::vector<llvm::Value *> &);
496 
497  /** Create an LLVM shuffle vectors instruction. */
498  virtual llvm::Value *shuffle_vectors(llvm::Value *a, llvm::Value *b,
499  const std::vector<int> &indices);
500  /** Shorthand for shuffling a single vector. */
501  llvm::Value *shuffle_vectors(llvm::Value *v, const std::vector<int> &indices);
502 
503  /** Go looking for a vector version of a runtime function. Will
504  * return the best match. Matches in the following order:
505  *
506  * 1) The requested vector width.
507  *
508  * 2) The width which is the smallest power of two
509  * greater than or equal to the vector width.
510  *
511  * 3) All the factors of 2) greater than one, in decreasing order.
512  *
513  * 4) The smallest power of two not yet tried.
514  *
515  * So for a 5-wide vector, it tries: 5, 8, 4, 2, 16.
516  *
517  * If there's no match, returns (nullptr, 0).
518  */
519  std::pair<llvm::Function *, int> find_vector_runtime_function(const std::string &name, int lanes);
520 
521  virtual bool supports_atomic_add(const Type &t) const;
522 
523  /** Compile a horizontal reduction that starts with an explicit
524  * initial value. There are lots of complex ways to peephole
525  * optimize this pattern, especially with the proliferation of
526  * dot-product instructions, and they can usefully share logic
527  * across backends. */
528  virtual void codegen_vector_reduce(const VectorReduce *op, const Expr &init);
529 
530  /** Are we inside an atomic node that uses mutex locks?
531  This is used for detecting deadlocks from nested atomics & illegal vectorization. */
533 
534  /** Emit atomic store instructions? */
536 
537  /** Can we call this operation with float16 type?
538  This is used to avoid "emulated" equivalent code-gen in case target has FP16 feature **/
539  virtual bool supports_call_as_float16(const Call *op) const;
540 
541  /** call_intrin does far too much to be useful and generally breaks things
542  * when one has carefully set things up for a specific architecture. This
543  * just does the bare minimum. call_intrin should be refactored and could
544  * call this, possibly with renaming of the methods. */
545  llvm::Value *simple_call_intrin(const std::string &intrin,
546  const std::vector<llvm::Value *> &args,
547  llvm::Type *result_type);
548 
549  /** Ensure that a vector value is either fixed or vscale, as required to match desired_type.
550  */
551  llvm::Value *normalize_fixed_scalable_vector_type(llvm::Type *desired_type, llvm::Value *result);
552 
553  /** Convert between two LLVM vectors of potentially different scalable/fixed and size.
554  * Used to handle converting to/from fixed vectors that are smaller than the minimum
555  * size scalable vector. */
556  llvm::Value *convert_fixed_or_scalable_vector_type(llvm::Value *arg,
557  llvm::Type *desired_type);
558 
559  /** Convert an LLVM fixed vector value to the corresponding vscale vector value. */
560  llvm::Value *fixed_to_scalable_vector_type(llvm::Value *fixed);
561 
562  /** Convert an LLVM vscale vector value to the corresponding fixed vector value. */
563  llvm::Value *scalable_to_fixed_vector_type(llvm::Value *scalable);
564 
565  /** Get number of vector elements, taking into account scalable vectors. Returns 1 for scalars. */
566  int get_vector_num_elements(const llvm::Type *t);
567 
568  /** Interface to abstract vector code generation as LLVM is now
569  * providing multiple options to express even simple vector
570  * operations. Specifically traditional fixed length vectors, vscale
571  * based variable length vectors, and the vector predicate based approach
572  * where an explicit length is passed with each instruction.
573  */
574  // @{
575  enum class VectorTypeConstraint {
576  None, /// Use default for current target.
577  Fixed, /// Force use of fixed size vectors.
578  VScale, /// Force use of scalable vectors.
579  };
580  llvm::Type *get_vector_type(llvm::Type *, int n,
581  VectorTypeConstraint type_constraint = VectorTypeConstraint::None) const;
582  // @}
583 
584  llvm::Constant *get_splat(int lanes, llvm::Constant *value,
585  VectorTypeConstraint type_constraint = VectorTypeConstraint::None) const;
586 
587  /** Support for generating LLVM vector predication intrinsics
588  * ("@llvm.vp.*" and "@llvm.experimental.vp.*")
589  */
590  // @{
591  /** Struct to hold descriptor for an argument to a vector
592  * predicated intrinsic. This includes the value, whether the
593  * type of the argument should be mangled into the intrinsic name
594  * and if so, where, and the alignment for pointer arguments. */
595  struct VPArg {
596  llvm::Value *value;
597  // If provided, put argument's type into the intrinsic name via LLVM IR type mangling.
598  std::optional<size_t> mangle_index;
600  VPArg(llvm::Value *value, std::optional<size_t> mangle_index = std::nullopt, int32_t alignment = 0)
602  }
603  };
604 
605  /** Type indicating an intrinsic does not take a mask. */
606  struct NoMask {
607  };
608 
609  /** Type indicating mask to use is all true -- all lanes enabled. */
610  struct AllEnabledMask {
611  };
612 
613  /** Predication mask using the above two types for special cases
614  * and an llvm::Value for the general one. */
615  using MaskVariant = std::variant<NoMask, AllEnabledMask, llvm::Value *>;
616 
617  /** Generate a vector predicated comparison intrinsic call if
618  * use_llvm_vp_intrinsics is true and result_type is a vector
619  * type. If generated, assigns result of vp intrinsic to value and
620  * returns true if an instruction is generated, otherwise
621  * returns false. */
622  bool try_vector_predication_comparison(const std::string &name, const Type &result_type,
623  MaskVariant mask, llvm::Value *a, llvm::Value *b,
624  const char *cmp_op);
625 
626  struct VPResultType {
627  llvm::Type *type;
628  std::optional<size_t> mangle_index;
629  VPResultType(llvm::Type *type, std::optional<size_t> mangle_index = std::nullopt)
631  }
632  };
633 
634  /** Generate an intrinsic call if use_llvm_vp_intrinsics is true
635  * and length is greater than 1. If generated, assigns result
636  * of vp intrinsic to value and returns true if an instruction
637  * is generated, otherwise returns false. */
638  bool try_vector_predication_intrinsic(const std::string &name, VPResultType result_type,
639  int32_t length, MaskVariant mask, std::vector<VPArg> args);
640 
641  /** Controls use of vector predicated intrinsics for vector operations.
642  * Will be set by certain backends (e.g. RISC V) to control codegen. */
644  // @}
645 
646  /** Generate a basic dense vector load, with an optional predicate and
647  * control over whether or not we should slice the load into native
648  * vectors. Used by CodeGen_ARM to help with vld2/3/4 emission. */
649  llvm::Value *codegen_dense_vector_load(const Load *load, llvm::Value *vpred = nullptr, bool slice_to_native = true);
650 
651  /** Warning messages which we want to avoid displaying multiple times */
652  enum class WarningKind {
654  };
655  std::map<WarningKind, std::string> onetime_warnings;
656 
657 private:
658  /** All the values in scope at the current code location during
659  * codegen. Use sym_push and sym_pop to access. */
660  Scope<llvm::Value *> symbol_table;
661 
662  /** String constants already emitted to the module. Tracked to
663  * prevent emitting the same string many times. */
664  std::map<std::string, llvm::Constant *> string_constants;
665 
666  /** A basic block to branch to on error that triggers all
667  * destructors. As destructors are registered, code gets added
668  * to this block. */
669  llvm::BasicBlock *destructor_block;
670 
671  /** Turn off all unsafe math flags in scopes while this is set. */
672  bool strict_float;
673 
674  /** Use the LLVM large code model when this is set. */
675  bool llvm_large_code_model;
676 
677  /** Cache the result of target_vscale from architecture specific implementation
678  * as this is used on every Halide to LLVM type conversion.
679  */
680  int effective_vscale;
681 
682  /** Assign a unique ID to each producer-consumer and for-loop node. The IDs
683  * are printed as comments in assembly and used to link visualizations with
684  * the generated assembly code within `StmtToViz`
685  */
686  int producer_consumer_id = 0;
687  int for_loop_id = 0;
688 
689  /** Embed an instance of halide_filter_metadata_t in the code, using
690  * the given name (by convention, this should be ${FUNCTIONNAME}_metadata)
691  * as extern "C" linkage. Note that the return value is a function-returning-
692  * pointer-to-constant-data.
693  */
694  llvm::Function *embed_metadata_getter(const std::string &metadata_getter_name,
695  const std::string &function_name, const std::vector<LoweredArgument> &args,
696  const MetadataNameMap &metadata_name_map);
697 
698  /** Embed a constant expression as a global variable. */
699  llvm::Constant *embed_constant_expr(Expr e, llvm::Type *t);
700  llvm::Constant *embed_constant_scalar_value_t(const Expr &e);
701 
702  llvm::Function *add_argv_wrapper(llvm::Function *fn, const std::string &name,
703  bool result_in_argv, std::vector<bool> &arg_is_buffer);
704 
705  llvm::Value *codegen_vector_load(const Type &type, const std::string &name, const Expr &base,
706  const Buffer<> &image, const Parameter &param, const ModulusRemainder &alignment,
707  llvm::Value *vpred = nullptr, bool slice_to_native = true, llvm::Value *stride = nullptr);
708 
709  virtual void codegen_predicated_load(const Load *op);
710  virtual void codegen_predicated_store(const Store *op);
711 
712  void codegen_atomic_rmw(const Store *op);
713 
714  void init_codegen(const std::string &name, bool any_strict_float = false);
715  std::unique_ptr<llvm::Module> finish_codegen();
716 
717  /** A helper routine for generating folded vector reductions. */
718  template<typename Op>
719  bool try_to_fold_vector_reduce(const Expr &a, Expr b);
720 
721  /** Records the StructType for pointer values returned from
722  * make_struct intrinsic. Required for opaque pointer support.
723  * This map should never grow without bound as each entry
724  * represents a unique struct type created by a closure or similar.
725  */
726  std::map<llvm::Value *, llvm::Type *> struct_type_recovery;
727 };
728 
729 } // namespace Internal
730 
731 /** Given a Halide module, generate an llvm::Module. */
732 std::unique_ptr<llvm::Module> codegen_llvm(const Module &module,
733  llvm::LLVMContext &context);
734 
735 } // namespace Halide
736 
737 #endif
int32_t
signed __INT32_TYPE__ int32_t
Definition: runtime_internal.h:24
Halide::Internal::CodeGen_LLVM::maximum_vector_bits
virtual int maximum_vector_bits() const
Used to decide whether to break a vector up into multiple smaller operations.
Definition: CodeGen_LLVM.h:141
Halide::Internal::CodeGen_LLVM::NoMask
Type indicating an intrinsic does not take a mask.
Definition: CodeGen_LLVM.h:606
Halide::Internal::CodeGen_LLVM::upgrade_type_for_argument_passing
virtual Type upgrade_type_for_argument_passing(const Type &) const
Return the type that a Halide type should be passed in and out of functions as.
Halide::Internal::Allocate
Allocate a scratch area called with the given name, type, and size.
Definition: IR.h:363
Halide::Internal::IRMatcher::intrin
HALIDE_ALWAYS_INLINE auto intrin(Call::IntrinsicOp intrinsic_op, Args... args) noexcept -> Intrin< decltype(pattern_arg(args))... >
Definition: IRMatch.h:1534
Halide::Internal::Add
The sum of two expressions.
Definition: IR.h:48
Scope.h
llvm
Definition: CodeGen_Internal.h:18
Halide::Internal::CodeGen_LLVM::wild_f32_
Expr wild_f32_
Definition: CodeGen_LLVM.h:233
Halide::Internal::CodeGen_LLVM::prepare_for_early_exit
virtual void prepare_for_early_exit()
If we have to bail out of a pipeline midway, this should inject the appropriate target-specific clean...
Definition: CodeGen_LLVM.h:400
Halide::Internal::CodeGen_LLVM::wild_u8x_
Expr wild_u8x_
Definition: CodeGen_LLVM.h:226
Halide::Internal::CodeGen_LLVM::mcpu_target
virtual std::string mcpu_target() const =0
What should be passed as -mcpu (warning: implies attrs!), -mattrs, and related for compilation.
Halide::Internal::CodeGen_LLVM
A code generator abstract base class.
Definition: CodeGen_LLVM.h:59
Halide::Internal::CodeGen_LLVM::wild_u8_
Expr wild_u8_
Definition: CodeGen_LLVM.h:231
Halide::Internal::IRVisitor::visit
virtual void visit(const IntImm *)
Halide::Internal::Monotonic::Constant
@ Constant
Halide::Internal::CodeGen_LLVM::sym_pop
void sym_pop(const std::string &name)
Remove an entry from the symbol table, revealing any previous entries with the same name.
Halide::Internal::CodeGen_LLVM::wild_i64x_
Expr wild_i64x_
Definition: CodeGen_LLVM.h:227
Halide::Internal::CodeGen_LLVM::compile_buffer
virtual void compile_buffer(const Buffer<> &buffer)
Halide::Internal::CodeGen_LLVM::set_context
void set_context(llvm::LLVMContext &context)
Tell the code generator which LLVM context to use.
Halide::Internal::CodeGen_LLVM::signature_to_type
llvm::FunctionType * signature_to_type(const ExternSignature &signature)
Given a Halide ExternSignature, return the equivalent llvm::FunctionType.
Halide::Internal::CodeGen_LLVM::initialize_llvm
static void initialize_llvm()
Initialize internal llvm state for the enabled targets.
Halide::Internal::ArgInfoKind::Function
@ Function
Halide::Internal::CodeGen_LLVM::shuffle_vectors
virtual llvm::Value * shuffle_vectors(llvm::Value *a, llvm::Value *b, const std::vector< int > &indices)
Create an LLVM shuffle vectors instruction.
Halide::Internal::CodeGen_LLVM::wild_i16x_
Expr wild_i16x_
Definition: CodeGen_LLVM.h:226
Halide::Internal::VectorReduce
Horizontally reduce a vector to a scalar or narrower vector using the given commutative and associati...
Definition: IR.h:929
Halide::Internal::CodeGen_LLVM::convert_fixed_or_scalable_vector_type
llvm::Value * convert_fixed_or_scalable_vector_type(llvm::Value *arg, llvm::Type *desired_type)
Convert between two LLVM vectors of potentially different scalable/fixed and size.
Halide::Internal::CodeGen_LLVM::inside_atomic_mutex_node
bool inside_atomic_mutex_node
Are we inside an atomic node that uses mutex locks? This is used for detecting deadlocks from nested ...
Definition: CodeGen_LLVM.h:532
Halide::Internal::CodeGen_LLVM::sym_get
llvm::Value * sym_get(const std::string &name, bool must_succeed=true) const
Fetch an entry from the symbol table.
Halide::Internal::Parameter
A reference-counted handle to a parameter to a halide pipeline.
Definition: Parameter.h:28
Halide::Internal::CodeGen_LLVM::fixed_to_scalable_vector_type
llvm::Value * fixed_to_scalable_vector_type(llvm::Value *fixed)
Convert an LLVM fixed vector value to the corresponding vscale vector value.
Halide::Internal::CodeGen_LLVM::create_alloca_at_entry
llvm::Value * create_alloca_at_entry(llvm::Type *type, int n, bool zero_initialize=false, const std::string &name="")
Perform an alloca at the function entrypoint.
Halide::Internal::CodeGen_LLVM::wild_f64_
Expr wild_f64_
Definition: CodeGen_LLVM.h:233
Halide::Internal::CodeGen_LLVM::end_func
virtual void end_func(const std::vector< LoweredArgument > &args)
Halide::Internal::CodeGen_LLVM::optimize_module
void optimize_module()
Run all of llvm's optimization passes on the module.
Halide::Internal::CodeGen_LLVM::make_halide_type_t
llvm::Value * make_halide_type_t(const Type &)
Turn a Halide Type into an llvm::Value representing a constant halide_type_t.
Halide::Internal::CodeGen_LLVM::function_does_not_access_memory
void function_does_not_access_memory(llvm::Function *fn)
Add the appropriate function attribute to tell LLVM that the function doesn't access memory.
Halide::Internal::CodeGen_LLVM::i64_t
llvm::Type * i64_t
Definition: CodeGen_LLVM.h:210
Halide::Internal::GE
Is the first expression greater than or equal to the second.
Definition: IR.h:158
Halide::Internal::IRVisitor
A base class for algorithms that need to recursively walk over the IR.
Definition: IRVisitor.h:19
Halide::Internal::For
A for loop.
Definition: IR.h:788
Halide::Internal::CodeGen_LLVM::get_target
const Target & get_target() const
The target we're generating code for.
Definition: CodeGen_LLVM.h:68
Halide::Internal::CodeGen_LLVM::init_context
virtual void init_context()
Grab all the context specific internal state.
Halide::Internal::CodeGen_LLVM::VPResultType::type
llvm::Type * type
Definition: CodeGen_LLVM.h:627
Halide::Internal::CodeGen_LLVM::CodeGen_LLVM
CodeGen_LLVM(const Target &t)
Halide::Internal::CodeGen_LLVM::wild_f32x_
Expr wild_f32x_
Definition: CodeGen_LLVM.h:228
Halide::Internal::CodeGen_LLVM::VectorTypeConstraint::VScale
@ VScale
Force use of scalable (vscale) vectors.
Halide::Internal::CodeGen_LLVM::Intrinsic::impl
llvm::Function * impl
Definition: CodeGen_LLVM.h:451
Halide::Internal::FloatImm
Floating point constants.
Definition: Expr.h:235
Halide::Internal::CodeGen_LLVM::Always
@ Always
Definition: CodeGen_LLVM.h:248
Halide::Internal::CodeGen_LLVM::target_vscale
virtual int target_vscale() const
For architectures that have vscale vectors, return the constant vscale to use.
Definition: CodeGen_LLVM.h:148
Halide::Internal::CodeGen_LLVM::codegen_dense_vector_load
llvm::Value * codegen_dense_vector_load(const Load *load, llvm::Value *vpred=nullptr, bool slice_to_native=true)
Generate a basic dense vector load, with an optional predicate and control over whether or not we sho...
Halide::Internal::Broadcast
A vector with 'lanes' elements, in which every element is 'value'.
Definition: IR.h:251
Halide::Internal::Div
The ratio of two expressions.
Definition: IR.h:75
Halide::Internal::CodeGen_LLVM::upgrade_type_for_storage
virtual Type upgrade_type_for_storage(const Type &) const
Return the type that a given Halide type should be stored/loaded from memory as.
Halide::Internal::CodeGen_LLVM::find_vector_runtime_function
std::pair< llvm::Function *, int > find_vector_runtime_function(const std::string &name, int lanes)
Go looking for a vector version of a runtime function.
Halide::Internal::CodeGen_LLVM::codegen
llvm::Value * codegen(const Expr &)
Emit code that evaluates an expression, and return the llvm representation of the result of the expre...
Halide::Internal::IntImm
Integer constants.
Definition: Expr.h:217
Target.h
Halide::Internal::LetStmt
The statement form of a let node.
Definition: IR.h:274
Halide::Internal::CodeGen_LLVM::OnError
@ OnError
Definition: CodeGen_LLVM.h:249
Halide::Internal::CodeGen_LLVM::i16_t
llvm::Type * i16_t
Definition: CodeGen_LLVM.h:210
Halide::Internal::CodeGen_LLVM::native_vector_bits
virtual int native_vector_bits() const =0
What's the natural vector bit-width to use for loads, stores, etc.
Halide::Internal::CodeGen_LLVM::return_with_error_code
void return_with_error_code(llvm::Value *error_code)
Return from the pipeline with the given error code.
Halide::Internal::CodeGen_LLVM::compile_func
virtual void compile_func(const LoweredFunc &func, const std::string &simple_name, const std::string &extern_name)
Compile a specific halide declaration into the llvm Module.
Halide::Internal::CodeGen_LLVM::wild_i8x_
Expr wild_i8x_
Definition: CodeGen_LLVM.h:226
Halide::Internal::Scope< llvm::Value * >
Halide::Internal::CodeGen_LLVM::pseudostack_slot_t_type
llvm::StructType * pseudostack_slot_t_type
Definition: CodeGen_LLVM.h:218
Halide::Internal::CodeGen_LLVM::get_user_context
llvm::Value * get_user_context() const
The user_context argument.
Halide::Internal::CodeGen_LLVM::try_vector_predication_intrinsic
bool try_vector_predication_intrinsic(const std::string &name, VPResultType result_type, int32_t length, MaskVariant mask, std::vector< VPArg > args)
Generate an intrinsic call if use_llvm_vp_intrinsics is true and length is greater than 1.
Halide::Internal::CodeGen_LLVM::f64_t
llvm::Type * f64_t
Definition: CodeGen_LLVM.h:210
Halide::Internal::Cast
The actual IR nodes begin here.
Definition: IR.h:29
Halide::Internal::LE
Is the first expression less than or equal to the second.
Definition: IR.h:140
Halide::Internal::CodeGen_LLVM::get_allocation_name
virtual std::string get_allocation_name(const std::string &n)
Get a unique name for the actual block of memory that an allocate node uses.
Definition: CodeGen_LLVM.h:324
Halide::Internal::CodeGen_LLVM::VPArg
Support for generating LLVM vector predication intrinsics ("@llvm.vp.*" and "@llvm....
Definition: CodeGen_LLVM.h:595
Halide::Internal::CodeGen_LLVM::trigger_destructor
void trigger_destructor(llvm::Function *destructor_fn, llvm::Value *stack_slot)
Call a destructor early.
Halide::Internal::CodeGen_LLVM::normalize_fixed_scalable_vector_type
llvm::Value * normalize_fixed_scalable_vector_type(llvm::Type *desired_type, llvm::Value *result)
Ensure that a vector value is either fixed or vscale, as required to match desired_type.
Halide::Internal::NE
Is the first expression not equal to the second.
Definition: IR.h:122
Halide::Internal::CodeGen_LLVM::i32_t
llvm::Type * i32_t
Definition: CodeGen_LLVM.h:210
Halide::Internal::CodeGen_LLVM::get_splat
llvm::Constant * get_splat(int lanes, llvm::Constant *value, VectorTypeConstraint type_constraint=VectorTypeConstraint::None) const
Halide::Internal::CodeGen_LLVM::halide_buffer_t_type
llvm::StructType * halide_buffer_t_type
Definition: CodeGen_LLVM.h:211
Halide::Internal::CodeGen_LLVM::VPArg::mangle_index
std::optional< size_t > mangle_index
Definition: CodeGen_LLVM.h:598
Halide::Internal::Stmt
A reference-counted handle to a statement node.
Definition: Expr.h:418
Halide::Internal::CodeGen_LLVM::builder
std::unique_ptr< llvm::IRBuilder< llvm::ConstantFolder, llvm::IRBuilderDefaultInserter > > builder
Definition: CodeGen_LLVM.h:167
Halide::Internal::CodeGen_LLVM::simple_call_intrin
llvm::Value * simple_call_intrin(const std::string &intrin, const std::vector< llvm::Value * > &args, llvm::Type *result_type)
call_intrin does far too much to be useful and generally breaks things when one has carefully set thi...
Halide::Internal::CodeGen_LLVM::VPResultType
Definition: CodeGen_LLVM.h:626
Halide::Internal::CodeGen_LLVM::create_assertion
void create_assertion(llvm::Value *condition, const Expr &message, llvm::Value *error_code=nullptr)
Codegen an assertion.
Halide::Internal::CodeGen_LLVM::VectorTypeConstraint
VectorTypeConstraint
Interface to abstract vector code generation as LLVM is now providing multiple options to express eve...
Definition: CodeGen_LLVM.h:575
Halide::Internal::CodeGen_LLVM::emit_atomic_stores
bool emit_atomic_stores
Emit atomic store instructions?
Definition: CodeGen_LLVM.h:535
Halide::Module
A halide module.
Definition: Module.h:138
Halide::Type
Types in the halide type system.
Definition: Type.h:276
Halide::Internal::Load
Load a value from a named symbol if predicate is true.
Definition: IR.h:209
Halide::Internal::Free
Free the resources associated with the given buffer.
Definition: IR.h:405
Halide::Internal::Realize
Allocate a multi-dimensional buffer of the given type and size.
Definition: IR.h:419
Halide
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
Definition: AbstractGenerator.h:19
Halide::Internal::Or
Logical or - is at least one of the expressions true.
Definition: IR.h:176
Halide::Internal::CodeGen_LLVM::supports_atomic_add
virtual bool supports_atomic_add(const Type &t) const
Halide::Internal::EQ
Is the first expression equal to the second.
Definition: IR.h:113
Halide::Internal::CodeGen_LLVM::VectorTypeConstraint::None
@ None
Halide::Internal::CodeGen_LLVM::i8_t
llvm::Type * i8_t
Definition: CodeGen_LLVM.h:210
Halide::Internal::CodeGen_LLVM::value
llvm::Value * value
Definition: CodeGen_LLVM.h:168
Halide::Internal::Provide
This defines the value of a function at a multi-dimensional location.
Definition: IR.h:346
Halide::Internal::CodeGen_LLVM::f32_t
llvm::Type * f32_t
Definition: CodeGen_LLVM.h:210
Halide::Internal::CodeGen_LLVM::wild_u1_
Expr wild_u1_
Definition: CodeGen_LLVM.h:231
Halide::Internal::CodeGen_LLVM::module
std::unique_ptr< llvm::Module > module
Definition: CodeGen_LLVM.h:164
Halide::Internal::CodeGen_LLVM::wild_u16_
Expr wild_u16_
Definition: CodeGen_LLVM.h:231
Halide::LinkageType::Internal
@ Internal
Not visible externally, similar to 'static' linkage in C.
Halide::Internal::CodeGen_LLVM::Intrinsic
Description of an intrinsic function overload.
Definition: CodeGen_LLVM.h:448
Halide::Internal::Max
The greater of two values.
Definition: IR.h:104
Halide::Internal::CodeGen_LLVM::Intrinsic::Intrinsic
Intrinsic(Type result_type, std::vector< Type > arg_types, llvm::Function *impl)
Definition: CodeGen_LLVM.h:453
Halide::Internal::CodeGen_LLVM::codegen_buffer_pointer
llvm::Value * codegen_buffer_pointer(const std::string &buffer, Type type, llvm::Value *index)
Generate a pointer into a named buffer at a given index, of a given type.
Halide::Internal::CodeGen_LLVM::get_destructor_block
llvm::BasicBlock * get_destructor_block()
Retrieves the block containing the error handling code.
Halide::Internal::CodeGen_LLVM::VPResultType::mangle_index
std::optional< size_t > mangle_index
Definition: CodeGen_LLVM.h:628
Halide::Internal::CodeGen_LLVM::f16_t
llvm::Type * f16_t
Definition: CodeGen_LLVM.h:210
Halide::Internal::CodeGen_LLVM::VPResultType::VPResultType
VPResultType(llvm::Type *type, std::optional< size_t > mangle_index=std::nullopt)
Definition: CodeGen_LLVM.h:629
Halide::Internal::CodeGen_LLVM::interleave_vectors
virtual llvm::Value * interleave_vectors(const std::vector< llvm::Value * > &)
Implementation of the intrinsic call to interleave_vectors.
Halide::Internal::CodeGen_LLVM::scalable_to_fixed_vector_type
llvm::Value * scalable_to_fixed_vector_type(llvm::Value *scalable)
Convert an LLVM vscale vector value to the corresponding fixed vector value.
Halide::Internal::CodeGen_LLVM::slice_vector
virtual llvm::Value * slice_vector(llvm::Value *vec, int start, int extent)
Take a slice of lanes out of an llvm vector.
Halide::LinkageType
LinkageType
Type of linkage a function in a lowered Halide module can have.
Definition: Module.h:48
Halide::Internal::CodeGen_LLVM::WarningKind
WarningKind
Warning messages which we want to avoid displaying more than once.
Definition: CodeGen_LLVM.h:652
Halide::Internal::CodeGen_LLVM::WarningKind::EmulatedFloat16
@ EmulatedFloat16
Halide::Buffer<>
Halide::Internal::CodeGen_LLVM::requested_alloca_total
size_t requested_alloca_total
A (very) conservative guess at the size of all alloca() storage requested (including alignment paddin...
Definition: CodeGen_LLVM.h:434
Halide::codegen_llvm
std::unique_ptr< llvm::Module > codegen_llvm(const Module &module, llvm::LLVMContext &context)
Given a Halide module, generate an llvm::Module.
Halide::Internal::CodeGen_LLVM::promote_indices
virtual bool promote_indices() const
Should indexing math be promoted to 64-bit on platforms with 64-bit pointers?
Definition: CodeGen_LLVM.h:132
Halide::Internal::Let
A let expression, like you might find in a functional language.
Definition: IR.h:263
Halide::Internal::CodeGen_LLVM::sym_push
void sym_push(const std::string &name, llvm::Value *value)
Add an entry to the symbol table, hiding previous entries with the same name.
Halide::Internal::CodeGen_LLVM::DestructorType
DestructorType
Some destructors should always be called.
Definition: CodeGen_LLVM.h:248
Halide::Internal::CodeGen_LLVM::MaskVariant
std::variant< NoMask, AllEnabledMask, llvm::Value * > MaskVariant
Predication mask using the above two types for special cases and an llvm::Value for the general one.
Definition: CodeGen_LLVM.h:615
Halide::Internal::CodeGen_LLVM::try_vector_predication_comparison
bool try_vector_predication_comparison(const std::string &name, const Type &result_type, MaskVariant mask, llvm::Value *a, llvm::Value *b, const char *cmp_op)
Generate a vector predicated comparison intrinsic call if use_llvm_vp_intrinsics is true and result_t...
Halide::Internal::CodeGen_LLVM::Intrinsic::result_type
Type result_type
Definition: CodeGen_LLVM.h:449
Halide::Internal::CodeGen_LLVM::create_broadcast
llvm::Value * create_broadcast(llvm::Value *, int lanes)
Widen an llvm scalar into an llvm vector with the given number of lanes.
Halide::Internal::CodeGen_LLVM::target
Halide::Target target
The target we're generating code for.
Definition: CodeGen_LLVM.h:175
Halide::Internal::CodeGen_LLVM::VectorTypeConstraint::Fixed
@ Fixed
Force use of fixed size vectors.
Halide::Internal::CodeGen_LLVM::wild_i16_
Expr wild_i16_
Definition: CodeGen_LLVM.h:231
Halide::Internal::Ramp
A linear ramp vector node.
Definition: IR.h:239
Halide::Internal::CodeGen_LLVM::scalar_value_t_type
llvm::StructType * scalar_value_t_type
Definition: CodeGen_LLVM.h:216
Halide::Internal::CodeGen_LLVM::mabi
virtual std::string mabi() const
IRVisitor.h
Halide::Internal::CodeGen_LLVM::register_destructor
llvm::Value * register_destructor(llvm::Function *destructor_fn, llvm::Value *obj, DestructorType when)
Halide::Internal::CodeGen_LLVM::wild_u32_
Expr wild_u32_
Definition: CodeGen_LLVM.h:232
Halide::Internal::Evaluate
Evaluate and discard an expression, presumably because it has some side-effect.
Definition: IR.h:468
Halide::Internal::CodeGen_LLVM::wild_u32x_
Expr wild_u32x_
Definition: CodeGen_LLVM.h:227
Halide::Internal::CodeGen_LLVM::new_for_target
static std::unique_ptr< CodeGen_LLVM > new_for_target(const Target &target, llvm::LLVMContext &context)
Create an instance of CodeGen_LLVM suitable for the target.
Halide::Internal::CodeGen_LLVM::begin_func
virtual void begin_func(LinkageType linkage, const std::string &simple_name, const std::string &extern_name, const std::vector< LoweredArgument > &args)
Helper functions for compiling Halide functions to llvm functions.
Halide::Internal::CodeGen_LLVM::call_overloaded_intrin
llvm::Value * call_overloaded_intrin(const Type &result_type, const std::string &name, const std::vector< Expr > &args)
Call an overloaded intrinsic function.
Halide::Internal::CodeGen_LLVM::use_pic
virtual bool use_pic() const
Halide::MetadataNameMap
std::map< std::string, std::string > MetadataNameMap
Definition: Module.h:134
Halide::Internal::Store
Store a 'value' to the buffer called 'name' at a given 'index' if 'predicate' is true.
Definition: IR.h:325
Halide::Internal::LoweredFunc
Definition of a lowered function.
Definition: Module.h:97
Halide::Internal::Variable
A named variable.
Definition: IR.h:741
Halide::Internal::CodeGen_LLVM::onetime_warnings
std::map< WarningKind, std::string > onetime_warnings
Definition: CodeGen_LLVM.h:655
Halide::Internal::Min
The lesser of two values.
Definition: IR.h:95
Halide::Internal::CodeGen_LLVM::OnSuccess
@ OnSuccess
Definition: CodeGen_LLVM.h:250
Halide::Internal::CodeGen_LLVM::create_string_constant
llvm::Constant * create_string_constant(const std::string &str)
Put a string constant in the module as a global variable and return a pointer to it.
Halide::Internal::ProducerConsumer
This node is a helpful annotation to do with permissions.
Definition: IR.h:307
Halide::Internal::CodeGen_LLVM::visit
void visit(const IntImm *) override
Generate code for various IR nodes.
Halide::Internal::Mod
The remainder of a / b.
Definition: IR.h:86
Halide::Internal::CodeGen_LLVM::codegen_vector_reduce
virtual void codegen_vector_reduce(const VectorReduce *op, const Expr &init)
Compile a horizontal reduction that starts with an explicit initial value.
Halide::Internal::CodeGen_LLVM::context
llvm::LLVMContext * context
Definition: CodeGen_LLVM.h:166
Halide::Internal::CodeGen_LLVM::compile_trampolines
static std::unique_ptr< llvm::Module > compile_trampolines(const Target &target, llvm::LLVMContext &context, const std::string &suffix, const std::vector< std::pair< std::string, ExternSignature >> &externs)
Halide::Internal::AssertStmt
If the 'condition' is false, then evaluate and return the message, which should be a call to an error...
Definition: IR.h:286
Halide::Internal::CodeGen_LLVM::get_llvm_intrin
llvm::Function * get_llvm_intrin(const Type &ret_type, const std::string &name, const std::vector< Type > &arg_types, bool scalars_are_vectors=false)
Get an LLVM intrinsic declaration.
Halide::Internal::CodeGen_LLVM::mattrs
virtual std::string mattrs() const =0
Halide::Internal::CodeGen_LLVM::declare_intrin_overload
llvm::Function * declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector< Type > arg_types, bool scalars_are_vectors=false)
Declare an intrinsic function that participates in overload resolution.
Halide::Internal::CodeGen_LLVM::get_vector_num_elements
int get_vector_num_elements(const llvm::Type *t)
Get number of vector elements, taking into account scalable vectors.
Halide::Internal::CodeGen_LLVM::semaphore_t_type
llvm::StructType * semaphore_t_type
Definition: CodeGen_LLVM.h:219
Halide::Internal::CodeGen_LLVM::upgrade_type_for_arithmetic
virtual Type upgrade_type_for_arithmetic(const Type &) const
Return the type in which arithmetic should be done for the given storage type.
Halide::Internal::CodeGen_LLVM::current_function_args
std::vector< LoweredArgument > current_function_args
Definition: CodeGen_LLVM.h:172
Halide::Internal::CodeGen_LLVM::call_intrin
llvm::Value * call_intrin(const Type &t, int intrin_lanes, const std::string &name, std::vector< Expr >)
Generate a call to a vector intrinsic or runtime inlined function.
Halide::Internal::Call
A function call.
Definition: IR.h:482
Halide::Internal::CodeGen_LLVM::wild_u64x_
Expr wild_u64x_
Definition: CodeGen_LLVM.h:227
Halide::Internal::CodeGen_LLVM::wild_f64x_
Expr wild_f64x_
Definition: CodeGen_LLVM.h:228
Halide::Internal::CodeGen_LLVM::wild_u1x_
Expr wild_u1x_
Some wildcard variables used for peephole optimizations in subclasses.
Definition: CodeGen_LLVM.h:226
Halide::Internal::CodeGen_LLVM::scalarize
void scalarize(const Expr &)
Codegen a vector Expr by codegenning each lane and combining.
Halide::Internal::CodeGen_LLVM::metadata_t_type
llvm::StructType * metadata_t_type
Definition: CodeGen_LLVM.h:214
Halide::Internal::CodeGen_LLVM::supports_call_as_float16
virtual bool supports_call_as_float16(const Call *op) const
Can we call this operation with float16 type? This is used to avoid "emulated" equivalent code-gen in...
Halide::Internal::CodeGen_LLVM::sym_exists
bool sym_exists(const std::string &name) const
Test if an item exists in the symbol table.
Halide::Internal::CodeGen_LLVM::void_t
llvm::Type * void_t
Some useful llvm types.
Definition: CodeGen_LLVM.h:210
Halide::ExternSignature
Definition: Pipeline.h:498
Halide::Internal::CodeGen_LLVM::wild_i32x_
Expr wild_i32x_
Definition: CodeGen_LLVM.h:227
Halide::Internal::Reinterpret
Reinterpret value as another type, without affecting any of the bits (on little-endian systems).
Definition: IR.h:39
Halide::Internal::CodeGen_LLVM::wild_i64_
Expr wild_i64_
Definition: CodeGen_LLVM.h:232
Halide::Internal::SyntheticParamType::Type
@ Type
Halide::Internal::CodeGen_LLVM::wild_i32_
Expr wild_i32_
Definition: CodeGen_LLVM.h:232
Halide::Internal::CodeGen_LLVM::dimension_t_type
llvm::StructType * dimension_t_type
Definition: CodeGen_LLVM.h:213
Halide::Internal::CodeGen_LLVM::wild_i8_
Expr wild_i8_
Definition: CodeGen_LLVM.h:231
Halide::Internal::Select
A ternary operator.
Definition: IR.h:196
Halide::Internal::CodeGen_LLVM::create_binary_blob
llvm::Constant * create_binary_blob(const std::vector< char > &data, const std::string &name, bool constant=true)
Put a binary blob in the module as a global variable and return a pointer to it.
Halide::Internal::CodeGen_LLVM::strict_fp_math_md
llvm::MDNode * strict_fp_math_md
Definition: CodeGen_LLVM.h:171
Halide::Internal::CodeGen_LLVM::mangle_llvm_type
std::string mangle_llvm_type(llvm::Type *type)
Return type string for LLVM type using LLVM IR intrinsic type mangling.
Halide::Internal::CodeGen_LLVM::very_likely_branch
llvm::MDNode * very_likely_branch
Definition: CodeGen_LLVM.h:169
Halide::Expr
A fragment of Halide syntax.
Definition: Expr.h:257
Halide::Internal::CodeGen_LLVM::argument_t_type
llvm::StructType * argument_t_type
Definition: CodeGen_LLVM.h:215
Halide::Internal::CodeGen_LLVM::codegen_asserts
void codegen_asserts(const std::vector< const AssertStmt * > &asserts)
Codegen a block of asserts with pure conditions.
Halide::Internal::Prefetch
Represent a multi-dimensional region of a Func or an ImageParam that needs to be prefetched.
Definition: IR.h:888
Halide::Internal::CodeGen_LLVM::VPArg::value
llvm::Value * value
Definition: CodeGen_LLVM.h:596
Halide::Internal::GT
Is the first expression greater than the second.
Definition: IR.h:149
Halide::Internal::Atomic
Lock all the Store nodes in the body statement.
Definition: IR.h:911
Halide::Internal::Shuffle
Construct a new vector by taking elements from another sequence of vectors.
Definition: IR.h:819
Halide::Internal::CodeGen_LLVM::i1_t
llvm::Type * i1_t
Definition: CodeGen_LLVM.h:210
Halide::Internal::CodeGen_LLVM::mcpu_tune
virtual std::string mcpu_tune() const =0
Halide::Internal::CodeGen_LLVM::concat_vectors
virtual llvm::Value * concat_vectors(const std::vector< llvm::Value * > &)
Concatenate a bunch of llvm vectors.
Module.h
Halide::Internal::CodeGen_LLVM::AllEnabledMask
Type indicating mask to use is all true – all lanes enabled.
Definition: CodeGen_LLVM.h:610
Halide::Internal::CodeGen_LLVM::add_tbaa_metadata
void add_tbaa_metadata(llvm::Instruction *inst, std::string buffer, const Expr &index)
Mark a load or store with type-based-alias-analysis metadata so that llvm knows it can reorder loads ...
Halide::Internal::UIntImm
Unsigned integer constants.
Definition: Expr.h:226
Halide::Internal::CodeGen_LLVM::init_module
virtual void init_module()
Initialize the CodeGen_LLVM internal state to compile a fresh module.
Halide::Internal::ModulusRemainder
The result of modulus_remainder analysis.
Definition: ModulusRemainder.h:31
Halide::Internal::IfThenElse
An if-then-else block.
Definition: IR.h:458
Halide::Internal::Sub
The difference of two expressions.
Definition: IR.h:57
Halide::Internal::CodeGen_LLVM::default_fp_math_md
llvm::MDNode * default_fp_math_md
Definition: CodeGen_LLVM.h:170
Halide::Internal::CodeGen_LLVM::use_soft_float_abi
virtual bool use_soft_float_abi() const =0
Halide::Internal::CodeGen_LLVM::wild_u64_
Expr wild_u64_
Definition: CodeGen_LLVM.h:232
Halide::Internal::CodeGen_LLVM::wild_u16x_
Expr wild_u16x_
Definition: CodeGen_LLVM.h:226
Halide::Internal::CodeGen_LLVM::type_t_type
llvm::StructType * type_t_type
Definition: CodeGen_LLVM.h:212
Halide::Target
A struct representing a target machine and os to generate code for.
Definition: Target.h:19
Halide::Internal::And
Logical and - are both expressions true.
Definition: IR.h:167
Halide::Internal::CodeGen_LLVM::intrinsics
std::map< std::string, std::vector< Intrinsic > > intrinsics
Mapping of intrinsic functions to the various overloads implementing it.
Definition: CodeGen_LLVM.h:458
Halide::Internal::CodeGen_LLVM::llvm_type_of
virtual llvm::Type * llvm_type_of(const Type &) const
Get the llvm type equivalent to the given halide type in the current context.
Halide::Internal::CodeGen_LLVM::device_interface_t_type
llvm::StructType * device_interface_t_type
Definition: CodeGen_LLVM.h:217
Halide::Internal::CodeGen_LLVM::get_vector_type
llvm::Type * get_vector_type(llvm::Type *, int n, VectorTypeConstraint type_constraint=VectorTypeConstraint::None) const
Halide::Internal::CodeGen_LLVM::VPArg::VPArg
VPArg(llvm::Value *value, std::optional< size_t > mangle_index=std::nullopt, int32_t alignment=0)
Definition: CodeGen_LLVM.h:600
Halide::Internal::CodeGen_LLVM::Intrinsic::arg_types
std::vector< Type > arg_types
Definition: CodeGen_LLVM.h:450
Halide::Internal::StringImm
String constants.
Definition: Expr.h:244
Halide::Internal::Not
Logical not - true if the expression is false.
Definition: IR.h:185
Halide::Internal::CodeGen_LLVM::compile
virtual std::unique_ptr< llvm::Module > compile(const Module &module)
Takes a halide Module and compiles it to an llvm Module.
Halide::Internal::LT
Is the first expression less than the second.
Definition: IR.h:131
Halide::Internal::CodeGen_LLVM::get_requested_alloca_total
size_t get_requested_alloca_total() const
Definition: CodeGen_LLVM.h:84
Halide::Internal::CodeGen_LLVM::use_llvm_vp_intrinsics
bool use_llvm_vp_intrinsics
Controls use of vector predicated intrinsics for vector operations.
Definition: CodeGen_LLVM.h:643
Halide::Internal::Mul
The product of two expressions.
Definition: IR.h:66
Halide::Internal::CodeGen_LLVM::VPArg::alignment
int alignment
Definition: CodeGen_LLVM.h:599
Halide::Internal::Block
A sequence of statements to be executed in-order.
Definition: IR.h:434