Halide 19.0.0
Halide compiler and libraries
Loading...
Searching...
No Matches
Halide::Target Struct Reference

A struct representing a target machine and os to generate code for. More...

#include <Target.h>

Public Types

enum  OS {
  OSUnknown = 0 , Linux , Windows , OSX ,
  Android , IOS , QuRT , NoOS ,
  Fuchsia , WebAssemblyRuntime
}
 The operating system used by the target. More...
 
enum  Arch {
  ArchUnknown = 0 , X86 , ARM , Hexagon ,
  POWERPC , WebAssembly , RISCV
}
 The architecture used by the target. More...
 
enum  Processor {
  ProcessorGeneric = 0 , K8 , K8_SSE3 , AMDFam10 ,
  BtVer1 , BdVer1 , BdVer2 , BdVer3 ,
  BdVer4 , BtVer2 , ZnVer1 , ZnVer2 ,
  ZnVer3 , ZnVer4
}
 The specific processor to be targeted, tuned for. More...
 
enum  Feature {
  JIT = halide_target_feature_jit , Debug = halide_target_feature_debug , NoAsserts = halide_target_feature_no_asserts , NoBoundsQuery = halide_target_feature_no_bounds_query ,
  SSE41 = halide_target_feature_sse41 , AVX = halide_target_feature_avx , AVX2 = halide_target_feature_avx2 , FMA = halide_target_feature_fma ,
  FMA4 = halide_target_feature_fma4 , F16C = halide_target_feature_f16c , ARMv7s = halide_target_feature_armv7s , NoNEON = halide_target_feature_no_neon ,
  VSX = halide_target_feature_vsx , POWER_ARCH_2_07 = halide_target_feature_power_arch_2_07 , CUDA = halide_target_feature_cuda , CUDACapability30 = halide_target_feature_cuda_capability30 ,
  CUDACapability32 = halide_target_feature_cuda_capability32 , CUDACapability35 = halide_target_feature_cuda_capability35 , CUDACapability50 = halide_target_feature_cuda_capability50 , CUDACapability61 = halide_target_feature_cuda_capability61 ,
  CUDACapability70 = halide_target_feature_cuda_capability70 , CUDACapability75 = halide_target_feature_cuda_capability75 , CUDACapability80 = halide_target_feature_cuda_capability80 , CUDACapability86 = halide_target_feature_cuda_capability86 ,
  OpenCL = halide_target_feature_opencl , CLDoubles = halide_target_feature_cl_doubles , CLHalf = halide_target_feature_cl_half , CLAtomics64 = halide_target_feature_cl_atomic64 ,
  EGL = halide_target_feature_egl , UserContext = halide_target_feature_user_context , Profile = halide_target_feature_profile , NoRuntime = halide_target_feature_no_runtime ,
  Metal = halide_target_feature_metal , CPlusPlusMangling = halide_target_feature_c_plus_plus_mangling , LargeBuffers = halide_target_feature_large_buffers , HexagonDma = halide_target_feature_hexagon_dma ,
  HVX_128 = halide_target_feature_hvx_128 , HVX = HVX_128 , HVX_v62 = halide_target_feature_hvx_v62 , HVX_v65 = halide_target_feature_hvx_v65 ,
  HVX_v66 = halide_target_feature_hvx_v66 , HVX_v68 = halide_target_feature_hvx_v68 , FuzzFloatStores = halide_target_feature_fuzz_float_stores , SoftFloatABI = halide_target_feature_soft_float_abi ,
  MSAN = halide_target_feature_msan , AVX512 = halide_target_feature_avx512 , AVX512_KNL = halide_target_feature_avx512_knl , AVX512_Skylake = halide_target_feature_avx512_skylake ,
  AVX512_Cannonlake = halide_target_feature_avx512_cannonlake , AVX512_SapphireRapids = halide_target_feature_avx512_sapphirerapids , AVX512_Zen4 = halide_target_feature_avx512_zen4 , TraceLoads = halide_target_feature_trace_loads ,
  TraceStores = halide_target_feature_trace_stores , TraceRealizations = halide_target_feature_trace_realizations , TracePipeline = halide_target_feature_trace_pipeline , D3D12Compute = halide_target_feature_d3d12compute ,
  StrictFloat = halide_target_feature_strict_float , TSAN = halide_target_feature_tsan , ASAN = halide_target_feature_asan , CheckUnsafePromises = halide_target_feature_check_unsafe_promises ,
  EmbedBitcode = halide_target_feature_embed_bitcode , EnableLLVMLoopOpt = halide_target_feature_enable_llvm_loop_opt , WasmMvpOnly = halide_target_feature_wasm_mvponly , WasmSimd128 = halide_target_feature_wasm_simd128 ,
  WasmThreads = halide_target_feature_wasm_threads , WasmBulkMemory = halide_target_feature_wasm_bulk_memory , WebGPU = halide_target_feature_webgpu , SVE = halide_target_feature_sve ,
  SVE2 = halide_target_feature_sve2 , ARMDotProd = halide_target_feature_arm_dot_prod , ARMFp16 = halide_target_feature_arm_fp16 , LLVMLargeCodeModel = halide_llvm_large_code_model ,
  RVV = halide_target_feature_rvv , ARMv8a = halide_target_feature_armv8a , ARMv81a = halide_target_feature_armv81a , ARMv82a = halide_target_feature_armv82a ,
  ARMv83a = halide_target_feature_armv83a , ARMv84a = halide_target_feature_armv84a , ARMv85a = halide_target_feature_armv85a , ARMv86a = halide_target_feature_armv86a ,
  ARMv87a = halide_target_feature_armv87a , ARMv88a = halide_target_feature_armv88a , ARMv89a = halide_target_feature_armv89a , SanitizerCoverage = halide_target_feature_sanitizer_coverage ,
  ProfileByTimer = halide_target_feature_profile_by_timer , SPIRV = halide_target_feature_spirv , Vulkan = halide_target_feature_vulkan , VulkanInt8 = halide_target_feature_vulkan_int8 ,
  VulkanInt16 = halide_target_feature_vulkan_int16 , VulkanInt64 = halide_target_feature_vulkan_int64 , VulkanFloat16 = halide_target_feature_vulkan_float16 , VulkanFloat64 = halide_target_feature_vulkan_float64 ,
  VulkanV10 = halide_target_feature_vulkan_version10 , VulkanV12 = halide_target_feature_vulkan_version12 , VulkanV13 = halide_target_feature_vulkan_version13 , Semihosting = halide_target_feature_semihosting ,
  AVX10_1 = halide_target_feature_avx10_1 , X86APX = halide_target_feature_x86_apx , FeatureEnd = halide_target_feature_end
}
 Optional features a target can have. More...
 

Public Member Functions

 Target ()=default
 
 Target (OS o, Arch a, int b, Processor pt, const std::vector< Feature > &initial_features=std::vector< Feature >(), int vb=0)
 
 Target (OS o, Arch a, int b, const std::vector< Feature > &initial_features=std::vector< Feature >())
 
 Target (const std::string &s)
 Given a string of the form used in HL_TARGET (e.g. "x86-64-avx"), construct the Target it specifies.
 
 Target (const char *s)
 
bool has_unknowns () const
 Return true if any of the arch/bits/os fields are "unknown"/0; return false otherwise.
 
void set_feature (Feature f, bool value=true)
 
void set_features (const std::vector< Feature > &features_to_set, bool value=true)
 
bool has_feature (Feature f) const
 
bool has_feature (halide_target_feature_t f) const
 
bool features_any_of (const std::vector< Feature > &test_features) const
 
bool features_all_of (const std::vector< Feature > &test_features) const
 
Target with_feature (Feature f) const
 Return a copy of the target with the given feature set.
 
Target without_feature (Feature f) const
 Return a copy of the target with the given feature cleared.
 
bool has_gpu_feature () const
 Is a fully featured GPU compute runtime enabled? I.e. is Func::gpu_tile and similar going to work?
 
bool supports_type (const Type &t) const
 Does this target allow using a certain type.
 
bool supports_type (const Type &t, DeviceAPI device) const
 Does this target allow using a certain type on a certain device.
 
bool supports_device_api (DeviceAPI api) const
 Returns whether a particular device API can be used with this Target.
 
DeviceAPI get_required_device_api () const
 If this Target (including all Features) requires a specific DeviceAPI, return it.
 
bool operator== (const Target &other) const
 
bool operator!= (const Target &other) const
 
bool get_runtime_compatible_target (const Target &other, Target &result)
 Create a "greatest common denominator" runtime target that is compatible with both this target and other.
 
std::string to_string () const
 Convert the Target into a string form that can be reconstituted by merge_string(), which will always be of the form arch-bits-os-processor-feature1-feature2...featureN.
 
int natural_vector_size (const Halide::Type &t) const
 Given a data type, return an estimate of the "natural" vector size for that data type when compiling for this Target.
 
template<typename data_t >
int natural_vector_size () const
 Given a data type, return an estimate of the "natural" vector size for that data type when compiling for this Target.
 
bool has_large_buffers () const
 Return true iff 64 bits and has_feature(LargeBuffers).
 
int64_t maximum_buffer_size () const
 Return the maximum buffer size in bytes supported on this Target.
 
int get_cuda_capability_lower_bound () const
 Get the minimum cuda capability found as an integer.
 
int get_vulkan_capability_lower_bound () const
 Get the minimum Vulkan capability found as an integer.
 
int get_arm_v8_lower_bound () const
 Get the minimum ARM v8.x capability found as an integer.
 
bool supported () const
 Was libHalide compiled with support for this target?
 
const std::bitset< FeatureEnd > & get_features_bitset () const
 Return a bitset of the Features set in this Target (set = 1).
 

Static Public Member Functions

static bool validate_target_string (const std::string &s)
 Check if a target string is valid.
 
static std::string feature_to_name (Target::Feature feature)
 Return the name corresponding to a given Feature, in the form used to construct Target strings (e.g., Feature::Debug is "debug" and not "Debug").
 
static Target::Feature feature_from_name (const std::string &name)
 Return the feature corresponding to a given name, in the form used to construct Target strings (e.g., Feature::Debug is "debug" and not "Debug").
 

Public Attributes

enum Halide::Target::OS os = OSUnknown
 
enum Halide::Target::Arch arch = ArchUnknown
 
int bits = 0
 The bit-width of the target machine.
 
int vector_bits = 0
 The bit-width of a vector register for targets where this is configurable and targeting a fixed size is desired.
 
enum Halide::Target::Processor processor_tune = ProcessorGeneric
 

Detailed Description

A struct representing a target machine and os to generate code for.

Examples
tutorial/lesson_11_cross_compilation.cpp, and tutorial/lesson_12_using_the_gpu.cpp.

Definition at line 19 of file Target.h.

Member Enumeration Documentation

◆ OS

The operating system used by the target.

Determines which system calls to generate. Corresponds to os_name_map in Target.cpp.

Enumerator
OSUnknown 
Linux 
Windows 
OSX 
Android 
IOS 
QuRT 
NoOS 
Fuchsia 
WebAssemblyRuntime 

Definition at line 23 of file Target.h.

◆ Arch

The architecture used by the target.

Determines the instruction set to use. Corresponds to arch_name_map in Target.cpp.

Enumerator
ArchUnknown 
X86 
ARM 
Hexagon 
POWERPC 
WebAssembly 
RISCV 

Definition at line 39 of file Target.h.

◆ Processor

The specific processor to be targeted, tuned for.

Corresponds to processor_name_map in Target.cpp.

New entries should be added to the end.

Enumerator
ProcessorGeneric 

Do not tune for any specific CPU. In practice, this means that Halide will decide the tune CPU based on the enabled features.

K8 

Tune for AMD K8 Hammer CPU (AMD Family 0Fh, launched 2003).

K8_SSE3 

Tune for later versions of AMD K8 CPU, with SSE3 support.

AMDFam10 

Tune for AMD K10 "Barcelona" CPU (AMD Family 10h, launched 2007).

BtVer1 

Tune for AMD Bobcat CPU (AMD Family 14h, launched 2011).

BdVer1 

Tune for AMD Bulldozer CPU (AMD Family 15h, launched 2011).

BdVer2 

Tune for AMD Piledriver CPU (AMD Family 15h (2nd-gen), launched 2012).

BdVer3 

Tune for AMD Steamroller CPU (AMD Family 15h (3rd-gen), launched 2014).

BdVer4 

Tune for AMD Excavator CPU (AMD Family 15h (4th-gen), launched 2015).

BtVer2 

Tune for AMD Jaguar CPU (AMD Family 16h, launched 2013).

ZnVer1 

Tune for AMD Zen CPU (AMD Family 17h, launched 2017).

ZnVer2 

Tune for AMD Zen 2 CPU (AMD Family 17h, launched 2019).

ZnVer3 

Tune for AMD Zen 3 CPU (AMD Family 19h, launched 2020).

ZnVer4 

Tune for AMD Zen 4 CPU (AMD Family 19h, launched 2022).

Definition at line 61 of file Target.h.

◆ Feature

Optional features a target can have.

Corresponds to feature_name_map in Target.cpp. See definitions in HalideRuntime.h for full information.

Enumerator
JIT 
Debug 
NoAsserts 
NoBoundsQuery 
SSE41 
AVX 
AVX2 
FMA 
FMA4 
F16C 
ARMv7s 
NoNEON 
VSX 
POWER_ARCH_2_07 
CUDA 
CUDACapability30 
CUDACapability32 
CUDACapability35 
CUDACapability50 
CUDACapability61 
CUDACapability70 
CUDACapability75 
CUDACapability80 
CUDACapability86 
OpenCL 
CLDoubles 
CLHalf 
CLAtomics64 
EGL 
UserContext 
Profile 
NoRuntime 
Metal 
CPlusPlusMangling 
LargeBuffers 
HexagonDma 
HVX_128 
HVX 
HVX_v62 
HVX_v65 
HVX_v66 
HVX_v68 
FuzzFloatStores 
SoftFloatABI 
MSAN 
AVX512 
AVX512_KNL 
AVX512_Skylake 
AVX512_Cannonlake 
AVX512_SapphireRapids 
AVX512_Zen4 
TraceLoads 
TraceStores 
TraceRealizations 
TracePipeline 
D3D12Compute 
StrictFloat 
TSAN 
ASAN 
CheckUnsafePromises 
EmbedBitcode 
EnableLLVMLoopOpt 
WasmMvpOnly 
WasmSimd128 
WasmThreads 
WasmBulkMemory 
WebGPU 
SVE 
SVE2 
ARMDotProd 
ARMFp16 
LLVMLargeCodeModel 
RVV 
ARMv8a 
ARMv81a 
ARMv82a 
ARMv83a 
ARMv84a 
ARMv85a 
ARMv86a 
ARMv87a 
ARMv88a 
ARMv89a 
SanitizerCoverage 
ProfileByTimer 
SPIRV 
Vulkan 
VulkanInt8 
VulkanInt16 
VulkanInt64 
VulkanFloat16 
VulkanFloat64 
VulkanV10 
VulkanV12 
VulkanV13 
Semihosting 
AVX10_1 
X86APX 
FeatureEnd 

Definition at line 83 of file Target.h.

Constructor & Destructor Documentation

◆ Target() [1/5]

Halide::Target::Target ( )
default

◆ Target() [2/5]

Halide::Target::Target ( OS o,
Arch a,
int b,
Processor pt,
const std::vector< Feature > & initial_features = std::vector<Feature>(),
int vb = 0 )
inline

Definition at line 185 of file Target.h.

References set_feature().

◆ Target() [3/5]

Halide::Target::Target ( OS o,
Arch a,
int b,
const std::vector< Feature > & initial_features = std::vector<Feature>() )
inline

Definition at line 194 of file Target.h.

◆ Target() [4/5]

Halide::Target::Target ( const std::string & s)
explicit

Given a string of the form used in HL_TARGET (e.g. "x86-64-avx"), construct the Target it specifies.

Note that this always starts with the result of get_host_target(), replacing only the parts found in the target string, so if you omit (say) an OS specification, the host OS will be used instead. An empty string is exactly equivalent to get_host_target().

Invalid target strings will fail with a user_error.

◆ Target() [5/5]

Halide::Target::Target ( const char * s)
explicit

Member Function Documentation

◆ validate_target_string()

static bool Halide::Target::validate_target_string ( const std::string & s)
static

Check if a target string is valid.

◆ has_unknowns()

bool Halide::Target::has_unknowns ( ) const

Return true if any of the arch/bits/os fields are "unknown"/0; return false otherwise.

◆ set_feature()

void Halide::Target::set_feature ( Feature f,
bool value = true )

Referenced by Target().

◆ set_features()

void Halide::Target::set_features ( const std::vector< Feature > & features_to_set,
bool value = true )

◆ has_feature() [1/2]

bool Halide::Target::has_feature ( Feature f) const

◆ has_feature() [2/2]

bool Halide::Target::has_feature ( halide_target_feature_t f) const
inline

Definition at line 226 of file Target.h.

References has_feature().

◆ features_any_of()

bool Halide::Target::features_any_of ( const std::vector< Feature > & test_features) const

◆ features_all_of()

bool Halide::Target::features_all_of ( const std::vector< Feature > & test_features) const

◆ with_feature()

Target Halide::Target::with_feature ( Feature f) const

Return a copy of the target with the given feature set.

This is convenient when enabling certain features (e.g. NoBoundsQuery) in an initialization list, where the target to be mutated may be a const reference.

Examples
tutorial/lesson_12_using_the_gpu.cpp.

Referenced by Halide::SimdOpCheckTest::SimdOpCheckTest().

◆ without_feature()

Target Halide::Target::without_feature ( Feature f) const

Return a copy of the target with the given feature cleared.

This is convenient when disabling certain features (e.g. NoBoundsQuery) in an initialization list, where the target to be mutated may be a const reference.

Referenced by Halide::SimdOpCheckTest::get_run_target().

◆ has_gpu_feature()

bool Halide::Target::has_gpu_feature ( ) const

Is a fully featured GPU compute runtime enabled? I.e. is Func::gpu_tile and similar going to work?

Currently includes CUDA, OpenCL, Metal and D3D12Compute.

Examples
tutorial/lesson_12_using_the_gpu.cpp.

Referenced by Halide::Internal::Autoscheduler::LoopNest::is_gpu_block(), Halide::Internal::Autoscheduler::LoopNest::is_gpu_serial(), Halide::Internal::Autoscheduler::LoopNest::is_gpu_thread(), and Halide::Internal::schedule_scalar().

◆ supports_type() [1/2]

bool Halide::Target::supports_type ( const Type & t) const

Does this target allow using a certain type.

Generally all types except 64-bit float and int/uint should be supported by all backends.

It is likely better to call the version below which takes a DeviceAPI.

◆ supports_type() [2/2]

bool Halide::Target::supports_type ( const Type & t,
DeviceAPI device ) const

Does this target allow using a certain type on a certain device.

This is the preferred version of this routine.

◆ supports_device_api()

bool Halide::Target::supports_device_api ( DeviceAPI api) const

Returns whether a particular device API can be used with this Target.

◆ get_required_device_api()

DeviceAPI Halide::Target::get_required_device_api ( ) const

If this Target (including all Features) requires a specific DeviceAPI, return it.

If it doesn't, return DeviceAPI::None. If the Target has features with multiple (different) DeviceAPI requirements, the result will be an arbitrary DeviceAPI.

◆ operator==()

bool Halide::Target::operator== ( const Target & other) const
inline

Definition at line 274 of file Target.h.

References arch, bits, os, and processor_tune.

◆ operator!=()

bool Halide::Target::operator!= ( const Target & other) const
inline

Definition at line 282 of file Target.h.

◆ get_runtime_compatible_target()

bool Halide::Target::get_runtime_compatible_target ( const Target & other,
Target & result )

Create a "greatest common denominator" runtime target that is compatible with both this target and other.

Used by generators to conveniently select a suitable runtime when linking together multiple functions.

Parameters
other: The other target from which we compute the gcd target.
[out] result: The gcd target if we return true, otherwise unmodified. Can be the same as *this.
Returns
Whether it was possible to find a compatible target (true) or not.

◆ to_string()

std::string Halide::Target::to_string ( ) const

Convert the Target into a string form that can be reconstituted by merge_string(), which will always be of the form arch-bits-os-processor-feature1-feature2...featureN.

Note that it is guaranteed that Target(t1.to_string()) == t1, unless t1 contains 'unknown' fields (in which case you'll get a string that can't be parsed, which is intentional). It is not guaranteed that Target(s).to_string() == s, since there can be multiple strings that parse to the same Target.

Examples
tutorial/lesson_12_using_the_gpu.cpp.

Referenced by Halide::SimdOpCheckTest::compile_and_check(), and Halide::SimdOpCheckTest::test_all().

◆ natural_vector_size() [1/2]

int Halide::Target::natural_vector_size ( const Halide::Type & t) const

Given a data type, return an estimate of the "natural" vector size for that data type when compiling for this Target.

◆ natural_vector_size() [2/2]

template<typename data_t >
int Halide::Target::natural_vector_size ( ) const
inline

Given a data type, return an estimate of the "natural" vector size for that data type when compiling for this Target.

Definition at line 317 of file Target.h.

References natural_vector_size(), and Halide::type_of().

Referenced by natural_vector_size().

◆ has_large_buffers()

bool Halide::Target::has_large_buffers ( ) const
inline

Return true iff 64 bits and has_feature(LargeBuffers).

Definition at line 322 of file Target.h.

References bits, has_feature(), and LargeBuffers.

Referenced by maximum_buffer_size().

◆ maximum_buffer_size()

int64_t Halide::Target::maximum_buffer_size ( ) const
inline

Return the maximum buffer size in bytes supported on this Target.

This is 2^31 - 1 except on 64-bit targets when the LargeBuffers feature is enabled, which expands the maximum to 2^63 - 1.

Definition at line 329 of file Target.h.

References has_large_buffers().

◆ get_cuda_capability_lower_bound()

int Halide::Target::get_cuda_capability_lower_bound ( ) const

Get the minimum cuda capability found as an integer.

Returns 20 (our minimum supported cuda compute capability) if no cuda features are set.

◆ get_vulkan_capability_lower_bound()

int Halide::Target::get_vulkan_capability_lower_bound ( ) const

Get the minimum Vulkan capability found as an integer.

Returns 10 (our minimum supported Vulkan compute capability) if no Vulkan features are set.

◆ get_arm_v8_lower_bound()

int Halide::Target::get_arm_v8_lower_bound ( ) const

Get the minimum ARM v8.x capability found as an integer.

Returns -1 if no ARM v8.x features are set.

◆ supported()

bool Halide::Target::supported ( ) const

Was libHalide compiled with support for this target?

◆ get_features_bitset()

const std::bitset< FeatureEnd > & Halide::Target::get_features_bitset ( ) const
inline

Return a bitset of the Features set in this Target (set = 1).

Note that while this happens to be the current internal representation, that might not always be the case.

Definition at line 357 of file Target.h.

◆ feature_to_name()

static std::string Halide::Target::feature_to_name ( Target::Feature feature)
static

Return the name corresponding to a given Feature, in the form used to construct Target strings (e.g., Feature::Debug is "debug" and not "Debug").

◆ feature_from_name()

static Target::Feature Halide::Target::feature_from_name ( const std::string & name)
static

Return the feature corresponding to a given name, in the form used to construct Target strings (e.g., Feature::Debug is "debug" and not "Debug").

If the string is not a known feature name, return FeatureEnd.

Member Data Documentation

◆ os

enum Halide::Target::OS Halide::Target::os = OSUnknown

◆ arch

enum Halide::Target::Arch Halide::Target::arch = ArchUnknown

◆ bits

int Halide::Target::bits = 0

The bit-width of the target machine.

Must be 0 for unknown, or 32 or 64.

Examples
tutorial/lesson_11_cross_compilation.cpp.

Definition at line 50 of file Target.h.

Referenced by Halide::SimdOpCheckTest::can_run_code(), has_large_buffers(), and operator==().

◆ vector_bits

int Halide::Target::vector_bits = 0

The bit-width of a vector register for targets where this is configurable and targeting a fixed size is desired.

The default of 0 indicates no assumption of fixed size is allowed.

Definition at line 55 of file Target.h.

◆ processor_tune

enum Halide::Target::Processor Halide::Target::processor_tune = ProcessorGeneric

Referenced by operator==().


The documentation for this struct was generated from the following file: