Halide
tutorial/lesson_11_cross_compilation.cpp
// Halide tutorial lesson 11: Cross-compilation
// This lesson demonstrates how to use Halide as a cross-compiler to
// generate code for any platform from any platform.
// On linux, you can compile and run it like so:
// g++ lesson_11*.cpp -g -std=c++17 -I <path/to/Halide.h> -L <path/to/libHalide.so> -lHalide -lpthread -ldl -o lesson_11
// LD_LIBRARY_PATH=<path/to/libHalide.so> ./lesson_11
// On os x:
// g++ lesson_11*.cpp -g -std=c++17 -I <path/to/Halide.h> -L <path/to/libHalide.dylib> -lHalide -o lesson_11
// DYLD_LIBRARY_PATH=<path/to/libHalide.dylib> ./lesson_11
// If you have the entire Halide source tree, you can also build it by
// running:
// make tutorial_lesson_11_cross_compilation
// in a shell with the current directory at the top of the halide
// source tree.
#include "Halide.h"
#include <stdio.h>
using namespace Halide;
int main(int argc, char **argv) {
// We'll define the simple one-stage pipeline that we used in lesson 10.
Func brighter;
Var x, y;
// Declare the arguments.
ImageParam input(type_of<uint8_t>(), 2);
std::vector<Argument> args(2);
args[0] = input;
args[1] = offset;
// Define the Func.
brighter(x, y) = input(x, y) + offset;
// Schedule it.
brighter.vectorize(x, 16).parallel(y);
// The following line is what we did in lesson 10. It compiles an
// object file suitable for the system that you're running this
// program on. For example, if you compile and run this file on
// 64-bit linux on an x86 cpu with sse4.1, then the generated code
// will be suitable for 64-bit linux on x86 with sse4.1.
brighter.compile_to_file("lesson_11_host", args, "brighter");
// We can also compile object files suitable for other cpus and
// operating systems. You do this with an optional third argument
// to compile_to_file which specifies the target to compile for.
// Let's use this to compile a 32-bit arm android version of this code:
Target target;
target.os = Target::Android; // The operating system
target.arch = Target::ARM; // The CPU architecture
target.bits = 32; // The bit-width of the architecture
std::vector<Target::Feature> arm_features; // A list of features to set
target.set_features(arm_features);
// We then pass the target as the last argument to compile_to_file.
brighter.compile_to_file("lesson_11_arm_32_android", args, "brighter", target);
// And now a Windows object file for 64-bit x86 with AVX and SSE 4.1:
target.os = Target::Windows;
target.arch = Target::X86;
target.bits = 64;
std::vector<Target::Feature> x86_features;
x86_features.push_back(Target::AVX);
x86_features.push_back(Target::SSE41);
target.set_features(x86_features);
brighter.compile_to_file("lesson_11_x86_64_windows", args, "brighter", target);
// And finally an iOS mach-o object file for one of Apple's 32-bit
// ARM processors - the A6. It's used in the iPhone 5. The A6 uses
// a slightly modified ARM architecture called ARMv7s. We specify
// this using the target features field. Support for Apple's
// 64-bit ARM processors is very new in llvm, and still somewhat
// flaky.
target.os = Target::IOS;
target.arch = Target::ARM;
target.bits = 32;
std::vector<Target::Feature> armv7s_features;
armv7s_features.push_back(Target::ARMv7s);
target.set_features(armv7s_features);
brighter.compile_to_file("lesson_11_arm_32_ios", args, "brighter", target);
// Now let's check these files are what they claim, by examining
// their first few bytes.
// 32-arm android object files start with the magic bytes:
uint8_t arm_32_android_magic[] = {0x7f, 'E', 'L', 'F', // ELF format
1, // 32-bit
1, // 2's complement little-endian
1}; // Current version of elf
FILE *f = fopen("lesson_11_arm_32_android.o", "rb");
uint8_t header[32];
if (!f || fread(header, 32, 1, f) != 1) {
printf("Object file not generated\n");
return -1;
}
fclose(f);
if (memcmp(header, arm_32_android_magic, sizeof(arm_32_android_magic))) {
printf("Unexpected header bytes in 32-bit arm object file.\n");
return -1;
}
// 64-bit windows object files start with the magic 16-bit value 0x8664
// (presumably referring to x86-64)
uint8_t win_64_magic[] = {0x64, 0x86};
f = fopen("lesson_11_x86_64_windows.obj", "rb");
if (!f || fread(header, 32, 1, f) != 1) {
printf("Object file not generated\n");
return -1;
}
fclose(f);
if (memcmp(header, win_64_magic, sizeof(win_64_magic))) {
printf("Unexpected header bytes in 64-bit windows object file.\n");
return -1;
}
// 32-bit arm iOS mach-o files start with the following magic bytes:
uint32_t arm_32_ios_magic[] = {0xfeedface, // Mach-o magic bytes
12, // CPU type is ARM
11, // CPU subtype is ARMv7s
1}; // It's a relocatable object file.
f = fopen("lesson_11_arm_32_ios.o", "rb");
if (!f || fread(header, 32, 1, f) != 1) {
printf("Object file not generated\n");
return -1;
}
fclose(f);
if (memcmp(header, arm_32_ios_magic, sizeof(arm_32_ios_magic))) {
printf("Unexpected header bytes in 32-bit arm ios object file.\n");
return -1;
}
// It looks like the object files we produced are plausible for
// those targets. We'll count that as a success for the purposes
// of this tutorial. For a real application you'd then need to
// figure out how to integrate Halide into your cross-compilation
// toolchain. There are several small examples of this in the
// Halide repository under the apps folder. See HelloAndroid and
// HelloiOS here:
// https://github.com/halide/Halide/tree/main/apps/
printf("Success!\n");
return 0;
}
Halide::Target::Android
@ Android
Definition: Target.h:28
Halide::Var
A Halide variable, to be used when defining functions.
Definition: Var.h:19
Halide::Target::AVX
@ AVX
Definition: Target.h:88
uint8_t
unsigned __INT8_TYPE__ uint8_t
Definition: runtime_internal.h:29
memcmp
int memcmp(const void *s1, const void *s2, size_t n)
Halide::ImageParam
An Image parameter to a halide pipeline.
Definition: ImageParam.h:23
Halide::Func::vectorize
Func & vectorize(const VarOrRVar &var)
Mark a dimension to be computed all-at-once as a single vector.
Halide::Target::bits
int bits
The bit-width of the target machine.
Definition: Target.h:50
Halide::Target::SSE41
@ SSE41
Definition: Target.h:87
Halide
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
Definition: AbstractGenerator.h:19
Halide::Target::Windows
@ Windows
Definition: Target.h:26
Halide::Target::os
enum Halide::Target::OS os
Halide::Target::X86
@ X86
Definition: Target.h:41
Halide::Func::compile_to_file
void compile_to_file(const std::string &filename_prefix, const std::vector< Argument > &args, const std::string &fn_name="", const Target &target=get_target_from_environment())
Compile to object file and header pair, with the given arguments.
Halide::Func::parallel
Func & parallel(const VarOrRVar &var)
Mark a dimension to be traversed in parallel.
Halide::Func
A halide function.
Definition: Func.h:687
Halide::Target::set_features
void set_features(const std::vector< Feature > &features_to_set, bool value=true)
Halide::Target::IOS
@ IOS
Definition: Target.h:29
fclose
int fclose(void *)
Halide::Target::arch
enum Halide::Target::Arch arch
uint32_t
unsigned __INT32_TYPE__ uint32_t
Definition: runtime_internal.h:25
Halide::Target::ARMv7s
@ ARMv7s
Definition: Target.h:93
Halide::Target::ARM
@ ARM
Definition: Target.h:42
Halide::Param
A scalar parameter to a halide pipeline.
Definition: Param.h:22
Halide::Target
A struct representing a target machine and os to generate code for.
Definition: Target.h:19