#include "Halide.h"
#include <algorithm>
#include <stdio.h>
int main(int argc, char **argv) {
{
Func gradient(
"gradient");
gradient(x, y) = x + y;
printf("Evaluating gradient row-major\n");
printf("Equivalent C:\n");
for (int y = 0; y < 4; y++) {
for (int x = 0; x < 4; x++) {
printf("Evaluating at x = %d, y = %d: %d\n", x, y, x + y);
}
}
printf("\n\n");
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
{
Func gradient(
"gradient_col_major");
gradient(x, y) = x + y;
printf("Evaluating gradient column-major\n");
printf("Equivalent C:\n");
for (int x = 0; x < 4; x++) {
for (int y = 0; y < 4; y++) {
printf("Evaluating at x = %d, y = %d: %d\n", x, y, x + y);
}
}
printf("\n");
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
{
Func gradient(
"gradient_split");
gradient(x, y) = x + y;
gradient.
split(x, x_outer, x_inner, 2);
printf("Evaluating gradient with x split into x_outer and x_inner \n");
printf("Equivalent C:\n");
for (int y = 0; y < 4; y++) {
for (int x_outer = 0; x_outer < 2; x_outer++) {
for (int x_inner = 0; x_inner < 2; x_inner++) {
int x = x_outer * 2 + x_inner;
printf("Evaluating at x = %d, y = %d: %d\n", x, y, x + y);
}
}
}
printf("\n");
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
{
Func gradient(
"gradient_fused");
gradient(x, y) = x + y;
gradient.
fuse(x, y, fused);
printf("Evaluating gradient with x and y fused\n");
printf("Equivalent C:\n");
for (int fused = 0; fused < 4 * 4; fused++) {
int y = fused / 4;
int x = fused % 4;
printf("Evaluating at x = %d, y = %d: %d\n", x, y, x + y);
}
printf("\n");
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
{
Func gradient(
"gradient_tiled");
gradient(x, y) = x + y;
Var x_outer, x_inner, y_outer, y_inner;
gradient.
split(x, x_outer, x_inner, 4);
gradient.
split(y, y_outer, y_inner, 4);
gradient.
reorder(x_inner, y_inner, x_outer, y_outer);
printf("Evaluating gradient in 4x4 tiles\n");
printf("Equivalent C:\n");
for (int y_outer = 0; y_outer < 2; y_outer++) {
for (int x_outer = 0; x_outer < 2; x_outer++) {
for (int y_inner = 0; y_inner < 4; y_inner++) {
for (int x_inner = 0; x_inner < 4; x_inner++) {
int x = x_outer * 4 + x_inner;
int y = y_outer * 4 + y_inner;
printf("Evaluating at x = %d, y = %d: %d\n", x, y, x + y);
}
}
}
}
printf("\n");
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
{
Func gradient(
"gradient_in_vectors");
gradient(x, y) = x + y;
gradient.
split(x, x_outer, x_inner, 4);
printf("Evaluating gradient with x_inner vectorized \n");
printf("Equivalent C:\n");
for (int y = 0; y < 4; y++) {
for (int x_outer = 0; x_outer < 2; x_outer++) {
int x_vec[] = {x_outer * 4 + 0,
x_outer * 4 + 1,
x_outer * 4 + 2,
x_outer * 4 + 3};
int val[] = {x_vec[0] + y,
x_vec[1] + y,
x_vec[2] + y,
x_vec[3] + y};
printf("Evaluating at <%d, %d, %d, %d>, <%d, %d, %d, %d>:"
" <%d, %d, %d, %d>\n",
x_vec[0], x_vec[1], x_vec[2], x_vec[3],
y, y, y, y,
val[0], val[1], val[2], val[3]);
}
}
printf("\n");
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
{
Func gradient(
"gradient_unroll");
gradient(x, y) = x + y;
gradient.
split(x, x_outer, x_inner, 2);
printf("Evaluating gradient unrolled by a factor of two\n");
printf("Equivalent C:\n");
for (int y = 0; y < 4; y++) {
for (int x_outer = 0; x_outer < 2; x_outer++) {
{
int x_inner = 0;
int x = x_outer * 2 + x_inner;
printf("Evaluating at x = %d, y = %d: %d\n", x, y, x + y);
}
{
int x_inner = 1;
int x = x_outer * 2 + x_inner;
printf("Evaluating at x = %d, y = %d: %d\n", x, y, x + y);
}
}
}
printf("\n");
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
{
Func gradient(
"gradient_split_7x2");
gradient(x, y) = x + y;
gradient.
split(x, x_outer, x_inner, 3);
printf("Evaluating gradient over a 7x2 box with x split by three \n");
printf("Equivalent C:\n");
for (int y = 0; y < 2; y++) {
for (int x_outer = 0; x_outer < 3; x_outer++) {
for (int x_inner = 0; x_inner < 3; x_inner++) {
int x = x_outer * 3;
if (x > 4) x = 4;
x += x_inner;
printf("Evaluating at x = %d, y = %d: %d\n", x, y, x + y);
}
}
}
printf("\n");
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
{
Func gradient(
"gradient_fused_tiles");
gradient(x, y) = x + y;
Var x_outer, y_outer, x_inner, y_inner, tile_index;
gradient.
tile(x, y, x_outer, y_outer, x_inner, y_inner, 4, 4);
gradient.
fuse(x_outer, y_outer, tile_index);
printf("Evaluating gradient tiles in parallel\n");
printf("Equivalent (serial) C:\n");
for (int tile_index = 0; tile_index < 4; tile_index++) {
int y_outer = tile_index / 2;
int x_outer = tile_index % 2;
for (int y_inner = 0; y_inner < 4; y_inner++) {
for (int x_inner = 0; x_inner < 4; x_inner++) {
int y = y_outer * 4 + y_inner;
int x = x_outer * 4 + x_inner;
printf("Evaluating at x = %d, y = %d: %d\n", x, y, x + y);
}
}
}
printf("\n");
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
{
Func gradient_fast(
"gradient_fast");
gradient_fast(x, y) = x + y;
Var x_outer, y_outer, x_inner, y_inner, tile_index;
gradient_fast
.
tile(x, y, x_outer, y_outer, x_inner, y_inner, 64, 64)
.
fuse(x_outer, y_outer, tile_index)
Var x_inner_outer, y_inner_outer, x_vectors, y_pairs;
gradient_fast
.
tile(x_inner, y_inner, x_inner_outer, y_inner_outer, x_vectors, y_pairs, 4, 2)
printf("Checking Halide result against equivalent C...\n");
for (int tile_index = 0; tile_index < 6 * 4; tile_index++) {
int y_outer = tile_index / 4;
int x_outer = tile_index % 4;
for (int y_inner_outer = 0; y_inner_outer < 64 / 2; y_inner_outer++) {
for (int x_inner_outer = 0; x_inner_outer < 64 / 4; x_inner_outer++) {
int x = std::min(x_outer * 64, 350 - 64) + x_inner_outer * 4;
int x_vec[4] = {x + 0,
x + 1,
x + 2,
x + 3};
int y_base = std::min(y_outer * 64, 250 - 64) + y_inner_outer * 2;
{
int y = y_base + 0;
int y_vec[4] = {y, y, y, y};
int val[4] = {x_vec[0] + y_vec[0],
x_vec[1] + y_vec[1],
x_vec[2] + y_vec[2],
x_vec[3] + y_vec[3]};
for (int i = 0; i < 4; i++) {
if (result(x_vec[i], y_vec[i]) != val[i]) {
printf("There was an error at %d %d!\n",
x_vec[i], y_vec[i]);
return -1;
}
}
}
{
int y = y_base + 1;
int y_vec[4] = {y, y, y, y};
int val[4] = {x_vec[0] + y_vec[0],
x_vec[1] + y_vec[1],
x_vec[2] + y_vec[2],
x_vec[3] + y_vec[3]};
for (int i = 0; i < 4; i++) {
if (result(x_vec[i], y_vec[i]) != val[i]) {
printf("There was an error at %d %d!\n",
x_vec[i], y_vec[i]);
return -1;
}
}
}
}
}
}
printf("\n");
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
printf("Success!\n");
return 0;
}
// Reference summary of the Halide API used above:
//
// Halide::Buffer
//     A Halide::Buffer is a named shared reference to a Halide::Runtime::Buffer.
//
// void print_loop_nest()
//     Write out the loop nests specified by the schedule for this Function.
//
// Func &trace_stores()
//     Trace all stores to the buffer backing this Func by emitting calls to
//     halide_trace.
//
// Func &reorder(const std::vector<VarOrRVar> &vars)
//     Reorder variables to have the given nesting order, from innermost out.
//
// Func &split(const VarOrRVar &old, const VarOrRVar &outer,
//             const VarOrRVar &inner, const Expr &factor,
//             TailStrategy tail = TailStrategy::Auto)
//     Split a dimension into inner and outer subdimensions with the given
//     names, where the inner dimension iterates from 0 to factor-1.
//
// Func &tile(const VarOrRVar &x, const VarOrRVar &y,
//            const VarOrRVar &xo, const VarOrRVar &yo,
//            const VarOrRVar &xi, const VarOrRVar &yi,
//            const Expr &xfactor, const Expr &yfactor,
//            TailStrategy tail = TailStrategy::Auto)
//     Split two dimensions at once by the given factors, and then reorder the
//     resulting dimensions to be xi, yi, xo, yo from innermost outwards.
//
// Func &unroll(const VarOrRVar &var)
//     Mark a dimension to be completely unrolled.
//
// Realization realize(std::vector<int32_t> sizes = {},
//                     const Target &target = Target())
//     Evaluate this function over some rectangular domain and return the
//     resulting buffer or buffers.
//
// Func &parallel(const VarOrRVar &var)
//     Mark a dimension to be traversed in parallel.
//
// Func &vectorize(const VarOrRVar &var)
//     Mark a dimension to be computed all-at-once as a single vector.
//
// Func &fuse(const VarOrRVar &inner, const VarOrRVar &outer,
//            const VarOrRVar &fused)
//     Join two dimensions into a single fused dimension.
//
// Halide::Var
//     A Halide variable, to be used when defining functions.
//
// FunctionDAG
//     This file defines the class FunctionDAG, which is our representation of
//     a Halide pipeline, ... (description truncated in the source)