#include "Halide.h"
#include <stdio.h>

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <string>
#include <vector>

#ifdef __SSE2__
#include <emmintrin.h>
#endif
#include "clock.h"
#include "halide_image_io.h"
using namespace Halide::Tools;
int main(int argc, char **argv) {
{
f(x, y) = x + y;
f(3, 7) = 42;
f(x, y) = f(x, y) + 17;
f(x, 3) = f(x, 0) * f(x, 10);
f(0, y) = f(0, y) / f(3, y);
f(x, 17) = x + 8;
f(0, y) = y * 8;
f(x, x + 1) = x + 8;
f(y / 2, y) = f(0, y) * 17;
g(x, y) = x + y;
g(2, 1) = 42;
g(x, 0) = g(x, 1);
int result[4][4];
for (int y = 0; y < 4; y++) {
for (int x = 0; x < 4; x++) {
result[y][x] = x + y;
}
}
result[1][2] = 42;
for (int x = 0; x < 4; x++) {
result[0][x] = result[1][x];
}
}
{
f(x, y) = (x + y) / 100.0f;
f(x, r) = f(x, r) * f(x, r);
float c_result[100][100];
for (int y = 0; y < 100; y++) {
for (int x = 0; x < 100; x++) {
c_result[y][x] = (x + y) / 100.0f;
}
}
for (int x = 0; x < 100; x++) {
for (int r = 0; r < 50; r++) {
c_result[r][x] = c_result[r][x] * c_result[r][x];
}
}
for (int y = 0; y < 100; y++) {
for (int x = 0; x < 100; x++) {
if (fabs(halide_result(x, y) - c_result[y][x]) > 0.01f) {
printf("halide_result(%d, %d) = %f instead of %f\n",
x, y, halide_result(x, y), c_result[y][x]);
return -1;
}
}
}
}
{
Func histogram(
"histogram");
histogram(x) = 0;
RDom r(0, input.width(), 0, input.height());
histogram(input(r.
x, r.
y)) += 1;
int c_result[256];
for (int x = 0; x < 256; x++) {
c_result[x] = 0;
}
for (int r_y = 0; r_y < input.height(); r_y++) {
for (int r_x = 0; r_x < input.width(); r_x++) {
c_result[input(r_x, r_y)] += 1;
}
}
for (int x = 0; x < 256; x++) {
if (c_result[x] != halide_result(x)) {
printf("halide_result(%d) = %d instead of %d\n",
x, halide_result(x), c_result[x]);
return -1;
}
}
}
{
f(x, y) = x * y;
f(x, 0) = f(x, 8);
f(0, y) = f(8, y) + 2;
f.
update(1).
split(y, yo, yi, 4).parallel(yo);
int c_result[16][16];
for (int y = 0; y < 16; y++) {
for (int x_vec = 0; x_vec < 4; x_vec++) {
int x[] = {x_vec * 4, x_vec * 4 + 1, x_vec * 4 + 2, x_vec * 4 + 3};
c_result[y][x[0]] = x[0] * y;
c_result[y][x[1]] = x[1] * y;
c_result[y][x[2]] = x[2] * y;
c_result[y][x[3]] = x[3] * y;
}
}
for (int x_vec = 0; x_vec < 4; x_vec++) {
int x[] = {x_vec * 4, x_vec * 4 + 1, x_vec * 4 + 2, x_vec * 4 + 3};
c_result[0][x[0]] = c_result[8][x[0]];
c_result[0][x[1]] = c_result[8][x[1]];
c_result[0][x[2]] = c_result[8][x[2]];
c_result[0][x[3]] = c_result[8][x[3]];
}
for (int yo = 0; yo < 4; yo++) {
for (int yi = 0; yi < 4; yi++) {
int y = yo * 4 + yi;
c_result[y][0] = c_result[y][8] + 2;
}
}
for (int y = 0; y < 16; y++) {
for (int x = 0; x < 16; x++) {
if (halide_result(x, y) != c_result[y][x]) {
printf("halide_result(%d, %d) = %d instead of %d\n",
x, y, halide_result(x, y), c_result[y][x]);
return -1;
}
}
}
}
{
producer(x) = x * 2;
producer(x) += 10;
consumer(x) = 2 * producer(x);
int c_result[10];
for (int x = 0; x < 10; x++) {
int producer_storage[1];
producer_storage[0] = x * 2;
producer_storage[0] = producer_storage[0] + 10;
c_result[x] = 2 * producer_storage[0];
}
for (int x = 0; x < 10; x++) {
if (halide_result(x) != c_result[x]) {
printf("halide_result(%d) = %d instead of %d\n",
x, halide_result(x), c_result[x]);
return -1;
}
}
}
{
producer(x) = x * 17;
consumer(x) = 2 * producer(x);
consumer(x) += 50;
int c_result[10];
for (int x = 0; x < 10; x++) {
int producer_storage[1];
producer_storage[0] = x * 17;
c_result[x] = 2 * producer_storage[0];
}
for (int x = 0; x < 10; x++) {
c_result[x] += 50;
}
for (int x = 0; x < 10; x++) {
if (halide_result(x) != c_result[x]) {
printf("halide_result(%d) = %d instead of %d\n",
x, halide_result(x), c_result[x]);
return -1;
}
}
}
{
producer(x) = x * 17;
consumer(x) = 100 - x * 10;
consumer(x) += producer(x);
int c_result[10];
for (int x = 0; x < 10; x++) {
c_result[x] = 100 - x * 10;
}
for (int x = 0; x < 10; x++) {
int producer_storage[1];
producer_storage[0] = x * 17;
c_result[x] += producer_storage[0];
}
for (int x = 0; x < 10; x++) {
if (halide_result(x) != c_result[x]) {
printf("halide_result(%d) = %d instead of %d\n",
x, halide_result(x), c_result[x]);
return -1;
}
}
}
{
producer(x) = x * 17;
consumer(x) = 170 - producer(x);
consumer(x) += producer(x) / 2;
int c_result[10];
for (int x = 0; x < 10; x++) {
int producer_storage[1];
producer_storage[0] = x * 17;
c_result[x] = 170 - producer_storage[0];
}
for (int x = 0; x < 10; x++) {
int producer_storage[1];
producer_storage[0] = x * 17;
c_result[x] += producer_storage[0] / 2;
}
for (int x = 0; x < 10; x++) {
if (halide_result(x) != c_result[x]) {
printf("halide_result(%d) = %d instead of %d\n",
x, halide_result(x), c_result[x]);
return -1;
}
}
}
{
producer(x, y) = (x * y) / 10 + 8;
consumer(x, y) = x + y;
consumer(x, 0) += producer(x, x);
consumer(0, y) += producer(y, 9 - y);
Func producer_1, producer_2, consumer_2;
producer_1(x, y) = producer(x, y);
producer_2(x, y) = producer(x, y);
consumer_2(x, y) = x + y;
consumer_2(x, 0) += producer_1(x, x);
consumer_2(0, y) += producer_2(y, 9 - y);
int c_result[10][10];
for (int y = 0; y < 10; y++) {
for (int x = 0; x < 10; x++) {
c_result[y][x] = x + y;
}
}
for (int x = 0; x < 10; x++) {
int producer_1_storage[1];
producer_1_storage[0] = (x * x) / 10 + 8;
c_result[0][x] += producer_1_storage[0];
}
for (int y = 0; y < 10; y++) {
int producer_2_storage[1];
producer_2_storage[0] = (y * (9 - y)) / 10 + 8;
c_result[y][0] += producer_2_storage[0];
}
for (int y = 0; y < 10; y++) {
for (int x = 0; x < 10; x++) {
if (halide_result(x, y) != c_result[y][x]) {
printf("halide_result(%d, %d) = %d instead of %d\n",
x, y, halide_result(x, y), c_result[y][x]);
return -1;
}
}
}
}
{
producer(x) = x % 8;
consumer(x) = x + 10;
consumer(x) += r + producer(x + r);
int c_result[10];
for (int x = 0; x < 10; x++) {
c_result[x] = x + 10;
}
for (int x = 0; x < 10; x++) {
for (int r = 0; r < 5; r++) {
int producer_storage[1];
producer_storage[0] = (x + r) % 8;
c_result[x] += r + producer_storage[0];
}
}
for (int x = 0; x < 10; x++) {
if (halide_result(x) != c_result[x]) {
printf("halide_result(%d) = %d instead of %d\n",
x, halide_result(x), c_result[x]);
return -1;
}
}
}
{
Func clamped = BoundaryConditions::repeat_edge(input);
local_sum(x, y) = 0;
local_sum(x, y) += clamped(x + r.
x, y + r.
y);
blurry(x, y) = cast<uint8_t>(local_sum(x, y) / 25);
for (int y = 0; y < input.height(); y++) {
for (int x = 0; x < input.width(); x++) {
int local_sum[1];
local_sum[0] = 0;
for (int r_y = -2; r_y <= 2; r_y++) {
for (int r_x = -2; r_x <= 2; r_x++) {
int clamped_x = std::min(std::max(x + r_x, 0), input.width() - 1);
int clamped_y = std::min(std::max(y + r_y, 0), input.height() - 1);
local_sum[0] += input(clamped_x, clamped_y);
}
}
c_result(x, y) = (
uint8_t)(local_sum[0] / 25);
}
}
for (int y = 0; y < input.height(); y++) {
for (int x = 0; x < input.width(); x++) {
if (halide_result(x, y) != c_result(x, y)) {
printf("halide_result(%d, %d) = %d instead of %d\n",
x, y, halide_result(x, y), c_result(x, y));
return -1;
}
}
}
}
{
f1(x) = sum(r + x) * 7;
anon(x) = 0;
anon(x) += r + x;
f2(x) = anon(x) * 7;
int c_result[10];
for (int x = 0; x < 10; x++) {
int anon[1];
anon[0] = 0;
for (int r = 0; r < 100; r++) {
anon[0] += r + x;
}
c_result[x] = anon[0] * 7;
}
for (int x = 0; x < 10; x++) {
if (halide_result_1(x) != c_result[x]) {
printf("halide_result_1(%d) = %d instead of %d\n",
x, halide_result_1(x), c_result[x]);
return -1;
}
if (halide_result_2(x) != c_result[x]) {
printf("halide_result_2(%d) = %d instead of %d\n",
x, halide_result_2(x), c_result[x]);
return -1;
}
}
}
{
Expr x_clamped = clamp(x, 0, input.width() - 1);
Expr y_clamped = clamp(y, 0, input.height() - 1);
clamped(x, y) = input(x_clamped, y_clamped);
spread(x, y) = (maximum(clamped(x + box.
x, y + box.
y)) -
minimum(clamped(x + box.
x, y + box.
y)));
#ifdef __SSE2__
#ifdef _OPENMP
double t1 = current_time();
#endif
for (int iters = 0; iters < 100; iters++) {
#pragma omp parallel for
for (int yo = 0; yo < (input.height() + 31) / 32; yo++) {
int y_base = std::min(yo * 32, input.height() - 32);
int clamped_width = input.width() + 4;
for (int yi = 0; yi < 32; yi++) {
int y = y_base + yi;
uint8_t *output_row = &c_result(0, y);
int min_y_clamped = (yi == 0) ? (y - 2) : (y + 2);
int max_y_clamped = (y + 2);
for (int cy = min_y_clamped; cy <= max_y_clamped; cy++) {
clamped_storage + (cy & 7) * clamped_width;
int clamped_y = std::min(std::max(cy, 0), input.height() - 1);
uint8_t *input_row = &input(0, clamped_y);
for (int x = -2; x < input.width() + 2; x++) {
int clamped_x = std::min(std::max(x, 0), input.width() - 1);
*clamped_row++ = input_row[clamped_x];
}
}
for (int x_vec = 0; x_vec < (input.width() + 15) / 16; x_vec++) {
int x_base = std::min(x_vec * 16, input.width() - 16);
__m128i minimum_storage, maximum_storage;
maximum_storage = _mm_setzero_si128();
for (int max_y = y - 2; max_y <= y + 2; max_y++) {
clamped_storage + (max_y & 7) * clamped_width;
for (int max_x = x_base - 2; max_x <= x_base + 2; max_x++) {
__m128i v = _mm_loadu_si128(
(__m128i const *)(clamped_row + max_x + 2));
maximum_storage = _mm_max_epu8(maximum_storage, v);
}
}
minimum_storage = _mm_cmpeq_epi32(_mm_setzero_si128(),
_mm_setzero_si128());
for (int min_y = y - 2; min_y <= y + 2; min_y++) {
clamped_storage + (min_y & 7) * clamped_width;
for (int min_x = x_base - 2; min_x <= x_base + 2; min_x++) {
__m128i v = _mm_loadu_si128(
(__m128i const *)(clamped_row + min_x + 2));
minimum_storage = _mm_min_epu8(minimum_storage, v);
}
}
__m128i spread = _mm_sub_epi8(maximum_storage, minimum_storage);
_mm_storeu_si128((__m128i *)(output_row + x_base), spread);
}
}
}
}
#ifdef _OPENMP
double t2 = current_time();
for (int iters = 0; iters < 100; iters++) {
}
double t3 = current_time();
printf("Halide spread took %f ms. C equivalent took %f ms\n",
(t3 - t2) / 100, (t2 - t1) / 100);
#endif
for (int y = 0; y < input.height(); y++) {
for (int x = 0; x < input.width(); x++) {
if (halide_result(x, y) != c_result(x, y)) {
printf("halide_result(%d, %d) = %d instead of %d\n",
x, y, halide_result(x, y), c_result(x, y));
return -1;
}
}
}
#endif
}
printf("Success!\n");
return 0;
}
// ---------------------------------------------------------------------------
// API reference notes for the Halide symbols used above.
// ---------------------------------------------------------------------------
// Buffer:
//   A Halide::Buffer is a named shared reference to a Halide::Runtime::Buffer.
// Func & trace_stores()
//   Trace all stores to the buffer backing this Func by emitting calls to halide_trace.
// Func & trace_loads()
//   Trace all loads from this Func by emitting calls to halide_trace.
// Stage update(int idx=0)
//   Get a handle on an update step for the purposes of scheduling it.
// Func & split(const VarOrRVar &old, const VarOrRVar &outer, const VarOrRVar &inner, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
//   Split a dimension into inner and outer subdimensions with the given names, where the inner dimension ...
// Func & store_at(const Func &f, const Var &var)
//   Allocate storage for this function within f's loop over var.
// Realization realize(std::vector< int32_t > sizes={}, const Target &target=Target())
//   Evaluate this function over some rectangular domain and return the resulting buffer or buffers.
// Func & parallel(const VarOrRVar &var)
//   Mark a dimension to be traversed in parallel.
// Func & vectorize(const VarOrRVar &var)
//   Mark a dimension to be computed all-at-once as a single vector.
// Func & compute_at(const Func &f, const Var &var)
//   Compute this function as needed for each unique value of the given var for the given calling function...
// RDom:
//   A multi-dimensional domain over which to iterate.
// RVar x
//   Direct access to the first four dimensions of the reduction domain.
// Stage & vectorize(const VarOrRVar &var)
// Var:
//   A Halide variable, to be used when defining functions.
// FunctionDAG.h:
//   This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
// unsigned __INT8_TYPE__ uint8_t
// Expr:
//   A fragment of Halide syntax.