#include <stdio.h>
#include "Halide.h"
#include "clock.h"
#include "halide_image_io.h"
using namespace Halide::Tools;
// Halide loop variables shared by the pipeline definition and its schedules.
// NOTE(review): `Var` is used unqualified, but the only using-directive
// visible in this file is for Halide::Tools - confirm that namespace Halide is
// brought into scope somewhere.

// Coordinates of the image-shaped Funcs: column, row, colour channel.
Var x, y, c;
// `i` indexes the look-up table; `ii` is not referenced by the visible code.
Var i, ii;
// Outer/inner pieces produced when the x and y loops are split or tiled.
Var xo, yo, xi, yi;
// Wraps the Funcs of one image pipeline so the same algorithm can be built
// more than once and given a different schedule each time (main constructs two
// instances and calls a different schedule_for_* method on each).
// NOTE(review): this class body appears to have lost lines in several places
// (flagged inline below) and does not parse as written.
class MyPipeline {
public:
// The pipeline stages; each one is defined in the constructor body below.
Func lut, padded, padded16, sharpen, curved;
// NOTE(review): the constructor declarator seems to be missing - the next line
// is a bare member-initializer list. It initializes a member `input` from a
// parameter `in`, but neither declaration is visible here. Presumably `input`
// is a Buffer<uint8_t> (main passes one in) - confirm.
: input(in) {
// Look-up table: index i maps to 255 * (i / 255)^1.2, clamped to [0, 255] and
// stored as uint8_t.
lut(i) = cast<uint8_t>(
clamp(
pow(i / 255.0f, 1.2f) * 255.0f, 0, 255));
// Boundary condition: x and y are clamped into the input's extent before the
// read, so coordinates outside the image read the nearest edge pixel.
padded(x, y, c) = input(
clamp(x, 0, input.width() - 1),
clamp(y, 0, input.height() - 1), c);
// Widen to uint16_t so the arithmetic in `sharpen` is done in 16 bits.
padded16(x, y, c) = cast<uint16_t>(padded(x, y, c));
// Five-tap filter: twice the centre pixel minus the integer average of its
// four axis-aligned neighbours.
sharpen(x, y, c) = (padded16(x, y, c) * 2 -
(padded16(x - 1, y, c) +
padded16(x, y - 1, c) +
padded16(x + 1, y, c) +
padded16(x, y + 1, c)) /
4);
// Final stage: the sharpened value is used as the index into the LUT.
curved(x, y, c) = lut(sharpen(x, y, c));
}
// Applies the CPU schedule to this pipeline instance.
void schedule_for_cpu() {
// Split curved's y loop into an outer yo and an inner yi with factor 16.
// NOTE(review): the call chain below has no terminating ';' and nothing
// follows it - the rest of the statement (and possibly of the schedule)
// appears to be missing.
curved.
split(y, yo, yi, 16)
}
// Applies the GPU schedule. main uses the bool result to decide whether the
// GPU correctness and performance tests are run.
bool schedule_for_gpu() {
// find_gpu_target() is not declared before this point in the visible code.
Target target = find_gpu_target();
// NOTE(review): as written this returns false unconditionally and the
// function ends at the next brace; a guarding condition around this return
// appears to have been lost - confirm.
return false;
}
// NOTE(review): as written, the statements from here to `return true;` sit
// outside any function body. They read like the remainder of the GPU schedule.
// `block` and `thread` are not declared anywhere in the visible code.
// Split the LUT index i into `block` and `thread` with factor 16.
lut.
split(i, block, thread, 16);
// Tile curved over x and y in 8x8 tiles with gpu_tile: xo/yo are the tile
// indices, xi/yi the coordinates within a tile.
curved.
gpu_tile(x, y, xo, yo, xi, yi, 8, 8);
// Print the target that was selected.
printf(
"Target: %s\n", target.
to_string().c_str());
return true;
// NOTE(review): with the brace nesting as written, this brace closes the class
// body rather than a method.
}
void test_performance() {
double best_time = 0.0;
for (int i = 0; i < 3; i++) {
double t1 = current_time();
for (int j = 0; j < 100; j++) {
}
output.copy_to_host();
double t2 = current_time();
double elapsed = (t2 - t1) / 100;
if (i == 0 || elapsed < best_time) {
best_time = elapsed;
}
}
printf("%1.4f milliseconds\n", best_time);
}
// NOTE(review): fragment. The enclosing function header is not visible -
// presumably test_correctness(reference_output), which main calls - and
// neither is the declaration that should receive the realize() result below:
// `output` and `reference_output` are both used here without a visible
// declaration. Confirm against the intended source.
// Realize the pipeline over the full width x height x channels of the input.
curved.
realize({input.width(), input.height(), input.channels()});
// Compare `output` with `reference_output` element by element. On the first
// difference, print both values and the coordinates, then exit with status 1.
for (int c = 0; c < input.channels(); c++) {
for (int y = 0; y < input.height(); y++) {
for (int x = 0; x < input.width(); x++) {
if (output(x, y, c) != reference_output(x, y, c)) {
printf("Mismatch between output (%d) and "
"reference output (%d) at %d, %d, %d\n",
output(x, y, c),
reference_output(x, y, c),
x, y, c);
exit(1);
}
}
}
}
// NOTE(review): presumably closes the function whose header is missing above.
}
// Presumably the end of class MyPipeline - the brace nesting in the visible
// code does not line up, so confirm.
};
int main(int argc, char **argv) {
Buffer<uint8_t> reference_output(input.width(), input.height(), input.channels());
printf("Running pipeline on CPU:\n");
MyPipeline p1(input);
p1.schedule_for_cpu();
p1.curved.realize(reference_output);
printf("Running pipeline on GPU:\n");
MyPipeline p2(input);
bool has_gpu_target = p2.schedule_for_gpu();
if (has_gpu_target) {
printf("Testing GPU correctness:\n");
p2.test_correctness(reference_output);
} else {
printf("No GPU target available on the host\n");
}
printf("Testing performance on CPU:\n");
p1.test_performance();
if (has_gpu_target) {
printf("Testing performance on GPU:\n");
p2.test_performance();
}
return 0;
}
// NOTE(review): fragment. These lines read like the body of find_gpu_target()
// (called from MyPipeline::schedule_for_gpu), but the function header and most
// of its statements are not visible: `target` and `new_target` are used
// without a visible declaration, nothing is ever added to `features_to_try`,
// and the braces do not balance. Confirm against the intended source before
// relying on any of this.
// List of Target features; presumably the GPU APIs to probe - confirm.
std::vector<Target::Feature> features_to_try;
// Branch taken when pointers are 8 bytes wide; its body is empty in the
// visible code.
if (sizeof(void*) == 8) {
}
} else {
}
return new_target;
}
}
// Reached when none of the earlier returns fired: warn the user and return
// `target`.
printf("Requested GPU(s) are not supported. (Do you have the proper hardware and/or driver installed?)\n");
return target;
}