Halide 19.0.0
Halide compiler and libraries
Loading...
Searching...
No Matches
simd_op_check.h
Go to the documentation of this file.
1#ifndef SIMD_OP_CHECK_H
2#define SIMD_OP_CHECK_H
3
4#include "Halide.h"
5#include "halide_test_dirs.h"
6#include "halide_thread_pool.h"
7#include "test_sharding.h"
8
9#include <fstream>
10#include <iostream>
11
12namespace {
13
14using namespace Halide;
15
16// Some exprs of each type to use in checked expressions. These will turn
17// into loads to thread-local image params.
18Expr input(const Type &t, const Expr &arg) {
19 return Internal::Call::make(t, "input", {arg}, Internal::Call::Extern);
20}
21Expr in_f16(const Expr &arg) {
22 return input(Float(16), arg);
23}
24Expr in_bf16(const Expr &arg) {
25 return input(BFloat(16), arg);
26}
27Expr in_f32(const Expr &arg) {
28 return input(Float(32), arg);
29}
30Expr in_f64(const Expr &arg) {
31 return input(Float(64), arg);
32}
33Expr in_i8(const Expr &arg) {
34 return input(Int(8), arg);
35}
36Expr in_i16(const Expr &arg) {
37 return input(Int(16), arg);
38}
39Expr in_i32(const Expr &arg) {
40 return input(Int(32), arg);
41}
42Expr in_i64(const Expr &arg) {
43 return input(Int(64), arg);
44}
45Expr in_u8(const Expr &arg) {
46 return input(UInt(8), arg);
47}
48Expr in_u16(const Expr &arg) {
49 return input(UInt(16), arg);
50}
51Expr in_u32(const Expr &arg) {
52 return input(UInt(32), arg);
53}
54Expr in_u64(const Expr &arg) {
55 return input(UInt(64), arg);
56}
57} // namespace
58
59namespace Halide {
// Outcome of one check: the op pattern that was looked for, plus any
// diagnostic text. test_all() treats a non-empty error_msg as a failure.
struct TestResult {
    std::string op{};
    std::string error_msg{};
};
64
65struct Task {
66 std::string op;
67 std::string name;
70};
71
73public:
// Largest values of the signed 8/16/32-bit and unsigned 8/16-bit integer types.
74 static constexpr int max_i8 = 127;
75 static constexpr int max_i16 = 32767;
76 static constexpr int max_i32 = 0x7fffffff;
77 static constexpr int max_u8 = 255;
78 static constexpr int max_u16 = 65535;
// Held as an Expr (the result of Type::max()) rather than as a constexpr int.
79 const Expr max_u32 = UInt(32).max();
80
// Wildcard pattern compared against each op in check(); ops that do not
// match are never queued. The default "*" accepts every op.
81 std::string filter{"*"};
// Checks queued by check() and later executed by test_all().
// NOTE(review): the numbering of this listing skips 82 and 85. Members such
// as `output_directory`, `target` and `rng_seed` are used elsewhere in the
// file but are not declared in the visible lines.
83 std::vector<Task> tasks;
84
86
// Extents of the 2-D reduction domain over which check_one() compares the
// vectorized and scalar results; W is also the bound placed on x.
87 int W;
88 int H;
89
91
93
// Build a checker for target `t` with a w x h comparison domain; the RNG
// seed starts at 0 and can be changed with set_seed().
// NOTE(review): listing lines 96-97 are absent, so the receiver of the
// .with_feature(...) chain below is not visible here. It adds the NoAsserts
// and NoRuntime features to some Target — confirm which one.
94 SimdOpCheckTest(const Target t, int w, int h)
95 : target(t), W(w), H(h), rng_seed(0) {
98 .with_feature(Target::NoAsserts)
99 .with_feature(Target::NoRuntime);
100 }
101 virtual ~SimdOpCheckTest() = default;
102
103 void set_seed(int seed) {
104 rng_seed = seed;
105 }
106
// Decide whether code compiled for `target` can be executed on this machine.
// Returns true only when arch, bit width and OS equal the host's and every
// feature in the list below has the same setting on both targets.
// NOTE(review): listing lines 108-109 (an early block that closes at 110) and
// 120-149 (the feature list itself) are absent from this view.
107 virtual bool can_run_code() const {
110 }
111 // If we can (target matches host), run the error checking Halide::Func.
112 Target host_target = get_host_target();
113 bool can_run_the_code =
114 (target.arch == host_target.arch &&
115 target.bits == host_target.bits &&
116 target.os == host_target.os);
117 // A bunch of feature flags also need to match between the
118 // compiled code and the host in order to run the code.
119 for (Target::Feature f : {
150 }) {
// A single differing feature is enough to rule out running the code.
151 if (target.has_feature(f) != host_target.has_feature(f)) {
152 can_run_the_code = false;
153 }
154 }
155 return can_run_the_code;
156 }
157
// Compile `error` to a header, an object file and an assembly file, then scan
// the assembly line by line for `op`.
//
// error        - pipeline to compile.
// op           - wildcard pattern expected somewhere in the assembly.
// name         - test name; output files are named "test_" + name.
// vector_width - used only in the failure message.
// arg_types    - arguments of the compiled function.
// error_msg    - receives "Failed: ..." plus the full assembly text when no
//                line matches; left untouched on success.
158 virtual void compile_and_check(Func error,
159 const std::string &op,
160 const std::string &name,
161 int vector_width,
162 const std::vector<Argument> &arg_types,
163 std::ostringstream &error_msg) {
164 std::string fn_name = "test_" + name;
// output_directory is prepended as-is, with no path separator added here.
165 std::string file_name = output_directory + fn_name;
166
// NOTE(review): `ext` is not declared in the visible lines (listing line 167
// is absent); it is indexed by OutputFileType and supplies `.extension`.
168 std::map<OutputFileType, std::string> outputs = {
169 {OutputFileType::c_header, file_name + ext.at(OutputFileType::c_header).extension},
170 {OutputFileType::object, file_name + ext.at(OutputFileType::object).extension},
171 {OutputFileType::assembly, file_name + ".s"},
172 };
173 error.compile_to(outputs, arg_types, fn_name, target);
174
// The stream is not checked after open(); if it failed, the loop below reads
// nothing and the op is reported as missing with an empty listing.
175 std::ifstream asm_file;
176 asm_file.open(file_name + ".s");
177
178 bool found_it = false;
179
// Build the failure text up front; it is only emitted if nothing matches.
180 std::ostringstream msg;
181 msg << op << " did not generate for target=" << get_run_target().to_string() << " vector_width=" << vector_width << ". Instead we got:\n";
182
183 std::string line;
184 while (getline(asm_file, line)) {
185 msg << line << "\n";
186
187 // Check for the op in question
// A line also matching "_" + op is not counted — presumably so that symbol
// names containing the op (such as the test function name) do not count as
// the instruction; confirm.
188 found_it |= wildcard_search(op, line) && !wildcard_search("_" + op, line);
189 }
190
191 if (!found_it) {
192 error_msg << "Failed: " << msg.str() << "\n";
193 }
194
195 asm_file.close();
196 }
197
198 // Check if pattern p matches str, allowing for wildcards (*).
199 bool wildcard_match(const char *p, const char *str) const {
200 // Match all non-wildcard characters.
201 while (*p && *str && *p == *str && *p != '*') {
202 str++;
203 p++;
204 }
205
206 if (!*p) {
207 return *str == 0;
208 } else if (*p == '*') {
209 p++;
210 do {
211 if (wildcard_match(p, str)) {
212 return true;
213 }
214 } while (*str++);
215 } else if (*p == ' ') { // ignore whitespace in pattern
216 p++;
217 if (wildcard_match(p, str)) {
218 return true;
219 }
220 } else if (*str == ' ') { // ignore whitespace in string
221 str++;
222 if (wildcard_match(p, str)) {
223 return true;
224 }
225 }
226 return !*p;
227 }
228
229 bool wildcard_match(const std::string &p, const std::string &str) const {
230 return wildcard_match(p.c_str(), str.c_str());
231 }
232
233 // Check if a substring of str matches a pattern p.
234 bool wildcard_search(const std::string &p, const std::string &str) const {
235 return wildcard_match("*" + p + "*", str);
236 }
237
244
// Run a single check: build a vectorized and a scalar Func from `e`, compile
// them via compile_and_check() to look for `op` in the assembly, and, when
// can_run_code() allows it, execute the pipeline on random inputs and compare
// the two versions.
//
// op           - wildcard pattern expected in the generated assembly.
// name         - unique test name used for Func and file names.
// vector_width - width used when vectorizing x.
// e            - expression under test; its "input" calls are replaced with
//                ImageParam loads.
// Returns {op, error text}; the text is empty when nothing went wrong.
//
// NOTE(review): this listing has lost several interior lines (its numbering
// skips 274, 284, 287, 299, 301-302 and 367), so `f` inside the two visitor
// classes and `output` further down are used without a visible declaration.
245 TestResult check_one(const std::string &op, const std::string &name, int vector_width, Expr e) {
246 std::ostringstream error_msg;
247
248 // Map the input calls in the Expr to loads to local
249 // imageparams, so that we're not sharing state across threads.
250 std::vector<ImageParam> image_params{
251 ImageParam{Float(32), 1, "in_f32"},
252 ImageParam{Float(64), 1, "in_f64"},
253 ImageParam{Float(16), 1, "in_f16"},
254 ImageParam{BFloat(16), 1, "in_bf16"},
255 ImageParam{Int(8), 1, "in_i8"},
256 ImageParam{UInt(8), 1, "in_u8"},
257 ImageParam{Int(16), 1, "in_i16"},
258 ImageParam{UInt(16), 1, "in_u16"},
259 ImageParam{Int(32), 1, "in_i32"},
260 ImageParam{UInt(32), 1, "in_u32"},
261 ImageParam{Int(64), 1, "in_i64"},
262 ImageParam{UInt(64), 1, "in_u64"}};
263
// Give every input the configured host alignment and round its minimum
// coordinate down to a multiple of that alignment, measured in elements.
264 for (auto &p : image_params) {
265 const int alignment_bytes = image_param_alignment();
266 p.set_host_alignment(alignment_bytes);
267 const int alignment = alignment_bytes / p.type().bytes();
268 p.dim(0).set_min((p.dim(0).min() / alignment) * alignment);
269 }
270
271 const std::vector<Argument> arg_types(image_params.begin(), image_params.end());
272
// Mutator that swaps each "input" call for a load from the ImageParam whose
// type equals the call's type, and also mutates the definitions of non-weak
// Funcs called from the expression.
273 class HookUpImageParams : public Internal::IRMutator {
275
276 Expr visit(const Internal::Call *op) override {
277 if (op->name == "input") {
278 for (auto &p : image_params) {
279 if (p.type() == op->type) {
280 return p(mutate(op->args[0]));
281 }
282 }
283 } else if (op->call_type == Internal::Call::Halide && !op->func.weak) {
285 f.mutate(this);
286 }
288 }
289 const std::vector<ImageParam> &image_params;
290
291 public:
292 HookUpImageParams(const std::vector<ImageParam> &image_params)
293 : image_params(image_params) {
294 }
295 } hook_up_image_params(image_params);
296 e = hook_up_image_params.mutate(e);
297
// Visitor that records a called Function whose first update definition has
// at least one reduction variable.
298 class HasInlineReduction : public Internal::IRVisitor {
300 void visit(const Internal::Call *op) override {
303 if (f.has_update_definition() &&
304 f.update(0).schedule().rvars().size() > 0) {
305 inline_reduction = f;
306 result = true;
307 }
308 }
309 IRVisitor::visit(op);
310 }
311
312 public:
313 Internal::Function inline_reduction;
314 bool result = false;
315 } has_inline_reduction;
316 e.accept(&has_inline_reduction);
317
318 // Define a vectorized Halide::Func that uses the pattern.
319 Halide::Func f(name);
320 f(x, y) = e;
321 f.bound(x, 0, W).vectorize(x, vector_width);
322 f.compute_root();
323
324 // Include a scalar version
325 Halide::Func f_scalar("scalar_" + name);
326 f_scalar(x, y) = e;
327
328 if (has_inline_reduction.result) {
329 // If there's an inline reduction, we want to vectorize it
330 // over the RVar.
331 Var xo, xi;
332 RVar rxi;
333 Func g{has_inline_reduction.inline_reduction};
334
335 // Do the reduction separately in f_scalar
336 g.clone_in(f_scalar);
337
338 g.compute_at(f, x)
339 .update()
340 .split(x, xo, xi, vector_width)
341 .atomic(true)
342 .vectorize(g.rvars()[0])
343 .vectorize(xi);
344 }
345
346 // The output to the pipeline is the maximum absolute difference as a double.
347 RDom r_check(0, W, 0, H);
348 Halide::Func error("error_" + name);
349 error() = Halide::cast<double>(maximum(absd(f(r_check.x, r_check.y), f_scalar(r_check.x, r_check.y))));
350
351 compile_and_check(error, op, name, vector_width, arg_types, error_msg);
352
353 bool can_run_the_code = can_run_code();
354 if (can_run_the_code) {
355 Target run_target = get_run_target();
356
357 // Make some unallocated input buffers
358 std::vector<Runtime::Buffer<>> inputs(image_params.size());
359
360 std::vector<Argument> args(image_params.size());
361 for (size_t i = 0; i < args.size(); i++) {
362 args[i] = image_params[i];
// Null host pointer: the first call below only queries the required bounds.
363 inputs[i] = Runtime::Buffer<>(args[i].type, nullptr, 0);
364 }
365 auto callable = error.compile_to_callable(args, run_target);
366
368 output(0) = 1; // To ensure we'll fail if it's never written to
369
370 // Do the bounds query call
// The explicit argument list below is tied to the 12 ImageParams declared at
// the top of this function.
371 assert(inputs.size() == 12);
372 (void)callable(inputs[0], inputs[1], inputs[2], inputs[3],
373 inputs[4], inputs[5], inputs[6], inputs[7],
374 inputs[8], inputs[9], inputs[10], inputs[11],
375 output);
376
// Seeded from rng_seed so that a run can be reproduced with set_seed().
377 std::mt19937 rng;
378 rng.seed(rng_seed);
379
380 // Allocate the input buffers and fill them with noise
381 for (size_t i = 0; i < inputs.size(); i++) {
// Inputs whose size is still zero after the bounds query are left unallocated.
382 if (inputs[i].size_in_bytes()) {
383 inputs[i].allocate();
384
385 Type t = inputs[i].type();
386 // For floats/doubles, we only use values that aren't
387 // subject to rounding error that may differ between
388 // vectorized and non-vectorized versions
389 if (t == Float(32)) {
390 inputs[i].as<float>().for_each_value([&](float &f) { f = (rng() & 0xfff) / 8.0f - 0xff; });
391 } else if (t == Float(64)) {
392 inputs[i].as<double>().for_each_value([&](double &f) { f = (rng() & 0xfff) / 8.0 - 0xff; });
393 } else if (t == Float(16)) {
394 inputs[i].as<float16_t>().for_each_value([&](float16_t &f) { f = float16_t((rng() & 0xff) / 8.0f - 0xf); });
395 } else {
396 // Random bits is fine
// Filled in 32-bit words; size_in_bytes() / 4 rounds down, so a tail shorter
// than four bytes would not be written.
397 for (uint32_t *ptr = (uint32_t *)inputs[i].data();
398 ptr != (uint32_t *)inputs[i].data() + inputs[i].size_in_bytes() / 4;
399 ptr++) {
400 // Never use the top four bits, to avoid
401 // signed integer overflow.
402 *ptr = ((uint32_t)rng()) & 0x0fffffff;
403 }
404 }
405 }
406 }
407
408 // Do the real call
409 (void)callable(inputs[0], inputs[1], inputs[2], inputs[3],
410 inputs[4], inputs[5], inputs[6], inputs[7],
411 inputs[8], inputs[9], inputs[10], inputs[11],
412 output);
413
414 double e = output(0);
415 // Use a very loose tolerance for floating point tests. The
416 // kinds of bugs we're looking for are codegen bugs that
417 // return the wrong value entirely, not floating point
418 // accuracy differences between vectors and scalars.
419 if (e > 0.001) {
420 error_msg << "The vector and scalar versions of " << name << " disagree. Maximum error: " << e << "\n";
421
// On disagreement, write the assembly out again and append it to the
// message so the failing code can be inspected.
422 std::string error_filename = output_directory + "error_" + name + ".s";
423 error.compile_to_assembly(error_filename, arg_types, target);
424
425 std::ifstream error_file;
426 error_file.open(error_filename);
427
428 error_msg << "Error assembly: \n";
429 std::string line;
430 while (getline(error_file, line)) {
431 error_msg << line << "\n";
432 }
433
434 error_file.close();
435 }
436 }
437
438 return {op, error_msg.str()};
439 }
440
441 void check(std::string op, int vector_width, Expr e) {
442 // Make a name for the test by uniquing then sanitizing the op name
443 std::string name = "op_" + op;
444 for (size_t i = 0; i < name.size(); i++) {
445 if (!isalnum(name[i])) name[i] = '_';
446 }
447
448 name += "_" + std::to_string(tasks.size());
449
450 // Bail out after generating the unique_name, so that names are
451 // unique across different processes and don't depend on filter
452 // settings.
453 if (!wildcard_match(filter, op)) return;
454
455 tasks.emplace_back(Task{op, name, vector_width, e});
456 }
// Supplied by each concrete test class; test_all() calls it first, before
// running anything. Presumably it queues work through check() — confirm
// against the subclasses.
457 virtual void add_tests() = 0;
458 virtual int image_param_alignment() {
459 return 16;
460 }
461
462 virtual bool use_multiple_threads() const {
463 return true;
464 }
465
// Queue the checks via add_tests(), run the ones selected by the sharder on a
// thread pool, and print one line per finished check.
// Returns false as soon as a result carries an error message (after writing
// it to std::cerr), true when every collected result is clean.
466 virtual bool test_all() {
467 /* First add some tests based on the target */
468 add_tests();
469
470 // Remove irrelevant noise from output
471 const Target run_target = get_run_target();
472 const std::string run_target_str = run_target.to_string();
473
474 Sharder sharder;
475
// NOTE(review): listing line 477 is absent; it holds the condition of the
// conditional expression whose two outcomes (processor count, or 1) appear
// below.
476 Halide::Tools::ThreadPool<TestResult> pool(
478 Halide::Tools::ThreadPool<TestResult>::num_processors_online() :
479 1);
480 std::vector<std::future<TestResult>> futures;
481
482 for (size_t t = 0; t < tasks.size(); t++) {
483 if (!sharder.should_run(t)) continue;
// `task` refers to an element of `tasks`, which is not modified while the
// submitted lambdas run.
484 const auto &task = tasks.at(t);
485 futures.push_back(pool.async([&]() {
486 return check_one(task.op, task.name, task.vector_width, task.expr);
487 }));
488 }
489
// Results are collected in submission order, so output order is stable.
490 for (auto &f : futures) {
491 auto result = f.get();
// Pad the op name to a 32-column field, always leaving at least one space.
492 constexpr int tabstop = 32;
493 const int spaces = std::max(1, tabstop - (int)result.op.size());
494 std::cout << result.op << std::string(spaces, ' ') << "(" << run_target_str << ")\n";
495 if (!result.error_msg.empty()) {
496 std::cerr << result.error_msg;
497 // The thread-pool destructor will block until in-progress tasks
498 // are done, and then will discard any tasks that haven't been
499 // launched yet.
500 return false;
501 }
502 }
503
504 return true;
505 }
506
507 template<typename SIMDOpCheckT>
508 static int main(int argc, char **argv, const std::vector<Target> &targets_to_test) {
509 Target host = get_host_target();
510 std::cout << "host is: " << host << "\n";
511
512 const int seed = argc > 2 ? atoi(argv[2]) : time(nullptr);
513 std::cout << "simd_op_check test seed: " << seed << "\n";
514
515 for (const auto &t : targets_to_test) {
516 if (!t.supported()) {
517 std::cout << "[SKIP] Unsupported target: " << t << "\n";
518 return 0;
519 }
520 SIMDOpCheckT test(t);
521
522 if (!t.supported()) {
523 std::cout << "Halide was compiled without support for " << t.to_string() << ". Skipping.\n";
524 continue;
525 }
526
527 if (argc > 1) {
528 test.filter = argv[1];
529 }
530
531 if (getenv("HL_SIMD_OP_CHECK_FILTER")) {
532 test.filter = getenv("HL_SIMD_OP_CHECK_FILTER");
533 }
534
535 test.set_seed(seed);
536
537 if (argc > 2) {
538 // Don't forget: if you want to run the standard tests to a specific output
539 // directory, you'll need to invoke with the first arg enclosed
540 // in quotes (to avoid it being wildcard-expanded by the shell):
541 //
542 // correctness_simd_op_check "*" /path/to/output
543 //
544 test.output_directory = argv[2];
545 }
546
547 bool success = test.test_all();
548
549 // Compile a runtime for this target, for use in the static test.
550 compile_standalone_runtime(test.output_directory + "simd_op_check_runtime.o", test.target);
551
552 if (!success) {
553 return 1;
554 }
555 }
556
557 std::cout << "Success!\n";
558 return 0;
559 }
560
561private:
562 const Halide::Var x{"x"}, y{"y"};
563};
564
565} // namespace Halide
566
567#endif // SIMD_OP_CHECK_H
A halide function.
Definition Func.h:700
void compile_to_assembly(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name, const Target &target=get_target_from_environment())
Statically compile this function to text assembly equivalent to the object file generated by compile_...
Stage update(int idx=0)
Get a handle on an update step for the purposes of scheduling it.
Func & compute_root()
Compute all of this function once ahead of time.
Callable compile_to_callable(const std::vector< Argument > &args, const Target &target=get_jit_target_from_environment())
Eagerly jit compile the function to machine code and return a callable struct that behaves like a fun...
void compile_to(const std::map< OutputFileType, std::string > &output_files, const std::vector< Argument > &args, const std::string &fn_name, const Target &target=get_target_from_environment())
Compile and generate multiple target files with single call.
Func clone_in(const Func &f)
Similar to Func::in; however, instead of replacing the call to this Func with an identity Func that r...
Func & vectorize(const VarOrRVar &var)
Mark a dimension to be computed all-at-once as a single vector.
Func & bound(const Var &var, Expr min, Expr extent)
Statically declare that the range over which a function should be evaluated is given by the second an...
Func & compute_at(const Func &f, const Var &var)
Compute this function as needed for each unique value of the given var for the given calling function...
An Image parameter to a halide pipeline.
Definition ImageParam.h:23
const StageSchedule & schedule() const
Get the default (no-specialization) stage-specific schedule associated with this definition.
A reference-counted handle to Halide's internal representation of a function.
Definition Function.h:39
bool has_update_definition() const
Does this function have an update definition?
void mutate(IRMutator *mutator)
Accept a mutator to mutate all of the definitions and arguments of this function.
Definition & update(int idx=0)
Get a mutable handle to this function's update definition at index 'idx'.
A base class for passes over the IR which modify it (e.g.
Definition IRMutator.h:26
virtual Expr visit(const IntImm *)
A base class for algorithms that need to recursively walk over the IR.
Definition IRVisitor.h:19
virtual void visit(const IntImm *)
const std::vector< ReductionVariable > & rvars() const
RVars of reduction domain associated with this schedule if there is any.
bool should_run(size_t task_index) const
A multi-dimensional domain over which to iterate.
Definition RDom.h:193
RVar x
Direct access to the first four dimensions of the reduction domain.
Definition RDom.h:339
RVar y
Definition RDom.h:339
A reduction variable represents a single dimension of a reduction domain (RDom).
Definition RDom.h:29
A templated Buffer class that wraps halide_buffer_t and adds functionality.
static Buffer< T, Dims, InClassDimStorage > make_scalar()
Make a zero-dimensional Buffer.
static constexpr int max_u8
static constexpr int max_i32
virtual bool use_multiple_threads() const
virtual void add_tests()=0
virtual int image_param_alignment()
bool wildcard_match(const std::string &p, const std::string &str) const
static constexpr int max_i8
static constexpr int max_u16
bool wildcard_search(const std::string &p, const std::string &str) const
bool wildcard_match(const char *p, const char *str) const
virtual ~SimdOpCheckTest()=default
SimdOpCheckTest(const Target t, int w, int h)
void check(std::string op, int vector_width, Expr e)
Target get_run_target() const
static int main(int argc, char **argv, const std::vector< Target > &targets_to_test)
static constexpr int max_i16
TestResult check_one(const std::string &op, const std::string &name, int vector_width, Expr e)
virtual bool can_run_code() const
std::vector< Task > tasks
virtual void compile_and_check(Func error, const std::string &op, const std::string &name, int vector_width, const std::vector< Argument > &arg_types, std::ostringstream &error_msg)
A Halide variable, to be used when defining functions.
Definition Var.h:19
std::map< OutputFileType, const OutputInfo > get_output_info(const Target &target)
std::string get_test_tmp_dir()
Return the path to a directory that can be safely written to when running tests; the contents directo...
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
Target get_host_target()
Return the target corresponding to the host machine.
Type BFloat(int bits, int lanes=1)
Construct a floating-point type in the bfloat format.
Definition Type.h:556
Type UInt(int bits, int lanes=1)
Constructing an unsigned integer type.
Definition Type.h:546
Type Float(int bits, int lanes=1)
Construct a floating-point type.
Definition Type.h:551
Expr maximum(Expr, const std::string &s="maximum")
Type Int(int bits, int lanes=1)
Constructing a signed integer type.
Definition Type.h:541
Expr absd(Expr a, Expr b)
Return the absolute difference between two values.
void compile_standalone_runtime(const std::string &object_filename, const Target &t)
Create an object file containing the Halide runtime for a given target.
Internal::ConstantInterval cast(Type t, const Internal::ConstantInterval &a)
Cast operators for ConstantIntervals.
int atoi(const char *)
unsigned __INT32_TYPE__ uint32_t
char * getenv(const char *)
A fragment of Halide syntax.
Definition Expr.h:258
A function call.
Definition IR.h:490
@ Extern
A call to an external C-ABI function, possibly with side-effects.
Definition IR.h:494
@ Halide
A call to a Func.
Definition IR.h:497
std::string name
Definition IR.h:491
FunctionPtr func
Definition IR.h:674
CallType call_type
Definition IR.h:501
std::vector< Expr > args
Definition IR.h:492
static Expr make(Type type, IntrinsicOp op, const std::vector< Expr > &args, CallType call_type, FunctionPtr func=FunctionPtr(), int value_index=0, const Buffer<> &image=Buffer<>(), Parameter param=Parameter())
void accept(IRVisitor *v) const
Dispatch to the correct visitor method for this node.
Definition Expr.h:192
static bool can_jit_target(const Target &target)
If the given target can be executed via the wasm executor, return true.
A struct representing a target machine and os to generate code for.
Definition Target.h:19
enum Halide::Target::Arch arch
bool has_feature(Feature f) const
int bits
The bit-width of the target machine.
Definition Target.h:50
enum Halide::Target::OS os
std::string to_string() const
Convert the Target into a string form that can be reconstituted by merge_string(),...
Target without_feature(Feature f) const
Return a copy of the target with the given feature cleared.
Feature
Optional features a target can have.
Definition Target.h:83
@ AVX512_Cannonlake
Definition Target.h:132
@ AVX512_SapphireRapids
Definition Target.h:133
@ POWER_ARCH_2_07
Definition Target.h:97
Target with_feature(Feature f) const
Return a copy of the target with the given feature set.
std::string op
std::string name
std::string error_msg
Types in the halide type system.
Definition Type.h:283
Expr max() const
Return an expression which is the maximum value of this type.
Class that provides a type that implements half precision floating point (IEEE754 2008 binary16) in s...
Definition Float16.h:17