Halide
simd_op_check.h
Go to the documentation of this file.
1 #ifndef SIMD_OP_CHECK_H
2 #define SIMD_OP_CHECK_H
3 
4 #include "Halide.h"
5 #include "halide_test_dirs.h"
6 
7 #include <fstream>
8 
9 namespace Halide {
/// Outcome of a single SIMD op check.
struct TestResult {
    /// The op string the check was asked to look for.
    std::string op;
    /// Accumulated failure text; left empty when nothing went wrong.
    std::string error_msg;
};
14 
15 struct Task {
16  std::string op;
17  std::string name;
20 };
21 
23 public:
24  std::string filter{"*"};
26  std::vector<Task> tasks;
27 
29 
30  ImageParam in_f32{Float(32), 1, "in_f32"};
31  ImageParam in_f64{Float(64), 1, "in_f64"};
32  ImageParam in_i8{Int(8), 1, "in_i8"};
33  ImageParam in_u8{UInt(8), 1, "in_u8"};
34  ImageParam in_i16{Int(16), 1, "in_i16"};
35  ImageParam in_u16{UInt(16), 1, "in_u16"};
36  ImageParam in_i32{Int(32), 1, "in_i32"};
37  ImageParam in_u32{UInt(32), 1, "in_u32"};
38  ImageParam in_i64{Int(64), 1, "in_i64"};
39  ImageParam in_u64{UInt(64), 1, "in_u64"};
40 
41  const std::vector<ImageParam> image_params{in_f32, in_f64, in_i8, in_u8, in_i16, in_u16, in_i32, in_u32, in_i64, in_u64};
42  const std::vector<Argument> arg_types{in_f32, in_f64, in_i8, in_u8, in_i16, in_u16, in_i32, in_u32, in_i64, in_u64};
43  int W;
44  int H;
45 
46  SimdOpCheckTest(const Target t, int w, int h)
47  : target(t), W(w), H(h) {
48  target = target
54  }
55  virtual ~SimdOpCheckTest() = default;
56  size_t get_num_threads() const {
57  return num_threads;
58  }
59 
60  void set_num_threads(size_t n) {
61  num_threads = n;
62  }
63  bool can_run_code() const {
64  // Assume we are configured to run wasm if requested
65  // (we'll fail further downstream if not)
67  return true;
68  }
69  // If we can (target matches host), run the error checking Halide::Func.
70  Target host_target = get_host_target();
71  bool can_run_the_code =
72  (target.arch == host_target.arch &&
73  target.bits == host_target.bits &&
74  target.os == host_target.os);
75  // A bunch of feature flags also need to match between the
76  // compiled code and the host in order to run the code.
83  if (target.has_feature(f) != host_target.has_feature(f)) {
84  can_run_the_code = false;
85  }
86  }
87  return can_run_the_code;
88  }
89 
90  // Check if pattern p matches str, allowing for wildcards (*).
91  bool wildcard_match(const char *p, const char *str) const {
92  // Match all non-wildcard characters.
93  while (*p && *str && *p == *str && *p != '*') {
94  str++;
95  p++;
96  }
97 
98  if (!*p) {
99  return *str == 0;
100  } else if (*p == '*') {
101  p++;
102  do {
103  if (wildcard_match(p, str)) {
104  return true;
105  }
106  } while (*str++);
107  } else if (*p == ' ') { // ignore whitespace in pattern
108  p++;
109  if (wildcard_match(p, str)) {
110  return true;
111  }
112  } else if (*str == ' ') { // ignore whitespace in string
113  str++;
114  if (wildcard_match(p, str)) {
115  return true;
116  }
117  }
118  return !*p;
119  }
120 
121  bool wildcard_match(const std::string &p, const std::string &str) const {
122  return wildcard_match(p.c_str(), str.c_str());
123  }
124 
125  // Check if a substring of str matches a pattern p.
126  bool wildcard_search(const std::string &p, const std::string &str) const {
127  return wildcard_match("*" + p + "*", str);
128  }
129 
130  TestResult check_one(const std::string &op, const std::string &name, int vector_width, Expr e) {
131  std::ostringstream error_msg;
132 
133  class HasInlineReduction : public Internal::IRVisitor {
135  void visit(const Internal::Call *op) override {
136  if (op->call_type == Internal::Call::Halide) {
137  Internal::Function f(op->func);
138  if (f.has_update_definition()) {
139  inline_reduction = f;
140  result = true;
141  }
142  }
143  IRVisitor::visit(op);
144  }
145 
146  public:
147  Internal::Function inline_reduction;
148  bool result = false;
149  } has_inline_reduction;
150  e.accept(&has_inline_reduction);
151 
152  // Define a vectorized Halide::Func that uses the pattern.
153  Halide::Func f(name);
154  f(x, y) = e;
155  f.bound(x, 0, W).vectorize(x, vector_width);
156  f.compute_root();
157 
158  // Include a scalar version
159  Halide::Func f_scalar("scalar_" + name);
160  f_scalar(x, y) = e;
161  f_scalar.bound(x, 0, W);
162  f_scalar.compute_root();
163 
164  if (has_inline_reduction.result) {
165  // If there's an inline reduction, we want to vectorize it
166  // over the RVar.
167  Var xo, xi;
168  RVar rxi;
169  Func g{has_inline_reduction.inline_reduction};
170 
171  // Do the reduction separately in f_scalar
172  g.clone_in(f_scalar);
173 
174  g.compute_at(f, x)
175  .update()
176  .split(x, xo, xi, vector_width)
177  .fuse(g.rvars()[0], xi, rxi)
178  .atomic()
179  .vectorize(rxi);
180  }
181 
182  // The output to the pipeline is the maximum absolute difference as a double.
183  RDom r_check(0, W, 0, H);
184  Halide::Func error("error_" + name);
185  error() = Halide::cast<double>(maximum(absd(f(r_check.x, r_check.y), f_scalar(r_check.x, r_check.y))));
186 
187  setup_images();
188  {
189  // Compile just the vector Func to assembly.
190  std::string asm_filename = output_directory + "check_" + name + ".s";
191  f.compile_to_assembly(asm_filename, arg_types, target);
192 
193  std::ifstream asm_file;
194  asm_file.open(asm_filename);
195 
196  bool found_it = false;
197 
198  std::ostringstream msg;
199  msg << op << " did not generate for target=" << target.to_string() << " vector_width=" << vector_width << ". Instead we got:\n";
200 
201  std::string line;
202  while (getline(asm_file, line)) {
203  msg << line << "\n";
204 
205  // Check for the op in question
206  found_it |= wildcard_search(op, line) && !wildcard_search("_" + op, line);
207  }
208 
209  if (!found_it) {
210  error_msg << "Failed: " << msg.str() << "\n";
211  }
212 
213  asm_file.close();
214  }
215 
216  // Also compile the error checking Func (to be sure it compiles without error)
217  std::string fn_name = "test_" + name;
218  error.compile_to_file(output_directory + fn_name, arg_types, fn_name, target);
219 
220  bool can_run_the_code = can_run_code();
221  if (can_run_the_code) {
222  Target run_target = target
226 
227  error.infer_input_bounds({}, run_target);
228  // Fill the inputs with noise
229  std::mt19937 rng(123);
230  for (auto p : image_params) {
231  Halide::Buffer<> buf = p.get();
232  if (!buf.defined()) continue;
233  assert(buf.data());
234  Type t = buf.type();
235  // For floats/doubles, we only use values that aren't
236  // subject to rounding error that may differ between
237  // vectorized and non-vectorized versions
238  if (t == Float(32)) {
239  buf.as<float>().for_each_value([&](float &f) { f = (rng() & 0xfff) / 8.0f - 0xff; });
240  } else if (t == Float(64)) {
241  buf.as<double>().for_each_value([&](double &f) { f = (rng() & 0xfff) / 8.0 - 0xff; });
242  } else {
243  // Random bits is fine
244  for (uint32_t *ptr = (uint32_t *)buf.data();
245  ptr != (uint32_t *)buf.data() + buf.size_in_bytes() / 4;
246  ptr++) {
247  // Never use the top four bits, to avoid
248  // signed integer overflow.
249  *ptr = ((uint32_t)rng()) & 0x0fffffff;
250  }
251  }
252  }
253  Realization r = error.realize();
254  double e = Buffer<double>(r[0])();
255  // Use a very loose tolerance for floating point tests. The
256  // kinds of bugs we're looking for are codegen bugs that
257  // return the wrong value entirely, not floating point
258  // accuracy differences between vectors and scalars.
259  if (e > 0.001) {
260  error_msg << "The vector and scalar versions of " << name << " disagree. Maximum error: " << e << "\n";
261 
262  std::string error_filename = output_directory + "error_" + name + ".s";
263  error.compile_to_assembly(error_filename, arg_types, target);
264 
265  std::ifstream error_file;
266  error_file.open(error_filename);
267 
268  error_msg << "Error assembly: \n";
269  std::string line;
270  while (getline(error_file, line)) {
271  error_msg << line << "\n";
272  }
273 
274  error_file.close();
275  }
276  }
277 
278  return {op, error_msg.str()};
279  }
280 
281  void check(std::string op, int vector_width, Expr e) {
282  // Make a name for the test by uniquing then sanitizing the op name
283  std::string name = "op_" + op;
284  for (size_t i = 0; i < name.size(); i++) {
285  if (!isalnum(name[i])) name[i] = '_';
286  }
287 
288  name += "_" + std::to_string(tasks.size());
289 
290  // Bail out after generating the unique_name, so that names are
291  // unique across different processes and don't depend on filter
292  // settings.
293  if (!wildcard_match(filter, op)) return;
294 
295  tasks.emplace_back(Task{op, name, vector_width, e});
296  }
297  virtual void add_tests() = 0;
298  virtual void setup_images() {
299  for (auto p : image_params) {
300  p.reset();
301  }
302  }
303  virtual bool test_all() {
304  /* First add some tests based on the target */
305  add_tests();
306  Internal::ThreadPool<TestResult> pool(num_threads);
307  std::vector<std::future<TestResult>> futures;
308  for (const Task &task : tasks) {
309  futures.push_back(pool.async([this, task]() {
310  return check_one(task.op, task.name, task.vector_width, task.expr);
311  }));
312  }
313 
314  bool success = true;
315  for (auto &f : futures) {
316  const TestResult &result = f.get();
317  std::cout << result.op << "\n";
318  if (!result.error_msg.empty()) {
319  std::cerr << result.error_msg;
320  success = false;
321  }
322  }
323 
324  return success;
325  }
326 
327 private:
328  size_t num_threads;
329  const Halide::Var x{"x"}, y{"y"};
330 };
331 } // namespace Halide
332 #endif // SIMD_OP_CHECK_H
Halide::SimdOpCheckTest::in_u64
ImageParam in_u64
Definition: simd_op_check.h:39
Halide::Target::has_feature
bool has_feature(Feature f) const
halide_test_dirs.h
Halide::Func::realize
Realization realize(std::vector< int32_t > sizes, const Target &target=Target(), const ParamMap &param_map=ParamMap::empty_map())
Evaluate this function over some rectangular domain and return the resulting buffer or buffers.
Halide::TestResult::op
std::string op
Definition: simd_op_check.h:11
Halide::Internal::IRVisitor::visit
virtual void visit(const IntImm *)
Halide::Var
A Halide variable, to be used when defining functions.
Definition: Var.h:19
Halide::SimdOpCheckTest::check_one
TestResult check_one(const std::string &op, const std::string &name, int vector_width, Expr e)
Definition: simd_op_check.h:130
Halide::SimdOpCheckTest::wildcard_match
bool wildcard_match(const std::string &p, const std::string &str) const
Definition: simd_op_check.h:121
Halide::SimdOpCheckTest::in_f64
ImageParam in_f64
Definition: simd_op_check.h:31
Halide::Task::name
std::string name
Definition: simd_op_check.h:17
Halide::Target::NoNEON
@ NoNEON
Definition: Target.h:69
Halide::Target::AVX
@ AVX
Definition: Target.h:63
Halide::Internal::Call::func
FunctionPtr func
Definition: IR.h:583
Halide::SimdOpCheckTest::arg_types
const std::vector< Argument > arg_types
Definition: simd_op_check.h:42
Halide::Task::expr
Expr expr
Definition: simd_op_check.h:19
Halide::get_host_target
Target get_host_target()
Return the target corresponding to the host machine.
Halide::SimdOpCheckTest::in_i16
ImageParam in_i16
Definition: simd_op_check.h:34
Halide::Target::DisableLLVMLoopOpt
@ DisableLLVMLoopOpt
Definition: Target.h:120
Halide::Float
Type Float(int bits, int lanes=1)
Construct a floating-point type.
Definition: Type.h:482
Halide::Internal::IRVisitor
A base class for algorithms that need to recursively walk over the IR.
Definition: IRVisitor.h:21
Halide::TestResult::error_msg
std::string error_msg
Definition: simd_op_check.h:12
Halide::Target::WebAssembly
@ WebAssembly
Definition: Target.h:46
Halide::maximum
Expr maximum(Expr, const std::string &s="maximum")
Halide::Target::Feature
Feature
Optional features a target can have.
Definition: Target.h:57
Halide::SimdOpCheckTest::in_f32
ImageParam in_f32
Definition: simd_op_check.h:30
Halide::Internal::Function::has_update_definition
bool has_update_definition() const
Does this function have an update definition?
Halide::Task::op
std::string op
Definition: simd_op_check.h:16
Halide::Target::to_string
std::string to_string() const
Convert the Target into a string form that can be reconstituted by merge_string(),...
Halide::absd
Expr absd(Expr a, Expr b)
Return the absolute difference between two values.
Halide::ImageParam
An Image parameter to a halide pipeline.
Definition: ImageParam.h:23
Halide::Func::vectorize
Func & vectorize(const VarOrRVar &var)
Mark a dimension to be computed all-at-once as a single vector.
Halide::Func::compute_at
Func & compute_at(const Func &f, const Var &var)
Compute this function as needed for each unique value of the given var for the given calling function...
Halide::SimdOpCheckTest::filter
std::string filter
Definition: simd_op_check.h:24
Halide::SimdOpCheckTest::check
void check(std::string op, int vector_width, Expr e)
Definition: simd_op_check.h:281
Halide::SimdOpCheckTest::in_i32
ImageParam in_i32
Definition: simd_op_check.h:36
Halide::RDom::y
RVar y
Definition: RDom.h:335
Halide::Internal::Call::Halide
@ Halide
A call to a Func.
Definition: IR.h:471
Halide::SimdOpCheckTest::W
int W
Definition: simd_op_check.h:43
Halide::Target::NoAsserts
@ NoAsserts
Definition: Target.h:60
Halide::Target::bits
int bits
The bit-width of the target machine.
Definition: Target.h:51
Halide::SimdOpCheckTest::set_num_threads
void set_num_threads(size_t n)
Definition: simd_op_check.h:60
Halide::Target::SSE41
@ SSE41
Definition: Target.h:62
Halide::Type
Types in the halide type system.
Definition: Type.h:269
Halide::Internal::IRHandle::accept
void accept(IRVisitor *v) const
Dispatch to the correct visitor method for this node.
Definition: Expr.h:190
Halide
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
Definition: AddAtomicMutex.h:21
Halide::Target::FMA
@ FMA
Definition: Target.h:65
Halide::SimdOpCheckTest::in_i64
ImageParam in_i64
Definition: simd_op_check.h:38
Halide::Func::update
Stage update(int idx=0)
Get a handle on an update step for the purposes of scheduling it.
Halide::SimdOpCheckTest::in_u32
ImageParam in_u32
Definition: simd_op_check.h:37
Halide::SimdOpCheckTest
Definition: simd_op_check.h:22
Halide::Buffer<>
Halide::Func::infer_input_bounds
void infer_input_bounds(const std::vector< int32_t > &sizes, const Target &target=get_jit_target_from_environment(), const ParamMap &param_map=ParamMap::empty_map())
For a given size of output, or a given output buffer, determine the bounds required of all unbound Im...
Halide::SimdOpCheckTest::in_u16
ImageParam in_u16
Definition: simd_op_check.h:35
Halide::SimdOpCheckTest::wildcard_search
bool wildcard_search(const std::string &p, const std::string &str) const
Definition: simd_op_check.h:126
Halide::Target::F16C
@ F16C
Definition: Target.h:67
Halide::SimdOpCheckTest::output_directory
std::string output_directory
Definition: simd_op_check.h:25
Halide::Target::FMA4
@ FMA4
Definition: Target.h:66
Halide::SimdOpCheckTest::target
Target target
Definition: simd_op_check.h:28
Halide::Internal::ThreadPool::async
std::future< T > async(Func func, Args... args)
Definition: ThreadPool.h:117
Halide::Target::os
enum Halide::Target::OS os
Halide::Func::compile_to_assembly
void compile_to_assembly(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name, const Target &target=get_target_from_environment())
Statically compile this function to text assembly equivalent to the object file generated by compile_...
Halide::Func::compile_to_file
void compile_to_file(const std::string &filename_prefix, const std::vector< Argument > &args, const std::string &fn_name="", const Target &target=get_target_from_environment())
Compile to object file and header pair, with the given arguments.
Halide::Target::VSX
@ VSX
Definition: Target.h:70
Halide::UInt
Type UInt(int bits, int lanes=1)
Constructing an unsigned integer type.
Definition: Type.h:477
Halide::Func::clone_in
Func clone_in(const Func &f)
Similar to Func::in; however, instead of replacing the call to this Func with an identity Func that r...
Halide::Target::NoRuntime
@ NoRuntime
Definition: Target.h:91
Halide::SimdOpCheckTest::wildcard_match
bool wildcard_match(const char *p, const char *str) const
Definition: simd_op_check.h:91
Halide::Internal::ThreadPool::num_processors_online
static size_t num_processors_online()
Definition: ThreadPool.h:79
Halide::SimdOpCheckTest::image_params
const std::vector< ImageParam > image_params
Definition: simd_op_check.h:41
Halide::SimdOpCheckTest::in_i8
ImageParam in_i8
Definition: simd_op_check.h:32
Halide::Func
A halide function.
Definition: Func.h:667
Halide::Internal::ThreadPool
Definition: ThreadPool.h:40
Halide::SimdOpCheckTest::test_all
virtual bool test_all()
Definition: simd_op_check.h:303
Halide::Func::compute_root
Func & compute_root()
Compute all of this function once ahead of time.
Halide::Func::bound
Func & bound(const Var &var, Expr min, Expr extent)
Statically declare that the range over which a function should be evaluated is given by the second an...
Halide::Task::vector_width
int vector_width
Definition: simd_op_check.h:18
Halide::Internal::Function
A reference-counted handle to Halide's internal representation of a function.
Definition: Function.h:38
Halide::Target::WasmSimd128
@ WasmSimd128
Definition: Target.h:121
Halide::Target::POWER_ARCH_2_07
@ POWER_ARCH_2_07
Definition: Target.h:71
Halide::SimdOpCheckTest::setup_images
virtual void setup_images()
Definition: simd_op_check.h:298
Halide::SimdOpCheckTest::in_u8
ImageParam in_u8
Definition: simd_op_check.h:33
Halide::Internal::Call
A function call.
Definition: IR.h:464
Halide::SimdOpCheckTest::tasks
std::vector< Task > tasks
Definition: simd_op_check.h:26
Halide::RDom::x
RVar x
Direct access to the first four dimensions of the reduction domain.
Definition: RDom.h:335
Halide::RVar
A reduction variable represents a single dimension of a reduction domain (RDom).
Definition: RDom.h:29
Halide::RDom
A multi-dimensional domain over which to iterate.
Definition: RDom.h:191
Halide::SimdOpCheckTest::H
int H
Definition: simd_op_check.h:44
Halide::Target::with_feature
Target with_feature(Feature f) const
Return a copy of the target with the given feature set.
Halide::Target::arch
enum Halide::Target::Arch arch
buf
char * buf
Definition: printer.h:32
Halide::SimdOpCheckTest::SimdOpCheckTest
SimdOpCheckTest(const Target t, int w, int h)
Definition: simd_op_check.h:46
Halide::Internal::get_test_tmp_dir
std::string get_test_tmp_dir()
Return the path to a directory that can be safely written to when running tests; the contents directo...
Definition: halide_test_dirs.h:75
Halide::Expr
A fragment of Halide syntax.
Definition: Expr.h:256
Halide::Realization
A Realization is a vector of references to existing Buffer objects.
Definition: Realization.h:21
Halide::Target::AVX2
@ AVX2
Definition: Target.h:64
Halide::SimdOpCheckTest::~SimdOpCheckTest
virtual ~SimdOpCheckTest()=default
Halide::Task
Definition: simd_op_check.h:15
uint32_t
unsigned __INT32_TYPE__ uint32_t
Definition: runtime_internal.h:21
Halide::Target::ARMv7s
@ ARMv7s
Definition: Target.h:68
Halide::Target::NoBoundsQuery
@ NoBoundsQuery
Definition: Target.h:61
Halide::Internal::Call::call_type
CallType call_type
Definition: IR.h:475
Halide::Target::without_feature
Target without_feature(Feature f) const
Return a copy of the target with the given feature cleared.
Halide::SimdOpCheckTest::can_run_code
bool can_run_code() const
Definition: simd_op_check.h:63
Halide::Target
A struct representing a target machine and os to generate code for.
Definition: Target.h:19
Halide::SimdOpCheckTest::add_tests
virtual void add_tests()=0
Halide::Target::AVX512
@ AVX512
Definition: Target.h:105
Halide::TestResult
Definition: simd_op_check.h:10
Halide::SimdOpCheckTest::get_num_threads
size_t get_num_threads() const
Definition: simd_op_check.h:56
Halide::Int
Type Int(int bits, int lanes=1)
Constructing a signed integer type.
Definition: Type.h:472