let f1.extent.0.required.s = (min((((((f1.extent.0 + -1) / 32) * 32) + f1.min.0) + 31), ((f1.min.0 + f1.extent.0) + -1)) - min(f1.min.0, ((f1.min.0 + f1.extent.0) + -32)))

let f1.min.0.required = min(f1.min.0, ((f1.min.0 + f1.extent.0) + -32))

let f1.extent.1.required.s = (min((((((f1.extent.1 + -1) / 16) * 16) + f1.min.1) + 15), ((f1.min.1 + f1.extent.1) + -1)) - min(f1.min.1, ((f1.min.1 + f1.extent.1) + -16)))

let f1.min.1.required = min(f1.min.1, ((f1.min.1 + f1.extent.1) + -16))

if (f1.host_and_dev_are_null)
{
rewrite_buffer(f1.buffer, 4, f1.min.0.required, (f1.extent.0.required.s + 1), 1, f1.min.1.required, (f1.extent.1.required.s + 1), (f1.extent.0.required.s + 1))
}
if (!f1.host_and_dev_are_null)
{
assert((f1.elem_size == 4), "Output buffer f1 has type int32, but elem_size of the buffer_t passed in is %d instead of 4", f1.elem_size)
assert((f1.min.0 <= f1.min.0.required), "Output buffer f1 is accessed at %d, which is before the min (%d) in dimension 0", f1.min.0.required, f1.min.0)
assert(((((f1.min.0.required + f1.extent.0.required.s) - f1.extent.0) + 1) <= f1.min.0), "Output buffer f1 is accessed at %d, which is beyond the max (%d) in dimension 0", (f1.min.0.required + f1.extent.0.required.s), ((f1.min.0 + f1.extent.0) + -1))
assert((f1.min.1 <= f1.min.1.required), "Output buffer f1 is accessed at %d, which is before the min (%d) in dimension 1", f1.min.1.required, f1.min.1)
assert(((((f1.min.1.required + f1.extent.1.required.s) - f1.extent.1) + 1) <= f1.min.1), "Output buffer f1 is accessed at %d, which is beyond the max (%d) in dimension 1", (f1.min.1.required + f1.extent.1.required.s), ((f1.min.1 + f1.extent.1) + -1))
assert((f1.stride.0 == 1), "Static constraint violated: f1.stride.0 == 1")

let f1.total_extent.1 = (int64(f1.extent.1) * int64(f1.extent.0))

assert((int64(f1.extent.0) <= int64(2147483647)), "Total allocation for buffer f1 exceeds 2^31 - 1")
assert(((int64(f1.extent.1) * int64(f1.stride.1)) <= int64(2147483647)), "Total allocation for buffer f1 exceeds 2^31 - 1")
assert((f1.total_extent.1 <= int64(2147483647)), "Product of extents for buffer f1 exceeds 2^31 - 1")
produce f1
{
for (f1.s0.v1.v1, 0, ((f1.extent.1 + 15) / 16))
{

let f1.s0.v1.v7.base = min(((f1.s0.v1.v1 * 16) + f1.min.1), ((f1.min.1 + f1.extent.1) + -16))

let f0.s0.v1.max_1 = select((p0 < 0), (p0 * f1.s0.v1.v7.base), (p0 * (f1.s0.v1.v7.base + 15)))

let f0.s0.v1.min_1 = select((p0 < 0), (p0 * (f1.s0.v1.v7.base + 15)), (p0 * f1.s0.v1.v7.base))

let f0.s0.v0.max_1 = select((p0 < 0), (p0 * f1.min.0.required), (p0 * ((f1.extent.0 + f1.min.0) + -1)))

let f0.s0.v0.min_1 = select((p0 < 0), (p0 * ((f1.extent.0 + f1.min.0) + -1)), (p0 * f1.min.0.required))

let f0.v0.min_realized = min(min(f0.s0.v0.min_1, (f0.s0.v0.max_1 + -7)), select((p0 < 0), (p0 * ((f1.extent.0 + f1.min.0) + -1)), (p0 * f1.min.0.required)))

let f0.v0.extent_realized.s = (max(min((((((f0.s0.v0.max_1 - f0.s0.v0.min_1) / 8) * 8) + f0.s0.v0.min_1) + 7), f0.s0.v0.max_1), select((p0 < 0), (p0 * f1.min.0.required), (p0 * ((f1.extent.0 + f1.min.0) + -1)))) - min(min(f0.s0.v0.min_1, (f0.s0.v0.max_1 + -7)), select((p0 < 0), (p0 * ((f1.extent.0 + f1.min.0) + -1)), (p0 * f1.min.0.required))))

allocate f0[int32 * (f0.v0.extent_realized.s + 1) * ((f0.s0.v1.max_1 - f0.s0.v1.min_1) + 1)]
produce f0
{
parallel (f0.s0.v1, f0.s0.v1.min_1, ((f0.s0.v1.max_1 - f0.s0.v1.min_1) + 1))
{
for (f0.s0.v0.v0.v0, 0, (((f0.s0.v0.max_1 - f0.s0.v0.min_1) + 8) / 8))
{

let f0.s0.s1.base = min(((f0.s0.v0.v0.v0 * 8) + f0.s0.v0.min_1), (f0.s0.v0.max_1 + -7))

f0[ramp(((f0.s0.s1.base - f0.v0.min_realized) + ((f0.s0.v1 - f0.s0.v1.min_1) * (f0.v0.extent_realized.s + 1))), 1, 4)] = ramp((f0.s0.s1.base + f0.s0.v1), 1, 4)
f0[ramp((((f0.s0.s1.base - f0.v0.min_realized) + ((f0.s0.v1 - f0.s0.v1.min_1) * (f0.v0.extent_realized.s + 1))) + 4), 1, 4)] = ramp(((f0.s0.s1.base + f0.s0.v1) + 4), 1, 4)
}
}
}
for (f1.s0.v0.v0, 0, ((f1.extent.0 + 31) / 32))
{

let f1.s0.v0.v6.base = min(((f1.s0.v0.v0 * 32) + f1.min.0), ((f1.min.0 + f1.extent.0) + -32))

for (f1.s0.v1.v7, 0, 16)
{
for (f1.s0.v0.v6, 0, 32)
{
f1[(((f1.s0.v0.v6.base + f1.s0.v0.v6) + ((f1.s0.v1.v7.base + f1.s0.v1.v7) * f1.stride.1)) - (f1.min.0 + (f1.min.1 * f1.stride.1)))] = f0[(((p0 * (f1.s0.v0.v6.base + f1.s0.v0.v6)) - f0.v0.min_realized) + (((p0 * (f1.s0.v1.v7.base + f1.s0.v1.v7)) - f0.s0.v1.min_1) * (f0.v0.extent_realized.s + 1)))]
}
}
}
free f0
}
}
0
}