You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Describe the bug
Using the sycl::vec "as" operation leads to a significant performance drop. Is this expected?
I have to use a reinterpret_cast workaround to get the expected performance back.
To Reproduce
Test program:
# cat as.cpp
#include <sycl.hpp>
#include <chrono>
// Number of float4 elements processed by each kernel launch (1024 * 1024).
constexpr unsigned N = 1u << 20;
// Host-side storage shared by every run; do_sycl wraps it in a sycl::buffer
// and main inspects element 0 afterwards.
sycl::float4 Mem[N] = { sycl::float4(0) };
// Kernel as1 execution time: 17.643s
// Variant under test: masks `a` with `b` by going through sycl::vec::as.
// `a` is converted to uint4 with as<>(), ANDed with the scalar `b`, and the
// result is converted back to float4 with as<>().
// The expression is intentionally kept in this exact form, since it is the
// code whose performance is being reported.
inline sycl::float4 as1(sycl::float4 a, const uint32_t b) {
return (a.as<sycl::uint4>() & b).as<sycl::float4>();
}
// Kernel as2 execution time: 0.052s
// Workaround variant: same masking as as1, but the float4 <-> uint4
// conversions are done with reinterpret_cast on references instead of
// sycl::vec::as.
// NOTE(review): this reads a float4 object through a uint4 reference (and
// vice versa), i.e. type punning via reinterpret_cast - confirm this is
// acceptable for the target compiler before reusing it outside this test.
inline sycl::float4 as2(const sycl::float4 a, const uint32_t b) {
// View the float4 argument as uint4 and AND it with the scalar mask.
const sycl::uint4 i = reinterpret_cast<const sycl::uint4&>(a) & b;
// View the masked uint4 as float4 again for the return value.
return reinterpret_cast<const sycl::float4&>(i);
}
// Pointer to a masking function (as1 / as2) that do_sycl takes as a template argument.
using FOO = sycl::float4 (*)(sycl::float4, uint32_t);
template<FOO foo> void do_sycl(const char* const name) {
try {
auto exception_handler = [] (sycl::exception_list exceptions) {
for (std::exception_ptr const& e : exceptions) {
try {
std::rethrow_exception(e);
} catch(sycl::exception const& e) {
std::cerr << "Caught asynchronous SYCL exception:\n" << e.what() << std::endl;
}
}
};
auto q = sycl::queue{sycl::gpu_selector_v, exception_handler};
auto bMem = sycl::buffer(Mem, sycl::range(N));
// compile kernels
auto kb_begin = std::chrono::high_resolution_clock::now();
auto kb = sycl::get_kernel_bundle<sycl::bundle_state::executable>(q.get_context());
{ auto kb_end = std::chrono::high_resolution_clock::now();
auto kb_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(kb_end - kb_begin);
printf("Kernel %s compile time: %.3fs\n", name, kb_elapsed.count() * 1e-9);
}
auto begin = std::chrono::high_resolution_clock::now();
Mem[0] = 1;
q.submit([&](sycl::handler& h) {
auto mem = bMem.get_access<sycl::access::mode::read_write>(h);
h.use_kernel_bundle(kb);
h.parallel_for(sycl::range(N), [=](sycl::id<1> n) {
for (int i = 0; i != 1000*1000; ++ i)
mem[n] = foo(mem[n], 0x3F400000);
});
});
q.wait_and_throw();
{ auto end = std::chrono::high_resolution_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
printf("Kernel %s execution time: %.3fs\n", name, elapsed.count() * 1e-9);
}
} catch (sycl::exception const& e) {
std::cerr << "Caught synchronous SYCL exception:\n" << e.what() << std::endl;
}
}
int main() {
do_sycl<as1>("as1");
if (Mem[0][0] != 0.5f) {
printf("FAILED: %f (%X)\n", Mem[0][0], *(unsigned*)Mem);
return 1;
}
do_sycl<as2>("as2");
if (Mem[0][0] != 0.5f) {
printf("FAILED: %f (%X)\n", Mem[0][0], *(unsigned*)Mem);
return 1;
}
puts("PASSED");
return 0;
}
Describe the bug
Using the sycl::vec "as" operation leads to a significant performance drop. Is this expected?
I have to use a reinterpret_cast workaround to get the expected performance back.
To Reproduce
Test program:
Test program output:
Environment (please complete the following information):
The text was updated successfully, but these errors were encountered: