diff --git a/benchmarks/cutlass_kernel/attention/attention.hpp b/benchmarks/cutlass_kernel/attention/attention.hpp index 5b40bb2831..7b779c50e4 100644 --- a/benchmarks/cutlass_kernel/attention/attention.hpp +++ b/benchmarks/cutlass_kernel/attention/attention.hpp @@ -20,11 +20,11 @@ template static auto run(typename FMHA::Params params) -> void { int smem_size = FMHA::SharedStorageSize; - const auto sycl_block = syclcompat::dim3(block.x, block.y, block.z); - const auto sycl_grid = syclcompat::dim3(grid.x, grid.y, grid.z); + const auto sycl_block = compat::dim3(block.x, block.y, block.z); + const auto sycl_grid = compat::dim3(grid.x, grid.y, grid.z); #if !defined(SYCL_EXT_ONEAPI_WORK_GROUP_SCRATCH_MEMORY) - using namespace syclcompat::experimental; + using namespace compat::experimental; auto event = launch>( launch_policy{ sycl_grid, sycl_block, @@ -33,15 +33,15 @@ template static auto run(typename FMHA::Params params) -> void { sycl_exp::sub_group_size}}, params); #else - syclcompat::experimental::launch_properties launch_props{ + compat::experimental::launch_properties launch_props{ sycl::ext::oneapi::experimental::work_group_scratch_size(smem_size), }; - syclcompat::experimental::kernel_properties kernel_props{ + compat::experimental::kernel_properties kernel_props{ sycl::ext::oneapi::experimental::sub_group_size< FMHA::DispatchPolicy::SubgroupSize>}; - syclcompat::experimental::launch_policy policy{sycl_grid, sycl_block, - launch_props, kernel_props}; - auto event = syclcompat::experimental::launch>( + compat::experimental::launch_policy policy{sycl_grid, sycl_block, + launch_props, kernel_props}; + auto event = compat::experimental::launch>( policy, params); #endif @@ -102,8 +102,9 @@ static auto attention_run(const at::Tensor &Q, const at::Tensor &K, using CollectiveEpilogue = cutlass::flash_attention::collective::FlashPrefillEpilogue< EpilogueDispatchPolicy, MMAOperation, TileShapeOutput, SubgroupLayout, - ElementAccumulator, cutlass::gemm::TagToStrideC_t, - ElementOutput, GmemTiledCopyStore>; + ElementAccumulator, ElementOutput, + cutlass::gemm::TagToStrideC_t, ElementOutput, + GmemTiledCopyStore>; /// FA /// @@ -181,7 +182,7 @@ static auto attention_run(const at::Tensor &Q, const at::Tensor &K, FMHAPrefillKernel::to_underlying_arguments(arguments, workspace_ptr); run(params); - syclcompat::wait(); + compat::wait(); } catch (std::exception &e) { std::cerr << "Runtime error: " << e.what() << std::endl; diff --git a/benchmarks/cutlass_kernel/cutlass-library.conf b/benchmarks/cutlass_kernel/cutlass-library.conf index f3ea9d4f5b..4c597bbd30 100644 --- a/benchmarks/cutlass_kernel/cutlass-library.conf +++ b/benchmarks/cutlass_kernel/cutlass-library.conf @@ -1 +1 @@ -dd43242ea2f3e08e73a73153f00a5dbe5a31c41c +b0cb10e655d8f9b1d0474e9538a82d218f74c694 diff --git a/benchmarks/cutlass_kernel/gemm/gemm.hpp b/benchmarks/cutlass_kernel/gemm/gemm.hpp index b4ccf29649..ced638b299 100644 --- a/benchmarks/cutlass_kernel/gemm/gemm.hpp +++ b/benchmarks/cutlass_kernel/gemm/gemm.hpp @@ -86,7 +86,7 @@ static auto gemm_run(const at::Tensor &A, const at::Tensor &B, at::Tensor &C, CUTLASS_CHECK(gemm_op.initialize(arguments, workspace.get())); CUTLASS_CHECK(gemm_op.run()); - syclcompat::wait(); + compat::wait(); } catch (std::exception &e) { std::cerr << "Runtime error: " << e.what() << std::endl;