[BENCHMARKS] Update Cutlass to v0.5

anmyachev · anmyachev · commit 7c741099d2bc · 2025-11-18T21:33:14.000+01:00
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
diff --git a/benchmarks/cutlass_kernel/attention/attention.hpp b/benchmarks/cutlass_kernel/attention/attention.hpp
@@ -20,11 +20,11 @@ template <typename FMHA> static auto run(typename FMHA::Params params) -> void {
 
   int smem_size = FMHA::SharedStorageSize;
 
-  const auto sycl_block = syclcompat::dim3(block.x, block.y, block.z);
-  const auto sycl_grid = syclcompat::dim3(grid.x, grid.y, grid.z);
+  const auto sycl_block = compat::dim3(block.x, block.y, block.z);
+  const auto sycl_grid = compat::dim3(grid.x, grid.y, grid.z);
 
 #if !defined(SYCL_EXT_ONEAPI_WORK_GROUP_SCRATCH_MEMORY)
-  using namespace syclcompat::experimental;
+  using namespace compat::experimental;
   auto event = launch<cutlass::device_kernel<FMHA>>(
       launch_policy{
           sycl_grid, sycl_block,
@@ -33,15 +33,15 @@ template <typename FMHA> static auto run(typename FMHA::Params params) -> void {
               sycl_exp::sub_group_size<FMHA::DispatchPolicy::SubgroupSize>}},
       params);
 #else
-  syclcompat::experimental::launch_properties launch_props{
+  compat::experimental::launch_properties launch_props{
       sycl::ext::oneapi::experimental::work_group_scratch_size(smem_size),
   };
-  syclcompat::experimental::kernel_properties kernel_props{
+  compat::experimental::kernel_properties kernel_props{
       sycl::ext::oneapi::experimental::sub_group_size<
           FMHA::DispatchPolicy::SubgroupSize>};
-  syclcompat::experimental::launch_policy policy{sycl_grid, sycl_block,
+  compat::experimental::launch_policy policy{sycl_grid, sycl_block,
                                                  launch_props, kernel_props};
-  auto event = syclcompat::experimental::launch<cutlass::device_kernel<FMHA>>(
+  auto event = compat::experimental::launch<cutlass::device_kernel<FMHA>>(
       policy, params);
 #endif
 
@@ -102,7 +102,7 @@ static auto attention_run(const at::Tensor &Q, const at::Tensor &K,
   using CollectiveEpilogue =
       cutlass::flash_attention::collective::FlashPrefillEpilogue<
           EpilogueDispatchPolicy, MMAOperation, TileShapeOutput, SubgroupLayout,
-          ElementAccumulator, cutlass::gemm::TagToStrideC_t<LayoutO>,
+          ElementAccumulator, ElementOutput, cutlass::gemm::TagToStrideC_t<LayoutO>,
           ElementOutput, GmemTiledCopyStore>;
 
   /// FA ///
@@ -181,7 +181,7 @@ static auto attention_run(const at::Tensor &Q, const at::Tensor &K,
         FMHAPrefillKernel::to_underlying_arguments(arguments, workspace_ptr);
     run<FMHAPrefillKernel>(params);
 
-    syclcompat::wait();
+    compat::wait();
 
   } catch (std::exception &e) {
     std::cerr << "Runtime error: " << e.what() << std::endl;
diff --git a/benchmarks/cutlass_kernel/cutlass-library.conf b/benchmarks/cutlass_kernel/cutlass-library.conf
@@ -1 +1 @@
-dd43242ea2f3e08e73a73153f00a5dbe5a31c41c
+b0cb10e655d8f9b1d0474e9538a82d218f74c694
diff --git a/benchmarks/cutlass_kernel/gemm/gemm.hpp b/benchmarks/cutlass_kernel/gemm/gemm.hpp
@@ -86,7 +86,7 @@ static auto gemm_run(const at::Tensor &A, const at::Tensor &B, at::Tensor &C,
     CUTLASS_CHECK(gemm_op.initialize(arguments, workspace.get()));
     CUTLASS_CHECK(gemm_op.run());
 
-    syclcompat::wait();
+    compat::wait();
 
   } catch (std::exception &e) {
     std::cerr << "Runtime error: " << e.what() << std::endl;

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-dd43242ea2f3e08e73a73153f00a5dbe5a31c41c`
	`1`	`+b0cb10e655d8f9b1d0474e9538a82d218f74c694`