diff --git a/benchmarks/cutlass_kernel/attention/attention.hpp b/benchmarks/cutlass_kernel/attention/attention.hpp
index 5b40bb2831..7b779c50e4 100644
--- a/benchmarks/cutlass_kernel/attention/attention.hpp
+++ b/benchmarks/cutlass_kernel/attention/attention.hpp
@@ -20,11 +20,11 @@ template <typename FMHA> static auto run(typename FMHA::Params params) -> void {
 
   int smem_size = FMHA::SharedStorageSize;
 
-  const auto sycl_block = syclcompat::dim3(block.x, block.y, block.z);
-  const auto sycl_grid = syclcompat::dim3(grid.x, grid.y, grid.z);
+  const auto sycl_block = compat::dim3(block.x, block.y, block.z);
+  const auto sycl_grid = compat::dim3(grid.x, grid.y, grid.z);
 
 #if !defined(SYCL_EXT_ONEAPI_WORK_GROUP_SCRATCH_MEMORY)
-  using namespace syclcompat::experimental;
+  using namespace compat::experimental;
   auto event = launch<cutlass::device_kernel<FMHA>>(
       launch_policy{
           sycl_grid, sycl_block,
@@ -33,15 +33,15 @@ template <typename FMHA> static auto run(typename FMHA::Params params) -> void {
               sycl_exp::sub_group_size<FMHA::DispatchPolicy::SubgroupSize>}},
       params);
 #else
-  syclcompat::experimental::launch_properties launch_props{
+  compat::experimental::launch_properties launch_props{
       sycl::ext::oneapi::experimental::work_group_scratch_size(smem_size),
   };
-  syclcompat::experimental::kernel_properties kernel_props{
+  compat::experimental::kernel_properties kernel_props{
       sycl::ext::oneapi::experimental::sub_group_size<
           FMHA::DispatchPolicy::SubgroupSize>};
-  syclcompat::experimental::launch_policy policy{sycl_grid, sycl_block,
-                                                 launch_props, kernel_props};
-  auto event = syclcompat::experimental::launch<cutlass::device_kernel<FMHA>>(
+  compat::experimental::launch_policy policy{sycl_grid, sycl_block,
+                                             launch_props, kernel_props};
+  auto event = compat::experimental::launch<cutlass::device_kernel<FMHA>>(
       policy, params);
 #endif
 
@@ -102,8 +102,9 @@ static auto attention_run(const at::Tensor &Q, const at::Tensor &K,
   using CollectiveEpilogue =
       cutlass::flash_attention::collective::FlashPrefillEpilogue<
           EpilogueDispatchPolicy, MMAOperation, TileShapeOutput, SubgroupLayout,
-          ElementAccumulator, cutlass::gemm::TagToStrideC_t<LayoutO>,
-          ElementOutput, GmemTiledCopyStore>;
+          ElementAccumulator, ElementOutput,
+          cutlass::gemm::TagToStrideC_t<LayoutO>, ElementOutput,
+          GmemTiledCopyStore>;
 
   /// FA ///
 
@@ -181,7 +182,7 @@ static auto attention_run(const at::Tensor &Q, const at::Tensor &K,
         FMHAPrefillKernel::to_underlying_arguments(arguments, workspace_ptr);
     run<FMHAPrefillKernel>(params);
 
-    syclcompat::wait();
+    compat::wait();
 
   } catch (std::exception &e) {
     std::cerr << "Runtime error: " << e.what() << std::endl;
diff --git a/benchmarks/cutlass_kernel/cutlass-library.conf b/benchmarks/cutlass_kernel/cutlass-library.conf
index f3ea9d4f5b..4c597bbd30 100644
--- a/benchmarks/cutlass_kernel/cutlass-library.conf
+++ b/benchmarks/cutlass_kernel/cutlass-library.conf
@@ -1 +1 @@
-dd43242ea2f3e08e73a73153f00a5dbe5a31c41c
+b0cb10e655d8f9b1d0474e9538a82d218f74c694
diff --git a/benchmarks/cutlass_kernel/gemm/gemm.hpp b/benchmarks/cutlass_kernel/gemm/gemm.hpp
index b4ccf29649..ced638b299 100644
--- a/benchmarks/cutlass_kernel/gemm/gemm.hpp
+++ b/benchmarks/cutlass_kernel/gemm/gemm.hpp
@@ -86,7 +86,7 @@ static auto gemm_run(const at::Tensor &A, const at::Tensor &B, at::Tensor &C,
     CUTLASS_CHECK(gemm_op.initialize(arguments, workspace.get()));
     CUTLASS_CHECK(gemm_op.run());
 
-    syclcompat::wait();
+    compat::wait();
 
   } catch (std::exception &e) {
     std::cerr << "Runtime error: " << e.what() << std::endl;