Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
607239b
adding tests and removing default ctor req
danhoeflinger Jun 13, 2025
f733678
reduce improvements
danhoeflinger Jun 24, 2025
b151166
fix for kernel name clash
danhoeflinger Jun 24, 2025
d693ba4
fixes for omp
danhoeflinger Jun 24, 2025
bca2f61
relocate utils, protect type checks for only std::plus
danhoeflinger Jun 24, 2025
b57a620
add transform reduce testing
danhoeflinger Jun 24, 2025
7bed152
testing transform_reduce
danhoeflinger Jun 25, 2025
5fa2663
revert non reduce changes
danhoeflinger Jun 25, 2025
f411980
revert accidental change
danhoeflinger Jun 25, 2025
72a92cc
formatting
danhoeflinger Jun 25, 2025
8b593a9
remove part of misleading static assert
danhoeflinger Jun 25, 2025
c0849fd
fix serial backend
danhoeflinger Jun 25, 2025
97454e4
removing unnecessary and harmful move of temp obj
danhoeflinger Jun 25, 2025
c0c53bc
moving instead of forwarding the simple cases
danhoeflinger Jul 1, 2025
ecf1f88
skipping vector scan for move only types
danhoeflinger Jul 1, 2025
c95bf7e
switching delayed construction storage to use __lazy_ctor_storage
danhoeflinger Jul 1, 2025
4be3708
skipping apex when it is identity
danhoeflinger Jul 1, 2025
2e1d778
partial fix for downsweep, unfinished
danhoeflinger Jul 1, 2025
acaa877
Additional move handling of init in glue
danhoeflinger Jul 1, 2025
2c49188
adjust bricks to support move-only types
danhoeflinger Jul 1, 2025
d8e4407
more move only type changes, reverting some irrelevant ones
danhoeflinger Jul 1, 2025
0effc0a
fix serial scan impl
danhoeflinger Jul 1, 2025
2eaa338
additional fixes for move only types
danhoeflinger Jul 1, 2025
1ee0fc0
simd scan requires default constructible, so use non vector when this…
danhoeflinger Jul 1, 2025
e983a68
Improve constructors for type wrappers
danhoeflinger Jul 1, 2025
4b65d6d
add tests for scan to check move only types and non default construct…
danhoeflinger Jul 1, 2025
29844e3
fix bug in return type
danhoeflinger Jul 2, 2025
753ce80
add tests to transform_scan.pass
danhoeflinger Jul 2, 2025
f15ac25
clang formatting
danhoeflinger Jul 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 29 additions & 22 deletions include/oneapi/dpl/pstl/glue_numeric_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,16 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp>
reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init,
_BinaryOperation __binary_op)
{
return transform_reduce(std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, __binary_op,
return transform_reduce(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::move(__init), __binary_op,
oneapi::dpl::identity{});
}

template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp>
reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init)
{
return transform_reduce(std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, std::plus<_Tp>(),
oneapi::dpl::identity{});
return transform_reduce(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::move(__init),
std::plus<_Tp>(), oneapi::dpl::identity{});
}

template <class _ExecutionPolicy, class _ForwardIterator>
Expand All @@ -74,7 +74,7 @@ transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forward
const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2);

return oneapi::dpl::__internal::__pattern_transform_reduce(
__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init,
__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, std::move(__init),
::std::plus<_InputType>(), ::std::multiplies<_InputType>());
}

Expand All @@ -87,8 +87,8 @@ transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forward
const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2);

return oneapi::dpl::__internal::__pattern_transform_reduce(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec),
__first1, __last1, __first2, __init, __binary_op1,
__binary_op2);
__first1, __last1, __first2, std::move(__init),
__binary_op1, __binary_op2);
}

template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation>
Expand All @@ -99,7 +99,8 @@ transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIt
const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first);

return oneapi::dpl::__internal::__pattern_transform_reduce(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec),
__first, __last, __init, __binary_op, __unary_op);
__first, __last, std::move(__init), __binary_op,
__unary_op);
}

// [exclusive.scan]
Expand All @@ -109,8 +110,8 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward
exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
_ForwardIterator2 __result, _Tp __init)
{
return transform_exclusive_scan(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __init,
std::plus<_Tp>(), oneapi::dpl::identity{});
return transform_exclusive_scan(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result,
std::move(__init), std::plus<_Tp>(), oneapi::dpl::identity{});
}

#if !_ONEDPL_EXCLUSIVE_SCAN_WITH_BINARY_OP_AMBIGUITY
Expand All @@ -119,40 +120,44 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward
exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
_ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op)
{
return transform_exclusive_scan(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __init,
__binary_op, oneapi::dpl::identity{});
return transform_exclusive_scan(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result,
std::move(__init), __binary_op, oneapi::dpl::identity{});
}
#else
template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation>
_ForwardIterator2
exclusive_scan(oneapi::dpl::execution::sequenced_policy __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
_ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op)
{
return transform_exclusive_scan(__exec, __first, __last, __result, __init, __binary_op, oneapi::dpl::identity{});
return transform_exclusive_scan(__exec, __first, __last, __result, std::move(__init), __binary_op,
oneapi::dpl::identity{});
}

template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation>
_ForwardIterator2
exclusive_scan(oneapi::dpl::execution::unsequenced_policy __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
_ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op)
{
return transform_exclusive_scan(__exec, __first, __last, __result, __init, __binary_op, oneapi::dpl::identity{});
return transform_exclusive_scan(__exec, __first, __last, __result, std::move(__init), __binary_op,
oneapi::dpl::identity{});
}

template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation>
_ForwardIterator2
exclusive_scan(oneapi::dpl::execution::parallel_policy __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
_ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op)
{
return transform_exclusive_scan(__exec, __first, __last, __result, __init, __binary_op, oneapi::dpl::identity{});
return transform_exclusive_scan(__exec, __first, __last, __result, std::move(__init), __binary_op,
oneapi::dpl::identity{});
}

template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation>
_ForwardIterator2
exclusive_scan(oneapi::dpl::execution::parallel_unsequenced_policy __exec, _ForwardIterator1 __first,
_ForwardIterator1 __last, _ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op)
{
return transform_exclusive_scan(__exec, __first, __last, __result, __init, __binary_op, oneapi::dpl::identity{});
return transform_exclusive_scan(__exec, __first, __last, __result, std::move(__init), __binary_op,
oneapi::dpl::identity{});
}

# if _ONEDPL_BACKEND_SYCL
Expand All @@ -161,7 +166,8 @@ _ForwardIterator2
exclusive_scan(const oneapi::dpl::execution::device_policy<PolicyParams...>& __exec, _ForwardIterator1 __first,
_ForwardIterator1 __last, _ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op)
{
return transform_exclusive_scan(__exec, __first, __last, __result, __init, __binary_op, oneapi::dpl::identity{});
return transform_exclusive_scan(__exec, __first, __last, __result, std::move(__init), __binary_op,
oneapi::dpl::identity{});
}

# if _ONEDPL_FPGA_DEVICE
Expand All @@ -171,7 +177,8 @@ _ForwardIterator2
exclusive_scan(const oneapi::dpl::execution::fpga_policy<factor, KernelName>& __exec, _ForwardIterator1 __first,
_ForwardIterator1 __last, _ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op)
{
return transform_exclusive_scan(__exec, __first, __last, __result, __init, __binary_op, oneapi::dpl::identity{});
return transform_exclusive_scan(__exec, __first, __last, __result, std::move(__init), __binary_op,
oneapi::dpl::identity{});
}
# endif // _ONEDPL_FPGA_DEVICE
# endif // _ONEDPL_BACKEND_SYCL
Expand Down Expand Up @@ -205,7 +212,7 @@ inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIte
_ForwardIterator2 __result, _BinaryOperation __binary_op, _Tp __init)
{
return transform_inclusive_scan(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __binary_op,
oneapi::dpl::identity{}, __init);
oneapi::dpl::identity{}, std::move(__init));
}

// [transform.exclusive.scan]
Expand All @@ -220,8 +227,8 @@ transform_exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _
const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result);

return oneapi::dpl::__internal::__pattern_transform_scan(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec),
__first, __last, __result, __unary_op, __init, __binary_op,
/*inclusive=*/::std::false_type());
__first, __last, __result, __unary_op, std::move(__init),
__binary_op, /*inclusive=*/::std::false_type());
}

// [transform.inclusive.scan]
Expand All @@ -236,8 +243,8 @@ transform_inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _
const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result);

return oneapi::dpl::__internal::__pattern_transform_scan(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec),
__first, __last, __result, __unary_op, __init, __binary_op,
/*inclusive=*/::std::true_type());
__first, __last, __result, __unary_op, std::move(__init),
__binary_op, /*inclusive=*/::std::true_type());
}

template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _UnaryOperation,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -667,9 +667,15 @@ struct __result_and_scratch_storage : __result_and_scratch_storage_base
}
else if (__supports_USM_device)
{
_T __tmp;
__q.memcpy(&__tmp, __scratch_buf.get() + __scratch_n + _Idx, 1 * sizeof(_T)).wait();
return __tmp;
static_assert(sycl::is_device_copyable_v<_T>,
"The type _T must be device copyable to use __result_and_scratch_storage.");
// Avoid default constructor for _T, we know that _T is device copyable and therefore a copy construction
// is equivalent to a bitwise copy
_T* __tmp = static_cast<_T*>(malloc(sizeof(_T)));
__q.memcpy(__tmp, __scratch_buf.get() + __scratch_n + _Idx, 1 * sizeof(_T)).wait();
_T __return_tmp = std::move(*__tmp);
free(__tmp);
return __return_tmp;
}
else
{
Expand Down
Loading