Skip to content

Commit 168767c

Browse files
authored
[SYCL] Add support for sorting using sub-group (#7374)
Use provided work-group or sub-group instead of creating a separate object in sort algorithms.
1 parent c6d1caf commit 168767c

File tree

4 files changed

+62
-40
lines changed

4 files changed

+62
-40
lines changed

sycl/include/sycl/detail/group_sort_impl.hpp

+6-7
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@
1010

1111
#pragma once
1212

13-
#if __cplusplus >= 201703L
1413
#include <sycl/detail/helpers.hpp>
14+
#include <sycl/group_barrier.hpp>
15+
#include <sycl/multi_ptr.hpp>
1516

1617
#ifdef __SYCL_DEVICE_ONLY__
1718

@@ -204,14 +205,13 @@ template <typename Group, typename Iter, typename Compare>
204205
void merge_sort(Group group, Iter first, const std::size_t n, Compare comp,
205206
std::byte *scratch) {
206207
using T = typename GetValueType<Iter>::type;
207-
auto id = sycl::detail::Builder::getNDItem<Group::dimensions>();
208-
const std::size_t idx = id.get_local_linear_id();
208+
const std::size_t idx = group.get_local_linear_id();
209209
const std::size_t local = group.get_local_range().size();
210210
const std::size_t chunk = (n - 1) / local + 1;
211211

212212
// we need to sort within work item first
213213
bubble_sort(first, idx * chunk, sycl::min((idx + 1) * chunk, n), comp);
214-
id.barrier();
214+
sycl::group_barrier(group);
215215

216216
T *temp = reinterpret_cast<T *>(scratch);
217217
bool data_in_temp = false;
@@ -231,7 +231,7 @@ void merge_sort(Group group, Iter first, const std::size_t n, Compare comp,
231231
merge(offset, temp, first, start_1, end_1, end_2, start_1, comp, chunk,
232232
/*is_first*/ false);
233233
}
234-
id.barrier();
234+
sycl::group_barrier(group);
235235

236236
data_in_temp = !data_in_temp;
237237
sorted_size *= 2;
@@ -246,12 +246,11 @@ void merge_sort(Group group, Iter first, const std::size_t n, Compare comp,
246246
first[idx * chunk + i] = temp[idx * chunk + i];
247247
}
248248
}
249-
id.barrier();
249+
sycl::group_barrier(group);
250250
}
251251
}
252252

253253
} // namespace detail
254254
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
255255
} // namespace sycl
256256
#endif
257-
#endif // __cplusplus >=201703L

sycl/include/sycl/ext/oneapi/experimental/group_helpers_sorters.hpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,7 @@ template <typename Compare = std::less<>> class default_sorter {
6262
#ifdef __SYCL_DEVICE_ONLY__
6363
auto range_size = g.get_local_range().size();
6464
if (scratch_size >= memory_required<T>(Group::fence_scope, range_size)) {
65-
auto id = sycl::detail::Builder::getNDItem<Group::dimensions>();
66-
std::size_t local_id = id.get_local_linear_id();
65+
std::size_t local_id = g.get_local_linear_id();
6766
T *temp = reinterpret_cast<T *>(scratch);
6867
::new (temp + local_id) T(val);
6968
sycl::detail::merge_sort(g, temp, range_size, comp,

sycl/include/sycl/group_algorithm.hpp

+1-31
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <sycl/ext/oneapi/functional.hpp>
1919
#include <sycl/functional.hpp>
2020
#include <sycl/group.hpp>
21+
#include <sycl/group_barrier.hpp>
2122
#include <sycl/known_identity.hpp>
2223
#include <sycl/nd_item.hpp>
2324
#include <sycl/sub_group.hpp>
@@ -1006,36 +1007,5 @@ joint_inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result,
10061007
return joint_inclusive_scan(g, first, last, result, binary_op, init);
10071008
}
10081009

1009-
namespace detail {
1010-
template <typename G> struct group_barrier_scope {};
1011-
template <> struct group_barrier_scope<sycl::sub_group> {
1012-
constexpr static auto Scope = __spv::Scope::Subgroup;
1013-
};
1014-
template <int D> struct group_barrier_scope<sycl::group<D>> {
1015-
constexpr static auto Scope = __spv::Scope::Workgroup;
1016-
};
1017-
} // namespace detail
1018-
1019-
template <typename Group>
1020-
typename std::enable_if<is_group_v<Group>>::type
1021-
group_barrier(Group, memory_scope FenceScope = Group::fence_scope) {
1022-
(void)FenceScope;
1023-
#ifdef __SYCL_DEVICE_ONLY__
1024-
// Per SYCL spec, group_barrier must perform both control barrier and memory
1025-
// fence operations. All work-items execute a release fence prior to
1026-
// barrier and acquire fence afterwards. The rest of semantics flags specify
1027-
// which type of memory this behavior is applied to.
1028-
__spirv_ControlBarrier(detail::group_barrier_scope<Group>::Scope,
1029-
sycl::detail::spirv::getScope(FenceScope),
1030-
__spv::MemorySemanticsMask::SequentiallyConsistent |
1031-
__spv::MemorySemanticsMask::SubgroupMemory |
1032-
__spv::MemorySemanticsMask::WorkgroupMemory |
1033-
__spv::MemorySemanticsMask::CrossWorkgroupMemory);
1034-
#else
1035-
throw sycl::runtime_error("Barriers are not supported on host device",
1036-
PI_ERROR_INVALID_DEVICE);
1037-
#endif
1038-
}
1039-
10401010
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
10411011
} // namespace sycl

sycl/include/sycl/group_barrier.hpp

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
2+
//==------------------------- group_barrier.hpp ----------------------------==//
3+
//
4+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
#pragma once
11+
12+
#include <CL/__spirv/spirv_ops.hpp>
13+
#include <CL/__spirv/spirv_types.hpp>
14+
#include <CL/__spirv/spirv_vars.hpp>
15+
#include <sycl/detail/spirv.hpp>
16+
#include <sycl/detail/type_traits.hpp>
17+
#include <sycl/group.hpp>
18+
19+
namespace sycl {
20+
__SYCL_INLINE_VER_NAMESPACE(_V1) {
21+
22+
namespace detail {
23+
template <typename G> struct group_barrier_scope {};
24+
template <> struct group_barrier_scope<sycl::sub_group> {
25+
constexpr static auto Scope = __spv::Scope::Subgroup;
26+
};
27+
template <int D> struct group_barrier_scope<sycl::group<D>> {
28+
constexpr static auto Scope = __spv::Scope::Workgroup;
29+
};
30+
} // namespace detail
31+
32+
template <typename Group>
33+
typename std::enable_if<is_group_v<Group>>::type
34+
group_barrier(Group, memory_scope FenceScope = Group::fence_scope) {
35+
(void)FenceScope;
36+
#ifdef __SYCL_DEVICE_ONLY__
37+
// Per SYCL spec, group_barrier must perform both control barrier and memory
38+
// fence operations. All work-items execute a release fence prior to
39+
// barrier and acquire fence afterwards. The rest of semantics flags specify
40+
// which type of memory this behavior is applied to.
41+
__spirv_ControlBarrier(detail::group_barrier_scope<Group>::Scope,
42+
sycl::detail::spirv::getScope(FenceScope),
43+
__spv::MemorySemanticsMask::SequentiallyConsistent |
44+
__spv::MemorySemanticsMask::SubgroupMemory |
45+
__spv::MemorySemanticsMask::WorkgroupMemory |
46+
__spv::MemorySemanticsMask::CrossWorkgroupMemory);
47+
#else
48+
throw sycl::runtime_error("Barriers are not supported on host device",
49+
PI_ERROR_INVALID_DEVICE);
50+
#endif
51+
}
52+
53+
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
54+
} // namespace sycl

0 commit comments

Comments
 (0)