Skip to content

Commit 01e60f7

Browse files
authored
[SYCL] Optimize operator[] method of host accessor (#6853)
Compiler cannot optimize out calls to getPtr(), getOffset() and getMemoryRange(), that were used in accessor::operator[], because they may have side effects(e.g. printf). The patch adds a member in the accessor which points to the actual data stored in the accessor_impl. This pointer is initialized once during construction and used instead of functions mentioned above.
1 parent 5998d7c commit 01e60f7

File tree

6 files changed

+117
-24
lines changed

6 files changed

+117
-24
lines changed

sycl/include/sycl/accessor.hpp

+75-14
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,22 @@ template <typename DataT, int Dimensions = 1,
227227
class accessor;
228228

229229
namespace detail {
230+
231+
// A helper structure which is shared between buffer accessor and accessor_impl
232+
// TODO: Unify with AccessorImplDevice?
233+
struct AccHostDataT {
234+
AccHostDataT(const sycl::id<3> &Offset, const sycl::range<3> &Range,
235+
const sycl::range<3> &MemoryRange, void *Data = nullptr)
236+
: MOffset(Offset), MAccessRange(Range), MMemoryRange(MemoryRange),
237+
MData(Data) {}
238+
239+
sycl::id<3> MOffset;
240+
sycl::range<3> MAccessRange;
241+
sycl::range<3> MMemoryRange;
242+
void *MData = nullptr;
243+
void *Reserved = nullptr;
244+
};
245+
230246
// To ensure loop unrolling is done when processing dimensions.
231247
template <size_t... Inds, class F>
232248
void dim_loop_impl(std::integer_sequence<size_t, Inds...>, F &&f) {
@@ -474,6 +490,8 @@ class __SYCL_EXPORT AccessorBaseHost {
474490
const range<3> &getMemoryRange() const;
475491
void *getPtr() const;
476492

493+
detail::AccHostDataT &getAccData();
494+
477495
const property_list &getPropList() const;
478496

479497
void *getMemoryObject() const;
@@ -1106,21 +1124,42 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
11061124
detail::InitializedVal<AdjustedDim, range>::template get<0>()) {}
11071125

11081126
#else
1109-
id<3> &getOffset() { return AccessorBaseHost::getOffset(); }
1127+
id<3> &getOffset() {
1128+
if constexpr (IsHostBuf)
1129+
return MAccData->MOffset;
1130+
else
1131+
return AccessorBaseHost::getOffset();
1132+
}
1133+
11101134
range<3> &getAccessRange() { return AccessorBaseHost::getAccessRange(); }
1111-
range<3> &getMemoryRange() { return AccessorBaseHost::getMemoryRange(); }
1135+
range<3> &getMemoryRange() {
1136+
if constexpr (IsHostBuf)
1137+
return MAccData->MMemoryRange;
1138+
else
1139+
return AccessorBaseHost::getMemoryRange();
1140+
}
11121141
void *getPtr() { return AccessorBaseHost::getPtr(); }
11131142

1114-
const id<3> &getOffset() const { return AccessorBaseHost::getOffset(); }
1143+
const id<3> &getOffset() const {
1144+
if constexpr (IsHostBuf)
1145+
return MAccData->MOffset;
1146+
else
1147+
return AccessorBaseHost::getOffset();
1148+
}
11151149
const range<3> &getAccessRange() const {
11161150
return AccessorBaseHost::getAccessRange();
11171151
}
11181152
const range<3> &getMemoryRange() const {
1119-
return AccessorBaseHost::getMemoryRange();
1153+
if constexpr (IsHostBuf)
1154+
return MAccData->MMemoryRange;
1155+
else
1156+
return AccessorBaseHost::getMemoryRange();
11201157
}
11211158

11221159
void *getPtr() const { return AccessorBaseHost::getPtr(); }
11231160

1161+
void initHostAcc() { MAccData = &getAccData(); }
1162+
11241163
// The function references helper methods required by GDB pretty-printers
11251164
void GDBMethodsAnchor() {
11261165
#ifndef NDEBUG
@@ -1131,11 +1170,17 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
11311170
#endif
11321171
}
11331172

1173+
detail::AccHostDataT *MAccData = nullptr;
1174+
11341175
char padding[sizeof(detail::AccessorImplDevice<AdjustedDim>) +
1135-
sizeof(PtrType) - sizeof(detail::AccessorBaseHost)];
1176+
sizeof(PtrType) - sizeof(detail::AccessorBaseHost) -
1177+
sizeof(MAccData)];
11361178

11371179
PtrType getQualifiedPtr() const {
1138-
return reinterpret_cast<PtrType>(AccessorBaseHost::getPtr());
1180+
if constexpr (IsHostBuf)
1181+
return reinterpret_cast<PtrType>(MAccData->MData);
1182+
else
1183+
return reinterpret_cast<PtrType>(AccessorBaseHost::getPtr());
11391184
}
11401185

11411186
#endif // __SYCL_DEVICE_ONLY__
@@ -1197,9 +1242,11 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
11971242
preScreenAccessor(BufferRef.size(), PropertyList);
11981243
if (!IsPlaceH)
11991244
addHostAccessorAndWait(AccessorBaseHost::impl.get());
1245+
initHostAcc();
12001246
detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(),
12011247
detail::AccessorBaseHost::impl.get(),
12021248
AccessTarget, AccessMode, CodeLoc);
1249+
GDBMethodsAnchor();
12031250
#endif
12041251
}
12051252

@@ -1228,9 +1275,11 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
12281275
preScreenAccessor(BufferRef.size(), PropertyList);
12291276
if (!IsPlaceH)
12301277
addHostAccessorAndWait(AccessorBaseHost::impl.get());
1278+
initHostAcc();
12311279
detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(),
12321280
detail::AccessorBaseHost::impl.get(),
12331281
AccessTarget, AccessMode, CodeLoc);
1282+
GDBMethodsAnchor();
12341283
#endif
12351284
}
12361285

@@ -1257,9 +1306,11 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
12571306
BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) {
12581307
preScreenAccessor(BufferRef.size(), PropertyList);
12591308
detail::associateWithHandler(CommandGroupHandler, this, AccessTarget);
1309+
initHostAcc();
12601310
detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(),
12611311
detail::AccessorBaseHost::impl.get(),
12621312
AccessTarget, AccessMode, CodeLoc);
1313+
GDBMethodsAnchor();
12631314
}
12641315
#endif
12651316

@@ -1288,9 +1339,11 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
12881339
BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) {
12891340
preScreenAccessor(BufferRef.size(), PropertyList);
12901341
detail::associateWithHandler(CommandGroupHandler, this, AccessTarget);
1342+
initHostAcc();
12911343
detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(),
12921344
detail::AccessorBaseHost::impl.get(),
12931345
AccessTarget, AccessMode, CodeLoc);
1346+
GDBMethodsAnchor();
12941347
}
12951348
#endif
12961349

@@ -1316,13 +1369,14 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
13161369
getAdjustedMode(PropertyList),
13171370
detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT),
13181371
BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) {
1319-
GDBMethodsAnchor();
13201372
preScreenAccessor(BufferRef.size(), PropertyList);
13211373
if (!IsPlaceH)
13221374
addHostAccessorAndWait(AccessorBaseHost::impl.get());
1375+
initHostAcc();
13231376
detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(),
13241377
detail::AccessorBaseHost::impl.get(),
13251378
AccessTarget, AccessMode, CodeLoc);
1379+
GDBMethodsAnchor();
13261380
}
13271381
#endif
13281382

@@ -1350,13 +1404,14 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
13501404
getAdjustedMode(PropertyList),
13511405
detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT),
13521406
BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) {
1353-
GDBMethodsAnchor();
13541407
preScreenAccessor(BufferRef.size(), PropertyList);
13551408
if (!IsPlaceH)
13561409
addHostAccessorAndWait(AccessorBaseHost::impl.get());
1410+
initHostAcc();
13571411
detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(),
13581412
detail::AccessorBaseHost::impl.get(),
13591413
AccessTarget, AccessMode, CodeLoc);
1414+
GDBMethodsAnchor();
13601415
}
13611416
#endif
13621417

@@ -1414,12 +1469,13 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
14141469
getAdjustedMode(PropertyList),
14151470
detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT),
14161471
BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) {
1417-
GDBMethodsAnchor();
14181472
preScreenAccessor(BufferRef.size(), PropertyList);
14191473
detail::associateWithHandler(CommandGroupHandler, this, AccessTarget);
1474+
initHostAcc();
14201475
detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(),
14211476
detail::AccessorBaseHost::impl.get(),
14221477
AccessTarget, AccessMode, CodeLoc);
1478+
GDBMethodsAnchor();
14231479
}
14241480
#endif
14251481

@@ -1447,12 +1503,13 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
14471503
getAdjustedMode(PropertyList),
14481504
detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT),
14491505
BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) {
1450-
GDBMethodsAnchor();
14511506
preScreenAccessor(BufferRef.size(), PropertyList);
1507+
initHostAcc();
14521508
detail::associateWithHandler(CommandGroupHandler, this, AccessTarget);
14531509
detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(),
14541510
detail::AccessorBaseHost::impl.get(),
14551511
AccessTarget, AccessMode, CodeLoc);
1512+
GDBMethodsAnchor();
14561513
}
14571514
#endif
14581515

@@ -1634,7 +1691,6 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
16341691
detail::getSyclObjImpl(BufferRef).get(), Dimensions,
16351692
sizeof(DataT), BufferRef.OffsetInBytes,
16361693
BufferRef.IsSubBuffer, PropertyList) {
1637-
GDBMethodsAnchor();
16381694
preScreenAccessor(BufferRef.size(), PropertyList);
16391695
if (BufferRef.isOutOfBounds(AccessOffset, AccessRange,
16401696
BufferRef.get_range()))
@@ -1645,9 +1701,11 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
16451701

16461702
if (!IsPlaceH)
16471703
addHostAccessorAndWait(AccessorBaseHost::impl.get());
1704+
initHostAcc();
16481705
detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(),
16491706
detail::AccessorBaseHost::impl.get(),
16501707
AccessTarget, AccessMode, CodeLoc);
1708+
GDBMethodsAnchor();
16511709
}
16521710
#endif
16531711

@@ -1676,7 +1734,6 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
16761734
detail::getSyclObjImpl(BufferRef).get(), Dimensions,
16771735
sizeof(DataT), BufferRef.OffsetInBytes,
16781736
BufferRef.IsSubBuffer, PropertyList) {
1679-
GDBMethodsAnchor();
16801737
preScreenAccessor(BufferRef.size(), PropertyList);
16811738
if (BufferRef.isOutOfBounds(AccessOffset, AccessRange,
16821739
BufferRef.get_range()))
@@ -1687,9 +1744,11 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
16871744

16881745
if (!IsPlaceH)
16891746
addHostAccessorAndWait(AccessorBaseHost::impl.get());
1747+
initHostAcc();
16901748
detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(),
16911749
detail::AccessorBaseHost::impl.get(),
16921750
AccessTarget, AccessMode, CodeLoc);
1751+
GDBMethodsAnchor();
16931752
}
16941753
#endif
16951754

@@ -1749,7 +1808,6 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
17491808
detail::getSyclObjImpl(BufferRef).get(), Dimensions,
17501809
sizeof(DataT), BufferRef.OffsetInBytes,
17511810
BufferRef.IsSubBuffer, PropertyList) {
1752-
GDBMethodsAnchor();
17531811
preScreenAccessor(BufferRef.size(), PropertyList);
17541812
if (BufferRef.isOutOfBounds(AccessOffset, AccessRange,
17551813
BufferRef.get_range()))
@@ -1758,10 +1816,12 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
17581816
"the buffer",
17591817
PI_ERROR_INVALID_VALUE);
17601818

1819+
initHostAcc();
17611820
detail::associateWithHandler(CommandGroupHandler, this, AccessTarget);
17621821
detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(),
17631822
detail::AccessorBaseHost::impl.get(),
17641823
AccessTarget, AccessMode, CodeLoc);
1824+
GDBMethodsAnchor();
17651825
}
17661826
#endif
17671827

@@ -1790,7 +1850,6 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
17901850
detail::getSyclObjImpl(BufferRef).get(), Dimensions,
17911851
sizeof(DataT), BufferRef.OffsetInBytes,
17921852
BufferRef.IsSubBuffer, PropertyList) {
1793-
GDBMethodsAnchor();
17941853
preScreenAccessor(BufferRef.size(), PropertyList);
17951854
if (BufferRef.isOutOfBounds(AccessOffset, AccessRange,
17961855
BufferRef.get_range()))
@@ -1799,10 +1858,12 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
17991858
"the buffer",
18001859
PI_ERROR_INVALID_VALUE);
18011860

1861+
initHostAcc();
18021862
detail::associateWithHandler(CommandGroupHandler, this, AccessTarget);
18031863
detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(),
18041864
detail::AccessorBaseHost::impl.get(),
18051865
AccessTarget, AccessMode, CodeLoc);
1866+
GDBMethodsAnchor();
18061867
}
18071868
#endif
18081869

sycl/source/accessor.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ range<3> &AccessorBaseHost::getAccessRange() { return impl->MAccessRange; }
3333
range<3> &AccessorBaseHost::getMemoryRange() { return impl->MMemoryRange; }
3434
void *AccessorBaseHost::getPtr() { return impl->MData; }
3535

36+
detail::AccHostDataT &AccessorBaseHost::getAccData() { return impl->MAccData; }
37+
3638
const property_list &AccessorBaseHost::getPropList() const {
3739
return impl->MPropertyList;
3840
}

sycl/source/detail/accessor_impl.hpp

+9-8
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#pragma once
1010

1111
#include <sycl/access/access.hpp>
12+
#include <sycl/accessor.hpp>
1213
#include <sycl/detail/export.hpp>
1314
#include <sycl/id.hpp>
1415
#include <sycl/property_list.hpp>
@@ -45,17 +46,15 @@ class __SYCL_EXPORT AccessorImplHost {
4546
int ElemSize, int OffsetInBytes = 0,
4647
bool IsSubBuffer = false,
4748
const property_list &PropertyList = {})
48-
: MOffset(Offset), MAccessRange(AccessRange), MMemoryRange(MemoryRange),
49-
MAccessMode(AccessMode),
49+
: MAccData(Offset, AccessRange, MemoryRange), MAccessMode(AccessMode),
5050
MSYCLMemObj((detail::SYCLMemObjI *)SYCLMemObject), MDims(Dims),
5151
MElemSize(ElemSize), MOffsetInBytes(OffsetInBytes),
5252
MIsSubBuffer(IsSubBuffer), MPropertyList(PropertyList) {}
5353

5454
~AccessorImplHost();
5555

5656
AccessorImplHost(const AccessorImplHost &Other)
57-
: MOffset(Other.MOffset), MAccessRange(Other.MAccessRange),
58-
MMemoryRange(Other.MMemoryRange), MAccessMode(Other.MAccessMode),
57+
: MAccData(Other.MAccData), MAccessMode(Other.MAccessMode),
5958
MSYCLMemObj(Other.MSYCLMemObj), MDims(Other.MDims),
6059
MElemSize(Other.MElemSize), MOffsetInBytes(Other.MOffsetInBytes),
6160
MIsSubBuffer(Other.MIsSubBuffer), MPropertyList(Other.MPropertyList) {}
@@ -71,11 +70,13 @@ class __SYCL_EXPORT AccessorImplHost {
7170

7271
void resize(size_t GlobalSize);
7372

74-
id<3> MOffset;
73+
detail::AccHostDataT MAccData;
74+
75+
id<3> &MOffset = MAccData.MOffset;
7576
// The size of accessing region.
76-
range<3> MAccessRange;
77+
range<3> &MAccessRange = MAccData.MAccessRange;
7778
// The size of memory object this requirement is created for.
78-
range<3> MMemoryRange;
79+
range<3> &MMemoryRange = MAccData.MMemoryRange;
7980
access::mode MAccessMode;
8081

8182
detail::SYCLMemObjI *MSYCLMemObj;
@@ -85,7 +86,7 @@ class __SYCL_EXPORT AccessorImplHost {
8586
unsigned int MOffsetInBytes;
8687
bool MIsSubBuffer;
8788

88-
void *MData = nullptr;
89+
void *&MData = MAccData.MData;
8990

9091
Command *MBlockedCmd = nullptr;
9192

sycl/test/abi/layout_accessors_host.cpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ void hostAcc(accessor<int, 1, access::mode::read, access::target::host_buffer> A
2525
// CHECK-NEXT: 8 | class std::__shared_count<__gnu_cxx::_S_atomic> _M_refcount
2626
// CHECK-NEXT: 8 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi
2727
// CHECK-NEXT: 0 | class sycl::detail::accessor_common<int, 1, sycl::access::mode::read, sycl::access::target::host_buffer, sycl::access::placeholder::false_t> (base) (empty)
28-
// CHECK-NEXT: 16 | char[16] padding
28+
// CHECK-NEXT: 16 | detail::AccHostDataT * MAccData
29+
// CHECK-NEXT: 24 | char[8] padding
2930
// CHECK-NEXT: [sizeof=32, dsize=32, align=8,
3031
// CHECK-NEXT: nvsize=32, nvalign=8]
3132

@@ -46,7 +47,8 @@ void hostAcc(accessor<int, 1, access::mode::read, access::target::global_buffer>
4647
// CHECK-NEXT: 8 | class std::__shared_count<__gnu_cxx::_S_atomic> _M_refcount
4748
// CHECK-NEXT: 8 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi
4849
// CHECK-NEXT: 0 | class sycl::detail::accessor_common<int, 1, sycl::access::mode::read, sycl::access::target::global_buffer, sycl::access::placeholder::false_t> (base) (empty)
49-
// CHECK-NEXT: 16 | char[16] padding
50+
// CHECK-NEXT: 16 | detail::AccHostDataT * MAccData
51+
// CHECK-NEXT: 24 | char[8] padding
5052
// CHECK-NEXT: [sizeof=32, dsize=32, align=8,
5153
// CHECK-NEXT: nvsize=32, nvalign=8]
5254

sycl/test/abi/sycl_symbols_linux.dump

+1
Original file line numberDiff line numberDiff line change
@@ -3812,6 +3812,7 @@ _ZN4sycl3_V16detail13select_deviceERKSt8functionIFiRKNS0_6deviceEEE
38123812
_ZN4sycl3_V16detail13select_deviceERKSt8functionIFiRKNS0_6deviceEEERKNS0_7contextE
38133813
_ZN4sycl3_V16detail14getBorderColorENS0_19image_channel_orderE
38143814
_ZN4sycl3_V16detail15getOrWaitEventsESt6vectorINS0_5eventESaIS3_EESt10shared_ptrINS1_12context_implEE
3815+
_ZN4sycl3_V16detail16AccessorBaseHost10getAccDataEv
38153816
_ZN4sycl3_V16detail16AccessorBaseHost14getAccessRangeEv
38163817
_ZN4sycl3_V16detail16AccessorBaseHost14getMemoryRangeEv
38173818
_ZN4sycl3_V16detail16AccessorBaseHost6getPtrEv

0 commit comments

Comments
 (0)