Skip to content

Commit 3916d3b

Browse files
[SYCL] Redesign pointer handling for OpenCL kernel generation (#6728)
Requirement - Do not decompose types with pointers when generating OpenCL kernel arguments. This PR adds logic to stop decomposing trivial types containing pointers. For every SYCL kernel argument which is a record type containing a pointer (or has a field or a base class with a pointer), we generate a new record type with all pointers in __global address space. This compiler-generated type is the OpenCL kernel argument. In the kernel body, we initialize the local clone via memcpy. Limitations: 1. Arrays of pointers or arrays of types with pointers are still decomposed into their elements. 2. Due to current implementation restrictions, types which are not default constructible continue to trigger decomposition if they contain pointers. Both limitations above will hopefully be fixed in follow-up PRs. Signed-off-by: Elizabeth Andrews <[email protected]>
1 parent 60c634c commit 3916d3b

12 files changed

+928
-154
lines changed

clang/include/clang/Basic/Attr.td

+8
Original file line numberDiff line numberDiff line change
@@ -1429,6 +1429,14 @@ def SYCLRequiresDecomposition : InheritableAttr {
14291429
let Documentation = [InternalOnly];
14301430
}
14311431

1432+
def SYCLGenerateNewType : InheritableAttr {
1433+
// No spellings, as this is for internal use.
1434+
let Spellings = [];
1435+
let Subjects = SubjectList<[Named]>;
1436+
let LangOpts = [SYCLIsDevice, SYCLIsHost];
1437+
let Documentation = [InternalOnly];
1438+
}
1439+
14321440
def SYCLIntelKernelArgsRestrict : InheritableAttr {
14331441
let Spellings = [CXX11<"intel", "kernel_args_restrict">];
14341442
let Subjects = SubjectList<[Function], ErrorDiag>;

clang/lib/Sema/SemaSYCL.cpp

+414-41
Large diffs are not rendered by default.

clang/test/CodeGenSYCL/inheritance.cpp

+26-19
Original file line numberDiff line numberDiff line change
@@ -39,31 +39,38 @@ int main() {
3939
return 0;
4040
}
4141

42+
// CHECK: %struct.base = type { i32, %class.InnerField }
43+
// CHECK: %class.InnerField = type { %class.InnerFieldBase, i32 }
44+
// CHECK: %class.InnerFieldBase = type { i32 }
45+
// CHECK: %class.__generated_second_base = type { ptr addrspace(1) }
46+
// CHECK: %struct.derived = type <{ %struct.base, [4 x i8], %class.second_base, i32, [4 x i8] }>
47+
// CHECK: %class.second_base = type { ptr addrspace(4) }
48+
4249
// Check kernel parameters
43-
// CHECK: define {{.*}}spir_kernel void @{{.*}}derived(ptr noundef byval(%struct.base) align 4 %_arg__base, ptr noundef byval(%struct.__wrapper_class) align 8 %_arg_e, i32 noundef %_arg_a)
50+
// CHECK: define {{.*}}spir_kernel void @{{.*}}derived
51+
// CHECK-SAME: ptr noundef byval(%struct.base) align 4 %_arg__base
52+
// CHECK-SAME: ptr noundef byval(%class.__generated_second_base) align 8 %_arg__base1
53+
// CHECK-SAME: i32 noundef %_arg_a
4454

45-
// Check alloca for kernel paramters
46-
// CHECK: %[[ARG_AA:[a-zA-Z0-9_.]+]] = alloca i32, align 4
47-
// Check alloca for local functor object
48-
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = alloca %struct.derived, align 8
49-
// CHECK: %[[ARG_A:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[ARG_AA]] to ptr addrspace(4)
50-
// CHECK: %[[BASE_TO_PTR:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECT]] to ptr addrspace(4)
55+
// Check allocas for kernel parameters and local functor object
56+
// CHECK: %[[ARG_A_ALLOCA:[a-zA-Z0-9_.]+]] = alloca i32, align 4
57+
// CHECK: %[[LOCAL_OBJECT_ALLOCA:[a-zA-Z0-9_.]+]] = alloca %struct.derived, align 8
58+
// CHECK: %[[ARG_A:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[ARG_A_ALLOCA]] to ptr addrspace(4)
59+
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECT_ALLOCA]] to ptr addrspace(4)
60+
// CHECK: %[[ARG_BASE:[a-zA-Z0-9_.]+]] = addrspacecast ptr %_arg__base to ptr addrspace(4)
61+
// CHECK: %[[ARG_BASE1:[a-zA-Z0-9_.]+]] = addrspacecast ptr %_arg__base1 to ptr addrspace(4)
5162
// CHECK: store i32 %_arg_a, ptr addrspace(4) %[[ARG_A]], align 4
5263

5364
// Initialize 'base' subobject
54-
// CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 8 %[[BASE_TO_PTR]], ptr addrspace(4) align 4 %_arg__base.ascast, i64 12, i1 false)
55-
56-
// Initialize 'second_base' subobject
57-
// First, derived-to-base cast with offset:
58-
// CHECK: %[[OFFSET_CALC:.*]] = getelementptr inbounds i8, ptr addrspace(4) %[[LOCAL_OBJECT]].ascast, i64 16
59-
// Initialize 'second_base::e'
60-
// CHECK: %[[SECOND_BASE_PTR:.*]] = getelementptr inbounds %class.second_base, ptr addrspace(4) %[[OFFSET_CALC]], i32 0, i32 0
61-
// CHECK: %[[PTR_TO_WRAPPER:.*]] = getelementptr inbounds %struct.__wrapper_class, ptr addrspace(4) %_arg_e.ascast, i32 0, i32 0
62-
// CHECK: %[[LOAD_PTR:.*]] = load ptr addrspace(1), ptr addrspace(4) %[[PTR_TO_WRAPPER]]
63-
// CHECK: %[[AS_CAST:.*]] = addrspacecast ptr addrspace(1) %[[LOAD_PTR]] to ptr addrspace(4)
64-
// CHECK: store ptr addrspace(4) %[[AS_CAST]], ptr addrspace(4) %[[SECOND_BASE_PTR]]
65+
// CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 8 %[[LOCAL_OBJECT]], ptr addrspace(4) align 4 %[[ARG_BASE]], i64 12, i1 false)
6566

6667
// Initialize field 'a'
67-
// CHECK: %[[GEP_A:[a-zA-Z0-9]+]] = getelementptr inbounds %struct.derived, ptr addrspace(4) %[[LOCAL_OBJECT]].ascast, i32 0, i32 3
68+
// CHECK: %[[GEP_A:[a-zA-Z0-9]+]] = getelementptr inbounds %struct.derived, ptr addrspace(4) %[[LOCAL_OBJECT]], i32 0, i32 3
6869
// CHECK: %[[LOAD_A:[0-9]+]] = load i32, ptr addrspace(4) %[[ARG_A]], align 4
6970
// CHECK: store i32 %[[LOAD_A]], ptr addrspace(4) %[[GEP_A]]
71+
72+
// Initialize 'second_base' subobject
73+
// First, derived-to-base cast with offset:
74+
// CHECK: %[[OFFSET_CALC:.*]] = getelementptr inbounds i8, ptr addrspace(4) %[[LOCAL_OBJECT]], i64 16
75+
// Initialize 'second_base'
76+
// CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 8 %[[OFFSET_CALC]], ptr addrspace(4) align 8 %[[ARG_BASE1]], i64 8, i1 false)

clang/test/CodeGenSYCL/no_opaque_inheritance.cpp

+29-19
Original file line numberDiff line numberDiff line change
@@ -39,35 +39,45 @@ int main() {
3939
return 0;
4040
}
4141

42+
// CHECK: %struct.base = type { i32, %class.InnerField }
43+
// CHECK: %class.InnerField = type { %class.InnerFieldBase, i32 }
44+
// CHECK: %class.InnerFieldBase = type { i32 }
45+
// CHECK: %class.__generated_second_base = type { i32 addrspace(1)* }
46+
// CHECK: %struct.derived = type <{ %struct.base, [4 x i8], %class.second_base, i32, [4 x i8] }>
47+
// CHECK: %class.second_base = type { i32 addrspace(4)* }
48+
4249
// Check kernel parameters
43-
// CHECK: define {{.*}}spir_kernel void @{{.*}}derived(%struct.base* noundef byval(%struct.base) align 4 %_arg__base, %struct.__wrapper_class* noundef byval(%struct.__wrapper_class) align 8 %_arg_e, i32 noundef %_arg_a)
50+
// CHECK: define {{.*}}spir_kernel void @{{.*}}derived
51+
// CHECK-SAME: %struct.base* noundef byval(%struct.base) align 4 %_arg__base
52+
// CHECK-SAME: %class.__generated_second_base* noundef byval(%class.__generated_second_base) align 8 %_arg__base1
53+
// CHECK-SAME: i32 noundef %_arg_a
4454

45-
// Check alloca for kernel paramters
46-
// CHECK: %[[ARG_AA:[a-zA-Z0-9_.]+]] = alloca i32, align 4
47-
// Check alloca for local functor object
48-
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = alloca %struct.derived, align 8
49-
// CHECK: %[[ARG_A:[a-zA-Z0-9_.]+]] = addrspacecast i32* %[[ARG_AA]] to i32 addrspace(4)*
55+
// Check allocas for kernel parameters and local functor object
56+
// CHECK: %[[ARG_A_ALLOCA:[a-zA-Z0-9_.]+]] = alloca i32, align 4
57+
// CHECK: %[[LOCAL_OBJECT_ALLOCA:[a-zA-Z0-9_.]+]] = alloca %struct.derived, align 8
58+
// CHECK: %[[ARG_A:[a-zA-Z0-9_.]+]] = addrspacecast i32* %[[ARG_A_ALLOCA]] to i32 addrspace(4)*
59+
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast %struct.derived* %[[LOCAL_OBJECT_ALLOCA]] to %struct.derived addrspace(4)*
60+
// CHECK: %[[ARG_BASE:[a-zA-Z0-9_.]+]] = addrspacecast %struct.base* %_arg__base to %struct.base addrspace(4)*
61+
// CHECK: %[[ARG_BASE1:[a-zA-Z0-9_.]+]] = addrspacecast %class.__generated_second_base* %_arg__base1 to %class.__generated_second_base addrspace(4)*
5062
// CHECK: store i32 %_arg_a, i32 addrspace(4)* %[[ARG_A]], align 4
5163

5264
// Initialize 'base' subobject
53-
// CHECK: %[[DERIVED_TO_BASE:.*]] = bitcast %struct.derived addrspace(4)* %[[LOCAL_OBJECT]].ascast to %struct.base addrspace(4)*
65+
// CHECK: %[[DERIVED_TO_BASE:.*]] = bitcast %struct.derived addrspace(4)* %[[LOCAL_OBJECT]] to %struct.base addrspace(4)*
5466
// CHECK: %[[BASE_TO_PTR:.*]] = bitcast %struct.base addrspace(4)* %[[DERIVED_TO_BASE]] to i8 addrspace(4)*
55-
// CHECK: %[[PARAM_TO_PTR:.*]] = bitcast %struct.base addrspace(4)* %_arg__base.ascast to i8 addrspace(4)*
67+
// CHECK: %[[PARAM_TO_PTR:.*]] = bitcast %struct.base addrspace(4)* %[[ARG_BASE]] to i8 addrspace(4)*
5668
// CHECK: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 8 %[[BASE_TO_PTR]], i8 addrspace(4)* align 4 %[[PARAM_TO_PTR]], i64 12, i1 false)
5769

70+
// Initialize field 'a'
71+
// CHECK: %[[GEP_A:[a-zA-Z0-9]+]] = getelementptr inbounds %struct.derived, %struct.derived addrspace(4)* %[[LOCAL_OBJECT]], i32 0, i32 3
72+
// CHECK: %[[LOAD_A:[0-9]+]] = load i32, i32 addrspace(4)* %[[ARG_A]], align 4
73+
// CHECK: store i32 %[[LOAD_A]], i32 addrspace(4)* %[[GEP_A]]
74+
5875
// Initialize 'second_base' subobject
5976
// First, derived-to-base cast with offset:
60-
// CHECK: %[[DERIVED_PTR:.*]] = bitcast %struct.derived addrspace(4)* %[[LOCAL_OBJECT]].ascast to i8 addrspace(4)*
77+
// CHECK: %[[DERIVED_PTR:.*]] = bitcast %struct.derived addrspace(4)* %[[LOCAL_OBJECT]] to i8 addrspace(4)*
6178
// CHECK: %[[OFFSET_CALC:.*]] = getelementptr inbounds i8, i8 addrspace(4)* %[[DERIVED_PTR]], i64 16
6279
// CHECK: %[[TO_SECOND_BASE:.*]] = bitcast i8 addrspace(4)* %[[OFFSET_CALC]] to %class.second_base addrspace(4)*
63-
// Initialize 'second_base::e'
64-
// CHECK: %[[SECOND_BASE_PTR:.*]] = getelementptr inbounds %class.second_base, %class.second_base addrspace(4)* %[[TO_SECOND_BASE]], i32 0, i32 0
65-
// CHECK: %[[PTR_TO_WRAPPER:.*]] = getelementptr inbounds %struct.__wrapper_class, %struct.__wrapper_class addrspace(4)* %_arg_e.ascast, i32 0, i32 0
66-
// CHECK: %[[LOAD_PTR:.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %[[PTR_TO_WRAPPER]]
67-
// CHECK: %[[AS_CAST:.*]] = addrspacecast i32 addrspace(1)* %[[LOAD_PTR]] to i32 addrspace(4)*
68-
// CHECK: store i32 addrspace(4)* %[[AS_CAST]], i32 addrspace(4)* addrspace(4)* %[[SECOND_BASE_PTR]]
80+
// CHECK: %[[SECOND_BASE_TO_PTR:.*]] = bitcast %class.second_base addrspace(4)* %[[TO_SECOND_BASE]] to i8 addrspace(4)*
81+
// CHECK: %[[SECOND_PARAM_TO_PTR:.*]] = bitcast %class.__generated_second_base addrspace(4)* %[[ARG_BASE1]] to i8 addrspace(4)*
82+
// CHECK: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 8 %[[SECOND_BASE_TO_PTR]], i8 addrspace(4)* align 8 %[[SECOND_PARAM_TO_PTR]], i64 8, i1 false)
6983

70-
// Initialize field 'a'
71-
// CHECK: %[[GEP_A:[a-zA-Z0-9]+]] = getelementptr inbounds %struct.derived, %struct.derived addrspace(4)* %[[LOCAL_OBJECT]].ascast, i32 0, i32 3
72-
// CHECK: %[[LOAD_A:[0-9]+]] = load i32, i32 addrspace(4)* %[[ARG_A]], align 4
73-
// CHECK: store i32 %[[LOAD_A]], i32 addrspace(4)* %[[GEP_A]]

clang/test/CodeGenSYCL/no_opaque_pointers-in-structs.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,14 @@ int main() {
3535

3636
// CHECK: %[[WRAPPER_F1:[a-zA-Z0-9_.]+]] = type { i32 addrspace(1)* }
3737
// CHECK: %[[WRAPPER_F2:[a-zA-Z0-9_.]+]] = type { float addrspace(1)* }
38-
// CHECK: %[[WRAPPER_F:[a-zA-Z0-9_.]+]] = type { float addrspace(1)* }
38+
// CHECK: %[[GENERATED_A:[a-zA-Z0-9_.]+]] = type { float addrspace(1)* }
3939
// CHECK: %[[WRAPPER_F4_1:[a-zA-Z0-9_.]+]] = type { i32 addrspace(1)* }
4040
// CHECK: %[[WRAPPER_F4_2:[a-zA-Z0-9_.]+]] = type { i32 addrspace(1)* }
4141
// CHECK: %[[WRAPPER_LAMBDA_PTR:[a-zA-Z0-9_.]+]] = type { float addrspace(1)* }
4242
// CHECK: define {{.*}}spir_kernel void @{{.*}}structs
4343
// CHECK-SAME: %[[WRAPPER_F1]]* noundef byval(%[[WRAPPER_F1]]) align 8 %_arg_F1,
4444
// CHECK-SAME: %[[WRAPPER_F2]]* noundef byval(%[[WRAPPER_F2]]) align 8 %_arg_F2,
45-
// CHECK-SAME: %[[WRAPPER_F]]* noundef byval(%[[WRAPPER_F]]) align 8 %_arg_F,
45+
// CHECK-SAME: %[[GENERATED_A]]* noundef byval(%[[GENERATED_A]]) align 8 %_arg_F3,
4646
// CHECK-SAME: %[[WRAPPER_F4_1]]* noundef byval(%[[WRAPPER_F4_1]]) align 8 %_arg_F4
4747
// CHECK-SAME: %[[WRAPPER_F4_2]]* noundef byval(%[[WRAPPER_F4_2]]) align 8 %_arg_F41
4848
// CHECK: define {{.*}}spir_kernel void @{{.*}}lambdas{{.*}}(%[[WRAPPER_LAMBDA_PTR]]* noundef byval(%[[WRAPPER_LAMBDA_PTR]]) align 8 %_arg_Ptr)

clang/test/CodeGenSYCL/pointers-in-structs.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,14 @@ int main() {
3535

3636
// CHECK: %[[WRAPPER_F1:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
3737
// CHECK: %[[WRAPPER_F2:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
38-
// CHECK: %[[WRAPPER_F:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
38+
// CHECK: %[[GENERATED_A:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
3939
// CHECK: %[[WRAPPER_F4_1:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
4040
// CHECK: %[[WRAPPER_F4_2:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
4141
// CHECK: %[[WRAPPER_LAMBDA_PTR:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
4242
// CHECK: define {{.*}}spir_kernel void @{{.*}}structs
4343
// CHECK-SAME: ptr noundef byval(%[[WRAPPER_F1]]) align 8 %_arg_F1,
4444
// CHECK-SAME: ptr noundef byval(%[[WRAPPER_F2]]) align 8 %_arg_F2,
45-
// CHECK-SAME: ptr noundef byval(%[[WRAPPER_F]]) align 8 %_arg_F,
45+
// CHECK-SAME: ptr noundef byval(%[[GENERATED_A]]) align 8 %_arg_F3,
4646
// CHECK-SAME: ptr noundef byval(%[[WRAPPER_F4_1]]) align 8 %_arg_F4
4747
// CHECK-SAME: ptr noundef byval(%[[WRAPPER_F4_2]]) align 8 %_arg_F41
4848
// CHECK: define {{.*}}spir_kernel void @{{.*}}lambdas{{.*}}(ptr noundef byval(%[[WRAPPER_LAMBDA_PTR]]) align 8 %_arg_Ptr)

clang/test/CodeGenSYCL/struct_kernel_param.cpp

+2-6
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,8 @@
77
// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 0 },
88
// FldInt, offset to 16 because the float* causes the alignment of the structs
99
// to change.
10-
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 16 },
11-
// FldArr
12-
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 24 },
13-
// FldFloat
14-
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 8, 32 },
15-
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 12, 40 },
10+
// MyStruct is not decomposed since it does not contain special types.
11+
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 40, 16 },
1612
// CHECK-EMPTY:
1713
// CHECK-NEXT: { kernel_param_kind_t::kind_invalid, -987654321, -987654321 },
1814
// CHECK-NEXT:};

0 commit comments

Comments
 (0)