Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Null constant inserted by memset handling not added to entry points interface #2221

Open
karolherbst opened this issue Nov 16, 2023 · 2 comments

Comments

@karolherbst
Copy link
Contributor

The handling of memset can create new variables which never get added to the entry points interface. In the example below the variable %30 is not handled correctly.

example opencl C file:

__kernel void test(__global uchar* input, __global int* output)
{
 *output = (int [5]){ 0,0,0,0,0, }[*input];
}

llvm ir:

; ModuleID = '<stdin>'
source_filename = "test.cl"
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir64-unknown-unknown"

@constinit = private global [5 x i32] zeroinitializer, align 4

; Function Attrs: convergent noinline norecurse nounwind optnone
define dso_local spir_kernel void @resample_ref_14493953481400187574_0(ptr addrspace(1) noundef align 1 %0, ptr addrspace(1) noundef align 4 %1) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
  %3 = alloca ptr addrspace(1), align 8
  %4 = alloca ptr addrspace(1), align 8
  %5 = alloca [5 x i32], align 4
  store ptr addrspace(1) %0, ptr %3, align 8
  store ptr addrspace(1) %1, ptr %4, align 8
  call void @llvm.memset.p0.i64(ptr align 4 %5, i8 0, i64 20, i1 false)
  %6 = getelementptr inbounds [5 x i32], ptr %5, i64 0, i64 0
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %5, ptr align 4 @constinit, i64 20, i1 false)
  %7 = load ptr addrspace(1), ptr %3, align 8
  %8 = load i8, ptr addrspace(1) %7, align 1
  %9 = zext i8 %8 to i64
  %10 = getelementptr inbounds [5 x i32], ptr %5, i64 0, i64 %9
  %11 = load i32, ptr %10, align 4
  %12 = load ptr addrspace(1), ptr %4, align 8
  store i32 %11, ptr addrspace(1) %12, align 4
  ret void
}

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #2

attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }

!llvm.module.flags = !{!0, !1}
!opencl.ocl.version = !{!2}
!opencl.spir.version = !{!2}
!llvm.ident = !{!3}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"frame-pointer", i32 2}
!2 = !{i32 3, i32 0}
!3 = !{!"clang version 16.0.6 (Fedora 16.0.6-3.fc38)"}
!4 = !{i32 1, i32 1}
!5 = !{!"none", !"none"}
!6 = !{!"uchar*", !"int*"}
!7 = !{!"", !""}

spirv:

; SPIR-V
; Version: 1.4
; Generator: Khronos LLVM/SPIR-V Translator; 14
; Bound: 51
; Schema: 0
               OpCapability Addresses
               OpCapability Linkage
               OpCapability Kernel
               OpCapability Int64
               OpCapability Int8
          %1 = OpExtInstImport "OpenCL.std"
               OpMemoryModel Physical64 OpenCL
               OpEntryPoint Kernel %46 "resample_ref_14493953481400187574_0" %constinit
               OpSource OpenCL_C 300000
               OpName %constinit "constinit"
               OpName %resample_ref_14493953481400187574_0 "resample_ref_14493953481400187574_0"
               OpDecorate %constinit Alignment 4
               OpDecorate %resample_ref_14493953481400187574_0 LinkageAttributes "resample_ref_14493953481400187574_0" Export
               OpDecorate %14 Alignment 1
               OpDecorate %15 Alignment 4
               OpDecorate %18 Alignment 8
               OpDecorate %19 Alignment 8
               OpDecorate %21 Alignment 4
               OpDecorate %30 Constant
               OpDecorate %47 Alignment 1
               OpDecorate %48 Alignment 4
      %ulong = OpTypeInt 64 0
       %uint = OpTypeInt 32 0
      %uchar = OpTypeInt 8 0
    %ulong_5 = OpConstant %ulong 5
   %ulong_20 = OpConstant %ulong 20
    %ulong_0 = OpConstant %ulong 0
%_arr_uint_ulong_5 = OpTypeArray %uint %ulong_5
%_ptr_UniformConstant__arr_uint_ulong_5 = OpTypePointer UniformConstant %_arr_uint_ulong_5
       %void = OpTypeVoid
%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
         %12 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_uchar
%_ptr_Function__ptr_CrossWorkgroup_uchar = OpTypePointer Function %_ptr_CrossWorkgroup_uchar
%_ptr_Function__arr_uint_ulong_5 = OpTypePointer Function %_arr_uint_ulong_5
%_ptr_Function_uchar = OpTypePointer Function %uchar
%_arr_uchar_ulong_20 = OpTypeArray %uchar %ulong_20
%_ptr_UniformConstant__arr_uchar_ulong_20 = OpTypePointer UniformConstant %_arr_uchar_ulong_20
%_ptr_UniformConstant_uchar = OpTypePointer UniformConstant %uchar
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
%_ptr_Function__ptr_CrossWorkgroup_uint = OpTypePointer Function %_ptr_CrossWorkgroup_uint
          %6 = OpConstantNull %_arr_uint_ulong_5
  %constinit = OpVariable %_ptr_UniformConstant__arr_uint_ulong_5 UniformConstant %6
         %28 = OpConstantNull %_arr_uchar_ulong_20
         %30 = OpVariable %_ptr_UniformConstant__arr_uchar_ulong_20 UniformConstant %28
%resample_ref_14493953481400187574_0 = OpFunction %void DontInline %12
         %14 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
         %15 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
         %16 = OpLabel
         %18 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
         %19 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
         %21 = OpVariable %_ptr_Function__arr_uint_ulong_5 Function
         %22 = OpBitcast %_ptr_Function__ptr_CrossWorkgroup_uchar %18
               OpStore %22 %14 Aligned 8
         %23 = OpBitcast %_ptr_Function__ptr_CrossWorkgroup_uchar %19
               OpStore %23 %15 Aligned 8
         %25 = OpBitcast %_ptr_Function_uchar %21
         %32 = OpBitcast %_ptr_UniformConstant_uchar %30
               OpCopyMemorySized %25 %32 %ulong_20 Aligned 4
         %35 = OpInBoundsPtrAccessChain %_ptr_Function_uint %21 %ulong_0 %ulong_0
               OpCopyMemorySized %21 %constinit %ulong_20 Aligned 4
         %36 = OpBitcast %_ptr_Function__ptr_CrossWorkgroup_uchar %18
         %37 = OpLoad %_ptr_CrossWorkgroup_uchar %36 Aligned 8
         %38 = OpLoad %uchar %37 Aligned 1
         %39 = OpUConvert %ulong %38
         %40 = OpInBoundsPtrAccessChain %_ptr_Function_uint %21 %ulong_0 %39
         %41 = OpLoad %uint %40 Aligned 4
         %44 = OpBitcast %_ptr_Function__ptr_CrossWorkgroup_uint %19
         %45 = OpLoad %_ptr_CrossWorkgroup_uint %44 Aligned 8
               OpStore %45 %41 Aligned 4
               OpReturn
               OpFunctionEnd
         %46 = OpFunction %void DontInline %12
         %47 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
         %48 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
         %49 = OpLabel
         %50 = OpFunctionCall %void %resample_ref_14493953481400187574_0 %47 %48
               OpReturn
               OpFunctionEnd
@karolherbst
Copy link
Contributor Author

patch based on 16, the idea is to just emit the variable in each function using global function private variables:

diff --git a/lib/SPIRV/SPIRVWriter.cpp b/lib/SPIRV/SPIRVWriter.cpp
index e8b67d5e..dbc20d02 100644
--- a/lib/SPIRV/SPIRVWriter.cpp
+++ b/lib/SPIRV/SPIRVWriter.cpp
@@ -1792,6 +1792,12 @@ LLVMToSPIRVBase::transValueWithoutDecoration(Value *V, SPIRVBasicBlock *BB,
   }
 
   if (auto GV = dyn_cast<GlobalVariable>(V)) {
+    auto AddressSpace = static_cast<SPIRAddressSpace>(GV->getAddressSpace());
+    // We can't emit private variables globally, we need to create copies of each value inside each
+    // function
+    if (AddressSpace == SPIRAS_Private && !BB)
+      return nullptr;
+
     llvm::Type *Ty = GV->getValueType();
     // Though variables with common linkage type are initialized by 0,
     // they can be represented in SPIR-V as uninitialized variables with
@@ -1852,7 +1858,6 @@ LLVMToSPIRVBase::transValueWithoutDecoration(Value *V, SPIRVBasicBlock *BB,
     }
 
     SPIRVStorageClassKind StorageClass;
-    auto AddressSpace = static_cast<SPIRAddressSpace>(GV->getAddressSpace());
     bool IsVectorCompute =
         BM->isAllowedToUseExtension(ExtensionID::SPV_INTEL_vector_compute) &&
         GV->hasAttribute(kVCMetadata::VCGlobalVariable);
@@ -1872,10 +1877,14 @@ LLVMToSPIRVBase::transValueWithoutDecoration(Value *V, SPIRVBasicBlock *BB,
       StorageClass = SPIRSPIRVAddrSpaceMap::map(AddressSpace);
     }
 
-    SPIRVType *TranslatedTy = transPointerType(Ty, GV->getAddressSpace());
+    SPIRVType *TranslatedTy = transPointerType(Ty, static_cast<unsigned int>(AddressSpace));
+
+    SPIRVBasicBlock *VarBB = nullptr;
+    if (StorageClass == StorageClassFunction)
+      VarBB = BB;
     auto BVar = static_cast<SPIRVVariable *>(
         BM->addVariable(TranslatedTy, GV->isConstant(), transLinkageType(GV),
-                        BVarInit, GV->getName().str(), StorageClass, nullptr));
+                        BVarInit, GV->getName().str(), StorageClass, VarBB));
 
     if (IsVectorCompute) {
       BVar->addDecorate(DecorationVectorComputeVariableINTEL);
@@ -3946,12 +3955,12 @@ SPIRVValue *LLVMToSPIRVBase::transIntrinsicInst(IntrinsicInst *II,
       std::vector<SPIRVValue *> Elts(TNumElts, transValue(Val, BB));
       Init = BM->addCompositeConstant(CompositeTy, Elts);
     }
-    SPIRVType *VarTy = transPointerType(AT, SPIRV::SPIRAS_Constant);
+    SPIRVType *VarTy = transPointerType(AT, SPIRV::SPIRAS_Private);
     SPIRVValue *Var = BM->addVariable(VarTy, /*isConstant*/ true,
                                       spv::internal::LinkageTypeInternal, Init,
-                                      "", StorageClassUniformConstant, nullptr);
+                                      "", StorageClassFunction, BB->getParent()->getBasicBlock(0));
     SPIRVType *SourceTy =
-        transPointerType(Val->getType(), SPIRV::SPIRAS_Constant);
+        transPointerType(Val->getType(), SPIRV::SPIRAS_Private);
     SPIRVValue *Source = BM->addUnaryInst(OpBitcast, SourceTy, Var, BB);
     SPIRVValue *Target = transValue(MSI->getRawDest(), BB);
     return BM->addCopyMemorySizedInst(Target, Source, CompositeTy->getLength(),
@@ -4544,8 +4553,8 @@ bool LLVMToSPIRVBase::transGlobalVariables() {
       continue;
     } else if (MDNode *IO = ((*I).getMetadata("io_pipe_id")))
       transGlobalIOPipeStorage(&(*I), IO);
-    else if (!transValue(&(*I), nullptr))
-      return false;
+    else
+      transValue(&(*I), nullptr);
   }
   return true;
 }
@@ -4587,6 +4596,8 @@ LLVMToSPIRVBase::collectEntryPointInterfaces(SPIRVFunction *SF, Function *F) {
   std::vector<SPIRVId> Interface;
   for (auto &GV : M->globals()) {
     const auto AS = GV.getAddressSpace();
+    if (AS == SPIRAS_Private)
+      continue;
     SPIRVModule *BM = SF->getModule();
     if (!BM->isAllowedToUseVersion(VersionNumber::SPIRV_1_4))
       if (AS != SPIRAS_Input && AS != SPIRAS_Output)
diff --git a/lib/SPIRV/libSPIRV/SPIRVModule.cpp b/lib/SPIRV/libSPIRV/SPIRVModule.cpp
index 91a2e0b8..6fdf973c 100644
--- a/lib/SPIRV/libSPIRV/SPIRVModule.cpp
+++ b/lib/SPIRV/libSPIRV/SPIRVModule.cpp
@@ -1683,7 +1683,7 @@ SPIRVInstruction *SPIRVModuleImpl::addVariable(
   SPIRVVariable *Variable = new SPIRVVariable(Type, getId(), Initializer, Name,
                                               StorageClass, BB, this);
   if (BB)
-    return addInstruction(Variable, BB);
+    return addInstruction(Variable, BB, BB->getNumInst() ? BB->getInst(0) : nullptr);
 
   add(Variable);
   if (LinkageTy != internal::LinkageTypeInternal)

@karolherbst
Copy link
Contributor Author

My patch also solves a couple of other related issues, just have to port it to recent version and try it out there

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant