diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 21f9c50c352563..00fe57c6eff299 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -154,7 +154,14 @@ class AMDGPUInformationCache : public InformationCache { TargetMachine &TM; - enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 }; + enum ConstantStatus : uint8_t { + NONE = 0, + DS_GLOBAL = 1 << 0, + ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1, + ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2, + ADDR_SPACE_CAST_BOTH_TO_FLAT = + ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT + }; /// Check if the subtarget has aperture regs. bool hasApertureRegs(Function &F) { @@ -234,12 +241,18 @@ class AMDGPUInformationCache : public InformationCache { private: - /// Check if the ConstantExpr \p CE requires the queue pointer. - static bool visitConstExpr(const ConstantExpr *CE) { + /// Return the ConstantStatus addrspacecast bits for the ConstantExpr \p CE. + static uint8_t visitConstExpr(const ConstantExpr *CE) { + uint8_t Status = NONE; + if (CE->getOpcode() == Instruction::AddrSpaceCast) { unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); - return castRequiresQueuePtr(SrcAS); + if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS) + Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT; + else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS) + Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT; } - return false; + + return Status; } /// Get the constant access bitmap for \p C. 
@@ -254,8 +267,7 @@ class AMDGPUInformationCache : public InformationCache { Result = DS_GLOBAL; if (const auto *CE = dyn_cast(C)) - if (visitConstExpr(CE)) - Result |= ADDR_SPACE_CAST; + Result |= visitConstExpr(CE); for (const Use &U : C->operands()) { const auto *OpC = dyn_cast(U); @@ -284,19 +296,13 @@ class AMDGPUInformationCache : public InformationCache { if (IsNonEntryFunc && (Access & DS_GLOBAL)) return true; - return !HasAperture && (Access & ADDR_SPACE_CAST); + return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT); } bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) { SmallPtrSet Visited; uint8_t Access = getConstantAccess(C, Visited); - - if (Access & ADDR_SPACE_CAST) - if (const auto *CE = dyn_cast(C)) - if (CE->getOperand(0)->getType()->getPointerAddressSpace() == - AMDGPUAS::PRIVATE_ADDRESS) - return true; - return false; + return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT; } private: diff --git a/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll new file mode 100644 index 00000000000000..e8b23f3bf3a701 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -passes=amdgpu-attributor %s | FileCheck %s + +@buf_shared = internal addrspace(3) global [2080 x i8] poison, align 16 + +; Constant expression element may not have a pointer type and the +; addrspacecast may not be the toplevel operation. + + +; This should infer "amdgpu-no-flat-scratch-init". It should not infer "amdgpu-no-queue-ptr" +;. +; CHECK: @buf_shared = internal addrspace(3) global [2080 x i8] poison, align 16 +; CHECK: @buf_private = internal addrspace(5) global [2080 x i8] poison, align 16 +;. 
+define amdgpu_kernel void @issue120256(ptr addrspace(1) %out) { +; CHECK-LABEL: define amdgpu_kernel void @issue120256( +; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[CONV_I:%.*]] = and i32 trunc (i64 sub (i64 16, i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @buf_shared to ptr) to i64)) to i32), 15 +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(3) @buf_shared, i32 [[CONV_I]] +; CHECK-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) [[ADD_PTR]], align 1 +; CHECK-NEXT: store i8 [[LD]], ptr addrspace(1) [[OUT]], align 1 +; CHECK-NEXT: ret void +; + %conv.i = and i32 trunc (i64 sub (i64 16, i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @buf_shared to ptr) to i64)) to i32), 15 + %add.ptr = getelementptr inbounds nuw i8, ptr addrspace(3) @buf_shared, i32 %conv.i + %ld = load i8, ptr addrspace(3) %add.ptr, align 1 + store i8 %ld, ptr addrspace(1) %out, align 1 + ret void +} + +@buf_private = internal addrspace(5) global [2080 x i8] poison, align 16 + +; Constant expression element may not have a pointer type and the +; addrspacecast may not be the toplevel operation. 
+ +; This should not infer "amdgpu-no-flat-scratch-init" nor "amdgpu-no-queue-ptr" +define amdgpu_kernel void @issue120256_private(ptr addrspace(1) %out) { +; CHECK-LABEL: define amdgpu_kernel void @issue120256_private( +; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[CONV_I:%.*]] = and i32 trunc (i64 sub (i64 16, i64 ptrtoint (ptr addrspacecast (ptr addrspace(5) @buf_private to ptr) to i64)) to i32), 15 +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) @buf_private, i32 [[CONV_I]] +; CHECK-NEXT: [[LD:%.*]] = load i8, ptr addrspace(5) [[ADD_PTR]], align 1 +; CHECK-NEXT: store i8 [[LD]], ptr addrspace(1) [[OUT]], align 1 +; CHECK-NEXT: ret void +; + %conv.i = and i32 trunc (i64 sub (i64 16, i64 ptrtoint (ptr addrspacecast (ptr addrspace(5) @buf_private to ptr) to i64)) to i32), 15 + %add.ptr = getelementptr inbounds nuw i8, ptr addrspace(5) @buf_private, i32 %conv.i + %ld = load i8, ptr addrspace(5) %add.ptr, align 1 + store i8 %ld, ptr addrspace(1) %out, align 1 + ret void +} + +!llvm.module.flags = !{!0} + +; FIXME: Inference of amdgpu-no-queue-ptr should not depend on code object version. +!0 = !{i32 1, !"amdhsa_code_object_version", i32 400} +;. 
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx803" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx803" "uniform-work-group-size"="false" } +;. +; CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400} +;.