From 36f1c537e28785720047ecb1829a02083c24ab64 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Sun, 14 Apr 2024 21:37:34 -0700 Subject: [PATCH 1/3] [CHERI-Generic] Add baseline test for capability copies from a constant Copying from a zero-constant can be optimized to a memset() and should therefore not trigger the underaligned capability warnings. This is also a baseline test for a crash found in the Morello backend where this memcpy->memset optimization triggers an assertion in diagnoseInefficientCheriMemOp(). --- .../Inputs/memcpy-from-constant.ll | 93 +++++++++ .../MIPS/memcpy-from-constant.ll | 197 ++++++++++++++++++ .../RISCV32/memcpy-from-constant.ll | 197 ++++++++++++++++++ .../RISCV64/memcpy-from-constant.ll | 195 +++++++++++++++++ 4 files changed, 682 insertions(+) create mode 100644 llvm/test/CodeGen/CHERI-Generic/Inputs/memcpy-from-constant.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/MIPS/memcpy-from-constant.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32/memcpy-from-constant.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64/memcpy-from-constant.ll diff --git a/llvm/test/CodeGen/CHERI-Generic/Inputs/memcpy-from-constant.ll b/llvm/test/CodeGen/CHERI-Generic/Inputs/memcpy-from-constant.ll new file mode 100644 index 000000000000..d5df3382ad8b --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/Inputs/memcpy-from-constant.ll @@ -0,0 +1,93 @@ +;; Copying from a zero constant can be converted to a memset (even with the tag preservation flags) +; RUN: llc @PURECAP_HARDFLOAT_ARGS@ < %s -o - | FileCheck %s + +@a = internal addrspace(200) constant ptr addrspace(200) null +@b = internal addrspace(200) constant ptr addrspace(200) null +@zero_constant = internal addrspace(200) constant [5 x ptr addrspace(200)] zeroinitializer +@constant_ptrs = internal addrspace(200) constant [2 x ptr addrspace(200)] [ptr addrspace(200) @a, ptr addrspace(200) @b] + +declare void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) noalias nocapture 
writeonly, ptr addrspace(200) noalias nocapture readonly, i64, i1 immarg) addrspace(200) #0 + +define linkonce_odr void @copy_from_zero_constant(ptr addrspace(200) %dst) addrspace(200) { +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align @CAP_BYTES@ %dst, ptr addrspace(200) align @CAP_BYTES@ @zero_constant, i64 @CAP_BYTES@, i1 false) + ret void +} + +define linkonce_odr void @copy_from_zero_constant_with_offset(ptr addrspace(200) %dst) addrspace(200) { +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 @CAP_BYTES@ + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align @CAP_BYTES@ %dst, ptr addrspace(200) align @CAP_BYTES@ %src, i64 @CAP_BYTES@, i1 false) + ret void +} + +define linkonce_odr void @copy_from_large_zero_constant(ptr addrspace(200) %dst) addrspace(200) { +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align @CAP_BYTES@ %dst, ptr addrspace(200) align @CAP_BYTES@ @zero_constant, i64 @CAP_RANGE_BYTES@, i1 false) + ret void +} + +define linkonce_odr void @copy_from_ptr_constant(ptr addrspace(200) %dst) addrspace(200) { +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align @CAP_BYTES@ %dst, ptr addrspace(200) align @CAP_BYTES@ @constant_ptrs, i64 @CAP_BYTES@, i1 false) + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_with_offset(ptr addrspace(200) %dst) addrspace(200) { +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @constant_ptrs, i64 @CAP_BYTES@ + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align @CAP_BYTES@ %dst, ptr addrspace(200) align @CAP_BYTES@ %src, i64 @CAP_BYTES@, i1 false) + ret void +} + +;; Run the same tests again this time with must_preserve_tags to check that we don't call memcpy(). 
+ +define linkonce_odr void @copy_from_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align @CAP_BYTES@ %dst, ptr addrspace(200) align @CAP_BYTES@ @zero_constant, i64 @CAP_BYTES@, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_zero_constant_with_offset_preserve(ptr addrspace(200) %dst) addrspace(200) { +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 @CAP_BYTES@ + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align @CAP_BYTES@ %dst, ptr addrspace(200) align @CAP_BYTES@ %src, i64 @CAP_BYTES@, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_large_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align @CAP_BYTES@ %dst, ptr addrspace(200) align @CAP_BYTES@ @zero_constant, i64 @CAP_RANGE_BYTES@, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align @CAP_BYTES@ %dst, ptr addrspace(200) align @CAP_BYTES@ @constant_ptrs, i64 @CAP_BYTES@, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_with_offset_preserve(ptr addrspace(200) %dst) addrspace(200) { +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @constant_ptrs, i64 @CAP_BYTES@ + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align @CAP_BYTES@ %dst, ptr addrspace(200) align @CAP_BYTES@ %src, i64 @CAP_BYTES@, i1 false) #1 + ret void +} + +;; Finally, check copying from a zero constant with insufficient known alignment. +;; We should be able to emit this inline since a zero constant source never has tags. 
+ +define linkonce_odr void @copy_from_underaligned_zero_constant(ptr addrspace(200) %dst) addrspace(200) { +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align @CAP_RANGE_BYTES@ %dst, ptr addrspace(200) align @CAP_RANGE_BYTES@ @zero_constant, i64 @CAP_BYTES@, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_underaligned_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align @CAP_RANGE_BYTES@ %dst, ptr addrspace(200) align @CAP_RANGE_BYTES@ @zero_constant, i64 @CAP_BYTES@, i1 false) #1 + ret void +} + +attributes #0 = { argmemonly nocallback nofree nounwind willreturn } +attributes #1 = { must_preserve_cheri_tags "frontend-memtransfer-type"="'const UChar * __capability' (aka 'const char16_t * __capability')" } diff --git a/llvm/test/CodeGen/CHERI-Generic/MIPS/memcpy-from-constant.ll b/llvm/test/CodeGen/CHERI-Generic/MIPS/memcpy-from-constant.ll new file mode 100644 index 000000000000..9bb7762b38d4 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/MIPS/memcpy-from-constant.ll @@ -0,0 +1,197 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --force-update +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/memcpy-from-constant.ll +;; Copying from a zero constant can be converted to a memset (even with the tag preservation flags) +; RUN: llc -mtriple=mips64 -mcpu=cheri128 -mattr=+cheri128 --relocation-model=pic -target-abi purecap < %s -o - | FileCheck %s + +@a = internal addrspace(200) constant ptr addrspace(200) null +@b = internal addrspace(200) constant ptr addrspace(200) null +@zero_constant = internal addrspace(200) constant [5 x ptr addrspace(200)] zeroinitializer +@constant_ptrs = internal addrspace(200) constant [2 x ptr addrspace(200)] [ptr addrspace(200) @a, ptr addrspace(200) @b] + +declare void @llvm.memcpy.p200.p200.i64(ptr 
addrspace(200) noalias nocapture writeonly, ptr addrspace(200) noalias nocapture readonly, i64, i1 immarg) addrspace(200) #0 + +define linkonce_odr void @copy_from_zero_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: cjr $c17 +; CHECK-NEXT: csc $cnull, $zero, 0($c3) +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @zero_constant, i64 16, i1 false) + ret void +} + +define linkonce_odr void @copy_from_zero_constant_with_offset(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant_with_offset: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8) +; CHECK-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4) +; CHECK-NEXT: cgetpccincoffset $c1, $1 +; CHECK-NEXT: clcbi $c1, %captab20(zero_constant)($c1) +; CHECK-NEXT: clc $c1, $zero, 16($c1) +; CHECK-NEXT: cjr $c17 +; CHECK-NEXT: csc $c1, $zero, 0($c3) +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 16 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 %src, i64 16, i1 false) + ret void +} + +define linkonce_odr void @copy_from_large_zero_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_large_zero_constant: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: cjr $c17 +; CHECK-NEXT: csd $zero, $zero, 0($c3) +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @zero_constant, i64 8, i1 false) + ret void +} + +define linkonce_odr void @copy_from_ptr_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8) +; CHECK-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4) +; CHECK-NEXT: cgetpccincoffset $c1, $1 +; 
CHECK-NEXT: clcbi $c1, %captab20(constant_ptrs)($c1) +; CHECK-NEXT: clc $c1, $zero, 0($c1) +; CHECK-NEXT: cjr $c17 +; CHECK-NEXT: csc $c1, $zero, 0($c3) +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @constant_ptrs, i64 16, i1 false) + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_with_offset(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant_with_offset: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8) +; CHECK-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4) +; CHECK-NEXT: cgetpccincoffset $c1, $1 +; CHECK-NEXT: clcbi $c1, %captab20(constant_ptrs)($c1) +; CHECK-NEXT: clc $c1, $zero, 16($c1) +; CHECK-NEXT: cjr $c17 +; CHECK-NEXT: csc $c1, $zero, 0($c3) +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @constant_ptrs, i64 16 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 %src, i64 16, i1 false) + ret void +} + +;; Run the same tests again this time with must_preserve_tags to check that we don't call memcpy(). 
+ +define linkonce_odr void @copy_from_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: cjr $c17 +; CHECK-NEXT: csc $cnull, $zero, 0($c3) +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @zero_constant, i64 16, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_zero_constant_with_offset_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant_with_offset_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8) +; CHECK-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4) +; CHECK-NEXT: cgetpccincoffset $c1, $1 +; CHECK-NEXT: clcbi $c1, %captab20(zero_constant)($c1) +; CHECK-NEXT: clc $c1, $zero, 16($c1) +; CHECK-NEXT: cjr $c17 +; CHECK-NEXT: csc $c1, $zero, 0($c3) +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 16 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 %src, i64 16, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_large_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_large_zero_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: cjr $c17 +; CHECK-NEXT: csd $zero, $zero, 0($c3) +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @zero_constant, i64 8, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8) +; CHECK-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4) +; CHECK-NEXT: cgetpccincoffset $c1, $1 +; CHECK-NEXT: clcbi $c1, 
%captab20(constant_ptrs)($c1) +; CHECK-NEXT: clc $c1, $zero, 0($c1) +; CHECK-NEXT: cjr $c17 +; CHECK-NEXT: csc $c1, $zero, 0($c3) +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @constant_ptrs, i64 16, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_with_offset_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant_with_offset_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8) +; CHECK-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4) +; CHECK-NEXT: cgetpccincoffset $c1, $1 +; CHECK-NEXT: clcbi $c1, %captab20(constant_ptrs)($c1) +; CHECK-NEXT: clc $c1, $zero, 16($c1) +; CHECK-NEXT: cjr $c17 +; CHECK-NEXT: csc $c1, $zero, 0($c3) +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @constant_ptrs, i64 16 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 %src, i64 16, i1 false) #1 + ret void +} + +;; Finally, check copying from a zero constant with insufficient known alignment. +;; We should be able to emit this inline since a zero constant source never has tags. 
+ +define linkonce_odr void @copy_from_underaligned_zero_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_underaligned_zero_constant: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: cincoffset $c11, $c11, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csc $c17, $zero, 0($c11) # 16-byte Folded Spill +; CHECK-NEXT: .cfi_offset 89, -16 +; CHECK-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8) +; CHECK-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4) +; CHECK-NEXT: cgetpccincoffset $c1, $1 +; CHECK-NEXT: clcbi $c4, %captab20(zero_constant)($c1) +; CHECK-NEXT: clcbi $c12, %capcall20(memcpy)($c1) +; CHECK-NEXT: cjalr $c12, $c17 +; CHECK-NEXT: daddiu $4, $zero, 16 +; CHECK-NEXT: clc $c17, $zero, 0($c11) # 16-byte Folded Reload +; CHECK-NEXT: cjr $c17 +; CHECK-NEXT: cincoffset $c11, $c11, 16 +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 16, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_underaligned_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_underaligned_zero_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: cincoffset $c11, $c11, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csc $c17, $zero, 0($c11) # 16-byte Folded Spill +; CHECK-NEXT: .cfi_offset 89, -16 +; CHECK-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8) +; CHECK-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4) +; CHECK-NEXT: cgetpccincoffset $c1, $1 +; CHECK-NEXT: clcbi $c4, %captab20(zero_constant)($c1) +; CHECK-NEXT: clcbi $c12, %capcall20(memcpy)($c1) +; CHECK-NEXT: cjalr $c12, $c17 +; CHECK-NEXT: daddiu $4, $zero, 16 +; CHECK-NEXT: clc $c17, $zero, 0($c11) # 16-byte Folded Reload +; CHECK-NEXT: cjr $c17 +; CHECK-NEXT: cincoffset $c11, $c11, 16 +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 16, i1 
false) #1 + ret void +} + +attributes #0 = { argmemonly nocallback nofree nounwind willreturn } +attributes #1 = { must_preserve_cheri_tags "frontend-memtransfer-type"="'const UChar * __capability' (aka 'const char16_t * __capability')" } diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32/memcpy-from-constant.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32/memcpy-from-constant.ll new file mode 100644 index 000000000000..f63ca2b923fc --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32/memcpy-from-constant.ll @@ -0,0 +1,197 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --force-update +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/memcpy-from-constant.ll +;; Copying from a zero constant can be converted to a memset (even with the tag preservation flags) +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+xcheri,+cap-mode,+f < %s -o - | FileCheck %s + +@a = internal addrspace(200) constant ptr addrspace(200) null +@b = internal addrspace(200) constant ptr addrspace(200) null +@zero_constant = internal addrspace(200) constant [5 x ptr addrspace(200)] zeroinitializer +@constant_ptrs = internal addrspace(200) constant [2 x ptr addrspace(200)] [ptr addrspace(200) @a, ptr addrspace(200) @b] + +declare void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) noalias nocapture writeonly, ptr addrspace(200) noalias nocapture readonly, i64, i1 immarg) addrspace(200) #0 + +define linkonce_odr void @copy_from_zero_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: csc cnull, 0(ca0) +; CHECK-NEXT: cret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 8, i1 false) + ret void +} + +define linkonce_odr void @copy_from_zero_constant_with_offset(ptr addrspace(200) %dst) 
addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant_with_offset: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB1_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant) +; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB1_1)(ca1) +; CHECK-NEXT: clc ca1, 8(ca1) +; CHECK-NEXT: csc ca1, 0(ca0) +; CHECK-NEXT: cret +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 8 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 %src, i64 8, i1 false) + ret void +} + +define linkonce_odr void @copy_from_large_zero_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_large_zero_constant: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: csw zero, 0(ca0) +; CHECK-NEXT: cret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 4, i1 false) + ret void +} + +define linkonce_odr void @copy_from_ptr_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB3_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(constant_ptrs) +; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB3_1)(ca1) +; CHECK-NEXT: clc ca1, 0(ca1) +; CHECK-NEXT: csc ca1, 0(ca0) +; CHECK-NEXT: cret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @constant_ptrs, i64 8, i1 false) + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_with_offset(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant_with_offset: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB4_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(constant_ptrs) +; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB4_1)(ca1) +; CHECK-NEXT: clc ca1, 8(ca1) +; CHECK-NEXT: 
csc ca1, 0(ca0) +; CHECK-NEXT: cret +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @constant_ptrs, i64 8 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 %src, i64 8, i1 false) + ret void +} + +;; Run the same tests again this time with must_preserve_tags to check that we don't call memcpy(). + +define linkonce_odr void @copy_from_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: csc cnull, 0(ca0) +; CHECK-NEXT: cret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 8, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_zero_constant_with_offset_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant_with_offset_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB6_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant) +; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB6_1)(ca1) +; CHECK-NEXT: clc ca1, 8(ca1) +; CHECK-NEXT: csc ca1, 0(ca0) +; CHECK-NEXT: cret +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 8 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 %src, i64 8, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_large_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_large_zero_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: csw zero, 0(ca0) +; CHECK-NEXT: cret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 4, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: 
copy_from_ptr_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB8_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(constant_ptrs) +; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB8_1)(ca1) +; CHECK-NEXT: clc ca1, 0(ca1) +; CHECK-NEXT: csc ca1, 0(ca0) +; CHECK-NEXT: cret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @constant_ptrs, i64 8, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_with_offset_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant_with_offset_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB9_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(constant_ptrs) +; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB9_1)(ca1) +; CHECK-NEXT: clc ca1, 8(ca1) +; CHECK-NEXT: csc ca1, 0(ca0) +; CHECK-NEXT: cret +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @constant_ptrs, i64 8 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 %src, i64 8, i1 false) #1 + ret void +} + +;; Finally, check copying from a zero constant with insufficient known alignment. +;; We should be able to emit this inline since a zero constant source never has tags. 
+ +define linkonce_odr void @copy_from_underaligned_zero_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_underaligned_zero_constant: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: cincoffset csp, csp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csc cra, 8(csp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: .LBB10_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant) +; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB10_1)(ca1) +; CHECK-NEXT: li a2, 8 +; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: ccall memcpy +; CHECK-NEXT: clc cra, 8(csp) # 8-byte Folded Reload +; CHECK-NEXT: cincoffset csp, csp, 16 +; CHECK-NEXT: cret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 4 %dst, ptr addrspace(200) align 4 @zero_constant, i64 8, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_underaligned_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_underaligned_zero_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: cincoffset csp, csp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csc cra, 8(csp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: .LBB11_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant) +; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB11_1)(ca1) +; CHECK-NEXT: li a2, 8 +; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: ccall memcpy +; CHECK-NEXT: clc cra, 8(csp) # 8-byte Folded Reload +; CHECK-NEXT: cincoffset csp, csp, 16 +; CHECK-NEXT: cret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 4 %dst, ptr addrspace(200) align 4 @zero_constant, i64 8, i1 false) #1 + ret void +} + +attributes #0 = { argmemonly nocallback nofree nounwind willreturn } +attributes #1 = { must_preserve_cheri_tags "frontend-memtransfer-type"="'const UChar * __capability' (aka 'const 
char16_t * __capability')" } diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64/memcpy-from-constant.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64/memcpy-from-constant.ll new file mode 100644 index 000000000000..c05dcba9f612 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64/memcpy-from-constant.ll @@ -0,0 +1,195 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --force-update +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/memcpy-from-constant.ll +;; Copying from a zero constant can be converted to a memset (even with the tag preservation flags) +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+xcheri,+cap-mode,+f,+d < %s -o - | FileCheck %s + +@a = internal addrspace(200) constant ptr addrspace(200) null +@b = internal addrspace(200) constant ptr addrspace(200) null +@zero_constant = internal addrspace(200) constant [5 x ptr addrspace(200)] zeroinitializer +@constant_ptrs = internal addrspace(200) constant [2 x ptr addrspace(200)] [ptr addrspace(200) @a, ptr addrspace(200) @b] + +declare void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) noalias nocapture writeonly, ptr addrspace(200) noalias nocapture readonly, i64, i1 immarg) addrspace(200) #0 + +define linkonce_odr void @copy_from_zero_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: csc cnull, 0(ca0) +; CHECK-NEXT: cret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @zero_constant, i64 16, i1 false) + ret void +} + +define linkonce_odr void @copy_from_zero_constant_with_offset(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant_with_offset: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB1_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, 
%captab_pcrel_hi(zero_constant) +; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB1_1)(ca1) +; CHECK-NEXT: clc ca1, 16(ca1) +; CHECK-NEXT: csc ca1, 0(ca0) +; CHECK-NEXT: cret +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 16 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 %src, i64 16, i1 false) + ret void +} + +define linkonce_odr void @copy_from_large_zero_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_large_zero_constant: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: csd zero, 0(ca0) +; CHECK-NEXT: cret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @zero_constant, i64 8, i1 false) + ret void +} + +define linkonce_odr void @copy_from_ptr_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB3_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(constant_ptrs) +; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB3_1)(ca1) +; CHECK-NEXT: clc ca1, 0(ca1) +; CHECK-NEXT: csc ca1, 0(ca0) +; CHECK-NEXT: cret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @constant_ptrs, i64 16, i1 false) + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_with_offset(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant_with_offset: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB4_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(constant_ptrs) +; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB4_1)(ca1) +; CHECK-NEXT: clc ca1, 16(ca1) +; CHECK-NEXT: csc ca1, 0(ca0) +; CHECK-NEXT: cret +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @constant_ptrs, i64 16 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr 
addrspace(200) align 16 %src, i64 16, i1 false) + ret void +} + +;; Run the same tests again this time with must_preserve_tags to check that we don't call memcpy(). + +define linkonce_odr void @copy_from_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: csc cnull, 0(ca0) +; CHECK-NEXT: cret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @zero_constant, i64 16, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_zero_constant_with_offset_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant_with_offset_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB6_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant) +; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB6_1)(ca1) +; CHECK-NEXT: clc ca1, 16(ca1) +; CHECK-NEXT: csc ca1, 0(ca0) +; CHECK-NEXT: cret +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 16 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 %src, i64 16, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_large_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_large_zero_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: csd zero, 0(ca0) +; CHECK-NEXT: cret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @zero_constant, i64 8, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB8_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, 
%captab_pcrel_hi(constant_ptrs) +; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB8_1)(ca1) +; CHECK-NEXT: clc ca1, 0(ca1) +; CHECK-NEXT: csc ca1, 0(ca0) +; CHECK-NEXT: cret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @constant_ptrs, i64 16, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_with_offset_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant_with_offset_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB9_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(constant_ptrs) +; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB9_1)(ca1) +; CHECK-NEXT: clc ca1, 16(ca1) +; CHECK-NEXT: csc ca1, 0(ca0) +; CHECK-NEXT: cret +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @constant_ptrs, i64 16 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 %src, i64 16, i1 false) #1 + ret void +} + +;; Finally, check copying from a zero constant with insufficient known alignment. +;; We should be able to emit this inline since a zero constant source never has tags. 
+ +define linkonce_odr void @copy_from_underaligned_zero_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_underaligned_zero_constant: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: cincoffset csp, csp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csc cra, 0(csp) # 16-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -16 +; CHECK-NEXT: .LBB10_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant) +; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB10_1)(ca1) +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: ccall memcpy +; CHECK-NEXT: clc cra, 0(csp) # 16-byte Folded Reload +; CHECK-NEXT: cincoffset csp, csp, 16 +; CHECK-NEXT: cret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 16, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_underaligned_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_underaligned_zero_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: cincoffset csp, csp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csc cra, 0(csp) # 16-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -16 +; CHECK-NEXT: .LBB11_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant) +; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB11_1)(ca1) +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: ccall memcpy +; CHECK-NEXT: clc cra, 0(csp) # 16-byte Folded Reload +; CHECK-NEXT: cincoffset csp, csp, 16 +; CHECK-NEXT: cret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 16, i1 false) #1 + ret void +} + +attributes #0 = { argmemonly nocallback nofree nounwind willreturn } +attributes #1 = { must_preserve_cheri_tags "frontend-memtransfer-type"="'const UChar * __capability' (aka 'const char16_t * __capability')" } From 
8ce084ee63a4ecb97d59c85e718fcf029e204621 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Sun, 14 Apr 2024 21:41:35 -0700 Subject: [PATCH 2/3] [SelectionDAG] Handle ISD::PTRADD in isMemSrcFromConstant() Noticed while writing tests for copying capabilities from constants. --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +- .../CHERI-Generic/MIPS/memcpy-from-constant.ll | 14 ++------------ .../CHERI-Generic/RISCV32/memcpy-from-constant.ll | 14 ++------------ .../CHERI-Generic/RISCV64/memcpy-from-constant.ll | 14 ++------------ 4 files changed, 7 insertions(+), 37 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index e24f27b020ab..1786c6ed1790 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6768,7 +6768,7 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { GlobalAddressSDNode *G = nullptr; if (Src.getOpcode() == ISD::GlobalAddress) G = cast(Src); - else if (Src.getOpcode() == ISD::ADD && + else if ((Src.getOpcode() == ISD::ADD || Src.getOpcode() == ISD::PTRADD) && Src.getOperand(0).getOpcode() == ISD::GlobalAddress && Src.getOperand(1).getOpcode() == ISD::Constant) { G = cast(Src.getOperand(0)); diff --git a/llvm/test/CodeGen/CHERI-Generic/MIPS/memcpy-from-constant.ll b/llvm/test/CodeGen/CHERI-Generic/MIPS/memcpy-from-constant.ll index 9bb7762b38d4..1cfc704d93ae 100644 --- a/llvm/test/CodeGen/CHERI-Generic/MIPS/memcpy-from-constant.ll +++ b/llvm/test/CodeGen/CHERI-Generic/MIPS/memcpy-from-constant.ll @@ -23,13 +23,8 @@ do.body: define linkonce_odr void @copy_from_zero_constant_with_offset(ptr addrspace(200) %dst) addrspace(200) { ; CHECK-LABEL: copy_from_zero_constant_with_offset: ; CHECK: # %bb.0: # %do.body -; CHECK-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8) -; CHECK-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4) -; CHECK-NEXT: cgetpccincoffset $c1, $1 -; 
CHECK-NEXT: clcbi $c1, %captab20(zero_constant)($c1) -; CHECK-NEXT: clc $c1, $zero, 16($c1) ; CHECK-NEXT: cjr $c17 -; CHECK-NEXT: csc $c1, $zero, 0($c3) +; CHECK-NEXT: csc $cnull, $zero, 0($c3) do.body: %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 16 call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 %src, i64 16, i1 false) @@ -92,13 +87,8 @@ do.body: define linkonce_odr void @copy_from_zero_constant_with_offset_preserve(ptr addrspace(200) %dst) addrspace(200) { ; CHECK-LABEL: copy_from_zero_constant_with_offset_preserve: ; CHECK: # %bb.0: # %do.body -; CHECK-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8) -; CHECK-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4) -; CHECK-NEXT: cgetpccincoffset $c1, $1 -; CHECK-NEXT: clcbi $c1, %captab20(zero_constant)($c1) -; CHECK-NEXT: clc $c1, $zero, 16($c1) ; CHECK-NEXT: cjr $c17 -; CHECK-NEXT: csc $c1, $zero, 0($c3) +; CHECK-NEXT: csc $cnull, $zero, 0($c3) do.body: %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 16 call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 %src, i64 16, i1 false) #1 diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32/memcpy-from-constant.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32/memcpy-from-constant.ll index f63ca2b923fc..bf2d5a128eb5 100644 --- a/llvm/test/CodeGen/CHERI-Generic/RISCV32/memcpy-from-constant.ll +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32/memcpy-from-constant.ll @@ -23,12 +23,7 @@ do.body: define linkonce_odr void @copy_from_zero_constant_with_offset(ptr addrspace(200) %dst) addrspace(200) { ; CHECK-LABEL: copy_from_zero_constant_with_offset: ; CHECK: # %bb.0: # %do.body -; CHECK-NEXT: .LBB1_1: # %do.body -; CHECK-NEXT: # Label of block must be emitted -; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant) -; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB1_1)(ca1) -; CHECK-NEXT: clc ca1, 8(ca1) -; CHECK-NEXT: csc ca1, 0(ca0) 
+; CHECK-NEXT: csc cnull, 0(ca0) ; CHECK-NEXT: cret do.body: %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 8 @@ -92,12 +87,7 @@ do.body: define linkonce_odr void @copy_from_zero_constant_with_offset_preserve(ptr addrspace(200) %dst) addrspace(200) { ; CHECK-LABEL: copy_from_zero_constant_with_offset_preserve: ; CHECK: # %bb.0: # %do.body -; CHECK-NEXT: .LBB6_1: # %do.body -; CHECK-NEXT: # Label of block must be emitted -; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant) -; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB6_1)(ca1) -; CHECK-NEXT: clc ca1, 8(ca1) -; CHECK-NEXT: csc ca1, 0(ca0) +; CHECK-NEXT: csc cnull, 0(ca0) ; CHECK-NEXT: cret do.body: %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 8 diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64/memcpy-from-constant.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64/memcpy-from-constant.ll index c05dcba9f612..ff66ae0cf145 100644 --- a/llvm/test/CodeGen/CHERI-Generic/RISCV64/memcpy-from-constant.ll +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64/memcpy-from-constant.ll @@ -23,12 +23,7 @@ do.body: define linkonce_odr void @copy_from_zero_constant_with_offset(ptr addrspace(200) %dst) addrspace(200) { ; CHECK-LABEL: copy_from_zero_constant_with_offset: ; CHECK: # %bb.0: # %do.body -; CHECK-NEXT: .LBB1_1: # %do.body -; CHECK-NEXT: # Label of block must be emitted -; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant) -; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB1_1)(ca1) -; CHECK-NEXT: clc ca1, 16(ca1) -; CHECK-NEXT: csc ca1, 0(ca0) +; CHECK-NEXT: csc cnull, 0(ca0) ; CHECK-NEXT: cret do.body: %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 16 @@ -92,12 +87,7 @@ do.body: define linkonce_odr void @copy_from_zero_constant_with_offset_preserve(ptr addrspace(200) %dst) addrspace(200) { ; CHECK-LABEL: copy_from_zero_constant_with_offset_preserve: ; CHECK: # %bb.0: # %do.body -; CHECK-NEXT: .LBB6_1: # %do.body -; CHECK-NEXT: # Label of block must be emitted -; 
CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant) -; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB6_1)(ca1) -; CHECK-NEXT: clc ca1, 16(ca1) -; CHECK-NEXT: csc ca1, 0(ca0) +; CHECK-NEXT: csc cnull, 0(ca0) ; CHECK-NEXT: cret do.body: %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 16 From 91363bf9f8a020a29b407f09f27d79abfe4f5ec8 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Sun, 14 Apr 2024 22:00:03 -0700 Subject: [PATCH 3/3] [SelectionDAG] Lower must_preserve_tags memset without capabilities When a must_preserve_tags memcpy is converted to a memset, we may end up using a non-capability MVT as the copy type. This change allows us to use inline integer memset lowering for copies from zero constants that are not necessarily sufficiently aligned. This also fixes an assertion found while compiling ICU4C for Morello where a copy from a large zero constant was lowered using NEON registers instead of capability ones. --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +- .../CodeGen/SelectionDAG/TargetLowering.cpp | 1 + .../MIPS/memcpy-from-constant.ll | 30 +++---------------- .../RISCV32/memcpy-from-constant.ll | 30 +++---------------- .../RISCV64/memcpy-from-constant.ll | 28 +++-------------- 5 files changed, 14 insertions(+), 77 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 1786c6ed1790..a216711f8f0c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6887,7 +6887,7 @@ static SDValue getMemcpyLoadsAndStores( // TODO: the frontend/optimization passes probably shouldn't emit // must-preserve-tags for such small memcpys auto CapTy = TLI.cheriCapabilityType(); - if (CapTy.isValid()) { + if (CapTy.isValid() && !Op.isMemset()) { const uint64_t CapSize = CapTy.getStoreSize(); if (PreserveTags == PreserveCheriTags::Required && !ReachedLimit && Size >= CapSize && (!FoundLowering ||
!MemOps[0].isFatPointer())) { diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e376a0f2a67d..037a99cf8031 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -208,6 +208,7 @@ bool TargetLowering::findOptimalMemOpLowering( // XXXAR: (ab)use MVT::isVoid to indicate that a memcpy call must be made if (VT == MVT::isVoid) { + assert(!Op.isMemset() && "MVT::isVoid should only be used for copies"); return false; // cannot lower as memops } // If the type is a fat pointer, then forcibly disable overlap. diff --git a/llvm/test/CodeGen/CHERI-Generic/MIPS/memcpy-from-constant.ll b/llvm/test/CodeGen/CHERI-Generic/MIPS/memcpy-from-constant.ll index 1cfc704d93ae..fd4dd1a78385 100644 --- a/llvm/test/CodeGen/CHERI-Generic/MIPS/memcpy-from-constant.ll +++ b/llvm/test/CodeGen/CHERI-Generic/MIPS/memcpy-from-constant.ll @@ -142,20 +142,9 @@ do.body: define linkonce_odr void @copy_from_underaligned_zero_constant(ptr addrspace(200) %dst) addrspace(200) { ; CHECK-LABEL: copy_from_underaligned_zero_constant: ; CHECK: # %bb.0: # %do.body -; CHECK-NEXT: cincoffset $c11, $c11, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csc $c17, $zero, 0($c11) # 16-byte Folded Spill -; CHECK-NEXT: .cfi_offset 89, -16 -; CHECK-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8) -; CHECK-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4) -; CHECK-NEXT: cgetpccincoffset $c1, $1 -; CHECK-NEXT: clcbi $c4, %captab20(zero_constant)($c1) -; CHECK-NEXT: clcbi $c12, %capcall20(memcpy)($c1) -; CHECK-NEXT: cjalr $c12, $c17 -; CHECK-NEXT: daddiu $4, $zero, 16 -; CHECK-NEXT: clc $c17, $zero, 0($c11) # 16-byte Folded Reload +; CHECK-NEXT: csd $zero, $zero, 0($c3) ; CHECK-NEXT: cjr $c17 -; CHECK-NEXT: cincoffset $c11, $c11, 16 +; CHECK-NEXT: csd $zero, $zero, 8($c3) do.body: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) 
align 8 @zero_constant, i64 16, i1 false) #1 ret void @@ -164,20 +153,9 @@ do.body: define linkonce_odr void @copy_from_underaligned_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { ; CHECK-LABEL: copy_from_underaligned_zero_constant_preserve: ; CHECK: # %bb.0: # %do.body -; CHECK-NEXT: cincoffset $c11, $c11, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csc $c17, $zero, 0($c11) # 16-byte Folded Spill -; CHECK-NEXT: .cfi_offset 89, -16 -; CHECK-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8) -; CHECK-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4) -; CHECK-NEXT: cgetpccincoffset $c1, $1 -; CHECK-NEXT: clcbi $c4, %captab20(zero_constant)($c1) -; CHECK-NEXT: clcbi $c12, %capcall20(memcpy)($c1) -; CHECK-NEXT: cjalr $c12, $c17 -; CHECK-NEXT: daddiu $4, $zero, 16 -; CHECK-NEXT: clc $c17, $zero, 0($c11) # 16-byte Folded Reload +; CHECK-NEXT: csd $zero, $zero, 0($c3) ; CHECK-NEXT: cjr $c17 -; CHECK-NEXT: cincoffset $c11, $c11, 16 +; CHECK-NEXT: csd $zero, $zero, 8($c3) do.body: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 16, i1 false) #1 ret void diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32/memcpy-from-constant.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32/memcpy-from-constant.ll index bf2d5a128eb5..323d1e2fd94d 100644 --- a/llvm/test/CodeGen/CHERI-Generic/RISCV32/memcpy-from-constant.ll +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32/memcpy-from-constant.ll @@ -142,19 +142,8 @@ do.body: define linkonce_odr void @copy_from_underaligned_zero_constant(ptr addrspace(200) %dst) addrspace(200) { ; CHECK-LABEL: copy_from_underaligned_zero_constant: ; CHECK: # %bb.0: # %do.body -; CHECK-NEXT: cincoffset csp, csp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csc cra, 8(csp) # 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset ra, -8 -; CHECK-NEXT: .LBB10_1: # %do.body -; CHECK-NEXT: # Label of block must be emitted -; CHECK-NEXT: auipcc ca1, 
%captab_pcrel_hi(zero_constant) -; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB10_1)(ca1) -; CHECK-NEXT: li a2, 8 -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: ccall memcpy -; CHECK-NEXT: clc cra, 8(csp) # 8-byte Folded Reload -; CHECK-NEXT: cincoffset csp, csp, 16 +; CHECK-NEXT: csw zero, 4(ca0) +; CHECK-NEXT: csw zero, 0(ca0) ; CHECK-NEXT: cret do.body: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 4 %dst, ptr addrspace(200) align 4 @zero_constant, i64 8, i1 false) #1 @@ -164,19 +153,8 @@ do.body: define linkonce_odr void @copy_from_underaligned_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { ; CHECK-LABEL: copy_from_underaligned_zero_constant_preserve: ; CHECK: # %bb.0: # %do.body -; CHECK-NEXT: cincoffset csp, csp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csc cra, 8(csp) # 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset ra, -8 -; CHECK-NEXT: .LBB11_1: # %do.body -; CHECK-NEXT: # Label of block must be emitted -; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant) -; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB11_1)(ca1) -; CHECK-NEXT: li a2, 8 -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: ccall memcpy -; CHECK-NEXT: clc cra, 8(csp) # 8-byte Folded Reload -; CHECK-NEXT: cincoffset csp, csp, 16 +; CHECK-NEXT: csw zero, 4(ca0) +; CHECK-NEXT: csw zero, 0(ca0) ; CHECK-NEXT: cret do.body: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 4 %dst, ptr addrspace(200) align 4 @zero_constant, i64 8, i1 false) #1 diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64/memcpy-from-constant.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64/memcpy-from-constant.ll index ff66ae0cf145..663a72de4dcc 100644 --- a/llvm/test/CodeGen/CHERI-Generic/RISCV64/memcpy-from-constant.ll +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64/memcpy-from-constant.ll @@ -142,18 +142,8 @@ do.body: define linkonce_odr void @copy_from_underaligned_zero_constant(ptr addrspace(200) %dst) addrspace(200) { ; CHECK-LABEL: copy_from_underaligned_zero_constant: ; CHECK: # %bb.0: # 
%do.body -; CHECK-NEXT: cincoffset csp, csp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csc cra, 0(csp) # 16-byte Folded Spill -; CHECK-NEXT: .cfi_offset ra, -16 -; CHECK-NEXT: .LBB10_1: # %do.body -; CHECK-NEXT: # Label of block must be emitted -; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant) -; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB10_1)(ca1) -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: ccall memcpy -; CHECK-NEXT: clc cra, 0(csp) # 16-byte Folded Reload -; CHECK-NEXT: cincoffset csp, csp, 16 +; CHECK-NEXT: csd zero, 8(ca0) +; CHECK-NEXT: csd zero, 0(ca0) ; CHECK-NEXT: cret do.body: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 16, i1 false) #1 @@ -163,18 +153,8 @@ do.body: define linkonce_odr void @copy_from_underaligned_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { ; CHECK-LABEL: copy_from_underaligned_zero_constant_preserve: ; CHECK: # %bb.0: # %do.body -; CHECK-NEXT: cincoffset csp, csp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csc cra, 0(csp) # 16-byte Folded Spill -; CHECK-NEXT: .cfi_offset ra, -16 -; CHECK-NEXT: .LBB11_1: # %do.body -; CHECK-NEXT: # Label of block must be emitted -; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant) -; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB11_1)(ca1) -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: ccall memcpy -; CHECK-NEXT: clc cra, 0(csp) # 16-byte Folded Reload -; CHECK-NEXT: cincoffset csp, csp, 16 +; CHECK-NEXT: csd zero, 8(ca0) +; CHECK-NEXT: csd zero, 0(ca0) ; CHECK-NEXT: cret do.body: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 16, i1 false) #1