diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 389fbce72ad0..094bbe25625b 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1453,6 +1453,12 @@ class AtomicSDNode : public MemSDNode { return MMO->getFailureOrdering(); } + /// Return true if this capability cmpxchg must use an exact (tag and all bits) comparison rather than an address-only comparison. + bool isExactCmpXchg() const { + assert(getMemoryVT().isFatPointer()); + return MMO->isExactCompare(); + } + // Methods to support isa and dyn_cast static bool classof(const SDNode *N) { return N->getOpcode() == ISD::ATOMIC_CMP_SWAP || diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 21de4bbd5668..93fab2d91634 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -1888,10 +1888,21 @@ multiclass binary_atomic_op_cap<SDPatternOperator atomic_op> { defm NAME : binary_atomic_op_ord; } -multiclass ternary_atomic_op_cap<SDPatternOperator atomic_op> { +multiclass ternary_atomic_op_cap_inexact<SDPatternOperator atomic_op> { def "" : PatFrag<(ops node:$ptr, node:$cmp, node:$val), (atomic_op node:$ptr, node:$cmp, node:$val), [{ - return cast<AtomicSDNode>(N)->getMemoryVT().isFatPointer(); + auto AN = cast<AtomicSDNode>(N); + return AN->getMemoryVT().isFatPointer() && !AN->isExactCmpXchg(); + }]>; + + defm NAME : ternary_atomic_op_ord; +} + +multiclass ternary_atomic_op_cap_exact<SDPatternOperator atomic_op> { + def "" : PatFrag<(ops node:$ptr, node:$cmp, node:$val), + (atomic_op node:$ptr, node:$cmp, node:$val), [{ + auto AN = cast<AtomicSDNode>(N); + return AN->getMemoryVT().isFatPointer() && AN->isExactCmpXchg(); + }]>; defm NAME : ternary_atomic_op_ord; } @@ -1910,7 +1921,8 @@ defm atomic_load_max_cap : binary_atomic_op_cap<atomic_load_max>; defm atomic_load_umin_cap : binary_atomic_op_cap<atomic_load_umin>; defm atomic_load_umax_cap : binary_atomic_op_cap<atomic_load_umax>; defm atomic_store_cap : binary_atomic_op_cap<atomic_store>; -defm atomic_cmp_swap_cap : ternary_atomic_op_cap<atomic_cmp_swap>; +defm atomic_cmp_swap_cap_addr : ternary_atomic_op_cap_inexact<atomic_cmp_swap>; +defm 
atomic_cmp_swap_cap_exact : ternary_atomic_op_cap_exact; def atomic_load_cap : PatFrag<(ops node:$ptr), diff --git a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp index 5c2168d9843a..9af181997df8 100644 --- a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp +++ b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp @@ -212,6 +212,7 @@ bool MipsExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, unsigned Size = -1; bool IsCapCmpXchg = false; + bool UseExactEquals = false; switch(I->getOpcode()) { case Mips::ATOMIC_CMP_SWAP_I32_POSTRA: Size = 4; break; case Mips::ATOMIC_CMP_SWAP_I64_POSTRA: Size = 8; break; @@ -219,7 +220,10 @@ bool MipsExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, case Mips::CAP_ATOMIC_CMP_SWAP_I16_POSTRA: Size = 2; break; case Mips::CAP_ATOMIC_CMP_SWAP_I32_POSTRA: Size = 4; break; case Mips::CAP_ATOMIC_CMP_SWAP_I64_POSTRA: Size = 8; break; - case Mips::CAP_ATOMIC_CMP_SWAP_CAP_POSTRA: + case Mips::CAP_ATOMIC_CMP_SWAP_CAP_EXACT_POSTRA: + UseExactEquals = true; + LLVM_FALLTHROUGH; + case Mips::CAP_ATOMIC_CMP_SWAP_CAP_ADDR_POSTRA: Size = CAP_ATOMIC_SIZE; IsCapCmpXchg = true; break; @@ -327,9 +331,6 @@ bool MipsExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, if (!IsCapOp) LLOp.addImm(0); if (IsCapCmpXchg) { - assert(I->hasOneMemOperand()); - bool UseExactEquals = - STI->useCheriExactEquals() || I->memoperands()[0]->isExactCompare(); unsigned CapCmp = UseExactEquals ? 
Mips::CEXEQ : Mips::CEQ; // load, compare, and exit if not equal // cllc dest, ptr @@ -1098,7 +1099,8 @@ bool MipsExpandPseudo::expandMI(MachineBasicBlock &MBB, case Mips::CAP_ATOMIC_CMP_SWAP_I16_POSTRA: case Mips::CAP_ATOMIC_CMP_SWAP_I32_POSTRA: case Mips::CAP_ATOMIC_CMP_SWAP_I64_POSTRA: - case Mips::CAP_ATOMIC_CMP_SWAP_CAP_POSTRA: + case Mips::CAP_ATOMIC_CMP_SWAP_CAP_ADDR_POSTRA: + case Mips::CAP_ATOMIC_CMP_SWAP_CAP_EXACT_POSTRA: return expandAtomicCmpSwap(MBB, MBBI, NMBB, /*IsCapOp=*/true); case Mips::PseudoPccRelativeAddressPostRA: return expandPccRelativeAddr(MBB, MBBI, NMBB); diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 6a63b454da4d..198e14539c57 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -1837,7 +1837,8 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case Mips::CAP_ATOMIC_CMP_SWAP_I16: case Mips::CAP_ATOMIC_CMP_SWAP_I32: case Mips::CAP_ATOMIC_CMP_SWAP_I64: - case Mips::CAP_ATOMIC_CMP_SWAP_CAP: + case Mips::CAP_ATOMIC_CMP_SWAP_CAP_ADDR: + case Mips::CAP_ATOMIC_CMP_SWAP_CAP_EXACT: return emitAtomicCmpSwap(MI, BB); @@ -2445,8 +2446,12 @@ MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, AtomicOp = Mips::CAP_ATOMIC_CMP_SWAP_I64_POSTRA; ScratchTy = MVT::i64; break; - case Mips::CAP_ATOMIC_CMP_SWAP_CAP: - AtomicOp = Mips::CAP_ATOMIC_CMP_SWAP_CAP_POSTRA; + case Mips::CAP_ATOMIC_CMP_SWAP_CAP_ADDR: + AtomicOp = Mips::CAP_ATOMIC_CMP_SWAP_CAP_ADDR_POSTRA; + ScratchTy = MVT::i64; + break; + case Mips::CAP_ATOMIC_CMP_SWAP_CAP_EXACT: + AtomicOp = Mips::CAP_ATOMIC_CMP_SWAP_CAP_EXACT_POSTRA; ScratchTy = MVT::i64; break; default: diff --git a/llvm/lib/Target/Mips/MipsInstrCheri.td b/llvm/lib/Target/Mips/MipsInstrCheri.td index ae2ba9306a48..0996bbab3d6f 100644 --- a/llvm/lib/Target/Mips/MipsInstrCheri.td +++ b/llvm/lib/Target/Mips/MipsInstrCheri.td @@ -763,8 +763,9 @@ let usesCustomInserter = 1 in { // Capability atomics: // 
FIXME: this seems wrong it should be CheriGPROrCNULL - def CAP_ATOMIC_SWAP_CAP : CapAtomic2Ops; - def CAP_ATOMIC_CMP_SWAP_CAP : CapAtomicCmpSwap; + def CAP_ATOMIC_SWAP_CAP : CapAtomic2Ops; + def CAP_ATOMIC_CMP_SWAP_CAP_ADDR : CapAtomicCmpSwap; + def CAP_ATOMIC_CMP_SWAP_CAP_EXACT : CapAtomicCmpSwap; // TODO: implement these: // def ATOMIC_LOAD_ADD_CAP : Atomic2Ops; @@ -816,8 +817,9 @@ def CAP_ATOMIC_CMP_SWAP_I64_POSTRA : CapAtomicCmpSwapPostRA; // Capability postra atomics: // TODO: do we want add/sub/or/xor/nand/and for capabilities? // I guess add/sub makes sense but the others don't -def CAP_ATOMIC_SWAP_CAP_POSTRA : CapAtomic2OpsPostRA; -def CAP_ATOMIC_CMP_SWAP_CAP_POSTRA : CapAtomicCmpSwapPostRA; +def CAP_ATOMIC_SWAP_CAP_POSTRA : CapAtomic2OpsPostRA; +def CAP_ATOMIC_CMP_SWAP_CAP_ADDR_POSTRA : CapAtomicCmpSwapPostRA; +def CAP_ATOMIC_CMP_SWAP_CAP_EXACT_POSTRA : CapAtomicCmpSwapPostRA; // TODO: // def CAP_ATOMIC_LOAD_ADD_CAP_POSTRA : CapAtomic2OpsPostRA; // def CAP_ATOMIC_LOAD_SUB_CAP_POSTRA : CapAtomic2OpsPostRA; @@ -853,8 +855,8 @@ def : MipsPat<(atomic_store_cap GPR64Opnd:$a, CheriOpnd:$v), (STORECAP $v, GPR64Opnd:$a, (i64 0), DDC)>; def : MipsPat<(atomic_swap_cap GPR64Opnd:$a, CheriOpnd:$swap), (CAP_ATOMIC_SWAP_CAP (CFromPtr DDC, GPR64Opnd:$a), CheriOpnd:$swap)>; -def : MipsPat<(atomic_cmp_swap_cap GPR64Opnd:$a, CheriOpnd:$cmp, CheriOpnd:$swap), - (CAP_ATOMIC_CMP_SWAP_CAP (CFromPtr DDC, GPR64Opnd:$a), CheriOpnd:$cmp, CheriOpnd:$swap)>; +def : MipsPat<(atomic_cmp_swap_cap_addr GPR64Opnd:$a, CheriOpnd:$cmp, CheriOpnd:$swap), + (CAP_ATOMIC_CMP_SWAP_CAP_ADDR (CFromPtr DDC, GPR64Opnd:$a), CheriOpnd:$cmp, CheriOpnd:$swap)>; } //////////////////////////////////////////////////////////////////////////////// // Helpers for capability-using calls and returns diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp index f0730a0193d0..30df4d79f7f4 100644 --- 
a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp @@ -160,7 +160,8 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, case RISCV::PseudoAtomicLoadUMinCap: return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, CLenVT, false, NextMBBI); - case RISCV::PseudoCmpXchgCap: + case RISCV::PseudoCmpXchgCapAddr: + case RISCV::PseudoCmpXchgCapExact: return expandAtomicCmpXchg(MBB, MBBI, false, CLenVT, false, NextMBBI); case RISCV::PseudoCheriAtomicSwap8: return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, MVT::i8, @@ -272,7 +273,8 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, case RISCV::PseudoCheriAtomicLoadUMinCap: return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, CLenVT, true, NextMBBI); - case RISCV::PseudoCheriCmpXchgCap: + case RISCV::PseudoCheriCmpXchgCapAddr: + case RISCV::PseudoCheriCmpXchgCapExact: return expandAtomicCmpXchg(MBB, MBBI, false, CLenVT, true, NextMBBI); } @@ -1020,8 +1022,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg( BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(PtrIsCap, Ordering, VT)), DestReg) .addReg(AddrReg); - assert(MI.hasOneMemOperand()); - if (VT.isFatPointer() && MI.memoperands()[0]->isExactCompare()) { + bool ExactCapCompare = MI.getOpcode() == RISCV::PseudoCheriCmpXchgCapExact; + if (VT.isFatPointer() && ExactCapCompare) { BuildMI(LoopHeadMBB, DL, TII->get(RISCV::CSEQX), ScratchReg) .addReg(DestReg, 0) .addReg(CmpValReg, 0); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCheri.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCheri.td index afd22b507f16..c728e2b3f2b8 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCheri.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCheri.td @@ -1621,7 +1621,8 @@ def PseudoAtomicLoadMinCap : PseudoAMO { let Size = 24; } def PseudoAtomicLoadUMaxCap : PseudoAMO { let Size = 24; } def PseudoAtomicLoadUMinCap : PseudoAMO { let Size = 24; } def 
PseudoAtomicLoadNandCap : PseudoAMO { let Size = 24; } -def PseudoCmpXchgCap : PseudoCmpXchg { let Size = 16; } +def PseudoCmpXchgCapAddr : PseudoCmpXchg { let Size = 16; } +def PseudoCmpXchgCapExact : PseudoCmpXchg { let Size = 16; } } // Predicates = [HasCheri, HasStdExtA]f let Predicates = [HasCheri, HasStdExtA, NotCapMode] in { @@ -1635,7 +1636,8 @@ defm : PseudoAMOPat<"atomic_load_min_cap", PseudoAtomicLoadMinCap, GPCR>; defm : PseudoAMOPat<"atomic_load_umax_cap", PseudoAtomicLoadUMaxCap, GPCR>; defm : PseudoAMOPat<"atomic_load_umin_cap", PseudoAtomicLoadUMinCap, GPCR>; defm : PseudoAMOPat<"atomic_load_nand_cap", PseudoAtomicLoadNandCap, GPCR>; -defm : PseudoCmpXchgPat<"atomic_cmp_swap_cap", PseudoCmpXchgCap, GPCR>; +defm : PseudoCmpXchgPat<"atomic_cmp_swap_cap_addr", PseudoCmpXchgCapAddr, GPCR>; +defm : PseudoCmpXchgPat<"atomic_cmp_swap_cap_exact", PseudoCmpXchgCapExact, GPCR>; } // Predicates = [HasCheri, HasStdExtA, NotCapMode] /// Capability Mode Instructions @@ -1782,7 +1784,8 @@ def PseudoCheriAtomicLoadMinCap : PseudoCheriAMO { let Size = 24; } def PseudoCheriAtomicLoadUMaxCap : PseudoCheriAMO { let Size = 24; } def PseudoCheriAtomicLoadUMinCap : PseudoCheriAMO { let Size = 24; } def PseudoCheriAtomicLoadNandCap : PseudoCheriAMO { let Size = 24; } -def PseudoCheriCmpXchgCap : PseudoCheriCmpXchg { let Size = 16; } +def PseudoCheriCmpXchgCapAddr : PseudoCheriCmpXchg { let Size = 16; } +def PseudoCheriCmpXchgCapExact : PseudoCheriCmpXchg { let Size = 16; } } // Predicates = [HasCheri, HasStdExtA] let Predicates = [HasCheri, HasStdExtA, IsRV64] in { @@ -1981,7 +1984,8 @@ defm : PseudoCheriCmpXchgPat<"atomic_cmp_swap_8", PseudoCheriCmpXchg8>; defm : PseudoCheriCmpXchgPat<"atomic_cmp_swap_16", PseudoCheriCmpXchg16>; defm : PseudoCheriCmpXchgPat<"atomic_cmp_swap_32", PseudoCheriCmpXchg32>; -defm : PseudoCheriCmpXchgPat<"atomic_cmp_swap_cap", PseudoCheriCmpXchgCap, GPCR>; +defm : PseudoCheriCmpXchgPat<"atomic_cmp_swap_cap_addr", PseudoCheriCmpXchgCapAddr, 
GPCR>; +defm : PseudoCheriCmpXchgPat<"atomic_cmp_swap_cap_exact", PseudoCheriCmpXchgCapExact, GPCR>; } // Predicates = [HasCheri, HasStdExtA, IsCapMode] diff --git a/llvm/test/CodeGen/CHERI-Generic/Inputs/cmpxchg-exact-branch-folder.ll b/llvm/test/CodeGen/CHERI-Generic/Inputs/cmpxchg-exact-branch-folder.ll index 57ee81ce26ae..ca08047500ba 100644 --- a/llvm/test/CodeGen/CHERI-Generic/Inputs/cmpxchg-exact-branch-folder.ll +++ b/llvm/test/CodeGen/CHERI-Generic/Inputs/cmpxchg-exact-branch-folder.ll @@ -2,15 +2,17 @@ ; CHERI-GENERIC-UTC: mir @IF-RISCV@; RUN: llc @PURECAP_HARDFLOAT_ARGS@ -mattr=+a < %s --stop-after=branch-folder | FileCheck %s --check-prefixes=MIR @IFNOT-RISCV@; RUN: llc @PURECAP_HARDFLOAT_ARGS@ < %s --stop-after=branch-folder --enable-tail-merge | FileCheck %s --check-prefixes=MIR -@IF-RISCV@; RUN: not --crash llc @PURECAP_HARDFLOAT_ARGS@ -mattr=+a < %s -@IFNOT-RISCV@; RUN: not --crash llc @PURECAP_HARDFLOAT_ARGS@ --enable-tail-merge < %s +; Note: cat %s is needed so that update_mir_test_checks.py does not process these RUN lines. +@IF-RISCV@; RUN: cat %s | llc @PURECAP_HARDFLOAT_ARGS@ -mattr=+a | FileCheck %s +@IFNOT-RISCV@; RUN: cat %s | llc @PURECAP_HARDFLOAT_ARGS@ --enable-tail-merge | FileCheck %s ; REQUIRES: asserts ; The branch-folder MIR pass will merge the two blocks inside these functions but ; since the base pointer is distinct it will have two MachineMemOperands. ; The cmpxchg exact logic stored the exact flag in the MachineMemOperand and ; previously assumed there would only ever be one operand, so this test ensures -; we can handle the merged logic. +; we can handle the merged logic by adding separate pseudo instructions (which +; ensures that the branches with different comparisons can no longer be merged). define dso_local signext i32 @merge_i32(i1 %cond1, ptr addrspace(200) %ptr, i32 %newval, i32 %cmpval) { entry: @@ -66,7 +68,6 @@ end: ret i32 0 } -; FIXME: these two branches should not be merged! 
define dso_local signext i32 @merge_ptr_mismatch_exact_flag(i1 %cond1, ptr addrspace(200) %ptr, ptr addrspace(200) %newval, ptr addrspace(200) %cmpval) { entry: br i1 %cond1, label %if.then, label %if.else diff --git a/llvm/test/CodeGen/CHERI-Generic/MIPS/cmpxchg-exact-branch-folder.ll b/llvm/test/CodeGen/CHERI-Generic/MIPS/cmpxchg-exact-branch-folder.ll index ec1750b4e589..3e3ee4c0a633 100644 --- a/llvm/test/CodeGen/CHERI-Generic/MIPS/cmpxchg-exact-branch-folder.ll +++ b/llvm/test/CodeGen/CHERI-Generic/MIPS/cmpxchg-exact-branch-folder.ll @@ -4,16 +4,38 @@ ; CHERI-GENERIC-UTC: llc ; CHERI-GENERIC-UTC: mir ; RUN: llc -mtriple=mips64 -mcpu=cheri128 -mattr=+cheri128 --relocation-model=pic -target-abi purecap < %s --stop-after=branch-folder --enable-tail-merge | FileCheck %s --check-prefixes=MIR -; RUN: not --crash llc -mtriple=mips64 -mcpu=cheri128 -mattr=+cheri128 --relocation-model=pic -target-abi purecap --enable-tail-merge < %s +; RUN: cat %s | llc -mtriple=mips64 -mcpu=cheri128 -mattr=+cheri128 --relocation-model=pic -target-abi purecap --enable-tail-merge | FileCheck %s ; REQUIRES: asserts ; The branch-folder MIR pass will merge the two blocks inside these functions but ; since the base pointer is distinct it will have two MachineMemOperands. ; The cmpxchg exact logic stored the exact flag in the MachineMemOperand and ; previously assumed there would only ever be one operand, so this test ensures -; we can handle the merged logic. +; we can handle the merged logic by adding separate pseudo instructions (which +; ensures that the branches with different comparisons can no longer be merged). 
define dso_local signext i32 @merge_i32(i1 %cond1, ptr addrspace(200) %ptr, i32 %newval, i32 %cmpval) { +; CHECK-LABEL: merge_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sll $2, $6, 0 +; CHECK-NEXT: sll $1, $4, 0 +; CHECK-NEXT: andi $1, $1, 1 +; CHECK-NEXT: sll $3, $5, 0 +; CHECK-NEXT: clc $c1, $zero, 0($c3) +; CHECK-NEXT: sync +; CHECK-NEXT: .LBB0_1: # %entry +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cllw $1, $c1 +; CHECK-NEXT: bne $1, $2, .LBB0_3 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: cscw $4, $3, $c1 +; CHECK-NEXT: beqz $4, .LBB0_1 +; CHECK-NEXT: nop +; CHECK-NEXT: .LBB0_3: # %entry +; CHECK-NEXT: cjr $c17 +; CHECK-NEXT: daddiu $2, $zero, 0 ; MIR-LABEL: name: merge_i32 ; MIR: bb.0.entry: ; MIR-NEXT: liveins: $c3, $a0_64, $a1_64, $a2_64 @@ -24,7 +46,7 @@ define dso_local signext i32 @merge_i32(i1 %cond1, ptr addrspace(200) %ptr, i32 ; MIR-NEXT: renamable $v1 = SLL renamable $a1, 0, implicit killed $a1_64 ; MIR-NEXT: renamable $c1 = LOADCAP $zero_64, 0, killed renamable $c3 :: (load (s128) from %ir.ptr, addrspace 200) ; MIR-NEXT: SYNC 0 - ; MIR-NEXT: dead early-clobber renamable $at = CAP_ATOMIC_CMP_SWAP_I32_POSTRA killed renamable $c1, killed renamable $v0, killed renamable $v1, implicit-def dead early-clobber renamable $a0 :: (load store monotonic monotonic (s32) on %ir.ld2, addrspace 200), (load store monotonic monotonic (s32) on %ir.ld1, addrspace 200) + ; MIR-NEXT: dead early-clobber renamable $at = CAP_ATOMIC_CMP_SWAP_I32_POSTRA killed renamable $c1, killed renamable $v0, killed renamable $v1, implicit-def dead early-clobber renamable $a0 ; MIR-NEXT: $v0_64 = DADDiu $zero_64, 0 ; MIR-NEXT: CapRetPseudo implicit $v0_64 entry: @@ -45,6 +67,26 @@ end: } define dso_local signext i32 @merge_ptr_addr(i1 %cond1, ptr addrspace(200) %ptr, ptr addrspace(200) %newval, ptr addrspace(200) %cmpval) { +; CHECK-LABEL: merge_ptr_addr: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: sll $1, $4, 0 +; CHECK-NEXT: andi $1, $1, 1 +; CHECK-NEXT: clc $c1, $zero, 0($c3) +; CHECK-NEXT: sync +; CHECK-NEXT: .LBB1_1: # %entry +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cllc $c2, $c1 +; CHECK-NEXT: ceq $1, $c2, $c5 +; CHECK-NEXT: beqz $1, .LBB1_3 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: cscc $1, $c4, $c1 +; CHECK-NEXT: beqz $1, .LBB1_1 +; CHECK-NEXT: nop +; CHECK-NEXT: .LBB1_3: # %entry +; CHECK-NEXT: cjr $c17 +; CHECK-NEXT: daddiu $2, $zero, 0 ; MIR-LABEL: name: merge_ptr_addr ; MIR: bb.0.entry: ; MIR-NEXT: liveins: $c3, $c4, $c5, $a0_64 @@ -53,7 +95,7 @@ define dso_local signext i32 @merge_ptr_addr(i1 %cond1, ptr addrspace(200) %ptr, ; MIR-NEXT: renamable $at = ANDi killed renamable $at, 1 ; MIR-NEXT: renamable $c1 = LOADCAP $zero_64, 0, killed renamable $c3 :: (load (s128) from %ir.ptr, addrspace 200) ; MIR-NEXT: SYNC 0 - ; MIR-NEXT: dead early-clobber renamable $c2 = CAP_ATOMIC_CMP_SWAP_CAP_POSTRA killed renamable $c1, killed renamable $c5, killed renamable $c4, implicit-def dead early-clobber renamable $at_64 :: (load store monotonic monotonic (s128) on %ir.ld2, addrspace 200), (load store monotonic monotonic (s128) on %ir.ld1, addrspace 200) + ; MIR-NEXT: dead early-clobber renamable $c2 = CAP_ATOMIC_CMP_SWAP_CAP_ADDR_POSTRA killed renamable $c1, killed renamable $c5, killed renamable $c4, implicit-def dead early-clobber renamable $at_64 ; MIR-NEXT: $v0_64 = DADDiu $zero_64, 0 ; MIR-NEXT: CapRetPseudo implicit killed $v0_64 entry: @@ -74,6 +116,26 @@ end: } define dso_local signext i32 @merge_ptr_exact(i1 %cond1, ptr addrspace(200) %ptr, ptr addrspace(200) %newval, ptr addrspace(200) %cmpval) { +; CHECK-LABEL: merge_ptr_exact: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sll $1, $4, 0 +; CHECK-NEXT: andi $1, $1, 1 +; CHECK-NEXT: clc $c1, $zero, 0($c3) +; CHECK-NEXT: sync +; CHECK-NEXT: .LBB2_1: # %entry +; CHECK-NEXT: # =>This Inner Loop Header: 
Depth=1 +; CHECK-NEXT: cllc $c2, $c1 +; CHECK-NEXT: cexeq $1, $c2, $c5 +; CHECK-NEXT: beqz $1, .LBB2_3 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: # in Loop: Header=BB2_1 Depth=1 +; CHECK-NEXT: cscc $1, $c4, $c1 +; CHECK-NEXT: beqz $1, .LBB2_1 +; CHECK-NEXT: nop +; CHECK-NEXT: .LBB2_3: # %entry +; CHECK-NEXT: cjr $c17 +; CHECK-NEXT: daddiu $2, $zero, 0 ; MIR-LABEL: name: merge_ptr_exact ; MIR: bb.0.entry: ; MIR-NEXT: liveins: $c3, $c4, $c5, $a0_64 @@ -82,7 +144,7 @@ define dso_local signext i32 @merge_ptr_exact(i1 %cond1, ptr addrspace(200) %ptr ; MIR-NEXT: renamable $at = ANDi killed renamable $at, 1 ; MIR-NEXT: renamable $c1 = LOADCAP $zero_64, 0, killed renamable $c3 :: (load (s128) from %ir.ptr, addrspace 200) ; MIR-NEXT: SYNC 0 - ; MIR-NEXT: dead early-clobber renamable $c2 = CAP_ATOMIC_CMP_SWAP_CAP_POSTRA killed renamable $c1, killed renamable $c5, killed renamable $c4, implicit-def dead early-clobber renamable $at_64 :: (load store monotonic monotonic exact (s128) on %ir.ld2, addrspace 200), (load store monotonic monotonic exact (s128) on %ir.ld1, addrspace 200) + ; MIR-NEXT: dead early-clobber renamable $c2 = CAP_ATOMIC_CMP_SWAP_CAP_EXACT_POSTRA killed renamable $c1, killed renamable $c5, killed renamable $c4, implicit-def dead early-clobber renamable $at_64 ; MIR-NEXT: $v0_64 = DADDiu $zero_64, 0 ; MIR-NEXT: CapRetPseudo implicit killed $v0_64 entry: @@ -102,17 +164,74 @@ end: ret i32 0 } -; FIXME: these two branches should not be merged! 
define dso_local signext i32 @merge_ptr_mismatch_exact_flag(i1 %cond1, ptr addrspace(200) %ptr, ptr addrspace(200) %newval, ptr addrspace(200) %cmpval) { +; CHECK-LABEL: merge_ptr_mismatch_exact_flag: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sll $1, $4, 0 +; CHECK-NEXT: andi $1, $1, 1 +; CHECK-NEXT: beqz $1, .LBB3_5 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: clc $c1, $zero, 0($c3) +; CHECK-NEXT: sync +; CHECK-NEXT: .LBB3_2: # %if.then +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cllc $c2, $c1 +; CHECK-NEXT: cexeq $1, $c2, $c5 +; CHECK-NEXT: beqz $1, .LBB3_4 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.3: # %if.then +; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 +; CHECK-NEXT: cscc $1, $c4, $c1 +; CHECK-NEXT: beqz $1, .LBB3_2 +; CHECK-NEXT: nop +; CHECK-NEXT: .LBB3_4: # %if.then +; CHECK-NEXT: cjr $c17 +; CHECK-NEXT: daddiu $2, $zero, 0 +; CHECK-NEXT: .LBB3_5: # %if.else +; CHECK-NEXT: clc $c1, $zero, 0($c3) +; CHECK-NEXT: sync +; CHECK-NEXT: .LBB3_6: # %if.else +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cllc $c2, $c1 +; CHECK-NEXT: ceq $1, $c2, $c5 +; CHECK-NEXT: beqz $1, .LBB3_8 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.7: # %if.else +; CHECK-NEXT: # in Loop: Header=BB3_6 Depth=1 +; CHECK-NEXT: cscc $1, $c4, $c1 +; CHECK-NEXT: beqz $1, .LBB3_6 +; CHECK-NEXT: nop +; CHECK-NEXT: .LBB3_8: # %if.else +; CHECK-NEXT: cjr $c17 +; CHECK-NEXT: daddiu $2, $zero, 0 ; MIR-LABEL: name: merge_ptr_mismatch_exact_flag ; MIR: bb.0.entry: + ; MIR-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; MIR-NEXT: liveins: $c3, $c4, $c5, $a0_64 ; MIR-NEXT: {{ $}} ; MIR-NEXT: renamable $at = SLL renamable $a0, 0, implicit killed $a0_64 ; MIR-NEXT: renamable $at = ANDi killed renamable $at, 1 + ; MIR-NEXT: BEQ killed renamable $at, $zero, %bb.2, implicit-def $at + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: bb.1.if.then: + ; MIR-NEXT: successors: %bb.3(0x80000000) + ; MIR-NEXT: liveins: $c3, $c4, $c5 + ; MIR-NEXT: {{ $}} ; MIR-NEXT: 
renamable $c1 = LOADCAP $zero_64, 0, killed renamable $c3 :: (load (s128) from %ir.ptr, addrspace 200) ; MIR-NEXT: SYNC 0 - ; MIR-NEXT: dead early-clobber renamable $c2 = CAP_ATOMIC_CMP_SWAP_CAP_POSTRA killed renamable $c1, killed renamable $c5, killed renamable $c4, implicit-def dead early-clobber renamable $at_64 :: (load store monotonic monotonic (s128) on %ir.ld2, addrspace 200), (load store monotonic monotonic exact (s128) on %ir.ld1, addrspace 200) + ; MIR-NEXT: dead early-clobber renamable $c2 = CAP_ATOMIC_CMP_SWAP_CAP_EXACT_POSTRA killed renamable $c1, killed renamable $c5, killed renamable $c4, implicit-def dead early-clobber renamable $at_64 + ; MIR-NEXT: B %bb.3, implicit-def $at + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: bb.2.if.else: + ; MIR-NEXT: successors: %bb.3(0x80000000) + ; MIR-NEXT: liveins: $c3, $c4, $c5 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: renamable $c1 = LOADCAP $zero_64, 0, killed renamable $c3 :: (load (s128) from %ir.ptr, addrspace 200) + ; MIR-NEXT: SYNC 0 + ; MIR-NEXT: dead early-clobber renamable $c2 = CAP_ATOMIC_CMP_SWAP_CAP_ADDR_POSTRA killed renamable $c1, killed renamable $c5, killed renamable $c4, implicit-def dead early-clobber renamable $at_64 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: bb.3.end: ; MIR-NEXT: $v0_64 = DADDiu $zero_64, 0 ; MIR-NEXT: CapRetPseudo implicit killed $v0_64 entry: diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32/cmpxchg-exact-branch-folder.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32/cmpxchg-exact-branch-folder.ll index cc3940f55040..88637e638a72 100644 --- a/llvm/test/CodeGen/CHERI-Generic/RISCV32/cmpxchg-exact-branch-folder.ll +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32/cmpxchg-exact-branch-folder.ll @@ -4,16 +4,32 @@ ; CHERI-GENERIC-UTC: llc ; CHERI-GENERIC-UTC: mir ; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+xcheri,+cap-mode,+f -mattr=+a < %s --stop-after=branch-folder | FileCheck %s --check-prefixes=MIR -; RUN: not --crash llc -mtriple=riscv32 --relocation-model=pic 
-target-abi il32pc64f -mattr=+xcheri,+cap-mode,+f -mattr=+a < %s +; RUN: cat %s | llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+xcheri,+cap-mode,+f -mattr=+a | FileCheck %s ; REQUIRES: asserts ; The branch-folder MIR pass will merge the two blocks inside these functions but ; since the base pointer is distinct it will have two MachineMemOperands. ; The cmpxchg exact logic stored the exact flag in the MachineMemOperand and ; previously assumed there would only ever be one operand, so this test ensures -; we can handle the merged logic. +; we can handle the merged logic by adding separate pseudo instructions (which +; ensures that the branches with different comparisons can no longer be merged). define dso_local signext i32 @merge_i32(i1 %cond1, ptr addrspace(200) %ptr, i32 %newval, i32 %cmpval) { +; CHECK-LABEL: merge_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: clc ca0, 0(ca1) +; CHECK-NEXT: .LBB0_1: # %entry +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: clr.w a1, (ca0) +; CHECK-NEXT: bne a1, a3, .LBB0_3 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: csc.w.rl a4, a2, (ca0) +; CHECK-NEXT: bnez a4, .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %entry +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: cret ; MIR-LABEL: name: merge_i32 ; MIR: bb.0.entry: ; MIR-NEXT: liveins: $c11, $x10, $x12, $x13 @@ -41,13 +57,28 @@ end: } define dso_local signext i32 @merge_ptr_addr(i1 %cond1, ptr addrspace(200) %ptr, ptr addrspace(200) %newval, ptr addrspace(200) %cmpval) { +; CHECK-LABEL: merge_ptr_addr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: clc ca0, 0(ca1) +; CHECK-NEXT: .LBB1_1: # %entry +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: clr.c.rl ca1, (ca0) +; CHECK-NEXT: bne a1, a3, .LBB1_3 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: csc.c a4, ca2, (ca0) +; CHECK-NEXT: bnez 
a4, .LBB1_1 +; CHECK-NEXT: .LBB1_3: # %entry +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: cret ; MIR-LABEL: name: merge_ptr_addr ; MIR: bb.0.entry: ; MIR-NEXT: liveins: $c11, $c12, $c13, $x10 ; MIR-NEXT: {{ $}} ; MIR-NEXT: renamable $x10 = ANDI killed renamable $x10, 1 ; MIR-NEXT: renamable $c10 = CLC_64 killed renamable $c11, 0 :: (load (s64) from %ir.ptr, align 16, addrspace 200) - ; MIR-NEXT: dead early-clobber renamable $c11, dead early-clobber renamable $x14 = PseudoCheriCmpXchgCap killed renamable $c10, killed renamable $c13, killed renamable $c12, 5 :: (load store release monotonic (s64) on %ir.ld2, addrspace 200), (load store release monotonic (s64) on %ir.ld1, addrspace 200) + ; MIR-NEXT: dead early-clobber renamable $c11, dead early-clobber renamable $x14 = PseudoCheriCmpXchgCapAddr killed renamable $c10, killed renamable $c13, killed renamable $c12, 5 :: (load store release monotonic (s64) on %ir.ld2, addrspace 200), (load store release monotonic (s64) on %ir.ld1, addrspace 200) ; MIR-NEXT: $x10 = COPY $x0 ; MIR-NEXT: PseudoCRET implicit $x10 entry: @@ -68,13 +99,29 @@ end: } define dso_local signext i32 @merge_ptr_exact(i1 %cond1, ptr addrspace(200) %ptr, ptr addrspace(200) %newval, ptr addrspace(200) %cmpval) { +; CHECK-LABEL: merge_ptr_exact: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: clc ca0, 0(ca1) +; CHECK-NEXT: .LBB2_1: # %entry +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: clr.c.rl ca1, (ca0) +; CHECK-NEXT: cseqx a4, ca1, ca3 +; CHECK-NEXT: beqz a4, .LBB2_3 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: # in Loop: Header=BB2_1 Depth=1 +; CHECK-NEXT: csc.c a4, ca2, (ca0) +; CHECK-NEXT: bnez a4, .LBB2_1 +; CHECK-NEXT: .LBB2_3: # %entry +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: cret ; MIR-LABEL: name: merge_ptr_exact ; MIR: bb.0.entry: ; MIR-NEXT: liveins: $c11, $c12, $c13, $x10 ; MIR-NEXT: {{ $}} ; MIR-NEXT: renamable $x10 = ANDI killed renamable $x10, 1 ; MIR-NEXT: renamable $c10 = CLC_64 killed renamable 
$c11, 0 :: (load (s64) from %ir.ptr, align 16, addrspace 200) - ; MIR-NEXT: dead early-clobber renamable $c11, dead early-clobber renamable $x14 = PseudoCheriCmpXchgCap killed renamable $c10, killed renamable $c13, killed renamable $c12, 5 :: (load store release monotonic exact (s64) on %ir.ld2, addrspace 200), (load store release monotonic exact (s64) on %ir.ld1, addrspace 200) + ; MIR-NEXT: dead early-clobber renamable $c11, dead early-clobber renamable $x14 = PseudoCheriCmpXchgCapExact killed renamable $c10, killed renamable $c13, killed renamable $c12, 5 :: (load store release monotonic exact (s64) on %ir.ld2, addrspace 200), (load store release monotonic exact (s64) on %ir.ld1, addrspace 200) ; MIR-NEXT: $x10 = COPY $x0 ; MIR-NEXT: PseudoCRET implicit $x10 entry: @@ -94,15 +141,62 @@ end: ret i32 0 } -; FIXME: these two branches should not be merged! define dso_local signext i32 @merge_ptr_mismatch_exact_flag(i1 %cond1, ptr addrspace(200) %ptr, ptr addrspace(200) %newval, ptr addrspace(200) %cmpval) { +; CHECK-LABEL: merge_ptr_mismatch_exact_flag: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: beqz a0, .LBB3_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: clc ca0, 0(ca1) +; CHECK-NEXT: .LBB3_3: # %if.then +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: clr.c.rl ca1, (ca0) +; CHECK-NEXT: cseqx a4, ca1, ca3 +; CHECK-NEXT: beqz a4, .LBB3_5 +; CHECK-NEXT: # %bb.4: # %if.then +; CHECK-NEXT: # in Loop: Header=BB3_3 Depth=1 +; CHECK-NEXT: csc.c a4, ca2, (ca0) +; CHECK-NEXT: bnez a4, .LBB3_3 +; CHECK-NEXT: .LBB3_5: # %if.then +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: cret +; CHECK-NEXT: .LBB3_2: # %if.else +; CHECK-NEXT: clc ca0, 0(ca1) +; CHECK-NEXT: .LBB3_6: # %if.else +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: clr.c.rl ca1, (ca0) +; CHECK-NEXT: bne a1, a3, .LBB3_8 +; CHECK-NEXT: # %bb.7: # %if.else +; CHECK-NEXT: # in Loop: Header=BB3_6 Depth=1 +; CHECK-NEXT: csc.c a4, ca2, (ca0) +; CHECK-NEXT: 
bnez a4, .LBB3_6 +; CHECK-NEXT: .LBB3_8: # %if.else +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: cret ; MIR-LABEL: name: merge_ptr_mismatch_exact_flag ; MIR: bb.0.entry: + ; MIR-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; MIR-NEXT: liveins: $c11, $c12, $c13, $x10 ; MIR-NEXT: {{ $}} ; MIR-NEXT: renamable $x10 = ANDI killed renamable $x10, 1 + ; MIR-NEXT: BEQ killed renamable $x10, $x0, %bb.2 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: bb.1.if.then: + ; MIR-NEXT: successors: %bb.3(0x80000000) + ; MIR-NEXT: liveins: $c11, $c12, $c13 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: renamable $c10 = CLC_64 killed renamable $c11, 0 :: (load (s64) from %ir.ptr, align 16, addrspace 200) + ; MIR-NEXT: dead early-clobber renamable $c11, dead early-clobber renamable $x14 = PseudoCheriCmpXchgCapExact killed renamable $c10, killed renamable $c13, killed renamable $c12, 5 :: (load store release monotonic exact (s64) on %ir.ld1, addrspace 200) + ; MIR-NEXT: PseudoCBR %bb.3 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: bb.2.if.else: + ; MIR-NEXT: successors: %bb.3(0x80000000) + ; MIR-NEXT: liveins: $c11, $c12, $c13 + ; MIR-NEXT: {{ $}} ; MIR-NEXT: renamable $c10 = CLC_64 killed renamable $c11, 0 :: (load (s64) from %ir.ptr, align 16, addrspace 200) - ; MIR-NEXT: dead early-clobber renamable $c11, dead early-clobber renamable $x14 = PseudoCheriCmpXchgCap killed renamable $c10, killed renamable $c13, killed renamable $c12, 5 :: (load store release monotonic (s64) on %ir.ld2, addrspace 200), (load store release monotonic exact (s64) on %ir.ld1, addrspace 200) + ; MIR-NEXT: dead early-clobber renamable $c11, dead early-clobber renamable $x14 = PseudoCheriCmpXchgCapAddr killed renamable $c10, killed renamable $c13, killed renamable $c12, 5 :: (load store release monotonic (s64) on %ir.ld2, addrspace 200) + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: bb.3.end: ; MIR-NEXT: $x10 = COPY $x0 ; MIR-NEXT: PseudoCRET implicit $x10 entry: diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64/cmpxchg-exact-branch-folder.ll 
b/llvm/test/CodeGen/CHERI-Generic/RISCV64/cmpxchg-exact-branch-folder.ll index d6bf459f2eb8..3d407267ac37 100644 --- a/llvm/test/CodeGen/CHERI-Generic/RISCV64/cmpxchg-exact-branch-folder.ll +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64/cmpxchg-exact-branch-folder.ll @@ -4,16 +4,33 @@ ; CHERI-GENERIC-UTC: llc ; CHERI-GENERIC-UTC: mir ; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+xcheri,+cap-mode,+f,+d -mattr=+a < %s --stop-after=branch-folder | FileCheck %s --check-prefixes=MIR -; RUN: not --crash llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+xcheri,+cap-mode,+f,+d -mattr=+a < %s +; RUN: cat %s | llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+xcheri,+cap-mode,+f,+d -mattr=+a | FileCheck %s ; REQUIRES: asserts ; The branch-folder MIR pass will merge the two blocks inside these functions but ; since the base pointer is distinct it will have two MachineMemOperands. ; The cmpxchg exact logic stored the exact flag in the MachineMemOperand and ; previously assumed there would only ever be one operand, so this test ensures -; we can handle the merged logic. +; we can handle the merged logic by adding separate pseudo instructions (which +; guarantees that the branches with different comparisons can no longer be merged). 
define dso_local signext i32 @merge_i32(i1 %cond1, ptr addrspace(200) %ptr, i32 %newval, i32 %cmpval) { +; CHECK-LABEL: merge_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: clc ca0, 0(ca1) +; CHECK-NEXT: sext.w a1, a3 +; CHECK-NEXT: .LBB0_1: # %entry +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: clr.w a3, (ca0) +; CHECK-NEXT: bne a3, a1, .LBB0_3 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: csc.w.rl a4, a2, (ca0) +; CHECK-NEXT: bnez a4, .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %entry +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: cret ; MIR-LABEL: name: merge_i32 ; MIR: bb.0.entry: ; MIR-NEXT: liveins: $c11, $x10, $x12, $x13 @@ -42,13 +59,28 @@ end: } define dso_local signext i32 @merge_ptr_addr(i1 %cond1, ptr addrspace(200) %ptr, ptr addrspace(200) %newval, ptr addrspace(200) %cmpval) { +; CHECK-LABEL: merge_ptr_addr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: clc ca0, 0(ca1) +; CHECK-NEXT: .LBB1_1: # %entry +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: clr.c.rl ca1, (ca0) +; CHECK-NEXT: bne a1, a3, .LBB1_3 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: csc.c a4, ca2, (ca0) +; CHECK-NEXT: bnez a4, .LBB1_1 +; CHECK-NEXT: .LBB1_3: # %entry +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: cret ; MIR-LABEL: name: merge_ptr_addr ; MIR: bb.0.entry: ; MIR-NEXT: liveins: $c11, $c12, $c13, $x10 ; MIR-NEXT: {{ $}} ; MIR-NEXT: renamable $x10 = ANDI killed renamable $x10, 1 ; MIR-NEXT: renamable $c10 = CLC_128 killed renamable $c11, 0 :: (load (s128) from %ir.ptr, addrspace 200) - ; MIR-NEXT: dead early-clobber renamable $c11, dead early-clobber renamable $x14 = PseudoCheriCmpXchgCap killed renamable $c10, killed renamable $c13, killed renamable $c12, 5 :: (load store release monotonic (s128) on %ir.ld2, addrspace 200), (load store release monotonic (s128) on %ir.ld1, addrspace 200) + ; MIR-NEXT: dead 
early-clobber renamable $c11, dead early-clobber renamable $x14 = PseudoCheriCmpXchgCapAddr killed renamable $c10, killed renamable $c13, killed renamable $c12, 5 :: (load store release monotonic (s128) on %ir.ld2, addrspace 200), (load store release monotonic (s128) on %ir.ld1, addrspace 200) ; MIR-NEXT: $x10 = COPY $x0 ; MIR-NEXT: PseudoCRET implicit $x10 entry: @@ -69,13 +101,29 @@ end: } define dso_local signext i32 @merge_ptr_exact(i1 %cond1, ptr addrspace(200) %ptr, ptr addrspace(200) %newval, ptr addrspace(200) %cmpval) { +; CHECK-LABEL: merge_ptr_exact: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: clc ca0, 0(ca1) +; CHECK-NEXT: .LBB2_1: # %entry +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: clr.c.rl ca1, (ca0) +; CHECK-NEXT: cseqx a4, ca1, ca3 +; CHECK-NEXT: beqz a4, .LBB2_3 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: # in Loop: Header=BB2_1 Depth=1 +; CHECK-NEXT: csc.c a4, ca2, (ca0) +; CHECK-NEXT: bnez a4, .LBB2_1 +; CHECK-NEXT: .LBB2_3: # %entry +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: cret ; MIR-LABEL: name: merge_ptr_exact ; MIR: bb.0.entry: ; MIR-NEXT: liveins: $c11, $c12, $c13, $x10 ; MIR-NEXT: {{ $}} ; MIR-NEXT: renamable $x10 = ANDI killed renamable $x10, 1 ; MIR-NEXT: renamable $c10 = CLC_128 killed renamable $c11, 0 :: (load (s128) from %ir.ptr, addrspace 200) - ; MIR-NEXT: dead early-clobber renamable $c11, dead early-clobber renamable $x14 = PseudoCheriCmpXchgCap killed renamable $c10, killed renamable $c13, killed renamable $c12, 5 :: (load store release monotonic exact (s128) on %ir.ld2, addrspace 200), (load store release monotonic exact (s128) on %ir.ld1, addrspace 200) + ; MIR-NEXT: dead early-clobber renamable $c11, dead early-clobber renamable $x14 = PseudoCheriCmpXchgCapExact killed renamable $c10, killed renamable $c13, killed renamable $c12, 5 :: (load store release monotonic exact (s128) on %ir.ld2, addrspace 200), (load store release monotonic exact (s128) on %ir.ld1, addrspace 200) 
; MIR-NEXT: $x10 = COPY $x0 ; MIR-NEXT: PseudoCRET implicit $x10 entry: @@ -95,15 +143,62 @@ end: ret i32 0 } -; FIXME: these two branches should not be merged! define dso_local signext i32 @merge_ptr_mismatch_exact_flag(i1 %cond1, ptr addrspace(200) %ptr, ptr addrspace(200) %newval, ptr addrspace(200) %cmpval) { +; CHECK-LABEL: merge_ptr_mismatch_exact_flag: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: beqz a0, .LBB3_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: clc ca0, 0(ca1) +; CHECK-NEXT: .LBB3_3: # %if.then +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: clr.c.rl ca1, (ca0) +; CHECK-NEXT: cseqx a4, ca1, ca3 +; CHECK-NEXT: beqz a4, .LBB3_5 +; CHECK-NEXT: # %bb.4: # %if.then +; CHECK-NEXT: # in Loop: Header=BB3_3 Depth=1 +; CHECK-NEXT: csc.c a4, ca2, (ca0) +; CHECK-NEXT: bnez a4, .LBB3_3 +; CHECK-NEXT: .LBB3_5: # %if.then +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: cret +; CHECK-NEXT: .LBB3_2: # %if.else +; CHECK-NEXT: clc ca0, 0(ca1) +; CHECK-NEXT: .LBB3_6: # %if.else +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: clr.c.rl ca1, (ca0) +; CHECK-NEXT: bne a1, a3, .LBB3_8 +; CHECK-NEXT: # %bb.7: # %if.else +; CHECK-NEXT: # in Loop: Header=BB3_6 Depth=1 +; CHECK-NEXT: csc.c a4, ca2, (ca0) +; CHECK-NEXT: bnez a4, .LBB3_6 +; CHECK-NEXT: .LBB3_8: # %if.else +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: cret ; MIR-LABEL: name: merge_ptr_mismatch_exact_flag ; MIR: bb.0.entry: + ; MIR-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; MIR-NEXT: liveins: $c11, $c12, $c13, $x10 ; MIR-NEXT: {{ $}} ; MIR-NEXT: renamable $x10 = ANDI killed renamable $x10, 1 + ; MIR-NEXT: BEQ killed renamable $x10, $x0, %bb.2 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: bb.1.if.then: + ; MIR-NEXT: successors: %bb.3(0x80000000) + ; MIR-NEXT: liveins: $c11, $c12, $c13 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: renamable $c10 = CLC_128 killed renamable $c11, 0 :: (load (s128) from %ir.ptr, addrspace 200) + ; MIR-NEXT: dead early-clobber renamable 
$c11, dead early-clobber renamable $x14 = PseudoCheriCmpXchgCapExact killed renamable $c10, killed renamable $c13, killed renamable $c12, 5 :: (load store release monotonic exact (s128) on %ir.ld1, addrspace 200) + ; MIR-NEXT: PseudoCBR %bb.3 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: bb.2.if.else: + ; MIR-NEXT: successors: %bb.3(0x80000000) + ; MIR-NEXT: liveins: $c11, $c12, $c13 + ; MIR-NEXT: {{ $}} ; MIR-NEXT: renamable $c10 = CLC_128 killed renamable $c11, 0 :: (load (s128) from %ir.ptr, addrspace 200) - ; MIR-NEXT: dead early-clobber renamable $c11, dead early-clobber renamable $x14 = PseudoCheriCmpXchgCap killed renamable $c10, killed renamable $c13, killed renamable $c12, 5 :: (load store release monotonic (s128) on %ir.ld2, addrspace 200), (load store release monotonic exact (s128) on %ir.ld1, addrspace 200) + ; MIR-NEXT: dead early-clobber renamable $c11, dead early-clobber renamable $x14 = PseudoCheriCmpXchgCapAddr killed renamable $c10, killed renamable $c13, killed renamable $c12, 5 :: (load store release monotonic (s128) on %ir.ld2, addrspace 200) + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: bb.3.end: ; MIR-NEXT: $x10 = COPY $x0 ; MIR-NEXT: PseudoCRET implicit $x10 entry: