From 6dfc55ba53b71072d953fa60d71388bb8c4dbc12 Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Mon, 24 Feb 2020 15:45:43 +0100 Subject: [PATCH 01/16] [AVR] Use correct register class for mul instructions A number of multiplication instructions (muls, mulsu, fmul, fmuls, fmulsu) had the wrong register class for an operand. This resulted in the wrong register being used for the instruction. Example: target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8" target triple = "avr-atmel-none" define i16 @sliceAppend(i16, i16, i16, i16, i16, i16) addrspace(1) { %d = mul i16 %0, %5 ret i16 %d } The first instruction would be muls r24, r31 before this patch. The r31 should have been r15 if you look at the intermediate forms during instruction selection / register allocation, but the generated instruction uses r31. After this patch, an extra movw is inserted to get %5 in range for muls. To make sure this bug is fixed everywhere, I checked all instructions and found that most multiplication instructions suffered from this bug, which I have fixed with this patch. No other instructions appear to be affected. 
Differential Revision: https://reviews.llvm.org/D74281 --- llvm/lib/Target/AVR/AVRInstrInfo.td | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index acf991dcfbb1fa..0f4672684cfbea 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -555,7 +555,7 @@ Defs = [R1, R0, SREG] in def MULSRdRr : FMUL2RdRr<0, (outs), - (ins GPR8:$lhs, GPR8:$rhs), + (ins LD8:$lhs, LD8:$rhs), "muls\t$lhs, $rhs", []>, Requires<[SupportsMultiplication]>; @@ -563,28 +563,28 @@ Defs = [R1, R0, SREG] in def MULSURdRr : FMUL2RdRr<1, (outs), - (ins GPR8:$lhs, GPR8:$rhs), + (ins LD8lo:$lhs, LD8lo:$rhs), "mulsu\t$lhs, $rhs", []>, Requires<[SupportsMultiplication]>; def FMUL : FFMULRdRr<0b01, (outs), - (ins GPR8:$lhs, GPR8:$rhs), + (ins LD8lo:$lhs, LD8lo:$rhs), "fmul\t$lhs, $rhs", []>, Requires<[SupportsMultiplication]>; def FMULS : FFMULRdRr<0b10, (outs), - (ins GPR8:$lhs, GPR8:$rhs), + (ins LD8lo:$lhs, LD8lo:$rhs), "fmuls\t$lhs, $rhs", []>, Requires<[SupportsMultiplication]>; def FMULSU : FFMULRdRr<0b11, (outs), - (ins GPR8:$lhs, GPR8:$rhs), + (ins LD8lo:$lhs, LD8lo:$rhs), "fmulsu\t$lhs, $rhs", []>, Requires<[SupportsMultiplication]>; From 96075fc433d0e5dd22ae23f14c34d0ea0dc6cd3f Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Fri, 7 Feb 2020 13:06:40 +0100 Subject: [PATCH 02/16] [AVR] Don't adjust addresses by 2 for absolute values Adjusting by 2 breaks DWARF output. With this fix, programs start to compile and produce valid DWARF output. 
Differential Revision: https://reviews.llvm.org/D74213 --- llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp | 14 ++++++++++++-- llvm/test/MC/AVR/relocations-abs.s | 8 ++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 llvm/test/MC/AVR/relocations-abs.s diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp index e92b16c8ee9d61..1a741c9077507d 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp @@ -244,8 +244,18 @@ void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup, // To handle both cases, we simply un-adjust the temporary label // case so it acts like all other labels. if (const MCSymbolRefExpr *A = Target.getSymA()) { - if (A->getSymbol().isTemporary()) - Value += 2; + if (A->getSymbol().isTemporary()) { + switch (Kind) { + case FK_Data_1: + case FK_Data_2: + case FK_Data_4: + case FK_Data_8: + // Don't shift value for absolute addresses. + break; + default: + Value += 2; + } + } } switch (Kind) { diff --git a/llvm/test/MC/AVR/relocations-abs.s b/llvm/test/MC/AVR/relocations-abs.s new file mode 100644 index 00000000000000..1055ed51310a09 --- /dev/null +++ b/llvm/test/MC/AVR/relocations-abs.s @@ -0,0 +1,8 @@ +; RUN: llvm-mc -filetype=obj -triple=avr %s | llvm-objdump -dr - | FileCheck %s + +; CHECK: bar: +; CHECK-NEXT: 00 00 nop +; CHECK-NEXT: R_AVR_16 .text+0x2 +bar: + .short 1f +1: From 0ed0823fe6085bdd8e4211c63e4d2d4dc6a4c51b Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Tue, 3 Mar 2020 16:52:20 +0800 Subject: [PATCH 03/16] [AVR] Fix incorrect register state for LDRdPtr Summary: LDRdPtr expanded from LDWRdPtr shouldn't define its second operand (SrcReg). The second operand is its source register. Adding -verify-machineinstrs to the command line of the test cases triggers this error. 
Reviewers: dylanmckay Reviewed By: dylanmckay Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D75437 --- llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | 4 ++-- llvm/test/CodeGen/AVR/PR37143.ll | 2 +- llvm/test/CodeGen/AVR/brind.ll | 2 +- llvm/test/CodeGen/AVR/load.ll | 2 +- llvm/test/CodeGen/AVR/pseudo/LDWRdPtr.mir | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp index f466c5c053ad54..1c6a5046456e66 100644 --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -597,8 +597,8 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { // Load low byte. auto MIBLO = buildMI(MBB, MBBI, OpLo) - .addReg(CurDstLoReg, RegState::Define) - .addReg(SrcReg, RegState::Define); + .addReg(CurDstLoReg, RegState::Define) + .addReg(SrcReg); // Push low byte onto stack if necessary. if (TmpReg) diff --git a/llvm/test/CodeGen/AVR/PR37143.ll b/llvm/test/CodeGen/AVR/PR37143.ll index 72f4a2fd3722c3..c7cabd3cd0875d 100644 --- a/llvm/test/CodeGen/AVR/PR37143.ll +++ b/llvm/test/CodeGen/AVR/PR37143.ll @@ -1,4 +1,4 @@ -; RUN: llc -mattr=avr6,sram < %s -march=avr | FileCheck %s +; RUN: llc -mattr=avr6,sram < %s -march=avr -verify-machineinstrs | FileCheck %s ; CHECK: ld {{r[0-9]+}}, [[PTR:[XYZ]]] ; CHECK: ldd {{r[0-9]+}}, [[PTR]]+1 diff --git a/llvm/test/CodeGen/AVR/brind.ll b/llvm/test/CodeGen/AVR/brind.ll index ec8262e84a952d..4eea966062db79 100644 --- a/llvm/test/CodeGen/AVR/brind.ll +++ b/llvm/test/CodeGen/AVR/brind.ll @@ -1,4 +1,4 @@ -; RUN: llc -mattr=sram,eijmpcall < %s -march=avr | FileCheck %s +; RUN: llc -mattr=sram,eijmpcall < %s -march=avr -verify-machineinstrs | FileCheck %s @brind.k = private unnamed_addr constant [2 x i8*] [i8* blockaddress(@brind, %return), i8* blockaddress(@brind, %b)], align 1 diff --git a/llvm/test/CodeGen/AVR/load.ll b/llvm/test/CodeGen/AVR/load.ll 
index dbadacfd5e0de1..53748b3b100b92 100644 --- a/llvm/test/CodeGen/AVR/load.ll +++ b/llvm/test/CodeGen/AVR/load.ll @@ -1,4 +1,4 @@ -; RUN: llc -mattr=avr6,sram < %s -march=avr | FileCheck %s +; RUN: llc -mattr=avr6,sram < %s -march=avr -verify-machineinstrs | FileCheck %s define i8 @load8(i8* %x) { ; CHECK-LABEL: load8: diff --git a/llvm/test/CodeGen/AVR/pseudo/LDWRdPtr.mir b/llvm/test/CodeGen/AVR/pseudo/LDWRdPtr.mir index 5bd4bf2d431c8b..2343d0df49274a 100644 --- a/llvm/test/CodeGen/AVR/pseudo/LDWRdPtr.mir +++ b/llvm/test/CodeGen/AVR/pseudo/LDWRdPtr.mir @@ -17,7 +17,7 @@ body: | ; CHECK-LABEL: test_ldwrdptr - ; CHECK: $r0, $r31r30 = LDRdPtr + ; CHECK: $r0 = LDRdPtr $r31r30 ; CHECK-NEXT: $r1 = LDDRdPtrQ $r31r30, 1 $r1r0 = LDWRdPtr $r31r30 From 1c0ddae73c9acdbb338e938307ec2044a99d07a8 Mon Sep 17 00:00:00 2001 From: Dylan McKay Date: Fri, 13 Mar 2020 00:03:30 +1300 Subject: [PATCH 04/16] [AVR] Fix read of uninitialized variable AVRSubtarget::ELFArch Found by the LLVM MemorySanitizer tests when switching AVR to a default backend. ELFArch must be initialized before the call to initializeSubtargetDependencies(). The uninitialized read would occur deep within TableGen'd code. 
--- llvm/lib/Target/AVR/AVRSubtarget.cpp | 4 ++-- llvm/lib/Target/AVR/AVRSubtarget.h | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AVR/AVRSubtarget.cpp b/llvm/lib/Target/AVR/AVRSubtarget.cpp index 6a41036fdd6c89..be5c4c06225b7a 100644 --- a/llvm/lib/Target/AVR/AVRSubtarget.cpp +++ b/llvm/lib/Target/AVR/AVRSubtarget.cpp @@ -29,7 +29,7 @@ namespace llvm { AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const AVRTargetMachine &TM) - : AVRGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(), + : AVRGenSubtargetInfo(TT, CPU, FS), ELFArch(0), InstrInfo(), FrameLowering(), TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo(), // Subtarget features @@ -38,7 +38,7 @@ AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU, m_hasMOVW(false), m_hasLPM(false), m_hasLPMX(false), m_hasELPM(false), m_hasELPMX(false), m_hasSPM(false), m_hasSPMX(false), m_hasDES(false), m_supportsRMW(false), m_supportsMultiplication(false), m_hasBREAK(false), - m_hasTinyEncoding(false), ELFArch(false), m_FeatureSetDummy(false) { + m_hasTinyEncoding(false), m_FeatureSetDummy(false) { // Parse features string. ParseSubtargetFeatures(CPU, FS); } diff --git a/llvm/lib/Target/AVR/AVRSubtarget.h b/llvm/lib/Target/AVR/AVRSubtarget.h index da9289af7c8d72..aa813a15dc0a4d 100644 --- a/llvm/lib/Target/AVR/AVRSubtarget.h +++ b/llvm/lib/Target/AVR/AVRSubtarget.h @@ -81,6 +81,10 @@ class AVRSubtarget : public AVRGenSubtargetInfo { } private: + + /// The ELF e_flags architecture. + unsigned ELFArch; + AVRInstrInfo InstrInfo; AVRFrameLowering FrameLowering; AVRTargetLowering TLInfo; @@ -107,9 +111,6 @@ class AVRSubtarget : public AVRGenSubtargetInfo { bool m_hasBREAK; bool m_hasTinyEncoding; - /// The ELF e_flags architecture. - unsigned ELFArch; - // Dummy member, used by FeatureSet's. We cannot have a SubtargetFeature with // no variable, so we instead bind pseudo features to this variable. 
bool m_FeatureSetDummy; From 954d0a92205220049442506c8affef0cbdf5e03e Mon Sep 17 00:00:00 2001 From: Dylan McKay Date: Fri, 13 Mar 2020 00:51:30 +1300 Subject: [PATCH 05/16] [AVR] Fix reads of uninitialized variables from constructor of AVRSubtarget The initialization order was not correct. These bugs were discovered by valgrind. They appear to work fine in practice but this patch should unblock switching the AVR backend on by default as now a standard AVR llc invocation runs without memory errors. The AVRISelLowering constructor would run before the subtarget boolean fields were initialized to false. Now, the initialization order is correct. --- llvm/lib/Target/AVR/AVRSubtarget.cpp | 9 ++++++--- llvm/lib/Target/AVR/AVRSubtarget.h | 10 +++++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AVR/AVRSubtarget.cpp b/llvm/lib/Target/AVR/AVRSubtarget.cpp index be5c4c06225b7a..bd4a3fcb5fcd99 100644 --- a/llvm/lib/Target/AVR/AVRSubtarget.cpp +++ b/llvm/lib/Target/AVR/AVRSubtarget.cpp @@ -29,8 +29,8 @@ namespace llvm { AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const AVRTargetMachine &TM) - : AVRGenSubtargetInfo(TT, CPU, FS), ELFArch(0), InstrInfo(), FrameLowering(), - TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo(), + : AVRGenSubtargetInfo(TT, CPU, FS), + ELFArch(0), // Subtarget features m_hasSRAM(false), m_hasJMPCALL(false), m_hasIJMPCALL(false), @@ -38,7 +38,10 @@ AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU, m_hasMOVW(false), m_hasLPM(false), m_hasLPMX(false), m_hasELPM(false), m_hasELPMX(false), m_hasSPM(false), m_hasSPMX(false), m_hasDES(false), m_supportsRMW(false), m_supportsMultiplication(false), m_hasBREAK(false), - m_hasTinyEncoding(false), m_FeatureSetDummy(false) { + m_hasTinyEncoding(false), m_FeatureSetDummy(false), + + InstrInfo(), FrameLowering(), + TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo() { // Parse 
features string. ParseSubtargetFeatures(CPU, FS); } diff --git a/llvm/lib/Target/AVR/AVRSubtarget.h b/llvm/lib/Target/AVR/AVRSubtarget.h index aa813a15dc0a4d..ca4167fcb33599 100644 --- a/llvm/lib/Target/AVR/AVRSubtarget.h +++ b/llvm/lib/Target/AVR/AVRSubtarget.h @@ -85,11 +85,6 @@ class AVRSubtarget : public AVRGenSubtargetInfo { /// The ELF e_flags architecture. unsigned ELFArch; - AVRInstrInfo InstrInfo; - AVRFrameLowering FrameLowering; - AVRTargetLowering TLInfo; - AVRSelectionDAGInfo TSInfo; - // Subtarget feature settings // See AVR.td for details. bool m_hasSRAM; @@ -114,6 +109,11 @@ class AVRSubtarget : public AVRGenSubtargetInfo { // Dummy member, used by FeatureSet's. We cannot have a SubtargetFeature with // no variable, so we instead bind pseudo features to this variable. bool m_FeatureSetDummy; + + AVRInstrInfo InstrInfo; + AVRFrameLowering FrameLowering; + AVRTargetLowering TLInfo; + AVRSelectionDAGInfo TSInfo; }; } // end namespace llvm From cc4286349b4673e436fc30e1287f475fcb56104a Mon Sep 17 00:00:00 2001 From: Dylan McKay Date: Tue, 31 Mar 2020 19:00:18 +1300 Subject: [PATCH 06/16] [AVR] Respect the 'interrupt' function attribute In the past, AVR functions were only lowered with interrupt-specific machine code if the function was defined with the "avr-interrupt" or "avr-signal" calling conventions. This patch modifies the backend so that if the function does not have a special calling convention, but does have an "interrupt" attribute, that function is interpreted as a function with interrupts. This also extracts the "is this function an interrupt" logic from several disparate places in the backend into one AVRMachineFunctionInfo attribute. Bug found by Wilhelm Meier. 
--- llvm/lib/Target/AVR/AVRFrameLowering.cpp | 16 ++++++---------- llvm/lib/Target/AVR/AVRISelLowering.cpp | 8 +++++--- llvm/lib/Target/AVR/AVRMachineFunctionInfo.h | 17 +++++++++++++++-- llvm/lib/Target/AVR/AVRRegisterInfo.cpp | 13 ++++++++----- llvm/test/CodeGen/AVR/interrupts.ll | 19 +++++++++++++++++++ 5 files changed, 53 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp index e6c48de5a7820a..21dcf93797c301 100644 --- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp +++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp @@ -57,10 +57,11 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, DebugLoc DL = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc(); const AVRSubtarget &STI = MF.getSubtarget(); const AVRInstrInfo &TII = *STI.getInstrInfo(); + const AVRMachineFunctionInfo *AFI = MF.getInfo(); bool HasFP = hasFP(MF); // Interrupt handlers re-enable interrupts in function entry. - if (CallConv == CallingConv::AVR_INTR) { + if (AFI->isInterruptHandler() && CallConv != CallingConv::AVR_SIGNAL) { BuildMI(MBB, MBBI, DL, TII.get(AVR::BSETs)) .addImm(0x07) .setMIFlag(MachineInstr::FrameSetup); @@ -75,8 +76,7 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, // Emit special prologue code to save R1, R0 and SREG in interrupt/signal // handlers before saving any other registers. - if (CallConv == CallingConv::AVR_INTR || - CallConv == CallingConv::AVR_SIGNAL) { + if (AFI->isInterruptHandler()) { BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHWRr)) .addReg(AVR::R1R0, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); @@ -100,7 +100,6 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, } const MachineFrameInfo &MFI = MF.getFrameInfo(); - const AVRMachineFunctionInfo *AFI = MF.getInfo(); unsigned FrameSize = MFI.getStackSize() - AFI->getCalleeSavedFrameSize(); // Skip the callee-saved push instructions. 
@@ -143,13 +142,11 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, void AVRFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { - CallingConv::ID CallConv = MF.getFunction().getCallingConv(); - bool isHandler = (CallConv == CallingConv::AVR_INTR || - CallConv == CallingConv::AVR_SIGNAL); + const AVRMachineFunctionInfo *AFI = MF.getInfo(); // Early exit if the frame pointer is not needed in this function except for // signal/interrupt handlers where special code generation is required. - if (!hasFP(MF) && !isHandler) { + if (!hasFP(MF) && !AFI->isInterruptHandler()) { return; } @@ -159,14 +156,13 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF, DebugLoc DL = MBBI->getDebugLoc(); const MachineFrameInfo &MFI = MF.getFrameInfo(); - const AVRMachineFunctionInfo *AFI = MF.getInfo(); unsigned FrameSize = MFI.getStackSize() - AFI->getCalleeSavedFrameSize(); const AVRSubtarget &STI = MF.getSubtarget(); const AVRInstrInfo &TII = *STI.getInstrInfo(); // Emit special epilogue code to restore R1, R0 and SREG in interrupt/signal // handlers at the very end of the function, just before reti. - if (isHandler) { + if (AFI->isInterruptHandler()) { BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0); BuildMI(MBB, MBBI, DL, TII.get(AVR::OUTARr)) .addImm(0x3f) diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index 880688807702d0..2841e3642ef49f 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -1415,10 +1415,12 @@ AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, return Chain; } + const AVRMachineFunctionInfo *AFI = MF.getInfo(); + unsigned RetOpc = - (CallConv == CallingConv::AVR_INTR || CallConv == CallingConv::AVR_SIGNAL) - ? AVRISD::RETI_FLAG - : AVRISD::RET_FLAG; + AFI->isInterruptHandler() + ? AVRISD::RETI_FLAG + : AVRISD::RET_FLAG; RetOps[0] = Chain; // Update chain. 
diff --git a/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h b/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h index 5226e30491c355..5fa6bb51334876 100644 --- a/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h +++ b/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h @@ -31,6 +31,9 @@ class AVRMachineFunctionInfo : public MachineFunctionInfo { /// used inside the function. bool HasStackArgs; + /// Whether or not the function is an interrupt handler. + bool IsInterruptHandler; + /// Size of the callee-saved register portion of the /// stack frame in bytes. unsigned CalleeSavedFrameSize; @@ -41,11 +44,19 @@ class AVRMachineFunctionInfo : public MachineFunctionInfo { public: AVRMachineFunctionInfo() : HasSpills(false), HasAllocas(false), HasStackArgs(false), - CalleeSavedFrameSize(0), VarArgsFrameIndex(0) {} + IsInterruptHandler(false), CalleeSavedFrameSize(0), + VarArgsFrameIndex(0) {} explicit AVRMachineFunctionInfo(MachineFunction &MF) : HasSpills(false), HasAllocas(false), HasStackArgs(false), - CalleeSavedFrameSize(0), VarArgsFrameIndex(0) {} + CalleeSavedFrameSize(0), VarArgsFrameIndex(0) { + unsigned CallConv = MF.getFunction().getCallingConv(); + + this->IsInterruptHandler = + CallConv == CallingConv::AVR_INTR || + CallConv == CallingConv::AVR_SIGNAL || + MF.getFunction().hasFnAttribute("interrupt"); + } bool getHasSpills() const { return HasSpills; } void setHasSpills(bool B) { HasSpills = B; } @@ -56,6 +67,8 @@ class AVRMachineFunctionInfo : public MachineFunctionInfo { bool getHasStackArgs() const { return HasStackArgs; } void setHasStackArgs(bool B) { HasStackArgs = B; } + bool isInterruptHandler() const { return IsInterruptHandler; } + unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } void setCalleeSavedFrameSize(unsigned Bytes) { CalleeSavedFrameSize = Bytes; } diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp index 8fce05c933bc68..0089f37ac2040a 100644 --- a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp +++ 
b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp @@ -22,6 +22,7 @@ #include "AVR.h" #include "AVRInstrInfo.h" +#include "AVRMachineFunctionInfo.h" #include "AVRTargetMachine.h" #include "MCTargetDesc/AVRMCTargetDesc.h" @@ -34,19 +35,21 @@ AVRRegisterInfo::AVRRegisterInfo() : AVRGenRegisterInfo(0) {} const uint16_t * AVRRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - CallingConv::ID CC = MF->getFunction().getCallingConv(); + const AVRMachineFunctionInfo *AFI = MF->getInfo(); - return ((CC == CallingConv::AVR_INTR || CC == CallingConv::AVR_SIGNAL) + return AFI->isInterruptHandler() ? CSR_Interrupts_SaveList - : CSR_Normal_SaveList); + : CSR_Normal_SaveList; } const uint32_t * AVRRegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { - return ((CC == CallingConv::AVR_INTR || CC == CallingConv::AVR_SIGNAL) + const AVRMachineFunctionInfo *AFI = MF.getInfo(); + + return AFI->isInterruptHandler() ? CSR_Interrupts_RegMask - : CSR_Normal_RegMask); + : CSR_Normal_RegMask; } BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const { diff --git a/llvm/test/CodeGen/AVR/interrupts.ll b/llvm/test/CodeGen/AVR/interrupts.ll index 5dddf0ea701847..da8a5ce46f465f 100644 --- a/llvm/test/CodeGen/AVR/interrupts.ll +++ b/llvm/test/CodeGen/AVR/interrupts.ll @@ -16,6 +16,22 @@ define avr_intrcc void @interrupt_handler() { ret void } +define void @interrupt_handler_via_ir_attribute() #0 { +; CHECK-LABEL: interrupt_handler_via_ir_attribute: +; CHECK: sei +; CHECK-NEXT: push r0 +; CHECK-NEXT: push r1 +; CHECK-NEXT: in r0, 63 +; CHECK-NEXT: push r0 +; CHECK: clr r0 +; CHECK: pop r0 +; CHECK-NEXT: out 63, r0 +; CHECK-NEXT: pop r1 +; CHECK-NEXT: pop r0 +; CHECK-NEXT: reti + ret void +} + define avr_signalcc void @signal_handler() { ; CHECK-LABEL: signal_handler: ; CHECK-NOT: sei @@ -31,3 +47,6 @@ define avr_signalcc void @signal_handler() { ; CHECK-NEXT: reti ret void } + +attributes #0 = { "interrupt" } +attributes #1 = { 
"signal" } From 93a3b595d1c96b299f795fd2409a6776e4eb83ca Mon Sep 17 00:00:00 2001 From: Dylan McKay Date: Tue, 31 Mar 2020 19:28:01 +1300 Subject: [PATCH 07/16] [AVR] Generalize the previous interrupt bugfix to signal handlers too --- llvm/lib/Target/AVR/AVRFrameLowering.cpp | 8 ++++---- llvm/lib/Target/AVR/AVRISelLowering.cpp | 2 +- llvm/lib/Target/AVR/AVRMachineFunctionInfo.h | 17 +++++++++++------ llvm/lib/Target/AVR/AVRRegisterInfo.cpp | 4 ++-- llvm/test/CodeGen/AVR/interrupts.ll | 16 ++++++++++++++++ 5 files changed, 34 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp index 21dcf93797c301..3963758187c195 100644 --- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp +++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp @@ -61,7 +61,7 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, bool HasFP = hasFP(MF); // Interrupt handlers re-enable interrupts in function entry. - if (AFI->isInterruptHandler() && CallConv != CallingConv::AVR_SIGNAL) { + if (AFI->isInterruptHandler()) { BuildMI(MBB, MBBI, DL, TII.get(AVR::BSETs)) .addImm(0x07) .setMIFlag(MachineInstr::FrameSetup); @@ -76,7 +76,7 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, // Emit special prologue code to save R1, R0 and SREG in interrupt/signal // handlers before saving any other registers. - if (AFI->isInterruptHandler()) { + if (AFI->isInterruptOrSignalHandler()) { BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHWRr)) .addReg(AVR::R1R0, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); @@ -146,7 +146,7 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF, // Early exit if the frame pointer is not needed in this function except for // signal/interrupt handlers where special code generation is required. 
- if (!hasFP(MF) && !AFI->isInterruptHandler()) { + if (!hasFP(MF) && !AFI->isInterruptOrSignalHandler()) { return; } @@ -162,7 +162,7 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF, // Emit special epilogue code to restore R1, R0 and SREG in interrupt/signal // handlers at the very end of the function, just before reti. - if (AFI->isInterruptHandler()) { + if (AFI->isInterruptOrSignalHandler()) { BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0); BuildMI(MBB, MBBI, DL, TII.get(AVR::OUTARr)) .addImm(0x3f) diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index 2841e3642ef49f..449e593d8f2483 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -1418,7 +1418,7 @@ AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, const AVRMachineFunctionInfo *AFI = MF.getInfo(); unsigned RetOpc = - AFI->isInterruptHandler() + AFI->isInterruptOrSignalHandler() ? AVRISD::RETI_FLAG : AVRISD::RET_FLAG; diff --git a/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h b/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h index 5fa6bb51334876..5432fac122efb2 100644 --- a/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h +++ b/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h @@ -34,6 +34,9 @@ class AVRMachineFunctionInfo : public MachineFunctionInfo { /// Whether or not the function is an interrupt handler. bool IsInterruptHandler; + /// Whether or not the function is an non-blocking interrupt handler. + bool IsSignalHandler; + /// Size of the callee-saved register portion of the /// stack frame in bytes. 
unsigned CalleeSavedFrameSize; @@ -44,18 +47,16 @@ class AVRMachineFunctionInfo : public MachineFunctionInfo { public: AVRMachineFunctionInfo() : HasSpills(false), HasAllocas(false), HasStackArgs(false), - IsInterruptHandler(false), CalleeSavedFrameSize(0), - VarArgsFrameIndex(0) {} + IsInterruptHandler(false), IsSignalHandler(false), + CalleeSavedFrameSize(0), VarArgsFrameIndex(0) {} explicit AVRMachineFunctionInfo(MachineFunction &MF) : HasSpills(false), HasAllocas(false), HasStackArgs(false), CalleeSavedFrameSize(0), VarArgsFrameIndex(0) { unsigned CallConv = MF.getFunction().getCallingConv(); - this->IsInterruptHandler = - CallConv == CallingConv::AVR_INTR || - CallConv == CallingConv::AVR_SIGNAL || - MF.getFunction().hasFnAttribute("interrupt"); + this->IsInterruptHandler = CallConv == CallingConv::AVR_INTR || MF.getFunction().hasFnAttribute("interrupt"); + this->IsSignalHandler = CallConv == CallingConv::AVR_SIGNAL || MF.getFunction().hasFnAttribute("signal"); } bool getHasSpills() const { return HasSpills; } @@ -67,7 +68,11 @@ class AVRMachineFunctionInfo : public MachineFunctionInfo { bool getHasStackArgs() const { return HasStackArgs; } void setHasStackArgs(bool B) { HasStackArgs = B; } + /// Checks if the function is some form of interrupt service routine. 
+ bool isInterruptOrSignalHandler() const { return isInterruptHandler() || isSignalHandler(); } + bool isInterruptHandler() const { return IsInterruptHandler; } + bool isSignalHandler() const { return IsSignalHandler; } unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } void setCalleeSavedFrameSize(unsigned Bytes) { CalleeSavedFrameSize = Bytes; } diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp index 0089f37ac2040a..d74c40c03d1539 100644 --- a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp @@ -37,7 +37,7 @@ const uint16_t * AVRRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const AVRMachineFunctionInfo *AFI = MF->getInfo(); - return AFI->isInterruptHandler() + return AFI->isInterruptOrSignalHandler() ? CSR_Interrupts_SaveList : CSR_Normal_SaveList; } @@ -47,7 +47,7 @@ AVRRegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { const AVRMachineFunctionInfo *AFI = MF.getInfo(); - return AFI->isInterruptHandler() + return AFI->isInterruptOrSignalHandler() ? 
CSR_Interrupts_RegMask : CSR_Normal_RegMask; } diff --git a/llvm/test/CodeGen/AVR/interrupts.ll b/llvm/test/CodeGen/AVR/interrupts.ll index da8a5ce46f465f..b402d867e12b20 100644 --- a/llvm/test/CodeGen/AVR/interrupts.ll +++ b/llvm/test/CodeGen/AVR/interrupts.ll @@ -48,5 +48,21 @@ define avr_signalcc void @signal_handler() { ret void } +define void @signal_handler_via_attribute() #1 { +; CHECK-LABEL: signal_handler_via_attribute: +; CHECK-NOT: sei +; CHECK: push r0 +; CHECK-NEXT: push r1 +; CHECK-NEXT: in r0, 63 +; CHECK-NEXT: push r0 +; CHECK: clr r0 +; CHECK: pop r0 +; CHECK-NEXT: out 63, r0 +; CHECK-NEXT: pop r1 +; CHECK-NEXT: pop r0 +; CHECK-NEXT: reti + ret void +} + attributes #0 = { "interrupt" } attributes #1 = { "signal" } From 65b8b170aef1638fb7d696afc03a07e474f91292 Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Sat, 18 Apr 2020 23:42:48 +0200 Subject: [PATCH 08/16] [AVR] Do not use divmod calls for bigger integers The avr-libc provides *divmodqi4, *divmodhi4, and *divmodsi4 functions, but does not provide a *divmoddi4. Instead it provides regular *divdi3 and *moddi3 functions. Note that avr-libc doesn't support *divti3 or *modti3 for 128-bit integer manipulation. 
Source: https://github.com/gcc-mirror/gcc/blob/releases/gcc-5.4.0/libgcc/config/avr/lib1funcs.S Differential Revision: https://reviews.llvm.org/D78437 --- llvm/lib/Target/AVR/AVRISelLowering.cpp | 32 +++++++------------------ llvm/test/CodeGen/AVR/div.ll | 24 ++++--------------- 2 files changed, 12 insertions(+), 44 deletions(-) diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index 449e593d8f2483..fcf90248270698 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -151,10 +151,12 @@ AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM, setOperationAction(ISD::SREM, MVT::i16, Expand); // Make division and modulus custom - for (MVT VT : MVT::integer_valuetypes()) { - setOperationAction(ISD::UDIVREM, VT, Custom); - setOperationAction(ISD::SDIVREM, VT, Custom); - } + setOperationAction(ISD::UDIVREM, MVT::i8, Custom); + setOperationAction(ISD::UDIVREM, MVT::i16, Custom); + setOperationAction(ISD::UDIVREM, MVT::i32, Custom); + setOperationAction(ISD::SDIVREM, MVT::i8, Custom); + setOperationAction(ISD::SDIVREM, MVT::i16, Custom); + setOperationAction(ISD::SDIVREM, MVT::i32, Custom); // Do not use MUL. The AVR instructions are closer to SMUL_LOHI &co. setOperationAction(ISD::MUL, MVT::i8, Expand); @@ -190,41 +192,29 @@ AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM, // improvements in how we treat 16-bit "registers" to be feasible. 
} - // Division rtlib functions (not supported) + // Division rtlib functions (not supported), use divmod functions instead setLibcallName(RTLIB::SDIV_I8, nullptr); setLibcallName(RTLIB::SDIV_I16, nullptr); setLibcallName(RTLIB::SDIV_I32, nullptr); - setLibcallName(RTLIB::SDIV_I64, nullptr); - setLibcallName(RTLIB::SDIV_I128, nullptr); setLibcallName(RTLIB::UDIV_I8, nullptr); setLibcallName(RTLIB::UDIV_I16, nullptr); setLibcallName(RTLIB::UDIV_I32, nullptr); - setLibcallName(RTLIB::UDIV_I64, nullptr); - setLibcallName(RTLIB::UDIV_I128, nullptr); - // Modulus rtlib functions (not supported) + // Modulus rtlib functions (not supported), use divmod functions instead setLibcallName(RTLIB::SREM_I8, nullptr); setLibcallName(RTLIB::SREM_I16, nullptr); setLibcallName(RTLIB::SREM_I32, nullptr); - setLibcallName(RTLIB::SREM_I64, nullptr); - setLibcallName(RTLIB::SREM_I128, nullptr); setLibcallName(RTLIB::UREM_I8, nullptr); setLibcallName(RTLIB::UREM_I16, nullptr); setLibcallName(RTLIB::UREM_I32, nullptr); - setLibcallName(RTLIB::UREM_I64, nullptr); - setLibcallName(RTLIB::UREM_I128, nullptr); // Division and modulus rtlib functions setLibcallName(RTLIB::SDIVREM_I8, "__divmodqi4"); setLibcallName(RTLIB::SDIVREM_I16, "__divmodhi4"); setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); - setLibcallName(RTLIB::SDIVREM_I64, "__divmoddi4"); - setLibcallName(RTLIB::SDIVREM_I128, "__divmodti4"); setLibcallName(RTLIB::UDIVREM_I8, "__udivmodqi4"); setLibcallName(RTLIB::UDIVREM_I16, "__udivmodhi4"); setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); - setLibcallName(RTLIB::UDIVREM_I64, "__udivmoddi4"); - setLibcallName(RTLIB::UDIVREM_I128, "__udivmodti4"); // Several of the runtime library functions use a special calling conv setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::AVR_BUILTIN); @@ -357,12 +347,6 @@ SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { case MVT::i32: LC = IsSigned ? 
RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; - case MVT::i64: - LC = IsSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; - break; - case MVT::i128: - LC = IsSigned ? RTLIB::SDIVREM_I128 : RTLIB::UDIVREM_I128; - break; } SDValue InChain = DAG.getEntryNode(); diff --git a/llvm/test/CodeGen/AVR/div.ll b/llvm/test/CodeGen/AVR/div.ll index 7626ecb8172242..b22229421963ba 100644 --- a/llvm/test/CodeGen/AVR/div.ll +++ b/llvm/test/CodeGen/AVR/div.ll @@ -65,15 +65,7 @@ define i32 @sdiv32(i32 %a, i32 %b) { ; Unsigned 64-bit division define i64 @udiv64(i64 %a, i64 %b) { ; CHECK-LABEL: udiv64: -; CHECK: call __udivmoddi4 -; CHECK-NEXT: ldd r18, Y+1 -; CHECK-NEXT: ldd r19, Y+2 -; CHECK-NEXT: ldd r20, Y+3 -; CHECK-NEXT: ldd r21, Y+4 -; CHECK-NEXT: ldd r22, Y+5 -; CHECK-NEXT: ldd r23, Y+6 -; CHECK-NEXT: ldd r24, Y+7 -; CHECK-NEXT: ldd r25, Y+8 +; CHECK: call __udivdi3 ; CHECK: ret %quot = udiv i64 %a, %b ret i64 %quot @@ -82,15 +74,7 @@ define i64 @udiv64(i64 %a, i64 %b) { ; Signed 64-bit division define i64 @sdiv64(i64 %a, i64 %b) { ; CHECK-LABEL: sdiv64: -; CHECK: call __divmoddi4 -; CHECK-NEXT: ldd r18, Y+1 -; CHECK-NEXT: ldd r19, Y+2 -; CHECK-NEXT: ldd r20, Y+3 -; CHECK-NEXT: ldd r21, Y+4 -; CHECK-NEXT: ldd r22, Y+5 -; CHECK-NEXT: ldd r23, Y+6 -; CHECK-NEXT: ldd r24, Y+7 -; CHECK-NEXT: ldd r25, Y+8 +; CHECK: call __divdi3 ; CHECK: ret %quot = sdiv i64 %a, %b ret i64 %quot @@ -99,7 +83,7 @@ define i64 @sdiv64(i64 %a, i64 %b) { ; Unsigned 128-bit division define i128 @udiv128(i128 %a, i128 %b) { ; CHECK-LABEL: udiv128: -; CHECK: call __udivmodti4 +; CHECK: call __udivti3 ; CHECK: ret %quot = udiv i128 %a, %b ret i128 %quot @@ -108,7 +92,7 @@ define i128 @udiv128(i128 %a, i128 %b) { ; Signed 128-bit division define i128 @sdiv128(i128 %a, i128 %b) { ; CHECK-LABEL: sdiv128: -; CHECK: call __divmodti4 +; CHECK: call __divti3 ; CHECK: ret %quot = sdiv i128 %a, %b ret i128 %quot From 962c2415ffbd9ef9e3be66b84c286159591e8d9e Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Wed, 
15 Apr 2020 17:30:47 +0200 Subject: [PATCH 09/16] [AVR] Do not place functions in .progmem.data Previously, the AVR backend would put functions in .progmem.data. This is probably a regression from when functions still lived in address space 0. With this change, only global constants are placed in .progmem.data. This is not complete: avr-gcc additionally respects -fdata-sections for progmem global constants, which LLVM doesn't yet do. But fixing that is a bit more complicated (and I believe other backends such as RISC-V might also have similar issues). Differential Revision: https://reviews.llvm.org/D78212 --- llvm/lib/Target/AVR/AVRTargetObjectFile.cpp | 2 +- llvm/test/CodeGen/AVR/sections.ll | 31 +++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AVR/sections.ll diff --git a/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp b/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp index 980096a098351f..14206cdb827631 100644 --- a/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp +++ b/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp @@ -30,7 +30,7 @@ AVRTargetObjectFile::SelectSectionForGlobal(const GlobalObject *GO, const TargetMachine &TM) const { // Global values in flash memory are placed in the progmem.data section // unless they already have a user assigned section. - if (AVR::isProgramMemoryAddress(GO) && !GO->hasSection()) + if (AVR::isProgramMemoryAddress(GO) && !GO->hasSection() && Kind.isReadOnly()) return ProgmemDataSection; // Otherwise, we work the same way as ELF. 
diff --git a/llvm/test/CodeGen/AVR/sections.ll b/llvm/test/CodeGen/AVR/sections.ll new file mode 100644 index 00000000000000..6d125d37f917ed --- /dev/null +++ b/llvm/test/CodeGen/AVR/sections.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -march=avr | FileCheck --check-prefixes=CHECK,NOSECTIONS %s +; RUN: llc -function-sections -data-sections < %s -march=avr | FileCheck --check-prefixes=CHECK,SECTIONS %s + +; Test that functions (in address space 1) are not considered .progmem data. + +; CHECK: .text +; SECTIONS: .text.somefunc,"ax",@progbits +; CHECK-LABEL: somefunc: +define void @somefunc() addrspace(1) { + ret void +} + + +; Test whether global variables are placed in the correct section. + +; Note: avr-gcc would place this global in .progmem.data.flash with +; -fdata-sections. The AVR backend does not yet respect -fdata-sections in this +; case. +; CHECK: .section .progmem.data,"a",@progbits +; CHECK-LABEL: flash: +@flash = addrspace(1) constant i16 3 + +; NOSECTIONS: .section .rodata,"a",@progbits +; SECTIONS: .section .rodata.ram1,"a",@progbits +; CHECK-LABEL: ram1: +@ram1 = constant i16 3 + +; NOSECTIONS: .data +; SECTIONS: .section .data.ram2,"aw",@progbits +; CHECK-LABEL: ram2: +@ram2 = global i16 3 From 93ee4da19cff2a94907b7fef2986b52e40d21a4e Mon Sep 17 00:00:00 2001 From: Dylan McKay Date: Sun, 17 May 2020 16:25:12 +1200 Subject: [PATCH 10/16] [AVR] Fix I/O instructions on XMEGA Summary: On XMEGA, I/O address space is same as data address space - there is no 0x20 offset, because CPU General Purpose Registers are not mapped in data address space. From https://en.wikipedia.org/wiki/AVR_microcontrollers > In the XMEGA variant, the working register file is not mapped into the data address space; as such, it is not possible to treat any of the XMEGA's working registers as though they were SRAM. Instead, the I/O registers are mapped into the data address space starting at the very beginning of the address space. 
Reviewers: dylanmckay Reviewed By: dylanmckay Subscribers: hiraditya, Jim, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D77207 Patch by Vlastimil Labsky. --- llvm/lib/Target/AVR/AVRDevices.td | 18 +++++--- llvm/lib/Target/AVR/AVRInstrInfo.td | 19 +++++---- llvm/lib/Target/AVR/AVRSubtarget.cpp | 8 ++-- llvm/lib/Target/AVR/AVRSubtarget.h | 4 ++ llvm/test/CodeGen/AVR/features/xmega_io.ll | 48 ++++++++++++++++++++++ 5 files changed, 81 insertions(+), 16 deletions(-) create mode 100644 llvm/test/CodeGen/AVR/features/xmega_io.ll diff --git a/llvm/lib/Target/AVR/AVRDevices.td b/llvm/lib/Target/AVR/AVRDevices.td index 62def457443727..6730f2e1673e3e 100644 --- a/llvm/lib/Target/AVR/AVRDevices.td +++ b/llvm/lib/Target/AVR/AVRDevices.td @@ -121,6 +121,11 @@ def FeatureTinyEncoding : SubtargetFeature<"tinyencoding", "The device has Tiny core specific " "instruction encodings">; +// The device has CPU registers mapped in data address space +def FeatureMMR : SubtargetFeature<"memmappedregs", "m_hasMemMappedGPR", + "true", "The device has CPU registers " + "mapped in data address space">; + class ELFArch<string name> : SubtargetFeature<"", "ELFArch", !strconcat("ELF::",name), "">; @@ -152,7 +157,7 @@ def ELFArchXMEGA7 : ELFArch<"EF_AVR_ARCH_XMEGA7">; // device should have.
def FamilyAVR0 : Family<"avr0", []>; -def FamilyAVR1 : Family<"avr1", [FamilyAVR0, FeatureLPM]>; +def FamilyAVR1 : Family<"avr1", [FamilyAVR0, FeatureLPM, FeatureMMR]>; def FamilyAVR2 : Family<"avr2", [FamilyAVR1, FeatureIJMPCALL, FeatureADDSUBIW, @@ -190,11 +195,14 @@ def FamilyAVR6 : Family<"avr6", def FamilyTiny : Family<"avrtiny", [FamilyAVR0, FeatureBREAK, FeatureSRAM, - FeatureTinyEncoding]>; + FeatureTinyEncoding, FeatureMMR]>; def FamilyXMEGA : Family<"xmega", - [FamilyAVR51, FeatureEIJMPCALL, FeatureSPMX, - FeatureDES]>; + [FamilyAVR0, FeatureLPM, FeatureIJMPCALL, FeatureADDSUBIW, + FeatureSRAM, FeatureJMPCALL, FeatureMultiplication, + FeatureMOVW, FeatureLPMX, FeatureSPM, + FeatureBREAK, FeatureEIJMPCALL, FeatureSPMX, + FeatureDES, FeatureELPM, FeatureELPMX]>; def FamilyXMEGAU : Family<"xmegau", [FamilyXMEGA, FeatureRMW]>; @@ -208,7 +216,7 @@ def FeatureSetSpecial : FeatureSet<"special", FeatureLPM, FeatureLPMX, FeatureELPM, FeatureELPMX, FeatureSPM, FeatureSPMX, FeatureDES, FeatureRMW, - FeatureMultiplication, FeatureBREAK]>; + FeatureMultiplication, FeatureBREAK, FeatureMMR]>; //===---------------------------------------------------------------------===// // AVR microcontrollers supported. 
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index 0f4672684cfbea..0303f86383f911 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -107,7 +107,9 @@ def imm_com8 : Operand { def ioaddr_XFORM : SDNodeXFormgetTargetConstant(uint8_t(N->getZExtValue()) - 0x20, SDLoc(N), MVT::i8); + uint8_t offset = Subtarget->getIORegisterOffset(); + return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()) - offset, + SDLoc(N), MVT::i8); }]>; def iobitpos8_XFORM : SDNodeXFormgetZExtValue(); - return val >= 0x20 && val < 0x60; + uint8_t offset = Subtarget->getIORegisterOffset(); + uint64_t val = N->getZExtValue() - offset; + return val >= 0x0 && val < 0x40; }], ioaddr_XFORM>; def lowioaddr8 : PatLeaf<(imm), [{ - uint64_t val = N->getZExtValue(); - return val >= 0x20 && val < 0x40; + uint8_t offset = Subtarget->getIORegisterOffset(); + uint64_t val = N->getZExtValue() - offset; + return val >= 0x0 && val < 0x20; }], ioaddr_XFORM>; def ioaddr16 : PatLeaf<(imm), [{ - uint64_t val = N->getZExtValue(); - return val >= 0x20 && val < 0x5f; + uint8_t offset = Subtarget->getIORegisterOffset(); + uint64_t val = N->getZExtValue() - offset; + return val >= 0x0 && val < 0x3f; }], ioaddr_XFORM>; def iobitpos8 : PatLeaf<(imm), diff --git a/llvm/lib/Target/AVR/AVRSubtarget.cpp b/llvm/lib/Target/AVR/AVRSubtarget.cpp index bd4a3fcb5fcd99..195ca95bc3bd85 100644 --- a/llvm/lib/Target/AVR/AVRSubtarget.cpp +++ b/llvm/lib/Target/AVR/AVRSubtarget.cpp @@ -29,16 +29,16 @@ namespace llvm { AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const AVRTargetMachine &TM) - : AVRGenSubtargetInfo(TT, CPU, FS), - ELFArch(0), + : AVRGenSubtargetInfo(TT, CPU, FS), ELFArch(0), // Subtarget features m_hasSRAM(false), m_hasJMPCALL(false), m_hasIJMPCALL(false), m_hasEIJMPCALL(false), m_hasADDSUBIW(false), m_hasSmallStack(false), - m_hasMOVW(false), m_hasLPM(false), m_hasLPMX(false), 
m_hasELPM(false), + m_hasMOVW(false), m_hasLPM(false), m_hasLPMX(false), m_hasELPM(false), m_hasELPMX(false), m_hasSPM(false), m_hasSPMX(false), m_hasDES(false), m_supportsRMW(false), m_supportsMultiplication(false), m_hasBREAK(false), - m_hasTinyEncoding(false), m_FeatureSetDummy(false), + m_hasTinyEncoding(false), m_hasMemMappedGPR(false), + m_FeatureSetDummy(false), InstrInfo(), FrameLowering(), TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo() { diff --git a/llvm/lib/Target/AVR/AVRSubtarget.h b/llvm/lib/Target/AVR/AVRSubtarget.h index ca4167fcb33599..81d883eb30d9c6 100644 --- a/llvm/lib/Target/AVR/AVRSubtarget.h +++ b/llvm/lib/Target/AVR/AVRSubtarget.h @@ -71,6 +71,9 @@ class AVRSubtarget : public AVRGenSubtargetInfo { bool supportsMultiplication() const { return m_supportsMultiplication; } bool hasBREAK() const { return m_hasBREAK; } bool hasTinyEncoding() const { return m_hasTinyEncoding; } + bool hasMemMappedGPR() const { return m_hasMemMappedGPR; } + + uint8_t getIORegisterOffset() const { return hasMemMappedGPR() ? 0x20 : 0x0; } /// Gets the ELF architecture for the e_flags field /// of an ELF object file. @@ -105,6 +108,7 @@ class AVRSubtarget : public AVRGenSubtargetInfo { bool m_supportsMultiplication; bool m_hasBREAK; bool m_hasTinyEncoding; + bool m_hasMemMappedGPR; // Dummy member, used by FeatureSet's. We cannot have a SubtargetFeature with // no variable, so we instead bind pseudo features to this variable. 
diff --git a/llvm/test/CodeGen/AVR/features/xmega_io.ll b/llvm/test/CodeGen/AVR/features/xmega_io.ll new file mode 100644 index 00000000000000..713b2dec346a96 --- /dev/null +++ b/llvm/test/CodeGen/AVR/features/xmega_io.ll @@ -0,0 +1,48 @@ +; RUN: llc -O0 < %s -march=avr -mcpu avrxmega1 | FileCheck %s -check-prefix=XMEGA +; RUN: llc -O0 < %s -march=avr -mcpu avrxmega2 | FileCheck %s -check-prefix=XMEGA +; RUN: llc -O0 < %s -march=avr -mcpu avrxmega3 | FileCheck %s -check-prefix=XMEGA +; RUN: llc -O0 < %s -march=avr -mcpu avrxmega4 | FileCheck %s -check-prefix=XMEGA +; RUN: llc -O0 < %s -march=avr -mcpu avrxmega5 | FileCheck %s -check-prefix=XMEGA +; RUN: llc -O0 < %s -march=avr -mcpu avrxmega6 | FileCheck %s -check-prefix=XMEGA +; RUN: llc -O0 < %s -march=avr -mcpu avrxmega7 | FileCheck %s -check-prefix=XMEGA +; RUN: llc -O0 < %s -march=avr -mcpu avr2 | FileCheck %s -check-prefix=AVR +; RUN: llc -O0 < %s -march=avr -mcpu avr25 | FileCheck %s -check-prefix=AVR +; RUN: llc -O0 < %s -march=avr -mcpu avr3 | FileCheck %s -check-prefix=AVR +; RUN: llc -O0 < %s -march=avr -mcpu avr31 | FileCheck %s -check-prefix=AVR +; RUN: llc -O0 < %s -march=avr -mcpu avr35 | FileCheck %s -check-prefix=AVR +; RUN: llc -O0 < %s -march=avr -mcpu avr4 | FileCheck %s -check-prefix=AVR +; RUN: llc -O0 < %s -march=avr -mcpu avr5 | FileCheck %s -check-prefix=AVR +; RUN: llc -O0 < %s -march=avr -mcpu avr51 | FileCheck %s -check-prefix=AVR +; RUN: llc -O0 < %s -march=avr -mcpu avr6 | FileCheck %s -check-prefix=AVR + +define i8 @read8_low_io() { +; CHECK-LABEL: read8_low_io +; XMEGA: in r24, 8 +; AVR: lds r24, 8 + %1 = load i8, i8* inttoptr (i16 8 to i8*) + ret i8 %1 +} + +define i8 @read8_hi_io() { +; CHECK-LABEL: read8_hi_io +; XMEGA: in r24, 40 +; AVR: in r24, 8 + %1 = load i8, i8* inttoptr (i16 40 to i8*) + ret i8 %1 +} + +define i8 @read8_maybe_io() { +; CHECK-LABEL: read8_maybe_io +; XMEGA: lds r24, 80 +; AVR: in r24, 48 + %1 = load i8, i8* inttoptr (i16 80 to i8*) + ret i8 %1 +} + +define 
i8 @read8_not_io(){ +; CHECK-LABEL: read8_not_io +; XMEGA: lds r24, 160 +; AVR: lds r24, 160 + %1 = load i8, i8* inttoptr (i16 160 to i8*) + ret i8 %1 +} From 6b2445d841e4c6947b718d0832d2c9c4de646a68 Mon Sep 17 00:00:00 2001 From: Dylan McKay Date: Sun, 17 May 2020 16:46:32 +1200 Subject: [PATCH 11/16] [LLVM][AVR] Support for R_AVR_6 fixup Summary: Handle the emission of `R_AVR_6` ELF relocation type. Reviewers: dylanmckay Reviewed By: dylanmckay Subscribers: hiraditya, Jim, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78721 Patch by @LemonBoy https://reviews.llvm.org/p/LemonBoy/ --- .../lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp | 15 +++++++++++++++ llvm/test/MC/AVR/relocations.s | 3 +++ 2 files changed, 18 insertions(+) diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp index 1a741c9077507d..b6c542a106372d 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp @@ -139,6 +139,18 @@ void fixup_13_pcrel(unsigned Size, const MCFixup &Fixup, uint64_t &Value, Value &= 0xfff; } +/// 6-bit fixup for the immediate operand of the STD/LDD family of +/// instructions. +/// +/// Resolves to: +/// 10q0 qq10 0000 1qqq +static void fixup_6(const MCFixup &Fixup, uint64_t &Value, + MCContext *Ctx = nullptr) { + unsigned_width(6, Value, std::string("immediate"), Fixup, Ctx); + + Value = ((Value & 0x20) << 8) | ((Value & 0x18) << 7) | (Value & 0x07); +} + /// 6-bit fixup for the immediate operand of the ADIW family of /// instructions. 
/// @@ -336,6 +348,9 @@ void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup, Value &= 0xffff; break; + case AVR::fixup_6: + adjust::fixup_6(Fixup, Value, Ctx); + break; case AVR::fixup_6_adiw: adjust::fixup_6_adiw(Fixup, Value, Ctx); break; diff --git a/llvm/test/MC/AVR/relocations.s b/llvm/test/MC/AVR/relocations.s index 3d22e5d0bad095..666ff3d30ca4c1 100644 --- a/llvm/test/MC/AVR/relocations.s +++ b/llvm/test/MC/AVR/relocations.s @@ -9,6 +9,9 @@ bar: ; CHECK: R_AVR_LDI SYMBOL+0x3 ldi r21, SYMBOL+3 +; CHECK: R_AVR_6 SYMBOL+0x4 +ldd r8, Y+SYMBOL+4 + ; CHECK-NEXT: R_AVR_6_ADIW FOO adiw r24, FOO From 143e1469e962caf1c366fbf8f139c4c1efe9f20f Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Tue, 21 Apr 2020 20:19:56 +0200 Subject: [PATCH 12/16] [AVR] Fix stack size in functions with a frame pointer This patch fixes a bug in stack save/restore code. Because the frame pointer was saved/restored manually (not by marking it as clobbered) the StackSize variable was not updated accordingly. Most code still worked, but code that tried to load a parameter passed on the stack did not. This commit fixes this by marking the frame pointer as a callee-clobbered register. This will let it be saved without any effort in prolog/epilog code and will make sure the correct address is calculated for loading parameters that are passed on the stack. This approach is used by most other targets (such as X86, AArch64 and RISC-V). 
Differential Revision: https://reviews.llvm.org/D78579 --- llvm/lib/Target/AVR/AVRFrameLowering.cpp | 16 ++++--------- llvm/test/CodeGen/AVR/calling-conv/c/stack.ll | 8 +++---- llvm/test/CodeGen/AVR/return.ll | 24 +++++++++---------- llvm/test/CodeGen/AVR/varargs.ll | 6 ++--- 4 files changed, 23 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp index 3963758187c195..0aadff9bd06559 100644 --- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp +++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp @@ -67,13 +67,6 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } - // Save the frame pointer if we have one. - if (HasFP) { - BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHWRr)) - .addReg(AVR::R29R28, RegState::Kill) - .setMIFlag(MachineInstr::FrameSetup); - } - // Emit special prologue code to save R1, R0 and SREG in interrupt/signal // handlers before saving any other registers. if (AFI->isInterruptOrSignalHandler()) { @@ -170,9 +163,6 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R1R0); } - if (hasFP(MF)) - BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R29R28); - // Early exit if there is no need to restore the frame pointer. if (!FrameSize) { return; @@ -415,8 +405,10 @@ void AVRFrameLowering::determineCalleeSaves(MachineFunction &MF, TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); // If we have a frame pointer, the Y register needs to be saved as well. - // We don't do that here however - the prologue and epilogue generation - // code will handle it specially. + if (hasFP(MF)) { + SavedRegs.set(AVR::R29); + SavedRegs.set(AVR::R28); + } } /// The frame analyzer pass. 
/// diff --git a/llvm/test/CodeGen/AVR/calling-conv/c/stack.ll b/llvm/test/CodeGen/AVR/calling-conv/c/stack.ll index 52b6427476ab15..00ff7d1acd8022 100644 --- a/llvm/test/CodeGen/AVR/calling-conv/c/stack.ll +++ b/llvm/test/CodeGen/AVR/calling-conv/c/stack.ll @@ -11,15 +11,15 @@ define void @ret_void_args_i64_i64_i32(i64 %a, i64 %b, i32 %c) { ; CHECK-NEXT: in r29, 62 ; Load the top two bytes from the 32-bit int. - ; CHECK-NEXT: ldd r24, Y+5 - ; CHECK-NEXT: ldd r25, Y+6 + ; CHECK-NEXT: ldd r24, Y+7 + ; CHECK-NEXT: ldd r25, Y+8 ; Store the top two bytes of the 32-bit int to memory. ; CHECK-NEXT: sts 7, r25 ; CHECK-NEXT: sts 6, r24 ; Load the bottom two bytes from the 32-bit int. - ; CHECK-NEXT: ldd r24, Y+3 - ; CHECK-NEXT: ldd r25, Y+4 + ; CHECK-NEXT: ldd r24, Y+5 + ; CHECK-NEXT: ldd r25, Y+6 ; Store the bottom two bytes of the 32-bit int to memory. ; CHECK-NEXT: sts 5, r25 ; CHECK-NEXT: sts 4, r24 diff --git a/llvm/test/CodeGen/AVR/return.ll b/llvm/test/CodeGen/AVR/return.ll index 6d81faa6e8d457..15157036f537fe 100644 --- a/llvm/test/CodeGen/AVR/return.ll +++ b/llvm/test/CodeGen/AVR/return.ll @@ -96,14 +96,14 @@ define i64 @return64_arg2(i64 %x, i64 %y, i64 %z) { ; CHECK-LABEL: return64_arg2: ; CHECK: push r28 ; CHECK: push r29 -; CHECK: ldd r18, Y+3 -; CHECK: ldd r19, Y+4 -; CHECK: ldd r20, Y+5 -; CHECK: ldd r21, Y+6 -; CHECK: ldd r22, Y+7 -; CHECK: ldd r23, Y+8 -; CHECK: ldd r24, Y+9 -; CHECK: ldd r25, Y+10 +; CHECK: ldd r18, Y+5 +; CHECK: ldd r19, Y+6 +; CHECK: ldd r20, Y+7 +; CHECK: ldd r21, Y+8 +; CHECK: ldd r22, Y+9 +; CHECK: ldd r23, Y+10 +; CHECK: ldd r24, Y+11 +; CHECK: ldd r25, Y+12 ; CHECK: pop r29 ; CHECK: pop r28 ret i64 %z @@ -113,10 +113,10 @@ define i32 @return64_trunc(i32 %a, i32 %b, i32 %c, i64 %d) { ; CHECK-LABEL: return64_trunc: ; CHECK: push r28 ; CHECK: push r29 -; CHECK: ldd r22, Y+3 -; CHECK: ldd r23, Y+4 -; CHECK: ldd r24, Y+5 -; CHECK: ldd r25, Y+6 +; CHECK: ldd r22, Y+5 +; CHECK: ldd r23, Y+6 +; CHECK: ldd r24, Y+7 +; CHECK: ldd r25, Y+8 ; 
CHECK: pop r29 ; CHECK: pop r28 %result = trunc i64 %d to i32 diff --git a/llvm/test/CodeGen/AVR/varargs.ll b/llvm/test/CodeGen/AVR/varargs.ll index 7224ac3f2b8da8..c2046d84b48b21 100644 --- a/llvm/test/CodeGen/AVR/varargs.ll +++ b/llvm/test/CodeGen/AVR/varargs.ll @@ -7,12 +7,12 @@ declare void @llvm.va_end(i8*) define i16 @varargs1(i8* nocapture %x, ...) { ; CHECK-LABEL: varargs1: ; CHECK: movw r20, r28 -; CHECK: subi r20, 217 +; CHECK: subi r20, 215 ; CHECK: sbci r21, 255 ; CHECK: movw r24, r28 ; CHECK: adiw r24, 3 -; CHECK: ldd r22, Y+37 -; CHECK: ldd r23, Y+38 +; CHECK: ldd r22, Y+39 +; CHECK: ldd r23, Y+40 ; CHECK: call %buffer = alloca [32 x i8] %ap = alloca i8* From cfbe205a7e88242b0b421e32e55522ea7482487c Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Tue, 21 Apr 2020 14:17:21 +0200 Subject: [PATCH 13/16] [AVR] Remove faulty stack pushing behavior An instruction like this will need to allocate some stack space for the last parameter: %x = call addrspace(1) i16 @bar(i64 undef, i64 undef, i16 undef, i16 0) This worked fine when passing an actual value (in this case 0). However, when passing undef, no value was pushed to the stack and therefore no push instructions were created. This caused an unbalanced stack leading to interesting results. This commit fixes that by replacing the push logic with a regular stack adjustment and stack-relative load/stores. This is less efficient but at least it correctly compiles the code. I can think of a few improvements in the future: * The stack should have been adjusted in the function prologue when there are no allocas in the function. * Many (if not most) stack adjustments can be replaced by pushing/popping the values directly. Exactly like the previous code attempted but didn't do correctly. * Small stack adjustments can be done more efficiently with a few push/pop instructions (pushing/popping bogus values), both for code size and for speed. 
All in all, as long as there are no allocas in the function I think that it is almost always more efficient to emit regular push/pop instructions. This is however left for future optimizations. Differential Revision: https://reviews.llvm.org/D78581 --- llvm/lib/Target/AVR/AVRFrameLowering.cpp | 65 ++++++++++-------------- llvm/test/CodeGen/AVR/call.ll | 36 ++++++------- llvm/test/CodeGen/AVR/dynalloca.ll | 24 +++++++-- llvm/test/CodeGen/AVR/varargs.ll | 12 ++--- 4 files changed, 73 insertions(+), 64 deletions(-) diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp index 0aadff9bd06559..7f441a0d668824 100644 --- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp +++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp @@ -285,15 +285,10 @@ bool AVRFrameLowering::restoreCalleeSavedRegisters( } /// Replace pseudo store instructions that pass arguments through the stack with -/// real instructions. If insertPushes is true then all instructions are -/// replaced with push instructions, otherwise regular std instructions are -/// inserted. +/// real instructions. static void fixStackStores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const TargetInstrInfo &TII, bool insertPushes) { - const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>(); - const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); - + const TargetInstrInfo &TII, Register FP) { // Iterate through the BB until we hit a call instruction or we reach the end. for (auto I = MI, E = MBB.end(); I != E && !I->isCall();) { MachineBasicBlock::iterator NextMI = std::next(I); @@ -308,29 +303,6 @@ static void fixStackStores(MachineBasicBlock &MBB, assert(MI.getOperand(0).getReg() == AVR::SP && "Invalid register, should be SP!"); - if (insertPushes) { - // Replace this instruction with a push.
- Register SrcReg = MI.getOperand(2).getReg(); - bool SrcIsKill = MI.getOperand(2).isKill(); - - // We can't use PUSHWRr here because when expanded the order of the new - // instructions are reversed from what we need. Perform the expansion now. - if (Opcode == AVR::STDWSPQRr) { - BuildMI(MBB, I, MI.getDebugLoc(), TII.get(AVR::PUSHRr)) - .addReg(TRI.getSubReg(SrcReg, AVR::sub_hi), - getKillRegState(SrcIsKill)); - BuildMI(MBB, I, MI.getDebugLoc(), TII.get(AVR::PUSHRr)) - .addReg(TRI.getSubReg(SrcReg, AVR::sub_lo), - getKillRegState(SrcIsKill)); - } else { - BuildMI(MBB, I, MI.getDebugLoc(), TII.get(AVR::PUSHRr)) - .addReg(SrcReg, getKillRegState(SrcIsKill)); - } - - MI.eraseFromParent(); - I = NextMI; - continue; - } // Replace this instruction with a regular store. Use Y as the base // pointer since it is guaranteed to contain a copy of SP. @@ -338,7 +310,7 @@ static void fixStackStores(MachineBasicBlock &MBB, (Opcode == AVR::STDWSPQRr) ? AVR::STDWPtrQRr : AVR::STDPtrQRr; MI.setDesc(TII.get(STOpc)); - MI.getOperand(0).setReg(AVR::R29R28); + MI.getOperand(0).setReg(FP); I = NextMI; } @@ -354,7 +326,7 @@ MachineBasicBlock::iterator AVRFrameLowering::eliminateCallFramePseudoInstr( // function entry. Delete the call frame pseudo and replace all pseudo stores // with real store instructions. if (hasReservedCallFrame(MF)) { - fixStackStores(MBB, MI, TII, false); + fixStackStores(MBB, MI, TII, AVR::R29R28); return MBB.erase(MI); } @@ -362,18 +334,37 @@ MachineBasicBlock::iterator AVRFrameLowering::eliminateCallFramePseudoInstr( unsigned int Opcode = MI->getOpcode(); int Amount = TII.getFrameSize(*MI); - // Adjcallstackup does not need to allocate stack space for the call, instead - // we insert push instructions that will allocate the necessary stack. - // For adjcallstackdown we convert it into an 'adiw reg, ' handling - // the read and write of SP in I/O space. 
+ // ADJCALLSTACKUP and ADJCALLSTACKDOWN are converted to adiw/subi + // instructions to read and write the stack pointer in I/O space. if (Amount != 0) { assert(getStackAlignment() == 1 && "Unsupported stack alignment"); if (Opcode == TII.getCallFrameSetupOpcode()) { - fixStackStores(MBB, MI, TII, true); + // Update the stack pointer. + // In many cases this can be done far more efficiently by pushing the + // relevant values directly to the stack. However, doing that correctly + // (in the right order, possibly skipping some empty space for undef + // values, etc) is tricky and thus left to be optimized in the future. + BuildMI(MBB, MI, DL, TII.get(AVR::SPREAD), AVR::R31R30).addReg(AVR::SP); + + MachineInstr *New = BuildMI(MBB, MI, DL, TII.get(AVR::SUBIWRdK), AVR::R31R30) + .addReg(AVR::R31R30, RegState::Kill) + .addImm(Amount); + New->getOperand(3).setIsDead(); + + BuildMI(MBB, MI, DL, TII.get(AVR::SPWRITE), AVR::SP) + .addReg(AVR::R31R30, RegState::Kill); + + // Make sure the remaining stack stores are converted to real store + // instructions. + fixStackStores(MBB, MI, TII, AVR::R31R30); } else { assert(Opcode == TII.getCallFrameDestroyOpcode()); + // Note that small stack changes could be implemented more efficiently + // with a few pop instructions instead of the 8-9 instructions now + // required. + // Select the best opcode to adjust SP based on the offset size. 
unsigned addOpcode; if (isUInt<6>(Amount)) { diff --git a/llvm/test/CodeGen/AVR/call.ll b/llvm/test/CodeGen/AVR/call.ll index a2556e8c1e614a..3cf821ebdb06a8 100644 --- a/llvm/test/CodeGen/AVR/call.ll +++ b/llvm/test/CodeGen/AVR/call.ll @@ -32,8 +32,8 @@ define i8 @calli8_stack() { ; CHECK-LABEL: calli8_stack: ; CHECK: ldi [[REG1:r[0-9]+]], 10 ; CHECK: ldi [[REG2:r[0-9]+]], 11 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+1, [[REG1]] +; CHECK: std Z+2, [[REG2]] ; CHECK: call foo8_3 %result1 = call i8 @foo8_3(i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11) ret i8 %result1 @@ -54,12 +54,12 @@ define i16 @calli16_stack() { ; CHECK-LABEL: calli16_stack: ; CHECK: ldi [[REG1:r[0-9]+]], 9 ; CHECK: ldi [[REG2:r[0-9]+]], 2 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+1, [[REG1]] +; CHECK: std Z+2, [[REG2]] ; CHECK: ldi [[REG1:r[0-9]+]], 10 ; CHECK: ldi [[REG2:r[0-9]+]], 2 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+3, [[REG1]] +; CHECK: std Z+4, [[REG2]] ; CHECK: call foo16_2 %result1 = call i16 @foo16_2(i16 512, i16 513, i16 514, i16 515, i16 516, i16 517, i16 518, i16 519, i16 520, i16 521, i16 522) ret i16 %result1 @@ -84,12 +84,12 @@ define i32 @calli32_stack() { ; CHECK-LABEL: calli32_stack: ; CHECK: ldi [[REG1:r[0-9]+]], 64 ; CHECK: ldi [[REG2:r[0-9]+]], 66 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+1, [[REG1]] +; CHECK: std Z+2, [[REG2]] ; CHECK: ldi [[REG1:r[0-9]+]], 15 ; CHECK: ldi [[REG2:r[0-9]+]], 2 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+3, [[REG1]] +; CHECK: std Z+4, [[REG2]] ; CHECK: call foo32_2 %result1 = call i32 @foo32_2(i32 1, i32 2, i32 3, i32 4, i32 34554432) ret i32 %result1 @@ -115,20 +115,20 @@ define i64 @calli64_stack() { ; CHECK: ldi [[REG1:r[0-9]+]], 76 ; CHECK: ldi [[REG2:r[0-9]+]], 73 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+5, [[REG1]] +; CHECK: std Z+6, [[REG2]] ; CHECK: ldi [[REG1:r[0-9]+]], 31 ; 
CHECK: ldi [[REG2:r[0-9]+]], 242 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+7, [[REG1]] +; CHECK: std Z+8, [[REG2]] ; CHECK: ldi [[REG1:r[0-9]+]], 155 ; CHECK: ldi [[REG2:r[0-9]+]], 88 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+3, [[REG1]] +; CHECK: std Z+4, [[REG2]] ; CHECK: ldi [[REG1:r[0-9]+]], 255 ; CHECK: ldi [[REG2:r[0-9]+]], 255 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+1, [[REG1]] +; CHECK: std Z+2, [[REG2]] ; CHECK: call foo64_2 %result1 = call i64 @foo64_2(i64 1, i64 2, i64 17446744073709551615) ret i64 %result1 diff --git a/llvm/test/CodeGen/AVR/dynalloca.ll b/llvm/test/CodeGen/AVR/dynalloca.ll index 6aa776e2de6f92..f314fb06f3361d 100644 --- a/llvm/test/CodeGen/AVR/dynalloca.ll +++ b/llvm/test/CodeGen/AVR/dynalloca.ll @@ -53,9 +53,27 @@ define void @dynalloca2(i16 %x) { ; CHECK-LABEL: dynalloca2: ; CHECK: in [[SPCOPY1:r[0-9]+]], 61 ; CHECK: in [[SPCOPY2:r[0-9]+]], 62 -; CHECK: push -; CHECK-NOT: st -; CHECK-NOT: std +; Allocate stack space for call +; CHECK: in {{.*}}, 61 +; CHECK: in {{.*}}, 62 +; CHECK: subi +; CHECK: sbci +; CHECK: in r0, 63 +; CHECK-NEXT: cli +; CHECK-NEXT: out 62, {{.*}} +; CHECK-NEXT: out 63, r0 +; CHECK-NEXT: out 61, {{.*}} +; Store values on the stack +; CHECK: ldi r16, 0 +; CHECK: ldi r17, 0 +; CHECK: std Z+5, r16 +; CHECK: std Z+6, r17 +; CHECK: std Z+7, r16 +; CHECK: std Z+8, r17 +; CHECK: std Z+3, r16 +; CHECK: std Z+4, r17 +; CHECK: std Z+1, r16 +; CHECK: std Z+2, r17 ; CHECK: call ; Call frame restore ; CHECK-NEXT: in r30, 61 diff --git a/llvm/test/CodeGen/AVR/varargs.ll b/llvm/test/CodeGen/AVR/varargs.ll index c2046d84b48b21..a743374db7422a 100644 --- a/llvm/test/CodeGen/AVR/varargs.ll +++ b/llvm/test/CodeGen/AVR/varargs.ll @@ -42,16 +42,16 @@ define void @varargcall() { ; CHECK-LABEL: varargcall: ; CHECK: ldi [[REG1:r[0-9]+]], 189 ; CHECK: ldi [[REG2:r[0-9]+]], 205 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+3, [[REG1]] +; CHECK: std Z+4, 
[[REG2]] ; CHECK: ldi [[REG1:r[0-9]+]], 191 ; CHECK: ldi [[REG2:r[0-9]+]], 223 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+5, [[REG1]] +; CHECK: std Z+6, [[REG2]] ; CHECK: ldi [[REG1:r[0-9]+]], 205 ; CHECK: ldi [[REG2:r[0-9]+]], 171 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] +; CHECK: std Z+1, [[REG1]] +; CHECK: std Z+2, [[REG2]] ; CHECK: call ; CHECK: adiw r30, 6 tail call void (i16, ...) @var1223(i16 -21555, i16 -12867, i16 -8257) From bc27c282e13a46ab2e0cf51a3831006347cc4f41 Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Sun, 19 Apr 2020 02:22:06 +0200 Subject: [PATCH 14/16] [AVR] Fix miscompilation of zext + add Code like the following: define i32 @foo(i32 %a, i1 zeroext %b) addrspace(1) { entry: %conv = zext i1 %b to i32 %add = add nsw i32 %conv, %a ret i32 %add } Would compile to the following (incorrect) code: foo: mov r18, r20 clr r19 add r22, r18 adc r23, r19 sbci r24, 0 sbci r25, 0 ret Those sbci instructions are clearly wrong, they should have been adc instructions. This commit improves codegen to use adc instead: foo: mov r18, r20 clr r19 ldi r20, 0 ldi r21, 0 add r22, r18 adc r23, r19 adc r24, r20 adc r25, r21 ret This code is not optimal (it could be just 5 instructions instead of the current 9) but at least it doesn't miscompile. 
Differential Revision: https://reviews.llvm.org/D78439 --- llvm/lib/Target/AVR/AVRInstrInfo.td | 2 -- llvm/test/CodeGen/AVR/add.ll | 45 +++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index 0303f86383f911..ca5600c5049986 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -2047,8 +2047,6 @@ def : Pat<(add i16:$src1, imm:$src2), (SUBIWRdK i16:$src1, (imm16_neg_XFORM imm:$src2))>; def : Pat<(addc i16:$src1, imm:$src2), (SUBIWRdK i16:$src1, (imm16_neg_XFORM imm:$src2))>; -def : Pat<(adde i16:$src1, imm:$src2), - (SBCIWRdK i16:$src1, (imm16_neg_XFORM imm:$src2))>; def : Pat<(add i8:$src1, imm:$src2), (SUBIRdK i8:$src1, (imm8_neg_XFORM imm:$src2))>; diff --git a/llvm/test/CodeGen/AVR/add.ll b/llvm/test/CodeGen/AVR/add.ll index cd3a69e6454c9b..30f23b19937127 100644 --- a/llvm/test/CodeGen/AVR/add.ll +++ b/llvm/test/CodeGen/AVR/add.ll @@ -45,6 +45,17 @@ define i16 @add16_reg_imm_subi(i16 %a) { ret i16 %result } +define i16 @add16_reg_reg_zext(i16 %a, i1 zeroext %b) { +; CHECK-LABEL: add16_reg_reg_zext: +; CHECK: mov r18, r22 +; CHECK: clr r19 +; CHECK: add r24, r18 +; CHECK: adc r25, r19 + %zext = zext i1 %b to i16 + %result = add i16 %a, %zext + ret i16 %result +} + define i32 @add32_reg_reg(i32 %a, i32 %b) { ; CHECK-LABEL: add32_reg_reg: ; CHECK: add r22, r18 @@ -65,6 +76,21 @@ define i32 @add32_reg_imm(i32 %a) { ret i32 %result } +define i32 @add32_reg_reg_zext(i32 %a, i1 zeroext %b) { +; CHECK-LABEL: add32_reg_reg_zext: +; CHECK: mov r18, r20 +; CHECK: clr r19 +; CHECK: ldi r20, 0 +; CHECK: ldi r21, 0 +; CHECK: add r22, r18 +; CHECK: adc r23, r19 +; CHECK: adc r24, r20 +; CHECK: adc r25, r21 + %zext = zext i1 %b to i32 + %result = add i32 %a, %zext + ret i32 %result +} + define i64 @add64_reg_reg(i64 %a, i64 %b) { ; CHECK-LABEL: add64_reg_reg: ; CHECK: add r18, r10 @@ -91,3 +117,22 @@ define i64 
@add64_reg_imm(i64 %a) { %result = add i64 %a, 5 ret i64 %result } + +define i64 @add64_reg_reg_zext(i64 %a, i1 zeroext %b) { +; CHECK-LABEL: add64_reg_reg_zext: +; CHECK: mov r30, r16 +; CHECK: clr r31 +; CHECK: ldi r26, 0 +; CHECK: ldi r27, 0 +; CHECK: add r18, r30 +; CHECK: adc r19, r31 +; CHECK: adc r20, r26 +; CHECK: adc r21, r27 +; CHECK: adc r22, r26 +; CHECK: adc r23, r27 +; CHECK: adc r24, r26 +; CHECK: adc r25, r27 + %zext = zext i1 %b to i64 + %result = add i64 %a, %zext + ret i64 %result +} From 118ac53f12bcf5108a42ac052f1613144785b4b1 Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Sun, 19 Apr 2020 20:58:36 +0200 Subject: [PATCH 15/16] [AVR] Don't adjust for instruction size I'm not entirely sure why this was ever needed, but when I remove both adjustments all tests still pass. This fixes a bug where a long branch (using the `jmp` instead of the `rjmp` instruction) was incorrectly adjusted by 2 because it jumps to an absolute address instead of a PC-relative address. I could have added AVR::fixup_call to the list of exceptions, but it seemed more sensible to me to just remove this code. Differential Revision: https://reviews.llvm.org/D78459 --- .../Target/AVR/MCTargetDesc/AVRAsmBackend.cpp | 23 - llvm/test/CodeGen/AVR/jmp-long.ll | 1045 +++++++++++++++++ 2 files changed, 1045 insertions(+), 23 deletions(-) create mode 100644 llvm/test/CodeGen/AVR/jmp-long.ll diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp index b6c542a106372d..9dd385c114806f 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp @@ -89,8 +89,6 @@ void adjustRelativeBranch(unsigned Size, const MCFixup &Fixup, uint64_t &Value, // one. signed_width(Size + 1, Value, std::string("branch target"), Fixup, Ctx); - Value -= 2; - // Rightshifts the value by one. 
AVR::fixups::adjustBranchTarget(Value); } @@ -249,27 +247,6 @@ void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Size = AVRAsmBackend::getFixupKindInfo(Fixup.getKind()).TargetSize; unsigned Kind = Fixup.getKind(); - - // Parsed LLVM-generated temporary labels are already - // adjusted for instruction size, but normal labels aren't. - // - // To handle both cases, we simply un-adjust the temporary label - // case so it acts like all other labels. - if (const MCSymbolRefExpr *A = Target.getSymA()) { - if (A->getSymbol().isTemporary()) { - switch (Kind) { - case FK_Data_1: - case FK_Data_2: - case FK_Data_4: - case FK_Data_8: - // Don't shift value for absolute addresses. - break; - default: - Value += 2; - } - } - } - switch (Kind) { default: llvm_unreachable("unhandled fixup"); diff --git a/llvm/test/CodeGen/AVR/jmp-long.ll b/llvm/test/CodeGen/AVR/jmp-long.ll new file mode 100644 index 00000000000000..9878fda1028a01 --- /dev/null +++ b/llvm/test/CodeGen/AVR/jmp-long.ll @@ -0,0 +1,1045 @@ +; RUN: llc -filetype=obj -march avr -mattr=jmpcall < %s | llvm-objdump -dr --mattr=jmpcall - | FileCheck %s + +; Test the fix in https://reviews.llvm.org/D78459. +; Long branches (that use jmp instead of rjmp) were broken: the jump was to a +; location 0x20000 away from where it should be. This test is a regression test +; to make sure long branches are correct. 
+ +declare void @bar() addrspace(1) + +define void @foo() addrspace(1) { + br label %1 + +1: ; preds = %0, %1 + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail 
call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call 
addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + tail call addrspace(0) void asm sideeffect "nop", ""() + ; CHECK: call 0 + ; CHECK: R_AVR_CALL bar + ; CHECK: jmp 0 + ; CHECK: R_AVR_CALL .text + tail call addrspace(1) void @bar() + br label %1 +} From 12dfdd3aed743ece6ceab7c5da004b066bbb70f1 Mon Sep 17 00:00:00 2001 From: Dylan McKay Date: Sat, 20 Jun 2020 04:34:05 +1200 Subject: [PATCH 16/16] [AVR] Rewrite the function calling convention. Summary: The previous version relied on the standard calling convention using std::reverse() to try to force the AVR ABI. But this only works for simple cases, it fails for example with aggregate types. This patch rewrites the calling convention with custom C++ code, that implements the ABI defined in https://gcc.gnu.org/wiki/avr-gcc. To do that it adds a few 16-bit pseudo registers for unaligned argument passing, such as R24R23. For example this function: define void @fun({ i8, i16 } %a) will pass %a.0 in R22 and %a.1 in R24R23. There are no instructions that can use these pseudo registers, so a new register class, DREGSMOVW, is defined to make them apart. Also the ArgCC_AVR_BUILTIN_DIV is no longer necessary, as it is identical to the C++ behavior (actually the clobber list is more strict for __div* functions, but that is currently unimplemented). 
Reviewers: dylanmckay Subscribers: Gaelan, Sh4rK, indirect, jwagen, efriedma, dsprenkels, hiraditya, Jim, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68524 Patch by Rodrigo Rivas Costa. --- llvm/lib/Target/AVR/AVRCallingConv.td | 18 +- llvm/lib/Target/AVR/AVRISelLowering.cpp | 343 ++++++++---------- llvm/lib/Target/AVR/AVRISelLowering.h | 6 +- llvm/lib/Target/AVR/AVRInstrInfo.cpp | 2 +- llvm/lib/Target/AVR/AVRRegisterInfo.td | 26 ++ .../CodeGen/AVR/calling-conv/c/basic_aggr.ll | 84 +++++ llvm/test/CodeGen/AVR/calling-conv/c/call.ll | 89 +++++ .../CodeGen/AVR/calling-conv/c/call_aggr.ll | 48 +++ .../CodeGen/AVR/calling-conv/c/return_aggr.ll | 31 ++ 9 files changed, 436 insertions(+), 211 deletions(-) create mode 100644 llvm/test/CodeGen/AVR/calling-conv/c/basic_aggr.ll create mode 100644 llvm/test/CodeGen/AVR/calling-conv/c/call.ll create mode 100644 llvm/test/CodeGen/AVR/calling-conv/c/call_aggr.ll create mode 100644 llvm/test/CodeGen/AVR/calling-conv/c/return_aggr.ll diff --git a/llvm/lib/Target/AVR/AVRCallingConv.td b/llvm/lib/Target/AVR/AVRCallingConv.td index 213e35fca66d1c..65545e531a8825 100644 --- a/llvm/lib/Target/AVR/AVRCallingConv.td +++ b/llvm/lib/Target/AVR/AVRCallingConv.td @@ -6,21 +6,13 @@ // //===----------------------------------------------------------------------===// // This describes the calling conventions for AVR architecture. +// Normal functions use a special calling convention, solved in code. //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // AVR Return Value Calling Convention //===----------------------------------------------------------------------===// -def RetCC_AVR : CallingConv -<[ - // i8 is returned in R24. - CCIfType<[i8], CCAssignToReg<[R24]>>, - - // i16 are returned in R25:R24, R23:R22, R21:R20 and R19:R18. 
- CCIfType<[i16], CCAssignToReg<[R25R24, R23R22, R21R20, R19R18]>> -]>; - // Special return value calling convention for runtime functions. def RetCC_AVR_BUILTIN : CallingConv <[ @@ -41,14 +33,6 @@ def ArgCC_AVR_Vararg : CallingConv CCAssignToStack<2, 1> ]>; -// Special argument calling convention for -// division runtime functions. -def ArgCC_AVR_BUILTIN_DIV : CallingConv -<[ - CCIfType<[i8], CCAssignToReg<[R24,R22]>>, - CCIfType<[i16], CCAssignToReg<[R25R24, R23R22]>> -]>; - //===----------------------------------------------------------------------===// // Callee-saved register lists. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index fcf90248270698..315d3d87521ce6 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -14,6 +14,7 @@ #include "AVRISelLowering.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -867,173 +868,147 @@ bool AVRTargetLowering::isOffsetFoldingLegal( #include "AVRGenCallingConv.inc" -/// For each argument in a function store the number of pieces it is composed -/// of. -static void parseFunctionArgs(const SmallVectorImpl &Ins, - SmallVectorImpl &Out) { - for (const ISD::InputArg &Arg : Ins) { - if(Arg.PartOffset > 0) continue; - unsigned Bytes = ((Arg.ArgVT.getSizeInBits()) + 7) / 8; - - Out.push_back((Bytes + 1) / 2); - } -} - -/// For external symbols there is no function prototype information so we -/// have to rely directly on argument sizes. 
-static void parseExternFuncCallArgs(const SmallVectorImpl &In, - SmallVectorImpl &Out) { - for (unsigned i = 0, e = In.size(); i != e;) { - unsigned Size = 0; - unsigned Offset = 0; - while ((i != e) && (In[i].PartOffset == Offset)) { - Offset += In[i].VT.getStoreSize(); - ++i; - ++Size; - } - Out.push_back(Size); - } -} - -static StringRef getFunctionName(TargetLowering::CallLoweringInfo &CLI) { - SDValue Callee = CLI.Callee; - - if (const ExternalSymbolSDNode *G = dyn_cast(Callee)) { - return G->getSymbol(); - } - - if (const GlobalAddressSDNode *G = dyn_cast(Callee)) { - return G->getGlobal()->getName(); - } - - llvm_unreachable("don't know how to get the name for this callee"); -} +/// Registers for calling conventions, ordered in reverse as required by ABI. +/// Both arrays must be of the same length. +static const MCPhysReg RegList8[] = { + AVR::R25, AVR::R24, AVR::R23, AVR::R22, AVR::R21, AVR::R20, + AVR::R19, AVR::R18, AVR::R17, AVR::R16, AVR::R15, AVR::R14, + AVR::R13, AVR::R12, AVR::R11, AVR::R10, AVR::R9, AVR::R8}; +static const MCPhysReg RegList16[] = { + AVR::R26R25, AVR::R25R24, AVR::R24R23, AVR::R23R22, + AVR::R22R21, AVR::R21R20, AVR::R20R19, AVR::R19R18, + AVR::R18R17, AVR::R17R16, AVR::R16R15, AVR::R15R14, + AVR::R14R13, AVR::R13R12, AVR::R12R11, AVR::R11R10, + AVR::R10R9, AVR::R9R8}; + +static_assert(array_lengthof(RegList8) == array_lengthof(RegList16), + "8-bit and 16-bit register arrays must be of equal length"); /// Analyze incoming and outgoing function arguments. We need custom C++ code -/// to handle special constraints in the ABI like reversing the order of the -/// pieces of splitted arguments. In addition, all pieces of a certain argument -/// have to be passed either using registers or the stack but never mixing both. 
-static void analyzeStandardArguments(TargetLowering::CallLoweringInfo *CLI, - const Function *F, const DataLayout *TD, - const SmallVectorImpl *Outs, - const SmallVectorImpl *Ins, - CallingConv::ID CallConv, - SmallVectorImpl &ArgLocs, - CCState &CCInfo, bool IsCall, bool IsVarArg) { - static const MCPhysReg RegList8[] = {AVR::R24, AVR::R22, AVR::R20, - AVR::R18, AVR::R16, AVR::R14, - AVR::R12, AVR::R10, AVR::R8}; - static const MCPhysReg RegList16[] = {AVR::R25R24, AVR::R23R22, AVR::R21R20, - AVR::R19R18, AVR::R17R16, AVR::R15R14, - AVR::R13R12, AVR::R11R10, AVR::R9R8}; - if (IsVarArg) { - // Variadic functions do not need all the analysis below. - if (IsCall) { - CCInfo.AnalyzeCallOperands(*Outs, ArgCC_AVR_Vararg); - } else { - CCInfo.AnalyzeFormalArguments(*Ins, ArgCC_AVR_Vararg); +/// to handle special constraints in the ABI. +/// In addition, all pieces of a certain argument have to be passed either +/// using registers or the stack but never mixing both. +template +static void +analyzeArguments(TargetLowering::CallLoweringInfo *CLI, const Function *F, + const DataLayout *TD, const SmallVectorImpl &Args, + SmallVectorImpl &ArgLocs, CCState &CCInfo) { + unsigned NumArgs = Args.size(); + // This is the index of the last used register, in RegList*. + // -1 means R26 (R26 is never actually used in CC). + int RegLastIdx = -1; + // Once a value is passed to the stack it will always be used + bool UseStack = false; + for (unsigned i = 0; i != NumArgs;) { + MVT VT = Args[i].VT; + // We have to count the number of bytes for each function argument, that is + // those Args with the same OrigArgIndex. This is important in case the + // function takes an aggregate type. + // Current argument will be between [i..j). 
+ unsigned ArgIndex = Args[i].OrigArgIndex; + unsigned TotalBytes = VT.getStoreSize(); + unsigned j = i + 1; + for (; j != NumArgs; ++j) { + if (Args[j].OrigArgIndex != ArgIndex) + break; + TotalBytes += Args[j].VT.getStoreSize(); } - return; - } - - // Fill in the Args array which will contain original argument sizes. - SmallVector Args; - if (IsCall) { - parseExternFuncCallArgs(*Outs, Args); - } else { - assert(F != nullptr && "function should not be null"); - parseFunctionArgs(*Ins, Args); - } - - unsigned RegsLeft = array_lengthof(RegList8), ValNo = 0; - // Variadic functions always use the stack. - bool UsesStack = false; - for (unsigned i = 0, pos = 0, e = Args.size(); i != e; ++i) { - unsigned Size = Args[i]; - - // If we have a zero-sized argument, don't attempt to lower it. - // AVR-GCC does not support zero-sized arguments and so we need not - // worry about ABI compatibility. - if (Size == 0) continue; - - MVT LocVT = (IsCall) ? (*Outs)[pos].VT : (*Ins)[pos].VT; - - // If we have plenty of regs to pass the whole argument do it. - if (!UsesStack && (Size <= RegsLeft)) { - const MCPhysReg *RegList = (LocVT == MVT::i16) ? RegList16 : RegList8; + // Round up to even number of bytes. 
+ TotalBytes = alignTo(TotalBytes, 2); + // Skip zero sized arguments + if (TotalBytes == 0) + continue; + // The index of the first register to be used + unsigned RegIdx = RegLastIdx + TotalBytes; + RegLastIdx = RegIdx; + // If there are not enough registers, use the stack + if (RegIdx >= array_lengthof(RegList8)) { + UseStack = true; + } + for (; i != j; ++i) { + MVT VT = Args[i].VT; - for (unsigned j = 0; j != Size; ++j) { - unsigned Reg = CCInfo.AllocateReg( - ArrayRef(RegList, array_lengthof(RegList8))); + if (UseStack) { + auto evt = EVT(VT).getTypeForEVT(CCInfo.getContext()); + unsigned Offset = CCInfo.AllocateStack(TD->getTypeAllocSize(evt), + TD->getABITypeAlignment(evt)); CCInfo.addLoc( - CCValAssign::getReg(ValNo++, LocVT, Reg, LocVT, CCValAssign::Full)); - --RegsLeft; - } - - // Reverse the order of the pieces to agree with the "big endian" format - // required in the calling convention ABI. - std::reverse(ArgLocs.begin() + pos, ArgLocs.begin() + pos + Size); - } else { - // Pass the rest of arguments using the stack. - UsesStack = true; - for (unsigned j = 0; j != Size; ++j) { - unsigned Offset = CCInfo.AllocateStack( - TD->getTypeAllocSize(EVT(LocVT).getTypeForEVT(CCInfo.getContext())), - TD->getABITypeAlignment( - EVT(LocVT).getTypeForEVT(CCInfo.getContext()))); - CCInfo.addLoc(CCValAssign::getMem(ValNo++, LocVT, Offset, LocVT, - CCValAssign::Full)); + CCValAssign::getMem(i, VT, Offset, VT, CCValAssign::Full)); + } else { + unsigned Reg; + if (VT == MVT::i8) { + Reg = CCInfo.AllocateReg(RegList8[RegIdx]); + } else if (VT == MVT::i16) { + Reg = CCInfo.AllocateReg(RegList16[RegIdx]); + } else { + llvm_unreachable( + "calling convention can only manage i8 and i16 types"); + } + assert(Reg && "register not available in calling convention"); + CCInfo.addLoc(CCValAssign::getReg(i, VT, Reg, VT, CCValAssign::Full)); + // Registers inside a particular argument are sorted in increasing order + // (remember the array is reversed). 
+ RegIdx -= VT.getStoreSize(); } } - pos += Size; } } -static void analyzeBuiltinArguments(TargetLowering::CallLoweringInfo &CLI, - const Function *F, const DataLayout *TD, - const SmallVectorImpl *Outs, - const SmallVectorImpl *Ins, - CallingConv::ID CallConv, - SmallVectorImpl &ArgLocs, - CCState &CCInfo, bool IsCall, bool IsVarArg) { - StringRef FuncName = getFunctionName(CLI); - - if (FuncName.startswith("__udivmod") || FuncName.startswith("__divmod")) { - CCInfo.AnalyzeCallOperands(*Outs, ArgCC_AVR_BUILTIN_DIV); - } else { - analyzeStandardArguments(&CLI, F, TD, Outs, Ins, - CallConv, ArgLocs, CCInfo, - IsCall, IsVarArg); +/// Count the total number of bytes needed to pass or return these arguments. +template +static unsigned getTotalArgumentsSizeInBytes(const SmallVectorImpl &Args) { + unsigned TotalBytes = 0; + unsigned NumArgs = Args.size(); + + for (unsigned i = 0; i != NumArgs; ++i) { + MVT VT = Args[i].VT; + TotalBytes += VT.getStoreSize(); } + return TotalBytes; } -static void analyzeArguments(TargetLowering::CallLoweringInfo *CLI, - const Function *F, const DataLayout *TD, - const SmallVectorImpl *Outs, - const SmallVectorImpl *Ins, - CallingConv::ID CallConv, - SmallVectorImpl &ArgLocs, - CCState &CCInfo, bool IsCall, bool IsVarArg) { - switch (CallConv) { - case CallingConv::AVR_BUILTIN: { - analyzeBuiltinArguments(*CLI, F, TD, Outs, Ins, - CallConv, ArgLocs, CCInfo, - IsCall, IsVarArg); - return; - } - default: { - analyzeStandardArguments(CLI, F, TD, Outs, Ins, - CallConv, ArgLocs, CCInfo, - IsCall, IsVarArg); - return; +/// Analyze incoming and outgoing value of returning from a function. +/// The algorithm is similar to analyzeArguments, but there can only be +/// one value, possibly an aggregate, and it is limited to 8 bytes. 
+template +static void analyzeReturnValues(const SmallVectorImpl &Args, + CCState &CCInfo) { + unsigned NumArgs = Args.size(); + unsigned TotalBytes = getTotalArgumentsSizeInBytes(Args); + // CanLowerReturn() guarantees this assertion. + assert(TotalBytes <= 8 && "return values greater than 8 bytes cannot be lowered"); + + // GCC-ABI says that the size is rounded up to the next even number, + // but actually once it is more than 4 it will always round up to 8. + if (TotalBytes > 4) { + TotalBytes = 8; + } else { + TotalBytes = alignTo(TotalBytes, 2); + } + + // The index of the first register to use. + int RegIdx = TotalBytes - 1; + for (unsigned i = 0; i != NumArgs; ++i) { + MVT VT = Args[i].VT; + unsigned Reg; + if (VT == MVT::i8) { + Reg = CCInfo.AllocateReg(RegList8[RegIdx]); + } else if (VT == MVT::i16) { + Reg = CCInfo.AllocateReg(RegList16[RegIdx]); + } else { + llvm_unreachable("calling convention can only manage i8 and i16 types"); } + assert(Reg && "register not available in calling convention"); + CCInfo.addLoc(CCValAssign::getReg(i, VT, Reg, VT, CCValAssign::Full)); + // Registers sort in increasing order + RegIdx -= VT.getStoreSize(); } } SDValue AVRTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const { + const SmallVectorImpl &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); auto DL = DAG.getDataLayout(); @@ -1043,8 +1018,12 @@ SDValue AVRTargetLowering::LowerFormalArguments( CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); - analyzeArguments(nullptr, &MF.getFunction(), &DL, 0, &Ins, CallConv, ArgLocs, CCInfo, - false, isVarArg); + // Variadic functions do not need all the analysis below. 
+ if (isVarArg) { + CCInfo.AnalyzeFormalArguments(Ins, ArgCC_AVR_Vararg); + } else { + analyzeArguments(nullptr, &MF.getFunction(), &DL, Ins, ArgLocs, CCInfo); + } SDValue ArgValue; for (CCValAssign &VA : ArgLocs) { @@ -1165,8 +1144,12 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, getPointerTy(DAG.getDataLayout())); } - analyzeArguments(&CLI, F, &DAG.getDataLayout(), &Outs, 0, CallConv, ArgLocs, CCInfo, - true, isVarArg); + // Variadic functions do not need all the analysis below. + if (isVarArg) { + CCInfo.AnalyzeCallOperands(Outs, ArgCC_AVR_Vararg); + } else { + analyzeArguments(&CLI, F, &DAG.getDataLayout(), Outs, ArgLocs, CCInfo); + } // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); @@ -1303,13 +1286,10 @@ SDValue AVRTargetLowering::LowerCallResult( *DAG.getContext()); // Handle runtime calling convs. - auto CCFunction = CCAssignFnForReturn(CallConv); - CCInfo.AnalyzeCallResult(Ins, CCFunction); - - if (CallConv != CallingConv::AVR_BUILTIN && RVLocs.size() > 1) { - // Reverse splitted return values to get the "big endian" format required - // to agree with the calling convention ABI. - std::reverse(RVLocs.begin(), RVLocs.end()); + if (CallConv == CallingConv::AVR_BUILTIN) { + CCInfo.AnalyzeCallResult(Ins, RetCC_AVR_BUILTIN); + } else { + analyzeReturnValues(Ins, CCInfo); } // Copy all of the result registers out of their specified physreg. 
@@ -1328,26 +1308,17 @@ SDValue AVRTargetLowering::LowerCallResult( // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// -CCAssignFn *AVRTargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const { - switch (CC) { - case CallingConv::AVR_BUILTIN: - return RetCC_AVR_BUILTIN; - default: - return RetCC_AVR; +bool AVRTargetLowering::CanLowerReturn( + CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, + const SmallVectorImpl &Outs, LLVMContext &Context) const { + if (CallConv == CallingConv::AVR_BUILTIN) { + SmallVector RVLocs; + CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); + return CCInfo.CheckReturn(Outs, RetCC_AVR_BUILTIN); } -} - -bool -AVRTargetLowering::CanLowerReturn(CallingConv::ID CallConv, - MachineFunction &MF, bool isVarArg, - const SmallVectorImpl &Outs, - LLVMContext &Context) const -{ - SmallVector RVLocs; - CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); - auto CCFunction = CCAssignFnForReturn(CallConv); - return CCInfo.CheckReturn(Outs, CCFunction); + unsigned TotalBytes = getTotalArgumentsSizeInBytes(Outs); + return TotalBytes <= 8; } SDValue @@ -1363,25 +1334,19 @@ AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); - // Analyze return values. - auto CCFunction = CCAssignFnForReturn(CallConv); - CCInfo.AnalyzeReturn(Outs, CCFunction); - - // If this is the first return lowered for this function, add the regs to - // the liveout set for the function. MachineFunction &MF = DAG.getMachineFunction(); - unsigned e = RVLocs.size(); - // Reverse splitted return values to get the "big endian" format required - // to agree with the calling convention ABI. - if (e > 1) { - std::reverse(RVLocs.begin(), RVLocs.end()); + // Analyze return values. 
+ if (CallConv == CallingConv::AVR_BUILTIN) { + CCInfo.AnalyzeReturn(Outs, RetCC_AVR_BUILTIN); + } else { + analyzeReturnValues(Outs, CCInfo); } SDValue Flag; SmallVector RetOps(1, Chain); // Copy the result values into the output registers. - for (unsigned i = 0; i != e; ++i) { + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); diff --git a/llvm/lib/Target/AVR/AVRISelLowering.h b/llvm/lib/Target/AVR/AVRISelLowering.h index aca1ea1d50e54c..d1eaf53b15e9cd 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.h +++ b/llvm/lib/Target/AVR/AVRISelLowering.h @@ -146,10 +146,8 @@ class AVRTargetLowering : public TargetLowering { SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; - CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const; - - bool CanLowerReturn(CallingConv::ID CallConv, - MachineFunction &MF, bool isVarArg, + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const override; diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp index a6832f282b3161..534141c7c56cc0 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -48,7 +48,7 @@ void AVRInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Not all AVR devices support the 16-bit `MOVW` instruction. 
if (AVR::DREGSRegClass.contains(DestReg, SrcReg)) { - if (STI.hasMOVW()) { + if (STI.hasMOVW() && AVR::DREGSMOVWRegClass.contains(DestReg, SrcReg)) { BuildMI(MBB, MI, DL, get(AVR::MOVWRdRr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); } else { diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.td b/llvm/lib/Target/AVR/AVRRegisterInfo.td index ea38fedd22ce98..8e36971dd63d48 100644 --- a/llvm/lib/Target/AVR/AVRRegisterInfo.td +++ b/llvm/lib/Target/AVR/AVRRegisterInfo.td @@ -103,6 +103,16 @@ CoveredBySubRegs = 1 in def R5R4 : AVRReg<4, "r5:r4", [R4, R5]>, DwarfRegNum<[4]>; def R3R2 : AVRReg<2, "r3:r2", [R2, R3]>, DwarfRegNum<[2]>; def R1R0 : AVRReg<0, "r1:r0", [R0, R1]>, DwarfRegNum<[0]>; + // Pseudo registers for unaligned i32 + def R26R25 : AVRReg<25, "r26:r25", [R25, R26]>, DwarfRegNum<[25]>; + def R24R23 : AVRReg<23, "r24:r23", [R23, R24]>, DwarfRegNum<[23]>; + def R22R21 : AVRReg<21, "r22:r21", [R21, R22]>, DwarfRegNum<[21]>; + def R20R19 : AVRReg<19, "r20:r19", [R19, R20]>, DwarfRegNum<[19]>; + def R18R17 : AVRReg<17, "r18:r17", [R17, R18]>, DwarfRegNum<[17]>; + def R16R15 : AVRReg<15, "r16:r15", [R15, R16]>, DwarfRegNum<[15]>; + def R14R13 : AVRReg<13, "r14:r13", [R13, R14]>, DwarfRegNum<[13]>; + def R12R11 : AVRReg<11, "r12:r11", [R11, R12]>, DwarfRegNum<[11]>; + def R10R9 : AVRReg<9, "r10:r9", [R9, R10]>, DwarfRegNum<[9]>; } //===----------------------------------------------------------------------===// @@ -146,6 +156,22 @@ def LD8lo : RegisterClass<"AVR", [i8], 8, // Main 16-bit pair register class. def DREGS : RegisterClass<"AVR", [i16], 8, + ( + // Return value and arguments. + add R25R24, R19R18, R21R20, R23R22, + // Scratch registers. + R31R30, R27R26, + // Callee saved registers. 
+ R29R28, R17R16, R15R14, R13R12, R11R10, + R9R8, R7R6, R5R4, R3R2, R1R0, + // Pseudo regs for unaligned 16-bits + R26R25, R24R23, R22R21, + R20R19, R18R17, R16R15, + R14R13, R12R11, R10R9 + )>; + +// 16-bit pair register class for movw +def DREGSMOVW : RegisterClass<"AVR", [i16], 8, ( // Return value and arguments. add R25R24, R19R18, R21R20, R23R22, diff --git a/llvm/test/CodeGen/AVR/calling-conv/c/basic_aggr.ll b/llvm/test/CodeGen/AVR/calling-conv/c/basic_aggr.ll new file mode 100644 index 00000000000000..0f6cf0ed73d080 --- /dev/null +++ b/llvm/test/CodeGen/AVR/calling-conv/c/basic_aggr.ll @@ -0,0 +1,84 @@ +; RUN: llc < %s -march=avr | FileCheck %s + +; CHECK-LABEL: ret_void_args_struct_i8_i32 +define void @ret_void_args_struct_i8_i32({ i8, i32 } %a) { +start: + ; CHECK: sts 4, r20 + %0 = extractvalue { i8, i32 } %a, 0 + store volatile i8 %0, i8* inttoptr (i64 4 to i8*) + + ; CHECK-NEXT: sts 8, r24 + ; CHECK-NEXT: sts 7, r23 + ; CHECK-NEXT: sts 6, r22 + ; CHECK-NEXT: sts 5, r21 + %1 = extractvalue { i8, i32 } %a, 1 + store volatile i32 %1, i32* inttoptr (i64 5 to i32*) + ret void +} + +; CHECK-LABEL: ret_void_args_struct_i8_i8_i8_i8 +define void @ret_void_args_struct_i8_i8_i8_i8({ i8, i8, i8, i8 } %a) { +start: + ; CHECK: sts 4, r22 + %0 = extractvalue { i8, i8, i8, i8 } %a, 0 + store volatile i8 %0, i8* inttoptr (i64 4 to i8*) + ; CHECK-NEXT: sts 5, r23 + %1 = extractvalue { i8, i8, i8, i8 } %a, 1 + store volatile i8 %1, i8* inttoptr (i64 5 to i8*) + ; CHECK-NEXT: sts 6, r24 + %2 = extractvalue { i8, i8, i8, i8 } %a, 2 + store volatile i8 %2, i8* inttoptr (i64 6 to i8*) + ; CHECK-NEXT: sts 7, r25 + %3 = extractvalue { i8, i8, i8, i8 } %a, 3 + store volatile i8 %3, i8* inttoptr (i64 7 to i8*) + ret void +} + +; CHECK-LABEL: ret_void_args_struct_i32_16_i8 +define void @ret_void_args_struct_i32_16_i8({ i32, i16, i8} %a) { +start: + ; CHECK: sts 7, r21 + ; CHECK-NEXT: sts 6, r20 + ; CHECK-NEXT: sts 5, r19 + ; CHECK-NEXT: sts 4, r18 + %0 = extractvalue { i32, i16, 
i8 } %a, 0 + store volatile i32 %0, i32* inttoptr (i64 4 to i32*) + + ; CHECK-NEXT: sts 5, r23 + ; CHECK-NEXT: sts 4, r22 + %1 = extractvalue { i32, i16, i8 } %a, 1 + store volatile i16 %1, i16* inttoptr (i64 4 to i16*) + + ; CHECK-NEXT: sts 4, r24 + %2 = extractvalue { i32, i16, i8 } %a, 2 + store volatile i8 %2, i8* inttoptr (i64 4 to i8*) + ret void +} + +; CHECK-LABEL: ret_void_args_struct_i8_i32_struct_i32_i8 +define void @ret_void_args_struct_i8_i32_struct_i32_i8({ i8, i32 } %a, { i32, i8 } %b) { +start: + ; CHECK: sts 4, r20 + %0 = extractvalue { i8, i32 } %a, 0 + store volatile i8 %0, i8* inttoptr (i64 4 to i8*) + + ; CHECK-NEXT: sts 8, r24 + ; CHECK-NEXT: sts 7, r23 + ; CHECK-NEXT: sts 6, r22 + ; CHECK-NEXT: sts 5, r21 + %1 = extractvalue { i8, i32 } %a, 1 + store volatile i32 %1, i32* inttoptr (i64 5 to i32*) + + ; CHECK-NEXT: sts 9, r17 + ; CHECK-NEXT: sts 8, r16 + ; CHECK-NEXT: sts 7, r15 + ; CHECK-NEXT: sts 6, r14 + %2 = extractvalue { i32, i8 } %b, 0 + store volatile i32 %2, i32* inttoptr (i64 6 to i32*) + + ; CHECK-NEXT: sts 7, r18 + %3 = extractvalue { i32, i8 } %b, 1 + store volatile i8 %3, i8* inttoptr (i64 7 to i8*) + ret void +} + diff --git a/llvm/test/CodeGen/AVR/calling-conv/c/call.ll b/llvm/test/CodeGen/AVR/calling-conv/c/call.ll new file mode 100644 index 00000000000000..e218ed37238a35 --- /dev/null +++ b/llvm/test/CodeGen/AVR/calling-conv/c/call.ll @@ -0,0 +1,89 @@ +; RUN: llc < %s -march=avr | FileCheck %s + +declare void @ret_void_args_i8(i8 %a) +declare void @ret_void_args_i8_i32(i8 %a, i32 %b) +declare void @ret_void_args_i8_i8_i8_i8(i8 %a, i8 %b, i8 %c, i8 %d) +declare void @ret_void_args_i32_i16_i8(i32 %a, i16 %b, i8 %c) +declare void @ret_void_args_i64(i64 %a) +declare void @ret_void_args_i64_i64(i64 %a, i64 %b) +declare void @ret_void_args_i64_i64_i16(i64 %a, i64 %b, i16 %c) + +; CHECK-LABEL: call_void_args_i8 +define void @call_void_args_i8() { + ; CHECK: ldi r24, 64 + call void @ret_void_args_i8 (i8 64) + ret void +} + +; 
CHECK-LABEL: call_void_args_i8_i32 +define void @call_void_args_i8_i32() { + ; CHECK: ldi r20, 4 + ; CHECK-NEXT: ldi r21, 3 + ; CHECK-NEXT: ldi r22, 2 + ; CHECK-NEXT: ldi r23, 1 + ; CHECK-NEXT: ldi r24, 64 + call void @ret_void_args_i8_i32 (i8 64, i32 16909060) + ret void +} + +; CHECK-LABEL: call_void_args_i8_i8_i8_i8 +define void @call_void_args_i8_i8_i8_i8() { + ; CHECK: ldi r24, 1 + ; CHECK-NEXT: ldi r22, 2 + ; CHECK-NEXT: ldi r20, 3 + ; CHECK-NEXT: ldi r18, 4 + call void @ret_void_args_i8_i8_i8_i8(i8 1, i8 2, i8 3, i8 4) + ret void +} + +; CHECK-LABEL: call_void_args_i32_i16_i8 +define void @call_void_args_i32_i16_i8() { + ; CHECK: ldi r22, 4 + ; CHECK-NEXT: ldi r23, 3 + ; CHECK-NEXT: ldi r24, 2 + ; CHECK-NEXT: ldi r25, 1 + ; CHECK-NEXT: ldi r20, 1 + ; CHECK-NEXT: ldi r21, 4 + ; CHECK-NEXT: ldi r18, 64 + call void @ret_void_args_i32_i16_i8(i32 16909060, i16 1025, i8 64) + ret void +} + +; CHECK-LABEL: call_void_args_i64 +define void @call_void_args_i64() { + ; CHECK: ldi r18, 8 + ; CHECK-NEXT: ldi r19, 7 + ; CHECK-NEXT: ldi r20, 6 + ; CHECK-NEXT: ldi r21, 5 + ; CHECK-NEXT: ldi r22, 4 + ; CHECK-NEXT: ldi r23, 3 + ; CHECK-NEXT: ldi r24, 2 + ; CHECK-NEXT: ldi r25, 1 + call void @ret_void_args_i64(i64 72623859790382856) + ret void +} + +; CHECK-LABEL: call_void_args_i64_i64 +define void @call_void_args_i64_i64() { + ; CHECK: ldi r18, 8 + ; CHECK-NEXT: ldi r19, 7 + ; CHECK-NEXT: ldi r20, 6 + ; CHECK-NEXT: ldi r21, 5 + ; CHECK-NEXT: ldi r22, 4 + ; CHECK-NEXT: ldi r23, 3 + ; CHECK-NEXT: ldi r24, 2 + ; CHECK-NEXT: ldi r25, 1 + ; the second arg is in r10:r17, but unordered + ; CHECK: r17, + ; CHECK: r10, + call void @ret_void_args_i64_i64(i64 72623859790382856, i64 651345242494996224) + ret void +} + +; CHECK-LABEL: call_void_args_i64_i64_i16 +define void @call_void_args_i64_i64_i16() { + ; CHECK: r8, + ; CHECK: r9, + call void @ret_void_args_i64_i64_i16(i64 72623859790382856, i64 651345242494996224, i16 5655) + ret void +} diff --git 
a/llvm/test/CodeGen/AVR/calling-conv/c/call_aggr.ll b/llvm/test/CodeGen/AVR/calling-conv/c/call_aggr.ll new file mode 100644 index 00000000000000..aeba3c8f9757a5 --- /dev/null +++ b/llvm/test/CodeGen/AVR/calling-conv/c/call_aggr.ll @@ -0,0 +1,48 @@ +; RUN: llc < %s -march=avr | FileCheck %s + +declare void @ret_void_args_struct_i8_i32({ i8, i32 } %a) +declare void @ret_void_args_struct_i8_i8_i8_i8({ i8, i8, i8, i8 } %a) +declare void @ret_void_args_struct_i32_i16_i8({ i32, i16, i8} %a) +declare void @ret_void_args_struct_i8_i32_struct_i32_i8({ i8, i32 } %a, { i32, i8 } %b) + +; CHECK-LABEL: call_void_args_struct_i8_i32 +define void @call_void_args_struct_i8_i32() { + ; CHECK: ldi r20, 64 + ; CHECK-NEXT: r21, + ; CHECK-NEXT: r22, + ; CHECK-NEXT: r23, + ; CHECK-NEXT: r24, + call void @ret_void_args_struct_i8_i32({ i8, i32 } { i8 64, i32 16909060 }) + ret void +} + +; CHECK-LABEL: @call_void_args_struct_i8_i8_i8_i8 +define void @call_void_args_struct_i8_i8_i8_i8() { + ; CHECK: ldi r22, 1 + ; CHECK-NEXT: ldi r23, 2 + ; CHECK-NEXT: ldi r24, 3 + ; CHECK-NEXT: ldi r25, 4 + call void @ret_void_args_struct_i8_i8_i8_i8({ i8, i8, i8, i8 } { i8 1, i8 2, i8 3, i8 4 }) + ret void +} + +; CHECK-LABEL: @call_void_args_struct_i32_i16_i8 +define void @call_void_args_struct_i32_i16_i8() { + ; CHECK: ldi r18, 4 + ; CHECK-NEXT: ldi r19, 3 + ; CHECK-NEXT: ldi r20, 2 + ; CHECK-NEXT: ldi r21, 1 + ; CHECK-NEXT: ldi r22, 23 + ; CHECK-NEXT: ldi r23, 22 + ; CHECK-NEXT: ldi r24, 64 + call void @ret_void_args_struct_i32_i16_i8({ i32, i16, i8 } { i32 16909060, i16 5655, i8 64 }) + ret void +} + +; CHECK-LABEL: @call_void_args_struct_i8_i32_struct_i32_i8 +define void @call_void_args_struct_i8_i32_struct_i32_i8() { + ; CHECK: ldi r20, 64 + ; CHECK: ldi r18, 65 + call void @ret_void_args_struct_i8_i32_struct_i32_i8({ i8, i32 } { i8 64, i32 16909060 }, { i32, i8 } { i32 287454020, i8 65 }) + ret void +} diff --git a/llvm/test/CodeGen/AVR/calling-conv/c/return_aggr.ll 
b/llvm/test/CodeGen/AVR/calling-conv/c/return_aggr.ll new file mode 100644 index 00000000000000..97668f6c05733a --- /dev/null +++ b/llvm/test/CodeGen/AVR/calling-conv/c/return_aggr.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -march=avr | FileCheck %s + +; CHECK-LABEL: ret_struct_i8_i16_i8 +define { i8, i16, i8 } @ret_struct_i8_i16_i8() { +start: + ; for some reason the i16 is loaded to r24:r25 + ; and then moved to r23:r24 + ; CHECK: ldi r22, 64 + ; CHECK-NEXT: r23, + ; CHECK-NEXT: r24, + ; CHECK-NEXT: r25, 11 + %0 = insertvalue {i8, i16, i8} undef, i8 64, 0 + %1 = insertvalue {i8, i16, i8} %0, i16 1024, 1 + %2 = insertvalue {i8, i16, i8} %1, i8 11, 2 + ret {i8, i16, i8} %2 +} + +; CHECK-LABEL: ret_struct_i32_i16 +define { i32, i16 } @ret_struct_i32_i16() { +start: + ; CHECK: ldi r18, 4 + ; CHECK-NEXT: ldi r19, 3 + ; CHECK-NEXT: ldi r20, 2 + ; CHECK-NEXT: ldi r21, 1 + ; CHECK-NEXT: ldi r22, 0 + ; CHECK-NEXT: ldi r23, 8 + %0 = insertvalue { i32, i16 } undef, i32 16909060, 0 + %1 = insertvalue { i32, i16 } %0, i16 2048, 1 + ret { i32, i16} %1 +} +