From bff748956d6209afe47725ef4cdb10923dd0b2f7 Mon Sep 17 00:00:00 2001 From: Torben Hansen <50673096+torben-hansen@users.noreply.github.com> Date: Sat, 20 Apr 2024 14:09:43 -0700 Subject: [PATCH] Support vpinsrq in delocator --- util/fipstools/delocate/delocate.go | 41 ++++++++- util/fipstools/delocate/delocate_test.go | 1 + .../delocate/testdata/x86_64-FourArg/in.s | 13 +++ .../delocate/testdata/x86_64-FourArg/out.s | 85 +++++++++++++++++++ 4 files changed, 138 insertions(+), 2 deletions(-) create mode 100644 util/fipstools/delocate/testdata/x86_64-FourArg/in.s create mode 100644 util/fipstools/delocate/testdata/x86_64-FourArg/out.s diff --git a/util/fipstools/delocate/delocate.go b/util/fipstools/delocate/delocate.go index 4a55f6b3fb..257890fbad 100644 --- a/util/fipstools/delocate/delocate.go +++ b/util/fipstools/delocate/delocate.go @@ -1189,6 +1189,8 @@ const ( instrMemoryVectorCombine // instrThreeArg merges two sources into a destination in some fashion. instrThreeArg + // instrFourArg merges three sources into a destination in some fashion. + instrFourArg // instrCompare takes two arguments and writes outputs to the flags register. 
instrCompare instrOther @@ -1197,7 +1199,7 @@ const ( func (index instructionType) String() string { return [...]string{"instrPush", "instrMove", "instrTransformingMove", "instrJump", "instrConditionalMove", "instrCombine", - "instrMemoryVectorCombine", "instrThreeArg", + "instrMemoryVectorCombine", "instrThreeArg", "instrFourArg", "instrCompare", "instrOther"}[index] } @@ -1238,6 +1240,11 @@ func classifyInstruction(instr string, args []*node32) instructionType { return instrThreeArg } + case "vpinsrq": + if len(args) == 4 { + return instrFourArg + } + case "vpbroadcastq": if len(args) == 2 { return instrTransformingMove @@ -1346,6 +1353,13 @@ func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest s } } +func fourArgCombineOp(w stringWriter, instructionName, source1, source2, source3, dest string) wrapperFunc { + return func(k func()) { + k() + w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + source3 + ", " + dest + "\n") + } +} + func memoryVectorCombineOp(w stringWriter, instructionName, source, dest string) wrapperFunc { return func(k func()) { k() @@ -1484,7 +1498,7 @@ Args: } classification := classifyInstruction(instructionName, argNodes) - if classification != instrThreeArg && classification != instrCompare && i != 0 { + if classification != instrFourArg && classification != instrThreeArg && classification != instrCompare && i != 0 { return nil, fmt.Errorf("GOT access must be source operand, %s", classification) } @@ -1565,6 +1579,29 @@ Args: wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, otherSource, tempReg, targetReg)) } targetReg = tempReg + case instrFourArg: + if n := len(argNodes); n != 4 { + return nil, fmt.Errorf("four-argument instruction has %d arguments", n) + } + // Only support vpinsrq where the second argument is the GOT reloc. 
+ if i != 1 { + return nil, errors.New("GOT access must be from source operand") + } + + // vpinsrq imm8, r64/m64, xmm2, xmm1 + targetReg = d.contents(argNodes[3]) + otherSource := d.contents(argNodes[2]) + gotSource := d.contents(argNodes[1]) + immediate := d.contents(argNodes[0]) + + // Choose free register and prepare stack. + saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, gotSource}) + redzoneCleared = true + wrappers = append(wrappers, saveRegWrapper) + + // Rewrite instruction arguments to use the free register. + wrappers = append(wrappers, fourArgCombineOp(d.output, instructionName, immediate, tempReg, otherSource, targetReg)) + targetReg = tempReg default: return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName) } diff --git a/util/fipstools/delocate/delocate_test.go b/util/fipstools/delocate/delocate_test.go index 7da74eefe9..0c12feb400 100644 --- a/util/fipstools/delocate/delocate_test.go +++ b/util/fipstools/delocate/delocate_test.go @@ -55,6 +55,7 @@ var delocateTests = []delocateTest{ {"x86_64-LabelRewrite", nil, []string{"in1.s", "in2.s"}, "out.s", true}, {"x86_64-Sections", nil, []string{"in.s"}, "out.s", true}, {"x86_64-ThreeArg", nil, []string{"in.s"}, "out.s", true}, + {"x86_64-FourArg", nil, []string{"in.s"}, "out.s", true}, {"aarch64-Basic", nil, []string{"in.s"}, "out.s", true}, } diff --git a/util/fipstools/delocate/testdata/x86_64-FourArg/in.s b/util/fipstools/delocate/testdata/x86_64-FourArg/in.s new file mode 100644 index 0000000000..a5551412dd --- /dev/null +++ b/util/fipstools/delocate/testdata/x86_64-FourArg/in.s @@ -0,0 +1,13 @@ + .type foo, @function + .globl foo +foo: + movq %rbx, %rbx # instruction allowing delocator to detect architecture + vpinsrq $0x08, kBoringSSLRSASqrtTwo@GOTPCREL(%rip), %xmm1, %xmm0 + vpinsrq $1, fooExternal@GOTPCREL(%rip), %xmm14, %xmm15 + + .type kBoringSSLRSASqrtTwo,@object # @kBoringSSLRSASqrtTwo + .section .rodata,"a",@progbits,unique,760 + 
.globl kBoringSSLRSASqrtTwo + .p2align 4 +kBoringSSLRSASqrtTwo: + .quad -2404814165548301886 # 0xdea06241f7aa81c2 diff --git a/util/fipstools/delocate/testdata/x86_64-FourArg/out.s b/util/fipstools/delocate/testdata/x86_64-FourArg/out.s new file mode 100644 index 0000000000..f1e47fb811 --- /dev/null +++ b/util/fipstools/delocate/testdata/x86_64-FourArg/out.s @@ -0,0 +1,85 @@ +.text +.file 1 "inserted_by_delocate.c" +.loc 1 1 0 +BORINGSSL_bcm_text_start: + .type foo, @function + .globl foo +.Lfoo_local_target: +foo: + movq %rbx, %rbx # instruction allowing delocator to detect architecture +# WAS vpinsrq $0x08, kBoringSSLRSASqrtTwo@GOTPCREL(%rip), %xmm1, %xmm0 + leaq -128(%rsp), %rsp + pushq %rax + leaq .LkBoringSSLRSASqrtTwo_local_target(%rip), %rax + vpinsrq $0x08, %rax, %xmm1, %xmm0 + popq %rax + leaq 128(%rsp), %rsp +# WAS vpinsrq $1, fooExternal@GOTPCREL(%rip), %xmm14, %xmm15 + leaq -128(%rsp), %rsp + pushq %rax + pushf + leaq fooExternal_GOTPCREL_external(%rip), %rax + addq (%rax), %rax + movq (%rax), %rax + popf + vpinsrq $1, %rax, %xmm14, %xmm15 + popq %rax + leaq 128(%rsp), %rsp + + .type kBoringSSLRSASqrtTwo,@object # @kBoringSSLRSASqrtTwo +# WAS .section .rodata,"a",@progbits,unique,760 +.text + .globl kBoringSSLRSASqrtTwo + .p2align 4 +.LkBoringSSLRSASqrtTwo_local_target: +kBoringSSLRSASqrtTwo: + .quad -2404814165548301886 # 0xdea06241f7aa81c2 +.text +.loc 1 2 0 +BORINGSSL_bcm_text_end: +.type fooExternal_GOTPCREL_external, @object +.size fooExternal_GOTPCREL_external, 8 +fooExternal_GOTPCREL_external: + .long fooExternal@GOTPCREL + .long 0 +.type OPENSSL_ia32cap_get, @function +.globl OPENSSL_ia32cap_get +.LOPENSSL_ia32cap_get_local_target: +OPENSSL_ia32cap_get: + leaq OPENSSL_ia32cap_P(%rip), %rax + ret +.type BORINGSSL_bcm_text_hash, @object +.size BORINGSSL_bcm_text_hash, 32 +BORINGSSL_bcm_text_hash: +.byte 0xae +.byte 0x2c +.byte 0xea +.byte 0x2a +.byte 0xbd +.byte 0xa6 +.byte 0xf3 +.byte 0xec +.byte 0x97 +.byte 0x7f +.byte 0x9b +.byte 0xf6 +.byte 
0x94 +.byte 0x9a +.byte 0xfc +.byte 0x83 +.byte 0x68 +.byte 0x27 +.byte 0xcb +.byte 0xa0 +.byte 0xa0 +.byte 0x9f +.byte 0x6b +.byte 0x6f +.byte 0xde +.byte 0x52 +.byte 0xcd +.byte 0xe2 +.byte 0xcd +.byte 0xff +.byte 0x31 +.byte 0x80