From 51e14db0fd10c2113d73fbc020eef62e68266bee Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Nov 2024 21:11:33 +0000 Subject: [PATCH] Bump github.com/onsi/gomega from 1.34.2 to 1.35.1 Bumps [github.com/onsi/gomega](https://github.com/onsi/gomega) from 1.34.2 to 1.35.1. - [Release notes](https://github.com/onsi/gomega/releases) - [Changelog](https://github.com/onsi/gomega/blob/master/CHANGELOG.md) - [Commits](https://github.com/onsi/gomega/compare/v1.34.2...v1.35.1) --- updated-dependencies: - dependency-name: github.com/onsi/gomega dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- go.mod | 14 +- go.sum | 28 +- vendor/github.com/onsi/gomega/CHANGELOG.md | 20 + vendor/github.com/onsi/gomega/gomega_dsl.go | 26 +- .../onsi/gomega/internal/async_assertion.go | 12 +- .../onsi/gomega/internal/duration_bundle.go | 17 +- .../github.com/onsi/gomega/internal/gomega.go | 8 + .../gomega/internal/polling_signal_error.go | 11 + .../onsi/gomega/matchers/have_field.go | 36 +- vendor/github.com/onsi/gomega/types/types.go | 2 + .../golang.org/x/crypto/argon2/blamka_amd64.s | 2972 +++- .../x/crypto/blake2b/blake2bAVX2_amd64.s | 5167 ++++++- .../x/crypto/blake2b/blake2b_amd64.s | 1681 ++- .../chacha20poly1305/chacha20poly1305_amd64.s | 11503 +++++++++++++--- .../x/crypto/internal/poly1305/sum_amd64.s | 133 +- .../x/crypto/salsa20/salsa/salsa20_amd64.s | 1742 +-- vendor/golang.org/x/crypto/sha3/shake.go | 4 +- vendor/golang.org/x/crypto/ssh/server.go | 4 +- vendor/golang.org/x/net/http2/config.go | 122 + vendor/golang.org/x/net/http2/config_go124.go | 61 + .../x/net/http2/config_pre_go124.go | 16 + vendor/golang.org/x/net/http2/http2.go | 53 +- vendor/golang.org/x/net/http2/server.go | 181 +- vendor/golang.org/x/net/http2/transport.go | 143 +- vendor/golang.org/x/net/http2/write.go | 10 + vendor/golang.org/x/sys/cpu/cpu.go | 19 + .../golang.org/x/sys/cpu/cpu_linux_noinit.go | 2 +- .../golang.org/x/sys/cpu/cpu_linux_riscv64.go | 137 + vendor/golang.org/x/sys/cpu/cpu_riscv64.go | 11 +- vendor/golang.org/x/sys/unix/README.md | 2 +- vendor/golang.org/x/sys/unix/mkerrors.sh | 5 +- vendor/golang.org/x/sys/unix/syscall_aix.go | 2 +- .../golang.org/x/sys/unix/syscall_darwin.go | 37 + vendor/golang.org/x/sys/unix/syscall_hurd.go | 1 + vendor/golang.org/x/sys/unix/syscall_linux.go | 63 +- .../x/sys/unix/syscall_linux_arm64.go | 2 + .../x/sys/unix/syscall_linux_loong64.go | 2 + .../x/sys/unix/syscall_linux_riscv64.go | 2 + .../golang.org/x/sys/unix/vgetrandom_linux.go | 13 + .../x/sys/unix/vgetrandom_unsupported.go | 11 + .../x/sys/unix/zerrors_darwin_amd64.go | 7 + .../x/sys/unix/zerrors_darwin_arm64.go | 7 + vendor/golang.org/x/sys/unix/zerrors_linux.go | 13 +- .../x/sys/unix/zerrors_linux_386.go | 5 + .../x/sys/unix/zerrors_linux_amd64.go | 5 + .../x/sys/unix/zerrors_linux_arm.go | 5 + .../x/sys/unix/zerrors_linux_arm64.go | 5 + .../x/sys/unix/zerrors_linux_loong64.go | 5 + .../x/sys/unix/zerrors_linux_mips.go | 5 + .../x/sys/unix/zerrors_linux_mips64.go | 5 + .../x/sys/unix/zerrors_linux_mips64le.go | 5 + .../x/sys/unix/zerrors_linux_mipsle.go | 5 + .../x/sys/unix/zerrors_linux_ppc.go | 5 + .../x/sys/unix/zerrors_linux_ppc64.go | 5 + .../x/sys/unix/zerrors_linux_ppc64le.go | 5 + .../x/sys/unix/zerrors_linux_riscv64.go | 5 + .../x/sys/unix/zerrors_linux_s390x.go | 5 + .../x/sys/unix/zerrors_linux_sparc64.go | 5 + .../x/sys/unix/zerrors_zos_s390x.go | 2 + .../x/sys/unix/zsyscall_darwin_amd64.go | 20 + .../x/sys/unix/zsyscall_darwin_amd64.s | 5 + .../x/sys/unix/zsyscall_darwin_arm64.go | 20 + .../x/sys/unix/zsyscall_darwin_arm64.s | 5 + .../golang.org/x/sys/unix/zsyscall_linux.go | 17 - .../x/sys/unix/zsysnum_linux_amd64.go | 1 + .../x/sys/unix/zsysnum_linux_arm64.go | 2 +- .../x/sys/unix/zsysnum_linux_loong64.go | 2 + .../x/sys/unix/zsysnum_linux_riscv64.go | 2 +- .../x/sys/unix/ztypes_darwin_amd64.go | 13 + .../x/sys/unix/ztypes_darwin_arm64.go | 13 + .../x/sys/unix/ztypes_freebsd_386.go | 1 + .../x/sys/unix/ztypes_freebsd_amd64.go | 1 + .../x/sys/unix/ztypes_freebsd_arm.go | 1 + .../x/sys/unix/ztypes_freebsd_arm64.go | 1 + .../x/sys/unix/ztypes_freebsd_riscv64.go | 1 + vendor/golang.org/x/sys/unix/ztypes_linux.go | 90 +- .../x/sys/unix/ztypes_linux_riscv64.go | 33 + .../golang.org/x/sys/windows/dll_windows.go | 2 +- .../x/sys/windows/syscall_windows.go | 4 + .../golang.org/x/sys/windows/types_windows.go | 1 + .../x/sys/windows/zsyscall_windows.go | 38 + vendor/golang.org/x/term/term_windows.go | 1 + .../protobuf/encoding/protojson/decode.go | 2 +- .../protobuf/encoding/protojson/encode.go | 4 +- .../protobuf/internal/descopts/options.go | 20 +- .../internal/editionssupport/editions.go | 2 +- .../protobuf/internal/filedesc/desc.go | 4 + .../protobuf/internal/filedesc/desc_init.go | 2 + .../protobuf/internal/filedesc/desc_lazy.go | 2 + .../protobuf/internal/filedesc/editions.go | 2 +- .../protobuf/internal/genid/doc.go | 2 +- .../internal/genid/go_features_gen.go | 15 +- .../protobuf/internal/genid/map_entry.go | 2 +- .../protobuf/internal/genid/wrappers.go | 2 +- .../protobuf/internal/impl/codec_extension.go | 11 +- .../protobuf/internal/impl/codec_field.go | 3 + .../protobuf/internal/impl/codec_message.go | 3 + .../protobuf/internal/impl/codec_reflect.go | 210 - .../protobuf/internal/impl/codec_unsafe.go | 3 - .../protobuf/internal/impl/convert.go | 2 +- .../protobuf/internal/impl/encode.go | 2 +- .../protobuf/internal/impl/equal.go | 224 + .../internal/impl/legacy_extension.go | 1 + .../protobuf/internal/impl/message.go | 4 +- .../protobuf/internal/impl/pointer_reflect.go | 215 - .../protobuf/internal/impl/pointer_unsafe.go | 3 - .../protobuf/internal/strs/strings_pure.go | 28 - .../internal/strs/strings_unsafe_go120.go | 3 +- .../internal/strs/strings_unsafe_go121.go | 3 +- .../protobuf/internal/version/version.go | 4 +- .../google.golang.org/protobuf/proto/equal.go | 9 + .../protobuf/proto/extension.go | 71 + .../protobuf/reflect/protodesc/desc_init.go | 4 + .../protobuf/reflect/protodesc/editions.go | 2 +- .../protobuf/reflect/protoreflect/methods.go | 10 + .../reflect/protoreflect/value_pure.go | 60 - .../protoreflect/value_unsafe_go120.go | 3 +- .../protoreflect/value_unsafe_go121.go | 3 +- .../protobuf/runtime/protoiface/methods.go | 18 + .../types/descriptorpb/descriptor.pb.go | 748 +- .../types/gofeaturespb/go_features.pb.go | 24 +- .../protobuf/types/known/anypb/any.pb.go | 24 +- .../types/known/durationpb/duration.pb.go | 24 +- .../protobuf/types/known/emptypb/empty.pb.go | 24 +- .../types/known/fieldmaskpb/field_mask.pb.go | 24 +- .../types/known/structpb/struct.pb.go | 110 +- .../types/known/timestamppb/timestamp.pb.go | 24 +- .../types/known/wrapperspb/wrappers.pb.go | 200 +- vendor/modules.txt | 16 +- 129 files changed, 20682 insertions(+), 6147 deletions(-) create mode 100644 vendor/golang.org/x/net/http2/config.go create mode 100644 vendor/golang.org/x/net/http2/config_go124.go create mode 100644 vendor/golang.org/x/net/http2/config_pre_go124.go create mode 100644 vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go create mode 100644 vendor/golang.org/x/sys/unix/vgetrandom_linux.go create mode 100644 vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go delete mode 100644 vendor/google.golang.org/protobuf/internal/impl/codec_reflect.go create mode 100644 vendor/google.golang.org/protobuf/internal/impl/equal.go delete mode 100644 vendor/google.golang.org/protobuf/internal/impl/pointer_reflect.go delete mode 100644 vendor/google.golang.org/protobuf/internal/strs/strings_pure.go delete mode 100644 vendor/google.golang.org/protobuf/reflect/protoreflect/value_pure.go diff --git a/go.mod b/go.mod index 8852205b..724a0004 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ toolchain go1.22.4 require ( github.com/cli/go-gh/v2 v2.11.0 github.com/go-errors/errors v1.5.1 - github.com/onsi/gomega v1.34.2 + github.com/onsi/gomega v1.35.1 github.com/sigstore/cosign/v2 v2.2.4 github.com/spf13/cobra v1.8.1 github.com/spf13/pflag v1.0.5 @@ -250,22 +250,22 @@ require ( go.step.sm/crypto v0.44.2 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - golang.org/x/crypto v0.26.0 // indirect + golang.org/x/crypto v0.28.0 // indirect golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect golang.org/x/mod v0.19.0 // indirect - golang.org/x/net v0.28.0 // indirect + golang.org/x/net v0.30.0 // indirect golang.org/x/oauth2 v0.22.0 // indirect golang.org/x/sync v0.8.0 // indirect - golang.org/x/sys v0.24.0 // indirect - golang.org/x/term v0.23.0 // indirect - golang.org/x/text v0.17.0 // indirect + golang.org/x/sys v0.26.0 // indirect + golang.org/x/term v0.25.0 // indirect + golang.org/x/text v0.19.0 // indirect golang.org/x/time v0.5.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/api v0.181.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142 // indirect google.golang.org/grpc v1.67.0 // indirect - google.golang.org/protobuf v1.34.2 // indirect + google.golang.org/protobuf v1.35.1 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect k8s.io/api v0.30.3 // indirect diff --git a/go.sum b/go.sum index 42482383..92e4dc73 100644 --- a/go.sum +++ b/go.sum @@ -666,8 +666,8 @@ github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7J github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= github.com/onsi/gomega v1.17.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= github.com/onsi/gomega v1.19.0/go.mod h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9yPro= -github.com/onsi/gomega v1.34.2 h1:pNCwDkzrsv7MS9kpaQvVb1aVLahQXyJ/Tv5oAZMI3i8= -github.com/onsi/gomega v1.34.2/go.mod h1:v1xfxRgk0KIsG+QOdm7p8UosrOzPYRo60fd3B/1Dukc= +github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4= +github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= github.com/open-policy-agent/opa v0.63.0 h1:ztNNste1v8kH0/vJMJNquE45lRvqwrM5mY9Ctr9xIXw= github.com/open-policy-agent/opa v0.63.0/go.mod h1:9VQPqEfoB2N//AToTxzZ1pVTVPUoF2Mhd64szzjWPpU= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= @@ -940,8 +940,8 @@ golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58 golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU= golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= -golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= -golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= +golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= +golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -1026,8 +1026,8 @@ golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.11.0/go.mod h1:2L/ixqYpgIVXmeoSA/4Lu7BzTG4KIyPIryS4IsOd1oQ= -golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= -golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= +golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= +golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -1111,8 +1111,8 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= -golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= @@ -1121,8 +1121,8 @@ golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.9.0/go.mod h1:M6DEAAIenWoTxdKrOltXcmDY3rSplQUkrvaDU5FcQyo= golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= -golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= -golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= +golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= +golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1136,8 +1136,8 @@ golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= -golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -1290,8 +1290,8 @@ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp0 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= -google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/vendor/github.com/onsi/gomega/CHANGELOG.md b/vendor/github.com/onsi/gomega/CHANGELOG.md index 7972bbc3..9f6090b8 100644 --- a/vendor/github.com/onsi/gomega/CHANGELOG.md +++ b/vendor/github.com/onsi/gomega/CHANGELOG.md @@ -1,3 +1,23 @@ +## 1.35.1 + +### Fixes +- Export EnforceDefaultTimeoutsWhenUsingContexts and DisableDefaultTimeoutsWhenUsingContext [ca36da1] + +## 1.35.0 + +### Features + +- You can now call `EnforceDefaultTimeoutsWhenUsingContexts()` to have `Eventually` honor the default timeout when passed a context. (prior to this you had to expclility add a timeout) [e4c4265] +- You can call `StopTrying(message).Successfully()` to abort a `Consistently` early without failure [eeca931] + +### Fixes + +- Stop memoizing the result of `HaveField` to avoid unexpected errors when used with async assertions. [3bdbc4e] + +### Maintenance + +- Bump all dependencies [a05a416] + ## 1.34.2 Require Go 1.22+ diff --git a/vendor/github.com/onsi/gomega/gomega_dsl.go b/vendor/github.com/onsi/gomega/gomega_dsl.go index edacf8c1..1038d7dd 100644 --- a/vendor/github.com/onsi/gomega/gomega_dsl.go +++ b/vendor/github.com/onsi/gomega/gomega_dsl.go @@ -22,7 +22,7 @@ import ( "github.com/onsi/gomega/types" ) -const GOMEGA_VERSION = "1.34.2" +const GOMEGA_VERSION = "1.35.1" const nilGomegaPanic = `You are trying to make an assertion, but haven't registered Gomega's fail handler. If you're using Ginkgo then you probably forgot to put your assertion in an It(). @@ -319,7 +319,19 @@ you an also use Eventually().WithContext(ctx) to pass in the context. Passed-in Eventually(client.FetchCount).WithContext(ctx).WithArguments("/users").Should(BeNumerically(">=", 17)) }, SpecTimeout(time.Second)) -Either way the context passd to Eventually is also passed to the underlying function. Now, when Ginkgo cancels the context both the FetchCount client and Gomega will be informed and can exit. +Either way the context pasesd to Eventually is also passed to the underlying function. Now, when Ginkgo cancels the context both the FetchCount client and Gomega will be informed and can exit. + +By default, when a context is passed to Eventually *without* an explicit timeout, Gomega will rely solely on the context's cancellation to determine when to stop polling. If you want to specify a timeout in addition to the context you can do so using the .WithTimeout() method. For example: + + Eventually(client.FetchCount).WithContext(ctx).WithTimeout(10*time.Second).Should(BeNumerically(">=", 17)) + +now either the context cacnellation or the timeout will cause Eventually to stop polling. + +If, instead, you would like to opt out of this behavior and have Gomega's default timeouts govern Eventuallys that take a context you can call: + + EnforceDefaultTimeoutsWhenUsingContexts() + +in the DSL (or on a Gomega instance). Now all calls to Eventually that take a context will fail if eitehr the context is cancelled or the default timeout elapses. **Category 3: Making assertions _in_ the function passed into Eventually** @@ -491,6 +503,16 @@ func SetDefaultConsistentlyPollingInterval(t time.Duration) { Default.SetDefaultConsistentlyPollingInterval(t) } +// EnforceDefaultTimeoutsWhenUsingContexts forces `Eventually` to apply a default timeout even when a context is provided. +func EnforceDefaultTimeoutsWhenUsingContexts() { + Default.EnforceDefaultTimeoutsWhenUsingContexts() +} + +// DisableDefaultTimeoutsWhenUsingContext disables the default timeout when a context is provided to `Eventually`. +func DisableDefaultTimeoutsWhenUsingContext() { + Default.DisableDefaultTimeoutsWhenUsingContext() +} + // AsyncAssertion is returned by Eventually and Consistently and polls the actual value passed into Eventually against // the matcher passed to the Should and ShouldNot methods. // diff --git a/vendor/github.com/onsi/gomega/internal/async_assertion.go b/vendor/github.com/onsi/gomega/internal/async_assertion.go index cde9e2ec..8b4cd1f5 100644 --- a/vendor/github.com/onsi/gomega/internal/async_assertion.go +++ b/vendor/github.com/onsi/gomega/internal/async_assertion.go @@ -335,7 +335,7 @@ func (assertion *AsyncAssertion) afterTimeout() <-chan time.Time { if assertion.asyncType == AsyncAssertionTypeConsistently { return time.After(assertion.g.DurationBundle.ConsistentlyDuration) } else { - if assertion.ctx == nil { + if assertion.ctx == nil || assertion.g.DurationBundle.EnforceDefaultTimeoutsWhenUsingContexts { return time.After(assertion.g.DurationBundle.EventuallyTimeout) } else { return nil @@ -496,7 +496,15 @@ func (assertion *AsyncAssertion) match(matcher types.GomegaMatcher, desiredMatch for _, err := range []error{actualErr, matcherErr} { if pollingSignalErr, ok := AsPollingSignalError(err); ok { if pollingSignalErr.IsStopTrying() { - fail("Told to stop trying") + if pollingSignalErr.IsSuccessful() { + if assertion.asyncType == AsyncAssertionTypeEventually { + fail("Told to stop trying (and ignoring call to Successfully(), as it is only relevant with Consistently)") + } else { + return true // early escape hatch for Consistently + } + } else { + fail("Told to stop trying") + } return false } if pollingSignalErr.IsTryAgainAfter() { diff --git a/vendor/github.com/onsi/gomega/internal/duration_bundle.go b/vendor/github.com/onsi/gomega/internal/duration_bundle.go index 6e0d90d3..2e026c33 100644 --- a/vendor/github.com/onsi/gomega/internal/duration_bundle.go +++ b/vendor/github.com/onsi/gomega/internal/duration_bundle.go @@ -8,10 +8,11 @@ import ( ) type DurationBundle struct { - EventuallyTimeout time.Duration - EventuallyPollingInterval time.Duration - ConsistentlyDuration time.Duration - ConsistentlyPollingInterval time.Duration + EventuallyTimeout time.Duration + EventuallyPollingInterval time.Duration + ConsistentlyDuration time.Duration + ConsistentlyPollingInterval time.Duration + EnforceDefaultTimeoutsWhenUsingContexts bool } const ( @@ -20,15 +21,19 @@ const ( ConsistentlyDurationEnvVarName = "GOMEGA_DEFAULT_CONSISTENTLY_DURATION" ConsistentlyPollingIntervalEnvVarName = "GOMEGA_DEFAULT_CONSISTENTLY_POLLING_INTERVAL" + + EnforceDefaultTimeoutsWhenUsingContextsEnvVarName = "GOMEGA_ENFORCE_DEFAULT_TIMEOUTS_WHEN_USING_CONTEXTS" ) func FetchDefaultDurationBundle() DurationBundle { + _, EnforceDefaultTimeoutsWhenUsingContexts := os.LookupEnv(EnforceDefaultTimeoutsWhenUsingContextsEnvVarName) return DurationBundle{ EventuallyTimeout: durationFromEnv(EventuallyTimeoutEnvVarName, time.Second), EventuallyPollingInterval: durationFromEnv(EventuallyPollingIntervalEnvVarName, 10*time.Millisecond), - ConsistentlyDuration: durationFromEnv(ConsistentlyDurationEnvVarName, 100*time.Millisecond), - ConsistentlyPollingInterval: durationFromEnv(ConsistentlyPollingIntervalEnvVarName, 10*time.Millisecond), + ConsistentlyDuration: durationFromEnv(ConsistentlyDurationEnvVarName, 100*time.Millisecond), + ConsistentlyPollingInterval: durationFromEnv(ConsistentlyPollingIntervalEnvVarName, 10*time.Millisecond), + EnforceDefaultTimeoutsWhenUsingContexts: EnforceDefaultTimeoutsWhenUsingContexts, } } diff --git a/vendor/github.com/onsi/gomega/internal/gomega.go b/vendor/github.com/onsi/gomega/internal/gomega.go index de1f4f33..c6e2fcc0 100644 --- a/vendor/github.com/onsi/gomega/internal/gomega.go +++ b/vendor/github.com/onsi/gomega/internal/gomega.go @@ -127,3 +127,11 @@ func (g *Gomega) SetDefaultConsistentlyDuration(t time.Duration) { func (g *Gomega) SetDefaultConsistentlyPollingInterval(t time.Duration) { g.DurationBundle.ConsistentlyPollingInterval = t } + +func (g *Gomega) EnforceDefaultTimeoutsWhenUsingContexts() { + g.DurationBundle.EnforceDefaultTimeoutsWhenUsingContexts = true +} + +func (g *Gomega) DisableDefaultTimeoutsWhenUsingContext() { + g.DurationBundle.EnforceDefaultTimeoutsWhenUsingContexts = false +} diff --git a/vendor/github.com/onsi/gomega/internal/polling_signal_error.go b/vendor/github.com/onsi/gomega/internal/polling_signal_error.go index 83b04b1a..3a4f7ddd 100644 --- a/vendor/github.com/onsi/gomega/internal/polling_signal_error.go +++ b/vendor/github.com/onsi/gomega/internal/polling_signal_error.go @@ -17,6 +17,7 @@ type PollingSignalError interface { error Wrap(err error) PollingSignalError Attach(description string, obj any) PollingSignalError + Successfully() PollingSignalError Now() } @@ -45,6 +46,7 @@ type PollingSignalErrorImpl struct { wrappedErr error pollingSignalErrorType PollingSignalErrorType duration time.Duration + successful bool Attachments []PollingSignalErrorAttachment } @@ -73,6 +75,11 @@ func (s *PollingSignalErrorImpl) Unwrap() error { return s.wrappedErr } +func (s *PollingSignalErrorImpl) Successfully() PollingSignalError { + s.successful = true + return s +} + func (s *PollingSignalErrorImpl) Now() { panic(s) } @@ -81,6 +88,10 @@ func (s *PollingSignalErrorImpl) IsStopTrying() bool { return s.pollingSignalErrorType == PollingSignalErrorTypeStopTrying } +func (s *PollingSignalErrorImpl) IsSuccessful() bool { + return s.successful +} + func (s *PollingSignalErrorImpl) IsTryAgainAfter() bool { return s.pollingSignalErrorType == PollingSignalErrorTypeTryAgainAfter } diff --git a/vendor/github.com/onsi/gomega/matchers/have_field.go b/vendor/github.com/onsi/gomega/matchers/have_field.go index 6989f78c..8dd3f871 100644 --- a/vendor/github.com/onsi/gomega/matchers/have_field.go +++ b/vendor/github.com/onsi/gomega/matchers/have_field.go @@ -17,7 +17,7 @@ func (e missingFieldError) Error() string { return string(e) } -func extractField(actual interface{}, field string, matchername string) (interface{}, error) { +func extractField(actual interface{}, field string, matchername string) (any, error) { fields := strings.SplitN(field, ".", 2) actualValue := reflect.ValueOf(actual) @@ -64,36 +64,46 @@ func extractField(actual interface{}, field string, matchername string) (interfa type HaveFieldMatcher struct { Field string Expected interface{} +} - extractedField interface{} - expectedMatcher omegaMatcher +func (matcher *HaveFieldMatcher) expectedMatcher() omegaMatcher { + var isMatcher bool + expectedMatcher, isMatcher := matcher.Expected.(omegaMatcher) + if !isMatcher { + expectedMatcher = &EqualMatcher{Expected: matcher.Expected} + } + return expectedMatcher } func (matcher *HaveFieldMatcher) Match(actual interface{}) (success bool, err error) { - matcher.extractedField, err = extractField(actual, matcher.Field, "HaveField") + extractedField, err := extractField(actual, matcher.Field, "HaveField") if err != nil { return false, err } - var isMatcher bool - matcher.expectedMatcher, isMatcher = matcher.Expected.(omegaMatcher) - if !isMatcher { - matcher.expectedMatcher = &EqualMatcher{Expected: matcher.Expected} - } - - return matcher.expectedMatcher.Match(matcher.extractedField) + return matcher.expectedMatcher().Match(extractedField) } func (matcher *HaveFieldMatcher) FailureMessage(actual interface{}) (message string) { + extractedField, err := extractField(actual, matcher.Field, "HaveField") + if err != nil { + // this really shouldn't happen + return fmt.Sprintf("Failed to extract field '%s': %s", matcher.Field, err) + } message = fmt.Sprintf("Value for field '%s' failed to satisfy matcher.\n", matcher.Field) - message += matcher.expectedMatcher.FailureMessage(matcher.extractedField) + message += matcher.expectedMatcher().FailureMessage(extractedField) return message } func (matcher *HaveFieldMatcher) NegatedFailureMessage(actual interface{}) (message string) { + extractedField, err := extractField(actual, matcher.Field, "HaveField") + if err != nil { + // this really shouldn't happen + return fmt.Sprintf("Failed to extract field '%s': %s", matcher.Field, err) + } message = fmt.Sprintf("Value for field '%s' satisfied matcher, but should not have.\n", matcher.Field) - message += matcher.expectedMatcher.NegatedFailureMessage(matcher.extractedField) + message += matcher.expectedMatcher().NegatedFailureMessage(extractedField) return message } diff --git a/vendor/github.com/onsi/gomega/types/types.go b/vendor/github.com/onsi/gomega/types/types.go index 7c7adb94..30f2beed 100644 --- a/vendor/github.com/onsi/gomega/types/types.go +++ b/vendor/github.com/onsi/gomega/types/types.go @@ -29,6 +29,8 @@ type Gomega interface { SetDefaultEventuallyPollingInterval(time.Duration) SetDefaultConsistentlyDuration(time.Duration) SetDefaultConsistentlyPollingInterval(time.Duration) + EnforceDefaultTimeoutsWhenUsingContexts() + DisableDefaultTimeoutsWhenUsingContext() } // All Gomega matchers must implement the GomegaMatcher interface diff --git a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s index 6713acca..c3895478 100644 --- a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s +++ b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s @@ -1,243 +1,2791 @@ -// Copyright 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run blamka_amd64.go -out ../blamka_amd64.s -pkg argon2. DO NOT EDIT. //go:build amd64 && gc && !purego #include "textflag.h" -DATA ·c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·c40<>(SB), (NOPTR+RODATA), $16 - -DATA ·c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·c48<>(SB), (NOPTR+RODATA), $16 - -#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \ - MOVO v4, t1; \ - MOVO v5, v4; \ - MOVO t1, v5; \ - MOVO v6, t1; \ - PUNPCKLQDQ v6, t2; \ - PUNPCKHQDQ v7, v6; \ - PUNPCKHQDQ t2, v6; \ - PUNPCKLQDQ v7, t2; \ - MOVO t1, v7; \ - MOVO v2, t1; \ - PUNPCKHQDQ t2, v7; \ - PUNPCKLQDQ v3, t2; \ - PUNPCKHQDQ t2, v2; \ - PUNPCKLQDQ t1, t2; \ - PUNPCKHQDQ t2, v3 - -#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \ - MOVO v4, t1; \ - MOVO v5, v4; \ - MOVO t1, v5; \ - MOVO v2, t1; \ - PUNPCKLQDQ v2, t2; \ - PUNPCKHQDQ v3, v2; \ - PUNPCKHQDQ t2, v2; \ - PUNPCKLQDQ v3, t2; \ - MOVO t1, v3; \ - MOVO v6, t1; \ - PUNPCKHQDQ t2, v3; \ - PUNPCKLQDQ v7, t2; \ - PUNPCKHQDQ t2, v6; \ - PUNPCKLQDQ t1, t2; \ - PUNPCKHQDQ t2, v7 - -#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, t0, c40, c48) \ - MOVO v0, t0; \ - PMULULQ v2, t0; \ - PADDQ v2, v0; \ - PADDQ t0, v0; \ - PADDQ t0, v0; \ - PXOR v0, v6; \ - PSHUFD $0xB1, v6, v6; \ - MOVO v4, t0; \ - PMULULQ v6, t0; \ - PADDQ v6, v4; \ - PADDQ t0, v4; \ - PADDQ t0, v4; \ - PXOR v4, v2; \ - PSHUFB c40, v2; \ - MOVO v0, t0; \ - PMULULQ v2, t0; \ - PADDQ v2, v0; \ - PADDQ t0, v0; \ - PADDQ t0, v0; \ - PXOR v0, v6; \ - PSHUFB c48, v6; \ - MOVO v4, t0; \ - PMULULQ v6, t0; \ - PADDQ v6, v4; \ - PADDQ t0, v4; \ - PADDQ t0, v4; \ - PXOR v4, v2; \ - MOVO v2, t0; \ - PADDQ v2, t0; \ - PSRLQ $63, v2; \ - PXOR t0, v2; \ - MOVO v1, t0; \ - PMULULQ v3, t0; \ - PADDQ v3, v1; \ - PADDQ t0, v1; \ - PADDQ t0, v1; \ - PXOR v1, v7; \ - PSHUFD $0xB1, v7, v7; \ - MOVO v5, t0; \ - PMULULQ v7, t0; \ - PADDQ v7, v5; \ - PADDQ t0, v5; \ - PADDQ t0, v5; \ - PXOR v5, v3; \ - PSHUFB c40, v3; \ - MOVO v1, t0; \ - PMULULQ v3, t0; \ - PADDQ v3, v1; \ - PADDQ t0, v1; \ - PADDQ t0, v1; \ - PXOR v1, v7; \ - PSHUFB c48, v7; \ - MOVO v5, t0; \ - PMULULQ v7, t0; \ - PADDQ v7, v5; \ - PADDQ t0, v5; \ - PADDQ t0, v5; \ - PXOR v5, v3; \ - MOVO v3, t0; \ - PADDQ v3, t0; \ - PSRLQ $63, v3; \ - PXOR t0, v3 - -#define LOAD_MSG_0(block, off) \ - MOVOU 8*(off+0)(block), X0; \ - MOVOU 8*(off+2)(block), X1; \ - MOVOU 8*(off+4)(block), X2; \ - MOVOU 8*(off+6)(block), X3; \ - MOVOU 8*(off+8)(block), X4; \ - MOVOU 8*(off+10)(block), X5; \ - MOVOU 8*(off+12)(block), X6; \ - MOVOU 8*(off+14)(block), X7 - -#define STORE_MSG_0(block, off) \ - MOVOU X0, 8*(off+0)(block); \ - MOVOU X1, 8*(off+2)(block); \ - MOVOU X2, 8*(off+4)(block); \ - MOVOU X3, 8*(off+6)(block); \ - MOVOU X4, 8*(off+8)(block); \ - MOVOU X5, 8*(off+10)(block); \ - MOVOU X6, 8*(off+12)(block); \ - MOVOU X7, 8*(off+14)(block) - -#define LOAD_MSG_1(block, off) \ - MOVOU 8*off+0*8(block), X0; \ - MOVOU 8*off+16*8(block), X1; \ - MOVOU 8*off+32*8(block), X2; \ - MOVOU 8*off+48*8(block), X3; \ - MOVOU 8*off+64*8(block), X4; \ - MOVOU 8*off+80*8(block), X5; \ - MOVOU 8*off+96*8(block), X6; \ - MOVOU 8*off+112*8(block), X7 - -#define STORE_MSG_1(block, off) \ - MOVOU X0, 8*off+0*8(block); \ - MOVOU X1, 8*off+16*8(block); \ - MOVOU X2, 8*off+32*8(block); \ - MOVOU X3, 8*off+48*8(block); \ - MOVOU X4, 8*off+64*8(block); \ - MOVOU X5, 8*off+80*8(block); \ - MOVOU X6, 8*off+96*8(block); \ - MOVOU X7, 8*off+112*8(block) - -#define BLAMKA_ROUND_0(block, off, t0, t1, c40, c48) \ - LOAD_MSG_0(block, off); \ - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ - SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \ - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \ - STORE_MSG_0(block, off) - -#define BLAMKA_ROUND_1(block, off, t0, t1, c40, c48) \ - LOAD_MSG_1(block, off); \ - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ - SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \ - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \ - STORE_MSG_1(block, off) - // func blamkaSSE4(b *block) -TEXT ·blamkaSSE4(SB), 4, $0-8 - MOVQ b+0(FP), AX - - MOVOU ·c40<>(SB), X10 - MOVOU ·c48<>(SB), X11 +// Requires: SSE2, SSSE3 +TEXT ·blamkaSSE4(SB), NOSPLIT, $0-8 + MOVQ b+0(FP), AX + MOVOU ·c40<>+0(SB), X10 + MOVOU ·c48<>+0(SB), X11 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU 32(AX), X2 + MOVOU 48(AX), X3 + MOVOU 64(AX), X4 + MOVOU 80(AX), X5 + MOVOU 96(AX), X6 + MOVOU 112(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, 32(AX) + MOVOU X3, 48(AX) + MOVOU X4, 64(AX) + MOVOU X5, 80(AX) + MOVOU X6, 96(AX) + MOVOU X7, 112(AX) + MOVOU 128(AX), X0 + MOVOU 144(AX), X1 + MOVOU 160(AX), X2 + MOVOU 176(AX), X3 + MOVOU 192(AX), X4 + MOVOU 208(AX), X5 + MOVOU 224(AX), X6 + MOVOU 240(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 128(AX) + MOVOU X1, 144(AX) + MOVOU X2, 160(AX) + MOVOU X3, 176(AX) + MOVOU X4, 192(AX) + MOVOU X5, 208(AX) + MOVOU X6, 224(AX) + MOVOU X7, 240(AX) + MOVOU 256(AX), X0 + MOVOU 272(AX), X1 + MOVOU 288(AX), X2 + MOVOU 304(AX), X3 + MOVOU 320(AX), X4 + MOVOU 336(AX), X5 + MOVOU 352(AX), X6 + MOVOU 368(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 256(AX) + MOVOU X1, 272(AX) + MOVOU X2, 288(AX) + MOVOU X3, 304(AX) + MOVOU X4, 320(AX) + MOVOU X5, 336(AX) + MOVOU X6, 352(AX) + MOVOU X7, 368(AX) + MOVOU 384(AX), X0 + MOVOU 400(AX), X1 + MOVOU 416(AX), X2 + MOVOU 432(AX), X3 + MOVOU 448(AX), X4 + MOVOU 464(AX), X5 + MOVOU 480(AX), X6 + MOVOU 496(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 384(AX) + MOVOU X1, 400(AX) + MOVOU X2, 416(AX) + MOVOU X3, 432(AX) + MOVOU X4, 448(AX) + MOVOU X5, 464(AX) + MOVOU X6, 480(AX) + MOVOU X7, 496(AX) + MOVOU 512(AX), X0 + MOVOU 528(AX), X1 + MOVOU 544(AX), X2 + MOVOU 560(AX), X3 + MOVOU 576(AX), X4 + MOVOU 592(AX), X5 + MOVOU 608(AX), X6 + MOVOU 624(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 512(AX) + MOVOU X1, 528(AX) + MOVOU X2, 544(AX) + MOVOU X3, 560(AX) + MOVOU X4, 576(AX) + MOVOU X5, 592(AX) + MOVOU X6, 608(AX) + MOVOU X7, 624(AX) + MOVOU 640(AX), X0 + MOVOU 656(AX), X1 + MOVOU 672(AX), X2 + MOVOU 688(AX), X3 + MOVOU 704(AX), X4 + MOVOU 720(AX), X5 + MOVOU 736(AX), X6 + MOVOU 752(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 640(AX) + MOVOU X1, 656(AX) + MOVOU X2, 672(AX) + MOVOU X3, 688(AX) + MOVOU X4, 704(AX) + MOVOU X5, 720(AX) + MOVOU X6, 736(AX) + MOVOU X7, 752(AX) + MOVOU 768(AX), X0 + MOVOU 784(AX), X1 + MOVOU 800(AX), X2 + MOVOU 816(AX), X3 + MOVOU 832(AX), X4 + MOVOU 848(AX), X5 + MOVOU 864(AX), X6 + MOVOU 880(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 768(AX) + MOVOU X1, 784(AX) + MOVOU X2, 800(AX) + MOVOU X3, 816(AX) + MOVOU X4, 832(AX) + MOVOU X5, 848(AX) + MOVOU X6, 864(AX) + MOVOU X7, 880(AX) + MOVOU 896(AX), X0 + MOVOU 912(AX), X1 + MOVOU 928(AX), X2 + MOVOU 944(AX), X3 + MOVOU 960(AX), X4 + MOVOU 976(AX), X5 + MOVOU 992(AX), X6 + MOVOU 1008(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 896(AX) + MOVOU X1, 912(AX) + MOVOU X2, 928(AX) + MOVOU X3, 944(AX) + MOVOU X4, 960(AX) + MOVOU X5, 976(AX) + MOVOU X6, 992(AX) + MOVOU X7, 1008(AX) + MOVOU (AX), X0 + MOVOU 128(AX), X1 + MOVOU 256(AX), X2 + MOVOU 384(AX), X3 + MOVOU 512(AX), X4 + MOVOU 640(AX), X5 + MOVOU 768(AX), X6 + MOVOU 896(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, (AX) + MOVOU X1, 128(AX) + MOVOU X2, 256(AX) + MOVOU X3, 384(AX) + MOVOU X4, 512(AX) + MOVOU X5, 640(AX) + MOVOU X6, 768(AX) + MOVOU X7, 896(AX) + MOVOU 16(AX), X0 + MOVOU 144(AX), X1 + MOVOU 272(AX), X2 + MOVOU 400(AX), X3 + MOVOU 528(AX), X4 + MOVOU 656(AX), X5 + MOVOU 784(AX), X6 + MOVOU 912(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 16(AX) + MOVOU X1, 144(AX) + MOVOU X2, 272(AX) + MOVOU X3, 400(AX) + MOVOU X4, 528(AX) + MOVOU X5, 656(AX) + MOVOU X6, 784(AX) + MOVOU X7, 912(AX) + MOVOU 32(AX), X0 + MOVOU 160(AX), X1 + MOVOU 288(AX), X2 + MOVOU 416(AX), X3 + MOVOU 544(AX), X4 + MOVOU 672(AX), X5 + MOVOU 800(AX), X6 + MOVOU 928(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 32(AX) + MOVOU X1, 160(AX) + MOVOU X2, 288(AX) + MOVOU X3, 416(AX) + MOVOU X4, 544(AX) + MOVOU X5, 672(AX) + MOVOU X6, 800(AX) + MOVOU X7, 928(AX) + MOVOU 48(AX), X0 + MOVOU 176(AX), X1 + MOVOU 304(AX), X2 + MOVOU 432(AX), X3 + MOVOU 560(AX), X4 + MOVOU 688(AX), X5 + MOVOU 816(AX), X6 + MOVOU 944(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 48(AX) + MOVOU X1, 176(AX) + MOVOU X2, 304(AX) + MOVOU X3, 432(AX) + MOVOU X4, 560(AX) + MOVOU X5, 688(AX) + MOVOU X6, 816(AX) + MOVOU X7, 944(AX) + MOVOU 64(AX), X0 + MOVOU 192(AX), X1 + MOVOU 320(AX), X2 + MOVOU 448(AX), X3 + MOVOU 576(AX), X4 + MOVOU 704(AX), X5 + MOVOU 832(AX), X6 + MOVOU 960(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 64(AX) + MOVOU X1, 192(AX) + MOVOU X2, 320(AX) + MOVOU X3, 448(AX) + MOVOU X4, 576(AX) + MOVOU X5, 704(AX) + MOVOU X6, 832(AX) + MOVOU X7, 960(AX) + MOVOU 80(AX), X0 + MOVOU 208(AX), X1 + MOVOU 336(AX), X2 + MOVOU 464(AX), X3 + MOVOU 592(AX), X4 + MOVOU 720(AX), X5 + MOVOU 848(AX), X6 + MOVOU 976(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 80(AX) + MOVOU X1, 208(AX) + MOVOU X2, 336(AX) + MOVOU X3, 464(AX) + MOVOU X4, 592(AX) + MOVOU X5, 720(AX) + MOVOU X6, 848(AX) + MOVOU X7, 976(AX) + MOVOU 96(AX), X0 + MOVOU 224(AX), X1 + MOVOU 352(AX), X2 + MOVOU 480(AX), X3 + MOVOU 608(AX), X4 + MOVOU 736(AX), X5 + MOVOU 864(AX), X6 + MOVOU 992(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 96(AX) + MOVOU X1, 224(AX) + MOVOU X2, 352(AX) + MOVOU X3, 480(AX) + MOVOU X4, 608(AX) + MOVOU X5, 736(AX) + MOVOU X6, 864(AX) + MOVOU X7, 992(AX) + MOVOU 112(AX), X0 + MOVOU 240(AX), X1 + MOVOU 368(AX), X2 + MOVOU 496(AX), X3 + MOVOU 624(AX), X4 + MOVOU 752(AX), X5 + MOVOU 880(AX), X6 + MOVOU 1008(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 112(AX) + MOVOU X1, 240(AX) + MOVOU X2, 368(AX) + MOVOU X3, 496(AX) + MOVOU X4, 624(AX) + MOVOU X5, 752(AX) + MOVOU X6, 880(AX) + MOVOU X7, 1008(AX) + RET - BLAMKA_ROUND_0(AX, 0, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 16, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 32, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 48, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 64, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 80, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 96, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 112, X8, X9, X10, X11) +DATA ·c40<>+0(SB)/8, $0x0201000706050403 +DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·c40<>(SB), RODATA|NOPTR, $16 - BLAMKA_ROUND_1(AX, 0, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 2, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 4, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 6, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 8, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 10, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 12, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 14, X8, X9, X10, X11) - RET +DATA ·c48<>+0(SB)/8, $0x0100070605040302 +DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·c48<>(SB), RODATA|NOPTR, $16 -// func mixBlocksSSE2(out, a, b, c *block) -TEXT ·mixBlocksSSE2(SB), 4, $0-32 +// func mixBlocksSSE2(out *block, a *block, b *block, c *block) +// Requires: SSE2 +TEXT ·mixBlocksSSE2(SB), NOSPLIT, $0-32 MOVQ out+0(FP), DX MOVQ a+8(FP), AX MOVQ b+16(FP), BX MOVQ c+24(FP), CX - MOVQ $128, DI + MOVQ $0x00000080, DI loop: - MOVOU 0(AX), X0 - MOVOU 0(BX), X1 - MOVOU 0(CX), X2 + MOVOU (AX), X0 + MOVOU (BX), X1 + MOVOU (CX), X2 PXOR X1, X0 PXOR X2, X0 - MOVOU X0, 0(DX) - ADDQ $16, AX - ADDQ $16, BX - ADDQ $16, CX - ADDQ $16, DX - SUBQ $2, DI + MOVOU X0, (DX) + ADDQ $0x10, AX + ADDQ $0x10, BX + ADDQ $0x10, CX + ADDQ $0x10, DX + SUBQ $0x02, DI JA loop RET -// func xorBlocksSSE2(out, a, b, c *block) -TEXT ·xorBlocksSSE2(SB), 4, $0-32 +// func xorBlocksSSE2(out *block, a *block, b *block, c *block) +// Requires: SSE2 +TEXT ·xorBlocksSSE2(SB), NOSPLIT, $0-32 MOVQ out+0(FP), DX MOVQ a+8(FP), AX MOVQ b+16(FP), BX MOVQ c+24(FP), CX - MOVQ $128, DI + MOVQ $0x00000080, DI loop: - MOVOU 0(AX), X0 - MOVOU 0(BX), X1 - MOVOU 0(CX), X2 - MOVOU 0(DX), X3 + MOVOU (AX), X0 + MOVOU (BX), X1 + MOVOU (CX), X2 + MOVOU (DX), X3 PXOR X1, X0 PXOR X2, X0 PXOR X3, X0 - MOVOU X0, 0(DX) - ADDQ $16, AX - ADDQ $16, BX - ADDQ $16, CX - ADDQ $16, DX - SUBQ $2, DI + MOVOU X0, (DX) + ADDQ $0x10, AX + ADDQ $0x10, BX + ADDQ $0x10, CX + ADDQ $0x10, DX + SUBQ $0x02, DI JA loop RET diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s index 9ae8206c..f75162e0 100644 --- a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s +++ b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s @@ -1,722 +1,4517 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run blake2bAVX2_amd64_asm.go -out ../../blake2bAVX2_amd64.s -pkg blake2b. DO NOT EDIT. //go:build amd64 && gc && !purego #include "textflag.h" -DATA ·AVX2_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 -DATA ·AVX2_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b -DATA ·AVX2_iv0<>+0x10(SB)/8, $0x3c6ef372fe94f82b -DATA ·AVX2_iv0<>+0x18(SB)/8, $0xa54ff53a5f1d36f1 -GLOBL ·AVX2_iv0<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX2_iv1<>+0x00(SB)/8, $0x510e527fade682d1 -DATA ·AVX2_iv1<>+0x08(SB)/8, $0x9b05688c2b3e6c1f -DATA ·AVX2_iv1<>+0x10(SB)/8, $0x1f83d9abfb41bd6b -DATA ·AVX2_iv1<>+0x18(SB)/8, $0x5be0cd19137e2179 -GLOBL ·AVX2_iv1<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX2_c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·AVX2_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -DATA ·AVX2_c40<>+0x10(SB)/8, $0x0201000706050403 -DATA ·AVX2_c40<>+0x18(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·AVX2_c40<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX2_c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·AVX2_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -DATA ·AVX2_c48<>+0x10(SB)/8, $0x0100070605040302 -DATA ·AVX2_c48<>+0x18(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·AVX2_c48<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 -DATA ·AVX_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b -GLOBL ·AVX_iv0<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b -DATA ·AVX_iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1 -GLOBL ·AVX_iv1<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_iv2<>+0x00(SB)/8, $0x510e527fade682d1 -DATA ·AVX_iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f -GLOBL ·AVX_iv2<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b -DATA ·AVX_iv3<>+0x08(SB)/8, $0x5be0cd19137e2179 -GLOBL ·AVX_iv3<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·AVX_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·AVX_c40<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·AVX_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·AVX_c48<>(SB), (NOPTR+RODATA), $16 - -#define VPERMQ_0x39_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x39 -#define VPERMQ_0x93_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x93 -#define VPERMQ_0x4E_Y2_Y2 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e -#define VPERMQ_0x93_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x93 -#define VPERMQ_0x39_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x39 - -#define ROUND_AVX2(m0, m1, m2, m3, t, c40, c48) \ - VPADDQ m0, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFD $-79, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPSHUFB c40, Y1, Y1; \ - VPADDQ m1, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFB c48, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPADDQ Y1, Y1, t; \ - VPSRLQ $63, Y1, Y1; \ - VPXOR t, Y1, Y1; \ - VPERMQ_0x39_Y1_Y1; \ - VPERMQ_0x4E_Y2_Y2; \ - VPERMQ_0x93_Y3_Y3; \ - VPADDQ m2, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFD $-79, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPSHUFB c40, Y1, Y1; \ - VPADDQ m3, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFB c48, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPADDQ Y1, Y1, t; \ - VPSRLQ $63, Y1, Y1; \ - VPXOR t, Y1, Y1; \ - VPERMQ_0x39_Y3_Y3; \ - VPERMQ_0x4E_Y2_Y2; \ - VPERMQ_0x93_Y1_Y1 - -#define VMOVQ_SI_X11_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x1E -#define VMOVQ_SI_X12_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x26 -#define VMOVQ_SI_X13_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x2E -#define VMOVQ_SI_X14_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x36 -#define VMOVQ_SI_X15_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x3E - -#define VMOVQ_SI_X11(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x5E; BYTE $n -#define VMOVQ_SI_X12(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x66; BYTE $n -#define VMOVQ_SI_X13(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x6E; BYTE $n -#define VMOVQ_SI_X14(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x76; BYTE $n -#define VMOVQ_SI_X15(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x7E; BYTE $n - -#define VPINSRQ_1_SI_X11_0 BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x1E; BYTE $0x01 -#define VPINSRQ_1_SI_X12_0 BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x26; BYTE $0x01 -#define VPINSRQ_1_SI_X13_0 BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x2E; BYTE $0x01 -#define VPINSRQ_1_SI_X14_0 BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x36; BYTE $0x01 -#define VPINSRQ_1_SI_X15_0 BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x3E; BYTE $0x01 - -#define VPINSRQ_1_SI_X11(n) BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x5E; BYTE $n; BYTE $0x01 -#define VPINSRQ_1_SI_X12(n) BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x66; BYTE $n; BYTE $0x01 -#define VPINSRQ_1_SI_X13(n) BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x6E; BYTE $n; BYTE $0x01 -#define VPINSRQ_1_SI_X14(n) BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x76; BYTE $n; BYTE $0x01 -#define VPINSRQ_1_SI_X15(n) BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x7E; BYTE $n; BYTE $0x01 - -#define VMOVQ_R8_X15 BYTE $0xC4; BYTE $0x41; BYTE $0xF9; BYTE $0x6E; BYTE $0xF8 -#define VPINSRQ_1_R9_X15 BYTE $0xC4; BYTE $0x43; BYTE $0x81; BYTE $0x22; BYTE $0xF9; BYTE $0x01 - -// load msg: Y12 = (i0, i1, i2, i3) -// i0, i1, i2, i3 must not be 0 -#define LOAD_MSG_AVX2_Y12(i0, i1, i2, i3) \ - VMOVQ_SI_X12(i0*8); \ - VMOVQ_SI_X11(i2*8); \ - VPINSRQ_1_SI_X12(i1*8); \ - VPINSRQ_1_SI_X11(i3*8); \ - VINSERTI128 $1, X11, Y12, Y12 - -// load msg: Y13 = (i0, i1, i2, i3) -// i0, i1, i2, i3 must not be 0 -#define LOAD_MSG_AVX2_Y13(i0, i1, i2, i3) \ - VMOVQ_SI_X13(i0*8); \ - VMOVQ_SI_X11(i2*8); \ - VPINSRQ_1_SI_X13(i1*8); \ - VPINSRQ_1_SI_X11(i3*8); \ - VINSERTI128 $1, X11, Y13, Y13 - -// load msg: Y14 = (i0, i1, i2, i3) -// i0, i1, i2, i3 must not be 0 -#define LOAD_MSG_AVX2_Y14(i0, i1, i2, i3) \ - VMOVQ_SI_X14(i0*8); \ - VMOVQ_SI_X11(i2*8); \ - VPINSRQ_1_SI_X14(i1*8); \ - VPINSRQ_1_SI_X11(i3*8); \ - VINSERTI128 $1, X11, Y14, Y14 - -// load msg: Y15 = (i0, i1, i2, i3) -// i0, i1, i2, i3 must not be 0 -#define LOAD_MSG_AVX2_Y15(i0, i1, i2, i3) \ - VMOVQ_SI_X15(i0*8); \ - VMOVQ_SI_X11(i2*8); \ - VPINSRQ_1_SI_X15(i1*8); \ - VPINSRQ_1_SI_X11(i3*8); \ - VINSERTI128 $1, X11, Y15, Y15 - -#define LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() \ - VMOVQ_SI_X12_0; \ - VMOVQ_SI_X11(4*8); \ - VPINSRQ_1_SI_X12(2*8); \ - VPINSRQ_1_SI_X11(6*8); \ - VINSERTI128 $1, X11, Y12, Y12; \ - LOAD_MSG_AVX2_Y13(1, 3, 5, 7); \ - LOAD_MSG_AVX2_Y14(8, 10, 12, 14); \ - LOAD_MSG_AVX2_Y15(9, 11, 13, 15) - -#define LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() \ - LOAD_MSG_AVX2_Y12(14, 4, 9, 13); \ - LOAD_MSG_AVX2_Y13(10, 8, 15, 6); \ - VMOVQ_SI_X11(11*8); \ - VPSHUFD $0x4E, 0*8(SI), X14; \ - VPINSRQ_1_SI_X11(5*8); \ - VINSERTI128 $1, X11, Y14, Y14; \ - LOAD_MSG_AVX2_Y15(12, 2, 7, 3) - -#define LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() \ - VMOVQ_SI_X11(5*8); \ - VMOVDQU 11*8(SI), X12; \ - VPINSRQ_1_SI_X11(15*8); \ - VINSERTI128 $1, X11, Y12, Y12; \ - VMOVQ_SI_X13(8*8); \ - VMOVQ_SI_X11(2*8); \ - VPINSRQ_1_SI_X13_0; \ - VPINSRQ_1_SI_X11(13*8); \ - VINSERTI128 $1, X11, Y13, Y13; \ - LOAD_MSG_AVX2_Y14(10, 3, 7, 9); \ - LOAD_MSG_AVX2_Y15(14, 6, 1, 4) - -#define LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() \ - LOAD_MSG_AVX2_Y12(7, 3, 13, 11); \ - LOAD_MSG_AVX2_Y13(9, 1, 12, 14); \ - LOAD_MSG_AVX2_Y14(2, 5, 4, 15); \ - VMOVQ_SI_X15(6*8); \ - VMOVQ_SI_X11_0; \ - VPINSRQ_1_SI_X15(10*8); \ - VPINSRQ_1_SI_X11(8*8); \ - VINSERTI128 $1, X11, Y15, Y15 - -#define LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() \ - LOAD_MSG_AVX2_Y12(9, 5, 2, 10); \ - VMOVQ_SI_X13_0; \ - VMOVQ_SI_X11(4*8); \ - VPINSRQ_1_SI_X13(7*8); \ - VPINSRQ_1_SI_X11(15*8); \ - VINSERTI128 $1, X11, Y13, Y13; \ - LOAD_MSG_AVX2_Y14(14, 11, 6, 3); \ - LOAD_MSG_AVX2_Y15(1, 12, 8, 13) - -#define LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() \ - VMOVQ_SI_X12(2*8); \ - VMOVQ_SI_X11_0; \ - VPINSRQ_1_SI_X12(6*8); \ - VPINSRQ_1_SI_X11(8*8); \ - VINSERTI128 $1, X11, Y12, Y12; \ - LOAD_MSG_AVX2_Y13(12, 10, 11, 3); \ - LOAD_MSG_AVX2_Y14(4, 7, 15, 1); \ - LOAD_MSG_AVX2_Y15(13, 5, 14, 9) - -#define LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() \ - LOAD_MSG_AVX2_Y12(12, 1, 14, 4); \ - LOAD_MSG_AVX2_Y13(5, 15, 13, 10); \ - VMOVQ_SI_X14_0; \ - VPSHUFD $0x4E, 8*8(SI), X11; \ - VPINSRQ_1_SI_X14(6*8); \ - VINSERTI128 $1, X11, Y14, Y14; \ - LOAD_MSG_AVX2_Y15(7, 3, 2, 11) - -#define LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() \ - LOAD_MSG_AVX2_Y12(13, 7, 12, 3); \ - LOAD_MSG_AVX2_Y13(11, 14, 1, 9); \ - LOAD_MSG_AVX2_Y14(5, 15, 8, 2); \ - VMOVQ_SI_X15_0; \ - VMOVQ_SI_X11(6*8); \ - VPINSRQ_1_SI_X15(4*8); \ - VPINSRQ_1_SI_X11(10*8); \ - VINSERTI128 $1, X11, Y15, Y15 - -#define LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() \ - VMOVQ_SI_X12(6*8); \ - VMOVQ_SI_X11(11*8); \ - VPINSRQ_1_SI_X12(14*8); \ - VPINSRQ_1_SI_X11_0; \ - VINSERTI128 $1, X11, Y12, Y12; \ - LOAD_MSG_AVX2_Y13(15, 9, 3, 8); \ - VMOVQ_SI_X11(1*8); \ - VMOVDQU 12*8(SI), X14; \ - VPINSRQ_1_SI_X11(10*8); \ - VINSERTI128 $1, X11, Y14, Y14; \ - VMOVQ_SI_X15(2*8); \ - VMOVDQU 4*8(SI), X11; \ - VPINSRQ_1_SI_X15(7*8); \ - VINSERTI128 $1, X11, Y15, Y15 - -#define LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() \ - LOAD_MSG_AVX2_Y12(10, 8, 7, 1); \ - VMOVQ_SI_X13(2*8); \ - VPSHUFD $0x4E, 5*8(SI), X11; \ - VPINSRQ_1_SI_X13(4*8); \ - VINSERTI128 $1, X11, Y13, Y13; \ - LOAD_MSG_AVX2_Y14(15, 9, 3, 13); \ - VMOVQ_SI_X15(11*8); \ - VMOVQ_SI_X11(12*8); \ - VPINSRQ_1_SI_X15(14*8); \ - VPINSRQ_1_SI_X11_0; \ - VINSERTI128 $1, X11, Y15, Y15 - // func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) -TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment - MOVQ h+0(FP), AX - MOVQ c+8(FP), BX - MOVQ flag+16(FP), CX - MOVQ blocks_base+24(FP), SI - MOVQ blocks_len+32(FP), DI - - MOVQ SP, DX - ADDQ $31, DX - ANDQ $~31, DX - - MOVQ CX, 16(DX) - XORQ CX, CX - MOVQ CX, 24(DX) - - VMOVDQU ·AVX2_c40<>(SB), Y4 - VMOVDQU ·AVX2_c48<>(SB), Y5 - - VMOVDQU 0(AX), Y8 +// Requires: AVX, AVX2 +TEXT ·hashBlocksAVX2(SB), NOSPLIT, $320-48 + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + MOVQ SP, DX + ADDQ $+31, DX + ANDQ $-32, DX + MOVQ CX, 16(DX) + XORQ CX, CX + MOVQ CX, 24(DX) + VMOVDQU ·AVX2_c40<>+0(SB), Y4 + VMOVDQU ·AVX2_c48<>+0(SB), Y5 + VMOVDQU (AX), Y8 VMOVDQU 32(AX), Y9 - VMOVDQU ·AVX2_iv0<>(SB), Y6 - VMOVDQU ·AVX2_iv1<>(SB), Y7 - - MOVQ 0(BX), R8 - MOVQ 8(BX), R9 - MOVQ R9, 8(DX) + VMOVDQU ·AVX2_iv0<>+0(SB), Y6 + VMOVDQU ·AVX2_iv1<>+0(SB), Y7 + MOVQ (BX), R8 + MOVQ 8(BX), R9 + MOVQ R9, 8(DX) loop: - ADDQ $128, R8 - MOVQ R8, 0(DX) - CMPQ R8, $128 + ADDQ $0x80, R8 + MOVQ R8, (DX) + CMPQ R8, $0x80 JGE noinc INCQ R9 MOVQ R9, 8(DX) noinc: - VMOVDQA Y8, Y0 - VMOVDQA Y9, Y1 - VMOVDQA Y6, Y2 - VPXOR 0(DX), Y7, Y3 - - LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() - VMOVDQA Y12, 32(DX) - VMOVDQA Y13, 64(DX) - VMOVDQA Y14, 96(DX) - VMOVDQA Y15, 128(DX) - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() - VMOVDQA Y12, 160(DX) - VMOVDQA Y13, 192(DX) - VMOVDQA Y14, 224(DX) - VMOVDQA Y15, 256(DX) - - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - - ROUND_AVX2(32(DX), 64(DX), 96(DX), 128(DX), Y10, Y4, Y5) - ROUND_AVX2(160(DX), 192(DX), 224(DX), 256(DX), Y10, Y4, Y5) - - VPXOR Y0, Y8, Y8 - VPXOR Y1, Y9, Y9 - VPXOR Y2, Y8, Y8 - VPXOR Y3, Y9, Y9 - - LEAQ 128(SI), SI - SUBQ $128, DI - JNE loop - - MOVQ R8, 0(BX) - MOVQ R9, 8(BX) - - VMOVDQU Y8, 0(AX) - VMOVDQU Y9, 32(AX) + VMOVDQA Y8, Y0 + VMOVDQA Y9, Y1 + VMOVDQA Y6, Y2 + VPXOR (DX), Y7, Y3 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x26 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x20 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x30 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x08 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x28 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x38 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x40 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x70 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x78 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VMOVDQA Y12, 32(DX) + VMOVDQA Y13, 64(DX) + VMOVDQA Y14, 96(DX) + VMOVDQA Y15, 128(DX) + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x48 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x68 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x78 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x30 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x58 + VPSHUFD $0x4e, (SI), X14 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x28 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x38 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x18 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VMOVDQA Y12, 160(DX) + VMOVDQA Y13, 192(DX) + VMOVDQA Y14, 224(DX) + VMOVDQA Y15, 256(DX) + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x28 + VMOVDQU 88(SI), X12 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x78 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x40 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x10 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x2e + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x68 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x38 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x48 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x08 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x20 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x58 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x70 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x20 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x78 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x30 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x1e + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x40 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x10 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x50 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x2e + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x20 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x78 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x30 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x18 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x08 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x40 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x60 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x68 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x1e + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x40 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x58 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x18 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x20 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x78 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x08 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x70 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x48 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x70 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x20 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x28 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x50 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x36 + VPSHUFD $0x4e, 64(SI), X11 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x30 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x10 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x58 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x18 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x58 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x08 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x48 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x28 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x40 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x10 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x3e + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x30 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x50 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x30 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x58 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x1e + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x78 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x18 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x40 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x08 + VMOVDQU 96(SI), X14 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x50 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x10 + VMOVDQU 32(SI), X11 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x38 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x38 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x08 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x10 + VPSHUFD $0x4e, 40(SI), X11 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x20 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x78 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x18 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x68 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x58 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x1e + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + VPADDQ 32(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ 64(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ 96(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ 128(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + VPADDQ 160(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ 192(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ 224(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ 256(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + VPXOR Y0, Y8, Y8 + VPXOR Y1, Y9, Y9 + VPXOR Y2, Y8, Y8 + VPXOR Y3, Y9, Y9 + LEAQ 128(SI), SI + SUBQ $0x80, DI + JNE loop + MOVQ R8, (BX) + MOVQ R9, 8(BX) + VMOVDQU Y8, (AX) + VMOVDQU Y9, 32(AX) VZEROUPPER - RET -#define VPUNPCKLQDQ_X2_X2_X15 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xFA -#define VPUNPCKLQDQ_X3_X3_X15 BYTE $0xC5; BYTE $0x61; BYTE $0x6C; BYTE $0xFB -#define VPUNPCKLQDQ_X7_X7_X15 BYTE $0xC5; BYTE $0x41; BYTE $0x6C; BYTE $0xFF -#define VPUNPCKLQDQ_X13_X13_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x11; BYTE $0x6C; BYTE $0xFD -#define VPUNPCKLQDQ_X14_X14_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x09; BYTE $0x6C; BYTE $0xFE - -#define VPUNPCKHQDQ_X15_X2_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x69; BYTE $0x6D; BYTE $0xD7 -#define VPUNPCKHQDQ_X15_X3_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xDF -#define VPUNPCKHQDQ_X15_X6_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x49; BYTE $0x6D; BYTE $0xF7 -#define VPUNPCKHQDQ_X15_X7_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xFF -#define VPUNPCKHQDQ_X15_X3_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xD7 -#define VPUNPCKHQDQ_X15_X7_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xF7 -#define VPUNPCKHQDQ_X15_X13_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xDF -#define VPUNPCKHQDQ_X15_X13_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xFF - -#define SHUFFLE_AVX() \ - VMOVDQA X6, X13; \ - VMOVDQA X2, X14; \ - VMOVDQA X4, X6; \ - VPUNPCKLQDQ_X13_X13_X15; \ - VMOVDQA X5, X4; \ - VMOVDQA X6, X5; \ - VPUNPCKHQDQ_X15_X7_X6; \ - VPUNPCKLQDQ_X7_X7_X15; \ - VPUNPCKHQDQ_X15_X13_X7; \ - VPUNPCKLQDQ_X3_X3_X15; \ - VPUNPCKHQDQ_X15_X2_X2; \ - VPUNPCKLQDQ_X14_X14_X15; \ - VPUNPCKHQDQ_X15_X3_X3; \ - -#define SHUFFLE_AVX_INV() \ - VMOVDQA X2, X13; \ - VMOVDQA X4, X14; \ - VPUNPCKLQDQ_X2_X2_X15; \ - VMOVDQA X5, X4; \ - VPUNPCKHQDQ_X15_X3_X2; \ - VMOVDQA X14, X5; \ - VPUNPCKLQDQ_X3_X3_X15; \ - VMOVDQA X6, X14; \ - VPUNPCKHQDQ_X15_X13_X3; \ - VPUNPCKLQDQ_X7_X7_X15; \ - VPUNPCKHQDQ_X15_X6_X6; \ - VPUNPCKLQDQ_X14_X14_X15; \ - VPUNPCKHQDQ_X15_X7_X7; \ - -#define HALF_ROUND_AVX(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \ - VPADDQ m0, v0, v0; \ - VPADDQ v2, v0, v0; \ - VPADDQ m1, v1, v1; \ - VPADDQ v3, v1, v1; \ - VPXOR v0, v6, v6; \ - VPXOR v1, v7, v7; \ - VPSHUFD $-79, v6, v6; \ - VPSHUFD $-79, v7, v7; \ - VPADDQ v6, v4, v4; \ - VPADDQ v7, v5, v5; \ - VPXOR v4, v2, v2; \ - VPXOR v5, v3, v3; \ - VPSHUFB c40, v2, v2; \ - VPSHUFB c40, v3, v3; \ - VPADDQ m2, v0, v0; \ - VPADDQ v2, v0, v0; \ - VPADDQ m3, v1, v1; \ - VPADDQ v3, v1, v1; \ - VPXOR v0, v6, v6; \ - VPXOR v1, v7, v7; \ - VPSHUFB c48, v6, v6; \ - VPSHUFB c48, v7, v7; \ - VPADDQ v6, v4, v4; \ - VPADDQ v7, v5, v5; \ - VPXOR v4, v2, v2; \ - VPXOR v5, v3, v3; \ - VPADDQ v2, v2, t0; \ - VPSRLQ $63, v2, v2; \ - VPXOR t0, v2, v2; \ - VPADDQ v3, v3, t0; \ - VPSRLQ $63, v3, v3; \ - VPXOR t0, v3, v3 - -// load msg: X12 = (i0, i1), X13 = (i2, i3), X14 = (i4, i5), X15 = (i6, i7) -// i0, i1, i2, i3, i4, i5, i6, i7 must not be 0 -#define LOAD_MSG_AVX(i0, i1, i2, i3, i4, i5, i6, i7) \ - VMOVQ_SI_X12(i0*8); \ - VMOVQ_SI_X13(i2*8); \ - VMOVQ_SI_X14(i4*8); \ - VMOVQ_SI_X15(i6*8); \ - VPINSRQ_1_SI_X12(i1*8); \ - VPINSRQ_1_SI_X13(i3*8); \ - VPINSRQ_1_SI_X14(i5*8); \ - VPINSRQ_1_SI_X15(i7*8) - -// load msg: X12 = (0, 2), X13 = (4, 6), X14 = (1, 3), X15 = (5, 7) -#define LOAD_MSG_AVX_0_2_4_6_1_3_5_7() \ - VMOVQ_SI_X12_0; \ - VMOVQ_SI_X13(4*8); \ - VMOVQ_SI_X14(1*8); \ - VMOVQ_SI_X15(5*8); \ - VPINSRQ_1_SI_X12(2*8); \ - VPINSRQ_1_SI_X13(6*8); \ - VPINSRQ_1_SI_X14(3*8); \ - VPINSRQ_1_SI_X15(7*8) - -// load msg: X12 = (1, 0), X13 = (11, 5), X14 = (12, 2), X15 = (7, 3) -#define LOAD_MSG_AVX_1_0_11_5_12_2_7_3() \ - VPSHUFD $0x4E, 0*8(SI), X12; \ - VMOVQ_SI_X13(11*8); \ - VMOVQ_SI_X14(12*8); \ - VMOVQ_SI_X15(7*8); \ - VPINSRQ_1_SI_X13(5*8); \ - VPINSRQ_1_SI_X14(2*8); \ - VPINSRQ_1_SI_X15(3*8) - -// load msg: X12 = (11, 12), X13 = (5, 15), X14 = (8, 0), X15 = (2, 13) -#define LOAD_MSG_AVX_11_12_5_15_8_0_2_13() \ - VMOVDQU 11*8(SI), X12; \ - VMOVQ_SI_X13(5*8); \ - VMOVQ_SI_X14(8*8); \ - VMOVQ_SI_X15(2*8); \ - VPINSRQ_1_SI_X13(15*8); \ - VPINSRQ_1_SI_X14_0; \ - VPINSRQ_1_SI_X15(13*8) - -// load msg: X12 = (2, 5), X13 = (4, 15), X14 = (6, 10), X15 = (0, 8) -#define LOAD_MSG_AVX_2_5_4_15_6_10_0_8() \ - VMOVQ_SI_X12(2*8); \ - VMOVQ_SI_X13(4*8); \ - VMOVQ_SI_X14(6*8); \ - VMOVQ_SI_X15_0; \ - VPINSRQ_1_SI_X12(5*8); \ - VPINSRQ_1_SI_X13(15*8); \ - VPINSRQ_1_SI_X14(10*8); \ - VPINSRQ_1_SI_X15(8*8) +DATA ·AVX2_c40<>+0(SB)/8, $0x0201000706050403 +DATA ·AVX2_c40<>+8(SB)/8, $0x0a09080f0e0d0c0b +DATA ·AVX2_c40<>+16(SB)/8, $0x0201000706050403 +DATA ·AVX2_c40<>+24(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·AVX2_c40<>(SB), RODATA|NOPTR, $32 -// load msg: X12 = (9, 5), X13 = (2, 10), X14 = (0, 7), X15 = (4, 15) -#define LOAD_MSG_AVX_9_5_2_10_0_7_4_15() \ - VMOVQ_SI_X12(9*8); \ - VMOVQ_SI_X13(2*8); \ - VMOVQ_SI_X14_0; \ - VMOVQ_SI_X15(4*8); \ - VPINSRQ_1_SI_X12(5*8); \ - VPINSRQ_1_SI_X13(10*8); \ - VPINSRQ_1_SI_X14(7*8); \ - VPINSRQ_1_SI_X15(15*8) +DATA ·AVX2_c48<>+0(SB)/8, $0x0100070605040302 +DATA ·AVX2_c48<>+8(SB)/8, $0x09080f0e0d0c0b0a +DATA ·AVX2_c48<>+16(SB)/8, $0x0100070605040302 +DATA ·AVX2_c48<>+24(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·AVX2_c48<>(SB), RODATA|NOPTR, $32 -// load msg: X12 = (2, 6), X13 = (0, 8), X14 = (12, 10), X15 = (11, 3) -#define LOAD_MSG_AVX_2_6_0_8_12_10_11_3() \ - VMOVQ_SI_X12(2*8); \ - VMOVQ_SI_X13_0; \ - VMOVQ_SI_X14(12*8); \ - VMOVQ_SI_X15(11*8); \ - VPINSRQ_1_SI_X12(6*8); \ - VPINSRQ_1_SI_X13(8*8); \ - VPINSRQ_1_SI_X14(10*8); \ - VPINSRQ_1_SI_X15(3*8) +DATA ·AVX2_iv0<>+0(SB)/8, $0x6a09e667f3bcc908 +DATA ·AVX2_iv0<>+8(SB)/8, $0xbb67ae8584caa73b +DATA ·AVX2_iv0<>+16(SB)/8, $0x3c6ef372fe94f82b +DATA ·AVX2_iv0<>+24(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·AVX2_iv0<>(SB), RODATA|NOPTR, $32 -// load msg: X12 = (0, 6), X13 = (9, 8), X14 = (7, 3), X15 = (2, 11) -#define LOAD_MSG_AVX_0_6_9_8_7_3_2_11() \ - MOVQ 0*8(SI), X12; \ - VPSHUFD $0x4E, 8*8(SI), X13; \ - MOVQ 7*8(SI), X14; \ - MOVQ 2*8(SI), X15; \ - VPINSRQ_1_SI_X12(6*8); \ - VPINSRQ_1_SI_X14(3*8); \ - VPINSRQ_1_SI_X15(11*8) - -// load msg: X12 = (6, 14), X13 = (11, 0), X14 = (15, 9), X15 = (3, 8) -#define LOAD_MSG_AVX_6_14_11_0_15_9_3_8() \ - MOVQ 6*8(SI), X12; \ - MOVQ 11*8(SI), X13; \ - MOVQ 15*8(SI), X14; \ - MOVQ 3*8(SI), X15; \ - VPINSRQ_1_SI_X12(14*8); \ - VPINSRQ_1_SI_X13_0; \ - VPINSRQ_1_SI_X14(9*8); \ - VPINSRQ_1_SI_X15(8*8) - -// load msg: X12 = (5, 15), X13 = (8, 2), X14 = (0, 4), X15 = (6, 10) -#define LOAD_MSG_AVX_5_15_8_2_0_4_6_10() \ - MOVQ 5*8(SI), X12; \ - MOVQ 8*8(SI), X13; \ - MOVQ 0*8(SI), X14; \ - MOVQ 6*8(SI), X15; \ - VPINSRQ_1_SI_X12(15*8); \ - VPINSRQ_1_SI_X13(2*8); \ - VPINSRQ_1_SI_X14(4*8); \ - VPINSRQ_1_SI_X15(10*8) - -// load msg: X12 = (12, 13), X13 = (1, 10), X14 = (2, 7), X15 = (4, 5) -#define LOAD_MSG_AVX_12_13_1_10_2_7_4_5() \ - VMOVDQU 12*8(SI), X12; \ - MOVQ 1*8(SI), X13; \ - MOVQ 2*8(SI), X14; \ - VPINSRQ_1_SI_X13(10*8); \ - VPINSRQ_1_SI_X14(7*8); \ - VMOVDQU 4*8(SI), X15 - -// load msg: X12 = (15, 9), X13 = (3, 13), X14 = (11, 14), X15 = (12, 0) -#define LOAD_MSG_AVX_15_9_3_13_11_14_12_0() \ - MOVQ 15*8(SI), X12; \ - MOVQ 3*8(SI), X13; \ - MOVQ 11*8(SI), X14; \ - MOVQ 12*8(SI), X15; \ - VPINSRQ_1_SI_X12(9*8); \ - VPINSRQ_1_SI_X13(13*8); \ - VPINSRQ_1_SI_X14(14*8); \ - VPINSRQ_1_SI_X15_0 +DATA ·AVX2_iv1<>+0(SB)/8, $0x510e527fade682d1 +DATA ·AVX2_iv1<>+8(SB)/8, $0x9b05688c2b3e6c1f +DATA ·AVX2_iv1<>+16(SB)/8, $0x1f83d9abfb41bd6b +DATA ·AVX2_iv1<>+24(SB)/8, $0x5be0cd19137e2179 +GLOBL ·AVX2_iv1<>(SB), RODATA|NOPTR, $32 // func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) -TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment - MOVQ h+0(FP), AX - MOVQ c+8(FP), BX - MOVQ flag+16(FP), CX - MOVQ blocks_base+24(FP), SI - MOVQ blocks_len+32(FP), DI - - MOVQ SP, R10 - ADDQ $15, R10 - ANDQ $~15, R10 - - VMOVDQU ·AVX_c40<>(SB), X0 - VMOVDQU ·AVX_c48<>(SB), X1 +// Requires: AVX, SSE2 +TEXT ·hashBlocksAVX(SB), NOSPLIT, $288-48 + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + MOVQ SP, R10 + ADDQ $0x0f, R10 + ANDQ $-16, R10 + VMOVDQU ·AVX_c40<>+0(SB), X0 + VMOVDQU ·AVX_c48<>+0(SB), X1 VMOVDQA X0, X8 VMOVDQA X1, X9 - - VMOVDQU ·AVX_iv3<>(SB), X0 - VMOVDQA X0, 0(R10) - XORQ CX, 0(R10) // 0(R10) = ·AVX_iv3 ^ (CX || 0) - - VMOVDQU 0(AX), X10 + VMOVDQU ·AVX_iv3<>+0(SB), X0 + VMOVDQA X0, (R10) + XORQ CX, (R10) + VMOVDQU (AX), X10 VMOVDQU 16(AX), X11 VMOVDQU 32(AX), X2 VMOVDQU 48(AX), X3 - - MOVQ 0(BX), R8 - MOVQ 8(BX), R9 + MOVQ (BX), R8 + MOVQ 8(BX), R9 loop: - ADDQ $128, R8 - CMPQ R8, $128 + ADDQ $0x80, R8 + CMPQ R8, $0x80 JGE noinc INCQ R9 noinc: - VMOVQ_R8_X15 - VPINSRQ_1_R9_X15 - + BYTE $0xc4 + BYTE $0x41 + BYTE $0xf9 + BYTE $0x6e + BYTE $0xf8 + BYTE $0xc4 + BYTE $0x43 + BYTE $0x81 + BYTE $0x22 + BYTE $0xf9 + BYTE $0x01 VMOVDQA X10, X0 VMOVDQA X11, X1 - VMOVDQU ·AVX_iv0<>(SB), X4 - VMOVDQU ·AVX_iv1<>(SB), X5 - VMOVDQU ·AVX_iv2<>(SB), X6 - + VMOVDQU ·AVX_iv0<>+0(SB), X4 + VMOVDQU ·AVX_iv1<>+0(SB), X5 + VMOVDQU ·AVX_iv2<>+0(SB), X6 VPXOR X15, X6, X6 - VMOVDQA 0(R10), X7 - - LOAD_MSG_AVX_0_2_4_6_1_3_5_7() + VMOVDQA (R10), X7 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x26 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x20 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x08 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x28 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x38 + BYTE $0x01 VMOVDQA X12, 16(R10) VMOVDQA X13, 32(R10) VMOVDQA X14, 48(R10) VMOVDQA X15, 64(R10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX(8, 10, 12, 14, 9, 11, 13, 15) + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x40 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x78 + BYTE $0x01 VMOVDQA X12, 80(R10) VMOVDQA X13, 96(R10) VMOVDQA X14, 112(R10) VMOVDQA X15, 128(R10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(14, 4, 9, 13, 10, 8, 15, 6) + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x78 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x68 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x30 + BYTE $0x01 VMOVDQA X12, 144(R10) VMOVDQA X13, 160(R10) VMOVDQA X14, 176(R10) VMOVDQA X15, 192(R10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_1_0_11_5_12_2_7_3() + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + VPSHUFD $0x4e, (SI), X12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x58 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x38 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x18 + BYTE $0x01 VMOVDQA X12, 208(R10) VMOVDQA X13, 224(R10) VMOVDQA X14, 240(R10) VMOVDQA X15, 256(R10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX_11_12_5_15_8_0_2_13() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX(10, 3, 7, 9, 14, 6, 1, 4) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(7, 3, 13, 11, 9, 1, 12, 14) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_2_5_4_15_6_10_0_8() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX_9_5_2_10_0_7_4_15() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX(14, 11, 6, 3, 1, 12, 8, 13) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX_2_6_0_8_12_10_11_3() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX(4, 7, 15, 1, 13, 5, 14, 9) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(12, 1, 14, 4, 5, 15, 13, 10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_0_6_9_8_7_3_2_11() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(13, 7, 12, 3, 11, 14, 1, 9) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_5_15_8_2_0_4_6_10() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX_6_14_11_0_15_9_3_8() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_12_13_1_10_2_7_4_5() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(10, 8, 7, 1, 2, 4, 6, 5) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_15_9_3_13_11_14_12_0() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X15, X8, X9) - SHUFFLE_AVX() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X15, X8, X9) - SHUFFLE_AVX_INV() - - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X15, X8, X9) - SHUFFLE_AVX() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X15, X8, X9) - SHUFFLE_AVX_INV() - + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + VMOVDQU 88(SI), X12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x28 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x40 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x10 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x36 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x68 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x08 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x20 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x70 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x20 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x30 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x3e + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x40 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x36 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x20 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x78 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x30 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x08 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x40 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x60 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x68 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x2e + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x58 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x18 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x20 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x78 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x70 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x48 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x28 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x50 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + MOVQ (SI), X12 + VPSHUFD $0x4e, 64(SI), X13 + MOVQ 56(SI), X14 + MOVQ 16(SI), X15 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x58 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x58 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x08 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x48 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + MOVQ 40(SI), X12 + MOVQ 64(SI), X13 + MOVQ (SI), X14 + MOVQ 48(SI), X15 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x50 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + MOVQ 48(SI), X12 + MOVQ 88(SI), X13 + MOVQ 120(SI), X14 + MOVQ 24(SI), X15 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x2e + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x40 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + VMOVDQU 96(SI), X12 + MOVQ 8(SI), X13 + MOVQ 16(SI), X14 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x38 + BYTE $0x01 + VMOVDQU 32(SI), X15 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x30 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x28 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + MOVQ 120(SI), X12 + MOVQ 24(SI), X13 + MOVQ 88(SI), X14 + MOVQ 96(SI), X15 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x68 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x3e + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + VPADDQ 16(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 32(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ 48(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 64(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + VPADDQ 80(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 96(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ 112(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 128(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + VPADDQ 144(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 160(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ 176(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 192(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + VPADDQ 208(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 224(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ 240(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 256(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff VMOVDQU 32(AX), X14 VMOVDQU 48(AX), X15 VPXOR X0, X10, X10 @@ -729,16 +4524,36 @@ noinc: VPXOR X7, X15, X3 VMOVDQU X2, 32(AX) VMOVDQU X3, 48(AX) + LEAQ 128(SI), SI + SUBQ $0x80, DI + JNE loop + VMOVDQU X10, (AX) + VMOVDQU X11, 16(AX) + MOVQ R8, (BX) + MOVQ R9, 8(BX) + VZEROUPPER + RET - LEAQ 128(SI), SI - SUBQ $128, DI - JNE loop +DATA ·AVX_c40<>+0(SB)/8, $0x0201000706050403 +DATA ·AVX_c40<>+8(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·AVX_c40<>(SB), RODATA|NOPTR, $16 - VMOVDQU X10, 0(AX) - VMOVDQU X11, 16(AX) +DATA ·AVX_c48<>+0(SB)/8, $0x0100070605040302 +DATA ·AVX_c48<>+8(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·AVX_c48<>(SB), RODATA|NOPTR, $16 - MOVQ R8, 0(BX) - MOVQ R9, 8(BX) - VZEROUPPER +DATA ·AVX_iv3<>+0(SB)/8, $0x1f83d9abfb41bd6b +DATA ·AVX_iv3<>+8(SB)/8, $0x5be0cd19137e2179 +GLOBL ·AVX_iv3<>(SB), RODATA|NOPTR, $16 - RET +DATA ·AVX_iv0<>+0(SB)/8, $0x6a09e667f3bcc908 +DATA ·AVX_iv0<>+8(SB)/8, $0xbb67ae8584caa73b +GLOBL ·AVX_iv0<>(SB), RODATA|NOPTR, $16 + +DATA ·AVX_iv1<>+0(SB)/8, $0x3c6ef372fe94f82b +DATA ·AVX_iv1<>+8(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·AVX_iv1<>(SB), RODATA|NOPTR, $16 + +DATA ·AVX_iv2<>+0(SB)/8, $0x510e527fade682d1 +DATA ·AVX_iv2<>+8(SB)/8, $0x9b05688c2b3e6c1f +GLOBL ·AVX_iv2<>(SB), RODATA|NOPTR, $16 diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s index adfac00c..9a0ce212 100644 --- a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s +++ b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s @@ -1,278 +1,1441 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run blake2b_amd64_asm.go -out ../../blake2b_amd64.s -pkg blake2b. DO NOT EDIT. //go:build amd64 && gc && !purego #include "textflag.h" -DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 -DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b -GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16 - -DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b -DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1 -GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16 - -DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1 -DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f -GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16 - -DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b -DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179 -GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16 - -DATA ·c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·c40<>(SB), (NOPTR+RODATA), $16 - -DATA ·c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·c48<>(SB), (NOPTR+RODATA), $16 - -#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \ - MOVO v4, t1; \ - MOVO v5, v4; \ - MOVO t1, v5; \ - MOVO v6, t1; \ - PUNPCKLQDQ v6, t2; \ - PUNPCKHQDQ v7, v6; \ - PUNPCKHQDQ t2, v6; \ - PUNPCKLQDQ v7, t2; \ - MOVO t1, v7; \ - MOVO v2, t1; \ - PUNPCKHQDQ t2, v7; \ - PUNPCKLQDQ v3, t2; \ - PUNPCKHQDQ t2, v2; \ - PUNPCKLQDQ t1, t2; \ - PUNPCKHQDQ t2, v3 - -#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \ - MOVO v4, t1; \ - MOVO v5, v4; \ - MOVO t1, v5; \ - MOVO v2, t1; \ - PUNPCKLQDQ v2, t2; \ - PUNPCKHQDQ v3, v2; \ - PUNPCKHQDQ t2, v2; \ - PUNPCKLQDQ v3, t2; \ - MOVO t1, v3; \ - MOVO v6, t1; \ - PUNPCKHQDQ t2, v3; \ - PUNPCKLQDQ v7, t2; \ - PUNPCKHQDQ t2, v6; \ - PUNPCKLQDQ t1, t2; \ - PUNPCKHQDQ t2, v7 - -#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \ - PADDQ m0, v0; \ - PADDQ m1, v1; \ - PADDQ v2, v0; \ - PADDQ v3, v1; \ - PXOR v0, v6; \ - PXOR v1, v7; \ - PSHUFD $0xB1, v6, v6; \ - PSHUFD $0xB1, v7, v7; \ - PADDQ v6, v4; \ - PADDQ v7, v5; \ - PXOR v4, v2; \ - PXOR v5, v3; \ - PSHUFB c40, v2; \ - PSHUFB c40, v3; \ - PADDQ m2, v0; \ - PADDQ m3, v1; \ - PADDQ v2, v0; \ - PADDQ v3, v1; \ - PXOR v0, v6; \ - PXOR v1, v7; \ - PSHUFB c48, v6; \ - PSHUFB c48, v7; \ - PADDQ v6, v4; \ - PADDQ v7, v5; \ - PXOR v4, v2; \ - PXOR v5, v3; \ - MOVOU v2, t0; \ - PADDQ v2, t0; \ - PSRLQ $63, v2; \ - PXOR t0, v2; \ - MOVOU v3, t0; \ - PADDQ v3, t0; \ - PSRLQ $63, v3; \ - PXOR t0, v3 - -#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \ - MOVQ i0*8(src), m0; \ - PINSRQ $1, i1*8(src), m0; \ - MOVQ i2*8(src), m1; \ - PINSRQ $1, i3*8(src), m1; \ - MOVQ i4*8(src), m2; \ - PINSRQ $1, i5*8(src), m2; \ - MOVQ i6*8(src), m3; \ - PINSRQ $1, i7*8(src), m3 - // func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) -TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment - MOVQ h+0(FP), AX - MOVQ c+8(FP), BX - MOVQ flag+16(FP), CX - MOVQ blocks_base+24(FP), SI - MOVQ blocks_len+32(FP), DI - - MOVQ SP, R10 - ADDQ $15, R10 - ANDQ $~15, R10 - - MOVOU ·iv3<>(SB), X0 - MOVO X0, 0(R10) - XORQ CX, 0(R10) // 0(R10) = ·iv3 ^ (CX || 0) - - MOVOU ·c40<>(SB), X13 - MOVOU ·c48<>(SB), X14 - - MOVOU 0(AX), X12 +// Requires: SSE2, SSE4.1, SSSE3 +TEXT ·hashBlocksSSE4(SB), NOSPLIT, $288-48 + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + MOVQ SP, R10 + ADDQ $0x0f, R10 + ANDQ $-16, R10 + MOVOU ·iv3<>+0(SB), X0 + MOVO X0, (R10) + XORQ CX, (R10) + MOVOU ·c40<>+0(SB), X13 + MOVOU ·c48<>+0(SB), X14 + MOVOU (AX), X12 MOVOU 16(AX), X15 - - MOVQ 0(BX), R8 - MOVQ 8(BX), R9 + MOVQ (BX), R8 + MOVQ 8(BX), R9 loop: - ADDQ $128, R8 - CMPQ R8, $128 + ADDQ $0x80, R8 + CMPQ R8, $0x80 JGE noinc INCQ R9 noinc: - MOVQ R8, X8 - PINSRQ $1, R9, X8 - - MOVO X12, X0 - MOVO X15, X1 - MOVOU 32(AX), X2 - MOVOU 48(AX), X3 - MOVOU ·iv0<>(SB), X4 - MOVOU ·iv1<>(SB), X5 - MOVOU ·iv2<>(SB), X6 - - PXOR X8, X6 - MOVO 0(R10), X7 - - LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7) - MOVO X8, 16(R10) - MOVO X9, 32(R10) - MOVO X10, 48(R10) - MOVO X11, 64(R10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15) - MOVO X8, 80(R10) - MOVO X9, 96(R10) - MOVO X10, 112(R10) - MOVO X11, 128(R10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6) - MOVO X8, 144(R10) - MOVO X9, 160(R10) - MOVO X10, 176(R10) - MOVO X11, 192(R10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3) - MOVO X8, 208(R10) - MOVO X9, 224(R10) - MOVO X10, 240(R10) - MOVO X11, 256(R10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + MOVQ R8, X8 + PINSRQ $0x01, R9, X8 + MOVO X12, X0 + MOVO X15, X1 + MOVOU 32(AX), X2 + MOVOU 48(AX), X3 + MOVOU ·iv0<>+0(SB), X4 + MOVOU ·iv1<>+0(SB), X5 + MOVOU ·iv2<>+0(SB), X6 + PXOR X8, X6 + MOVO (R10), X7 + MOVQ (SI), X8 + PINSRQ $0x01, 16(SI), X8 + MOVQ 32(SI), X9 + PINSRQ $0x01, 48(SI), X9 + MOVQ 8(SI), X10 + PINSRQ $0x01, 24(SI), X10 + MOVQ 40(SI), X11 + PINSRQ $0x01, 56(SI), X11 + MOVO X8, 16(R10) + MOVO X9, 32(R10) + MOVO X10, 48(R10) + MOVO X11, 64(R10) + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 64(SI), X8 + PINSRQ $0x01, 80(SI), X8 + MOVQ 96(SI), X9 + PINSRQ $0x01, 112(SI), X9 + MOVQ 72(SI), X10 + PINSRQ $0x01, 88(SI), X10 + MOVQ 104(SI), X11 + PINSRQ $0x01, 120(SI), X11 + MOVO X8, 80(R10) + MOVO X9, 96(R10) + MOVO X10, 112(R10) + MOVO X11, 128(R10) + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 112(SI), X8 + PINSRQ $0x01, 32(SI), X8 + MOVQ 72(SI), X9 + PINSRQ $0x01, 104(SI), X9 + MOVQ 80(SI), X10 + PINSRQ $0x01, 64(SI), X10 + MOVQ 120(SI), X11 + PINSRQ $0x01, 48(SI), X11 + MOVO X8, 144(R10) + MOVO X9, 160(R10) + MOVO X10, 176(R10) + MOVO X11, 192(R10) + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 8(SI), X8 + PINSRQ $0x01, (SI), X8 + MOVQ 88(SI), X9 + PINSRQ $0x01, 40(SI), X9 + MOVQ 96(SI), X10 + PINSRQ $0x01, 16(SI), X10 + MOVQ 56(SI), X11 + PINSRQ $0x01, 24(SI), X11 + MOVO X8, 208(R10) + MOVO X9, 224(R10) + MOVO X10, 240(R10) + MOVO X11, 256(R10) + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 88(SI), X8 + PINSRQ $0x01, 96(SI), X8 + MOVQ 40(SI), X9 + PINSRQ $0x01, 120(SI), X9 + MOVQ 64(SI), X10 + PINSRQ $0x01, (SI), X10 + MOVQ 16(SI), X11 + PINSRQ $0x01, 104(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 80(SI), X8 + PINSRQ $0x01, 24(SI), X8 + MOVQ 56(SI), X9 + PINSRQ $0x01, 72(SI), X9 + MOVQ 112(SI), X10 + PINSRQ $0x01, 48(SI), X10 + MOVQ 8(SI), X11 + PINSRQ $0x01, 32(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 56(SI), X8 + PINSRQ $0x01, 24(SI), X8 + MOVQ 104(SI), X9 + PINSRQ $0x01, 88(SI), X9 + MOVQ 72(SI), X10 + PINSRQ $0x01, 8(SI), X10 + MOVQ 96(SI), X11 + PINSRQ $0x01, 112(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 16(SI), X8 + PINSRQ $0x01, 40(SI), X8 + MOVQ 32(SI), X9 + PINSRQ $0x01, 120(SI), X9 + MOVQ 48(SI), X10 + PINSRQ $0x01, 80(SI), X10 + MOVQ (SI), X11 + PINSRQ $0x01, 64(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 72(SI), X8 + PINSRQ $0x01, 40(SI), X8 + MOVQ 16(SI), X9 + PINSRQ $0x01, 80(SI), X9 + MOVQ (SI), X10 + PINSRQ $0x01, 56(SI), X10 + MOVQ 32(SI), X11 + PINSRQ $0x01, 120(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 112(SI), X8 + PINSRQ $0x01, 88(SI), X8 + MOVQ 48(SI), X9 + PINSRQ $0x01, 24(SI), X9 + MOVQ 8(SI), X10 + PINSRQ $0x01, 96(SI), X10 + MOVQ 64(SI), X11 + PINSRQ $0x01, 104(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 16(SI), X8 + PINSRQ $0x01, 48(SI), X8 + MOVQ (SI), X9 + PINSRQ $0x01, 64(SI), X9 + MOVQ 96(SI), X10 + PINSRQ $0x01, 80(SI), X10 + MOVQ 88(SI), X11 + PINSRQ $0x01, 24(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 32(SI), X8 + PINSRQ $0x01, 56(SI), X8 + MOVQ 120(SI), X9 + PINSRQ $0x01, 8(SI), X9 + MOVQ 104(SI), X10 + PINSRQ $0x01, 40(SI), X10 + MOVQ 112(SI), X11 + PINSRQ $0x01, 72(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 96(SI), X8 + PINSRQ $0x01, 8(SI), X8 + MOVQ 112(SI), X9 + PINSRQ $0x01, 32(SI), X9 + MOVQ 40(SI), X10 + PINSRQ $0x01, 120(SI), X10 + MOVQ 104(SI), X11 + PINSRQ $0x01, 80(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ (SI), X8 + PINSRQ $0x01, 48(SI), X8 + MOVQ 72(SI), X9 + PINSRQ $0x01, 64(SI), X9 + MOVQ 56(SI), X10 + PINSRQ $0x01, 24(SI), X10 + MOVQ 16(SI), X11 + PINSRQ $0x01, 88(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 104(SI), X8 + PINSRQ $0x01, 56(SI), X8 + MOVQ 96(SI), X9 + PINSRQ $0x01, 24(SI), X9 + MOVQ 88(SI), X10 + PINSRQ $0x01, 112(SI), X10 + MOVQ 8(SI), X11 + PINSRQ $0x01, 72(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 40(SI), X8 + PINSRQ $0x01, 120(SI), X8 + MOVQ 64(SI), X9 + PINSRQ $0x01, 16(SI), X9 + MOVQ (SI), X10 + PINSRQ $0x01, 32(SI), X10 + MOVQ 48(SI), X11 + PINSRQ $0x01, 80(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 48(SI), X8 + PINSRQ $0x01, 112(SI), X8 + MOVQ 88(SI), X9 + PINSRQ $0x01, (SI), X9 + MOVQ 120(SI), X10 + PINSRQ $0x01, 72(SI), X10 + MOVQ 24(SI), X11 + PINSRQ $0x01, 64(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 96(SI), X8 + PINSRQ $0x01, 104(SI), X8 + MOVQ 8(SI), X9 + PINSRQ $0x01, 80(SI), X9 + MOVQ 16(SI), X10 + PINSRQ $0x01, 56(SI), X10 + MOVQ 32(SI), X11 + PINSRQ $0x01, 40(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 80(SI), X8 + PINSRQ $0x01, 64(SI), X8 + MOVQ 56(SI), X9 + PINSRQ $0x01, 8(SI), X9 + MOVQ 16(SI), X10 + PINSRQ $0x01, 32(SI), X10 + MOVQ 48(SI), X11 + PINSRQ $0x01, 40(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 120(SI), X8 + PINSRQ $0x01, 72(SI), X8 + MOVQ 24(SI), X9 + PINSRQ $0x01, 104(SI), X9 + MOVQ 88(SI), X10 + PINSRQ $0x01, 112(SI), X10 + MOVQ 96(SI), X11 + PINSRQ $0x01, (SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + PADDQ 16(R10), X0 + PADDQ 32(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ 48(R10), X0 + PADDQ 64(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + PADDQ 80(R10), X0 + PADDQ 96(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ 112(R10), X0 + PADDQ 128(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + PADDQ 144(R10), X0 + PADDQ 160(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ 176(R10), X0 + PADDQ 192(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + PADDQ 208(R10), X0 + PADDQ 224(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ 240(R10), X0 + PADDQ 256(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU 32(AX), X10 + MOVOU 48(AX), X11 + PXOR X0, X12 + PXOR X1, X15 + PXOR X2, X10 + PXOR X3, X11 + PXOR X4, X12 + PXOR X5, X15 + PXOR X6, X10 + PXOR X7, X11 + MOVOU X10, 32(AX) + MOVOU X11, 48(AX) + LEAQ 128(SI), SI + SUBQ $0x80, DI + JNE loop + MOVOU X12, (AX) + MOVOU X15, 16(AX) + MOVQ R8, (BX) + MOVQ R9, 8(BX) + RET - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) +DATA ·iv3<>+0(SB)/8, $0x1f83d9abfb41bd6b +DATA ·iv3<>+8(SB)/8, $0x5be0cd19137e2179 +GLOBL ·iv3<>(SB), RODATA|NOPTR, $16 - MOVOU 32(AX), X10 - MOVOU 48(AX), X11 - PXOR X0, X12 - PXOR X1, X15 - PXOR X2, X10 - PXOR X3, X11 - PXOR X4, X12 - PXOR X5, X15 - PXOR X6, X10 - PXOR X7, X11 - MOVOU X10, 32(AX) - MOVOU X11, 48(AX) +DATA ·c40<>+0(SB)/8, $0x0201000706050403 +DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·c40<>(SB), RODATA|NOPTR, $16 - LEAQ 128(SI), SI - SUBQ $128, DI - JNE loop +DATA ·c48<>+0(SB)/8, $0x0100070605040302 +DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·c48<>(SB), RODATA|NOPTR, $16 - MOVOU X12, 0(AX) - MOVOU X15, 16(AX) +DATA ·iv0<>+0(SB)/8, $0x6a09e667f3bcc908 +DATA ·iv0<>+8(SB)/8, $0xbb67ae8584caa73b +GLOBL ·iv0<>(SB), RODATA|NOPTR, $16 - MOVQ R8, 0(BX) - MOVQ R9, 8(BX) +DATA ·iv1<>+0(SB)/8, $0x3c6ef372fe94f82b +DATA ·iv1<>+8(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·iv1<>(SB), RODATA|NOPTR, $16 - RET +DATA ·iv2<>+0(SB)/8, $0x510e527fade682d1 +DATA ·iv2<>+8(SB)/8, $0x9b05688c2b3e6c1f +GLOBL ·iv2<>(SB), RODATA|NOPTR, $16 diff --git a/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.s b/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.s index 731d2ac6..fd5ee845 100644 --- a/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.s +++ b/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.s @@ -1,2715 +1,9762 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// This file was originally from https://golang.org/cl/24717 by Vlad Krasnov of CloudFlare. +// Code generated by command: go run chacha20poly1305_amd64_asm.go -out ../chacha20poly1305_amd64.s -pkg chacha20poly1305. DO NOT EDIT. //go:build gc && !purego #include "textflag.h" -// General register allocation -#define oup DI -#define inp SI -#define inl BX -#define adp CX // free to reuse, after we hash the additional data -#define keyp R8 // free to reuse, when we copy the key to stack -#define itr2 R9 // general iterator -#define itr1 CX // general iterator -#define acc0 R10 -#define acc1 R11 -#define acc2 R12 -#define t0 R13 -#define t1 R14 -#define t2 R15 -#define t3 R8 -// Register and stack allocation for the SSE code -#define rStore (0*16)(BP) -#define sStore (1*16)(BP) -#define state1Store (2*16)(BP) -#define state2Store (3*16)(BP) -#define tmpStore (4*16)(BP) -#define ctr0Store (5*16)(BP) -#define ctr1Store (6*16)(BP) -#define ctr2Store (7*16)(BP) -#define ctr3Store (8*16)(BP) -#define A0 X0 -#define A1 X1 -#define A2 X2 -#define B0 X3 -#define B1 X4 -#define B2 X5 -#define C0 X6 -#define C1 X7 -#define C2 X8 -#define D0 X9 -#define D1 X10 -#define D2 X11 -#define T0 X12 -#define T1 X13 -#define T2 X14 -#define T3 X15 -#define A3 T0 -#define B3 T1 -#define C3 T2 -#define D3 T3 -// Register and stack allocation for the AVX2 code -#define rsStoreAVX2 (0*32)(BP) -#define state1StoreAVX2 (1*32)(BP) -#define state2StoreAVX2 (2*32)(BP) -#define ctr0StoreAVX2 (3*32)(BP) -#define ctr1StoreAVX2 (4*32)(BP) -#define ctr2StoreAVX2 (5*32)(BP) -#define ctr3StoreAVX2 (6*32)(BP) -#define tmpStoreAVX2 (7*32)(BP) // 256 bytes on stack -#define AA0 Y0 -#define AA1 Y5 -#define AA2 Y6 -#define AA3 Y7 -#define BB0 Y14 -#define BB1 Y9 -#define BB2 Y10 -#define BB3 Y11 -#define CC0 Y12 -#define CC1 Y13 -#define CC2 Y8 -#define CC3 Y15 -#define DD0 Y4 -#define DD1 Y1 -#define DD2 Y2 -#define DD3 Y3 -#define TT0 DD3 -#define TT1 AA3 -#define TT2 BB3 -#define TT3 CC3 -// ChaCha20 constants -DATA ·chacha20Constants<>+0x00(SB)/4, $0x61707865 -DATA ·chacha20Constants<>+0x04(SB)/4, $0x3320646e -DATA ·chacha20Constants<>+0x08(SB)/4, $0x79622d32 -DATA ·chacha20Constants<>+0x0c(SB)/4, $0x6b206574 -DATA ·chacha20Constants<>+0x10(SB)/4, $0x61707865 -DATA ·chacha20Constants<>+0x14(SB)/4, $0x3320646e -DATA ·chacha20Constants<>+0x18(SB)/4, $0x79622d32 -DATA ·chacha20Constants<>+0x1c(SB)/4, $0x6b206574 -// <<< 16 with PSHUFB -DATA ·rol16<>+0x00(SB)/8, $0x0504070601000302 -DATA ·rol16<>+0x08(SB)/8, $0x0D0C0F0E09080B0A -DATA ·rol16<>+0x10(SB)/8, $0x0504070601000302 -DATA ·rol16<>+0x18(SB)/8, $0x0D0C0F0E09080B0A -// <<< 8 with PSHUFB -DATA ·rol8<>+0x00(SB)/8, $0x0605040702010003 -DATA ·rol8<>+0x08(SB)/8, $0x0E0D0C0F0A09080B -DATA ·rol8<>+0x10(SB)/8, $0x0605040702010003 -DATA ·rol8<>+0x18(SB)/8, $0x0E0D0C0F0A09080B - -DATA ·avx2InitMask<>+0x00(SB)/8, $0x0 -DATA ·avx2InitMask<>+0x08(SB)/8, $0x0 -DATA ·avx2InitMask<>+0x10(SB)/8, $0x1 -DATA ·avx2InitMask<>+0x18(SB)/8, $0x0 - -DATA ·avx2IncMask<>+0x00(SB)/8, $0x2 -DATA ·avx2IncMask<>+0x08(SB)/8, $0x0 -DATA ·avx2IncMask<>+0x10(SB)/8, $0x2 -DATA ·avx2IncMask<>+0x18(SB)/8, $0x0 -// Poly1305 key clamp -DATA ·polyClampMask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF -DATA ·polyClampMask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC -DATA ·polyClampMask<>+0x10(SB)/8, $0xFFFFFFFFFFFFFFFF -DATA ·polyClampMask<>+0x18(SB)/8, $0xFFFFFFFFFFFFFFFF - -DATA ·sseIncMask<>+0x00(SB)/8, $0x1 -DATA ·sseIncMask<>+0x08(SB)/8, $0x0 -// To load/store the last < 16 bytes in a buffer -DATA ·andMask<>+0x00(SB)/8, $0x00000000000000ff -DATA ·andMask<>+0x08(SB)/8, $0x0000000000000000 -DATA ·andMask<>+0x10(SB)/8, $0x000000000000ffff -DATA ·andMask<>+0x18(SB)/8, $0x0000000000000000 -DATA ·andMask<>+0x20(SB)/8, $0x0000000000ffffff -DATA ·andMask<>+0x28(SB)/8, $0x0000000000000000 -DATA ·andMask<>+0x30(SB)/8, $0x00000000ffffffff -DATA ·andMask<>+0x38(SB)/8, $0x0000000000000000 -DATA ·andMask<>+0x40(SB)/8, $0x000000ffffffffff -DATA ·andMask<>+0x48(SB)/8, $0x0000000000000000 -DATA ·andMask<>+0x50(SB)/8, $0x0000ffffffffffff -DATA ·andMask<>+0x58(SB)/8, $0x0000000000000000 -DATA ·andMask<>+0x60(SB)/8, $0x00ffffffffffffff -DATA ·andMask<>+0x68(SB)/8, $0x0000000000000000 -DATA ·andMask<>+0x70(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+0x78(SB)/8, $0x0000000000000000 -DATA ·andMask<>+0x80(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+0x88(SB)/8, $0x00000000000000ff -DATA ·andMask<>+0x90(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+0x98(SB)/8, $0x000000000000ffff -DATA ·andMask<>+0xa0(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+0xa8(SB)/8, $0x0000000000ffffff -DATA ·andMask<>+0xb0(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+0xb8(SB)/8, $0x00000000ffffffff -DATA ·andMask<>+0xc0(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+0xc8(SB)/8, $0x000000ffffffffff -DATA ·andMask<>+0xd0(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+0xd8(SB)/8, $0x0000ffffffffffff -DATA ·andMask<>+0xe0(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+0xe8(SB)/8, $0x00ffffffffffffff - -GLOBL ·chacha20Constants<>(SB), (NOPTR+RODATA), $32 -GLOBL ·rol16<>(SB), (NOPTR+RODATA), $32 -GLOBL ·rol8<>(SB), (NOPTR+RODATA), $32 -GLOBL ·sseIncMask<>(SB), (NOPTR+RODATA), $16 -GLOBL ·avx2IncMask<>(SB), (NOPTR+RODATA), $32 -GLOBL ·avx2InitMask<>(SB), (NOPTR+RODATA), $32 -GLOBL ·polyClampMask<>(SB), (NOPTR+RODATA), $32 -GLOBL ·andMask<>(SB), (NOPTR+RODATA), $240 -// No PALIGNR in Go ASM yet (but VPALIGNR is present). -#define shiftB0Left BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xdb; BYTE $0x04 // PALIGNR $4, X3, X3 -#define shiftB1Left BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xe4; BYTE $0x04 // PALIGNR $4, X4, X4 -#define shiftB2Left BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xed; BYTE $0x04 // PALIGNR $4, X5, X5 -#define shiftB3Left BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xed; BYTE $0x04 // PALIGNR $4, X13, X13 -#define shiftC0Left BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xf6; BYTE $0x08 // PALIGNR $8, X6, X6 -#define shiftC1Left BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xff; BYTE $0x08 // PALIGNR $8, X7, X7 -#define shiftC2Left BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xc0; BYTE $0x08 // PALIGNR $8, X8, X8 -#define shiftC3Left BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xf6; BYTE $0x08 // PALIGNR $8, X14, X14 -#define shiftD0Left BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xc9; BYTE $0x0c // PALIGNR $12, X9, X9 -#define shiftD1Left BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xd2; BYTE $0x0c // PALIGNR $12, X10, X10 -#define shiftD2Left BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xdb; BYTE $0x0c // PALIGNR $12, X11, X11 -#define shiftD3Left BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xff; BYTE $0x0c // PALIGNR $12, X15, X15 -#define shiftB0Right BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xdb; BYTE $0x0c // PALIGNR $12, X3, X3 -#define shiftB1Right BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xe4; BYTE $0x0c // PALIGNR $12, X4, X4 -#define shiftB2Right BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xed; BYTE $0x0c // PALIGNR $12, X5, X5 -#define shiftB3Right BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xed; BYTE $0x0c // PALIGNR $12, X13, X13 -#define shiftC0Right shiftC0Left -#define shiftC1Right shiftC1Left -#define shiftC2Right shiftC2Left -#define shiftC3Right shiftC3Left -#define shiftD0Right BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xc9; BYTE $0x04 // PALIGNR $4, X9, X9 -#define shiftD1Right BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xd2; BYTE $0x04 // PALIGNR $4, X10, X10 -#define shiftD2Right BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xdb; BYTE $0x04 // PALIGNR $4, X11, X11 -#define shiftD3Right BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xff; BYTE $0x04 // PALIGNR $4, X15, X15 - -// Some macros - -// ROL rotates the uint32s in register R left by N bits, using temporary T. -#define ROL(N, R, T) \ - MOVO R, T; PSLLL $(N), T; PSRLL $(32-(N)), R; PXOR T, R - -// ROL16 rotates the uint32s in register R left by 16, using temporary T if needed. -#ifdef GOAMD64_v2 -#define ROL16(R, T) PSHUFB ·rol16<>(SB), R -#else -#define ROL16(R, T) ROL(16, R, T) -#endif - -// ROL8 rotates the uint32s in register R left by 8, using temporary T if needed. -#ifdef GOAMD64_v2 -#define ROL8(R, T) PSHUFB ·rol8<>(SB), R -#else -#define ROL8(R, T) ROL(8, R, T) -#endif - -#define chachaQR(A, B, C, D, T) \ - PADDD B, A; PXOR A, D; ROL16(D, T) \ - PADDD D, C; PXOR C, B; MOVO B, T; PSLLL $12, T; PSRLL $20, B; PXOR T, B \ - PADDD B, A; PXOR A, D; ROL8(D, T) \ - PADDD D, C; PXOR C, B; MOVO B, T; PSLLL $7, T; PSRLL $25, B; PXOR T, B - -#define chachaQR_AVX2(A, B, C, D, T) \ - VPADDD B, A, A; VPXOR A, D, D; VPSHUFB ·rol16<>(SB), D, D \ - VPADDD D, C, C; VPXOR C, B, B; VPSLLD $12, B, T; VPSRLD $20, B, B; VPXOR T, B, B \ - VPADDD B, A, A; VPXOR A, D, D; VPSHUFB ·rol8<>(SB), D, D \ - VPADDD D, C, C; VPXOR C, B, B; VPSLLD $7, B, T; VPSRLD $25, B, B; VPXOR T, B, B - -#define polyAdd(S) ADDQ S, acc0; ADCQ 8+S, acc1; ADCQ $1, acc2 -#define polyMulStage1 MOVQ (0*8)(BP), AX; MOVQ AX, t2; MULQ acc0; MOVQ AX, t0; MOVQ DX, t1; MOVQ (0*8)(BP), AX; MULQ acc1; IMULQ acc2, t2; ADDQ AX, t1; ADCQ DX, t2 -#define polyMulStage2 MOVQ (1*8)(BP), AX; MOVQ AX, t3; MULQ acc0; ADDQ AX, t1; ADCQ $0, DX; MOVQ DX, acc0; MOVQ (1*8)(BP), AX; MULQ acc1; ADDQ AX, t2; ADCQ $0, DX -#define polyMulStage3 IMULQ acc2, t3; ADDQ acc0, t2; ADCQ DX, t3 -#define polyMulReduceStage MOVQ t0, acc0; MOVQ t1, acc1; MOVQ t2, acc2; ANDQ $3, acc2; MOVQ t2, t0; ANDQ $-4, t0; MOVQ t3, t1; SHRQ $2, t3, t2; SHRQ $2, t3; ADDQ t0, acc0; ADCQ t1, acc1; ADCQ $0, acc2; ADDQ t2, acc0; ADCQ t3, acc1; ADCQ $0, acc2 - -#define polyMulStage1_AVX2 MOVQ (0*8)(BP), DX; MOVQ DX, t2; MULXQ acc0, t0, t1; IMULQ acc2, t2; MULXQ acc1, AX, DX; ADDQ AX, t1; ADCQ DX, t2 -#define polyMulStage2_AVX2 MOVQ (1*8)(BP), DX; MULXQ acc0, acc0, AX; ADDQ acc0, t1; MULXQ acc1, acc1, t3; ADCQ acc1, t2; ADCQ $0, t3 -#define polyMulStage3_AVX2 IMULQ acc2, DX; ADDQ AX, t2; ADCQ DX, t3 - -#define polyMul polyMulStage1; polyMulStage2; polyMulStage3; polyMulReduceStage -#define polyMulAVX2 polyMulStage1_AVX2; polyMulStage2_AVX2; polyMulStage3_AVX2; polyMulReduceStage -// ---------------------------------------------------------------------------- + +// func polyHashADInternal<>() TEXT polyHashADInternal<>(SB), NOSPLIT, $0 - // adp points to beginning of additional data - // itr2 holds ad length - XORQ acc0, acc0 - XORQ acc1, acc1 - XORQ acc2, acc2 - CMPQ itr2, $13 - JNE hashADLoop - -openFastTLSAD: - // Special treatment for the TLS case of 13 bytes - MOVQ (adp), acc0 - MOVQ 5(adp), acc1 - SHRQ $24, acc1 - MOVQ $1, acc2 - polyMul + // Hack: Must declare #define macros inside of a function due to Avo constraints + // ROL rotates the uint32s in register R left by N bits, using temporary T. + #define ROL(N, R, T) \ + MOVO R, T; \ + PSLLL $(N), T; \ + PSRLL $(32-(N)), R; \ + PXOR T, R + + // ROL8 rotates the uint32s in register R left by 8, using temporary T if needed. + #ifdef GOAMD64_v2 + #define ROL8(R, T) PSHUFB ·rol8<>(SB), R + #else + #define ROL8(R, T) ROL(8, R, T) + #endif + + // ROL16 rotates the uint32s in register R left by 16, using temporary T if needed. + #ifdef GOAMD64_v2 + #define ROL16(R, T) PSHUFB ·rol16<>(SB), R + #else + #define ROL16(R, T) ROL(16, R, T) + #endif + XORQ R10, R10 + XORQ R11, R11 + XORQ R12, R12 + CMPQ R9, $0x0d + JNE hashADLoop + MOVQ (CX), R10 + MOVQ 5(CX), R11 + SHRQ $0x18, R11 + MOVQ $0x00000001, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 RET hashADLoop: // Hash in 16 byte chunks - CMPQ itr2, $16 - JB hashADTail - polyAdd(0(adp)) - LEAQ (1*16)(adp), adp - SUBQ $16, itr2 - polyMul - JMP hashADLoop + CMPQ R9, $0x10 + JB hashADTail + ADDQ (CX), R10 + ADCQ 8(CX), R11 + ADCQ $0x01, R12 + LEAQ 16(CX), CX + SUBQ $0x10, R9 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + JMP hashADLoop hashADTail: - CMPQ itr2, $0 + CMPQ R9, $0x00 JE hashADDone // Hash last < 16 byte tail - XORQ t0, t0 - XORQ t1, t1 - XORQ t2, t2 - ADDQ itr2, adp + XORQ R13, R13 + XORQ R14, R14 + XORQ R15, R15 + ADDQ R9, CX hashADTailLoop: - SHLQ $8, t0, t1 - SHLQ $8, t0 - MOVB -1(adp), t2 - XORQ t2, t0 - DECQ adp - DECQ itr2 - JNE hashADTailLoop - -hashADTailFinish: - ADDQ t0, acc0; ADCQ t1, acc1; ADCQ $1, acc2 - polyMul - - // Finished AD + SHLQ $0x08, R13, R14 + SHLQ $0x08, R13 + MOVB -1(CX), R15 + XORQ R15, R13 + DECQ CX + DECQ R9 + JNE hashADTailLoop + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + hashADDone: RET -// ---------------------------------------------------------------------------- -// func chacha20Poly1305Open(dst, key, src, ad []byte) bool -TEXT ·chacha20Poly1305Open(SB), 0, $288-97 +// func chacha20Poly1305Open(dst []byte, key []uint32, src []byte, ad []byte) bool +// Requires: AVX, AVX2, BMI2, CMOV, SSE2 +TEXT ·chacha20Poly1305Open(SB), $288-97 // For aligned stack access MOVQ SP, BP - ADDQ $32, BP + ADDQ $0x20, BP ANDQ $-32, BP - MOVQ dst+0(FP), oup - MOVQ key+24(FP), keyp - MOVQ src+48(FP), inp - MOVQ src_len+56(FP), inl - MOVQ ad+72(FP), adp + MOVQ dst_base+0(FP), DI + MOVQ key_base+24(FP), R8 + MOVQ src_base+48(FP), SI + MOVQ src_len+56(FP), BX + MOVQ ad_base+72(FP), CX // Check for AVX2 support - CMPB ·useAVX2(SB), $1 + CMPB ·useAVX2+0(SB), $0x01 JE chacha20Poly1305Open_AVX2 // Special optimization, for very short buffers - CMPQ inl, $128 - JBE openSSE128 // About 16% faster + CMPQ BX, $0x80 + JBE openSSE128 // For long buffers, prepare the poly key first - MOVOU ·chacha20Constants<>(SB), A0 - MOVOU (1*16)(keyp), B0 - MOVOU (2*16)(keyp), C0 - MOVOU (3*16)(keyp), D0 - MOVO D0, T1 + MOVOU ·chacha20Constants<>+0(SB), X0 + MOVOU 16(R8), X3 + MOVOU 32(R8), X6 + MOVOU 48(R8), X9 + MOVO X9, X13 // Store state on stack for future use - MOVO B0, state1Store - MOVO C0, state2Store - MOVO D0, ctr3Store - MOVQ $10, itr2 + MOVO X3, 32(BP) + MOVO X6, 48(BP) + MOVO X9, 128(BP) + MOVQ $0x0000000a, R9 openSSEPreparePolyKey: - chachaQR(A0, B0, C0, D0, T0) - shiftB0Left; shiftC0Left; shiftD0Left - chachaQR(A0, B0, C0, D0, T0) - shiftB0Right; shiftC0Right; shiftD0Right - DECQ itr2 - JNE openSSEPreparePolyKey + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + DECQ R9 + JNE openSSEPreparePolyKey // A0|B0 hold the Poly1305 32-byte key, C0,D0 can be discarded - PADDL ·chacha20Constants<>(SB), A0; PADDL state1Store, B0 + PADDL ·chacha20Constants<>+0(SB), X0 + PADDL 32(BP), X3 // Clamp and store the key - PAND ·polyClampMask<>(SB), A0 - MOVO A0, rStore; MOVO B0, sStore + PAND ·polyClampMask<>+0(SB), X0 + MOVO X0, (BP) + MOVO X3, 16(BP) // Hash AAD - MOVQ ad_len+80(FP), itr2 + MOVQ ad_len+80(FP), R9 CALL polyHashADInternal<>(SB) openSSEMainLoop: - CMPQ inl, $256 + CMPQ BX, $0x00000100 JB openSSEMainLoopDone // Load state, increment counter blocks - MOVO ·chacha20Constants<>(SB), A0; MOVO state1Store, B0; MOVO state2Store, C0; MOVO ctr3Store, D0; PADDL ·sseIncMask<>(SB), D0 - MOVO A0, A1; MOVO B0, B1; MOVO C0, C1; MOVO D0, D1; PADDL ·sseIncMask<>(SB), D1 - MOVO A1, A2; MOVO B1, B2; MOVO C1, C2; MOVO D1, D2; PADDL ·sseIncMask<>(SB), D2 - MOVO A2, A3; MOVO B2, B3; MOVO C2, C3; MOVO D2, D3; PADDL ·sseIncMask<>(SB), D3 + MOVO ·chacha20Constants<>+0(SB), X0 + MOVO 32(BP), X3 + MOVO 48(BP), X6 + MOVO 128(BP), X9 + PADDL ·sseIncMask<>+0(SB), X9 + MOVO X0, X1 + MOVO X3, X4 + MOVO X6, X7 + MOVO X9, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X1, X2 + MOVO X4, X5 + MOVO X7, X8 + MOVO X10, X11 + PADDL ·sseIncMask<>+0(SB), X11 + MOVO X2, X12 + MOVO X5, X13 + MOVO X8, X14 + MOVO X11, X15 + PADDL ·sseIncMask<>+0(SB), X15 // Store counters - MOVO D0, ctr0Store; MOVO D1, ctr1Store; MOVO D2, ctr2Store; MOVO D3, ctr3Store + MOVO X9, 80(BP) + MOVO X10, 96(BP) + MOVO X11, 112(BP) + MOVO X15, 128(BP) - // There are 10 ChaCha20 iterations of 2QR each, so for 6 iterations we hash 2 blocks, and for the remaining 4 only 1 block - for a total of 16 - MOVQ $4, itr1 - MOVQ inp, itr2 + // There are 10 ChaCha20 iterations of 2QR each, so for 6 iterations we hash + // 2 blocks, and for the remaining 4 only 1 block - for a total of 16 + MOVQ $0x00000004, CX + MOVQ SI, R9 openSSEInternalLoop: - MOVO C3, tmpStore - chachaQR(A0, B0, C0, D0, C3); chachaQR(A1, B1, C1, D1, C3); chachaQR(A2, B2, C2, D2, C3) - MOVO tmpStore, C3 - MOVO C1, tmpStore - chachaQR(A3, B3, C3, D3, C1) - MOVO tmpStore, C1 - polyAdd(0(itr2)) - shiftB0Left; shiftB1Left; shiftB2Left; shiftB3Left - shiftC0Left; shiftC1Left; shiftC2Left; shiftC3Left - shiftD0Left; shiftD1Left; shiftD2Left; shiftD3Left - polyMulStage1 - polyMulStage2 - LEAQ (2*8)(itr2), itr2 - MOVO C3, tmpStore - chachaQR(A0, B0, C0, D0, C3); chachaQR(A1, B1, C1, D1, C3); chachaQR(A2, B2, C2, D2, C3) - MOVO tmpStore, C3 - MOVO C1, tmpStore - polyMulStage3 - chachaQR(A3, B3, C3, D3, C1) - MOVO tmpStore, C1 - polyMulReduceStage - shiftB0Right; shiftB1Right; shiftB2Right; shiftB3Right - shiftC0Right; shiftC1Right; shiftC2Right; shiftC3Right - shiftD0Right; shiftD1Right; shiftD2Right; shiftD3Right - DECQ itr1 - JGE openSSEInternalLoop - - polyAdd(0(itr2)) - polyMul - LEAQ (2*8)(itr2), itr2 - - CMPQ itr1, $-6 - JG openSSEInternalLoop + MOVO X14, 64(BP) + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X3 + PXOR X14, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X3 + PXOR X14, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X4 + PXOR X14, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X4 + PXOR X14, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X5 + PXOR X14, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X5 + PXOR X14, X5 + MOVO 64(BP), X14 + MOVO X7, 64(BP) + PADDD X13, X12 + PXOR X12, X15 + ROL16(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x0c, X7 + PSRLL $0x14, X13 + PXOR X7, X13 + PADDD X13, X12 + PXOR X12, X15 + ROL8(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x07, X7 + PSRLL $0x19, X13 + PXOR X7, X13 + MOVO 64(BP), X7 + ADDQ (R9), R10 + ADCQ 8(R9), R11 + ADCQ $0x01, R12 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x0c + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + LEAQ 16(R9), R9 + MOVO X14, 64(BP) + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X3 + PXOR X14, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X3 + PXOR X14, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X4 + PXOR X14, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X4 + PXOR X14, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X5 + PXOR X14, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X5 + PXOR X14, X5 + MOVO 64(BP), X14 + MOVO X7, 64(BP) + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + PADDD X13, X12 + PXOR X12, X15 + ROL16(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x0c, X7 + PSRLL $0x14, X13 + PXOR X7, X13 + PADDD X13, X12 + PXOR X12, X15 + ROL8(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x07, X7 + PSRLL $0x19, X13 + PXOR X7, X13 + MOVO 64(BP), X7 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x04 + DECQ CX + JGE openSSEInternalLoop + ADDQ (R9), R10 + ADCQ 8(R9), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(R9), R9 + CMPQ CX, $-6 + JG openSSEInternalLoop // Add in the state - PADDD ·chacha20Constants<>(SB), A0; PADDD ·chacha20Constants<>(SB), A1; PADDD ·chacha20Constants<>(SB), A2; PADDD ·chacha20Constants<>(SB), A3 - PADDD state1Store, B0; PADDD state1Store, B1; PADDD state1Store, B2; PADDD state1Store, B3 - PADDD state2Store, C0; PADDD state2Store, C1; PADDD state2Store, C2; PADDD state2Store, C3 - PADDD ctr0Store, D0; PADDD ctr1Store, D1; PADDD ctr2Store, D2; PADDD ctr3Store, D3 + PADDD ·chacha20Constants<>+0(SB), X0 + PADDD ·chacha20Constants<>+0(SB), X1 + PADDD ·chacha20Constants<>+0(SB), X2 + PADDD ·chacha20Constants<>+0(SB), X12 + PADDD 32(BP), X3 + PADDD 32(BP), X4 + PADDD 32(BP), X5 + PADDD 32(BP), X13 + PADDD 48(BP), X6 + PADDD 48(BP), X7 + PADDD 48(BP), X8 + PADDD 48(BP), X14 + PADDD 80(BP), X9 + PADDD 96(BP), X10 + PADDD 112(BP), X11 + PADDD 128(BP), X15 // Load - xor - store - MOVO D3, tmpStore - MOVOU (0*16)(inp), D3; PXOR D3, A0; MOVOU A0, (0*16)(oup) - MOVOU (1*16)(inp), D3; PXOR D3, B0; MOVOU B0, (1*16)(oup) - MOVOU (2*16)(inp), D3; PXOR D3, C0; MOVOU C0, (2*16)(oup) - MOVOU (3*16)(inp), D3; PXOR D3, D0; MOVOU D0, (3*16)(oup) - MOVOU (4*16)(inp), D0; PXOR D0, A1; MOVOU A1, (4*16)(oup) - MOVOU (5*16)(inp), D0; PXOR D0, B1; MOVOU B1, (5*16)(oup) - MOVOU (6*16)(inp), D0; PXOR D0, C1; MOVOU C1, (6*16)(oup) - MOVOU (7*16)(inp), D0; PXOR D0, D1; MOVOU D1, (7*16)(oup) - MOVOU (8*16)(inp), D0; PXOR D0, A2; MOVOU A2, (8*16)(oup) - MOVOU (9*16)(inp), D0; PXOR D0, B2; MOVOU B2, (9*16)(oup) - MOVOU (10*16)(inp), D0; PXOR D0, C2; MOVOU C2, (10*16)(oup) - MOVOU (11*16)(inp), D0; PXOR D0, D2; MOVOU D2, (11*16)(oup) - MOVOU (12*16)(inp), D0; PXOR D0, A3; MOVOU A3, (12*16)(oup) - MOVOU (13*16)(inp), D0; PXOR D0, B3; MOVOU B3, (13*16)(oup) - MOVOU (14*16)(inp), D0; PXOR D0, C3; MOVOU C3, (14*16)(oup) - MOVOU (15*16)(inp), D0; PXOR tmpStore, D0; MOVOU D0, (15*16)(oup) - LEAQ 256(inp), inp - LEAQ 256(oup), oup - SUBQ $256, inl + MOVO X15, 64(BP) + MOVOU (SI), X15 + PXOR X15, X0 + MOVOU X0, (DI) + MOVOU 16(SI), X15 + PXOR X15, X3 + MOVOU X3, 16(DI) + MOVOU 32(SI), X15 + PXOR X15, X6 + MOVOU X6, 32(DI) + MOVOU 48(SI), X15 + PXOR X15, X9 + MOVOU X9, 48(DI) + MOVOU 64(SI), X9 + PXOR X9, X1 + MOVOU X1, 64(DI) + MOVOU 80(SI), X9 + PXOR X9, X4 + MOVOU X4, 80(DI) + MOVOU 96(SI), X9 + PXOR X9, X7 + MOVOU X7, 96(DI) + MOVOU 112(SI), X9 + PXOR X9, X10 + MOVOU X10, 112(DI) + MOVOU 128(SI), X9 + PXOR X9, X2 + MOVOU X2, 128(DI) + MOVOU 144(SI), X9 + PXOR X9, X5 + MOVOU X5, 144(DI) + MOVOU 160(SI), X9 + PXOR X9, X8 + MOVOU X8, 160(DI) + MOVOU 176(SI), X9 + PXOR X9, X11 + MOVOU X11, 176(DI) + MOVOU 192(SI), X9 + PXOR X9, X12 + MOVOU X12, 192(DI) + MOVOU 208(SI), X9 + PXOR X9, X13 + MOVOU X13, 208(DI) + MOVOU 224(SI), X9 + PXOR X9, X14 + MOVOU X14, 224(DI) + MOVOU 240(SI), X9 + PXOR 64(BP), X9 + MOVOU X9, 240(DI) + LEAQ 256(SI), SI + LEAQ 256(DI), DI + SUBQ $0x00000100, BX JMP openSSEMainLoop openSSEMainLoopDone: // Handle the various tail sizes efficiently - TESTQ inl, inl + TESTQ BX, BX JE openSSEFinalize - CMPQ inl, $64 + CMPQ BX, $0x40 JBE openSSETail64 - CMPQ inl, $128 + CMPQ BX, $0x80 JBE openSSETail128 - CMPQ inl, $192 + CMPQ BX, $0xc0 JBE openSSETail192 JMP openSSETail256 openSSEFinalize: // Hash in the PT, AAD lengths - ADDQ ad_len+80(FP), acc0; ADCQ src_len+56(FP), acc1; ADCQ $1, acc2 - polyMul + ADDQ ad_len+80(FP), R10 + ADCQ src_len+56(FP), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 // Final reduce - MOVQ acc0, t0 - MOVQ acc1, t1 - MOVQ acc2, t2 - SUBQ $-5, acc0 - SBBQ $-1, acc1 - SBBQ $3, acc2 - CMOVQCS t0, acc0 - CMOVQCS t1, acc1 - CMOVQCS t2, acc2 + MOVQ R10, R13 + MOVQ R11, R14 + MOVQ R12, R15 + SUBQ $-5, R10 + SBBQ $-1, R11 + SBBQ $0x03, R12 + CMOVQCS R13, R10 + CMOVQCS R14, R11 + CMOVQCS R15, R12 // Add in the "s" part of the key - ADDQ 0+sStore, acc0 - ADCQ 8+sStore, acc1 + ADDQ 16(BP), R10 + ADCQ 24(BP), R11 // Finally, constant time compare to the tag at the end of the message XORQ AX, AX - MOVQ $1, DX - XORQ (0*8)(inp), acc0 - XORQ (1*8)(inp), acc1 - ORQ acc1, acc0 + MOVQ $0x00000001, DX + XORQ (SI), R10 + XORQ 8(SI), R11 + ORQ R11, R10 CMOVQEQ DX, AX // Return true iff tags are equal MOVB AX, ret+96(FP) RET -// ---------------------------------------------------------------------------- -// Special optimization for buffers smaller than 129 bytes openSSE128: - // For up to 128 bytes of ciphertext and 64 bytes for the poly key, we require to process three blocks - MOVOU ·chacha20Constants<>(SB), A0; MOVOU (1*16)(keyp), B0; MOVOU (2*16)(keyp), C0; MOVOU (3*16)(keyp), D0 - MOVO A0, A1; MOVO B0, B1; MOVO C0, C1; MOVO D0, D1; PADDL ·sseIncMask<>(SB), D1 - MOVO A1, A2; MOVO B1, B2; MOVO C1, C2; MOVO D1, D2; PADDL ·sseIncMask<>(SB), D2 - MOVO B0, T1; MOVO C0, T2; MOVO D1, T3 - MOVQ $10, itr2 + MOVOU ·chacha20Constants<>+0(SB), X0 + MOVOU 16(R8), X3 + MOVOU 32(R8), X6 + MOVOU 48(R8), X9 + MOVO X0, X1 + MOVO X3, X4 + MOVO X6, X7 + MOVO X9, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X1, X2 + MOVO X4, X5 + MOVO X7, X8 + MOVO X10, X11 + PADDL ·sseIncMask<>+0(SB), X11 + MOVO X3, X13 + MOVO X6, X14 + MOVO X10, X15 + MOVQ $0x0000000a, R9 openSSE128InnerCipherLoop: - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0); chachaQR(A2, B2, C2, D2, T0) - shiftB0Left; shiftB1Left; shiftB2Left - shiftC0Left; shiftC1Left; shiftC2Left - shiftD0Left; shiftD1Left; shiftD2Left - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0); chachaQR(A2, B2, C2, D2, T0) - shiftB0Right; shiftB1Right; shiftB2Right - shiftC0Right; shiftC1Right; shiftC2Right - shiftD0Right; shiftD1Right; shiftD2Right - DECQ itr2 - JNE openSSE128InnerCipherLoop + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X5 + PXOR X12, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X5 + PXOR X12, X5 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X5 + PXOR X12, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X5 + PXOR X12, X5 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + DECQ R9 + JNE openSSE128InnerCipherLoop // A0|B0 hold the Poly1305 32-byte key, C0,D0 can be discarded - PADDL ·chacha20Constants<>(SB), A0; PADDL ·chacha20Constants<>(SB), A1; PADDL ·chacha20Constants<>(SB), A2 - PADDL T1, B0; PADDL T1, B1; PADDL T1, B2 - PADDL T2, C1; PADDL T2, C2 - PADDL T3, D1; PADDL ·sseIncMask<>(SB), T3; PADDL T3, D2 + PADDL ·chacha20Constants<>+0(SB), X0 + PADDL ·chacha20Constants<>+0(SB), X1 + PADDL ·chacha20Constants<>+0(SB), X2 + PADDL X13, X3 + PADDL X13, X4 + PADDL X13, X5 + PADDL X14, X7 + PADDL X14, X8 + PADDL X15, X10 + PADDL ·sseIncMask<>+0(SB), X15 + PADDL X15, X11 // Clamp and store the key - PAND ·polyClampMask<>(SB), A0 - MOVOU A0, rStore; MOVOU B0, sStore + PAND ·polyClampMask<>+0(SB), X0 + MOVOU X0, (BP) + MOVOU X3, 16(BP) // Hash - MOVQ ad_len+80(FP), itr2 + MOVQ ad_len+80(FP), R9 CALL polyHashADInternal<>(SB) openSSE128Open: - CMPQ inl, $16 + CMPQ BX, $0x10 JB openSSETail16 - SUBQ $16, inl + SUBQ $0x10, BX // Load for hashing - polyAdd(0(inp)) + ADDQ (SI), R10 + ADCQ 8(SI), R11 + ADCQ $0x01, R12 // Load for decryption - MOVOU (inp), T0; PXOR T0, A1; MOVOU A1, (oup) - LEAQ (1*16)(inp), inp - LEAQ (1*16)(oup), oup - polyMul + MOVOU (SI), X12 + PXOR X12, X1 + MOVOU X1, (DI) + LEAQ 16(SI), SI + LEAQ 16(DI), DI + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 // Shift the stream "left" - MOVO B1, A1 - MOVO C1, B1 - MOVO D1, C1 - MOVO A2, D1 - MOVO B2, A2 - MOVO C2, B2 - MOVO D2, C2 + MOVO X4, X1 + MOVO X7, X4 + MOVO X10, X7 + MOVO X2, X10 + MOVO X5, X2 + MOVO X8, X5 + MOVO X11, X8 JMP openSSE128Open openSSETail16: - TESTQ inl, inl + TESTQ BX, BX JE openSSEFinalize // We can safely load the CT from the end, because it is padded with the MAC - MOVQ inl, itr2 - SHLQ $4, itr2 - LEAQ ·andMask<>(SB), t0 - MOVOU (inp), T0 - ADDQ inl, inp - PAND -16(t0)(itr2*1), T0 - MOVO T0, 0+tmpStore - MOVQ T0, t0 - MOVQ 8+tmpStore, t1 - PXOR A1, T0 + MOVQ BX, R9 + SHLQ $0x04, R9 + LEAQ ·andMask<>+0(SB), R13 + MOVOU (SI), X12 + ADDQ BX, SI + PAND -16(R13)(R9*1), X12 + MOVO X12, 64(BP) + MOVQ X12, R13 + MOVQ 72(BP), R14 + PXOR X1, X12 // We can only store one byte at a time, since plaintext can be shorter than 16 bytes openSSETail16Store: - MOVQ T0, t3 - MOVB t3, (oup) - PSRLDQ $1, T0 - INCQ oup - DECQ inl + MOVQ X12, R8 + MOVB R8, (DI) + PSRLDQ $0x01, X12 + INCQ DI + DECQ BX JNE openSSETail16Store - ADDQ t0, acc0; ADCQ t1, acc1; ADCQ $1, acc2 - polyMul + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 JMP openSSEFinalize -// ---------------------------------------------------------------------------- -// Special optimization for the last 64 bytes of ciphertext openSSETail64: - // Need to decrypt up to 64 bytes - prepare single block - MOVO ·chacha20Constants<>(SB), A0; MOVO state1Store, B0; MOVO state2Store, C0; MOVO ctr3Store, D0; PADDL ·sseIncMask<>(SB), D0; MOVO D0, ctr0Store - XORQ itr2, itr2 - MOVQ inl, itr1 - CMPQ itr1, $16 - JB openSSETail64LoopB + MOVO ·chacha20Constants<>+0(SB), X0 + MOVO 32(BP), X3 + MOVO 48(BP), X6 + MOVO 128(BP), X9 + PADDL ·sseIncMask<>+0(SB), X9 + MOVO X9, 80(BP) + XORQ R9, R9 + MOVQ BX, CX + CMPQ CX, $0x10 + JB openSSETail64LoopB openSSETail64LoopA: - // Perform ChaCha rounds, while hashing the remaining input - polyAdd(0(inp)(itr2*1)) - polyMul - SUBQ $16, itr1 + ADDQ (SI)(R9*1), R10 + ADCQ 8(SI)(R9*1), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + SUBQ $0x10, CX openSSETail64LoopB: - ADDQ $16, itr2 - chachaQR(A0, B0, C0, D0, T0) - shiftB0Left; shiftC0Left; shiftD0Left - chachaQR(A0, B0, C0, D0, T0) - shiftB0Right; shiftC0Right; shiftD0Right - - CMPQ itr1, $16 - JAE openSSETail64LoopA - - CMPQ itr2, $160 - JNE openSSETail64LoopB - - PADDL ·chacha20Constants<>(SB), A0; PADDL state1Store, B0; PADDL state2Store, C0; PADDL ctr0Store, D0 + ADDQ $0x10, R9 + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + CMPQ CX, $0x10 + JAE openSSETail64LoopA + CMPQ R9, $0xa0 + JNE openSSETail64LoopB + PADDL ·chacha20Constants<>+0(SB), X0 + PADDL 32(BP), X3 + PADDL 48(BP), X6 + PADDL 80(BP), X9 openSSETail64DecLoop: - CMPQ inl, $16 + CMPQ BX, $0x10 JB openSSETail64DecLoopDone - SUBQ $16, inl - MOVOU (inp), T0 - PXOR T0, A0 - MOVOU A0, (oup) - LEAQ 16(inp), inp - LEAQ 16(oup), oup - MOVO B0, A0 - MOVO C0, B0 - MOVO D0, C0 + SUBQ $0x10, BX + MOVOU (SI), X12 + PXOR X12, X0 + MOVOU X0, (DI) + LEAQ 16(SI), SI + LEAQ 16(DI), DI + MOVO X3, X0 + MOVO X6, X3 + MOVO X9, X6 JMP openSSETail64DecLoop openSSETail64DecLoopDone: - MOVO A0, A1 + MOVO X0, X1 JMP openSSETail16 -// ---------------------------------------------------------------------------- -// Special optimization for the last 128 bytes of ciphertext openSSETail128: - // Need to decrypt up to 128 bytes - prepare two blocks - MOVO ·chacha20Constants<>(SB), A1; MOVO state1Store, B1; MOVO state2Store, C1; MOVO ctr3Store, D1; PADDL ·sseIncMask<>(SB), D1; MOVO D1, ctr0Store - MOVO A1, A0; MOVO B1, B0; MOVO C1, C0; MOVO D1, D0; PADDL ·sseIncMask<>(SB), D0; MOVO D0, ctr1Store - XORQ itr2, itr2 - MOVQ inl, itr1 - ANDQ $-16, itr1 + MOVO ·chacha20Constants<>+0(SB), X1 + MOVO 32(BP), X4 + MOVO 48(BP), X7 + MOVO 128(BP), X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X10, 80(BP) + MOVO X1, X0 + MOVO X4, X3 + MOVO X7, X6 + MOVO X10, X9 + PADDL ·sseIncMask<>+0(SB), X9 + MOVO X9, 96(BP) + XORQ R9, R9 + MOVQ BX, CX + ANDQ $-16, CX openSSETail128LoopA: - // Perform ChaCha rounds, while hashing the remaining input - polyAdd(0(inp)(itr2*1)) - polyMul + ADDQ (SI)(R9*1), R10 + ADCQ 8(SI)(R9*1), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 openSSETail128LoopB: - ADDQ $16, itr2 - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0) - shiftB0Left; shiftC0Left; shiftD0Left - shiftB1Left; shiftC1Left; shiftD1Left - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0) - shiftB0Right; shiftC0Right; shiftD0Right - shiftB1Right; shiftC1Right; shiftD1Right - - CMPQ itr2, itr1 - JB openSSETail128LoopA - - CMPQ itr2, $160 - JNE openSSETail128LoopB - - PADDL ·chacha20Constants<>(SB), A0; PADDL ·chacha20Constants<>(SB), A1 - PADDL state1Store, B0; PADDL state1Store, B1 - PADDL state2Store, C0; PADDL state2Store, C1 - PADDL ctr1Store, D0; PADDL ctr0Store, D1 - - MOVOU (0*16)(inp), T0; MOVOU (1*16)(inp), T1; MOVOU (2*16)(inp), T2; MOVOU (3*16)(inp), T3 - PXOR T0, A1; PXOR T1, B1; PXOR T2, C1; PXOR T3, D1 - MOVOU A1, (0*16)(oup); MOVOU B1, (1*16)(oup); MOVOU C1, (2*16)(oup); MOVOU D1, (3*16)(oup) - - SUBQ $64, inl - LEAQ 64(inp), inp - LEAQ 64(oup), oup - JMP openSSETail64DecLoop - -// ---------------------------------------------------------------------------- -// Special optimization for the last 192 bytes of ciphertext + ADDQ $0x10, R9 + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + CMPQ R9, CX + JB openSSETail128LoopA + CMPQ R9, $0xa0 + JNE openSSETail128LoopB + PADDL ·chacha20Constants<>+0(SB), X0 + PADDL ·chacha20Constants<>+0(SB), X1 + PADDL 32(BP), X3 + PADDL 32(BP), X4 + PADDL 48(BP), X6 + PADDL 48(BP), X7 + PADDL 96(BP), X9 + PADDL 80(BP), X10 + MOVOU (SI), X12 + MOVOU 16(SI), X13 + MOVOU 32(SI), X14 + MOVOU 48(SI), X15 + PXOR X12, X1 + PXOR X13, X4 + PXOR X14, X7 + PXOR X15, X10 + MOVOU X1, (DI) + MOVOU X4, 16(DI) + MOVOU X7, 32(DI) + MOVOU X10, 48(DI) + SUBQ $0x40, BX + LEAQ 64(SI), SI + LEAQ 64(DI), DI + JMP openSSETail64DecLoop + openSSETail192: - // Need to decrypt up to 192 bytes - prepare three blocks - MOVO ·chacha20Constants<>(SB), A2; MOVO state1Store, B2; MOVO state2Store, C2; MOVO ctr3Store, D2; PADDL ·sseIncMask<>(SB), D2; MOVO D2, ctr0Store - MOVO A2, A1; MOVO B2, B1; MOVO C2, C1; MOVO D2, D1; PADDL ·sseIncMask<>(SB), D1; MOVO D1, ctr1Store - MOVO A1, A0; MOVO B1, B0; MOVO C1, C0; MOVO D1, D0; PADDL ·sseIncMask<>(SB), D0; MOVO D0, ctr2Store - - MOVQ inl, itr1 - MOVQ $160, itr2 - CMPQ itr1, $160 - CMOVQGT itr2, itr1 - ANDQ $-16, itr1 - XORQ itr2, itr2 + MOVO ·chacha20Constants<>+0(SB), X2 + MOVO 32(BP), X5 + MOVO 48(BP), X8 + MOVO 128(BP), X11 + PADDL ·sseIncMask<>+0(SB), X11 + MOVO X11, 80(BP) + MOVO X2, X1 + MOVO X5, X4 + MOVO X8, X7 + MOVO X11, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X10, 96(BP) + MOVO X1, X0 + MOVO X4, X3 + MOVO X7, X6 + MOVO X10, X9 + PADDL ·sseIncMask<>+0(SB), X9 + MOVO X9, 112(BP) + MOVQ BX, CX + MOVQ $0x000000a0, R9 + CMPQ CX, $0xa0 + CMOVQGT R9, CX + ANDQ $-16, CX + XORQ R9, R9 openSSLTail192LoopA: - // Perform ChaCha rounds, while hashing the remaining input - polyAdd(0(inp)(itr2*1)) - polyMul + ADDQ (SI)(R9*1), R10 + ADCQ 8(SI)(R9*1), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 openSSLTail192LoopB: - ADDQ $16, itr2 - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0); chachaQR(A2, B2, C2, D2, T0) - shiftB0Left; shiftC0Left; shiftD0Left - shiftB1Left; shiftC1Left; shiftD1Left - shiftB2Left; shiftC2Left; shiftD2Left - - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0); chachaQR(A2, B2, C2, D2, T0) - shiftB0Right; shiftC0Right; shiftD0Right - shiftB1Right; shiftC1Right; shiftD1Right - shiftB2Right; shiftC2Right; shiftD2Right - - CMPQ itr2, itr1 - JB openSSLTail192LoopA - - CMPQ itr2, $160 - JNE openSSLTail192LoopB - - CMPQ inl, $176 - JB openSSLTail192Store - - polyAdd(160(inp)) - polyMul - - CMPQ inl, $192 - JB openSSLTail192Store - - polyAdd(176(inp)) - polyMul + ADDQ $0x10, R9 + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X5 + PXOR X12, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X5 + PXOR X12, X5 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X5 + PXOR X12, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X5 + PXOR X12, X5 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + CMPQ R9, CX + JB openSSLTail192LoopA + CMPQ R9, $0xa0 + JNE openSSLTail192LoopB + CMPQ BX, $0xb0 + JB openSSLTail192Store + ADDQ 160(SI), R10 + ADCQ 168(SI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + CMPQ BX, $0xc0 + JB openSSLTail192Store + ADDQ 176(SI), R10 + ADCQ 184(SI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 openSSLTail192Store: - PADDL ·chacha20Constants<>(SB), A0; PADDL ·chacha20Constants<>(SB), A1; PADDL ·chacha20Constants<>(SB), A2 - PADDL state1Store, B0; PADDL state1Store, B1; PADDL state1Store, B2 - PADDL state2Store, C0; PADDL state2Store, C1; PADDL state2Store, C2 - PADDL ctr2Store, D0; PADDL ctr1Store, D1; PADDL ctr0Store, D2 - - MOVOU (0*16)(inp), T0; MOVOU (1*16)(inp), T1; MOVOU (2*16)(inp), T2; MOVOU (3*16)(inp), T3 - PXOR T0, A2; PXOR T1, B2; PXOR T2, C2; PXOR T3, D2 - MOVOU A2, (0*16)(oup); MOVOU B2, (1*16)(oup); MOVOU C2, (2*16)(oup); MOVOU D2, (3*16)(oup) - - MOVOU (4*16)(inp), T0; MOVOU (5*16)(inp), T1; MOVOU (6*16)(inp), T2; MOVOU (7*16)(inp), T3 - PXOR T0, A1; PXOR T1, B1; PXOR T2, C1; PXOR T3, D1 - MOVOU A1, (4*16)(oup); MOVOU B1, (5*16)(oup); MOVOU C1, (6*16)(oup); MOVOU D1, (7*16)(oup) - - SUBQ $128, inl - LEAQ 128(inp), inp - LEAQ 128(oup), oup - JMP openSSETail64DecLoop - -// ---------------------------------------------------------------------------- -// Special optimization for the last 256 bytes of ciphertext + PADDL ·chacha20Constants<>+0(SB), X0 + PADDL ·chacha20Constants<>+0(SB), X1 + PADDL ·chacha20Constants<>+0(SB), X2 + PADDL 32(BP), X3 + PADDL 32(BP), X4 + PADDL 32(BP), X5 + PADDL 48(BP), X6 + PADDL 48(BP), X7 + PADDL 48(BP), X8 + PADDL 112(BP), X9 + PADDL 96(BP), X10 + PADDL 80(BP), X11 + MOVOU (SI), X12 + MOVOU 16(SI), X13 + MOVOU 32(SI), X14 + MOVOU 48(SI), X15 + PXOR X12, X2 + PXOR X13, X5 + PXOR X14, X8 + PXOR X15, X11 + MOVOU X2, (DI) + MOVOU X5, 16(DI) + MOVOU X8, 32(DI) + MOVOU X11, 48(DI) + MOVOU 64(SI), X12 + MOVOU 80(SI), X13 + MOVOU 96(SI), X14 + MOVOU 112(SI), X15 + PXOR X12, X1 + PXOR X13, X4 + PXOR X14, X7 + PXOR X15, X10 + MOVOU X1, 64(DI) + MOVOU X4, 80(DI) + MOVOU X7, 96(DI) + MOVOU X10, 112(DI) + SUBQ $0x80, BX + LEAQ 128(SI), SI + LEAQ 128(DI), DI + JMP openSSETail64DecLoop + openSSETail256: - // Need to decrypt up to 256 bytes - prepare four blocks - MOVO ·chacha20Constants<>(SB), A0; MOVO state1Store, B0; MOVO state2Store, C0; MOVO ctr3Store, D0; PADDL ·sseIncMask<>(SB), D0 - MOVO A0, A1; MOVO B0, B1; MOVO C0, C1; MOVO D0, D1; PADDL ·sseIncMask<>(SB), D1 - MOVO A1, A2; MOVO B1, B2; MOVO C1, C2; MOVO D1, D2; PADDL ·sseIncMask<>(SB), D2 - MOVO A2, A3; MOVO B2, B3; MOVO C2, C3; MOVO D2, D3; PADDL ·sseIncMask<>(SB), D3 + MOVO ·chacha20Constants<>+0(SB), X0 + MOVO 32(BP), X3 + MOVO 48(BP), X6 + MOVO 128(BP), X9 + PADDL ·sseIncMask<>+0(SB), X9 + MOVO X0, X1 + MOVO X3, X4 + MOVO X6, X7 + MOVO X9, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X1, X2 + MOVO X4, X5 + MOVO X7, X8 + MOVO X10, X11 + PADDL ·sseIncMask<>+0(SB), X11 + MOVO X2, X12 + MOVO X5, X13 + MOVO X8, X14 + MOVO X11, X15 + PADDL ·sseIncMask<>+0(SB), X15 // Store counters - MOVO D0, ctr0Store; MOVO D1, ctr1Store; MOVO D2, ctr2Store; MOVO D3, ctr3Store - XORQ itr2, itr2 + MOVO X9, 80(BP) + MOVO X10, 96(BP) + MOVO X11, 112(BP) + MOVO X15, 128(BP) + XORQ R9, R9 openSSETail256Loop: - // This loop inteleaves 8 ChaCha quarter rounds with 1 poly multiplication - polyAdd(0(inp)(itr2*1)) - MOVO C3, tmpStore - chachaQR(A0, B0, C0, D0, C3); chachaQR(A1, B1, C1, D1, C3); chachaQR(A2, B2, C2, D2, C3) - MOVO tmpStore, C3 - MOVO C1, tmpStore - chachaQR(A3, B3, C3, D3, C1) - MOVO tmpStore, C1 - shiftB0Left; shiftB1Left; shiftB2Left; shiftB3Left - shiftC0Left; shiftC1Left; shiftC2Left; shiftC3Left - shiftD0Left; shiftD1Left; shiftD2Left; shiftD3Left - polyMulStage1 - polyMulStage2 - MOVO C3, tmpStore - chachaQR(A0, B0, C0, D0, C3); chachaQR(A1, B1, C1, D1, C3); chachaQR(A2, B2, C2, D2, C3) - MOVO tmpStore, C3 - MOVO C1, tmpStore - chachaQR(A3, B3, C3, D3, C1) - MOVO tmpStore, C1 - polyMulStage3 - polyMulReduceStage - shiftB0Right; shiftB1Right; shiftB2Right; shiftB3Right - shiftC0Right; shiftC1Right; shiftC2Right; shiftC3Right - shiftD0Right; shiftD1Right; shiftD2Right; shiftD3Right - ADDQ $2*8, itr2 - CMPQ itr2, $160 - JB openSSETail256Loop - MOVQ inl, itr1 - ANDQ $-16, itr1 + ADDQ (SI)(R9*1), R10 + ADCQ 8(SI)(R9*1), R11 + ADCQ $0x01, R12 + MOVO X14, 64(BP) + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X3 + PXOR X14, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X3 + PXOR X14, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X4 + PXOR X14, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X4 + PXOR X14, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X5 + PXOR X14, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X5 + PXOR X14, X5 + MOVO 64(BP), X14 + MOVO X7, 64(BP) + PADDD X13, X12 + PXOR X12, X15 + ROL16(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x0c, X7 + PSRLL $0x14, X13 + PXOR X7, X13 + PADDD X13, X12 + PXOR X12, X15 + ROL8(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x07, X7 + PSRLL $0x19, X13 + PXOR X7, X13 + MOVO 64(BP), X7 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x0c + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + MOVO X14, 64(BP) + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X3 + PXOR X14, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X3 + PXOR X14, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X4 + PXOR X14, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X4 + PXOR X14, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X5 + PXOR X14, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X5 + PXOR X14, X5 + MOVO 64(BP), X14 + MOVO X7, 64(BP) + PADDD X13, X12 + PXOR X12, X15 + ROL16(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x0c, X7 + PSRLL $0x14, X13 + PXOR X7, X13 + PADDD X13, X12 + PXOR X12, X15 + ROL8(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x07, X7 + PSRLL $0x19, X13 + PXOR X7, X13 + MOVO 64(BP), X7 + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x04 + ADDQ $0x10, R9 + CMPQ R9, $0xa0 + JB openSSETail256Loop + MOVQ BX, CX + ANDQ $-16, CX openSSETail256HashLoop: - polyAdd(0(inp)(itr2*1)) - polyMul - ADDQ $2*8, itr2 - CMPQ itr2, itr1 - JB openSSETail256HashLoop + ADDQ (SI)(R9*1), R10 + ADCQ 8(SI)(R9*1), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + ADDQ $0x10, R9 + CMPQ R9, CX + JB openSSETail256HashLoop // Add in the state - PADDD ·chacha20Constants<>(SB), A0; PADDD ·chacha20Constants<>(SB), A1; PADDD ·chacha20Constants<>(SB), A2; PADDD ·chacha20Constants<>(SB), A3 - PADDD state1Store, B0; PADDD state1Store, B1; PADDD state1Store, B2; PADDD state1Store, B3 - PADDD state2Store, C0; PADDD state2Store, C1; PADDD state2Store, C2; PADDD state2Store, C3 - PADDD ctr0Store, D0; PADDD ctr1Store, D1; PADDD ctr2Store, D2; PADDD ctr3Store, D3 - MOVO D3, tmpStore + PADDD ·chacha20Constants<>+0(SB), X0 + PADDD ·chacha20Constants<>+0(SB), X1 + PADDD ·chacha20Constants<>+0(SB), X2 + PADDD ·chacha20Constants<>+0(SB), X12 + PADDD 32(BP), X3 + PADDD 32(BP), X4 + PADDD 32(BP), X5 + PADDD 32(BP), X13 + PADDD 48(BP), X6 + PADDD 48(BP), X7 + PADDD 48(BP), X8 + PADDD 48(BP), X14 + PADDD 80(BP), X9 + PADDD 96(BP), X10 + PADDD 112(BP), X11 + PADDD 128(BP), X15 + MOVO X15, 64(BP) // Load - xor - store - MOVOU (0*16)(inp), D3; PXOR D3, A0 - MOVOU (1*16)(inp), D3; PXOR D3, B0 - MOVOU (2*16)(inp), D3; PXOR D3, C0 - MOVOU (3*16)(inp), D3; PXOR D3, D0 - MOVOU A0, (0*16)(oup) - MOVOU B0, (1*16)(oup) - MOVOU C0, (2*16)(oup) - MOVOU D0, (3*16)(oup) - MOVOU (4*16)(inp), A0; MOVOU (5*16)(inp), B0; MOVOU (6*16)(inp), C0; MOVOU (7*16)(inp), D0 - PXOR A0, A1; PXOR B0, B1; PXOR C0, C1; PXOR D0, D1 - MOVOU A1, (4*16)(oup); MOVOU B1, (5*16)(oup); MOVOU C1, (6*16)(oup); MOVOU D1, (7*16)(oup) - MOVOU (8*16)(inp), A0; MOVOU (9*16)(inp), B0; MOVOU (10*16)(inp), C0; MOVOU (11*16)(inp), D0 - PXOR A0, A2; PXOR B0, B2; PXOR C0, C2; PXOR D0, D2 - MOVOU A2, (8*16)(oup); MOVOU B2, (9*16)(oup); MOVOU C2, (10*16)(oup); MOVOU D2, (11*16)(oup) - LEAQ 192(inp), inp - LEAQ 192(oup), oup - SUBQ $192, inl - MOVO A3, A0 - MOVO B3, B0 - MOVO C3, C0 - MOVO tmpStore, D0 - - JMP openSSETail64DecLoop - -// ---------------------------------------------------------------------------- -// ------------------------- AVX2 Code ---------------------------------------- + MOVOU (SI), X15 + PXOR X15, X0 + MOVOU 16(SI), X15 + PXOR X15, X3 + MOVOU 32(SI), X15 + PXOR X15, X6 + MOVOU 48(SI), X15 + PXOR X15, X9 + MOVOU X0, (DI) + MOVOU X3, 16(DI) + MOVOU X6, 32(DI) + MOVOU X9, 48(DI) + MOVOU 64(SI), X0 + MOVOU 80(SI), X3 + MOVOU 96(SI), X6 + MOVOU 112(SI), X9 + PXOR X0, X1 + PXOR X3, X4 + PXOR X6, X7 + PXOR X9, X10 + MOVOU X1, 64(DI) + MOVOU X4, 80(DI) + MOVOU X7, 96(DI) + MOVOU X10, 112(DI) + MOVOU 128(SI), X0 + MOVOU 144(SI), X3 + MOVOU 160(SI), X6 + MOVOU 176(SI), X9 + PXOR X0, X2 + PXOR X3, X5 + PXOR X6, X8 + PXOR X9, X11 + MOVOU X2, 128(DI) + MOVOU X5, 144(DI) + MOVOU X8, 160(DI) + MOVOU X11, 176(DI) + LEAQ 192(SI), SI + LEAQ 192(DI), DI + SUBQ $0xc0, BX + MOVO X12, X0 + MOVO X13, X3 + MOVO X14, X6 + MOVO 64(BP), X9 + JMP openSSETail64DecLoop + chacha20Poly1305Open_AVX2: VZEROUPPER - VMOVDQU ·chacha20Constants<>(SB), AA0 - BYTE $0xc4; BYTE $0x42; BYTE $0x7d; BYTE $0x5a; BYTE $0x70; BYTE $0x10 // broadcasti128 16(r8), ymm14 - BYTE $0xc4; BYTE $0x42; BYTE $0x7d; BYTE $0x5a; BYTE $0x60; BYTE $0x20 // broadcasti128 32(r8), ymm12 - BYTE $0xc4; BYTE $0xc2; BYTE $0x7d; BYTE $0x5a; BYTE $0x60; BYTE $0x30 // broadcasti128 48(r8), ymm4 - VPADDD ·avx2InitMask<>(SB), DD0, DD0 + VMOVDQU ·chacha20Constants<>+0(SB), Y0 + BYTE $0xc4 + BYTE $0x42 + BYTE $0x7d + BYTE $0x5a + BYTE $0x70 + BYTE $0x10 + BYTE $0xc4 + BYTE $0x42 + BYTE $0x7d + BYTE $0x5a + BYTE $0x60 + BYTE $0x20 + BYTE $0xc4 + BYTE $0xc2 + BYTE $0x7d + BYTE $0x5a + BYTE $0x60 + BYTE $0x30 + VPADDD ·avx2InitMask<>+0(SB), Y4, Y4 // Special optimization, for very short buffers - CMPQ inl, $192 + CMPQ BX, $0xc0 JBE openAVX2192 - CMPQ inl, $320 + CMPQ BX, $0x00000140 JBE openAVX2320 // For the general key prepare the key first - as a byproduct we have 64 bytes of cipher stream - VMOVDQA BB0, state1StoreAVX2 - VMOVDQA CC0, state2StoreAVX2 - VMOVDQA DD0, ctr3StoreAVX2 - MOVQ $10, itr2 + VMOVDQA Y14, 32(BP) + VMOVDQA Y12, 64(BP) + VMOVDQA Y4, 192(BP) + MOVQ $0x0000000a, R9 openAVX2PreparePolyKey: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0) - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $12, DD0, DD0, DD0 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0) - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $4, DD0, DD0, DD0 - DECQ itr2 - JNE openAVX2PreparePolyKey - - VPADDD ·chacha20Constants<>(SB), AA0, AA0 - VPADDD state1StoreAVX2, BB0, BB0 - VPADDD state2StoreAVX2, CC0, CC0 - VPADDD ctr3StoreAVX2, DD0, DD0 - - VPERM2I128 $0x02, AA0, BB0, TT0 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x04, Y4, Y4, Y4 + DECQ R9 + JNE openAVX2PreparePolyKey + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD 32(BP), Y14, Y14 + VPADDD 64(BP), Y12, Y12 + VPADDD 192(BP), Y4, Y4 + VPERM2I128 $0x02, Y0, Y14, Y3 // Clamp and store poly key - VPAND ·polyClampMask<>(SB), TT0, TT0 - VMOVDQA TT0, rsStoreAVX2 + VPAND ·polyClampMask<>+0(SB), Y3, Y3 + VMOVDQA Y3, (BP) // Stream for the first 64 bytes - VPERM2I128 $0x13, AA0, BB0, AA0 - VPERM2I128 $0x13, CC0, DD0, BB0 + VPERM2I128 $0x13, Y0, Y14, Y0 + VPERM2I128 $0x13, Y12, Y4, Y14 // Hash AD + first 64 bytes - MOVQ ad_len+80(FP), itr2 + MOVQ ad_len+80(FP), R9 CALL polyHashADInternal<>(SB) - XORQ itr1, itr1 + XORQ CX, CX openAVX2InitialHash64: - polyAdd(0(inp)(itr1*1)) - polyMulAVX2 - ADDQ $16, itr1 - CMPQ itr1, $64 - JNE openAVX2InitialHash64 + ADDQ (SI)(CX*1), R10 + ADCQ 8(SI)(CX*1), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + ADDQ $0x10, CX + CMPQ CX, $0x40 + JNE openAVX2InitialHash64 // Decrypt the first 64 bytes - VPXOR (0*32)(inp), AA0, AA0 - VPXOR (1*32)(inp), BB0, BB0 - VMOVDQU AA0, (0*32)(oup) - VMOVDQU BB0, (1*32)(oup) - LEAQ (2*32)(inp), inp - LEAQ (2*32)(oup), oup - SUBQ $64, inl + VPXOR (SI), Y0, Y0 + VPXOR 32(SI), Y14, Y14 + VMOVDQU Y0, (DI) + VMOVDQU Y14, 32(DI) + LEAQ 64(SI), SI + LEAQ 64(DI), DI + SUBQ $0x40, BX openAVX2MainLoop: - CMPQ inl, $512 + CMPQ BX, $0x00000200 JB openAVX2MainLoopDone // Load state, increment counter blocks, store the incremented counters - VMOVDQU ·chacha20Constants<>(SB), AA0; VMOVDQA AA0, AA1; VMOVDQA AA0, AA2; VMOVDQA AA0, AA3 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA BB0, BB1; VMOVDQA BB0, BB2; VMOVDQA BB0, BB3 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA CC0, CC1; VMOVDQA CC0, CC2; VMOVDQA CC0, CC3 - VMOVDQA ctr3StoreAVX2, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD1; VPADDD ·avx2IncMask<>(SB), DD1, DD2; VPADDD ·avx2IncMask<>(SB), DD2, DD3 - VMOVDQA DD0, ctr0StoreAVX2; VMOVDQA DD1, ctr1StoreAVX2; VMOVDQA DD2, ctr2StoreAVX2; VMOVDQA DD3, ctr3StoreAVX2 - XORQ itr1, itr1 + VMOVDQU ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA Y0, Y7 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA Y14, Y11 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA Y12, Y15 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VPADDD ·avx2IncMask<>+0(SB), Y2, Y3 + VMOVDQA Y4, 96(BP) + VMOVDQA Y1, 128(BP) + VMOVDQA Y2, 160(BP) + VMOVDQA Y3, 192(BP) + XORQ CX, CX openAVX2InternalLoop: - // Lets just say this spaghetti loop interleaves 2 quarter rounds with 3 poly multiplications - // Effectively per 512 bytes of stream we hash 480 bytes of ciphertext - polyAdd(0*8(inp)(itr1*1)) - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - polyMulStage1_AVX2 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - polyMulStage2_AVX2 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - polyMulStage3_AVX2 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyMulReduceStage - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol8<>(SB), DD0, DD0; VPSHUFB ·rol8<>(SB), DD1, DD1; VPSHUFB ·rol8<>(SB), DD2, DD2; VPSHUFB ·rol8<>(SB), DD3, DD3 - polyAdd(2*8(inp)(itr1*1)) - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - polyMulStage1_AVX2 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $7, BB0, CC3; VPSRLD $25, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $7, BB1, CC3; VPSRLD $25, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $7, BB2, CC3; VPSRLD $25, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $7, BB3, CC3; VPSRLD $25, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyMulStage2_AVX2 - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $4, BB2, BB2, BB2; VPALIGNR $4, BB3, BB3, BB3 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $8, CC3, CC3, CC3 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1; VPALIGNR $12, DD2, DD2, DD2; VPALIGNR $12, DD3, DD3, DD3 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - polyMulStage3_AVX2 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - polyMulReduceStage - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - polyAdd(4*8(inp)(itr1*1)) - LEAQ (6*8)(itr1), itr1 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyMulStage1_AVX2 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - polyMulStage2_AVX2 - VPSHUFB ·rol8<>(SB), DD0, DD0; VPSHUFB ·rol8<>(SB), DD1, DD1; VPSHUFB ·rol8<>(SB), DD2, DD2; VPSHUFB ·rol8<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - polyMulStage3_AVX2 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $7, BB0, CC3; VPSRLD $25, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $7, BB1, CC3; VPSRLD $25, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $7, BB2, CC3; VPSRLD $25, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $7, BB3, CC3; VPSRLD $25, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyMulReduceStage - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $12, BB2, BB2, BB2; VPALIGNR $12, BB3, BB3, BB3 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $8, CC3, CC3, CC3 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1; VPALIGNR $4, DD2, DD2, DD2; VPALIGNR $4, DD3, DD3, DD3 - CMPQ itr1, $480 + ADDQ (SI)(CX*1), R10 + ADCQ 8(SI)(CX*1), R11 + ADCQ $0x01, R12 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + ADDQ 16(SI)(CX*1), R10 + ADCQ 24(SI)(CX*1), R11 + ADCQ $0x01, R12 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x04, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPALIGNR $0x0c, Y3, Y3, Y3 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + ADDQ 32(SI)(CX*1), R10 + ADCQ 40(SI)(CX*1), R11 + ADCQ $0x01, R12 + LEAQ 48(CX), CX + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x0c, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + VPALIGNR $0x04, Y3, Y3, Y3 + CMPQ CX, $0x000001e0 JNE openAVX2InternalLoop - - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1; VPADDD ·chacha20Constants<>(SB), AA2, AA2; VPADDD ·chacha20Constants<>(SB), AA3, AA3 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1; VPADDD state1StoreAVX2, BB2, BB2; VPADDD state1StoreAVX2, BB3, BB3 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1; VPADDD state2StoreAVX2, CC2, CC2; VPADDD state2StoreAVX2, CC3, CC3 - VPADDD ctr0StoreAVX2, DD0, DD0; VPADDD ctr1StoreAVX2, DD1, DD1; VPADDD ctr2StoreAVX2, DD2, DD2; VPADDD ctr3StoreAVX2, DD3, DD3 - VMOVDQA CC3, tmpStoreAVX2 + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 + VPADDD ·chacha20Constants<>+0(SB), Y7, Y7 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 32(BP), Y10, Y10 + VPADDD 32(BP), Y11, Y11 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD 64(BP), Y8, Y8 + VPADDD 64(BP), Y15, Y15 + VPADDD 96(BP), Y4, Y4 + VPADDD 128(BP), Y1, Y1 + VPADDD 160(BP), Y2, Y2 + VPADDD 192(BP), Y3, Y3 + VMOVDQA Y15, 224(BP) // We only hashed 480 of the 512 bytes available - hash the remaining 32 here - polyAdd(480(inp)) - polyMulAVX2 - VPERM2I128 $0x02, AA0, BB0, CC3; VPERM2I128 $0x13, AA0, BB0, BB0; VPERM2I128 $0x02, CC0, DD0, AA0; VPERM2I128 $0x13, CC0, DD0, CC0 - VPXOR (0*32)(inp), CC3, CC3; VPXOR (1*32)(inp), AA0, AA0; VPXOR (2*32)(inp), BB0, BB0; VPXOR (3*32)(inp), CC0, CC0 - VMOVDQU CC3, (0*32)(oup); VMOVDQU AA0, (1*32)(oup); VMOVDQU BB0, (2*32)(oup); VMOVDQU CC0, (3*32)(oup) - VPERM2I128 $0x02, AA1, BB1, AA0; VPERM2I128 $0x02, CC1, DD1, BB0; VPERM2I128 $0x13, AA1, BB1, CC0; VPERM2I128 $0x13, CC1, DD1, DD0 - VPXOR (4*32)(inp), AA0, AA0; VPXOR (5*32)(inp), BB0, BB0; VPXOR (6*32)(inp), CC0, CC0; VPXOR (7*32)(inp), DD0, DD0 - VMOVDQU AA0, (4*32)(oup); VMOVDQU BB0, (5*32)(oup); VMOVDQU CC0, (6*32)(oup); VMOVDQU DD0, (7*32)(oup) + ADDQ 480(SI), R10 + ADCQ 488(SI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPERM2I128 $0x02, Y0, Y14, Y15 + VPERM2I128 $0x13, Y0, Y14, Y14 + VPERM2I128 $0x02, Y12, Y4, Y0 + VPERM2I128 $0x13, Y12, Y4, Y12 + VPXOR (SI), Y15, Y15 + VPXOR 32(SI), Y0, Y0 + VPXOR 64(SI), Y14, Y14 + VPXOR 96(SI), Y12, Y12 + VMOVDQU Y15, (DI) + VMOVDQU Y0, 32(DI) + VMOVDQU Y14, 64(DI) + VMOVDQU Y12, 96(DI) + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 + VPXOR 128(SI), Y0, Y0 + VPXOR 160(SI), Y14, Y14 + VPXOR 192(SI), Y12, Y12 + VPXOR 224(SI), Y4, Y4 + VMOVDQU Y0, 128(DI) + VMOVDQU Y14, 160(DI) + VMOVDQU Y12, 192(DI) + VMOVDQU Y4, 224(DI) // and here - polyAdd(496(inp)) - polyMulAVX2 - VPERM2I128 $0x02, AA2, BB2, AA0; VPERM2I128 $0x02, CC2, DD2, BB0; VPERM2I128 $0x13, AA2, BB2, CC0; VPERM2I128 $0x13, CC2, DD2, DD0 - VPXOR (8*32)(inp), AA0, AA0; VPXOR (9*32)(inp), BB0, BB0; VPXOR (10*32)(inp), CC0, CC0; VPXOR (11*32)(inp), DD0, DD0 - VMOVDQU AA0, (8*32)(oup); VMOVDQU BB0, (9*32)(oup); VMOVDQU CC0, (10*32)(oup); VMOVDQU DD0, (11*32)(oup) - VPERM2I128 $0x02, AA3, BB3, AA0; VPERM2I128 $0x02, tmpStoreAVX2, DD3, BB0; VPERM2I128 $0x13, AA3, BB3, CC0; VPERM2I128 $0x13, tmpStoreAVX2, DD3, DD0 - VPXOR (12*32)(inp), AA0, AA0; VPXOR (13*32)(inp), BB0, BB0; VPXOR (14*32)(inp), CC0, CC0; VPXOR (15*32)(inp), DD0, DD0 - VMOVDQU AA0, (12*32)(oup); VMOVDQU BB0, (13*32)(oup); VMOVDQU CC0, (14*32)(oup); VMOVDQU DD0, (15*32)(oup) - LEAQ (32*16)(inp), inp - LEAQ (32*16)(oup), oup - SUBQ $(32*16), inl + ADDQ 496(SI), R10 + ADCQ 504(SI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPERM2I128 $0x02, Y6, Y10, Y0 + VPERM2I128 $0x02, Y8, Y2, Y14 + VPERM2I128 $0x13, Y6, Y10, Y12 + VPERM2I128 $0x13, Y8, Y2, Y4 + VPXOR 256(SI), Y0, Y0 + VPXOR 288(SI), Y14, Y14 + VPXOR 320(SI), Y12, Y12 + VPXOR 352(SI), Y4, Y4 + VMOVDQU Y0, 256(DI) + VMOVDQU Y14, 288(DI) + VMOVDQU Y12, 320(DI) + VMOVDQU Y4, 352(DI) + VPERM2I128 $0x02, Y7, Y11, Y0 + VPERM2I128 $0x02, 224(BP), Y3, Y14 + VPERM2I128 $0x13, Y7, Y11, Y12 + VPERM2I128 $0x13, 224(BP), Y3, Y4 + VPXOR 384(SI), Y0, Y0 + VPXOR 416(SI), Y14, Y14 + VPXOR 448(SI), Y12, Y12 + VPXOR 480(SI), Y4, Y4 + VMOVDQU Y0, 384(DI) + VMOVDQU Y14, 416(DI) + VMOVDQU Y12, 448(DI) + VMOVDQU Y4, 480(DI) + LEAQ 512(SI), SI + LEAQ 512(DI), DI + SUBQ $0x00000200, BX JMP openAVX2MainLoop openAVX2MainLoopDone: // Handle the various tail sizes efficiently - TESTQ inl, inl + TESTQ BX, BX JE openSSEFinalize - CMPQ inl, $128 + CMPQ BX, $0x80 JBE openAVX2Tail128 - CMPQ inl, $256 + CMPQ BX, $0x00000100 JBE openAVX2Tail256 - CMPQ inl, $384 + CMPQ BX, $0x00000180 JBE openAVX2Tail384 JMP openAVX2Tail512 -// ---------------------------------------------------------------------------- -// Special optimization for buffers smaller than 193 bytes openAVX2192: - // For up to 192 bytes of ciphertext and 64 bytes for the poly key, we process four blocks - VMOVDQA AA0, AA1 - VMOVDQA BB0, BB1 - VMOVDQA CC0, CC1 - VPADDD ·avx2IncMask<>(SB), DD0, DD1 - VMOVDQA AA0, AA2 - VMOVDQA BB0, BB2 - VMOVDQA CC0, CC2 - VMOVDQA DD0, DD2 - VMOVDQA DD1, TT3 - MOVQ $10, itr2 + VMOVDQA Y0, Y5 + VMOVDQA Y14, Y9 + VMOVDQA Y12, Y13 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VMOVDQA Y0, Y6 + VMOVDQA Y14, Y10 + VMOVDQA Y12, Y8 + VMOVDQA Y4, Y2 + VMOVDQA Y1, Y15 + MOVQ $0x0000000a, R9 openAVX2192InnerCipherLoop: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1 - DECQ itr2 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + DECQ R9 JNE openAVX2192InnerCipherLoop - VPADDD AA2, AA0, AA0; VPADDD AA2, AA1, AA1 - VPADDD BB2, BB0, BB0; VPADDD BB2, BB1, BB1 - VPADDD CC2, CC0, CC0; VPADDD CC2, CC1, CC1 - VPADDD DD2, DD0, DD0; VPADDD TT3, DD1, DD1 - VPERM2I128 $0x02, AA0, BB0, TT0 + VPADDD Y6, Y0, Y0 + VPADDD Y6, Y5, Y5 + VPADDD Y10, Y14, Y14 + VPADDD Y10, Y9, Y9 + VPADDD Y8, Y12, Y12 + VPADDD Y8, Y13, Y13 + VPADDD Y2, Y4, Y4 + VPADDD Y15, Y1, Y1 + VPERM2I128 $0x02, Y0, Y14, Y3 // Clamp and store poly key - VPAND ·polyClampMask<>(SB), TT0, TT0 - VMOVDQA TT0, rsStoreAVX2 + VPAND ·polyClampMask<>+0(SB), Y3, Y3 + VMOVDQA Y3, (BP) // Stream for up to 192 bytes - VPERM2I128 $0x13, AA0, BB0, AA0 - VPERM2I128 $0x13, CC0, DD0, BB0 - VPERM2I128 $0x02, AA1, BB1, CC0 - VPERM2I128 $0x02, CC1, DD1, DD0 - VPERM2I128 $0x13, AA1, BB1, AA1 - VPERM2I128 $0x13, CC1, DD1, BB1 + VPERM2I128 $0x13, Y0, Y14, Y0 + VPERM2I128 $0x13, Y12, Y4, Y14 + VPERM2I128 $0x02, Y5, Y9, Y12 + VPERM2I128 $0x02, Y13, Y1, Y4 + VPERM2I128 $0x13, Y5, Y9, Y5 + VPERM2I128 $0x13, Y13, Y1, Y9 openAVX2ShortOpen: // Hash - MOVQ ad_len+80(FP), itr2 + MOVQ ad_len+80(FP), R9 CALL polyHashADInternal<>(SB) openAVX2ShortOpenLoop: - CMPQ inl, $32 + CMPQ BX, $0x20 JB openAVX2ShortTail32 - SUBQ $32, inl + SUBQ $0x20, BX // Load for hashing - polyAdd(0*8(inp)) - polyMulAVX2 - polyAdd(2*8(inp)) - polyMulAVX2 + ADDQ (SI), R10 + ADCQ 8(SI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + ADDQ 16(SI), R10 + ADCQ 24(SI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 // Load for decryption - VPXOR (inp), AA0, AA0 - VMOVDQU AA0, (oup) - LEAQ (1*32)(inp), inp - LEAQ (1*32)(oup), oup + VPXOR (SI), Y0, Y0 + VMOVDQU Y0, (DI) + LEAQ 32(SI), SI + LEAQ 32(DI), DI // Shift stream left - VMOVDQA BB0, AA0 - VMOVDQA CC0, BB0 - VMOVDQA DD0, CC0 - VMOVDQA AA1, DD0 - VMOVDQA BB1, AA1 - VMOVDQA CC1, BB1 - VMOVDQA DD1, CC1 - VMOVDQA AA2, DD1 - VMOVDQA BB2, AA2 + VMOVDQA Y14, Y0 + VMOVDQA Y12, Y14 + VMOVDQA Y4, Y12 + VMOVDQA Y5, Y4 + VMOVDQA Y9, Y5 + VMOVDQA Y13, Y9 + VMOVDQA Y1, Y13 + VMOVDQA Y6, Y1 + VMOVDQA Y10, Y6 JMP openAVX2ShortOpenLoop openAVX2ShortTail32: - CMPQ inl, $16 - VMOVDQA A0, A1 + CMPQ BX, $0x10 + VMOVDQA X0, X1 JB openAVX2ShortDone - - SUBQ $16, inl + SUBQ $0x10, BX // Load for hashing - polyAdd(0*8(inp)) - polyMulAVX2 + ADDQ (SI), R10 + ADCQ 8(SI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 // Load for decryption - VPXOR (inp), A0, T0 - VMOVDQU T0, (oup) - LEAQ (1*16)(inp), inp - LEAQ (1*16)(oup), oup - VPERM2I128 $0x11, AA0, AA0, AA0 - VMOVDQA A0, A1 + VPXOR (SI), X0, X12 + VMOVDQU X12, (DI) + LEAQ 16(SI), SI + LEAQ 16(DI), DI + VPERM2I128 $0x11, Y0, Y0, Y0 + VMOVDQA X0, X1 openAVX2ShortDone: VZEROUPPER JMP openSSETail16 -// ---------------------------------------------------------------------------- -// Special optimization for buffers smaller than 321 bytes openAVX2320: - // For up to 320 bytes of ciphertext and 64 bytes for the poly key, we process six blocks - VMOVDQA AA0, AA1; VMOVDQA BB0, BB1; VMOVDQA CC0, CC1; VPADDD ·avx2IncMask<>(SB), DD0, DD1 - VMOVDQA AA0, AA2; VMOVDQA BB0, BB2; VMOVDQA CC0, CC2; VPADDD ·avx2IncMask<>(SB), DD1, DD2 - VMOVDQA BB0, TT1; VMOVDQA CC0, TT2; VMOVDQA DD0, TT3 - MOVQ $10, itr2 + VMOVDQA Y0, Y5 + VMOVDQA Y14, Y9 + VMOVDQA Y12, Y13 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VMOVDQA Y0, Y6 + VMOVDQA Y14, Y10 + VMOVDQA Y12, Y8 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VMOVDQA Y14, Y7 + VMOVDQA Y12, Y11 + VMOVDQA Y4, Y15 + MOVQ $0x0000000a, R9 openAVX2320InnerCipherLoop: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0); chachaQR_AVX2(AA2, BB2, CC2, DD2, TT0) - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $4, BB2, BB2, BB2 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1; VPALIGNR $12, DD2, DD2, DD2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0); chachaQR_AVX2(AA2, BB2, CC2, DD2, TT0) - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $12, BB2, BB2, BB2 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1; VPALIGNR $4, DD2, DD2, DD2 - DECQ itr2 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + DECQ R9 JNE openAVX2320InnerCipherLoop - - VMOVDQA ·chacha20Constants<>(SB), TT0 - VPADDD TT0, AA0, AA0; VPADDD TT0, AA1, AA1; VPADDD TT0, AA2, AA2 - VPADDD TT1, BB0, BB0; VPADDD TT1, BB1, BB1; VPADDD TT1, BB2, BB2 - VPADDD TT2, CC0, CC0; VPADDD TT2, CC1, CC1; VPADDD TT2, CC2, CC2 - VMOVDQA ·avx2IncMask<>(SB), TT0 - VPADDD TT3, DD0, DD0; VPADDD TT0, TT3, TT3 - VPADDD TT3, DD1, DD1; VPADDD TT0, TT3, TT3 - VPADDD TT3, DD2, DD2 + VMOVDQA ·chacha20Constants<>+0(SB), Y3 + VPADDD Y3, Y0, Y0 + VPADDD Y3, Y5, Y5 + VPADDD Y3, Y6, Y6 + VPADDD Y7, Y14, Y14 + VPADDD Y7, Y9, Y9 + VPADDD Y7, Y10, Y10 + VPADDD Y11, Y12, Y12 + VPADDD Y11, Y13, Y13 + VPADDD Y11, Y8, Y8 + VMOVDQA ·avx2IncMask<>+0(SB), Y3 + VPADDD Y15, Y4, Y4 + VPADDD Y3, Y15, Y15 + VPADDD Y15, Y1, Y1 + VPADDD Y3, Y15, Y15 + VPADDD Y15, Y2, Y2 // Clamp and store poly key - VPERM2I128 $0x02, AA0, BB0, TT0 - VPAND ·polyClampMask<>(SB), TT0, TT0 - VMOVDQA TT0, rsStoreAVX2 + VPERM2I128 $0x02, Y0, Y14, Y3 + VPAND ·polyClampMask<>+0(SB), Y3, Y3 + VMOVDQA Y3, (BP) // Stream for up to 320 bytes - VPERM2I128 $0x13, AA0, BB0, AA0 - VPERM2I128 $0x13, CC0, DD0, BB0 - VPERM2I128 $0x02, AA1, BB1, CC0 - VPERM2I128 $0x02, CC1, DD1, DD0 - VPERM2I128 $0x13, AA1, BB1, AA1 - VPERM2I128 $0x13, CC1, DD1, BB1 - VPERM2I128 $0x02, AA2, BB2, CC1 - VPERM2I128 $0x02, CC2, DD2, DD1 - VPERM2I128 $0x13, AA2, BB2, AA2 - VPERM2I128 $0x13, CC2, DD2, BB2 + VPERM2I128 $0x13, Y0, Y14, Y0 + VPERM2I128 $0x13, Y12, Y4, Y14 + VPERM2I128 $0x02, Y5, Y9, Y12 + VPERM2I128 $0x02, Y13, Y1, Y4 + VPERM2I128 $0x13, Y5, Y9, Y5 + VPERM2I128 $0x13, Y13, Y1, Y9 + VPERM2I128 $0x02, Y6, Y10, Y13 + VPERM2I128 $0x02, Y8, Y2, Y1 + VPERM2I128 $0x13, Y6, Y10, Y6 + VPERM2I128 $0x13, Y8, Y2, Y10 JMP openAVX2ShortOpen -// ---------------------------------------------------------------------------- -// Special optimization for the last 128 bytes of ciphertext openAVX2Tail128: // Need to decrypt up to 128 bytes - prepare two blocks - VMOVDQA ·chacha20Constants<>(SB), AA1 - VMOVDQA state1StoreAVX2, BB1 - VMOVDQA state2StoreAVX2, CC1 - VMOVDQA ctr3StoreAVX2, DD1 - VPADDD ·avx2IncMask<>(SB), DD1, DD1 - VMOVDQA DD1, DD0 - - XORQ itr2, itr2 - MOVQ inl, itr1 - ANDQ $-16, itr1 - TESTQ itr1, itr1 - JE openAVX2Tail128LoopB + VMOVDQA ·chacha20Constants<>+0(SB), Y5 + VMOVDQA 32(BP), Y9 + VMOVDQA 64(BP), Y13 + VMOVDQA 192(BP), Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y1 + VMOVDQA Y1, Y4 + XORQ R9, R9 + MOVQ BX, CX + ANDQ $-16, CX + TESTQ CX, CX + JE openAVX2Tail128LoopB openAVX2Tail128LoopA: - // Perform ChaCha rounds, while hashing the remaining input - polyAdd(0(inp)(itr2*1)) - polyMulAVX2 + ADDQ (SI)(R9*1), R10 + ADCQ 8(SI)(R9*1), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 openAVX2Tail128LoopB: - ADDQ $16, itr2 - chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - VPALIGNR $4, BB1, BB1, BB1 - VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $12, DD1, DD1, DD1 - chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - VPALIGNR $12, BB1, BB1, BB1 - VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $4, DD1, DD1, DD1 - CMPQ itr2, itr1 - JB openAVX2Tail128LoopA - CMPQ itr2, $160 - JNE openAVX2Tail128LoopB - - VPADDD ·chacha20Constants<>(SB), AA1, AA1 - VPADDD state1StoreAVX2, BB1, BB1 - VPADDD state2StoreAVX2, CC1, CC1 - VPADDD DD0, DD1, DD1 - VPERM2I128 $0x02, AA1, BB1, AA0; VPERM2I128 $0x02, CC1, DD1, BB0; VPERM2I128 $0x13, AA1, BB1, CC0; VPERM2I128 $0x13, CC1, DD1, DD0 + ADDQ $0x10, R9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x04, Y1, Y1, Y1 + CMPQ R9, CX + JB openAVX2Tail128LoopA + CMPQ R9, $0xa0 + JNE openAVX2Tail128LoopB + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD 32(BP), Y9, Y9 + VPADDD 64(BP), Y13, Y13 + VPADDD Y4, Y1, Y1 + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 openAVX2TailLoop: - CMPQ inl, $32 + CMPQ BX, $0x20 JB openAVX2Tail - SUBQ $32, inl + SUBQ $0x20, BX // Load for decryption - VPXOR (inp), AA0, AA0 - VMOVDQU AA0, (oup) - LEAQ (1*32)(inp), inp - LEAQ (1*32)(oup), oup - VMOVDQA BB0, AA0 - VMOVDQA CC0, BB0 - VMOVDQA DD0, CC0 + VPXOR (SI), Y0, Y0 + VMOVDQU Y0, (DI) + LEAQ 32(SI), SI + LEAQ 32(DI), DI + VMOVDQA Y14, Y0 + VMOVDQA Y12, Y14 + VMOVDQA Y4, Y12 JMP openAVX2TailLoop openAVX2Tail: - CMPQ inl, $16 - VMOVDQA A0, A1 + CMPQ BX, $0x10 + VMOVDQA X0, X1 JB openAVX2TailDone - SUBQ $16, inl + SUBQ $0x10, BX // Load for decryption - VPXOR (inp), A0, T0 - VMOVDQU T0, (oup) - LEAQ (1*16)(inp), inp - LEAQ (1*16)(oup), oup - VPERM2I128 $0x11, AA0, AA0, AA0 - VMOVDQA A0, A1 + VPXOR (SI), X0, X12 + VMOVDQU X12, (DI) + LEAQ 16(SI), SI + LEAQ 16(DI), DI + VPERM2I128 $0x11, Y0, Y0, Y0 + VMOVDQA X0, X1 openAVX2TailDone: VZEROUPPER JMP openSSETail16 -// ---------------------------------------------------------------------------- -// Special optimization for the last 256 bytes of ciphertext openAVX2Tail256: - // Need to decrypt up to 256 bytes - prepare four blocks - VMOVDQA ·chacha20Constants<>(SB), AA0; VMOVDQA AA0, AA1 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA BB0, BB1 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA CC0, CC1 - VMOVDQA ctr3StoreAVX2, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD1 - VMOVDQA DD0, TT1 - VMOVDQA DD1, TT2 + VMOVDQA ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VMOVDQA Y4, Y7 + VMOVDQA Y1, Y11 // Compute the number of iterations that will hash data - MOVQ inl, tmpStoreAVX2 - MOVQ inl, itr1 - SUBQ $128, itr1 - SHRQ $4, itr1 - MOVQ $10, itr2 - CMPQ itr1, $10 - CMOVQGT itr2, itr1 - MOVQ inp, inl - XORQ itr2, itr2 + MOVQ BX, 224(BP) + MOVQ BX, CX + SUBQ $0x80, CX + SHRQ $0x04, CX + MOVQ $0x0000000a, R9 + CMPQ CX, $0x0a + CMOVQGT R9, CX + MOVQ SI, BX + XORQ R9, R9 openAVX2Tail256LoopA: - polyAdd(0(inl)) - polyMulAVX2 - LEAQ 16(inl), inl + ADDQ (BX), R10 + ADCQ 8(BX), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(BX), BX - // Perform ChaCha rounds, while hashing the remaining input openAVX2Tail256LoopB: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1 - INCQ itr2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1 - CMPQ itr2, itr1 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + INCQ R9 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + CMPQ R9, CX JB openAVX2Tail256LoopA + CMPQ R9, $0x0a + JNE openAVX2Tail256LoopB + MOVQ BX, R9 + SUBQ SI, BX + MOVQ BX, CX + MOVQ 224(BP), BX - CMPQ itr2, $10 - JNE openAVX2Tail256LoopB - - MOVQ inl, itr2 - SUBQ inp, inl - MOVQ inl, itr1 - MOVQ tmpStoreAVX2, inl - - // Hash the remainder of data (if any) openAVX2Tail256Hash: - ADDQ $16, itr1 - CMPQ itr1, inl - JGT openAVX2Tail256HashEnd - polyAdd (0(itr2)) - polyMulAVX2 - LEAQ 16(itr2), itr2 - JMP openAVX2Tail256Hash - -// Store 128 bytes safely, then go to store loop + ADDQ $0x10, CX + CMPQ CX, BX + JGT openAVX2Tail256HashEnd + ADDQ (R9), R10 + ADCQ 8(R9), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(R9), R9 + JMP openAVX2Tail256Hash + openAVX2Tail256HashEnd: - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1 - VPADDD TT1, DD0, DD0; VPADDD TT2, DD1, DD1 - VPERM2I128 $0x02, AA0, BB0, AA2; VPERM2I128 $0x02, CC0, DD0, BB2; VPERM2I128 $0x13, AA0, BB0, CC2; VPERM2I128 $0x13, CC0, DD0, DD2 - VPERM2I128 $0x02, AA1, BB1, AA0; VPERM2I128 $0x02, CC1, DD1, BB0; VPERM2I128 $0x13, AA1, BB1, CC0; VPERM2I128 $0x13, CC1, DD1, DD0 - - VPXOR (0*32)(inp), AA2, AA2; VPXOR (1*32)(inp), BB2, BB2; VPXOR (2*32)(inp), CC2, CC2; VPXOR (3*32)(inp), DD2, DD2 - VMOVDQU AA2, (0*32)(oup); VMOVDQU BB2, (1*32)(oup); VMOVDQU CC2, (2*32)(oup); VMOVDQU DD2, (3*32)(oup) - LEAQ (4*32)(inp), inp - LEAQ (4*32)(oup), oup - SUBQ $4*32, inl - - JMP openAVX2TailLoop - -// ---------------------------------------------------------------------------- -// Special optimization for the last 384 bytes of ciphertext + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD Y7, Y4, Y4 + VPADDD Y11, Y1, Y1 + VPERM2I128 $0x02, Y0, Y14, Y6 + VPERM2I128 $0x02, Y12, Y4, Y10 + VPERM2I128 $0x13, Y0, Y14, Y8 + VPERM2I128 $0x13, Y12, Y4, Y2 + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 + VPXOR (SI), Y6, Y6 + VPXOR 32(SI), Y10, Y10 + VPXOR 64(SI), Y8, Y8 + VPXOR 96(SI), Y2, Y2 + VMOVDQU Y6, (DI) + VMOVDQU Y10, 32(DI) + VMOVDQU Y8, 64(DI) + VMOVDQU Y2, 96(DI) + LEAQ 128(SI), SI + LEAQ 128(DI), DI + SUBQ $0x80, BX + JMP openAVX2TailLoop + openAVX2Tail384: // Need to decrypt up to 384 bytes - prepare six blocks - VMOVDQA ·chacha20Constants<>(SB), AA0; VMOVDQA AA0, AA1; VMOVDQA AA0, AA2 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA BB0, BB1; VMOVDQA BB0, BB2 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA CC0, CC1; VMOVDQA CC0, CC2 - VMOVDQA ctr3StoreAVX2, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD1 - VPADDD ·avx2IncMask<>(SB), DD1, DD2 - VMOVDQA DD0, ctr0StoreAVX2 - VMOVDQA DD1, ctr1StoreAVX2 - VMOVDQA DD2, ctr2StoreAVX2 + VMOVDQA ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VMOVDQA Y4, 96(BP) + VMOVDQA Y1, 128(BP) + VMOVDQA Y2, 160(BP) // Compute the number of iterations that will hash two blocks of data - MOVQ inl, tmpStoreAVX2 - MOVQ inl, itr1 - SUBQ $256, itr1 - SHRQ $4, itr1 - ADDQ $6, itr1 - MOVQ $10, itr2 - CMPQ itr1, $10 - CMOVQGT itr2, itr1 - MOVQ inp, inl - XORQ itr2, itr2 - - // Perform ChaCha rounds, while hashing the remaining input + MOVQ BX, 224(BP) + MOVQ BX, CX + SUBQ $0x00000100, CX + SHRQ $0x04, CX + ADDQ $0x06, CX + MOVQ $0x0000000a, R9 + CMPQ CX, $0x0a + CMOVQGT R9, CX + MOVQ SI, BX + XORQ R9, R9 + openAVX2Tail384LoopB: - polyAdd(0(inl)) - polyMulAVX2 - LEAQ 16(inl), inl + ADDQ (BX), R10 + ADCQ 8(BX), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(BX), BX openAVX2Tail384LoopA: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0); chachaQR_AVX2(AA2, BB2, CC2, DD2, TT0) - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $4, BB2, BB2, BB2 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1; VPALIGNR $12, DD2, DD2, DD2 - polyAdd(0(inl)) - polyMulAVX2 - LEAQ 16(inl), inl - INCQ itr2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0); chachaQR_AVX2(AA2, BB2, CC2, DD2, TT0) - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $12, BB2, BB2, BB2 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1; VPALIGNR $4, DD2, DD2, DD2 - - CMPQ itr2, itr1 - JB openAVX2Tail384LoopB - - CMPQ itr2, $10 - JNE openAVX2Tail384LoopA - - MOVQ inl, itr2 - SUBQ inp, inl - MOVQ inl, itr1 - MOVQ tmpStoreAVX2, inl + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + ADDQ (BX), R10 + ADCQ 8(BX), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(BX), BX + INCQ R9 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + CMPQ R9, CX + JB openAVX2Tail384LoopB + CMPQ R9, $0x0a + JNE openAVX2Tail384LoopA + MOVQ BX, R9 + SUBQ SI, BX + MOVQ BX, CX + MOVQ 224(BP), BX openAVX2Tail384Hash: - ADDQ $16, itr1 - CMPQ itr1, inl - JGT openAVX2Tail384HashEnd - polyAdd(0(itr2)) - polyMulAVX2 - LEAQ 16(itr2), itr2 - JMP openAVX2Tail384Hash - -// Store 256 bytes safely, then go to store loop + ADDQ $0x10, CX + CMPQ CX, BX + JGT openAVX2Tail384HashEnd + ADDQ (R9), R10 + ADCQ 8(R9), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(R9), R9 + JMP openAVX2Tail384Hash + openAVX2Tail384HashEnd: - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1; VPADDD ·chacha20Constants<>(SB), AA2, AA2 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1; VPADDD state1StoreAVX2, BB2, BB2 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1; VPADDD state2StoreAVX2, CC2, CC2 - VPADDD ctr0StoreAVX2, DD0, DD0; VPADDD ctr1StoreAVX2, DD1, DD1; VPADDD ctr2StoreAVX2, DD2, DD2 - VPERM2I128 $0x02, AA0, BB0, TT0; VPERM2I128 $0x02, CC0, DD0, TT1; VPERM2I128 $0x13, AA0, BB0, TT2; VPERM2I128 $0x13, CC0, DD0, TT3 - VPXOR (0*32)(inp), TT0, TT0; VPXOR (1*32)(inp), TT1, TT1; VPXOR (2*32)(inp), TT2, TT2; VPXOR (3*32)(inp), TT3, TT3 - VMOVDQU TT0, (0*32)(oup); VMOVDQU TT1, (1*32)(oup); VMOVDQU TT2, (2*32)(oup); VMOVDQU TT3, (3*32)(oup) - VPERM2I128 $0x02, AA1, BB1, TT0; VPERM2I128 $0x02, CC1, DD1, TT1; VPERM2I128 $0x13, AA1, BB1, TT2; VPERM2I128 $0x13, CC1, DD1, TT3 - VPXOR (4*32)(inp), TT0, TT0; VPXOR (5*32)(inp), TT1, TT1; VPXOR (6*32)(inp), TT2, TT2; VPXOR (7*32)(inp), TT3, TT3 - VMOVDQU TT0, (4*32)(oup); VMOVDQU TT1, (5*32)(oup); VMOVDQU TT2, (6*32)(oup); VMOVDQU TT3, (7*32)(oup) - VPERM2I128 $0x02, AA2, BB2, AA0; VPERM2I128 $0x02, CC2, DD2, BB0; VPERM2I128 $0x13, AA2, BB2, CC0; VPERM2I128 $0x13, CC2, DD2, DD0 - LEAQ (8*32)(inp), inp - LEAQ (8*32)(oup), oup - SUBQ $8*32, inl + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 32(BP), Y10, Y10 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD 64(BP), Y8, Y8 + VPADDD 96(BP), Y4, Y4 + VPADDD 128(BP), Y1, Y1 + VPADDD 160(BP), Y2, Y2 + VPERM2I128 $0x02, Y0, Y14, Y3 + VPERM2I128 $0x02, Y12, Y4, Y7 + VPERM2I128 $0x13, Y0, Y14, Y11 + VPERM2I128 $0x13, Y12, Y4, Y15 + VPXOR (SI), Y3, Y3 + VPXOR 32(SI), Y7, Y7 + VPXOR 64(SI), Y11, Y11 + VPXOR 96(SI), Y15, Y15 + VMOVDQU Y3, (DI) + VMOVDQU Y7, 32(DI) + VMOVDQU Y11, 64(DI) + VMOVDQU Y15, 96(DI) + VPERM2I128 $0x02, Y5, Y9, Y3 + VPERM2I128 $0x02, Y13, Y1, Y7 + VPERM2I128 $0x13, Y5, Y9, Y11 + VPERM2I128 $0x13, Y13, Y1, Y15 + VPXOR 128(SI), Y3, Y3 + VPXOR 160(SI), Y7, Y7 + VPXOR 192(SI), Y11, Y11 + VPXOR 224(SI), Y15, Y15 + VMOVDQU Y3, 128(DI) + VMOVDQU Y7, 160(DI) + VMOVDQU Y11, 192(DI) + VMOVDQU Y15, 224(DI) + VPERM2I128 $0x02, Y6, Y10, Y0 + VPERM2I128 $0x02, Y8, Y2, Y14 + VPERM2I128 $0x13, Y6, Y10, Y12 + VPERM2I128 $0x13, Y8, Y2, Y4 + LEAQ 256(SI), SI + LEAQ 256(DI), DI + SUBQ $0x00000100, BX JMP openAVX2TailLoop -// ---------------------------------------------------------------------------- -// Special optimization for the last 512 bytes of ciphertext openAVX2Tail512: - VMOVDQU ·chacha20Constants<>(SB), AA0; VMOVDQA AA0, AA1; VMOVDQA AA0, AA2; VMOVDQA AA0, AA3 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA BB0, BB1; VMOVDQA BB0, BB2; VMOVDQA BB0, BB3 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA CC0, CC1; VMOVDQA CC0, CC2; VMOVDQA CC0, CC3 - VMOVDQA ctr3StoreAVX2, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD1; VPADDD ·avx2IncMask<>(SB), DD1, DD2; VPADDD ·avx2IncMask<>(SB), DD2, DD3 - VMOVDQA DD0, ctr0StoreAVX2; VMOVDQA DD1, ctr1StoreAVX2; VMOVDQA DD2, ctr2StoreAVX2; VMOVDQA DD3, ctr3StoreAVX2 - XORQ itr1, itr1 - MOVQ inp, itr2 + VMOVDQU ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA Y0, Y7 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA Y14, Y11 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA Y12, Y15 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VPADDD ·avx2IncMask<>+0(SB), Y2, Y3 + VMOVDQA Y4, 96(BP) + VMOVDQA Y1, 128(BP) + VMOVDQA Y2, 160(BP) + VMOVDQA Y3, 192(BP) + XORQ CX, CX + MOVQ SI, R9 openAVX2Tail512LoopB: - polyAdd(0(itr2)) - polyMulAVX2 - LEAQ (2*8)(itr2), itr2 + ADDQ (R9), R10 + ADCQ 8(R9), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(R9), R9 openAVX2Tail512LoopA: - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyAdd(0*8(itr2)) - polyMulAVX2 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol8<>(SB), DD0, DD0; VPSHUFB ·rol8<>(SB), DD1, DD1; VPSHUFB ·rol8<>(SB), DD2, DD2; VPSHUFB ·rol8<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $7, BB0, CC3; VPSRLD $25, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $7, BB1, CC3; VPSRLD $25, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $7, BB2, CC3; VPSRLD $25, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $7, BB3, CC3; VPSRLD $25, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $4, BB2, BB2, BB2; VPALIGNR $4, BB3, BB3, BB3 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $8, CC3, CC3, CC3 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1; VPALIGNR $12, DD2, DD2, DD2; VPALIGNR $12, DD3, DD3, DD3 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - polyAdd(2*8(itr2)) - polyMulAVX2 - LEAQ (4*8)(itr2), itr2 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol8<>(SB), DD0, DD0; VPSHUFB ·rol8<>(SB), DD1, DD1; VPSHUFB ·rol8<>(SB), DD2, DD2; VPSHUFB ·rol8<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $7, BB0, CC3; VPSRLD $25, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $7, BB1, CC3; VPSRLD $25, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $7, BB2, CC3; VPSRLD $25, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $7, BB3, CC3; VPSRLD $25, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $12, BB2, BB2, BB2; VPALIGNR $12, BB3, BB3, BB3 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $8, CC3, CC3, CC3 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1; VPALIGNR $4, DD2, DD2, DD2; VPALIGNR $4, DD3, DD3, DD3 - INCQ itr1 - CMPQ itr1, $4 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + ADDQ (R9), R10 + ADCQ 8(R9), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x04, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPALIGNR $0x0c, Y3, Y3, Y3 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + ADDQ 16(R9), R10 + ADCQ 24(R9), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 32(R9), R9 + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x0c, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + VPALIGNR $0x04, Y3, Y3, Y3 + INCQ CX + CMPQ CX, $0x04 JLT openAVX2Tail512LoopB - - CMPQ itr1, $10 - JNE openAVX2Tail512LoopA - - MOVQ inl, itr1 - SUBQ $384, itr1 - ANDQ $-16, itr1 + CMPQ CX, $0x0a + JNE openAVX2Tail512LoopA + MOVQ BX, CX + SUBQ $0x00000180, CX + ANDQ $-16, CX openAVX2Tail512HashLoop: - TESTQ itr1, itr1 + TESTQ CX, CX JE openAVX2Tail512HashEnd - polyAdd(0(itr2)) - polyMulAVX2 - LEAQ 16(itr2), itr2 - SUBQ $16, itr1 + ADDQ (R9), R10 + ADCQ 8(R9), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(R9), R9 + SUBQ $0x10, CX JMP openAVX2Tail512HashLoop openAVX2Tail512HashEnd: - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1; VPADDD ·chacha20Constants<>(SB), AA2, AA2; VPADDD ·chacha20Constants<>(SB), AA3, AA3 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1; VPADDD state1StoreAVX2, BB2, BB2; VPADDD state1StoreAVX2, BB3, BB3 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1; VPADDD state2StoreAVX2, CC2, CC2; VPADDD state2StoreAVX2, CC3, CC3 - VPADDD ctr0StoreAVX2, DD0, DD0; VPADDD ctr1StoreAVX2, DD1, DD1; VPADDD ctr2StoreAVX2, DD2, DD2; VPADDD ctr3StoreAVX2, DD3, DD3 - VMOVDQA CC3, tmpStoreAVX2 - VPERM2I128 $0x02, AA0, BB0, CC3; VPERM2I128 $0x13, AA0, BB0, BB0; VPERM2I128 $0x02, CC0, DD0, AA0; VPERM2I128 $0x13, CC0, DD0, CC0 - VPXOR (0*32)(inp), CC3, CC3; VPXOR (1*32)(inp), AA0, AA0; VPXOR (2*32)(inp), BB0, BB0; VPXOR (3*32)(inp), CC0, CC0 - VMOVDQU CC3, (0*32)(oup); VMOVDQU AA0, (1*32)(oup); VMOVDQU BB0, (2*32)(oup); VMOVDQU CC0, (3*32)(oup) - VPERM2I128 $0x02, AA1, BB1, AA0; VPERM2I128 $0x02, CC1, DD1, BB0; VPERM2I128 $0x13, AA1, BB1, CC0; VPERM2I128 $0x13, CC1, DD1, DD0 - VPXOR (4*32)(inp), AA0, AA0; VPXOR (5*32)(inp), BB0, BB0; VPXOR (6*32)(inp), CC0, CC0; VPXOR (7*32)(inp), DD0, DD0 - VMOVDQU AA0, (4*32)(oup); VMOVDQU BB0, (5*32)(oup); VMOVDQU CC0, (6*32)(oup); VMOVDQU DD0, (7*32)(oup) - VPERM2I128 $0x02, AA2, BB2, AA0; VPERM2I128 $0x02, CC2, DD2, BB0; VPERM2I128 $0x13, AA2, BB2, CC0; VPERM2I128 $0x13, CC2, DD2, DD0 - VPXOR (8*32)(inp), AA0, AA0; VPXOR (9*32)(inp), BB0, BB0; VPXOR (10*32)(inp), CC0, CC0; VPXOR (11*32)(inp), DD0, DD0 - VMOVDQU AA0, (8*32)(oup); VMOVDQU BB0, (9*32)(oup); VMOVDQU CC0, (10*32)(oup); VMOVDQU DD0, (11*32)(oup) - VPERM2I128 $0x02, AA3, BB3, AA0; VPERM2I128 $0x02, tmpStoreAVX2, DD3, BB0; VPERM2I128 $0x13, AA3, BB3, CC0; VPERM2I128 $0x13, tmpStoreAVX2, DD3, DD0 - - LEAQ (12*32)(inp), inp - LEAQ (12*32)(oup), oup - SUBQ $12*32, inl - - JMP openAVX2TailLoop - -// ---------------------------------------------------------------------------- -// ---------------------------------------------------------------------------- -// func chacha20Poly1305Seal(dst, key, src, ad []byte) -TEXT ·chacha20Poly1305Seal(SB), 0, $288-96 - // For aligned stack access + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 + VPADDD ·chacha20Constants<>+0(SB), Y7, Y7 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 32(BP), Y10, Y10 + VPADDD 32(BP), Y11, Y11 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD 64(BP), Y8, Y8 + VPADDD 64(BP), Y15, Y15 + VPADDD 96(BP), Y4, Y4 + VPADDD 128(BP), Y1, Y1 + VPADDD 160(BP), Y2, Y2 + VPADDD 192(BP), Y3, Y3 + VMOVDQA Y15, 224(BP) + VPERM2I128 $0x02, Y0, Y14, Y15 + VPERM2I128 $0x13, Y0, Y14, Y14 + VPERM2I128 $0x02, Y12, Y4, Y0 + VPERM2I128 $0x13, Y12, Y4, Y12 + VPXOR (SI), Y15, Y15 + VPXOR 32(SI), Y0, Y0 + VPXOR 64(SI), Y14, Y14 + VPXOR 96(SI), Y12, Y12 + VMOVDQU Y15, (DI) + VMOVDQU Y0, 32(DI) + VMOVDQU Y14, 64(DI) + VMOVDQU Y12, 96(DI) + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 + VPXOR 128(SI), Y0, Y0 + VPXOR 160(SI), Y14, Y14 + VPXOR 192(SI), Y12, Y12 + VPXOR 224(SI), Y4, Y4 + VMOVDQU Y0, 128(DI) + VMOVDQU Y14, 160(DI) + VMOVDQU Y12, 192(DI) + VMOVDQU Y4, 224(DI) + VPERM2I128 $0x02, Y6, Y10, Y0 + VPERM2I128 $0x02, Y8, Y2, Y14 + VPERM2I128 $0x13, Y6, Y10, Y12 + VPERM2I128 $0x13, Y8, Y2, Y4 + VPXOR 256(SI), Y0, Y0 + VPXOR 288(SI), Y14, Y14 + VPXOR 320(SI), Y12, Y12 + VPXOR 352(SI), Y4, Y4 + VMOVDQU Y0, 256(DI) + VMOVDQU Y14, 288(DI) + VMOVDQU Y12, 320(DI) + VMOVDQU Y4, 352(DI) + VPERM2I128 $0x02, Y7, Y11, Y0 + VPERM2I128 $0x02, 224(BP), Y3, Y14 + VPERM2I128 $0x13, Y7, Y11, Y12 + VPERM2I128 $0x13, 224(BP), Y3, Y4 + LEAQ 384(SI), SI + LEAQ 384(DI), DI + SUBQ $0x00000180, BX + JMP openAVX2TailLoop + +DATA ·chacha20Constants<>+0(SB)/4, $0x61707865 +DATA ·chacha20Constants<>+4(SB)/4, $0x3320646e +DATA ·chacha20Constants<>+8(SB)/4, $0x79622d32 +DATA ·chacha20Constants<>+12(SB)/4, $0x6b206574 +DATA ·chacha20Constants<>+16(SB)/4, $0x61707865 +DATA ·chacha20Constants<>+20(SB)/4, $0x3320646e +DATA ·chacha20Constants<>+24(SB)/4, $0x79622d32 +DATA ·chacha20Constants<>+28(SB)/4, $0x6b206574 +GLOBL ·chacha20Constants<>(SB), RODATA|NOPTR, $32 + +DATA ·polyClampMask<>+0(SB)/8, $0x0ffffffc0fffffff +DATA ·polyClampMask<>+8(SB)/8, $0x0ffffffc0ffffffc +DATA ·polyClampMask<>+16(SB)/8, $0xffffffffffffffff +DATA ·polyClampMask<>+24(SB)/8, $0xffffffffffffffff +GLOBL ·polyClampMask<>(SB), RODATA|NOPTR, $32 + +DATA ·sseIncMask<>+0(SB)/8, $0x0000000000000001 +DATA ·sseIncMask<>+8(SB)/8, $0x0000000000000000 +GLOBL ·sseIncMask<>(SB), RODATA|NOPTR, $16 + +DATA ·andMask<>+0(SB)/8, $0x00000000000000ff +DATA ·andMask<>+8(SB)/8, $0x0000000000000000 +DATA ·andMask<>+16(SB)/8, $0x000000000000ffff +DATA ·andMask<>+24(SB)/8, $0x0000000000000000 +DATA ·andMask<>+32(SB)/8, $0x0000000000ffffff +DATA ·andMask<>+40(SB)/8, $0x0000000000000000 +DATA ·andMask<>+48(SB)/8, $0x00000000ffffffff +DATA ·andMask<>+56(SB)/8, $0x0000000000000000 +DATA ·andMask<>+64(SB)/8, $0x000000ffffffffff +DATA ·andMask<>+72(SB)/8, $0x0000000000000000 +DATA ·andMask<>+80(SB)/8, $0x0000ffffffffffff +DATA ·andMask<>+88(SB)/8, $0x0000000000000000 +DATA ·andMask<>+96(SB)/8, $0x00ffffffffffffff +DATA ·andMask<>+104(SB)/8, $0x0000000000000000 +DATA ·andMask<>+112(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+120(SB)/8, $0x0000000000000000 +DATA ·andMask<>+128(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+136(SB)/8, $0x00000000000000ff +DATA ·andMask<>+144(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+152(SB)/8, $0x000000000000ffff +DATA ·andMask<>+160(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+168(SB)/8, $0x0000000000ffffff +DATA ·andMask<>+176(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+184(SB)/8, $0x00000000ffffffff +DATA ·andMask<>+192(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+200(SB)/8, $0x000000ffffffffff +DATA ·andMask<>+208(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+216(SB)/8, $0x0000ffffffffffff +DATA ·andMask<>+224(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+232(SB)/8, $0x00ffffffffffffff +GLOBL ·andMask<>(SB), RODATA|NOPTR, $240 + +DATA ·avx2InitMask<>+0(SB)/8, $0x0000000000000000 +DATA ·avx2InitMask<>+8(SB)/8, $0x0000000000000000 +DATA ·avx2InitMask<>+16(SB)/8, $0x0000000000000001 +DATA ·avx2InitMask<>+24(SB)/8, $0x0000000000000000 +GLOBL ·avx2InitMask<>(SB), RODATA|NOPTR, $32 + +DATA ·rol16<>+0(SB)/8, $0x0504070601000302 +DATA ·rol16<>+8(SB)/8, $0x0d0c0f0e09080b0a +DATA ·rol16<>+16(SB)/8, $0x0504070601000302 +DATA ·rol16<>+24(SB)/8, $0x0d0c0f0e09080b0a +GLOBL ·rol16<>(SB), RODATA|NOPTR, $32 + +DATA ·rol8<>+0(SB)/8, $0x0605040702010003 +DATA ·rol8<>+8(SB)/8, $0x0e0d0c0f0a09080b +DATA ·rol8<>+16(SB)/8, $0x0605040702010003 +DATA ·rol8<>+24(SB)/8, $0x0e0d0c0f0a09080b +GLOBL ·rol8<>(SB), RODATA|NOPTR, $32 + +DATA ·avx2IncMask<>+0(SB)/8, $0x0000000000000002 +DATA ·avx2IncMask<>+8(SB)/8, $0x0000000000000000 +DATA ·avx2IncMask<>+16(SB)/8, $0x0000000000000002 +DATA ·avx2IncMask<>+24(SB)/8, $0x0000000000000000 +GLOBL ·avx2IncMask<>(SB), RODATA|NOPTR, $32 + +// func chacha20Poly1305Seal(dst []byte, key []uint32, src []byte, ad []byte) +// Requires: AVX, AVX2, BMI2, CMOV, SSE2 +TEXT ·chacha20Poly1305Seal(SB), $288-96 MOVQ SP, BP - ADDQ $32, BP + ADDQ $0x20, BP ANDQ $-32, BP - MOVQ dst+0(FP), oup - MOVQ key+24(FP), keyp - MOVQ src+48(FP), inp - MOVQ src_len+56(FP), inl - MOVQ ad+72(FP), adp - - CMPB ·useAVX2(SB), $1 + MOVQ dst_base+0(FP), DI + MOVQ key_base+24(FP), R8 + MOVQ src_base+48(FP), SI + MOVQ src_len+56(FP), BX + MOVQ ad_base+72(FP), CX + CMPB ·useAVX2+0(SB), $0x01 JE chacha20Poly1305Seal_AVX2 // Special optimization, for very short buffers - CMPQ inl, $128 - JBE sealSSE128 // About 15% faster + CMPQ BX, $0x80 + JBE sealSSE128 // In the seal case - prepare the poly key + 3 blocks of stream in the first iteration - MOVOU ·chacha20Constants<>(SB), A0 - MOVOU (1*16)(keyp), B0 - MOVOU (2*16)(keyp), C0 - MOVOU (3*16)(keyp), D0 + MOVOU ·chacha20Constants<>+0(SB), X0 + MOVOU 16(R8), X3 + MOVOU 32(R8), X6 + MOVOU 48(R8), X9 // Store state on stack for future use - MOVO B0, state1Store - MOVO C0, state2Store + MOVO X3, 32(BP) + MOVO X6, 48(BP) // Load state, increment counter blocks - MOVO A0, A1; MOVO B0, B1; MOVO C0, C1; MOVO D0, D1; PADDL ·sseIncMask<>(SB), D1 - MOVO A1, A2; MOVO B1, B2; MOVO C1, C2; MOVO D1, D2; PADDL ·sseIncMask<>(SB), D2 - MOVO A2, A3; MOVO B2, B3; MOVO C2, C3; MOVO D2, D3; PADDL ·sseIncMask<>(SB), D3 + MOVO X0, X1 + MOVO X3, X4 + MOVO X6, X7 + MOVO X9, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X1, X2 + MOVO X4, X5 + MOVO X7, X8 + MOVO X10, X11 + PADDL ·sseIncMask<>+0(SB), X11 + MOVO X2, X12 + MOVO X5, X13 + MOVO X8, X14 + MOVO X11, X15 + PADDL ·sseIncMask<>+0(SB), X15 // Store counters - MOVO D0, ctr0Store; MOVO D1, ctr1Store; MOVO D2, ctr2Store; MOVO D3, ctr3Store - MOVQ $10, itr2 + MOVO X9, 80(BP) + MOVO X10, 96(BP) + MOVO X11, 112(BP) + MOVO X15, 128(BP) + MOVQ $0x0000000a, R9 sealSSEIntroLoop: - MOVO C3, tmpStore - chachaQR(A0, B0, C0, D0, C3); chachaQR(A1, B1, C1, D1, C3); chachaQR(A2, B2, C2, D2, C3) - MOVO tmpStore, C3 - MOVO C1, tmpStore - chachaQR(A3, B3, C3, D3, C1) - MOVO tmpStore, C1 - shiftB0Left; shiftB1Left; shiftB2Left; shiftB3Left - shiftC0Left; shiftC1Left; shiftC2Left; shiftC3Left - shiftD0Left; shiftD1Left; shiftD2Left; shiftD3Left - - MOVO C3, tmpStore - chachaQR(A0, B0, C0, D0, C3); chachaQR(A1, B1, C1, D1, C3); chachaQR(A2, B2, C2, D2, C3) - MOVO tmpStore, C3 - MOVO C1, tmpStore - chachaQR(A3, B3, C3, D3, C1) - MOVO tmpStore, C1 - shiftB0Right; shiftB1Right; shiftB2Right; shiftB3Right - shiftC0Right; shiftC1Right; shiftC2Right; shiftC3Right - shiftD0Right; shiftD1Right; shiftD2Right; shiftD3Right - DECQ itr2 - JNE sealSSEIntroLoop + MOVO X14, 64(BP) + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X3 + PXOR X14, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X3 + PXOR X14, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X4 + PXOR X14, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X4 + PXOR X14, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X5 + PXOR X14, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X5 + PXOR X14, X5 + MOVO 64(BP), X14 + MOVO X7, 64(BP) + PADDD X13, X12 + PXOR X12, X15 + ROL16(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x0c, X7 + PSRLL $0x14, X13 + PXOR X7, X13 + PADDD X13, X12 + PXOR X12, X15 + ROL8(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x07, X7 + PSRLL $0x19, X13 + PXOR X7, X13 + MOVO 64(BP), X7 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x0c + MOVO X14, 64(BP) + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X3 + PXOR X14, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X3 + PXOR X14, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X4 + PXOR X14, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X4 + PXOR X14, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X5 + PXOR X14, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X5 + PXOR X14, X5 + MOVO 64(BP), X14 + MOVO X7, 64(BP) + PADDD X13, X12 + PXOR X12, X15 + ROL16(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x0c, X7 + PSRLL $0x14, X13 + PXOR X7, X13 + PADDD X13, X12 + PXOR X12, X15 + ROL8(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x07, X7 + PSRLL $0x19, X13 + PXOR X7, X13 + MOVO 64(BP), X7 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x04 + DECQ R9 + JNE sealSSEIntroLoop // Add in the state - PADDD ·chacha20Constants<>(SB), A0; PADDD ·chacha20Constants<>(SB), A1; PADDD ·chacha20Constants<>(SB), A2; PADDD ·chacha20Constants<>(SB), A3 - PADDD state1Store, B0; PADDD state1Store, B1; PADDD state1Store, B2; PADDD state1Store, B3 - PADDD state2Store, C1; PADDD state2Store, C2; PADDD state2Store, C3 - PADDD ctr1Store, D1; PADDD ctr2Store, D2; PADDD ctr3Store, D3 + PADDD ·chacha20Constants<>+0(SB), X0 + PADDD ·chacha20Constants<>+0(SB), X1 + PADDD ·chacha20Constants<>+0(SB), X2 + PADDD ·chacha20Constants<>+0(SB), X12 + PADDD 32(BP), X3 + PADDD 32(BP), X4 + PADDD 32(BP), X5 + PADDD 32(BP), X13 + PADDD 48(BP), X7 + PADDD 48(BP), X8 + PADDD 48(BP), X14 + PADDD 96(BP), X10 + PADDD 112(BP), X11 + PADDD 128(BP), X15 // Clamp and store the key - PAND ·polyClampMask<>(SB), A0 - MOVO A0, rStore - MOVO B0, sStore + PAND ·polyClampMask<>+0(SB), X0 + MOVO X0, (BP) + MOVO X3, 16(BP) // Hash AAD - MOVQ ad_len+80(FP), itr2 - CALL polyHashADInternal<>(SB) - - MOVOU (0*16)(inp), A0; MOVOU (1*16)(inp), B0; MOVOU (2*16)(inp), C0; MOVOU (3*16)(inp), D0 - PXOR A0, A1; PXOR B0, B1; PXOR C0, C1; PXOR D0, D1 - MOVOU A1, (0*16)(oup); MOVOU B1, (1*16)(oup); MOVOU C1, (2*16)(oup); MOVOU D1, (3*16)(oup) - MOVOU (4*16)(inp), A0; MOVOU (5*16)(inp), B0; MOVOU (6*16)(inp), C0; MOVOU (7*16)(inp), D0 - PXOR A0, A2; PXOR B0, B2; PXOR C0, C2; PXOR D0, D2 - MOVOU A2, (4*16)(oup); MOVOU B2, (5*16)(oup); MOVOU C2, (6*16)(oup); MOVOU D2, (7*16)(oup) - - MOVQ $128, itr1 - SUBQ $128, inl - LEAQ 128(inp), inp - - MOVO A3, A1; MOVO B3, B1; MOVO C3, C1; MOVO D3, D1 - - CMPQ inl, $64 - JBE sealSSE128SealHash - - MOVOU (0*16)(inp), A0; MOVOU (1*16)(inp), B0; MOVOU (2*16)(inp), C0; MOVOU (3*16)(inp), D0 - PXOR A0, A3; PXOR B0, B3; PXOR C0, C3; PXOR D0, D3 - MOVOU A3, (8*16)(oup); MOVOU B3, (9*16)(oup); MOVOU C3, (10*16)(oup); MOVOU D3, (11*16)(oup) - - ADDQ $64, itr1 - SUBQ $64, inl - LEAQ 64(inp), inp - - MOVQ $2, itr1 - MOVQ $8, itr2 - - CMPQ inl, $64 - JBE sealSSETail64 - CMPQ inl, $128 - JBE sealSSETail128 - CMPQ inl, $192 - JBE sealSSETail192 + MOVQ ad_len+80(FP), R9 + CALL polyHashADInternal<>(SB) + MOVOU (SI), X0 + MOVOU 16(SI), X3 + MOVOU 32(SI), X6 + MOVOU 48(SI), X9 + PXOR X0, X1 + PXOR X3, X4 + PXOR X6, X7 + PXOR X9, X10 + MOVOU X1, (DI) + MOVOU X4, 16(DI) + MOVOU X7, 32(DI) + MOVOU X10, 48(DI) + MOVOU 64(SI), X0 + MOVOU 80(SI), X3 + MOVOU 96(SI), X6 + MOVOU 112(SI), X9 + PXOR X0, X2 + PXOR X3, X5 + PXOR X6, X8 + PXOR X9, X11 + MOVOU X2, 64(DI) + MOVOU X5, 80(DI) + MOVOU X8, 96(DI) + MOVOU X11, 112(DI) + MOVQ $0x00000080, CX + SUBQ $0x80, BX + LEAQ 128(SI), SI + MOVO X12, X1 + MOVO X13, X4 + MOVO X14, X7 + MOVO X15, X10 + CMPQ BX, $0x40 + JBE sealSSE128SealHash + MOVOU (SI), X0 + MOVOU 16(SI), X3 + MOVOU 32(SI), X6 + MOVOU 48(SI), X9 + PXOR X0, X12 + PXOR X3, X13 + PXOR X6, X14 + PXOR X9, X15 + MOVOU X12, 128(DI) + MOVOU X13, 144(DI) + MOVOU X14, 160(DI) + MOVOU X15, 176(DI) + ADDQ $0x40, CX + SUBQ $0x40, BX + LEAQ 64(SI), SI + MOVQ $0x00000002, CX + MOVQ $0x00000008, R9 + CMPQ BX, $0x40 + JBE sealSSETail64 + CMPQ BX, $0x80 + JBE sealSSETail128 + CMPQ BX, $0xc0 + JBE sealSSETail192 sealSSEMainLoop: // Load state, increment counter blocks - MOVO ·chacha20Constants<>(SB), A0; MOVO state1Store, B0; MOVO state2Store, C0; MOVO ctr3Store, D0; PADDL ·sseIncMask<>(SB), D0 - MOVO A0, A1; MOVO B0, B1; MOVO C0, C1; MOVO D0, D1; PADDL ·sseIncMask<>(SB), D1 - MOVO A1, A2; MOVO B1, B2; MOVO C1, C2; MOVO D1, D2; PADDL ·sseIncMask<>(SB), D2 - MOVO A2, A3; MOVO B2, B3; MOVO C2, C3; MOVO D2, D3; PADDL ·sseIncMask<>(SB), D3 + MOVO ·chacha20Constants<>+0(SB), X0 + MOVO 32(BP), X3 + MOVO 48(BP), X6 + MOVO 128(BP), X9 + PADDL ·sseIncMask<>+0(SB), X9 + MOVO X0, X1 + MOVO X3, X4 + MOVO X6, X7 + MOVO X9, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X1, X2 + MOVO X4, X5 + MOVO X7, X8 + MOVO X10, X11 + PADDL ·sseIncMask<>+0(SB), X11 + MOVO X2, X12 + MOVO X5, X13 + MOVO X8, X14 + MOVO X11, X15 + PADDL ·sseIncMask<>+0(SB), X15 // Store counters - MOVO D0, ctr0Store; MOVO D1, ctr1Store; MOVO D2, ctr2Store; MOVO D3, ctr3Store + MOVO X9, 80(BP) + MOVO X10, 96(BP) + MOVO X11, 112(BP) + MOVO X15, 128(BP) sealSSEInnerLoop: - MOVO C3, tmpStore - chachaQR(A0, B0, C0, D0, C3); chachaQR(A1, B1, C1, D1, C3); chachaQR(A2, B2, C2, D2, C3) - MOVO tmpStore, C3 - MOVO C1, tmpStore - chachaQR(A3, B3, C3, D3, C1) - MOVO tmpStore, C1 - polyAdd(0(oup)) - shiftB0Left; shiftB1Left; shiftB2Left; shiftB3Left - shiftC0Left; shiftC1Left; shiftC2Left; shiftC3Left - shiftD0Left; shiftD1Left; shiftD2Left; shiftD3Left - polyMulStage1 - polyMulStage2 - LEAQ (2*8)(oup), oup - MOVO C3, tmpStore - chachaQR(A0, B0, C0, D0, C3); chachaQR(A1, B1, C1, D1, C3); chachaQR(A2, B2, C2, D2, C3) - MOVO tmpStore, C3 - MOVO C1, tmpStore - polyMulStage3 - chachaQR(A3, B3, C3, D3, C1) - MOVO tmpStore, C1 - polyMulReduceStage - shiftB0Right; shiftB1Right; shiftB2Right; shiftB3Right - shiftC0Right; shiftC1Right; shiftC2Right; shiftC3Right - shiftD0Right; shiftD1Right; shiftD2Right; shiftD3Right - DECQ itr2 - JGE sealSSEInnerLoop - polyAdd(0(oup)) - polyMul - LEAQ (2*8)(oup), oup - DECQ itr1 - JG sealSSEInnerLoop + MOVO X14, 64(BP) + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X3 + PXOR X14, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X3 + PXOR X14, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X4 + PXOR X14, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X4 + PXOR X14, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X5 + PXOR X14, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X5 + PXOR X14, X5 + MOVO 64(BP), X14 + MOVO X7, 64(BP) + PADDD X13, X12 + PXOR X12, X15 + ROL16(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x0c, X7 + PSRLL $0x14, X13 + PXOR X7, X13 + PADDD X13, X12 + PXOR X12, X15 + ROL8(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x07, X7 + PSRLL $0x19, X13 + PXOR X7, X13 + MOVO 64(BP), X7 + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x0c + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + LEAQ 16(DI), DI + MOVO X14, 64(BP) + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X3 + PXOR X14, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X3 + PXOR X14, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X4 + PXOR X14, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X4 + PXOR X14, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X5 + PXOR X14, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X5 + PXOR X14, X5 + MOVO 64(BP), X14 + MOVO X7, 64(BP) + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + PADDD X13, X12 + PXOR X12, X15 + ROL16(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x0c, X7 + PSRLL $0x14, X13 + PXOR X7, X13 + PADDD X13, X12 + PXOR X12, X15 + ROL8(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x07, X7 + PSRLL $0x19, X13 + PXOR X7, X13 + MOVO 64(BP), X7 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x04 + DECQ R9 + JGE sealSSEInnerLoop + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI + DECQ CX + JG sealSSEInnerLoop // Add in the state - PADDD ·chacha20Constants<>(SB), A0; PADDD ·chacha20Constants<>(SB), A1; PADDD ·chacha20Constants<>(SB), A2; PADDD ·chacha20Constants<>(SB), A3 - PADDD state1Store, B0; PADDD state1Store, B1; PADDD state1Store, B2; PADDD state1Store, B3 - PADDD state2Store, C0; PADDD state2Store, C1; PADDD state2Store, C2; PADDD state2Store, C3 - PADDD ctr0Store, D0; PADDD ctr1Store, D1; PADDD ctr2Store, D2; PADDD ctr3Store, D3 - MOVO D3, tmpStore + PADDD ·chacha20Constants<>+0(SB), X0 + PADDD ·chacha20Constants<>+0(SB), X1 + PADDD ·chacha20Constants<>+0(SB), X2 + PADDD ·chacha20Constants<>+0(SB), X12 + PADDD 32(BP), X3 + PADDD 32(BP), X4 + PADDD 32(BP), X5 + PADDD 32(BP), X13 + PADDD 48(BP), X6 + PADDD 48(BP), X7 + PADDD 48(BP), X8 + PADDD 48(BP), X14 + PADDD 80(BP), X9 + PADDD 96(BP), X10 + PADDD 112(BP), X11 + PADDD 128(BP), X15 + MOVO X15, 64(BP) // Load - xor - store - MOVOU (0*16)(inp), D3; PXOR D3, A0 - MOVOU (1*16)(inp), D3; PXOR D3, B0 - MOVOU (2*16)(inp), D3; PXOR D3, C0 - MOVOU (3*16)(inp), D3; PXOR D3, D0 - MOVOU A0, (0*16)(oup) - MOVOU B0, (1*16)(oup) - MOVOU C0, (2*16)(oup) - MOVOU D0, (3*16)(oup) - MOVO tmpStore, D3 - - MOVOU (4*16)(inp), A0; MOVOU (5*16)(inp), B0; MOVOU (6*16)(inp), C0; MOVOU (7*16)(inp), D0 - PXOR A0, A1; PXOR B0, B1; PXOR C0, C1; PXOR D0, D1 - MOVOU A1, (4*16)(oup); MOVOU B1, (5*16)(oup); MOVOU C1, (6*16)(oup); MOVOU D1, (7*16)(oup) - MOVOU (8*16)(inp), A0; MOVOU (9*16)(inp), B0; MOVOU (10*16)(inp), C0; MOVOU (11*16)(inp), D0 - PXOR A0, A2; PXOR B0, B2; PXOR C0, C2; PXOR D0, D2 - MOVOU A2, (8*16)(oup); MOVOU B2, (9*16)(oup); MOVOU C2, (10*16)(oup); MOVOU D2, (11*16)(oup) - ADDQ $192, inp - MOVQ $192, itr1 - SUBQ $192, inl - MOVO A3, A1 - MOVO B3, B1 - MOVO C3, C1 - MOVO D3, D1 - CMPQ inl, $64 + MOVOU (SI), X15 + PXOR X15, X0 + MOVOU 16(SI), X15 + PXOR X15, X3 + MOVOU 32(SI), X15 + PXOR X15, X6 + MOVOU 48(SI), X15 + PXOR X15, X9 + MOVOU X0, (DI) + MOVOU X3, 16(DI) + MOVOU X6, 32(DI) + MOVOU X9, 48(DI) + MOVO 64(BP), X15 + MOVOU 64(SI), X0 + MOVOU 80(SI), X3 + MOVOU 96(SI), X6 + MOVOU 112(SI), X9 + PXOR X0, X1 + PXOR X3, X4 + PXOR X6, X7 + PXOR X9, X10 + MOVOU X1, 64(DI) + MOVOU X4, 80(DI) + MOVOU X7, 96(DI) + MOVOU X10, 112(DI) + MOVOU 128(SI), X0 + MOVOU 144(SI), X3 + MOVOU 160(SI), X6 + MOVOU 176(SI), X9 + PXOR X0, X2 + PXOR X3, X5 + PXOR X6, X8 + PXOR X9, X11 + MOVOU X2, 128(DI) + MOVOU X5, 144(DI) + MOVOU X8, 160(DI) + MOVOU X11, 176(DI) + ADDQ $0xc0, SI + MOVQ $0x000000c0, CX + SUBQ $0xc0, BX + MOVO X12, X1 + MOVO X13, X4 + MOVO X14, X7 + MOVO X15, X10 + CMPQ BX, $0x40 JBE sealSSE128SealHash - MOVOU (0*16)(inp), A0; MOVOU (1*16)(inp), B0; MOVOU (2*16)(inp), C0; MOVOU (3*16)(inp), D0 - PXOR A0, A3; PXOR B0, B3; PXOR C0, C3; PXOR D0, D3 - MOVOU A3, (12*16)(oup); MOVOU B3, (13*16)(oup); MOVOU C3, (14*16)(oup); MOVOU D3, (15*16)(oup) - LEAQ 64(inp), inp - SUBQ $64, inl - MOVQ $6, itr1 - MOVQ $4, itr2 - CMPQ inl, $192 + MOVOU (SI), X0 + MOVOU 16(SI), X3 + MOVOU 32(SI), X6 + MOVOU 48(SI), X9 + PXOR X0, X12 + PXOR X3, X13 + PXOR X6, X14 + PXOR X9, X15 + MOVOU X12, 192(DI) + MOVOU X13, 208(DI) + MOVOU X14, 224(DI) + MOVOU X15, 240(DI) + LEAQ 64(SI), SI + SUBQ $0x40, BX + MOVQ $0x00000006, CX + MOVQ $0x00000004, R9 + CMPQ BX, $0xc0 JG sealSSEMainLoop - - MOVQ inl, itr1 - TESTQ inl, inl + MOVQ BX, CX + TESTQ BX, BX JE sealSSE128SealHash - MOVQ $6, itr1 - CMPQ inl, $64 + MOVQ $0x00000006, CX + CMPQ BX, $0x40 JBE sealSSETail64 - CMPQ inl, $128 + CMPQ BX, $0x80 JBE sealSSETail128 JMP sealSSETail192 -// ---------------------------------------------------------------------------- -// Special optimization for the last 64 bytes of plaintext sealSSETail64: - // Need to encrypt up to 64 bytes - prepare single block, hash 192 or 256 bytes - MOVO ·chacha20Constants<>(SB), A1 - MOVO state1Store, B1 - MOVO state2Store, C1 - MOVO ctr3Store, D1 - PADDL ·sseIncMask<>(SB), D1 - MOVO D1, ctr0Store + MOVO ·chacha20Constants<>+0(SB), X1 + MOVO 32(BP), X4 + MOVO 48(BP), X7 + MOVO 128(BP), X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X10, 80(BP) sealSSETail64LoopA: - // Perform ChaCha rounds, while hashing the previously encrypted ciphertext - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI sealSSETail64LoopB: - chachaQR(A1, B1, C1, D1, T1) - shiftB1Left; shiftC1Left; shiftD1Left - chachaQR(A1, B1, C1, D1, T1) - shiftB1Right; shiftC1Right; shiftD1Right - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup - - DECQ itr1 - JG sealSSETail64LoopA - - DECQ itr2 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X13) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X13 + PSLLL $0x0c, X13 + PSRLL $0x14, X4 + PXOR X13, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X13) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X13 + PSLLL $0x07, X13 + PSRLL $0x19, X4 + PXOR X13, X4 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X13) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X13 + PSLLL $0x0c, X13 + PSRLL $0x14, X4 + PXOR X13, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X13) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X13 + PSLLL $0x07, X13 + PSRLL $0x19, X4 + PXOR X13, X4 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI + DECQ CX + JG sealSSETail64LoopA + DECQ R9 JGE sealSSETail64LoopB - PADDL ·chacha20Constants<>(SB), A1 - PADDL state1Store, B1 - PADDL state2Store, C1 - PADDL ctr0Store, D1 + PADDL ·chacha20Constants<>+0(SB), X1 + PADDL 32(BP), X4 + PADDL 48(BP), X7 + PADDL 80(BP), X10 + JMP sealSSE128Seal - JMP sealSSE128Seal - -// ---------------------------------------------------------------------------- -// Special optimization for the last 128 bytes of plaintext sealSSETail128: - // Need to encrypt up to 128 bytes - prepare two blocks, hash 192 or 256 bytes - MOVO ·chacha20Constants<>(SB), A0; MOVO state1Store, B0; MOVO state2Store, C0; MOVO ctr3Store, D0; PADDL ·sseIncMask<>(SB), D0; MOVO D0, ctr0Store - MOVO A0, A1; MOVO B0, B1; MOVO C0, C1; MOVO D0, D1; PADDL ·sseIncMask<>(SB), D1; MOVO D1, ctr1Store + MOVO ·chacha20Constants<>+0(SB), X0 + MOVO 32(BP), X3 + MOVO 48(BP), X6 + MOVO 128(BP), X9 + PADDL ·sseIncMask<>+0(SB), X9 + MOVO X9, 80(BP) + MOVO X0, X1 + MOVO X3, X4 + MOVO X6, X7 + MOVO X9, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X10, 96(BP) sealSSETail128LoopA: - // Perform ChaCha rounds, while hashing the previously encrypted ciphertext - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI sealSSETail128LoopB: - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0) - shiftB0Left; shiftC0Left; shiftD0Left - shiftB1Left; shiftC1Left; shiftD1Left - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0) - shiftB0Right; shiftC0Right; shiftD0Right - shiftB1Right; shiftC1Right; shiftD1Right - - DECQ itr1 - JG sealSSETail128LoopA - - DECQ itr2 - JGE sealSSETail128LoopB - - PADDL ·chacha20Constants<>(SB), A0; PADDL ·chacha20Constants<>(SB), A1 - PADDL state1Store, B0; PADDL state1Store, B1 - PADDL state2Store, C0; PADDL state2Store, C1 - PADDL ctr0Store, D0; PADDL ctr1Store, D1 - - MOVOU (0*16)(inp), T0; MOVOU (1*16)(inp), T1; MOVOU (2*16)(inp), T2; MOVOU (3*16)(inp), T3 - PXOR T0, A0; PXOR T1, B0; PXOR T2, C0; PXOR T3, D0 - MOVOU A0, (0*16)(oup); MOVOU B0, (1*16)(oup); MOVOU C0, (2*16)(oup); MOVOU D0, (3*16)(oup) - - MOVQ $64, itr1 - LEAQ 64(inp), inp - SUBQ $64, inl - - JMP sealSSE128SealHash - -// ---------------------------------------------------------------------------- -// Special optimization for the last 192 bytes of plaintext + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + DECQ CX + JG sealSSETail128LoopA + DECQ R9 + JGE sealSSETail128LoopB + PADDL ·chacha20Constants<>+0(SB), X0 + PADDL ·chacha20Constants<>+0(SB), X1 + PADDL 32(BP), X3 + PADDL 32(BP), X4 + PADDL 48(BP), X6 + PADDL 48(BP), X7 + PADDL 80(BP), X9 + PADDL 96(BP), X10 + MOVOU (SI), X12 + MOVOU 16(SI), X13 + MOVOU 32(SI), X14 + MOVOU 48(SI), X15 + PXOR X12, X0 + PXOR X13, X3 + PXOR X14, X6 + PXOR X15, X9 + MOVOU X0, (DI) + MOVOU X3, 16(DI) + MOVOU X6, 32(DI) + MOVOU X9, 48(DI) + MOVQ $0x00000040, CX + LEAQ 64(SI), SI + SUBQ $0x40, BX + JMP sealSSE128SealHash + sealSSETail192: - // Need to encrypt up to 192 bytes - prepare three blocks, hash 192 or 256 bytes - MOVO ·chacha20Constants<>(SB), A0; MOVO state1Store, B0; MOVO state2Store, C0; MOVO ctr3Store, D0; PADDL ·sseIncMask<>(SB), D0; MOVO D0, ctr0Store - MOVO A0, A1; MOVO B0, B1; MOVO C0, C1; MOVO D0, D1; PADDL ·sseIncMask<>(SB), D1; MOVO D1, ctr1Store - MOVO A1, A2; MOVO B1, B2; MOVO C1, C2; MOVO D1, D2; PADDL ·sseIncMask<>(SB), D2; MOVO D2, ctr2Store + MOVO ·chacha20Constants<>+0(SB), X0 + MOVO 32(BP), X3 + MOVO 48(BP), X6 + MOVO 128(BP), X9 + PADDL ·sseIncMask<>+0(SB), X9 + MOVO X9, 80(BP) + MOVO X0, X1 + MOVO X3, X4 + MOVO X6, X7 + MOVO X9, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X10, 96(BP) + MOVO X1, X2 + MOVO X4, X5 + MOVO X7, X8 + MOVO X10, X11 + PADDL ·sseIncMask<>+0(SB), X11 + MOVO X11, 112(BP) sealSSETail192LoopA: - // Perform ChaCha rounds, while hashing the previously encrypted ciphertext - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI sealSSETail192LoopB: - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0); chachaQR(A2, B2, C2, D2, T0) - shiftB0Left; shiftC0Left; shiftD0Left - shiftB1Left; shiftC1Left; shiftD1Left - shiftB2Left; shiftC2Left; shiftD2Left - - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup - - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0); chachaQR(A2, B2, C2, D2, T0) - shiftB0Right; shiftC0Right; shiftD0Right - shiftB1Right; shiftC1Right; shiftD1Right - shiftB2Right; shiftC2Right; shiftD2Right - - DECQ itr1 - JG sealSSETail192LoopA - - DECQ itr2 - JGE sealSSETail192LoopB - - PADDL ·chacha20Constants<>(SB), A0; PADDL ·chacha20Constants<>(SB), A1; PADDL ·chacha20Constants<>(SB), A2 - PADDL state1Store, B0; PADDL state1Store, B1; PADDL state1Store, B2 - PADDL state2Store, C0; PADDL state2Store, C1; PADDL state2Store, C2 - PADDL ctr0Store, D0; PADDL ctr1Store, D1; PADDL ctr2Store, D2 - - MOVOU (0*16)(inp), T0; MOVOU (1*16)(inp), T1; MOVOU (2*16)(inp), T2; MOVOU (3*16)(inp), T3 - PXOR T0, A0; PXOR T1, B0; PXOR T2, C0; PXOR T3, D0 - MOVOU A0, (0*16)(oup); MOVOU B0, (1*16)(oup); MOVOU C0, (2*16)(oup); MOVOU D0, (3*16)(oup) - MOVOU (4*16)(inp), T0; MOVOU (5*16)(inp), T1; MOVOU (6*16)(inp), T2; MOVOU (7*16)(inp), T3 - PXOR T0, A1; PXOR T1, B1; PXOR T2, C1; PXOR T3, D1 - MOVOU A1, (4*16)(oup); MOVOU B1, (5*16)(oup); MOVOU C1, (6*16)(oup); MOVOU D1, (7*16)(oup) - - MOVO A2, A1 - MOVO B2, B1 - MOVO C2, C1 - MOVO D2, D1 - MOVQ $128, itr1 - LEAQ 128(inp), inp - SUBQ $128, inl - - JMP sealSSE128SealHash - -// ---------------------------------------------------------------------------- -// Special seal optimization for buffers smaller than 129 bytes + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X5 + PXOR X12, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X5 + PXOR X12, X5 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X5 + PXOR X12, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X5 + PXOR X12, X5 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + DECQ CX + JG sealSSETail192LoopA + DECQ R9 + JGE sealSSETail192LoopB + PADDL ·chacha20Constants<>+0(SB), X0 + PADDL ·chacha20Constants<>+0(SB), X1 + PADDL ·chacha20Constants<>+0(SB), X2 + PADDL 32(BP), X3 + PADDL 32(BP), X4 + PADDL 32(BP), X5 + PADDL 48(BP), X6 + PADDL 48(BP), X7 + PADDL 48(BP), X8 + PADDL 80(BP), X9 + PADDL 96(BP), X10 + PADDL 112(BP), X11 + MOVOU (SI), X12 + MOVOU 16(SI), X13 + MOVOU 32(SI), X14 + MOVOU 48(SI), X15 + PXOR X12, X0 + PXOR X13, X3 + PXOR X14, X6 + PXOR X15, X9 + MOVOU X0, (DI) + MOVOU X3, 16(DI) + MOVOU X6, 32(DI) + MOVOU X9, 48(DI) + MOVOU 64(SI), X12 + MOVOU 80(SI), X13 + MOVOU 96(SI), X14 + MOVOU 112(SI), X15 + PXOR X12, X1 + PXOR X13, X4 + PXOR X14, X7 + PXOR X15, X10 + MOVOU X1, 64(DI) + MOVOU X4, 80(DI) + MOVOU X7, 96(DI) + MOVOU X10, 112(DI) + MOVO X2, X1 + MOVO X5, X4 + MOVO X8, X7 + MOVO X11, X10 + MOVQ $0x00000080, CX + LEAQ 128(SI), SI + SUBQ $0x80, BX + JMP sealSSE128SealHash + sealSSE128: - // For up to 128 bytes of ciphertext and 64 bytes for the poly key, we require to process three blocks - MOVOU ·chacha20Constants<>(SB), A0; MOVOU (1*16)(keyp), B0; MOVOU (2*16)(keyp), C0; MOVOU (3*16)(keyp), D0 - MOVO A0, A1; MOVO B0, B1; MOVO C0, C1; MOVO D0, D1; PADDL ·sseIncMask<>(SB), D1 - MOVO A1, A2; MOVO B1, B2; MOVO C1, C2; MOVO D1, D2; PADDL ·sseIncMask<>(SB), D2 - MOVO B0, T1; MOVO C0, T2; MOVO D1, T3 - MOVQ $10, itr2 + MOVOU ·chacha20Constants<>+0(SB), X0 + MOVOU 16(R8), X3 + MOVOU 32(R8), X6 + MOVOU 48(R8), X9 + MOVO X0, X1 + MOVO X3, X4 + MOVO X6, X7 + MOVO X9, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X1, X2 + MOVO X4, X5 + MOVO X7, X8 + MOVO X10, X11 + PADDL ·sseIncMask<>+0(SB), X11 + MOVO X3, X13 + MOVO X6, X14 + MOVO X10, X15 + MOVQ $0x0000000a, R9 sealSSE128InnerCipherLoop: - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0); chachaQR(A2, B2, C2, D2, T0) - shiftB0Left; shiftB1Left; shiftB2Left - shiftC0Left; shiftC1Left; shiftC2Left - shiftD0Left; shiftD1Left; shiftD2Left - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0); chachaQR(A2, B2, C2, D2, T0) - shiftB0Right; shiftB1Right; shiftB2Right - shiftC0Right; shiftC1Right; shiftC2Right - shiftD0Right; shiftD1Right; shiftD2Right - DECQ itr2 - JNE sealSSE128InnerCipherLoop + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X5 + PXOR X12, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X5 + PXOR X12, X5 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X5 + PXOR X12, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X5 + PXOR X12, X5 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + DECQ R9 + JNE sealSSE128InnerCipherLoop // A0|B0 hold the Poly1305 32-byte key, C0,D0 can be discarded - PADDL ·chacha20Constants<>(SB), A0; PADDL ·chacha20Constants<>(SB), A1; PADDL ·chacha20Constants<>(SB), A2 - PADDL T1, B0; PADDL T1, B1; PADDL T1, B2 - PADDL T2, C1; PADDL T2, C2 - PADDL T3, D1; PADDL ·sseIncMask<>(SB), T3; PADDL T3, D2 - PAND ·polyClampMask<>(SB), A0 - MOVOU A0, rStore - MOVOU B0, sStore + PADDL ·chacha20Constants<>+0(SB), X0 + PADDL ·chacha20Constants<>+0(SB), X1 + PADDL ·chacha20Constants<>+0(SB), X2 + PADDL X13, X3 + PADDL X13, X4 + PADDL X13, X5 + PADDL X14, X7 + PADDL X14, X8 + PADDL X15, X10 + PADDL ·sseIncMask<>+0(SB), X15 + PADDL X15, X11 + PAND ·polyClampMask<>+0(SB), X0 + MOVOU X0, (BP) + MOVOU X3, 16(BP) // Hash - MOVQ ad_len+80(FP), itr2 + MOVQ ad_len+80(FP), R9 CALL polyHashADInternal<>(SB) - XORQ itr1, itr1 + XORQ CX, CX sealSSE128SealHash: - // itr1 holds the number of bytes encrypted but not yet hashed - CMPQ itr1, $16 - JB sealSSE128Seal - polyAdd(0(oup)) - polyMul - - SUBQ $16, itr1 - ADDQ $16, oup - - JMP sealSSE128SealHash + CMPQ CX, $0x10 + JB sealSSE128Seal + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + SUBQ $0x10, CX + ADDQ $0x10, DI + JMP sealSSE128SealHash sealSSE128Seal: - CMPQ inl, $16 + CMPQ BX, $0x10 JB sealSSETail - SUBQ $16, inl + SUBQ $0x10, BX // Load for decryption - MOVOU (inp), T0 - PXOR T0, A1 - MOVOU A1, (oup) - LEAQ (1*16)(inp), inp - LEAQ (1*16)(oup), oup + MOVOU (SI), X12 + PXOR X12, X1 + MOVOU X1, (DI) + LEAQ 16(SI), SI + LEAQ 16(DI), DI // Extract for hashing - MOVQ A1, t0 - PSRLDQ $8, A1 - MOVQ A1, t1 - ADDQ t0, acc0; ADCQ t1, acc1; ADCQ $1, acc2 - polyMul + MOVQ X1, R13 + PSRLDQ $0x08, X1 + MOVQ X1, R14 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 // Shift the stream "left" - MOVO B1, A1 - MOVO C1, B1 - MOVO D1, C1 - MOVO A2, D1 - MOVO B2, A2 - MOVO C2, B2 - MOVO D2, C2 + MOVO X4, X1 + MOVO X7, X4 + MOVO X10, X7 + MOVO X2, X10 + MOVO X5, X2 + MOVO X8, X5 + MOVO X11, X8 JMP sealSSE128Seal sealSSETail: - TESTQ inl, inl + TESTQ BX, BX JE sealSSEFinalize // We can only load the PT one byte at a time to avoid read after end of buffer - MOVQ inl, itr2 - SHLQ $4, itr2 - LEAQ ·andMask<>(SB), t0 - MOVQ inl, itr1 - LEAQ -1(inp)(inl*1), inp - XORQ t2, t2 - XORQ t3, t3 + MOVQ BX, R9 + SHLQ $0x04, R9 + LEAQ ·andMask<>+0(SB), R13 + MOVQ BX, CX + LEAQ -1(SI)(BX*1), SI + XORQ R15, R15 + XORQ R8, R8 XORQ AX, AX sealSSETailLoadLoop: - SHLQ $8, t2, t3 - SHLQ $8, t2 - MOVB (inp), AX - XORQ AX, t2 - LEAQ -1(inp), inp - DECQ itr1 + SHLQ $0x08, R15, R8 + SHLQ $0x08, R15 + MOVB (SI), AX + XORQ AX, R15 + LEAQ -1(SI), SI + DECQ CX JNE sealSSETailLoadLoop - MOVQ t2, 0+tmpStore - MOVQ t3, 8+tmpStore - PXOR 0+tmpStore, A1 - MOVOU A1, (oup) - MOVOU -16(t0)(itr2*1), T0 - PAND T0, A1 - MOVQ A1, t0 - PSRLDQ $8, A1 - MOVQ A1, t1 - ADDQ t0, acc0; ADCQ t1, acc1; ADCQ $1, acc2 - polyMul - - ADDQ inl, oup + MOVQ R15, 64(BP) + MOVQ R8, 72(BP) + PXOR 64(BP), X1 + MOVOU X1, (DI) + MOVOU -16(R13)(R9*1), X12 + PAND X12, X1 + MOVQ X1, R13 + PSRLDQ $0x08, X1 + MOVQ X1, R14 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + ADDQ BX, DI sealSSEFinalize: // Hash in the buffer lengths - ADDQ ad_len+80(FP), acc0 - ADCQ src_len+56(FP), acc1 - ADCQ $1, acc2 - polyMul + ADDQ ad_len+80(FP), R10 + ADCQ src_len+56(FP), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 // Final reduce - MOVQ acc0, t0 - MOVQ acc1, t1 - MOVQ acc2, t2 - SUBQ $-5, acc0 - SBBQ $-1, acc1 - SBBQ $3, acc2 - CMOVQCS t0, acc0 - CMOVQCS t1, acc1 - CMOVQCS t2, acc2 + MOVQ R10, R13 + MOVQ R11, R14 + MOVQ R12, R15 + SUBQ $-5, R10 + SBBQ $-1, R11 + SBBQ $0x03, R12 + CMOVQCS R13, R10 + CMOVQCS R14, R11 + CMOVQCS R15, R12 // Add in the "s" part of the key - ADDQ 0+sStore, acc0 - ADCQ 8+sStore, acc1 + ADDQ 16(BP), R10 + ADCQ 24(BP), R11 // Finally store the tag at the end of the message - MOVQ acc0, (0*8)(oup) - MOVQ acc1, (1*8)(oup) + MOVQ R10, (DI) + MOVQ R11, 8(DI) RET -// ---------------------------------------------------------------------------- -// ------------------------- AVX2 Code ---------------------------------------- chacha20Poly1305Seal_AVX2: VZEROUPPER - VMOVDQU ·chacha20Constants<>(SB), AA0 - BYTE $0xc4; BYTE $0x42; BYTE $0x7d; BYTE $0x5a; BYTE $0x70; BYTE $0x10 // broadcasti128 16(r8), ymm14 - BYTE $0xc4; BYTE $0x42; BYTE $0x7d; BYTE $0x5a; BYTE $0x60; BYTE $0x20 // broadcasti128 32(r8), ymm12 - BYTE $0xc4; BYTE $0xc2; BYTE $0x7d; BYTE $0x5a; BYTE $0x60; BYTE $0x30 // broadcasti128 48(r8), ymm4 - VPADDD ·avx2InitMask<>(SB), DD0, DD0 + VMOVDQU ·chacha20Constants<>+0(SB), Y0 + BYTE $0xc4 + BYTE $0x42 + BYTE $0x7d + BYTE $0x5a + BYTE $0x70 + BYTE $0x10 + BYTE $0xc4 + BYTE $0x42 + BYTE $0x7d + BYTE $0x5a + BYTE $0x60 + BYTE $0x20 + BYTE $0xc4 + BYTE $0xc2 + BYTE $0x7d + BYTE $0x5a + BYTE $0x60 + BYTE $0x30 + VPADDD ·avx2InitMask<>+0(SB), Y4, Y4 // Special optimizations, for very short buffers - CMPQ inl, $192 - JBE seal192AVX2 // 33% faster - CMPQ inl, $320 - JBE seal320AVX2 // 17% faster + CMPQ BX, $0x000000c0 + JBE seal192AVX2 + CMPQ BX, $0x00000140 + JBE seal320AVX2 // For the general key prepare the key first - as a byproduct we have 64 bytes of cipher stream - VMOVDQA AA0, AA1; VMOVDQA AA0, AA2; VMOVDQA AA0, AA3 - VMOVDQA BB0, BB1; VMOVDQA BB0, BB2; VMOVDQA BB0, BB3; VMOVDQA BB0, state1StoreAVX2 - VMOVDQA CC0, CC1; VMOVDQA CC0, CC2; VMOVDQA CC0, CC3; VMOVDQA CC0, state2StoreAVX2 - VPADDD ·avx2IncMask<>(SB), DD0, DD1; VMOVDQA DD0, ctr0StoreAVX2 - VPADDD ·avx2IncMask<>(SB), DD1, DD2; VMOVDQA DD1, ctr1StoreAVX2 - VPADDD ·avx2IncMask<>(SB), DD2, DD3; VMOVDQA DD2, ctr2StoreAVX2 - VMOVDQA DD3, ctr3StoreAVX2 - MOVQ $10, itr2 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA Y0, Y7 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA Y14, Y11 + VMOVDQA Y14, 32(BP) + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA Y12, Y15 + VMOVDQA Y12, 64(BP) + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VMOVDQA Y4, 96(BP) + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VMOVDQA Y1, 128(BP) + VPADDD ·avx2IncMask<>+0(SB), Y2, Y3 + VMOVDQA Y2, 160(BP) + VMOVDQA Y3, 192(BP) + MOVQ $0x0000000a, R9 sealAVX2IntroLoop: - VMOVDQA CC3, tmpStoreAVX2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, CC3); chachaQR_AVX2(AA1, BB1, CC1, DD1, CC3); chachaQR_AVX2(AA2, BB2, CC2, DD2, CC3) - VMOVDQA tmpStoreAVX2, CC3 - VMOVDQA CC1, tmpStoreAVX2 - chachaQR_AVX2(AA3, BB3, CC3, DD3, CC1) - VMOVDQA tmpStoreAVX2, CC1 - - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $12, DD0, DD0, DD0 - VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $12, DD1, DD1, DD1 - VPALIGNR $4, BB2, BB2, BB2; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $12, DD2, DD2, DD2 - VPALIGNR $4, BB3, BB3, BB3; VPALIGNR $8, CC3, CC3, CC3; VPALIGNR $12, DD3, DD3, DD3 - - VMOVDQA CC3, tmpStoreAVX2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, CC3); chachaQR_AVX2(AA1, BB1, CC1, DD1, CC3); chachaQR_AVX2(AA2, BB2, CC2, DD2, CC3) - VMOVDQA tmpStoreAVX2, CC3 - VMOVDQA CC1, tmpStoreAVX2 - chachaQR_AVX2(AA3, BB3, CC3, DD3, CC1) - VMOVDQA tmpStoreAVX2, CC1 - - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $4, DD0, DD0, DD0 - VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $4, DD1, DD1, DD1 - VPALIGNR $12, BB2, BB2, BB2; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $4, DD2, DD2, DD2 - VPALIGNR $12, BB3, BB3, BB3; VPALIGNR $8, CC3, CC3, CC3; VPALIGNR $4, DD3, DD3, DD3 - DECQ itr2 - JNE sealAVX2IntroLoop - - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1; VPADDD ·chacha20Constants<>(SB), AA2, AA2; VPADDD ·chacha20Constants<>(SB), AA3, AA3 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1; VPADDD state1StoreAVX2, BB2, BB2; VPADDD state1StoreAVX2, BB3, BB3 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1; VPADDD state2StoreAVX2, CC2, CC2; VPADDD state2StoreAVX2, CC3, CC3 - VPADDD ctr0StoreAVX2, DD0, DD0; VPADDD ctr1StoreAVX2, DD1, DD1; VPADDD ctr2StoreAVX2, DD2, DD2; VPADDD ctr3StoreAVX2, DD3, DD3 - - VPERM2I128 $0x13, CC0, DD0, CC0 // Stream bytes 96 - 127 - VPERM2I128 $0x02, AA0, BB0, DD0 // The Poly1305 key - VPERM2I128 $0x13, AA0, BB0, AA0 // Stream bytes 64 - 95 + VMOVDQA Y15, 224(BP) + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VMOVDQA 224(BP), Y15 + VMOVDQA Y13, 224(BP) + VPADDD Y11, Y7, Y7 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y3, Y15, Y15 + VPXOR Y15, Y11, Y11 + VPSLLD $0x0c, Y11, Y13 + VPSRLD $0x14, Y11, Y11 + VPXOR Y13, Y11, Y11 + VPADDD Y11, Y7, Y7 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y3, Y15, Y15 + VPXOR Y15, Y11, Y11 + VPSLLD $0x07, Y11, Y13 + VPSRLD $0x19, Y11, Y11 + VPXOR Y13, Y11, Y11 + VMOVDQA 224(BP), Y13 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPALIGNR $0x04, Y11, Y11, Y11 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x0c, Y3, Y3, Y3 + VMOVDQA Y15, 224(BP) + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VMOVDQA 224(BP), Y15 + VMOVDQA Y13, 224(BP) + VPADDD Y11, Y7, Y7 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y3, Y15, Y15 + VPXOR Y15, Y11, Y11 + VPSLLD $0x0c, Y11, Y13 + VPSRLD $0x14, Y11, Y11 + VPXOR Y13, Y11, Y11 + VPADDD Y11, Y7, Y7 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y3, Y15, Y15 + VPXOR Y15, Y11, Y11 + VPSLLD $0x07, Y11, Y13 + VPSRLD $0x19, Y11, Y11 + VPXOR Y13, Y11, Y11 + VMOVDQA 224(BP), Y13 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x04, Y2, Y2, Y2 + VPALIGNR $0x0c, Y11, Y11, Y11 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x04, Y3, Y3, Y3 + DECQ R9 + JNE sealAVX2IntroLoop + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 + VPADDD ·chacha20Constants<>+0(SB), Y7, Y7 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 32(BP), Y10, Y10 + VPADDD 32(BP), Y11, Y11 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD 64(BP), Y8, Y8 + VPADDD 64(BP), Y15, Y15 + VPADDD 96(BP), Y4, Y4 + VPADDD 128(BP), Y1, Y1 + VPADDD 160(BP), Y2, Y2 + VPADDD 192(BP), Y3, Y3 + VPERM2I128 $0x13, Y12, Y4, Y12 + VPERM2I128 $0x02, Y0, Y14, Y4 + VPERM2I128 $0x13, Y0, Y14, Y0 // Clamp and store poly key - VPAND ·polyClampMask<>(SB), DD0, DD0 - VMOVDQA DD0, rsStoreAVX2 + VPAND ·polyClampMask<>+0(SB), Y4, Y4 + VMOVDQA Y4, (BP) // Hash AD - MOVQ ad_len+80(FP), itr2 + MOVQ ad_len+80(FP), R9 CALL polyHashADInternal<>(SB) // Can store at least 320 bytes - VPXOR (0*32)(inp), AA0, AA0 - VPXOR (1*32)(inp), CC0, CC0 - VMOVDQU AA0, (0*32)(oup) - VMOVDQU CC0, (1*32)(oup) - - VPERM2I128 $0x02, AA1, BB1, AA0; VPERM2I128 $0x02, CC1, DD1, BB0; VPERM2I128 $0x13, AA1, BB1, CC0; VPERM2I128 $0x13, CC1, DD1, DD0 - VPXOR (2*32)(inp), AA0, AA0; VPXOR (3*32)(inp), BB0, BB0; VPXOR (4*32)(inp), CC0, CC0; VPXOR (5*32)(inp), DD0, DD0 - VMOVDQU AA0, (2*32)(oup); VMOVDQU BB0, (3*32)(oup); VMOVDQU CC0, (4*32)(oup); VMOVDQU DD0, (5*32)(oup) - VPERM2I128 $0x02, AA2, BB2, AA0; VPERM2I128 $0x02, CC2, DD2, BB0; VPERM2I128 $0x13, AA2, BB2, CC0; VPERM2I128 $0x13, CC2, DD2, DD0 - VPXOR (6*32)(inp), AA0, AA0; VPXOR (7*32)(inp), BB0, BB0; VPXOR (8*32)(inp), CC0, CC0; VPXOR (9*32)(inp), DD0, DD0 - VMOVDQU AA0, (6*32)(oup); VMOVDQU BB0, (7*32)(oup); VMOVDQU CC0, (8*32)(oup); VMOVDQU DD0, (9*32)(oup) - - MOVQ $320, itr1 - SUBQ $320, inl - LEAQ 320(inp), inp - - VPERM2I128 $0x02, AA3, BB3, AA0; VPERM2I128 $0x02, CC3, DD3, BB0; VPERM2I128 $0x13, AA3, BB3, CC0; VPERM2I128 $0x13, CC3, DD3, DD0 - CMPQ inl, $128 + VPXOR (SI), Y0, Y0 + VPXOR 32(SI), Y12, Y12 + VMOVDQU Y0, (DI) + VMOVDQU Y12, 32(DI) + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 + VPXOR 64(SI), Y0, Y0 + VPXOR 96(SI), Y14, Y14 + VPXOR 128(SI), Y12, Y12 + VPXOR 160(SI), Y4, Y4 + VMOVDQU Y0, 64(DI) + VMOVDQU Y14, 96(DI) + VMOVDQU Y12, 128(DI) + VMOVDQU Y4, 160(DI) + VPERM2I128 $0x02, Y6, Y10, Y0 + VPERM2I128 $0x02, Y8, Y2, Y14 + VPERM2I128 $0x13, Y6, Y10, Y12 + VPERM2I128 $0x13, Y8, Y2, Y4 + VPXOR 192(SI), Y0, Y0 + VPXOR 224(SI), Y14, Y14 + VPXOR 256(SI), Y12, Y12 + VPXOR 288(SI), Y4, Y4 + VMOVDQU Y0, 192(DI) + VMOVDQU Y14, 224(DI) + VMOVDQU Y12, 256(DI) + VMOVDQU Y4, 288(DI) + MOVQ $0x00000140, CX + SUBQ $0x00000140, BX + LEAQ 320(SI), SI + VPERM2I128 $0x02, Y7, Y11, Y0 + VPERM2I128 $0x02, Y15, Y3, Y14 + VPERM2I128 $0x13, Y7, Y11, Y12 + VPERM2I128 $0x13, Y15, Y3, Y4 + CMPQ BX, $0x80 JBE sealAVX2SealHash - - VPXOR (0*32)(inp), AA0, AA0; VPXOR (1*32)(inp), BB0, BB0; VPXOR (2*32)(inp), CC0, CC0; VPXOR (3*32)(inp), DD0, DD0 - VMOVDQU AA0, (10*32)(oup); VMOVDQU BB0, (11*32)(oup); VMOVDQU CC0, (12*32)(oup); VMOVDQU DD0, (13*32)(oup) - SUBQ $128, inl - LEAQ 128(inp), inp - - MOVQ $8, itr1 - MOVQ $2, itr2 - - CMPQ inl, $128 - JBE sealAVX2Tail128 - CMPQ inl, $256 - JBE sealAVX2Tail256 - CMPQ inl, $384 - JBE sealAVX2Tail384 - CMPQ inl, $512 - JBE sealAVX2Tail512 + VPXOR (SI), Y0, Y0 + VPXOR 32(SI), Y14, Y14 + VPXOR 64(SI), Y12, Y12 + VPXOR 96(SI), Y4, Y4 + VMOVDQU Y0, 320(DI) + VMOVDQU Y14, 352(DI) + VMOVDQU Y12, 384(DI) + VMOVDQU Y4, 416(DI) + SUBQ $0x80, BX + LEAQ 128(SI), SI + MOVQ $0x00000008, CX + MOVQ $0x00000002, R9 + CMPQ BX, $0x80 + JBE sealAVX2Tail128 + CMPQ BX, $0x00000100 + JBE sealAVX2Tail256 + CMPQ BX, $0x00000180 + JBE sealAVX2Tail384 + CMPQ BX, $0x00000200 + JBE sealAVX2Tail512 // We have 448 bytes to hash, but main loop hashes 512 bytes at a time - perform some rounds, before the main loop - VMOVDQA ·chacha20Constants<>(SB), AA0; VMOVDQA AA0, AA1; VMOVDQA AA0, AA2; VMOVDQA AA0, AA3 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA BB0, BB1; VMOVDQA BB0, BB2; VMOVDQA BB0, BB3 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA CC0, CC1; VMOVDQA CC0, CC2; VMOVDQA CC0, CC3 - VMOVDQA ctr3StoreAVX2, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD1; VPADDD ·avx2IncMask<>(SB), DD1, DD2; VPADDD ·avx2IncMask<>(SB), DD2, DD3 - VMOVDQA DD0, ctr0StoreAVX2; VMOVDQA DD1, ctr1StoreAVX2; VMOVDQA DD2, ctr2StoreAVX2; VMOVDQA DD3, ctr3StoreAVX2 - - VMOVDQA CC3, tmpStoreAVX2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, CC3); chachaQR_AVX2(AA1, BB1, CC1, DD1, CC3); chachaQR_AVX2(AA2, BB2, CC2, DD2, CC3) - VMOVDQA tmpStoreAVX2, CC3 - VMOVDQA CC1, tmpStoreAVX2 - chachaQR_AVX2(AA3, BB3, CC3, DD3, CC1) - VMOVDQA tmpStoreAVX2, CC1 - - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $12, DD0, DD0, DD0 - VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $12, DD1, DD1, DD1 - VPALIGNR $4, BB2, BB2, BB2; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $12, DD2, DD2, DD2 - VPALIGNR $4, BB3, BB3, BB3; VPALIGNR $8, CC3, CC3, CC3; VPALIGNR $12, DD3, DD3, DD3 - - VMOVDQA CC3, tmpStoreAVX2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, CC3); chachaQR_AVX2(AA1, BB1, CC1, DD1, CC3); chachaQR_AVX2(AA2, BB2, CC2, DD2, CC3) - VMOVDQA tmpStoreAVX2, CC3 - VMOVDQA CC1, tmpStoreAVX2 - chachaQR_AVX2(AA3, BB3, CC3, DD3, CC1) - VMOVDQA tmpStoreAVX2, CC1 - - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $4, DD0, DD0, DD0 - VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $4, DD1, DD1, DD1 - VPALIGNR $12, BB2, BB2, BB2; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $4, DD2, DD2, DD2 - VPALIGNR $12, BB3, BB3, BB3; VPALIGNR $8, CC3, CC3, CC3; VPALIGNR $4, DD3, DD3, DD3 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - - SUBQ $16, oup // Adjust the pointer - MOVQ $9, itr1 - JMP sealAVX2InternalLoopStart + VMOVDQA ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA Y0, Y7 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA Y14, Y11 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA Y12, Y15 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VPADDD ·avx2IncMask<>+0(SB), Y2, Y3 + VMOVDQA Y4, 96(BP) + VMOVDQA Y1, 128(BP) + VMOVDQA Y2, 160(BP) + VMOVDQA Y3, 192(BP) + VMOVDQA Y15, 224(BP) + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VMOVDQA 224(BP), Y15 + VMOVDQA Y13, 224(BP) + VPADDD Y11, Y7, Y7 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y3, Y15, Y15 + VPXOR Y15, Y11, Y11 + VPSLLD $0x0c, Y11, Y13 + VPSRLD $0x14, Y11, Y11 + VPXOR Y13, Y11, Y11 + VPADDD Y11, Y7, Y7 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y3, Y15, Y15 + VPXOR Y15, Y11, Y11 + VPSLLD $0x07, Y11, Y13 + VPSRLD $0x19, Y11, Y11 + VPXOR Y13, Y11, Y11 + VMOVDQA 224(BP), Y13 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPALIGNR $0x04, Y11, Y11, Y11 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x0c, Y3, Y3, Y3 + VMOVDQA Y15, 224(BP) + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VMOVDQA 224(BP), Y15 + VMOVDQA Y13, 224(BP) + VPADDD Y11, Y7, Y7 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y3, Y15, Y15 + VPXOR Y15, Y11, Y11 + VPSLLD $0x0c, Y11, Y13 + VPSRLD $0x14, Y11, Y11 + VPXOR Y13, Y11, Y11 + VPADDD Y11, Y7, Y7 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y3, Y15, Y15 + VPXOR Y15, Y11, Y11 + VPSLLD $0x07, Y11, Y13 + VPSRLD $0x19, Y11, Y11 + VPXOR Y13, Y11, Y11 + VMOVDQA 224(BP), Y13 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x04, Y2, Y2, Y2 + VPALIGNR $0x0c, Y11, Y11, Y11 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x04, Y3, Y3, Y3 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + SUBQ $0x10, DI + MOVQ $0x00000009, CX + JMP sealAVX2InternalLoopStart sealAVX2MainLoop: - // Load state, increment counter blocks, store the incremented counters - VMOVDQU ·chacha20Constants<>(SB), AA0; VMOVDQA AA0, AA1; VMOVDQA AA0, AA2; VMOVDQA AA0, AA3 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA BB0, BB1; VMOVDQA BB0, BB2; VMOVDQA BB0, BB3 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA CC0, CC1; VMOVDQA CC0, CC2; VMOVDQA CC0, CC3 - VMOVDQA ctr3StoreAVX2, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD1; VPADDD ·avx2IncMask<>(SB), DD1, DD2; VPADDD ·avx2IncMask<>(SB), DD2, DD3 - VMOVDQA DD0, ctr0StoreAVX2; VMOVDQA DD1, ctr1StoreAVX2; VMOVDQA DD2, ctr2StoreAVX2; VMOVDQA DD3, ctr3StoreAVX2 - MOVQ $10, itr1 + VMOVDQU ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA Y0, Y7 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA Y14, Y11 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA Y12, Y15 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VPADDD ·avx2IncMask<>+0(SB), Y2, Y3 + VMOVDQA Y4, 96(BP) + VMOVDQA Y1, 128(BP) + VMOVDQA Y2, 160(BP) + VMOVDQA Y3, 192(BP) + MOVQ $0x0000000a, CX sealAVX2InternalLoop: - polyAdd(0*8(oup)) - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - polyMulStage1_AVX2 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - polyMulStage2_AVX2 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - polyMulStage3_AVX2 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyMulReduceStage + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 sealAVX2InternalLoopStart: - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol8<>(SB), DD0, DD0; VPSHUFB ·rol8<>(SB), DD1, DD1; VPSHUFB ·rol8<>(SB), DD2, DD2; VPSHUFB ·rol8<>(SB), DD3, DD3 - polyAdd(2*8(oup)) - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - polyMulStage1_AVX2 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $7, BB0, CC3; VPSRLD $25, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $7, BB1, CC3; VPSRLD $25, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $7, BB2, CC3; VPSRLD $25, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $7, BB3, CC3; VPSRLD $25, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyMulStage2_AVX2 - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $4, BB2, BB2, BB2; VPALIGNR $4, BB3, BB3, BB3 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $8, CC3, CC3, CC3 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1; VPALIGNR $12, DD2, DD2, DD2; VPALIGNR $12, DD3, DD3, DD3 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - polyMulStage3_AVX2 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - polyMulReduceStage - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - polyAdd(4*8(oup)) - LEAQ (6*8)(oup), oup - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyMulStage1_AVX2 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - polyMulStage2_AVX2 - VPSHUFB ·rol8<>(SB), DD0, DD0; VPSHUFB ·rol8<>(SB), DD1, DD1; VPSHUFB ·rol8<>(SB), DD2, DD2; VPSHUFB ·rol8<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - polyMulStage3_AVX2 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $7, BB0, CC3; VPSRLD $25, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $7, BB1, CC3; VPSRLD $25, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $7, BB2, CC3; VPSRLD $25, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $7, BB3, CC3; VPSRLD $25, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyMulReduceStage - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $12, BB2, BB2, BB2; VPALIGNR $12, BB3, BB3, BB3 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $8, CC3, CC3, CC3 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1; VPALIGNR $4, DD2, DD2, DD2; VPALIGNR $4, DD3, DD3, DD3 - DECQ itr1 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + ADDQ 16(DI), R10 + ADCQ 24(DI), R11 + ADCQ $0x01, R12 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x04, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPALIGNR $0x0c, Y3, Y3, Y3 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + ADDQ 32(DI), R10 + ADCQ 40(DI), R11 + ADCQ $0x01, R12 + LEAQ 48(DI), DI + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x0c, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + VPALIGNR $0x04, Y3, Y3, Y3 + DECQ CX JNE sealAVX2InternalLoop - - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1; VPADDD ·chacha20Constants<>(SB), AA2, AA2; VPADDD ·chacha20Constants<>(SB), AA3, AA3 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1; VPADDD state1StoreAVX2, BB2, BB2; VPADDD state1StoreAVX2, BB3, BB3 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1; VPADDD state2StoreAVX2, CC2, CC2; VPADDD state2StoreAVX2, CC3, CC3 - VPADDD ctr0StoreAVX2, DD0, DD0; VPADDD ctr1StoreAVX2, DD1, DD1; VPADDD ctr2StoreAVX2, DD2, DD2; VPADDD ctr3StoreAVX2, DD3, DD3 - VMOVDQA CC3, tmpStoreAVX2 + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 + VPADDD ·chacha20Constants<>+0(SB), Y7, Y7 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 32(BP), Y10, Y10 + VPADDD 32(BP), Y11, Y11 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD 64(BP), Y8, Y8 + VPADDD 64(BP), Y15, Y15 + VPADDD 96(BP), Y4, Y4 + VPADDD 128(BP), Y1, Y1 + VPADDD 160(BP), Y2, Y2 + VPADDD 192(BP), Y3, Y3 + VMOVDQA Y15, 224(BP) // We only hashed 480 of the 512 bytes available - hash the remaining 32 here - polyAdd(0*8(oup)) - polyMulAVX2 - LEAQ (4*8)(oup), oup - VPERM2I128 $0x02, AA0, BB0, CC3; VPERM2I128 $0x13, AA0, BB0, BB0; VPERM2I128 $0x02, CC0, DD0, AA0; VPERM2I128 $0x13, CC0, DD0, CC0 - VPXOR (0*32)(inp), CC3, CC3; VPXOR (1*32)(inp), AA0, AA0; VPXOR (2*32)(inp), BB0, BB0; VPXOR (3*32)(inp), CC0, CC0 - VMOVDQU CC3, (0*32)(oup); VMOVDQU AA0, (1*32)(oup); VMOVDQU BB0, (2*32)(oup); VMOVDQU CC0, (3*32)(oup) - VPERM2I128 $0x02, AA1, BB1, AA0; VPERM2I128 $0x02, CC1, DD1, BB0; VPERM2I128 $0x13, AA1, BB1, CC0; VPERM2I128 $0x13, CC1, DD1, DD0 - VPXOR (4*32)(inp), AA0, AA0; VPXOR (5*32)(inp), BB0, BB0; VPXOR (6*32)(inp), CC0, CC0; VPXOR (7*32)(inp), DD0, DD0 - VMOVDQU AA0, (4*32)(oup); VMOVDQU BB0, (5*32)(oup); VMOVDQU CC0, (6*32)(oup); VMOVDQU DD0, (7*32)(oup) + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 32(DI), DI + VPERM2I128 $0x02, Y0, Y14, Y15 + VPERM2I128 $0x13, Y0, Y14, Y14 + VPERM2I128 $0x02, Y12, Y4, Y0 + VPERM2I128 $0x13, Y12, Y4, Y12 + VPXOR (SI), Y15, Y15 + VPXOR 32(SI), Y0, Y0 + VPXOR 64(SI), Y14, Y14 + VPXOR 96(SI), Y12, Y12 + VMOVDQU Y15, (DI) + VMOVDQU Y0, 32(DI) + VMOVDQU Y14, 64(DI) + VMOVDQU Y12, 96(DI) + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 + VPXOR 128(SI), Y0, Y0 + VPXOR 160(SI), Y14, Y14 + VPXOR 192(SI), Y12, Y12 + VPXOR 224(SI), Y4, Y4 + VMOVDQU Y0, 128(DI) + VMOVDQU Y14, 160(DI) + VMOVDQU Y12, 192(DI) + VMOVDQU Y4, 224(DI) // and here - polyAdd(-2*8(oup)) - polyMulAVX2 - VPERM2I128 $0x02, AA2, BB2, AA0; VPERM2I128 $0x02, CC2, DD2, BB0; VPERM2I128 $0x13, AA2, BB2, CC0; VPERM2I128 $0x13, CC2, DD2, DD0 - VPXOR (8*32)(inp), AA0, AA0; VPXOR (9*32)(inp), BB0, BB0; VPXOR (10*32)(inp), CC0, CC0; VPXOR (11*32)(inp), DD0, DD0 - VMOVDQU AA0, (8*32)(oup); VMOVDQU BB0, (9*32)(oup); VMOVDQU CC0, (10*32)(oup); VMOVDQU DD0, (11*32)(oup) - VPERM2I128 $0x02, AA3, BB3, AA0; VPERM2I128 $0x02, tmpStoreAVX2, DD3, BB0; VPERM2I128 $0x13, AA3, BB3, CC0; VPERM2I128 $0x13, tmpStoreAVX2, DD3, DD0 - VPXOR (12*32)(inp), AA0, AA0; VPXOR (13*32)(inp), BB0, BB0; VPXOR (14*32)(inp), CC0, CC0; VPXOR (15*32)(inp), DD0, DD0 - VMOVDQU AA0, (12*32)(oup); VMOVDQU BB0, (13*32)(oup); VMOVDQU CC0, (14*32)(oup); VMOVDQU DD0, (15*32)(oup) - LEAQ (32*16)(inp), inp - SUBQ $(32*16), inl - CMPQ inl, $512 + ADDQ -16(DI), R10 + ADCQ -8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPERM2I128 $0x02, Y6, Y10, Y0 + VPERM2I128 $0x02, Y8, Y2, Y14 + VPERM2I128 $0x13, Y6, Y10, Y12 + VPERM2I128 $0x13, Y8, Y2, Y4 + VPXOR 256(SI), Y0, Y0 + VPXOR 288(SI), Y14, Y14 + VPXOR 320(SI), Y12, Y12 + VPXOR 352(SI), Y4, Y4 + VMOVDQU Y0, 256(DI) + VMOVDQU Y14, 288(DI) + VMOVDQU Y12, 320(DI) + VMOVDQU Y4, 352(DI) + VPERM2I128 $0x02, Y7, Y11, Y0 + VPERM2I128 $0x02, 224(BP), Y3, Y14 + VPERM2I128 $0x13, Y7, Y11, Y12 + VPERM2I128 $0x13, 224(BP), Y3, Y4 + VPXOR 384(SI), Y0, Y0 + VPXOR 416(SI), Y14, Y14 + VPXOR 448(SI), Y12, Y12 + VPXOR 480(SI), Y4, Y4 + VMOVDQU Y0, 384(DI) + VMOVDQU Y14, 416(DI) + VMOVDQU Y12, 448(DI) + VMOVDQU Y4, 480(DI) + LEAQ 512(SI), SI + SUBQ $0x00000200, BX + CMPQ BX, $0x00000200 JG sealAVX2MainLoop // Tail can only hash 480 bytes - polyAdd(0*8(oup)) - polyMulAVX2 - polyAdd(2*8(oup)) - polyMulAVX2 - LEAQ 32(oup), oup - - MOVQ $10, itr1 - MOVQ $0, itr2 - CMPQ inl, $128 - JBE sealAVX2Tail128 - CMPQ inl, $256 - JBE sealAVX2Tail256 - CMPQ inl, $384 - JBE sealAVX2Tail384 - JMP sealAVX2Tail512 - -// ---------------------------------------------------------------------------- -// Special optimization for buffers smaller than 193 bytes + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + ADDQ 16(DI), R10 + ADCQ 24(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 32(DI), DI + MOVQ $0x0000000a, CX + MOVQ $0x00000000, R9 + CMPQ BX, $0x80 + JBE sealAVX2Tail128 + CMPQ BX, $0x00000100 + JBE sealAVX2Tail256 + CMPQ BX, $0x00000180 + JBE sealAVX2Tail384 + JMP sealAVX2Tail512 + seal192AVX2: - // For up to 192 bytes of ciphertext and 64 bytes for the poly key, we process four blocks - VMOVDQA AA0, AA1 - VMOVDQA BB0, BB1 - VMOVDQA CC0, CC1 - VPADDD ·avx2IncMask<>(SB), DD0, DD1 - VMOVDQA AA0, AA2 - VMOVDQA BB0, BB2 - VMOVDQA CC0, CC2 - VMOVDQA DD0, DD2 - VMOVDQA DD1, TT3 - MOVQ $10, itr2 + VMOVDQA Y0, Y5 + VMOVDQA Y14, Y9 + VMOVDQA Y12, Y13 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VMOVDQA Y0, Y6 + VMOVDQA Y14, Y10 + VMOVDQA Y12, Y8 + VMOVDQA Y4, Y2 + VMOVDQA Y1, Y15 + MOVQ $0x0000000a, R9 sealAVX2192InnerCipherLoop: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1 - DECQ itr2 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + DECQ R9 JNE sealAVX2192InnerCipherLoop - VPADDD AA2, AA0, AA0; VPADDD AA2, AA1, AA1 - VPADDD BB2, BB0, BB0; VPADDD BB2, BB1, BB1 - VPADDD CC2, CC0, CC0; VPADDD CC2, CC1, CC1 - VPADDD DD2, DD0, DD0; VPADDD TT3, DD1, DD1 - VPERM2I128 $0x02, AA0, BB0, TT0 + VPADDD Y6, Y0, Y0 + VPADDD Y6, Y5, Y5 + VPADDD Y10, Y14, Y14 + VPADDD Y10, Y9, Y9 + VPADDD Y8, Y12, Y12 + VPADDD Y8, Y13, Y13 + VPADDD Y2, Y4, Y4 + VPADDD Y15, Y1, Y1 + VPERM2I128 $0x02, Y0, Y14, Y3 // Clamp and store poly key - VPAND ·polyClampMask<>(SB), TT0, TT0 - VMOVDQA TT0, rsStoreAVX2 + VPAND ·polyClampMask<>+0(SB), Y3, Y3 + VMOVDQA Y3, (BP) // Stream for up to 192 bytes - VPERM2I128 $0x13, AA0, BB0, AA0 - VPERM2I128 $0x13, CC0, DD0, BB0 - VPERM2I128 $0x02, AA1, BB1, CC0 - VPERM2I128 $0x02, CC1, DD1, DD0 - VPERM2I128 $0x13, AA1, BB1, AA1 - VPERM2I128 $0x13, CC1, DD1, BB1 + VPERM2I128 $0x13, Y0, Y14, Y0 + VPERM2I128 $0x13, Y12, Y4, Y14 + VPERM2I128 $0x02, Y5, Y9, Y12 + VPERM2I128 $0x02, Y13, Y1, Y4 + VPERM2I128 $0x13, Y5, Y9, Y5 + VPERM2I128 $0x13, Y13, Y1, Y9 sealAVX2ShortSeal: // Hash aad - MOVQ ad_len+80(FP), itr2 + MOVQ ad_len+80(FP), R9 CALL polyHashADInternal<>(SB) - XORQ itr1, itr1 + XORQ CX, CX sealAVX2SealHash: // itr1 holds the number of bytes encrypted but not yet hashed - CMPQ itr1, $16 - JB sealAVX2ShortSealLoop - polyAdd(0(oup)) - polyMul - SUBQ $16, itr1 - ADDQ $16, oup - JMP sealAVX2SealHash + CMPQ CX, $0x10 + JB sealAVX2ShortSealLoop + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + SUBQ $0x10, CX + ADDQ $0x10, DI + JMP sealAVX2SealHash sealAVX2ShortSealLoop: - CMPQ inl, $32 + CMPQ BX, $0x20 JB sealAVX2ShortTail32 - SUBQ $32, inl + SUBQ $0x20, BX // Load for encryption - VPXOR (inp), AA0, AA0 - VMOVDQU AA0, (oup) - LEAQ (1*32)(inp), inp + VPXOR (SI), Y0, Y0 + VMOVDQU Y0, (DI) + LEAQ 32(SI), SI // Now can hash - polyAdd(0*8(oup)) - polyMulAVX2 - polyAdd(2*8(oup)) - polyMulAVX2 - LEAQ (1*32)(oup), oup + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + ADDQ 16(DI), R10 + ADCQ 24(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 32(DI), DI // Shift stream left - VMOVDQA BB0, AA0 - VMOVDQA CC0, BB0 - VMOVDQA DD0, CC0 - VMOVDQA AA1, DD0 - VMOVDQA BB1, AA1 - VMOVDQA CC1, BB1 - VMOVDQA DD1, CC1 - VMOVDQA AA2, DD1 - VMOVDQA BB2, AA2 + VMOVDQA Y14, Y0 + VMOVDQA Y12, Y14 + VMOVDQA Y4, Y12 + VMOVDQA Y5, Y4 + VMOVDQA Y9, Y5 + VMOVDQA Y13, Y9 + VMOVDQA Y1, Y13 + VMOVDQA Y6, Y1 + VMOVDQA Y10, Y6 JMP sealAVX2ShortSealLoop sealAVX2ShortTail32: - CMPQ inl, $16 - VMOVDQA A0, A1 + CMPQ BX, $0x10 + VMOVDQA X0, X1 JB sealAVX2ShortDone - - SUBQ $16, inl + SUBQ $0x10, BX // Load for encryption - VPXOR (inp), A0, T0 - VMOVDQU T0, (oup) - LEAQ (1*16)(inp), inp + VPXOR (SI), X0, X12 + VMOVDQU X12, (DI) + LEAQ 16(SI), SI // Hash - polyAdd(0*8(oup)) - polyMulAVX2 - LEAQ (1*16)(oup), oup - VPERM2I128 $0x11, AA0, AA0, AA0 - VMOVDQA A0, A1 + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI + VPERM2I128 $0x11, Y0, Y0, Y0 + VMOVDQA X0, X1 sealAVX2ShortDone: VZEROUPPER JMP sealSSETail -// ---------------------------------------------------------------------------- -// Special optimization for buffers smaller than 321 bytes seal320AVX2: - // For up to 320 bytes of ciphertext and 64 bytes for the poly key, we process six blocks - VMOVDQA AA0, AA1; VMOVDQA BB0, BB1; VMOVDQA CC0, CC1; VPADDD ·avx2IncMask<>(SB), DD0, DD1 - VMOVDQA AA0, AA2; VMOVDQA BB0, BB2; VMOVDQA CC0, CC2; VPADDD ·avx2IncMask<>(SB), DD1, DD2 - VMOVDQA BB0, TT1; VMOVDQA CC0, TT2; VMOVDQA DD0, TT3 - MOVQ $10, itr2 + VMOVDQA Y0, Y5 + VMOVDQA Y14, Y9 + VMOVDQA Y12, Y13 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VMOVDQA Y0, Y6 + VMOVDQA Y14, Y10 + VMOVDQA Y12, Y8 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VMOVDQA Y14, Y7 + VMOVDQA Y12, Y11 + VMOVDQA Y4, Y15 + MOVQ $0x0000000a, R9 sealAVX2320InnerCipherLoop: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0); chachaQR_AVX2(AA2, BB2, CC2, DD2, TT0) - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $4, BB2, BB2, BB2 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1; VPALIGNR $12, DD2, DD2, DD2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0); chachaQR_AVX2(AA2, BB2, CC2, DD2, TT0) - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $12, BB2, BB2, BB2 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1; VPALIGNR $4, DD2, DD2, DD2 - DECQ itr2 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + DECQ R9 JNE sealAVX2320InnerCipherLoop - - VMOVDQA ·chacha20Constants<>(SB), TT0 - VPADDD TT0, AA0, AA0; VPADDD TT0, AA1, AA1; VPADDD TT0, AA2, AA2 - VPADDD TT1, BB0, BB0; VPADDD TT1, BB1, BB1; VPADDD TT1, BB2, BB2 - VPADDD TT2, CC0, CC0; VPADDD TT2, CC1, CC1; VPADDD TT2, CC2, CC2 - VMOVDQA ·avx2IncMask<>(SB), TT0 - VPADDD TT3, DD0, DD0; VPADDD TT0, TT3, TT3 - VPADDD TT3, DD1, DD1; VPADDD TT0, TT3, TT3 - VPADDD TT3, DD2, DD2 + VMOVDQA ·chacha20Constants<>+0(SB), Y3 + VPADDD Y3, Y0, Y0 + VPADDD Y3, Y5, Y5 + VPADDD Y3, Y6, Y6 + VPADDD Y7, Y14, Y14 + VPADDD Y7, Y9, Y9 + VPADDD Y7, Y10, Y10 + VPADDD Y11, Y12, Y12 + VPADDD Y11, Y13, Y13 + VPADDD Y11, Y8, Y8 + VMOVDQA ·avx2IncMask<>+0(SB), Y3 + VPADDD Y15, Y4, Y4 + VPADDD Y3, Y15, Y15 + VPADDD Y15, Y1, Y1 + VPADDD Y3, Y15, Y15 + VPADDD Y15, Y2, Y2 // Clamp and store poly key - VPERM2I128 $0x02, AA0, BB0, TT0 - VPAND ·polyClampMask<>(SB), TT0, TT0 - VMOVDQA TT0, rsStoreAVX2 + VPERM2I128 $0x02, Y0, Y14, Y3 + VPAND ·polyClampMask<>+0(SB), Y3, Y3 + VMOVDQA Y3, (BP) // Stream for up to 320 bytes - VPERM2I128 $0x13, AA0, BB0, AA0 - VPERM2I128 $0x13, CC0, DD0, BB0 - VPERM2I128 $0x02, AA1, BB1, CC0 - VPERM2I128 $0x02, CC1, DD1, DD0 - VPERM2I128 $0x13, AA1, BB1, AA1 - VPERM2I128 $0x13, CC1, DD1, BB1 - VPERM2I128 $0x02, AA2, BB2, CC1 - VPERM2I128 $0x02, CC2, DD2, DD1 - VPERM2I128 $0x13, AA2, BB2, AA2 - VPERM2I128 $0x13, CC2, DD2, BB2 + VPERM2I128 $0x13, Y0, Y14, Y0 + VPERM2I128 $0x13, Y12, Y4, Y14 + VPERM2I128 $0x02, Y5, Y9, Y12 + VPERM2I128 $0x02, Y13, Y1, Y4 + VPERM2I128 $0x13, Y5, Y9, Y5 + VPERM2I128 $0x13, Y13, Y1, Y9 + VPERM2I128 $0x02, Y6, Y10, Y13 + VPERM2I128 $0x02, Y8, Y2, Y1 + VPERM2I128 $0x13, Y6, Y10, Y6 + VPERM2I128 $0x13, Y8, Y2, Y10 JMP sealAVX2ShortSeal -// ---------------------------------------------------------------------------- -// Special optimization for the last 128 bytes of ciphertext sealAVX2Tail128: - // Need to decrypt up to 128 bytes - prepare two blocks - // If we got here after the main loop - there are 512 encrypted bytes waiting to be hashed - // If we got here before the main loop - there are 448 encrpyred bytes waiting to be hashed - VMOVDQA ·chacha20Constants<>(SB), AA0 - VMOVDQA state1StoreAVX2, BB0 - VMOVDQA state2StoreAVX2, CC0 - VMOVDQA ctr3StoreAVX2, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD0 - VMOVDQA DD0, DD1 + VMOVDQA ·chacha20Constants<>+0(SB), Y0 + VMOVDQA 32(BP), Y14 + VMOVDQA 64(BP), Y12 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VMOVDQA Y4, Y1 sealAVX2Tail128LoopA: - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI sealAVX2Tail128LoopB: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0) - polyAdd(0(oup)) - polyMul - VPALIGNR $4, BB0, BB0, BB0 - VPALIGNR $8, CC0, CC0, CC0 - VPALIGNR $12, DD0, DD0, DD0 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0) - polyAdd(16(oup)) - polyMul - LEAQ 32(oup), oup - VPALIGNR $12, BB0, BB0, BB0 - VPALIGNR $8, CC0, CC0, CC0 - VPALIGNR $4, DD0, DD0, DD0 - DECQ itr1 - JG sealAVX2Tail128LoopA - DECQ itr2 - JGE sealAVX2Tail128LoopB - - VPADDD ·chacha20Constants<>(SB), AA0, AA1 - VPADDD state1StoreAVX2, BB0, BB1 - VPADDD state2StoreAVX2, CC0, CC1 - VPADDD DD1, DD0, DD1 - - VPERM2I128 $0x02, AA1, BB1, AA0 - VPERM2I128 $0x02, CC1, DD1, BB0 - VPERM2I128 $0x13, AA1, BB1, CC0 - VPERM2I128 $0x13, CC1, DD1, DD0 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + ADDQ 16(DI), R10 + ADCQ 24(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 32(DI), DI + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x04, Y4, Y4, Y4 + DECQ CX + JG sealAVX2Tail128LoopA + DECQ R9 + JGE sealAVX2Tail128LoopB + VPADDD ·chacha20Constants<>+0(SB), Y0, Y5 + VPADDD 32(BP), Y14, Y9 + VPADDD 64(BP), Y12, Y13 + VPADDD Y1, Y4, Y1 + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 JMP sealAVX2ShortSealLoop -// ---------------------------------------------------------------------------- -// Special optimization for the last 256 bytes of ciphertext sealAVX2Tail256: - // Need to decrypt up to 256 bytes - prepare two blocks - // If we got here after the main loop - there are 512 encrypted bytes waiting to be hashed - // If we got here before the main loop - there are 448 encrpyred bytes waiting to be hashed - VMOVDQA ·chacha20Constants<>(SB), AA0; VMOVDQA ·chacha20Constants<>(SB), AA1 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA state1StoreAVX2, BB1 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA state2StoreAVX2, CC1 - VMOVDQA ctr3StoreAVX2, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD1 - VMOVDQA DD0, TT1 - VMOVDQA DD1, TT2 + VMOVDQA ·chacha20Constants<>+0(SB), Y0 + VMOVDQA ·chacha20Constants<>+0(SB), Y5 + VMOVDQA 32(BP), Y14 + VMOVDQA 32(BP), Y9 + VMOVDQA 64(BP), Y12 + VMOVDQA 64(BP), Y13 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VMOVDQA Y4, Y7 + VMOVDQA Y1, Y11 sealAVX2Tail256LoopA: - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI sealAVX2Tail256LoopB: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - polyAdd(0(oup)) - polyMul - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - polyAdd(16(oup)) - polyMul - LEAQ 32(oup), oup - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1 - DECQ itr1 - JG sealAVX2Tail256LoopA - DECQ itr2 - JGE sealAVX2Tail256LoopB - - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1 - VPADDD TT1, DD0, DD0; VPADDD TT2, DD1, DD1 - VPERM2I128 $0x02, AA0, BB0, TT0 - VPERM2I128 $0x02, CC0, DD0, TT1 - VPERM2I128 $0x13, AA0, BB0, TT2 - VPERM2I128 $0x13, CC0, DD0, TT3 - VPXOR (0*32)(inp), TT0, TT0; VPXOR (1*32)(inp), TT1, TT1; VPXOR (2*32)(inp), TT2, TT2; VPXOR (3*32)(inp), TT3, TT3 - VMOVDQU TT0, (0*32)(oup); VMOVDQU TT1, (1*32)(oup); VMOVDQU TT2, (2*32)(oup); VMOVDQU TT3, (3*32)(oup) - MOVQ $128, itr1 - LEAQ 128(inp), inp - SUBQ $128, inl - VPERM2I128 $0x02, AA1, BB1, AA0 - VPERM2I128 $0x02, CC1, DD1, BB0 - VPERM2I128 $0x13, AA1, BB1, CC0 - VPERM2I128 $0x13, CC1, DD1, DD0 - - JMP sealAVX2SealHash - -// ---------------------------------------------------------------------------- -// Special optimization for the last 384 bytes of ciphertext + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + ADDQ 16(DI), R10 + ADCQ 24(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 32(DI), DI + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + DECQ CX + JG sealAVX2Tail256LoopA + DECQ R9 + JGE sealAVX2Tail256LoopB + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD Y7, Y4, Y4 + VPADDD Y11, Y1, Y1 + VPERM2I128 $0x02, Y0, Y14, Y3 + VPERM2I128 $0x02, Y12, Y4, Y7 + VPERM2I128 $0x13, Y0, Y14, Y11 + VPERM2I128 $0x13, Y12, Y4, Y15 + VPXOR (SI), Y3, Y3 + VPXOR 32(SI), Y7, Y7 + VPXOR 64(SI), Y11, Y11 + VPXOR 96(SI), Y15, Y15 + VMOVDQU Y3, (DI) + VMOVDQU Y7, 32(DI) + VMOVDQU Y11, 64(DI) + VMOVDQU Y15, 96(DI) + MOVQ $0x00000080, CX + LEAQ 128(SI), SI + SUBQ $0x80, BX + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 + JMP sealAVX2SealHash + sealAVX2Tail384: - // Need to decrypt up to 384 bytes - prepare two blocks - // If we got here after the main loop - there are 512 encrypted bytes waiting to be hashed - // If we got here before the main loop - there are 448 encrpyred bytes waiting to be hashed - VMOVDQA ·chacha20Constants<>(SB), AA0; VMOVDQA AA0, AA1; VMOVDQA AA0, AA2 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA BB0, BB1; VMOVDQA BB0, BB2 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA CC0, CC1; VMOVDQA CC0, CC2 - VMOVDQA ctr3StoreAVX2, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD1; VPADDD ·avx2IncMask<>(SB), DD1, DD2 - VMOVDQA DD0, TT1; VMOVDQA DD1, TT2; VMOVDQA DD2, TT3 + VMOVDQA ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VMOVDQA Y4, Y7 + VMOVDQA Y1, Y11 + VMOVDQA Y2, Y15 sealAVX2Tail384LoopA: - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI sealAVX2Tail384LoopB: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0); chachaQR_AVX2(AA2, BB2, CC2, DD2, TT0) - polyAdd(0(oup)) - polyMul - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $4, BB2, BB2, BB2 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1; VPALIGNR $12, DD2, DD2, DD2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0); chachaQR_AVX2(AA2, BB2, CC2, DD2, TT0) - polyAdd(16(oup)) - polyMul - LEAQ 32(oup), oup - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $12, BB2, BB2, BB2 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1; VPALIGNR $4, DD2, DD2, DD2 - DECQ itr1 - JG sealAVX2Tail384LoopA - DECQ itr2 - JGE sealAVX2Tail384LoopB - - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1; VPADDD ·chacha20Constants<>(SB), AA2, AA2 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1; VPADDD state1StoreAVX2, BB2, BB2 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1; VPADDD state2StoreAVX2, CC2, CC2 - VPADDD TT1, DD0, DD0; VPADDD TT2, DD1, DD1; VPADDD TT3, DD2, DD2 - VPERM2I128 $0x02, AA0, BB0, TT0 - VPERM2I128 $0x02, CC0, DD0, TT1 - VPERM2I128 $0x13, AA0, BB0, TT2 - VPERM2I128 $0x13, CC0, DD0, TT3 - VPXOR (0*32)(inp), TT0, TT0; VPXOR (1*32)(inp), TT1, TT1; VPXOR (2*32)(inp), TT2, TT2; VPXOR (3*32)(inp), TT3, TT3 - VMOVDQU TT0, (0*32)(oup); VMOVDQU TT1, (1*32)(oup); VMOVDQU TT2, (2*32)(oup); VMOVDQU TT3, (3*32)(oup) - VPERM2I128 $0x02, AA1, BB1, TT0 - VPERM2I128 $0x02, CC1, DD1, TT1 - VPERM2I128 $0x13, AA1, BB1, TT2 - VPERM2I128 $0x13, CC1, DD1, TT3 - VPXOR (4*32)(inp), TT0, TT0; VPXOR (5*32)(inp), TT1, TT1; VPXOR (6*32)(inp), TT2, TT2; VPXOR (7*32)(inp), TT3, TT3 - VMOVDQU TT0, (4*32)(oup); VMOVDQU TT1, (5*32)(oup); VMOVDQU TT2, (6*32)(oup); VMOVDQU TT3, (7*32)(oup) - MOVQ $256, itr1 - LEAQ 256(inp), inp - SUBQ $256, inl - VPERM2I128 $0x02, AA2, BB2, AA0 - VPERM2I128 $0x02, CC2, DD2, BB0 - VPERM2I128 $0x13, AA2, BB2, CC0 - VPERM2I128 $0x13, CC2, DD2, DD0 - - JMP sealAVX2SealHash - -// ---------------------------------------------------------------------------- -// Special optimization for the last 512 bytes of ciphertext + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + ADDQ 16(DI), R10 + ADCQ 24(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 32(DI), DI + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + DECQ CX + JG sealAVX2Tail384LoopA + DECQ R9 + JGE sealAVX2Tail384LoopB + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 32(BP), Y10, Y10 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD 64(BP), Y8, Y8 + VPADDD Y7, Y4, Y4 + VPADDD Y11, Y1, Y1 + VPADDD Y15, Y2, Y2 + VPERM2I128 $0x02, Y0, Y14, Y3 + VPERM2I128 $0x02, Y12, Y4, Y7 + VPERM2I128 $0x13, Y0, Y14, Y11 + VPERM2I128 $0x13, Y12, Y4, Y15 + VPXOR (SI), Y3, Y3 + VPXOR 32(SI), Y7, Y7 + VPXOR 64(SI), Y11, Y11 + VPXOR 96(SI), Y15, Y15 + VMOVDQU Y3, (DI) + VMOVDQU Y7, 32(DI) + VMOVDQU Y11, 64(DI) + VMOVDQU Y15, 96(DI) + VPERM2I128 $0x02, Y5, Y9, Y3 + VPERM2I128 $0x02, Y13, Y1, Y7 + VPERM2I128 $0x13, Y5, Y9, Y11 + VPERM2I128 $0x13, Y13, Y1, Y15 + VPXOR 128(SI), Y3, Y3 + VPXOR 160(SI), Y7, Y7 + VPXOR 192(SI), Y11, Y11 + VPXOR 224(SI), Y15, Y15 + VMOVDQU Y3, 128(DI) + VMOVDQU Y7, 160(DI) + VMOVDQU Y11, 192(DI) + VMOVDQU Y15, 224(DI) + MOVQ $0x00000100, CX + LEAQ 256(SI), SI + SUBQ $0x00000100, BX + VPERM2I128 $0x02, Y6, Y10, Y0 + VPERM2I128 $0x02, Y8, Y2, Y14 + VPERM2I128 $0x13, Y6, Y10, Y12 + VPERM2I128 $0x13, Y8, Y2, Y4 + JMP sealAVX2SealHash + sealAVX2Tail512: - // Need to decrypt up to 512 bytes - prepare two blocks - // If we got here after the main loop - there are 512 encrypted bytes waiting to be hashed - // If we got here before the main loop - there are 448 encrpyred bytes waiting to be hashed - VMOVDQA ·chacha20Constants<>(SB), AA0; VMOVDQA AA0, AA1; VMOVDQA AA0, AA2; VMOVDQA AA0, AA3 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA BB0, BB1; VMOVDQA BB0, BB2; VMOVDQA BB0, BB3 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA CC0, CC1; VMOVDQA CC0, CC2; VMOVDQA CC0, CC3 - VMOVDQA ctr3StoreAVX2, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD1; VPADDD ·avx2IncMask<>(SB), DD1, DD2; VPADDD ·avx2IncMask<>(SB), DD2, DD3 - VMOVDQA DD0, ctr0StoreAVX2; VMOVDQA DD1, ctr1StoreAVX2; VMOVDQA DD2, ctr2StoreAVX2; VMOVDQA DD3, ctr3StoreAVX2 + VMOVDQA ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA Y0, Y7 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA Y14, Y11 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA Y12, Y15 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VPADDD ·avx2IncMask<>+0(SB), Y2, Y3 + VMOVDQA Y4, 96(BP) + VMOVDQA Y1, 128(BP) + VMOVDQA Y2, 160(BP) + VMOVDQA Y3, 192(BP) sealAVX2Tail512LoopA: - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI sealAVX2Tail512LoopB: - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyAdd(0*8(oup)) - polyMulAVX2 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol8<>(SB), DD0, DD0; VPSHUFB ·rol8<>(SB), DD1, DD1; VPSHUFB ·rol8<>(SB), DD2, DD2; VPSHUFB ·rol8<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $7, BB0, CC3; VPSRLD $25, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $7, BB1, CC3; VPSRLD $25, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $7, BB2, CC3; VPSRLD $25, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $7, BB3, CC3; VPSRLD $25, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $4, BB2, BB2, BB2; VPALIGNR $4, BB3, BB3, BB3 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $8, CC3, CC3, CC3 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1; VPALIGNR $12, DD2, DD2, DD2; VPALIGNR $12, DD3, DD3, DD3 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - polyAdd(2*8(oup)) - polyMulAVX2 - LEAQ (4*8)(oup), oup - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol8<>(SB), DD0, DD0; VPSHUFB ·rol8<>(SB), DD1, DD1; VPSHUFB ·rol8<>(SB), DD2, DD2; VPSHUFB ·rol8<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $7, BB0, CC3; VPSRLD $25, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $7, BB1, CC3; VPSRLD $25, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $7, BB2, CC3; VPSRLD $25, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $7, BB3, CC3; VPSRLD $25, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $12, BB2, BB2, BB2; VPALIGNR $12, BB3, BB3, BB3 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $8, CC3, CC3, CC3 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1; VPALIGNR $4, DD2, DD2, DD2; VPALIGNR $4, DD3, DD3, DD3 - - DECQ itr1 - JG sealAVX2Tail512LoopA - DECQ itr2 - JGE sealAVX2Tail512LoopB - - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1; VPADDD ·chacha20Constants<>(SB), AA2, AA2; VPADDD ·chacha20Constants<>(SB), AA3, AA3 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1; VPADDD state1StoreAVX2, BB2, BB2; VPADDD state1StoreAVX2, BB3, BB3 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1; VPADDD state2StoreAVX2, CC2, CC2; VPADDD state2StoreAVX2, CC3, CC3 - VPADDD ctr0StoreAVX2, DD0, DD0; VPADDD ctr1StoreAVX2, DD1, DD1; VPADDD ctr2StoreAVX2, DD2, DD2; VPADDD ctr3StoreAVX2, DD3, DD3 - VMOVDQA CC3, tmpStoreAVX2 - VPERM2I128 $0x02, AA0, BB0, CC3 - VPXOR (0*32)(inp), CC3, CC3 - VMOVDQU CC3, (0*32)(oup) - VPERM2I128 $0x02, CC0, DD0, CC3 - VPXOR (1*32)(inp), CC3, CC3 - VMOVDQU CC3, (1*32)(oup) - VPERM2I128 $0x13, AA0, BB0, CC3 - VPXOR (2*32)(inp), CC3, CC3 - VMOVDQU CC3, (2*32)(oup) - VPERM2I128 $0x13, CC0, DD0, CC3 - VPXOR (3*32)(inp), CC3, CC3 - VMOVDQU CC3, (3*32)(oup) - - VPERM2I128 $0x02, AA1, BB1, AA0 - VPERM2I128 $0x02, CC1, DD1, BB0 - VPERM2I128 $0x13, AA1, BB1, CC0 - VPERM2I128 $0x13, CC1, DD1, DD0 - VPXOR (4*32)(inp), AA0, AA0; VPXOR (5*32)(inp), BB0, BB0; VPXOR (6*32)(inp), CC0, CC0; VPXOR (7*32)(inp), DD0, DD0 - VMOVDQU AA0, (4*32)(oup); VMOVDQU BB0, (5*32)(oup); VMOVDQU CC0, (6*32)(oup); VMOVDQU DD0, (7*32)(oup) - - VPERM2I128 $0x02, AA2, BB2, AA0 - VPERM2I128 $0x02, CC2, DD2, BB0 - VPERM2I128 $0x13, AA2, BB2, CC0 - VPERM2I128 $0x13, CC2, DD2, DD0 - VPXOR (8*32)(inp), AA0, AA0; VPXOR (9*32)(inp), BB0, BB0; VPXOR (10*32)(inp), CC0, CC0; VPXOR (11*32)(inp), DD0, DD0 - VMOVDQU AA0, (8*32)(oup); VMOVDQU BB0, (9*32)(oup); VMOVDQU CC0, (10*32)(oup); VMOVDQU DD0, (11*32)(oup) - - MOVQ $384, itr1 - LEAQ 384(inp), inp - SUBQ $384, inl - VPERM2I128 $0x02, AA3, BB3, AA0 - VPERM2I128 $0x02, tmpStoreAVX2, DD3, BB0 - VPERM2I128 $0x13, AA3, BB3, CC0 - VPERM2I128 $0x13, tmpStoreAVX2, DD3, DD0 - - JMP sealAVX2SealHash + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x04, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPALIGNR $0x0c, Y3, Y3, Y3 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + ADDQ 16(DI), R10 + ADCQ 24(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 32(DI), DI + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x0c, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + VPALIGNR $0x04, Y3, Y3, Y3 + DECQ CX + JG sealAVX2Tail512LoopA + DECQ R9 + JGE sealAVX2Tail512LoopB + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 + VPADDD ·chacha20Constants<>+0(SB), Y7, Y7 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 32(BP), Y10, Y10 + VPADDD 32(BP), Y11, Y11 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD 64(BP), Y8, Y8 + VPADDD 64(BP), Y15, Y15 + VPADDD 96(BP), Y4, Y4 + VPADDD 128(BP), Y1, Y1 + VPADDD 160(BP), Y2, Y2 + VPADDD 192(BP), Y3, Y3 + VMOVDQA Y15, 224(BP) + VPERM2I128 $0x02, Y0, Y14, Y15 + VPXOR (SI), Y15, Y15 + VMOVDQU Y15, (DI) + VPERM2I128 $0x02, Y12, Y4, Y15 + VPXOR 32(SI), Y15, Y15 + VMOVDQU Y15, 32(DI) + VPERM2I128 $0x13, Y0, Y14, Y15 + VPXOR 64(SI), Y15, Y15 + VMOVDQU Y15, 64(DI) + VPERM2I128 $0x13, Y12, Y4, Y15 + VPXOR 96(SI), Y15, Y15 + VMOVDQU Y15, 96(DI) + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 + VPXOR 128(SI), Y0, Y0 + VPXOR 160(SI), Y14, Y14 + VPXOR 192(SI), Y12, Y12 + VPXOR 224(SI), Y4, Y4 + VMOVDQU Y0, 128(DI) + VMOVDQU Y14, 160(DI) + VMOVDQU Y12, 192(DI) + VMOVDQU Y4, 224(DI) + VPERM2I128 $0x02, Y6, Y10, Y0 + VPERM2I128 $0x02, Y8, Y2, Y14 + VPERM2I128 $0x13, Y6, Y10, Y12 + VPERM2I128 $0x13, Y8, Y2, Y4 + VPXOR 256(SI), Y0, Y0 + VPXOR 288(SI), Y14, Y14 + VPXOR 320(SI), Y12, Y12 + VPXOR 352(SI), Y4, Y4 + VMOVDQU Y0, 256(DI) + VMOVDQU Y14, 288(DI) + VMOVDQU Y12, 320(DI) + VMOVDQU Y4, 352(DI) + MOVQ $0x00000180, CX + LEAQ 384(SI), SI + SUBQ $0x00000180, BX + VPERM2I128 $0x02, Y7, Y11, Y0 + VPERM2I128 $0x02, 224(BP), Y3, Y14 + VPERM2I128 $0x13, Y7, Y11, Y12 + VPERM2I128 $0x13, 224(BP), Y3, Y4 + JMP sealAVX2SealHash diff --git a/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s b/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s index e0d3c647..13375738 100644 --- a/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s +++ b/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s @@ -1,108 +1,93 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run sum_amd64_asm.go -out ../sum_amd64.s -pkg poly1305. DO NOT EDIT. //go:build gc && !purego -#include "textflag.h" - -#define POLY1305_ADD(msg, h0, h1, h2) \ - ADDQ 0(msg), h0; \ - ADCQ 8(msg), h1; \ - ADCQ $1, h2; \ - LEAQ 16(msg), msg - -#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \ - MOVQ r0, AX; \ - MULQ h0; \ - MOVQ AX, t0; \ - MOVQ DX, t1; \ - MOVQ r0, AX; \ - MULQ h1; \ - ADDQ AX, t1; \ - ADCQ $0, DX; \ - MOVQ r0, t2; \ - IMULQ h2, t2; \ - ADDQ DX, t2; \ - \ - MOVQ r1, AX; \ - MULQ h0; \ - ADDQ AX, t1; \ - ADCQ $0, DX; \ - MOVQ DX, h0; \ - MOVQ r1, t3; \ - IMULQ h2, t3; \ - MOVQ r1, AX; \ - MULQ h1; \ - ADDQ AX, t2; \ - ADCQ DX, t3; \ - ADDQ h0, t2; \ - ADCQ $0, t3; \ - \ - MOVQ t0, h0; \ - MOVQ t1, h1; \ - MOVQ t2, h2; \ - ANDQ $3, h2; \ - MOVQ t2, t0; \ - ANDQ $0xFFFFFFFFFFFFFFFC, t0; \ - ADDQ t0, h0; \ - ADCQ t3, h1; \ - ADCQ $0, h2; \ - SHRQ $2, t3, t2; \ - SHRQ $2, t3; \ - ADDQ t2, h0; \ - ADCQ t3, h1; \ - ADCQ $0, h2 - -// func update(state *[7]uint64, msg []byte) +// func update(state *macState, msg []byte) TEXT ·update(SB), $0-32 MOVQ state+0(FP), DI MOVQ msg_base+8(FP), SI MOVQ msg_len+16(FP), R15 - - MOVQ 0(DI), R8 // h0 - MOVQ 8(DI), R9 // h1 - MOVQ 16(DI), R10 // h2 - MOVQ 24(DI), R11 // r0 - MOVQ 32(DI), R12 // r1 - - CMPQ R15, $16 + MOVQ (DI), R8 + MOVQ 8(DI), R9 + MOVQ 16(DI), R10 + MOVQ 24(DI), R11 + MOVQ 32(DI), R12 + CMPQ R15, $0x10 JB bytes_between_0_and_15 loop: - POLY1305_ADD(SI, R8, R9, R10) + ADDQ (SI), R8 + ADCQ 8(SI), R9 + ADCQ $0x01, R10 + LEAQ 16(SI), SI multiply: - POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14) - SUBQ $16, R15 - CMPQ R15, $16 - JAE loop + MOVQ R11, AX + MULQ R8 + MOVQ AX, BX + MOVQ DX, CX + MOVQ R11, AX + MULQ R9 + ADDQ AX, CX + ADCQ $0x00, DX + MOVQ R11, R13 + IMULQ R10, R13 + ADDQ DX, R13 + MOVQ R12, AX + MULQ R8 + ADDQ AX, CX + ADCQ $0x00, DX + MOVQ DX, R8 + MOVQ R12, R14 + IMULQ R10, R14 + MOVQ R12, AX + MULQ R9 + ADDQ AX, R13 + ADCQ DX, R14 + ADDQ R8, R13 + ADCQ $0x00, R14 + MOVQ BX, R8 + MOVQ CX, R9 + MOVQ R13, R10 + ANDQ $0x03, R10 + MOVQ R13, BX + ANDQ $-4, BX + ADDQ BX, R8 + ADCQ R14, R9 + ADCQ $0x00, R10 + SHRQ $0x02, R14, R13 + SHRQ $0x02, R14 + ADDQ R13, R8 + ADCQ R14, R9 + ADCQ $0x00, R10 + SUBQ $0x10, R15 + CMPQ R15, $0x10 + JAE loop bytes_between_0_and_15: TESTQ R15, R15 JZ done - MOVQ $1, BX + MOVQ $0x00000001, BX XORQ CX, CX XORQ R13, R13 ADDQ R15, SI flush_buffer: - SHLQ $8, BX, CX - SHLQ $8, BX + SHLQ $0x08, BX, CX + SHLQ $0x08, BX MOVB -1(SI), R13 XORQ R13, BX DECQ SI DECQ R15 JNZ flush_buffer - ADDQ BX, R8 ADCQ CX, R9 - ADCQ $0, R10 - MOVQ $16, R15 + ADCQ $0x00, R10 + MOVQ $0x00000010, R15 JMP multiply done: - MOVQ R8, 0(DI) + MOVQ R8, (DI) MOVQ R9, 8(DI) MOVQ R10, 16(DI) RET diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s index fcce0234..3883e0ec 100644 --- a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s +++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s @@ -1,880 +1,880 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run salsa20_amd64_asm.go -out ../salsa20_amd64.s -pkg salsa. DO NOT EDIT. //go:build amd64 && !purego && gc -// This code was translated into a form compatible with 6a from the public -// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html +// func salsa2020XORKeyStream(out *byte, in *byte, n uint64, nonce *byte, key *byte) +// Requires: SSE2 +TEXT ·salsa2020XORKeyStream(SB), $456-40 + // This needs up to 64 bytes at 360(R12); hence the non-obvious frame size. + MOVQ out+0(FP), DI + MOVQ in+8(FP), SI + MOVQ n+16(FP), DX + MOVQ nonce+24(FP), CX + MOVQ key+32(FP), R8 + MOVQ SP, R12 + ADDQ $0x1f, R12 + ANDQ $-32, R12 + MOVQ DX, R9 + MOVQ CX, DX + MOVQ R8, R10 + CMPQ R9, $0x00 + JBE DONE + MOVL 20(R10), CX + MOVL (R10), R8 + MOVL (DX), AX + MOVL 16(R10), R11 + MOVL CX, (R12) + MOVL R8, 4(R12) + MOVL AX, 8(R12) + MOVL R11, 12(R12) + MOVL 8(DX), CX + MOVL 24(R10), R8 + MOVL 4(R10), AX + MOVL 4(DX), R11 + MOVL CX, 16(R12) + MOVL R8, 20(R12) + MOVL AX, 24(R12) + MOVL R11, 28(R12) + MOVL 12(DX), CX + MOVL 12(R10), DX + MOVL 28(R10), R8 + MOVL 8(R10), AX + MOVL DX, 32(R12) + MOVL CX, 36(R12) + MOVL R8, 40(R12) + MOVL AX, 44(R12) + MOVQ $0x61707865, DX + MOVQ $0x3320646e, CX + MOVQ $0x79622d32, R8 + MOVQ $0x6b206574, AX + MOVL DX, 48(R12) + MOVL CX, 52(R12) + MOVL R8, 56(R12) + MOVL AX, 60(R12) + CMPQ R9, $0x00000100 + JB BYTESBETWEEN1AND255 + MOVOA 48(R12), X0 + PSHUFL $0x55, X0, X1 + PSHUFL $0xaa, X0, X2 + PSHUFL $0xff, X0, X3 + PSHUFL $0x00, X0, X0 + MOVOA X1, 64(R12) + MOVOA X2, 80(R12) + MOVOA X3, 96(R12) + MOVOA X0, 112(R12) + MOVOA (R12), X0 + PSHUFL $0xaa, X0, X1 + PSHUFL $0xff, X0, X2 + PSHUFL $0x00, X0, X3 + PSHUFL $0x55, X0, X0 + MOVOA X1, 128(R12) + MOVOA X2, 144(R12) + MOVOA X3, 160(R12) + MOVOA X0, 176(R12) + MOVOA 16(R12), X0 + PSHUFL $0xff, X0, X1 + PSHUFL $0x55, X0, X2 + PSHUFL $0xaa, X0, X0 + MOVOA X1, 192(R12) + MOVOA X2, 208(R12) + MOVOA X0, 224(R12) + MOVOA 32(R12), X0 + PSHUFL $0x00, X0, X1 + PSHUFL $0xaa, X0, X2 + PSHUFL $0xff, X0, X0 + MOVOA X1, 240(R12) + MOVOA X2, 256(R12) + MOVOA X0, 272(R12) -// func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte) -// This needs up to 64 bytes at 360(R12); hence the non-obvious frame size. -TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment - MOVQ out+0(FP),DI - MOVQ in+8(FP),SI - MOVQ n+16(FP),DX - MOVQ nonce+24(FP),CX - MOVQ key+32(FP),R8 +BYTESATLEAST256: + MOVL 16(R12), DX + MOVL 36(R12), CX + MOVL DX, 288(R12) + MOVL CX, 304(R12) + SHLQ $0x20, CX + ADDQ CX, DX + ADDQ $0x01, DX + MOVQ DX, CX + SHRQ $0x20, CX + MOVL DX, 292(R12) + MOVL CX, 308(R12) + ADDQ $0x01, DX + MOVQ DX, CX + SHRQ $0x20, CX + MOVL DX, 296(R12) + MOVL CX, 312(R12) + ADDQ $0x01, DX + MOVQ DX, CX + SHRQ $0x20, CX + MOVL DX, 300(R12) + MOVL CX, 316(R12) + ADDQ $0x01, DX + MOVQ DX, CX + SHRQ $0x20, CX + MOVL DX, 16(R12) + MOVL CX, 36(R12) + MOVQ R9, 352(R12) + MOVQ $0x00000014, DX + MOVOA 64(R12), X0 + MOVOA 80(R12), X1 + MOVOA 96(R12), X2 + MOVOA 256(R12), X3 + MOVOA 272(R12), X4 + MOVOA 128(R12), X5 + MOVOA 144(R12), X6 + MOVOA 176(R12), X7 + MOVOA 192(R12), X8 + MOVOA 208(R12), X9 + MOVOA 224(R12), X10 + MOVOA 304(R12), X11 + MOVOA 112(R12), X12 + MOVOA 160(R12), X13 + MOVOA 240(R12), X14 + MOVOA 288(R12), X15 - MOVQ SP,R12 - ADDQ $31, R12 - ANDQ $~31, R12 +MAINLOOP1: + MOVOA X1, 320(R12) + MOVOA X2, 336(R12) + MOVOA X13, X1 + PADDL X12, X1 + MOVOA X1, X2 + PSLLL $0x07, X1 + PXOR X1, X14 + PSRLL $0x19, X2 + PXOR X2, X14 + MOVOA X7, X1 + PADDL X0, X1 + MOVOA X1, X2 + PSLLL $0x07, X1 + PXOR X1, X11 + PSRLL $0x19, X2 + PXOR X2, X11 + MOVOA X12, X1 + PADDL X14, X1 + MOVOA X1, X2 + PSLLL $0x09, X1 + PXOR X1, X15 + PSRLL $0x17, X2 + PXOR X2, X15 + MOVOA X0, X1 + PADDL X11, X1 + MOVOA X1, X2 + PSLLL $0x09, X1 + PXOR X1, X9 + PSRLL $0x17, X2 + PXOR X2, X9 + MOVOA X14, X1 + PADDL X15, X1 + MOVOA X1, X2 + PSLLL $0x0d, X1 + PXOR X1, X13 + PSRLL $0x13, X2 + PXOR X2, X13 + MOVOA X11, X1 + PADDL X9, X1 + MOVOA X1, X2 + PSLLL $0x0d, X1 + PXOR X1, X7 + PSRLL $0x13, X2 + PXOR X2, X7 + MOVOA X15, X1 + PADDL X13, X1 + MOVOA X1, X2 + PSLLL $0x12, X1 + PXOR X1, X12 + PSRLL $0x0e, X2 + PXOR X2, X12 + MOVOA 320(R12), X1 + MOVOA X12, 320(R12) + MOVOA X9, X2 + PADDL X7, X2 + MOVOA X2, X12 + PSLLL $0x12, X2 + PXOR X2, X0 + PSRLL $0x0e, X12 + PXOR X12, X0 + MOVOA X5, X2 + PADDL X1, X2 + MOVOA X2, X12 + PSLLL $0x07, X2 + PXOR X2, X3 + PSRLL $0x19, X12 + PXOR X12, X3 + MOVOA 336(R12), X2 + MOVOA X0, 336(R12) + MOVOA X6, X0 + PADDL X2, X0 + MOVOA X0, X12 + PSLLL $0x07, X0 + PXOR X0, X4 + PSRLL $0x19, X12 + PXOR X12, X4 + MOVOA X1, X0 + PADDL X3, X0 + MOVOA X0, X12 + PSLLL $0x09, X0 + PXOR X0, X10 + PSRLL $0x17, X12 + PXOR X12, X10 + MOVOA X2, X0 + PADDL X4, X0 + MOVOA X0, X12 + PSLLL $0x09, X0 + PXOR X0, X8 + PSRLL $0x17, X12 + PXOR X12, X8 + MOVOA X3, X0 + PADDL X10, X0 + MOVOA X0, X12 + PSLLL $0x0d, X0 + PXOR X0, X5 + PSRLL $0x13, X12 + PXOR X12, X5 + MOVOA X4, X0 + PADDL X8, X0 + MOVOA X0, X12 + PSLLL $0x0d, X0 + PXOR X0, X6 + PSRLL $0x13, X12 + PXOR X12, X6 + MOVOA X10, X0 + PADDL X5, X0 + MOVOA X0, X12 + PSLLL $0x12, X0 + PXOR X0, X1 + PSRLL $0x0e, X12 + PXOR X12, X1 + MOVOA 320(R12), X0 + MOVOA X1, 320(R12) + MOVOA X4, X1 + PADDL X0, X1 + MOVOA X1, X12 + PSLLL $0x07, X1 + PXOR X1, X7 + PSRLL $0x19, X12 + PXOR X12, X7 + MOVOA X8, X1 + PADDL X6, X1 + MOVOA X1, X12 + PSLLL $0x12, X1 + PXOR X1, X2 + PSRLL $0x0e, X12 + PXOR X12, X2 + MOVOA 336(R12), X12 + MOVOA X2, 336(R12) + MOVOA X14, X1 + PADDL X12, X1 + MOVOA X1, X2 + PSLLL $0x07, X1 + PXOR X1, X5 + PSRLL $0x19, X2 + PXOR X2, X5 + MOVOA X0, X1 + PADDL X7, X1 + MOVOA X1, X2 + PSLLL $0x09, X1 + PXOR X1, X10 + PSRLL $0x17, X2 + PXOR X2, X10 + MOVOA X12, X1 + PADDL X5, X1 + MOVOA X1, X2 + PSLLL $0x09, X1 + PXOR X1, X8 + PSRLL $0x17, X2 + PXOR X2, X8 + MOVOA X7, X1 + PADDL X10, X1 + MOVOA X1, X2 + PSLLL $0x0d, X1 + PXOR X1, X4 + PSRLL $0x13, X2 + PXOR X2, X4 + MOVOA X5, X1 + PADDL X8, X1 + MOVOA X1, X2 + PSLLL $0x0d, X1 + PXOR X1, X14 + PSRLL $0x13, X2 + PXOR X2, X14 + MOVOA X10, X1 + PADDL X4, X1 + MOVOA X1, X2 + PSLLL $0x12, X1 + PXOR X1, X0 + PSRLL $0x0e, X2 + PXOR X2, X0 + MOVOA 320(R12), X1 + MOVOA X0, 320(R12) + MOVOA X8, X0 + PADDL X14, X0 + MOVOA X0, X2 + PSLLL $0x12, X0 + PXOR X0, X12 + PSRLL $0x0e, X2 + PXOR X2, X12 + MOVOA X11, X0 + PADDL X1, X0 + MOVOA X0, X2 + PSLLL $0x07, X0 + PXOR X0, X6 + PSRLL $0x19, X2 + PXOR X2, X6 + MOVOA 336(R12), X2 + MOVOA X12, 336(R12) + MOVOA X3, X0 + PADDL X2, X0 + MOVOA X0, X12 + PSLLL $0x07, X0 + PXOR X0, X13 + PSRLL $0x19, X12 + PXOR X12, X13 + MOVOA X1, X0 + PADDL X6, X0 + MOVOA X0, X12 + PSLLL $0x09, X0 + PXOR X0, X15 + PSRLL $0x17, X12 + PXOR X12, X15 + MOVOA X2, X0 + PADDL X13, X0 + MOVOA X0, X12 + PSLLL $0x09, X0 + PXOR X0, X9 + PSRLL $0x17, X12 + PXOR X12, X9 + MOVOA X6, X0 + PADDL X15, X0 + MOVOA X0, X12 + PSLLL $0x0d, X0 + PXOR X0, X11 + PSRLL $0x13, X12 + PXOR X12, X11 + MOVOA X13, X0 + PADDL X9, X0 + MOVOA X0, X12 + PSLLL $0x0d, X0 + PXOR X0, X3 + PSRLL $0x13, X12 + PXOR X12, X3 + MOVOA X15, X0 + PADDL X11, X0 + MOVOA X0, X12 + PSLLL $0x12, X0 + PXOR X0, X1 + PSRLL $0x0e, X12 + PXOR X12, X1 + MOVOA X9, X0 + PADDL X3, X0 + MOVOA X0, X12 + PSLLL $0x12, X0 + PXOR X0, X2 + PSRLL $0x0e, X12 + PXOR X12, X2 + MOVOA 320(R12), X12 + MOVOA 336(R12), X0 + SUBQ $0x02, DX + JA MAINLOOP1 + PADDL 112(R12), X12 + PADDL 176(R12), X7 + PADDL 224(R12), X10 + PADDL 272(R12), X4 + MOVD X12, DX + MOVD X7, CX + MOVD X10, R8 + MOVD X4, R9 + PSHUFL $0x39, X12, X12 + PSHUFL $0x39, X7, X7 + PSHUFL $0x39, X10, X10 + PSHUFL $0x39, X4, X4 + XORL (SI), DX + XORL 4(SI), CX + XORL 8(SI), R8 + XORL 12(SI), R9 + MOVL DX, (DI) + MOVL CX, 4(DI) + MOVL R8, 8(DI) + MOVL R9, 12(DI) + MOVD X12, DX + MOVD X7, CX + MOVD X10, R8 + MOVD X4, R9 + PSHUFL $0x39, X12, X12 + PSHUFL $0x39, X7, X7 + PSHUFL $0x39, X10, X10 + PSHUFL $0x39, X4, X4 + XORL 64(SI), DX + XORL 68(SI), CX + XORL 72(SI), R8 + XORL 76(SI), R9 + MOVL DX, 64(DI) + MOVL CX, 68(DI) + MOVL R8, 72(DI) + MOVL R9, 76(DI) + MOVD X12, DX + MOVD X7, CX + MOVD X10, R8 + MOVD X4, R9 + PSHUFL $0x39, X12, X12 + PSHUFL $0x39, X7, X7 + PSHUFL $0x39, X10, X10 + PSHUFL $0x39, X4, X4 + XORL 128(SI), DX + XORL 132(SI), CX + XORL 136(SI), R8 + XORL 140(SI), R9 + MOVL DX, 128(DI) + MOVL CX, 132(DI) + MOVL R8, 136(DI) + MOVL R9, 140(DI) + MOVD X12, DX + MOVD X7, CX + MOVD X10, R8 + MOVD X4, R9 + XORL 192(SI), DX + XORL 196(SI), CX + XORL 200(SI), R8 + XORL 204(SI), R9 + MOVL DX, 192(DI) + MOVL CX, 196(DI) + MOVL R8, 200(DI) + MOVL R9, 204(DI) + PADDL 240(R12), X14 + PADDL 64(R12), X0 + PADDL 128(R12), X5 + PADDL 192(R12), X8 + MOVD X14, DX + MOVD X0, CX + MOVD X5, R8 + MOVD X8, R9 + PSHUFL $0x39, X14, X14 + PSHUFL $0x39, X0, X0 + PSHUFL $0x39, X5, X5 + PSHUFL $0x39, X8, X8 + XORL 16(SI), DX + XORL 20(SI), CX + XORL 24(SI), R8 + XORL 28(SI), R9 + MOVL DX, 16(DI) + MOVL CX, 20(DI) + MOVL R8, 24(DI) + MOVL R9, 28(DI) + MOVD X14, DX + MOVD X0, CX + MOVD X5, R8 + MOVD X8, R9 + PSHUFL $0x39, X14, X14 + PSHUFL $0x39, X0, X0 + PSHUFL $0x39, X5, X5 + PSHUFL $0x39, X8, X8 + XORL 80(SI), DX + XORL 84(SI), CX + XORL 88(SI), R8 + XORL 92(SI), R9 + MOVL DX, 80(DI) + MOVL CX, 84(DI) + MOVL R8, 88(DI) + MOVL R9, 92(DI) + MOVD X14, DX + MOVD X0, CX + MOVD X5, R8 + MOVD X8, R9 + PSHUFL $0x39, X14, X14 + PSHUFL $0x39, X0, X0 + PSHUFL $0x39, X5, X5 + PSHUFL $0x39, X8, X8 + XORL 144(SI), DX + XORL 148(SI), CX + XORL 152(SI), R8 + XORL 156(SI), R9 + MOVL DX, 144(DI) + MOVL CX, 148(DI) + MOVL R8, 152(DI) + MOVL R9, 156(DI) + MOVD X14, DX + MOVD X0, CX + MOVD X5, R8 + MOVD X8, R9 + XORL 208(SI), DX + XORL 212(SI), CX + XORL 216(SI), R8 + XORL 220(SI), R9 + MOVL DX, 208(DI) + MOVL CX, 212(DI) + MOVL R8, 216(DI) + MOVL R9, 220(DI) + PADDL 288(R12), X15 + PADDL 304(R12), X11 + PADDL 80(R12), X1 + PADDL 144(R12), X6 + MOVD X15, DX + MOVD X11, CX + MOVD X1, R8 + MOVD X6, R9 + PSHUFL $0x39, X15, X15 + PSHUFL $0x39, X11, X11 + PSHUFL $0x39, X1, X1 + PSHUFL $0x39, X6, X6 + XORL 32(SI), DX + XORL 36(SI), CX + XORL 40(SI), R8 + XORL 44(SI), R9 + MOVL DX, 32(DI) + MOVL CX, 36(DI) + MOVL R8, 40(DI) + MOVL R9, 44(DI) + MOVD X15, DX + MOVD X11, CX + MOVD X1, R8 + MOVD X6, R9 + PSHUFL $0x39, X15, X15 + PSHUFL $0x39, X11, X11 + PSHUFL $0x39, X1, X1 + PSHUFL $0x39, X6, X6 + XORL 96(SI), DX + XORL 100(SI), CX + XORL 104(SI), R8 + XORL 108(SI), R9 + MOVL DX, 96(DI) + MOVL CX, 100(DI) + MOVL R8, 104(DI) + MOVL R9, 108(DI) + MOVD X15, DX + MOVD X11, CX + MOVD X1, R8 + MOVD X6, R9 + PSHUFL $0x39, X15, X15 + PSHUFL $0x39, X11, X11 + PSHUFL $0x39, X1, X1 + PSHUFL $0x39, X6, X6 + XORL 160(SI), DX + XORL 164(SI), CX + XORL 168(SI), R8 + XORL 172(SI), R9 + MOVL DX, 160(DI) + MOVL CX, 164(DI) + MOVL R8, 168(DI) + MOVL R9, 172(DI) + MOVD X15, DX + MOVD X11, CX + MOVD X1, R8 + MOVD X6, R9 + XORL 224(SI), DX + XORL 228(SI), CX + XORL 232(SI), R8 + XORL 236(SI), R9 + MOVL DX, 224(DI) + MOVL CX, 228(DI) + MOVL R8, 232(DI) + MOVL R9, 236(DI) + PADDL 160(R12), X13 + PADDL 208(R12), X9 + PADDL 256(R12), X3 + PADDL 96(R12), X2 + MOVD X13, DX + MOVD X9, CX + MOVD X3, R8 + MOVD X2, R9 + PSHUFL $0x39, X13, X13 + PSHUFL $0x39, X9, X9 + PSHUFL $0x39, X3, X3 + PSHUFL $0x39, X2, X2 + XORL 48(SI), DX + XORL 52(SI), CX + XORL 56(SI), R8 + XORL 60(SI), R9 + MOVL DX, 48(DI) + MOVL CX, 52(DI) + MOVL R8, 56(DI) + MOVL R9, 60(DI) + MOVD X13, DX + MOVD X9, CX + MOVD X3, R8 + MOVD X2, R9 + PSHUFL $0x39, X13, X13 + PSHUFL $0x39, X9, X9 + PSHUFL $0x39, X3, X3 + PSHUFL $0x39, X2, X2 + XORL 112(SI), DX + XORL 116(SI), CX + XORL 120(SI), R8 + XORL 124(SI), R9 + MOVL DX, 112(DI) + MOVL CX, 116(DI) + MOVL R8, 120(DI) + MOVL R9, 124(DI) + MOVD X13, DX + MOVD X9, CX + MOVD X3, R8 + MOVD X2, R9 + PSHUFL $0x39, X13, X13 + PSHUFL $0x39, X9, X9 + PSHUFL $0x39, X3, X3 + PSHUFL $0x39, X2, X2 + XORL 176(SI), DX + XORL 180(SI), CX + XORL 184(SI), R8 + XORL 188(SI), R9 + MOVL DX, 176(DI) + MOVL CX, 180(DI) + MOVL R8, 184(DI) + MOVL R9, 188(DI) + MOVD X13, DX + MOVD X9, CX + MOVD X3, R8 + MOVD X2, R9 + XORL 240(SI), DX + XORL 244(SI), CX + XORL 248(SI), R8 + XORL 252(SI), R9 + MOVL DX, 240(DI) + MOVL CX, 244(DI) + MOVL R8, 248(DI) + MOVL R9, 252(DI) + MOVQ 352(R12), R9 + SUBQ $0x00000100, R9 + ADDQ $0x00000100, SI + ADDQ $0x00000100, DI + CMPQ R9, $0x00000100 + JAE BYTESATLEAST256 + CMPQ R9, $0x00 + JBE DONE - MOVQ DX,R9 - MOVQ CX,DX - MOVQ R8,R10 - CMPQ R9,$0 - JBE DONE - START: - MOVL 20(R10),CX - MOVL 0(R10),R8 - MOVL 0(DX),AX - MOVL 16(R10),R11 - MOVL CX,0(R12) - MOVL R8, 4 (R12) - MOVL AX, 8 (R12) - MOVL R11, 12 (R12) - MOVL 8(DX),CX - MOVL 24(R10),R8 - MOVL 4(R10),AX - MOVL 4(DX),R11 - MOVL CX,16(R12) - MOVL R8, 20 (R12) - MOVL AX, 24 (R12) - MOVL R11, 28 (R12) - MOVL 12(DX),CX - MOVL 12(R10),DX - MOVL 28(R10),R8 - MOVL 8(R10),AX - MOVL DX,32(R12) - MOVL CX, 36 (R12) - MOVL R8, 40 (R12) - MOVL AX, 44 (R12) - MOVQ $1634760805,DX - MOVQ $857760878,CX - MOVQ $2036477234,R8 - MOVQ $1797285236,AX - MOVL DX,48(R12) - MOVL CX, 52 (R12) - MOVL R8, 56 (R12) - MOVL AX, 60 (R12) - CMPQ R9,$256 - JB BYTESBETWEEN1AND255 - MOVOA 48(R12),X0 - PSHUFL $0X55,X0,X1 - PSHUFL $0XAA,X0,X2 - PSHUFL $0XFF,X0,X3 - PSHUFL $0X00,X0,X0 - MOVOA X1,64(R12) - MOVOA X2,80(R12) - MOVOA X3,96(R12) - MOVOA X0,112(R12) - MOVOA 0(R12),X0 - PSHUFL $0XAA,X0,X1 - PSHUFL $0XFF,X0,X2 - PSHUFL $0X00,X0,X3 - PSHUFL $0X55,X0,X0 - MOVOA X1,128(R12) - MOVOA X2,144(R12) - MOVOA X3,160(R12) - MOVOA X0,176(R12) - MOVOA 16(R12),X0 - PSHUFL $0XFF,X0,X1 - PSHUFL $0X55,X0,X2 - PSHUFL $0XAA,X0,X0 - MOVOA X1,192(R12) - MOVOA X2,208(R12) - MOVOA X0,224(R12) - MOVOA 32(R12),X0 - PSHUFL $0X00,X0,X1 - PSHUFL $0XAA,X0,X2 - PSHUFL $0XFF,X0,X0 - MOVOA X1,240(R12) - MOVOA X2,256(R12) - MOVOA X0,272(R12) - BYTESATLEAST256: - MOVL 16(R12),DX - MOVL 36 (R12),CX - MOVL DX,288(R12) - MOVL CX,304(R12) - SHLQ $32,CX - ADDQ CX,DX - ADDQ $1,DX - MOVQ DX,CX - SHRQ $32,CX - MOVL DX, 292 (R12) - MOVL CX, 308 (R12) - ADDQ $1,DX - MOVQ DX,CX - SHRQ $32,CX - MOVL DX, 296 (R12) - MOVL CX, 312 (R12) - ADDQ $1,DX - MOVQ DX,CX - SHRQ $32,CX - MOVL DX, 300 (R12) - MOVL CX, 316 (R12) - ADDQ $1,DX - MOVQ DX,CX - SHRQ $32,CX - MOVL DX,16(R12) - MOVL CX, 36 (R12) - MOVQ R9,352(R12) - MOVQ $20,DX - MOVOA 64(R12),X0 - MOVOA 80(R12),X1 - MOVOA 96(R12),X2 - MOVOA 256(R12),X3 - MOVOA 272(R12),X4 - MOVOA 128(R12),X5 - MOVOA 144(R12),X6 - MOVOA 176(R12),X7 - MOVOA 192(R12),X8 - MOVOA 208(R12),X9 - MOVOA 224(R12),X10 - MOVOA 304(R12),X11 - MOVOA 112(R12),X12 - MOVOA 160(R12),X13 - MOVOA 240(R12),X14 - MOVOA 288(R12),X15 - MAINLOOP1: - MOVOA X1,320(R12) - MOVOA X2,336(R12) - MOVOA X13,X1 - PADDL X12,X1 - MOVOA X1,X2 - PSLLL $7,X1 - PXOR X1,X14 - PSRLL $25,X2 - PXOR X2,X14 - MOVOA X7,X1 - PADDL X0,X1 - MOVOA X1,X2 - PSLLL $7,X1 - PXOR X1,X11 - PSRLL $25,X2 - PXOR X2,X11 - MOVOA X12,X1 - PADDL X14,X1 - MOVOA X1,X2 - PSLLL $9,X1 - PXOR X1,X15 - PSRLL $23,X2 - PXOR X2,X15 - MOVOA X0,X1 - PADDL X11,X1 - MOVOA X1,X2 - PSLLL $9,X1 - PXOR X1,X9 - PSRLL $23,X2 - PXOR X2,X9 - MOVOA X14,X1 - PADDL X15,X1 - MOVOA X1,X2 - PSLLL $13,X1 - PXOR X1,X13 - PSRLL $19,X2 - PXOR X2,X13 - MOVOA X11,X1 - PADDL X9,X1 - MOVOA X1,X2 - PSLLL $13,X1 - PXOR X1,X7 - PSRLL $19,X2 - PXOR X2,X7 - MOVOA X15,X1 - PADDL X13,X1 - MOVOA X1,X2 - PSLLL $18,X1 - PXOR X1,X12 - PSRLL $14,X2 - PXOR X2,X12 - MOVOA 320(R12),X1 - MOVOA X12,320(R12) - MOVOA X9,X2 - PADDL X7,X2 - MOVOA X2,X12 - PSLLL $18,X2 - PXOR X2,X0 - PSRLL $14,X12 - PXOR X12,X0 - MOVOA X5,X2 - PADDL X1,X2 - MOVOA X2,X12 - PSLLL $7,X2 - PXOR X2,X3 - PSRLL $25,X12 - PXOR X12,X3 - MOVOA 336(R12),X2 - MOVOA X0,336(R12) - MOVOA X6,X0 - PADDL X2,X0 - MOVOA X0,X12 - PSLLL $7,X0 - PXOR X0,X4 - PSRLL $25,X12 - PXOR X12,X4 - MOVOA X1,X0 - PADDL X3,X0 - MOVOA X0,X12 - PSLLL $9,X0 - PXOR X0,X10 - PSRLL $23,X12 - PXOR X12,X10 - MOVOA X2,X0 - PADDL X4,X0 - MOVOA X0,X12 - PSLLL $9,X0 - PXOR X0,X8 - PSRLL $23,X12 - PXOR X12,X8 - MOVOA X3,X0 - PADDL X10,X0 - MOVOA X0,X12 - PSLLL $13,X0 - PXOR X0,X5 - PSRLL $19,X12 - PXOR X12,X5 - MOVOA X4,X0 - PADDL X8,X0 - MOVOA X0,X12 - PSLLL $13,X0 - PXOR X0,X6 - PSRLL $19,X12 - PXOR X12,X6 - MOVOA X10,X0 - PADDL X5,X0 - MOVOA X0,X12 - PSLLL $18,X0 - PXOR X0,X1 - PSRLL $14,X12 - PXOR X12,X1 - MOVOA 320(R12),X0 - MOVOA X1,320(R12) - MOVOA X4,X1 - PADDL X0,X1 - MOVOA X1,X12 - PSLLL $7,X1 - PXOR X1,X7 - PSRLL $25,X12 - PXOR X12,X7 - MOVOA X8,X1 - PADDL X6,X1 - MOVOA X1,X12 - PSLLL $18,X1 - PXOR X1,X2 - PSRLL $14,X12 - PXOR X12,X2 - MOVOA 336(R12),X12 - MOVOA X2,336(R12) - MOVOA X14,X1 - PADDL X12,X1 - MOVOA X1,X2 - PSLLL $7,X1 - PXOR X1,X5 - PSRLL $25,X2 - PXOR X2,X5 - MOVOA X0,X1 - PADDL X7,X1 - MOVOA X1,X2 - PSLLL $9,X1 - PXOR X1,X10 - PSRLL $23,X2 - PXOR X2,X10 - MOVOA X12,X1 - PADDL X5,X1 - MOVOA X1,X2 - PSLLL $9,X1 - PXOR X1,X8 - PSRLL $23,X2 - PXOR X2,X8 - MOVOA X7,X1 - PADDL X10,X1 - MOVOA X1,X2 - PSLLL $13,X1 - PXOR X1,X4 - PSRLL $19,X2 - PXOR X2,X4 - MOVOA X5,X1 - PADDL X8,X1 - MOVOA X1,X2 - PSLLL $13,X1 - PXOR X1,X14 - PSRLL $19,X2 - PXOR X2,X14 - MOVOA X10,X1 - PADDL X4,X1 - MOVOA X1,X2 - PSLLL $18,X1 - PXOR X1,X0 - PSRLL $14,X2 - PXOR X2,X0 - MOVOA 320(R12),X1 - MOVOA X0,320(R12) - MOVOA X8,X0 - PADDL X14,X0 - MOVOA X0,X2 - PSLLL $18,X0 - PXOR X0,X12 - PSRLL $14,X2 - PXOR X2,X12 - MOVOA X11,X0 - PADDL X1,X0 - MOVOA X0,X2 - PSLLL $7,X0 - PXOR X0,X6 - PSRLL $25,X2 - PXOR X2,X6 - MOVOA 336(R12),X2 - MOVOA X12,336(R12) - MOVOA X3,X0 - PADDL X2,X0 - MOVOA X0,X12 - PSLLL $7,X0 - PXOR X0,X13 - PSRLL $25,X12 - PXOR X12,X13 - MOVOA X1,X0 - PADDL X6,X0 - MOVOA X0,X12 - PSLLL $9,X0 - PXOR X0,X15 - PSRLL $23,X12 - PXOR X12,X15 - MOVOA X2,X0 - PADDL X13,X0 - MOVOA X0,X12 - PSLLL $9,X0 - PXOR X0,X9 - PSRLL $23,X12 - PXOR X12,X9 - MOVOA X6,X0 - PADDL X15,X0 - MOVOA X0,X12 - PSLLL $13,X0 - PXOR X0,X11 - PSRLL $19,X12 - PXOR X12,X11 - MOVOA X13,X0 - PADDL X9,X0 - MOVOA X0,X12 - PSLLL $13,X0 - PXOR X0,X3 - PSRLL $19,X12 - PXOR X12,X3 - MOVOA X15,X0 - PADDL X11,X0 - MOVOA X0,X12 - PSLLL $18,X0 - PXOR X0,X1 - PSRLL $14,X12 - PXOR X12,X1 - MOVOA X9,X0 - PADDL X3,X0 - MOVOA X0,X12 - PSLLL $18,X0 - PXOR X0,X2 - PSRLL $14,X12 - PXOR X12,X2 - MOVOA 320(R12),X12 - MOVOA 336(R12),X0 - SUBQ $2,DX - JA MAINLOOP1 - PADDL 112(R12),X12 - PADDL 176(R12),X7 - PADDL 224(R12),X10 - PADDL 272(R12),X4 - MOVD X12,DX - MOVD X7,CX - MOVD X10,R8 - MOVD X4,R9 - PSHUFL $0X39,X12,X12 - PSHUFL $0X39,X7,X7 - PSHUFL $0X39,X10,X10 - PSHUFL $0X39,X4,X4 - XORL 0(SI),DX - XORL 4(SI),CX - XORL 8(SI),R8 - XORL 12(SI),R9 - MOVL DX,0(DI) - MOVL CX,4(DI) - MOVL R8,8(DI) - MOVL R9,12(DI) - MOVD X12,DX - MOVD X7,CX - MOVD X10,R8 - MOVD X4,R9 - PSHUFL $0X39,X12,X12 - PSHUFL $0X39,X7,X7 - PSHUFL $0X39,X10,X10 - PSHUFL $0X39,X4,X4 - XORL 64(SI),DX - XORL 68(SI),CX - XORL 72(SI),R8 - XORL 76(SI),R9 - MOVL DX,64(DI) - MOVL CX,68(DI) - MOVL R8,72(DI) - MOVL R9,76(DI) - MOVD X12,DX - MOVD X7,CX - MOVD X10,R8 - MOVD X4,R9 - PSHUFL $0X39,X12,X12 - PSHUFL $0X39,X7,X7 - PSHUFL $0X39,X10,X10 - PSHUFL $0X39,X4,X4 - XORL 128(SI),DX - XORL 132(SI),CX - XORL 136(SI),R8 - XORL 140(SI),R9 - MOVL DX,128(DI) - MOVL CX,132(DI) - MOVL R8,136(DI) - MOVL R9,140(DI) - MOVD X12,DX - MOVD X7,CX - MOVD X10,R8 - MOVD X4,R9 - XORL 192(SI),DX - XORL 196(SI),CX - XORL 200(SI),R8 - XORL 204(SI),R9 - MOVL DX,192(DI) - MOVL CX,196(DI) - MOVL R8,200(DI) - MOVL R9,204(DI) - PADDL 240(R12),X14 - PADDL 64(R12),X0 - PADDL 128(R12),X5 - PADDL 192(R12),X8 - MOVD X14,DX - MOVD X0,CX - MOVD X5,R8 - MOVD X8,R9 - PSHUFL $0X39,X14,X14 - PSHUFL $0X39,X0,X0 - PSHUFL $0X39,X5,X5 - PSHUFL $0X39,X8,X8 - XORL 16(SI),DX - XORL 20(SI),CX - XORL 24(SI),R8 - XORL 28(SI),R9 - MOVL DX,16(DI) - MOVL CX,20(DI) - MOVL R8,24(DI) - MOVL R9,28(DI) - MOVD X14,DX - MOVD X0,CX - MOVD X5,R8 - MOVD X8,R9 - PSHUFL $0X39,X14,X14 - PSHUFL $0X39,X0,X0 - PSHUFL $0X39,X5,X5 - PSHUFL $0X39,X8,X8 - XORL 80(SI),DX - XORL 84(SI),CX - XORL 88(SI),R8 - XORL 92(SI),R9 - MOVL DX,80(DI) - MOVL CX,84(DI) - MOVL R8,88(DI) - MOVL R9,92(DI) - MOVD X14,DX - MOVD X0,CX - MOVD X5,R8 - MOVD X8,R9 - PSHUFL $0X39,X14,X14 - PSHUFL $0X39,X0,X0 - PSHUFL $0X39,X5,X5 - PSHUFL $0X39,X8,X8 - XORL 144(SI),DX - XORL 148(SI),CX - XORL 152(SI),R8 - XORL 156(SI),R9 - MOVL DX,144(DI) - MOVL CX,148(DI) - MOVL R8,152(DI) - MOVL R9,156(DI) - MOVD X14,DX - MOVD X0,CX - MOVD X5,R8 - MOVD X8,R9 - XORL 208(SI),DX - XORL 212(SI),CX - XORL 216(SI),R8 - XORL 220(SI),R9 - MOVL DX,208(DI) - MOVL CX,212(DI) - MOVL R8,216(DI) - MOVL R9,220(DI) - PADDL 288(R12),X15 - PADDL 304(R12),X11 - PADDL 80(R12),X1 - PADDL 144(R12),X6 - MOVD X15,DX - MOVD X11,CX - MOVD X1,R8 - MOVD X6,R9 - PSHUFL $0X39,X15,X15 - PSHUFL $0X39,X11,X11 - PSHUFL $0X39,X1,X1 - PSHUFL $0X39,X6,X6 - XORL 32(SI),DX - XORL 36(SI),CX - XORL 40(SI),R8 - XORL 44(SI),R9 - MOVL DX,32(DI) - MOVL CX,36(DI) - MOVL R8,40(DI) - MOVL R9,44(DI) - MOVD X15,DX - MOVD X11,CX - MOVD X1,R8 - MOVD X6,R9 - PSHUFL $0X39,X15,X15 - PSHUFL $0X39,X11,X11 - PSHUFL $0X39,X1,X1 - PSHUFL $0X39,X6,X6 - XORL 96(SI),DX - XORL 100(SI),CX - XORL 104(SI),R8 - XORL 108(SI),R9 - MOVL DX,96(DI) - MOVL CX,100(DI) - MOVL R8,104(DI) - MOVL R9,108(DI) - MOVD X15,DX - MOVD X11,CX - MOVD X1,R8 - MOVD X6,R9 - PSHUFL $0X39,X15,X15 - PSHUFL $0X39,X11,X11 - PSHUFL $0X39,X1,X1 - PSHUFL $0X39,X6,X6 - XORL 160(SI),DX - XORL 164(SI),CX - XORL 168(SI),R8 - XORL 172(SI),R9 - MOVL DX,160(DI) - MOVL CX,164(DI) - MOVL R8,168(DI) - MOVL R9,172(DI) - MOVD X15,DX - MOVD X11,CX - MOVD X1,R8 - MOVD X6,R9 - XORL 224(SI),DX - XORL 228(SI),CX - XORL 232(SI),R8 - XORL 236(SI),R9 - MOVL DX,224(DI) - MOVL CX,228(DI) - MOVL R8,232(DI) - MOVL R9,236(DI) - PADDL 160(R12),X13 - PADDL 208(R12),X9 - PADDL 256(R12),X3 - PADDL 96(R12),X2 - MOVD X13,DX - MOVD X9,CX - MOVD X3,R8 - MOVD X2,R9 - PSHUFL $0X39,X13,X13 - PSHUFL $0X39,X9,X9 - PSHUFL $0X39,X3,X3 - PSHUFL $0X39,X2,X2 - XORL 48(SI),DX - XORL 52(SI),CX - XORL 56(SI),R8 - XORL 60(SI),R9 - MOVL DX,48(DI) - MOVL CX,52(DI) - MOVL R8,56(DI) - MOVL R9,60(DI) - MOVD X13,DX - MOVD X9,CX - MOVD X3,R8 - MOVD X2,R9 - PSHUFL $0X39,X13,X13 - PSHUFL $0X39,X9,X9 - PSHUFL $0X39,X3,X3 - PSHUFL $0X39,X2,X2 - XORL 112(SI),DX - XORL 116(SI),CX - XORL 120(SI),R8 - XORL 124(SI),R9 - MOVL DX,112(DI) - MOVL CX,116(DI) - MOVL R8,120(DI) - MOVL R9,124(DI) - MOVD X13,DX - MOVD X9,CX - MOVD X3,R8 - MOVD X2,R9 - PSHUFL $0X39,X13,X13 - PSHUFL $0X39,X9,X9 - PSHUFL $0X39,X3,X3 - PSHUFL $0X39,X2,X2 - XORL 176(SI),DX - XORL 180(SI),CX - XORL 184(SI),R8 - XORL 188(SI),R9 - MOVL DX,176(DI) - MOVL CX,180(DI) - MOVL R8,184(DI) - MOVL R9,188(DI) - MOVD X13,DX - MOVD X9,CX - MOVD X3,R8 - MOVD X2,R9 - XORL 240(SI),DX - XORL 244(SI),CX - XORL 248(SI),R8 - XORL 252(SI),R9 - MOVL DX,240(DI) - MOVL CX,244(DI) - MOVL R8,248(DI) - MOVL R9,252(DI) - MOVQ 352(R12),R9 - SUBQ $256,R9 - ADDQ $256,SI - ADDQ $256,DI - CMPQ R9,$256 - JAE BYTESATLEAST256 - CMPQ R9,$0 - JBE DONE - BYTESBETWEEN1AND255: - CMPQ R9,$64 - JAE NOCOPY - MOVQ DI,DX - LEAQ 360(R12),DI - MOVQ R9,CX +BYTESBETWEEN1AND255: + CMPQ R9, $0x40 + JAE NOCOPY + MOVQ DI, DX + LEAQ 360(R12), DI + MOVQ R9, CX REP; MOVSB - LEAQ 360(R12),DI - LEAQ 360(R12),SI - NOCOPY: - MOVQ R9,352(R12) - MOVOA 48(R12),X0 - MOVOA 0(R12),X1 - MOVOA 16(R12),X2 - MOVOA 32(R12),X3 - MOVOA X1,X4 - MOVQ $20,CX - MAINLOOP2: - PADDL X0,X4 - MOVOA X0,X5 - MOVOA X4,X6 - PSLLL $7,X4 - PSRLL $25,X6 - PXOR X4,X3 - PXOR X6,X3 - PADDL X3,X5 - MOVOA X3,X4 - MOVOA X5,X6 - PSLLL $9,X5 - PSRLL $23,X6 - PXOR X5,X2 - PSHUFL $0X93,X3,X3 - PXOR X6,X2 - PADDL X2,X4 - MOVOA X2,X5 - MOVOA X4,X6 - PSLLL $13,X4 - PSRLL $19,X6 - PXOR X4,X1 - PSHUFL $0X4E,X2,X2 - PXOR X6,X1 - PADDL X1,X5 - MOVOA X3,X4 - MOVOA X5,X6 - PSLLL $18,X5 - PSRLL $14,X6 - PXOR X5,X0 - PSHUFL $0X39,X1,X1 - PXOR X6,X0 - PADDL X0,X4 - MOVOA X0,X5 - MOVOA X4,X6 - PSLLL $7,X4 - PSRLL $25,X6 - PXOR X4,X1 - PXOR X6,X1 - PADDL X1,X5 - MOVOA X1,X4 - MOVOA X5,X6 - PSLLL $9,X5 - PSRLL $23,X6 - PXOR X5,X2 - PSHUFL $0X93,X1,X1 - PXOR X6,X2 - PADDL X2,X4 - MOVOA X2,X5 - MOVOA X4,X6 - PSLLL $13,X4 - PSRLL $19,X6 - PXOR X4,X3 - PSHUFL $0X4E,X2,X2 - PXOR X6,X3 - PADDL X3,X5 - MOVOA X1,X4 - MOVOA X5,X6 - PSLLL $18,X5 - PSRLL $14,X6 - PXOR X5,X0 - PSHUFL $0X39,X3,X3 - PXOR X6,X0 - PADDL X0,X4 - MOVOA X0,X5 - MOVOA X4,X6 - PSLLL $7,X4 - PSRLL $25,X6 - PXOR X4,X3 - PXOR X6,X3 - PADDL X3,X5 - MOVOA X3,X4 - MOVOA X5,X6 - PSLLL $9,X5 - PSRLL $23,X6 - PXOR X5,X2 - PSHUFL $0X93,X3,X3 - PXOR X6,X2 - PADDL X2,X4 - MOVOA X2,X5 - MOVOA X4,X6 - PSLLL $13,X4 - PSRLL $19,X6 - PXOR X4,X1 - PSHUFL $0X4E,X2,X2 - PXOR X6,X1 - PADDL X1,X5 - MOVOA X3,X4 - MOVOA X5,X6 - PSLLL $18,X5 - PSRLL $14,X6 - PXOR X5,X0 - PSHUFL $0X39,X1,X1 - PXOR X6,X0 - PADDL X0,X4 - MOVOA X0,X5 - MOVOA X4,X6 - PSLLL $7,X4 - PSRLL $25,X6 - PXOR X4,X1 - PXOR X6,X1 - PADDL X1,X5 - MOVOA X1,X4 - MOVOA X5,X6 - PSLLL $9,X5 - PSRLL $23,X6 - PXOR X5,X2 - PSHUFL $0X93,X1,X1 - PXOR X6,X2 - PADDL X2,X4 - MOVOA X2,X5 - MOVOA X4,X6 - PSLLL $13,X4 - PSRLL $19,X6 - PXOR X4,X3 - PSHUFL $0X4E,X2,X2 - PXOR X6,X3 - SUBQ $4,CX - PADDL X3,X5 - MOVOA X1,X4 - MOVOA X5,X6 - PSLLL $18,X5 - PXOR X7,X7 - PSRLL $14,X6 - PXOR X5,X0 - PSHUFL $0X39,X3,X3 - PXOR X6,X0 - JA MAINLOOP2 - PADDL 48(R12),X0 - PADDL 0(R12),X1 - PADDL 16(R12),X2 - PADDL 32(R12),X3 - MOVD X0,CX - MOVD X1,R8 - MOVD X2,R9 - MOVD X3,AX - PSHUFL $0X39,X0,X0 - PSHUFL $0X39,X1,X1 - PSHUFL $0X39,X2,X2 - PSHUFL $0X39,X3,X3 - XORL 0(SI),CX - XORL 48(SI),R8 - XORL 32(SI),R9 - XORL 16(SI),AX - MOVL CX,0(DI) - MOVL R8,48(DI) - MOVL R9,32(DI) - MOVL AX,16(DI) - MOVD X0,CX - MOVD X1,R8 - MOVD X2,R9 - MOVD X3,AX - PSHUFL $0X39,X0,X0 - PSHUFL $0X39,X1,X1 - PSHUFL $0X39,X2,X2 - PSHUFL $0X39,X3,X3 - XORL 20(SI),CX - XORL 4(SI),R8 - XORL 52(SI),R9 - XORL 36(SI),AX - MOVL CX,20(DI) - MOVL R8,4(DI) - MOVL R9,52(DI) - MOVL AX,36(DI) - MOVD X0,CX - MOVD X1,R8 - MOVD X2,R9 - MOVD X3,AX - PSHUFL $0X39,X0,X0 - PSHUFL $0X39,X1,X1 - PSHUFL $0X39,X2,X2 - PSHUFL $0X39,X3,X3 - XORL 40(SI),CX - XORL 24(SI),R8 - XORL 8(SI),R9 - XORL 56(SI),AX - MOVL CX,40(DI) - MOVL R8,24(DI) - MOVL R9,8(DI) - MOVL AX,56(DI) - MOVD X0,CX - MOVD X1,R8 - MOVD X2,R9 - MOVD X3,AX - XORL 60(SI),CX - XORL 44(SI),R8 - XORL 28(SI),R9 - XORL 12(SI),AX - MOVL CX,60(DI) - MOVL R8,44(DI) - MOVL R9,28(DI) - MOVL AX,12(DI) - MOVQ 352(R12),R9 - MOVL 16(R12),CX - MOVL 36 (R12),R8 - ADDQ $1,CX - SHLQ $32,R8 - ADDQ R8,CX - MOVQ CX,R8 - SHRQ $32,R8 - MOVL CX,16(R12) - MOVL R8, 36 (R12) - CMPQ R9,$64 - JA BYTESATLEAST65 - JAE BYTESATLEAST64 - MOVQ DI,SI - MOVQ DX,DI - MOVQ R9,CX + LEAQ 360(R12), DI + LEAQ 360(R12), SI + +NOCOPY: + MOVQ R9, 352(R12) + MOVOA 48(R12), X0 + MOVOA (R12), X1 + MOVOA 16(R12), X2 + MOVOA 32(R12), X3 + MOVOA X1, X4 + MOVQ $0x00000014, CX + +MAINLOOP2: + PADDL X0, X4 + MOVOA X0, X5 + MOVOA X4, X6 + PSLLL $0x07, X4 + PSRLL $0x19, X6 + PXOR X4, X3 + PXOR X6, X3 + PADDL X3, X5 + MOVOA X3, X4 + MOVOA X5, X6 + PSLLL $0x09, X5 + PSRLL $0x17, X6 + PXOR X5, X2 + PSHUFL $0x93, X3, X3 + PXOR X6, X2 + PADDL X2, X4 + MOVOA X2, X5 + MOVOA X4, X6 + PSLLL $0x0d, X4 + PSRLL $0x13, X6 + PXOR X4, X1 + PSHUFL $0x4e, X2, X2 + PXOR X6, X1 + PADDL X1, X5 + MOVOA X3, X4 + MOVOA X5, X6 + PSLLL $0x12, X5 + PSRLL $0x0e, X6 + PXOR X5, X0 + PSHUFL $0x39, X1, X1 + PXOR X6, X0 + PADDL X0, X4 + MOVOA X0, X5 + MOVOA X4, X6 + PSLLL $0x07, X4 + PSRLL $0x19, X6 + PXOR X4, X1 + PXOR X6, X1 + PADDL X1, X5 + MOVOA X1, X4 + MOVOA X5, X6 + PSLLL $0x09, X5 + PSRLL $0x17, X6 + PXOR X5, X2 + PSHUFL $0x93, X1, X1 + PXOR X6, X2 + PADDL X2, X4 + MOVOA X2, X5 + MOVOA X4, X6 + PSLLL $0x0d, X4 + PSRLL $0x13, X6 + PXOR X4, X3 + PSHUFL $0x4e, X2, X2 + PXOR X6, X3 + PADDL X3, X5 + MOVOA X1, X4 + MOVOA X5, X6 + PSLLL $0x12, X5 + PSRLL $0x0e, X6 + PXOR X5, X0 + PSHUFL $0x39, X3, X3 + PXOR X6, X0 + PADDL X0, X4 + MOVOA X0, X5 + MOVOA X4, X6 + PSLLL $0x07, X4 + PSRLL $0x19, X6 + PXOR X4, X3 + PXOR X6, X3 + PADDL X3, X5 + MOVOA X3, X4 + MOVOA X5, X6 + PSLLL $0x09, X5 + PSRLL $0x17, X6 + PXOR X5, X2 + PSHUFL $0x93, X3, X3 + PXOR X6, X2 + PADDL X2, X4 + MOVOA X2, X5 + MOVOA X4, X6 + PSLLL $0x0d, X4 + PSRLL $0x13, X6 + PXOR X4, X1 + PSHUFL $0x4e, X2, X2 + PXOR X6, X1 + PADDL X1, X5 + MOVOA X3, X4 + MOVOA X5, X6 + PSLLL $0x12, X5 + PSRLL $0x0e, X6 + PXOR X5, X0 + PSHUFL $0x39, X1, X1 + PXOR X6, X0 + PADDL X0, X4 + MOVOA X0, X5 + MOVOA X4, X6 + PSLLL $0x07, X4 + PSRLL $0x19, X6 + PXOR X4, X1 + PXOR X6, X1 + PADDL X1, X5 + MOVOA X1, X4 + MOVOA X5, X6 + PSLLL $0x09, X5 + PSRLL $0x17, X6 + PXOR X5, X2 + PSHUFL $0x93, X1, X1 + PXOR X6, X2 + PADDL X2, X4 + MOVOA X2, X5 + MOVOA X4, X6 + PSLLL $0x0d, X4 + PSRLL $0x13, X6 + PXOR X4, X3 + PSHUFL $0x4e, X2, X2 + PXOR X6, X3 + SUBQ $0x04, CX + PADDL X3, X5 + MOVOA X1, X4 + MOVOA X5, X6 + PSLLL $0x12, X5 + PXOR X7, X7 + PSRLL $0x0e, X6 + PXOR X5, X0 + PSHUFL $0x39, X3, X3 + PXOR X6, X0 + JA MAINLOOP2 + PADDL 48(R12), X0 + PADDL (R12), X1 + PADDL 16(R12), X2 + PADDL 32(R12), X3 + MOVD X0, CX + MOVD X1, R8 + MOVD X2, R9 + MOVD X3, AX + PSHUFL $0x39, X0, X0 + PSHUFL $0x39, X1, X1 + PSHUFL $0x39, X2, X2 + PSHUFL $0x39, X3, X3 + XORL (SI), CX + XORL 48(SI), R8 + XORL 32(SI), R9 + XORL 16(SI), AX + MOVL CX, (DI) + MOVL R8, 48(DI) + MOVL R9, 32(DI) + MOVL AX, 16(DI) + MOVD X0, CX + MOVD X1, R8 + MOVD X2, R9 + MOVD X3, AX + PSHUFL $0x39, X0, X0 + PSHUFL $0x39, X1, X1 + PSHUFL $0x39, X2, X2 + PSHUFL $0x39, X3, X3 + XORL 20(SI), CX + XORL 4(SI), R8 + XORL 52(SI), R9 + XORL 36(SI), AX + MOVL CX, 20(DI) + MOVL R8, 4(DI) + MOVL R9, 52(DI) + MOVL AX, 36(DI) + MOVD X0, CX + MOVD X1, R8 + MOVD X2, R9 + MOVD X3, AX + PSHUFL $0x39, X0, X0 + PSHUFL $0x39, X1, X1 + PSHUFL $0x39, X2, X2 + PSHUFL $0x39, X3, X3 + XORL 40(SI), CX + XORL 24(SI), R8 + XORL 8(SI), R9 + XORL 56(SI), AX + MOVL CX, 40(DI) + MOVL R8, 24(DI) + MOVL R9, 8(DI) + MOVL AX, 56(DI) + MOVD X0, CX + MOVD X1, R8 + MOVD X2, R9 + MOVD X3, AX + XORL 60(SI), CX + XORL 44(SI), R8 + XORL 28(SI), R9 + XORL 12(SI), AX + MOVL CX, 60(DI) + MOVL R8, 44(DI) + MOVL R9, 28(DI) + MOVL AX, 12(DI) + MOVQ 352(R12), R9 + MOVL 16(R12), CX + MOVL 36(R12), R8 + ADDQ $0x01, CX + SHLQ $0x20, R8 + ADDQ R8, CX + MOVQ CX, R8 + SHRQ $0x20, R8 + MOVL CX, 16(R12) + MOVL R8, 36(R12) + CMPQ R9, $0x40 + JA BYTESATLEAST65 + JAE BYTESATLEAST64 + MOVQ DI, SI + MOVQ DX, DI + MOVQ R9, CX REP; MOVSB - BYTESATLEAST64: - DONE: + +BYTESATLEAST64: +DONE: RET - BYTESATLEAST65: - SUBQ $64,R9 - ADDQ $64,DI - ADDQ $64,SI - JMP BYTESBETWEEN1AND255 + +BYTESATLEAST65: + SUBQ $0x40, R9 + ADDQ $0x40, DI + ADDQ $0x40, SI + JMP BYTESBETWEEN1AND255 diff --git a/vendor/golang.org/x/crypto/sha3/shake.go b/vendor/golang.org/x/crypto/sha3/shake.go index 1ea9275b..a01ef435 100644 --- a/vendor/golang.org/x/crypto/sha3/shake.go +++ b/vendor/golang.org/x/crypto/sha3/shake.go @@ -85,9 +85,9 @@ func newCShake(N, S []byte, rate, outputLen int, dsbyte byte) ShakeHash { // leftEncode returns max 9 bytes c.initBlock = make([]byte, 0, 9*2+len(N)+len(S)) - c.initBlock = append(c.initBlock, leftEncode(uint64(len(N)*8))...) + c.initBlock = append(c.initBlock, leftEncode(uint64(len(N))*8)...) c.initBlock = append(c.initBlock, N...) - c.initBlock = append(c.initBlock, leftEncode(uint64(len(S)*8))...) + c.initBlock = append(c.initBlock, leftEncode(uint64(len(S))*8)...) c.initBlock = append(c.initBlock, S...) c.Write(bytepad(c.initBlock, c.rate)) return &c diff --git a/vendor/golang.org/x/crypto/ssh/server.go b/vendor/golang.org/x/crypto/ssh/server.go index 3ca9e89e..c0d1c29e 100644 --- a/vendor/golang.org/x/crypto/ssh/server.go +++ b/vendor/golang.org/x/crypto/ssh/server.go @@ -510,8 +510,8 @@ userAuthLoop: if err := s.transport.writePacket(Marshal(discMsg)); err != nil { return nil, err } - - return nil, discMsg + authErrs = append(authErrs, discMsg) + return nil, &ServerAuthError{Errors: authErrs} } var userAuthReq userAuthRequestMsg diff --git a/vendor/golang.org/x/net/http2/config.go b/vendor/golang.org/x/net/http2/config.go new file mode 100644 index 00000000..de58dfb8 --- /dev/null +++ b/vendor/golang.org/x/net/http2/config.go @@ -0,0 +1,122 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package http2 + +import ( + "math" + "net/http" + "time" +) + +// http2Config is a package-internal version of net/http.HTTP2Config. +// +// http.HTTP2Config was added in Go 1.24. +// When running with a version of net/http that includes HTTP2Config, +// we merge the configuration with the fields in Transport or Server +// to produce an http2Config. +// +// Zero valued fields in http2Config are interpreted as in the +// net/http.HTTPConfig documentation. +// +// Precedence order for reconciling configurations is: +// +// - Use the net/http.{Server,Transport}.HTTP2Config value, when non-zero. +// - Otherwise use the http2.{Server.Transport} value. +// - If the resulting value is zero or out of range, use a default. +type http2Config struct { + MaxConcurrentStreams uint32 + MaxDecoderHeaderTableSize uint32 + MaxEncoderHeaderTableSize uint32 + MaxReadFrameSize uint32 + MaxUploadBufferPerConnection int32 + MaxUploadBufferPerStream int32 + SendPingTimeout time.Duration + PingTimeout time.Duration + WriteByteTimeout time.Duration + PermitProhibitedCipherSuites bool + CountError func(errType string) +} + +// configFromServer merges configuration settings from +// net/http.Server.HTTP2Config and http2.Server. +func configFromServer(h1 *http.Server, h2 *Server) http2Config { + conf := http2Config{ + MaxConcurrentStreams: h2.MaxConcurrentStreams, + MaxEncoderHeaderTableSize: h2.MaxEncoderHeaderTableSize, + MaxDecoderHeaderTableSize: h2.MaxDecoderHeaderTableSize, + MaxReadFrameSize: h2.MaxReadFrameSize, + MaxUploadBufferPerConnection: h2.MaxUploadBufferPerConnection, + MaxUploadBufferPerStream: h2.MaxUploadBufferPerStream, + SendPingTimeout: h2.ReadIdleTimeout, + PingTimeout: h2.PingTimeout, + WriteByteTimeout: h2.WriteByteTimeout, + PermitProhibitedCipherSuites: h2.PermitProhibitedCipherSuites, + CountError: h2.CountError, + } + fillNetHTTPServerConfig(&conf, h1) + setConfigDefaults(&conf, true) + return conf +} + +// configFromServer merges configuration settings from h2 and h2.t1.HTTP2 +// (the net/http Transport). +func configFromTransport(h2 *Transport) http2Config { + conf := http2Config{ + MaxEncoderHeaderTableSize: h2.MaxEncoderHeaderTableSize, + MaxDecoderHeaderTableSize: h2.MaxDecoderHeaderTableSize, + MaxReadFrameSize: h2.MaxReadFrameSize, + SendPingTimeout: h2.ReadIdleTimeout, + PingTimeout: h2.PingTimeout, + WriteByteTimeout: h2.WriteByteTimeout, + } + + // Unlike most config fields, where out-of-range values revert to the default, + // Transport.MaxReadFrameSize clips. + if conf.MaxReadFrameSize < minMaxFrameSize { + conf.MaxReadFrameSize = minMaxFrameSize + } else if conf.MaxReadFrameSize > maxFrameSize { + conf.MaxReadFrameSize = maxFrameSize + } + + if h2.t1 != nil { + fillNetHTTPTransportConfig(&conf, h2.t1) + } + setConfigDefaults(&conf, false) + return conf +} + +func setDefault[T ~int | ~int32 | ~uint32 | ~int64](v *T, minval, maxval, defval T) { + if *v < minval || *v > maxval { + *v = defval + } +} + +func setConfigDefaults(conf *http2Config, server bool) { + setDefault(&conf.MaxConcurrentStreams, 1, math.MaxUint32, defaultMaxStreams) + setDefault(&conf.MaxEncoderHeaderTableSize, 1, math.MaxUint32, initialHeaderTableSize) + setDefault(&conf.MaxDecoderHeaderTableSize, 1, math.MaxUint32, initialHeaderTableSize) + if server { + setDefault(&conf.MaxUploadBufferPerConnection, initialWindowSize, math.MaxInt32, 1<<20) + } else { + setDefault(&conf.MaxUploadBufferPerConnection, initialWindowSize, math.MaxInt32, transportDefaultConnFlow) + } + if server { + setDefault(&conf.MaxUploadBufferPerStream, 1, math.MaxInt32, 1<<20) + } else { + setDefault(&conf.MaxUploadBufferPerStream, 1, math.MaxInt32, transportDefaultStreamFlow) + } + setDefault(&conf.MaxReadFrameSize, minMaxFrameSize, maxFrameSize, defaultMaxReadFrameSize) + setDefault(&conf.PingTimeout, 1, math.MaxInt64, 15*time.Second) +} + +// adjustHTTP1MaxHeaderSize converts a limit in bytes on the size of an HTTP/1 header +// to an HTTP/2 MAX_HEADER_LIST_SIZE value. +func adjustHTTP1MaxHeaderSize(n int64) int64 { + // http2's count is in a slightly different unit and includes 32 bytes per pair. + // So, take the net/http.Server value and pad it up a bit, assuming 10 headers. + const perFieldOverhead = 32 // per http2 spec + const typicalHeaders = 10 // conservative + return n + typicalHeaders*perFieldOverhead +} diff --git a/vendor/golang.org/x/net/http2/config_go124.go b/vendor/golang.org/x/net/http2/config_go124.go new file mode 100644 index 00000000..e3784123 --- /dev/null +++ b/vendor/golang.org/x/net/http2/config_go124.go @@ -0,0 +1,61 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.24 + +package http2 + +import "net/http" + +// fillNetHTTPServerConfig sets fields in conf from srv.HTTP2. +func fillNetHTTPServerConfig(conf *http2Config, srv *http.Server) { + fillNetHTTPConfig(conf, srv.HTTP2) +} + +// fillNetHTTPServerConfig sets fields in conf from tr.HTTP2. +func fillNetHTTPTransportConfig(conf *http2Config, tr *http.Transport) { + fillNetHTTPConfig(conf, tr.HTTP2) +} + +func fillNetHTTPConfig(conf *http2Config, h2 *http.HTTP2Config) { + if h2 == nil { + return + } + if h2.MaxConcurrentStreams != 0 { + conf.MaxConcurrentStreams = uint32(h2.MaxConcurrentStreams) + } + if h2.MaxEncoderHeaderTableSize != 0 { + conf.MaxEncoderHeaderTableSize = uint32(h2.MaxEncoderHeaderTableSize) + } + if h2.MaxDecoderHeaderTableSize != 0 { + conf.MaxDecoderHeaderTableSize = uint32(h2.MaxDecoderHeaderTableSize) + } + if h2.MaxConcurrentStreams != 0 { + conf.MaxConcurrentStreams = uint32(h2.MaxConcurrentStreams) + } + if h2.MaxReadFrameSize != 0 { + conf.MaxReadFrameSize = uint32(h2.MaxReadFrameSize) + } + if h2.MaxReceiveBufferPerConnection != 0 { + conf.MaxUploadBufferPerConnection = int32(h2.MaxReceiveBufferPerConnection) + } + if h2.MaxReceiveBufferPerStream != 0 { + conf.MaxUploadBufferPerStream = int32(h2.MaxReceiveBufferPerStream) + } + if h2.SendPingTimeout != 0 { + conf.SendPingTimeout = h2.SendPingTimeout + } + if h2.PingTimeout != 0 { + conf.PingTimeout = h2.PingTimeout + } + if h2.WriteByteTimeout != 0 { + conf.WriteByteTimeout = h2.WriteByteTimeout + } + if h2.PermitProhibitedCipherSuites { + conf.PermitProhibitedCipherSuites = true + } + if h2.CountError != nil { + conf.CountError = h2.CountError + } +} diff --git a/vendor/golang.org/x/net/http2/config_pre_go124.go b/vendor/golang.org/x/net/http2/config_pre_go124.go new file mode 100644 index 00000000..060fd6c6 --- /dev/null +++ b/vendor/golang.org/x/net/http2/config_pre_go124.go @@ -0,0 +1,16 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !go1.24 + +package http2 + +import "net/http" + +// Pre-Go 1.24 fallback. +// The Server.HTTP2 and Transport.HTTP2 config fields were added in Go 1.24. + +func fillNetHTTPServerConfig(conf *http2Config, srv *http.Server) {} + +func fillNetHTTPTransportConfig(conf *http2Config, tr *http.Transport) {} diff --git a/vendor/golang.org/x/net/http2/http2.go b/vendor/golang.org/x/net/http2/http2.go index 003e649f..7688c356 100644 --- a/vendor/golang.org/x/net/http2/http2.go +++ b/vendor/golang.org/x/net/http2/http2.go @@ -19,8 +19,9 @@ import ( "bufio" "context" "crypto/tls" + "errors" "fmt" - "io" + "net" "net/http" "os" "sort" @@ -237,13 +238,19 @@ func (cw closeWaiter) Wait() { // Its buffered writer is lazily allocated as needed, to minimize // idle memory usage with many connections. type bufferedWriter struct { - _ incomparable - w io.Writer // immutable - bw *bufio.Writer // non-nil when data is buffered + _ incomparable + group synctestGroupInterface // immutable + conn net.Conn // immutable + bw *bufio.Writer // non-nil when data is buffered + byteTimeout time.Duration // immutable, WriteByteTimeout } -func newBufferedWriter(w io.Writer) *bufferedWriter { - return &bufferedWriter{w: w} +func newBufferedWriter(group synctestGroupInterface, conn net.Conn, timeout time.Duration) *bufferedWriter { + return &bufferedWriter{ + group: group, + conn: conn, + byteTimeout: timeout, + } } // bufWriterPoolBufferSize is the size of bufio.Writer's @@ -270,7 +277,7 @@ func (w *bufferedWriter) Available() int { func (w *bufferedWriter) Write(p []byte) (n int, err error) { if w.bw == nil { bw := bufWriterPool.Get().(*bufio.Writer) - bw.Reset(w.w) + bw.Reset((*bufferedWriterTimeoutWriter)(w)) w.bw = bw } return w.bw.Write(p) @@ -288,6 +295,38 @@ func (w *bufferedWriter) Flush() error { return err } +type bufferedWriterTimeoutWriter bufferedWriter + +func (w *bufferedWriterTimeoutWriter) Write(p []byte) (n int, err error) { + return writeWithByteTimeout(w.group, w.conn, w.byteTimeout, p) +} + +// writeWithByteTimeout writes to conn. +// If more than timeout passes without any bytes being written to the connection, +// the write fails. +func writeWithByteTimeout(group synctestGroupInterface, conn net.Conn, timeout time.Duration, p []byte) (n int, err error) { + if timeout <= 0 { + return conn.Write(p) + } + for { + var now time.Time + if group == nil { + now = time.Now() + } else { + now = group.Now() + } + conn.SetWriteDeadline(now.Add(timeout)) + nn, err := conn.Write(p[n:]) + n += nn + if n == len(p) || nn == 0 || !errors.Is(err, os.ErrDeadlineExceeded) { + // Either we finished the write, made no progress, or hit the deadline. + // Whichever it is, we're done now. + conn.SetWriteDeadline(time.Time{}) + return n, err + } + } +} + func mustUint31(v int32) uint32 { if v < 0 || v > 2147483647 { panic("out of range") diff --git a/vendor/golang.org/x/net/http2/server.go b/vendor/golang.org/x/net/http2/server.go index 6c349f3e..617b4a47 100644 --- a/vendor/golang.org/x/net/http2/server.go +++ b/vendor/golang.org/x/net/http2/server.go @@ -29,6 +29,7 @@ import ( "bufio" "bytes" "context" + "crypto/rand" "crypto/tls" "errors" "fmt" @@ -52,10 +53,14 @@ import ( ) const ( - prefaceTimeout = 10 * time.Second - firstSettingsTimeout = 2 * time.Second // should be in-flight with preface anyway - handlerChunkWriteSize = 4 << 10 - defaultMaxStreams = 250 // TODO: make this 100 as the GFE seems to? + prefaceTimeout = 10 * time.Second + firstSettingsTimeout = 2 * time.Second // should be in-flight with preface anyway + handlerChunkWriteSize = 4 << 10 + defaultMaxStreams = 250 // TODO: make this 100 as the GFE seems to? + + // maxQueuedControlFrames is the maximum number of control frames like + // SETTINGS, PING and RST_STREAM that will be queued for writing before + // the connection is closed to prevent memory exhaustion attacks. maxQueuedControlFrames = 10000 ) @@ -127,6 +132,22 @@ type Server struct { // If zero or negative, there is no timeout. IdleTimeout time.Duration + // ReadIdleTimeout is the timeout after which a health check using a ping + // frame will be carried out if no frame is received on the connection. + // If zero, no health check is performed. + ReadIdleTimeout time.Duration + + // PingTimeout is the timeout after which the connection will be closed + // if a response to a ping is not received. + // If zero, a default of 15 seconds is used. + PingTimeout time.Duration + + // WriteByteTimeout is the timeout after which a connection will be + // closed if no data can be written to it. The timeout begins when data is + // available to write, and is extended whenever any bytes are written. + // If zero or negative, there is no timeout. + WriteByteTimeout time.Duration + // MaxUploadBufferPerConnection is the size of the initial flow // control window for each connections. The HTTP/2 spec does not // allow this to be smaller than 65535 or larger than 2^32-1. @@ -189,57 +210,6 @@ func (s *Server) afterFunc(d time.Duration, f func()) timer { return timeTimer{time.AfterFunc(d, f)} } -func (s *Server) initialConnRecvWindowSize() int32 { - if s.MaxUploadBufferPerConnection >= initialWindowSize { - return s.MaxUploadBufferPerConnection - } - return 1 << 20 -} - -func (s *Server) initialStreamRecvWindowSize() int32 { - if s.MaxUploadBufferPerStream > 0 { - return s.MaxUploadBufferPerStream - } - return 1 << 20 -} - -func (s *Server) maxReadFrameSize() uint32 { - if v := s.MaxReadFrameSize; v >= minMaxFrameSize && v <= maxFrameSize { - return v - } - return defaultMaxReadFrameSize -} - -func (s *Server) maxConcurrentStreams() uint32 { - if v := s.MaxConcurrentStreams; v > 0 { - return v - } - return defaultMaxStreams -} - -func (s *Server) maxDecoderHeaderTableSize() uint32 { - if v := s.MaxDecoderHeaderTableSize; v > 0 { - return v - } - return initialHeaderTableSize -} - -func (s *Server) maxEncoderHeaderTableSize() uint32 { - if v := s.MaxEncoderHeaderTableSize; v > 0 { - return v - } - return initialHeaderTableSize -} - -// maxQueuedControlFrames is the maximum number of control frames like -// SETTINGS, PING and RST_STREAM that will be queued for writing before -// the connection is closed to prevent memory exhaustion attacks. -func (s *Server) maxQueuedControlFrames() int { - // TODO: if anybody asks, add a Server field, and remember to define the - // behavior of negative values. - return maxQueuedControlFrames -} - type serverInternalState struct { mu sync.Mutex activeConns map[*serverConn]struct{} @@ -440,13 +410,15 @@ func (s *Server) serveConn(c net.Conn, opts *ServeConnOpts, newf func(*serverCon baseCtx, cancel := serverConnBaseContext(c, opts) defer cancel() + http1srv := opts.baseConfig() + conf := configFromServer(http1srv, s) sc := &serverConn{ srv: s, - hs: opts.baseConfig(), + hs: http1srv, conn: c, baseCtx: baseCtx, remoteAddrStr: c.RemoteAddr().String(), - bw: newBufferedWriter(c), + bw: newBufferedWriter(s.group, c, conf.WriteByteTimeout), handler: opts.handler(), streams: make(map[uint32]*stream), readFrameCh: make(chan readFrameResult), @@ -456,9 +428,12 @@ func (s *Server) serveConn(c net.Conn, opts *ServeConnOpts, newf func(*serverCon bodyReadCh: make(chan bodyReadMsg), // buffering doesn't matter either way doneServing: make(chan struct{}), clientMaxStreams: math.MaxUint32, // Section 6.5.2: "Initially, there is no limit to this value" - advMaxStreams: s.maxConcurrentStreams(), + advMaxStreams: conf.MaxConcurrentStreams, initialStreamSendWindowSize: initialWindowSize, + initialStreamRecvWindowSize: conf.MaxUploadBufferPerStream, maxFrameSize: initialMaxFrameSize, + pingTimeout: conf.PingTimeout, + countErrorFunc: conf.CountError, serveG: newGoroutineLock(), pushEnabled: true, sawClientPreface: opts.SawClientPreface, @@ -491,15 +466,15 @@ func (s *Server) serveConn(c net.Conn, opts *ServeConnOpts, newf func(*serverCon sc.flow.add(initialWindowSize) sc.inflow.init(initialWindowSize) sc.hpackEncoder = hpack.NewEncoder(&sc.headerWriteBuf) - sc.hpackEncoder.SetMaxDynamicTableSizeLimit(s.maxEncoderHeaderTableSize()) + sc.hpackEncoder.SetMaxDynamicTableSizeLimit(conf.MaxEncoderHeaderTableSize) fr := NewFramer(sc.bw, c) - if s.CountError != nil { - fr.countError = s.CountError + if conf.CountError != nil { + fr.countError = conf.CountError } - fr.ReadMetaHeaders = hpack.NewDecoder(s.maxDecoderHeaderTableSize(), nil) + fr.ReadMetaHeaders = hpack.NewDecoder(conf.MaxDecoderHeaderTableSize, nil) fr.MaxHeaderListSize = sc.maxHeaderListSize() - fr.SetMaxReadFrameSize(s.maxReadFrameSize()) + fr.SetMaxReadFrameSize(conf.MaxReadFrameSize) sc.framer = fr if tc, ok := c.(connectionStater); ok { @@ -532,7 +507,7 @@ func (s *Server) serveConn(c net.Conn, opts *ServeConnOpts, newf func(*serverCon // So for now, do nothing here again. } - if !s.PermitProhibitedCipherSuites && isBadCipher(sc.tlsState.CipherSuite) { + if !conf.PermitProhibitedCipherSuites && isBadCipher(sc.tlsState.CipherSuite) { // "Endpoints MAY choose to generate a connection error // (Section 5.4.1) of type INADEQUATE_SECURITY if one of // the prohibited cipher suites are negotiated." @@ -569,7 +544,7 @@ func (s *Server) serveConn(c net.Conn, opts *ServeConnOpts, newf func(*serverCon opts.UpgradeRequest = nil } - sc.serve() + sc.serve(conf) } func serverConnBaseContext(c net.Conn, opts *ServeConnOpts) (ctx context.Context, cancel func()) { @@ -609,6 +584,7 @@ type serverConn struct { tlsState *tls.ConnectionState // shared by all handlers, like net/http remoteAddrStr string writeSched WriteScheduler + countErrorFunc func(errType string) // Everything following is owned by the serve loop; use serveG.check(): serveG goroutineLock // used to verify funcs are on serve() @@ -628,6 +604,7 @@ type serverConn struct { streams map[uint32]*stream unstartedHandlers []unstartedHandler initialStreamSendWindowSize int32 + initialStreamRecvWindowSize int32 maxFrameSize int32 peerMaxHeaderListSize uint32 // zero means unknown (default) canonHeader map[string]string // http2-lower-case -> Go-Canonical-Case @@ -638,9 +615,14 @@ type serverConn struct { inGoAway bool // we've started to or sent GOAWAY inFrameScheduleLoop bool // whether we're in the scheduleFrameWrite loop needToSendGoAway bool // we need to schedule a GOAWAY frame write + pingSent bool + sentPingData [8]byte goAwayCode ErrCode shutdownTimer timer // nil until used idleTimer timer // nil if unused + readIdleTimeout time.Duration + pingTimeout time.Duration + readIdleTimer timer // nil if unused // Owned by the writeFrameAsync goroutine: headerWriteBuf bytes.Buffer @@ -655,11 +637,7 @@ func (sc *serverConn) maxHeaderListSize() uint32 { if n <= 0 { n = http.DefaultMaxHeaderBytes } - // http2's count is in a slightly different unit and includes 32 bytes per pair. - // So, take the net/http.Server value and pad it up a bit, assuming 10 headers. - const perFieldOverhead = 32 // per http2 spec - const typicalHeaders = 10 // conservative - return uint32(n + typicalHeaders*perFieldOverhead) + return uint32(adjustHTTP1MaxHeaderSize(int64(n))) } func (sc *serverConn) curOpenStreams() uint32 { @@ -923,7 +901,7 @@ func (sc *serverConn) notePanic() { } } -func (sc *serverConn) serve() { +func (sc *serverConn) serve(conf http2Config) { sc.serveG.check() defer sc.notePanic() defer sc.conn.Close() @@ -937,18 +915,18 @@ func (sc *serverConn) serve() { sc.writeFrame(FrameWriteRequest{ write: writeSettings{ - {SettingMaxFrameSize, sc.srv.maxReadFrameSize()}, + {SettingMaxFrameSize, conf.MaxReadFrameSize}, {SettingMaxConcurrentStreams, sc.advMaxStreams}, {SettingMaxHeaderListSize, sc.maxHeaderListSize()}, - {SettingHeaderTableSize, sc.srv.maxDecoderHeaderTableSize()}, - {SettingInitialWindowSize, uint32(sc.srv.initialStreamRecvWindowSize())}, + {SettingHeaderTableSize, conf.MaxDecoderHeaderTableSize}, + {SettingInitialWindowSize, uint32(sc.initialStreamRecvWindowSize)}, }, }) sc.unackedSettings++ // Each connection starts with initialWindowSize inflow tokens. // If a higher value is configured, we add more tokens. - if diff := sc.srv.initialConnRecvWindowSize() - initialWindowSize; diff > 0 { + if diff := conf.MaxUploadBufferPerConnection - initialWindowSize; diff > 0 { sc.sendWindowUpdate(nil, int(diff)) } @@ -968,11 +946,18 @@ func (sc *serverConn) serve() { defer sc.idleTimer.Stop() } + if conf.SendPingTimeout > 0 { + sc.readIdleTimeout = conf.SendPingTimeout + sc.readIdleTimer = sc.srv.afterFunc(conf.SendPingTimeout, sc.onReadIdleTimer) + defer sc.readIdleTimer.Stop() + } + go sc.readFrames() // closed by defer sc.conn.Close above settingsTimer := sc.srv.afterFunc(firstSettingsTimeout, sc.onSettingsTimer) defer settingsTimer.Stop() + lastFrameTime := sc.srv.now() loopNum := 0 for { loopNum++ @@ -986,6 +971,7 @@ func (sc *serverConn) serve() { case res := <-sc.wroteFrameCh: sc.wroteFrame(res) case res := <-sc.readFrameCh: + lastFrameTime = sc.srv.now() // Process any written frames before reading new frames from the client since a // written frame could have triggered a new stream to be started. if sc.writingFrameAsync { @@ -1017,6 +1003,8 @@ func (sc *serverConn) serve() { case idleTimerMsg: sc.vlogf("connection is idle") sc.goAway(ErrCodeNo) + case readIdleTimerMsg: + sc.handlePingTimer(lastFrameTime) case shutdownTimerMsg: sc.vlogf("GOAWAY close timer fired; closing conn from %v", sc.conn.RemoteAddr()) return @@ -1039,7 +1027,7 @@ func (sc *serverConn) serve() { // If the peer is causing us to generate a lot of control frames, // but not reading them from us, assume they are trying to make us // run out of memory. - if sc.queuedControlFrames > sc.srv.maxQueuedControlFrames() { + if sc.queuedControlFrames > maxQueuedControlFrames { sc.vlogf("http2: too many control frames in send queue, closing connection") return } @@ -1055,12 +1043,39 @@ func (sc *serverConn) serve() { } } +func (sc *serverConn) handlePingTimer(lastFrameReadTime time.Time) { + if sc.pingSent { + sc.vlogf("timeout waiting for PING response") + sc.conn.Close() + return + } + + pingAt := lastFrameReadTime.Add(sc.readIdleTimeout) + now := sc.srv.now() + if pingAt.After(now) { + // We received frames since arming the ping timer. + // Reset it for the next possible timeout. + sc.readIdleTimer.Reset(pingAt.Sub(now)) + return + } + + sc.pingSent = true + // Ignore crypto/rand.Read errors: It generally can't fail, and worse case if it does + // is we send a PING frame containing 0s. + _, _ = rand.Read(sc.sentPingData[:]) + sc.writeFrame(FrameWriteRequest{ + write: &writePing{data: sc.sentPingData}, + }) + sc.readIdleTimer.Reset(sc.pingTimeout) +} + type serverMessage int // Message values sent to serveMsgCh. var ( settingsTimerMsg = new(serverMessage) idleTimerMsg = new(serverMessage) + readIdleTimerMsg = new(serverMessage) shutdownTimerMsg = new(serverMessage) gracefulShutdownMsg = new(serverMessage) handlerDoneMsg = new(serverMessage) @@ -1068,6 +1083,7 @@ var ( func (sc *serverConn) onSettingsTimer() { sc.sendServeMsg(settingsTimerMsg) } func (sc *serverConn) onIdleTimer() { sc.sendServeMsg(idleTimerMsg) } +func (sc *serverConn) onReadIdleTimer() { sc.sendServeMsg(readIdleTimerMsg) } func (sc *serverConn) onShutdownTimer() { sc.sendServeMsg(shutdownTimerMsg) } func (sc *serverConn) sendServeMsg(msg interface{}) { @@ -1320,6 +1336,10 @@ func (sc *serverConn) wroteFrame(res frameWriteResult) { sc.writingFrame = false sc.writingFrameAsync = false + if res.err != nil { + sc.conn.Close() + } + wr := res.wr if writeEndsStream(wr.write) { @@ -1594,6 +1614,11 @@ func (sc *serverConn) processFrame(f Frame) error { func (sc *serverConn) processPing(f *PingFrame) error { sc.serveG.check() if f.IsAck() { + if sc.pingSent && sc.sentPingData == f.Data { + // This is a response to a PING we sent. + sc.pingSent = false + sc.readIdleTimer.Reset(sc.readIdleTimeout) + } // 6.7 PING: " An endpoint MUST NOT respond to PING frames // containing this flag." return nil @@ -2160,7 +2185,7 @@ func (sc *serverConn) newStream(id, pusherID uint32, state streamState) *stream st.cw.Init() st.flow.conn = &sc.flow // link to conn-level counter st.flow.add(sc.initialStreamSendWindowSize) - st.inflow.init(sc.srv.initialStreamRecvWindowSize()) + st.inflow.init(sc.initialStreamRecvWindowSize) if sc.hs.WriteTimeout > 0 { st.writeDeadline = sc.srv.afterFunc(sc.hs.WriteTimeout, st.onWriteTimeout) } @@ -3301,7 +3326,7 @@ func (sc *serverConn) countError(name string, err error) error { if sc == nil || sc.srv == nil { return err } - f := sc.srv.CountError + f := sc.countErrorFunc if f == nil { return err } diff --git a/vendor/golang.org/x/net/http2/transport.go b/vendor/golang.org/x/net/http2/transport.go index 61f511f9..0c5f64aa 100644 --- a/vendor/golang.org/x/net/http2/transport.go +++ b/vendor/golang.org/x/net/http2/transport.go @@ -25,7 +25,6 @@ import ( "net/http" "net/http/httptrace" "net/textproto" - "os" "sort" "strconv" "strings" @@ -227,40 +226,26 @@ func (t *Transport) contextWithTimeout(ctx context.Context, d time.Duration) (co } func (t *Transport) maxHeaderListSize() uint32 { - if t.MaxHeaderListSize == 0 { + n := int64(t.MaxHeaderListSize) + if t.t1 != nil && t.t1.MaxResponseHeaderBytes != 0 { + n = t.t1.MaxResponseHeaderBytes + if n > 0 { + n = adjustHTTP1MaxHeaderSize(n) + } + } + if n <= 0 { return 10 << 20 } - if t.MaxHeaderListSize == 0xffffffff { + if n >= 0xffffffff { return 0 } - return t.MaxHeaderListSize -} - -func (t *Transport) maxFrameReadSize() uint32 { - if t.MaxReadFrameSize == 0 { - return 0 // use the default provided by the peer - } - if t.MaxReadFrameSize < minMaxFrameSize { - return minMaxFrameSize - } - if t.MaxReadFrameSize > maxFrameSize { - return maxFrameSize - } - return t.MaxReadFrameSize + return uint32(n) } func (t *Transport) disableCompression() bool { return t.DisableCompression || (t.t1 != nil && t.t1.DisableCompression) } -func (t *Transport) pingTimeout() time.Duration { - if t.PingTimeout == 0 { - return 15 * time.Second - } - return t.PingTimeout - -} - // ConfigureTransport configures a net/http HTTP/1 Transport to use HTTP/2. // It returns an error if t1 has already been HTTP/2-enabled. // @@ -370,11 +355,14 @@ type ClientConn struct { lastActive time.Time lastIdle time.Time // time last idle // Settings from peer: (also guarded by wmu) - maxFrameSize uint32 - maxConcurrentStreams uint32 - peerMaxHeaderListSize uint64 - peerMaxHeaderTableSize uint32 - initialWindowSize uint32 + maxFrameSize uint32 + maxConcurrentStreams uint32 + peerMaxHeaderListSize uint64 + peerMaxHeaderTableSize uint32 + initialWindowSize uint32 + initialStreamRecvWindowSize int32 + readIdleTimeout time.Duration + pingTimeout time.Duration // reqHeaderMu is a 1-element semaphore channel controlling access to sending new requests. // Write to reqHeaderMu to lock it, read from it to unlock. @@ -499,6 +487,7 @@ func (cs *clientStream) closeReqBodyLocked() { } type stickyErrWriter struct { + group synctestGroupInterface conn net.Conn timeout time.Duration err *error @@ -508,22 +497,9 @@ func (sew stickyErrWriter) Write(p []byte) (n int, err error) { if *sew.err != nil { return 0, *sew.err } - for { - if sew.timeout != 0 { - sew.conn.SetWriteDeadline(time.Now().Add(sew.timeout)) - } - nn, err := sew.conn.Write(p[n:]) - n += nn - if n < len(p) && nn > 0 && errors.Is(err, os.ErrDeadlineExceeded) { - // Keep extending the deadline so long as we're making progress. - continue - } - if sew.timeout != 0 { - sew.conn.SetWriteDeadline(time.Time{}) - } - *sew.err = err - return n, err - } + n, err = writeWithByteTimeout(sew.group, sew.conn, sew.timeout, p) + *sew.err = err + return n, err } // noCachedConnError is the concrete type of ErrNoCachedConn, which @@ -758,44 +734,36 @@ func (t *Transport) expectContinueTimeout() time.Duration { return t.t1.ExpectContinueTimeout } -func (t *Transport) maxDecoderHeaderTableSize() uint32 { - if v := t.MaxDecoderHeaderTableSize; v > 0 { - return v - } - return initialHeaderTableSize -} - -func (t *Transport) maxEncoderHeaderTableSize() uint32 { - if v := t.MaxEncoderHeaderTableSize; v > 0 { - return v - } - return initialHeaderTableSize -} - func (t *Transport) NewClientConn(c net.Conn) (*ClientConn, error) { return t.newClientConn(c, t.disableKeepAlives()) } func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, error) { + conf := configFromTransport(t) cc := &ClientConn{ - t: t, - tconn: c, - readerDone: make(chan struct{}), - nextStreamID: 1, - maxFrameSize: 16 << 10, // spec default - initialWindowSize: 65535, // spec default - maxConcurrentStreams: initialMaxConcurrentStreams, // "infinite", per spec. Use a smaller value until we have received server settings. - peerMaxHeaderListSize: 0xffffffffffffffff, // "infinite", per spec. Use 2^64-1 instead. - streams: make(map[uint32]*clientStream), - singleUse: singleUse, - wantSettingsAck: true, - pings: make(map[[8]byte]chan struct{}), - reqHeaderMu: make(chan struct{}, 1), - } + t: t, + tconn: c, + readerDone: make(chan struct{}), + nextStreamID: 1, + maxFrameSize: 16 << 10, // spec default + initialWindowSize: 65535, // spec default + initialStreamRecvWindowSize: conf.MaxUploadBufferPerStream, + maxConcurrentStreams: initialMaxConcurrentStreams, // "infinite", per spec. Use a smaller value until we have received server settings. + peerMaxHeaderListSize: 0xffffffffffffffff, // "infinite", per spec. Use 2^64-1 instead. + streams: make(map[uint32]*clientStream), + singleUse: singleUse, + wantSettingsAck: true, + readIdleTimeout: conf.SendPingTimeout, + pingTimeout: conf.PingTimeout, + pings: make(map[[8]byte]chan struct{}), + reqHeaderMu: make(chan struct{}, 1), + } + var group synctestGroupInterface if t.transportTestHooks != nil { t.markNewGoroutine() t.transportTestHooks.newclientconn(cc) c = cc.tconn + group = t.group } if VerboseLogs { t.vlogf("http2: Transport creating client conn %p to %v", cc, c.RemoteAddr()) @@ -807,24 +775,23 @@ func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, erro // TODO: adjust this writer size to account for frame size + // MTU + crypto/tls record padding. cc.bw = bufio.NewWriter(stickyErrWriter{ + group: group, conn: c, - timeout: t.WriteByteTimeout, + timeout: conf.WriteByteTimeout, err: &cc.werr, }) cc.br = bufio.NewReader(c) cc.fr = NewFramer(cc.bw, cc.br) - if t.maxFrameReadSize() != 0 { - cc.fr.SetMaxReadFrameSize(t.maxFrameReadSize()) - } + cc.fr.SetMaxReadFrameSize(conf.MaxReadFrameSize) if t.CountError != nil { cc.fr.countError = t.CountError } - maxHeaderTableSize := t.maxDecoderHeaderTableSize() + maxHeaderTableSize := conf.MaxDecoderHeaderTableSize cc.fr.ReadMetaHeaders = hpack.NewDecoder(maxHeaderTableSize, nil) cc.fr.MaxHeaderListSize = t.maxHeaderListSize() cc.henc = hpack.NewEncoder(&cc.hbuf) - cc.henc.SetMaxDynamicTableSizeLimit(t.maxEncoderHeaderTableSize()) + cc.henc.SetMaxDynamicTableSizeLimit(conf.MaxEncoderHeaderTableSize) cc.peerMaxHeaderTableSize = initialHeaderTableSize if cs, ok := c.(connectionStater); ok { @@ -834,11 +801,9 @@ func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, erro initialSettings := []Setting{ {ID: SettingEnablePush, Val: 0}, - {ID: SettingInitialWindowSize, Val: transportDefaultStreamFlow}, - } - if max := t.maxFrameReadSize(); max != 0 { - initialSettings = append(initialSettings, Setting{ID: SettingMaxFrameSize, Val: max}) + {ID: SettingInitialWindowSize, Val: uint32(cc.initialStreamRecvWindowSize)}, } + initialSettings = append(initialSettings, Setting{ID: SettingMaxFrameSize, Val: conf.MaxReadFrameSize}) if max := t.maxHeaderListSize(); max != 0 { initialSettings = append(initialSettings, Setting{ID: SettingMaxHeaderListSize, Val: max}) } @@ -848,8 +813,8 @@ func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, erro cc.bw.Write(clientPreface) cc.fr.WriteSettings(initialSettings...) - cc.fr.WriteWindowUpdate(0, transportDefaultConnFlow) - cc.inflow.init(transportDefaultConnFlow + initialWindowSize) + cc.fr.WriteWindowUpdate(0, uint32(conf.MaxUploadBufferPerConnection)) + cc.inflow.init(conf.MaxUploadBufferPerConnection + initialWindowSize) cc.bw.Flush() if cc.werr != nil { cc.Close() @@ -867,7 +832,7 @@ func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, erro } func (cc *ClientConn) healthCheck() { - pingTimeout := cc.t.pingTimeout() + pingTimeout := cc.pingTimeout // We don't need to periodically ping in the health check, because the readLoop of ClientConn will // trigger the healthCheck again if there is no frame received. ctx, cancel := cc.t.contextWithTimeout(context.Background(), pingTimeout) @@ -2199,7 +2164,7 @@ type resAndError struct { func (cc *ClientConn) addStreamLocked(cs *clientStream) { cs.flow.add(int32(cc.initialWindowSize)) cs.flow.setConnFlow(&cc.flow) - cs.inflow.init(transportDefaultStreamFlow) + cs.inflow.init(cc.initialStreamRecvWindowSize) cs.ID = cc.nextStreamID cc.nextStreamID += 2 cc.streams[cs.ID] = cs @@ -2345,7 +2310,7 @@ func (cc *ClientConn) countReadFrameError(err error) { func (rl *clientConnReadLoop) run() error { cc := rl.cc gotSettings := false - readIdleTimeout := cc.t.ReadIdleTimeout + readIdleTimeout := cc.readIdleTimeout var t timer if readIdleTimeout != 0 { t = cc.t.afterFunc(readIdleTimeout, cc.healthCheck) diff --git a/vendor/golang.org/x/net/http2/write.go b/vendor/golang.org/x/net/http2/write.go index 33f61398..6ff6bee7 100644 --- a/vendor/golang.org/x/net/http2/write.go +++ b/vendor/golang.org/x/net/http2/write.go @@ -131,6 +131,16 @@ func (se StreamError) writeFrame(ctx writeContext) error { func (se StreamError) staysWithinBuffer(max int) bool { return frameHeaderLen+4 <= max } +type writePing struct { + data [8]byte +} + +func (w writePing) writeFrame(ctx writeContext) error { + return ctx.Framer().WritePing(false, w.data) +} + +func (w writePing) staysWithinBuffer(max int) bool { return frameHeaderLen+len(w.data) <= max } + type writePingAck struct{ pf *PingFrame } func (w writePingAck) writeFrame(ctx writeContext) error { diff --git a/vendor/golang.org/x/sys/cpu/cpu.go b/vendor/golang.org/x/sys/cpu/cpu.go index ec07aab0..02609d5b 100644 --- a/vendor/golang.org/x/sys/cpu/cpu.go +++ b/vendor/golang.org/x/sys/cpu/cpu.go @@ -201,6 +201,25 @@ var S390X struct { _ CacheLinePad } +// RISCV64 contains the supported CPU features and performance characteristics for riscv64 +// platforms. The booleans in RISCV64, with the exception of HasFastMisaligned, indicate +// the presence of RISC-V extensions. +// +// It is safe to assume that all the RV64G extensions are supported and so they are omitted from +// this structure. As riscv64 Go programs require at least RV64G, the code that populates +// this structure cannot run successfully if some of the RV64G extensions are missing. +// The struct is padded to avoid false sharing. +var RISCV64 struct { + _ CacheLinePad + HasFastMisaligned bool // Fast misaligned accesses + HasC bool // Compressed instruction-set extension + HasV bool // Vector extension compatible with RVV 1.0 + HasZba bool // Address generation instructions extension + HasZbb bool // Basic bit-manipulation extension + HasZbs bool // Single-bit instructions extension + _ CacheLinePad +} + func init() { archInit() initOptions() diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go index cd63e733..7d902b68 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x +//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x && !riscv64 package cpu diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go new file mode 100644 index 00000000..cb4a0c57 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go @@ -0,0 +1,137 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cpu + +import ( + "syscall" + "unsafe" +) + +// RISC-V extension discovery code for Linux. The approach here is to first try the riscv_hwprobe +// syscall falling back to HWCAP to check for the C extension if riscv_hwprobe is not available. +// +// A note on detection of the Vector extension using HWCAP. +// +// Support for the Vector extension version 1.0 was added to the Linux kernel in release 6.5. +// Support for the riscv_hwprobe syscall was added in 6.4. It follows that if the riscv_hwprobe +// syscall is not available then neither is the Vector extension (which needs kernel support). +// The riscv_hwprobe syscall should then be all we need to detect the Vector extension. +// However, some RISC-V board manufacturers ship boards with an older kernel on top of which +// they have back-ported various versions of the Vector extension patches but not the riscv_hwprobe +// patches. These kernels advertise support for the Vector extension using HWCAP. Falling +// back to HWCAP to detect the Vector extension, if riscv_hwprobe is not available, or simply not +// bothering with riscv_hwprobe at all and just using HWCAP may then seem like an attractive option. +// +// Unfortunately, simply checking the 'V' bit in AT_HWCAP will not work as this bit is used by +// RISC-V board and cloud instance providers to mean different things. The Lichee Pi 4A board +// and the Scaleway RV1 cloud instances use the 'V' bit to advertise their support for the unratified +// 0.7.1 version of the Vector Specification. The Banana Pi BPI-F3 and the CanMV-K230 board use +// it to advertise support for 1.0 of the Vector extension. Versions 0.7.1 and 1.0 of the Vector +// extension are binary incompatible. HWCAP can then not be used in isolation to populate the +// HasV field as this field indicates that the underlying CPU is compatible with RVV 1.0. +// +// There is a way at runtime to distinguish between versions 0.7.1 and 1.0 of the Vector +// specification by issuing a RVV 1.0 vsetvli instruction and checking the vill bit of the vtype +// register. This check would allow us to safely detect version 1.0 of the Vector extension +// with HWCAP, if riscv_hwprobe were not available. However, the check cannot +// be added until the assembler supports the Vector instructions. +// +// Note the riscv_hwprobe syscall does not suffer from these ambiguities by design as all of the +// extensions it advertises support for are explicitly versioned. It's also worth noting that +// the riscv_hwprobe syscall is the only way to detect multi-letter RISC-V extensions, e.g., Zba. +// These cannot be detected using HWCAP and so riscv_hwprobe must be used to detect the majority +// of RISC-V extensions. +// +// Please see https://docs.kernel.org/arch/riscv/hwprobe.html for more information. + +// golang.org/x/sys/cpu is not allowed to depend on golang.org/x/sys/unix so we must +// reproduce the constants, types and functions needed to make the riscv_hwprobe syscall +// here. + +const ( + // Copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go. + riscv_HWPROBE_KEY_IMA_EXT_0 = 0x4 + riscv_HWPROBE_IMA_C = 0x2 + riscv_HWPROBE_IMA_V = 0x4 + riscv_HWPROBE_EXT_ZBA = 0x8 + riscv_HWPROBE_EXT_ZBB = 0x10 + riscv_HWPROBE_EXT_ZBS = 0x20 + riscv_HWPROBE_KEY_CPUPERF_0 = 0x5 + riscv_HWPROBE_MISALIGNED_FAST = 0x3 + riscv_HWPROBE_MISALIGNED_MASK = 0x7 +) + +const ( + // sys_RISCV_HWPROBE is copied from golang.org/x/sys/unix/zsysnum_linux_riscv64.go. + sys_RISCV_HWPROBE = 258 +) + +// riscvHWProbePairs is copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go. +type riscvHWProbePairs struct { + key int64 + value uint64 +} + +const ( + // CPU features + hwcap_RISCV_ISA_C = 1 << ('C' - 'A') +) + +func doinit() { + // A slice of key/value pair structures is passed to the RISCVHWProbe syscall. The key + // field should be initialised with one of the key constants defined above, e.g., + // RISCV_HWPROBE_KEY_IMA_EXT_0. The syscall will set the value field to the appropriate value. + // If the kernel does not recognise a key it will set the key field to -1 and the value field to 0. + + pairs := []riscvHWProbePairs{ + {riscv_HWPROBE_KEY_IMA_EXT_0, 0}, + {riscv_HWPROBE_KEY_CPUPERF_0, 0}, + } + + // This call only indicates that extensions are supported if they are implemented on all cores. + if riscvHWProbe(pairs, 0) { + if pairs[0].key != -1 { + v := uint(pairs[0].value) + RISCV64.HasC = isSet(v, riscv_HWPROBE_IMA_C) + RISCV64.HasV = isSet(v, riscv_HWPROBE_IMA_V) + RISCV64.HasZba = isSet(v, riscv_HWPROBE_EXT_ZBA) + RISCV64.HasZbb = isSet(v, riscv_HWPROBE_EXT_ZBB) + RISCV64.HasZbs = isSet(v, riscv_HWPROBE_EXT_ZBS) + } + if pairs[1].key != -1 { + v := pairs[1].value & riscv_HWPROBE_MISALIGNED_MASK + RISCV64.HasFastMisaligned = v == riscv_HWPROBE_MISALIGNED_FAST + } + } + + // Let's double check with HWCAP if the C extension does not appear to be supported. + // This may happen if we're running on a kernel older than 6.4. + + if !RISCV64.HasC { + RISCV64.HasC = isSet(hwCap, hwcap_RISCV_ISA_C) + } +} + +func isSet(hwc uint, value uint) bool { + return hwc&value != 0 +} + +// riscvHWProbe is a simplified version of the generated wrapper function found in +// golang.org/x/sys/unix/zsyscall_linux_riscv64.go. We simplify it by removing the +// cpuCount and cpus parameters which we do not need. We always want to pass 0 for +// these parameters here so the kernel only reports the extensions that are present +// on all cores. +func riscvHWProbe(pairs []riscvHWProbePairs, flags uint) bool { + var _zero uintptr + var p0 unsafe.Pointer + if len(pairs) > 0 { + p0 = unsafe.Pointer(&pairs[0]) + } else { + p0 = unsafe.Pointer(&_zero) + } + + _, _, e1 := syscall.Syscall6(sys_RISCV_HWPROBE, uintptr(p0), uintptr(len(pairs)), uintptr(0), uintptr(0), uintptr(flags), 0) + return e1 == 0 +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go index 7f0c79c0..aca3199c 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go @@ -8,4 +8,13 @@ package cpu const cacheLineSize = 64 -func initOptions() {} +func initOptions() { + options = []option{ + {Name: "fastmisaligned", Feature: &RISCV64.HasFastMisaligned}, + {Name: "c", Feature: &RISCV64.HasC}, + {Name: "v", Feature: &RISCV64.HasV}, + {Name: "zba", Feature: &RISCV64.HasZba}, + {Name: "zbb", Feature: &RISCV64.HasZbb}, + {Name: "zbs", Feature: &RISCV64.HasZbs}, + } +} diff --git a/vendor/golang.org/x/sys/unix/README.md b/vendor/golang.org/x/sys/unix/README.md index 7d3c060e..6e08a76a 100644 --- a/vendor/golang.org/x/sys/unix/README.md +++ b/vendor/golang.org/x/sys/unix/README.md @@ -156,7 +156,7 @@ from the generated architecture-specific files listed below, and merge these into a common file for each OS. The merge is performed in the following steps: -1. Construct the set of common code that is idential in all architecture-specific files. +1. Construct the set of common code that is identical in all architecture-specific files. 2. Write this common code to the merged file. 3. Remove the common code from all architecture-specific files. diff --git a/vendor/golang.org/x/sys/unix/mkerrors.sh b/vendor/golang.org/x/sys/unix/mkerrors.sh index d07dd09e..ac54ecab 100644 --- a/vendor/golang.org/x/sys/unix/mkerrors.sh +++ b/vendor/golang.org/x/sys/unix/mkerrors.sh @@ -552,6 +552,7 @@ ccflags="$@" $2 !~ /^RTC_VL_(ACCURACY|BACKUP|DATA)/ && $2 ~ /^(NETLINK|NLM|NLMSG|NLA|IFA|IFAN|RT|RTC|RTCF|RTN|RTPROT|RTNH|ARPHRD|ETH_P|NETNSA)_/ || $2 ~ /^SOCK_|SK_DIAG_|SKNLGRP_$/ || + $2 ~ /^(CONNECT|SAE)_/ || $2 ~ /^FIORDCHK$/ || $2 ~ /^SIOC/ || $2 ~ /^TIOC/ || @@ -655,7 +656,7 @@ errors=$( signals=$( echo '#include ' | $CC -x c - -E -dM $ccflags | awk '$1=="#define" && $2 ~ /^SIG[A-Z0-9]+$/ { print $2 }' | - grep -v 'SIGSTKSIZE\|SIGSTKSZ\|SIGRT\|SIGMAX64' | + grep -E -v '(SIGSTKSIZE|SIGSTKSZ|SIGRT|SIGMAX64)' | sort ) @@ -665,7 +666,7 @@ echo '#include ' | $CC -x c - -E -dM $ccflags | sort >_error.grep echo '#include ' | $CC -x c - -E -dM $ccflags | awk '$1=="#define" && $2 ~ /^SIG[A-Z0-9]+$/ { print "^\t" $2 "[ \t]*=" }' | - grep -v 'SIGSTKSIZE\|SIGSTKSZ\|SIGRT\|SIGMAX64' | + grep -E -v '(SIGSTKSIZE|SIGSTKSZ|SIGRT|SIGMAX64)' | sort >_signal.grep echo '// mkerrors.sh' "$@" diff --git a/vendor/golang.org/x/sys/unix/syscall_aix.go b/vendor/golang.org/x/sys/unix/syscall_aix.go index 67ce6cef..6f15ba1e 100644 --- a/vendor/golang.org/x/sys/unix/syscall_aix.go +++ b/vendor/golang.org/x/sys/unix/syscall_aix.go @@ -360,7 +360,7 @@ func Wait4(pid int, wstatus *WaitStatus, options int, rusage *Rusage) (wpid int, var status _C_int var r Pid_t err = ERESTART - // AIX wait4 may return with ERESTART errno, while the processus is still + // AIX wait4 may return with ERESTART errno, while the process is still // active. for err == ERESTART { r, err = wait4(Pid_t(pid), &status, options, rusage) diff --git a/vendor/golang.org/x/sys/unix/syscall_darwin.go b/vendor/golang.org/x/sys/unix/syscall_darwin.go index 2d15200a..099867de 100644 --- a/vendor/golang.org/x/sys/unix/syscall_darwin.go +++ b/vendor/golang.org/x/sys/unix/syscall_darwin.go @@ -566,6 +566,43 @@ func PthreadFchdir(fd int) (err error) { return pthread_fchdir_np(fd) } +// Connectx calls connectx(2) to initiate a connection on a socket. +// +// srcIf, srcAddr, and dstAddr are filled into a [SaEndpoints] struct and passed as the endpoints argument. +// +// - srcIf is the optional source interface index. 0 means unspecified. +// - srcAddr is the optional source address. nil means unspecified. +// - dstAddr is the destination address. +// +// On success, Connectx returns the number of bytes enqueued for transmission. +func Connectx(fd int, srcIf uint32, srcAddr, dstAddr Sockaddr, associd SaeAssocID, flags uint32, iov []Iovec, connid *SaeConnID) (n uintptr, err error) { + endpoints := SaEndpoints{ + Srcif: srcIf, + } + + if srcAddr != nil { + addrp, addrlen, err := srcAddr.sockaddr() + if err != nil { + return 0, err + } + endpoints.Srcaddr = (*RawSockaddr)(addrp) + endpoints.Srcaddrlen = uint32(addrlen) + } + + if dstAddr != nil { + addrp, addrlen, err := dstAddr.sockaddr() + if err != nil { + return 0, err + } + endpoints.Dstaddr = (*RawSockaddr)(addrp) + endpoints.Dstaddrlen = uint32(addrlen) + } + + err = connectx(fd, &endpoints, associd, flags, iov, &n, connid) + return +} + +//sys connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) //sys sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) //sys shmat(id int, addr uintptr, flag int) (ret uintptr, err error) diff --git a/vendor/golang.org/x/sys/unix/syscall_hurd.go b/vendor/golang.org/x/sys/unix/syscall_hurd.go index ba46651f..a6a2d2fc 100644 --- a/vendor/golang.org/x/sys/unix/syscall_hurd.go +++ b/vendor/golang.org/x/sys/unix/syscall_hurd.go @@ -11,6 +11,7 @@ package unix int ioctl(int, unsigned long int, uintptr_t); */ import "C" +import "unsafe" func ioctl(fd int, req uint, arg uintptr) (err error) { r0, er := C.ioctl(C.int(fd), C.ulong(req), C.uintptr_t(arg)) diff --git a/vendor/golang.org/x/sys/unix/syscall_linux.go b/vendor/golang.org/x/sys/unix/syscall_linux.go index 3f1d3d4c..f08abd43 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux.go @@ -1295,6 +1295,48 @@ func GetsockoptTCPInfo(fd, level, opt int) (*TCPInfo, error) { return &value, err } +// GetsockoptTCPCCVegasInfo returns algorithm specific congestion control information for a socket using the "vegas" +// algorithm. +// +// The socket's congestion control algorighm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option: +// +// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION) +func GetsockoptTCPCCVegasInfo(fd, level, opt int) (*TCPVegasInfo, error) { + var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment + vallen := _Socklen(SizeofTCPCCInfo) + err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen) + out := (*TCPVegasInfo)(unsafe.Pointer(&value[0])) + return out, err +} + +// GetsockoptTCPCCDCTCPInfo returns algorithm specific congestion control information for a socket using the "dctp" +// algorithm. +// +// The socket's congestion control algorighm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option: +// +// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION) +func GetsockoptTCPCCDCTCPInfo(fd, level, opt int) (*TCPDCTCPInfo, error) { + var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment + vallen := _Socklen(SizeofTCPCCInfo) + err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen) + out := (*TCPDCTCPInfo)(unsafe.Pointer(&value[0])) + return out, err +} + +// GetsockoptTCPCCBBRInfo returns algorithm specific congestion control information for a socket using the "bbr" +// algorithm. +// +// The socket's congestion control algorighm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option: +// +// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION) +func GetsockoptTCPCCBBRInfo(fd, level, opt int) (*TCPBBRInfo, error) { + var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment + vallen := _Socklen(SizeofTCPCCInfo) + err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen) + out := (*TCPBBRInfo)(unsafe.Pointer(&value[0])) + return out, err +} + // GetsockoptString returns the string value of the socket option opt for the // socket associated with fd at the given socket level. func GetsockoptString(fd, level, opt int) (string, error) { @@ -1959,7 +2001,26 @@ func Getpgrp() (pid int) { //sysnb Getpid() (pid int) //sysnb Getppid() (ppid int) //sys Getpriority(which int, who int) (prio int, err error) -//sys Getrandom(buf []byte, flags int) (n int, err error) + +func Getrandom(buf []byte, flags int) (n int, err error) { + vdsoRet, supported := vgetrandom(buf, uint32(flags)) + if supported { + if vdsoRet < 0 { + return 0, errnoErr(syscall.Errno(-vdsoRet)) + } + return vdsoRet, nil + } + var p *byte + if len(buf) > 0 { + p = &buf[0] + } + r, _, e := Syscall(SYS_GETRANDOM, uintptr(unsafe.Pointer(p)), uintptr(len(buf)), uintptr(flags)) + if e != 0 { + return 0, errnoErr(e) + } + return int(r), nil +} + //sysnb Getrusage(who int, rusage *Rusage) (err error) //sysnb Getsid(pid int) (sid int, err error) //sysnb Gettid() (tid int) diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go b/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go index cf2ee6c7..745e5c7e 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go @@ -182,3 +182,5 @@ func KexecFileLoad(kernelFd int, initrdFd int, cmdline string, flags int) error } return kexecFileLoad(kernelFd, initrdFd, cmdlineLen, cmdline, flags) } + +const SYS_FSTATAT = SYS_NEWFSTATAT diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go b/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go index 3d0e9845..dd2262a4 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go @@ -214,3 +214,5 @@ func KexecFileLoad(kernelFd int, initrdFd int, cmdline string, flags int) error } return kexecFileLoad(kernelFd, initrdFd, cmdlineLen, cmdline, flags) } + +const SYS_FSTATAT = SYS_NEWFSTATAT diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go b/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go index 6f5a2889..8cf3670b 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go @@ -187,3 +187,5 @@ func RISCVHWProbe(pairs []RISCVHWProbePairs, set *CPUSet, flags uint) (err error } return riscvHWProbe(pairs, setSize, set, flags) } + +const SYS_FSTATAT = SYS_NEWFSTATAT diff --git a/vendor/golang.org/x/sys/unix/vgetrandom_linux.go b/vendor/golang.org/x/sys/unix/vgetrandom_linux.go new file mode 100644 index 00000000..07ac8e09 --- /dev/null +++ b/vendor/golang.org/x/sys/unix/vgetrandom_linux.go @@ -0,0 +1,13 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build linux && go1.24 + +package unix + +import _ "unsafe" + +//go:linkname vgetrandom runtime.vgetrandom +//go:noescape +func vgetrandom(p []byte, flags uint32) (ret int, supported bool) diff --git a/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go b/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go new file mode 100644 index 00000000..297e97bc --- /dev/null +++ b/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go @@ -0,0 +1,11 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !linux || !go1.24 + +package unix + +func vgetrandom(p []byte, flags uint32) (ret int, supported bool) { + return -1, false +} diff --git a/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go index 4308ac17..d73c4652 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go @@ -237,6 +237,9 @@ const ( CLOCK_UPTIME_RAW_APPROX = 0x9 CLONE_NOFOLLOW = 0x1 CLONE_NOOWNERCOPY = 0x2 + CONNECT_DATA_AUTHENTICATED = 0x4 + CONNECT_DATA_IDEMPOTENT = 0x2 + CONNECT_RESUME_ON_READ_WRITE = 0x1 CR0 = 0x0 CR1 = 0x1000 CR2 = 0x2000 @@ -1265,6 +1268,10 @@ const ( RTV_SSTHRESH = 0x20 RUSAGE_CHILDREN = -0x1 RUSAGE_SELF = 0x0 + SAE_ASSOCID_ALL = 0xffffffff + SAE_ASSOCID_ANY = 0x0 + SAE_CONNID_ALL = 0xffffffff + SAE_CONNID_ANY = 0x0 SCM_CREDS = 0x3 SCM_RIGHTS = 0x1 SCM_TIMESTAMP = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go index c8068a7a..4a55a400 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go @@ -237,6 +237,9 @@ const ( CLOCK_UPTIME_RAW_APPROX = 0x9 CLONE_NOFOLLOW = 0x1 CLONE_NOOWNERCOPY = 0x2 + CONNECT_DATA_AUTHENTICATED = 0x4 + CONNECT_DATA_IDEMPOTENT = 0x2 + CONNECT_RESUME_ON_READ_WRITE = 0x1 CR0 = 0x0 CR1 = 0x1000 CR2 = 0x2000 @@ -1265,6 +1268,10 @@ const ( RTV_SSTHRESH = 0x20 RUSAGE_CHILDREN = -0x1 RUSAGE_SELF = 0x0 + SAE_ASSOCID_ALL = 0xffffffff + SAE_ASSOCID_ANY = 0x0 + SAE_CONNID_ALL = 0xffffffff + SAE_CONNID_ANY = 0x0 SCM_CREDS = 0x3 SCM_RIGHTS = 0x1 SCM_TIMESTAMP = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux.go b/vendor/golang.org/x/sys/unix/zerrors_linux.go index 01a70b24..de3b4624 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux.go @@ -495,6 +495,7 @@ const ( BPF_F_TEST_REG_INVARIANTS = 0x80 BPF_F_TEST_RND_HI32 = 0x4 BPF_F_TEST_RUN_ON_CPU = 0x1 + BPF_F_TEST_SKB_CHECKSUM_COMPLETE = 0x4 BPF_F_TEST_STATE_FREQ = 0x8 BPF_F_TEST_XDP_LIVE_FRAMES = 0x2 BPF_F_XDP_DEV_BOUND_ONLY = 0x40 @@ -1922,6 +1923,7 @@ const ( MNT_EXPIRE = 0x4 MNT_FORCE = 0x1 MNT_ID_REQ_SIZE_VER0 = 0x18 + MNT_ID_REQ_SIZE_VER1 = 0x20 MODULE_INIT_COMPRESSED_FILE = 0x4 MODULE_INIT_IGNORE_MODVERSIONS = 0x1 MODULE_INIT_IGNORE_VERMAGIC = 0x2 @@ -2187,7 +2189,7 @@ const ( NFT_REG_SIZE = 0x10 NFT_REJECT_ICMPX_MAX = 0x3 NFT_RT_MAX = 0x4 - NFT_SECMARK_CTX_MAXLEN = 0x100 + NFT_SECMARK_CTX_MAXLEN = 0x1000 NFT_SET_MAXNAMELEN = 0x100 NFT_SOCKET_MAX = 0x3 NFT_TABLE_F_MASK = 0x7 @@ -2356,9 +2358,11 @@ const ( PERF_MEM_LVLNUM_IO = 0xa PERF_MEM_LVLNUM_L1 = 0x1 PERF_MEM_LVLNUM_L2 = 0x2 + PERF_MEM_LVLNUM_L2_MHB = 0x5 PERF_MEM_LVLNUM_L3 = 0x3 PERF_MEM_LVLNUM_L4 = 0x4 PERF_MEM_LVLNUM_LFB = 0xc + PERF_MEM_LVLNUM_MSC = 0x6 PERF_MEM_LVLNUM_NA = 0xf PERF_MEM_LVLNUM_PMEM = 0xe PERF_MEM_LVLNUM_RAM = 0xd @@ -2431,6 +2435,7 @@ const ( PRIO_PGRP = 0x1 PRIO_PROCESS = 0x0 PRIO_USER = 0x2 + PROCFS_IOCTL_MAGIC = 'f' PROC_SUPER_MAGIC = 0x9fa0 PROT_EXEC = 0x4 PROT_GROWSDOWN = 0x1000000 @@ -2933,11 +2938,12 @@ const ( RUSAGE_SELF = 0x0 RUSAGE_THREAD = 0x1 RWF_APPEND = 0x10 + RWF_ATOMIC = 0x40 RWF_DSYNC = 0x2 RWF_HIPRI = 0x1 RWF_NOAPPEND = 0x20 RWF_NOWAIT = 0x8 - RWF_SUPPORTED = 0x3f + RWF_SUPPORTED = 0x7f RWF_SYNC = 0x4 RWF_WRITE_LIFE_NOT_SET = 0x0 SCHED_BATCH = 0x3 @@ -3210,6 +3216,7 @@ const ( STATX_ATTR_MOUNT_ROOT = 0x2000 STATX_ATTR_NODUMP = 0x40 STATX_ATTR_VERITY = 0x100000 + STATX_ATTR_WRITE_ATOMIC = 0x400000 STATX_BASIC_STATS = 0x7ff STATX_BLOCKS = 0x400 STATX_BTIME = 0x800 @@ -3226,6 +3233,7 @@ const ( STATX_SUBVOL = 0x8000 STATX_TYPE = 0x1 STATX_UID = 0x8 + STATX_WRITE_ATOMIC = 0x10000 STATX__RESERVED = 0x80000000 SYNC_FILE_RANGE_WAIT_AFTER = 0x4 SYNC_FILE_RANGE_WAIT_BEFORE = 0x1 @@ -3624,6 +3632,7 @@ const ( XDP_UMEM_PGOFF_COMPLETION_RING = 0x180000000 XDP_UMEM_PGOFF_FILL_RING = 0x100000000 XDP_UMEM_REG = 0x4 + XDP_UMEM_TX_METADATA_LEN = 0x4 XDP_UMEM_TX_SW_CSUM = 0x2 XDP_UMEM_UNALIGNED_CHUNK_FLAG = 0x1 XDP_USE_NEED_WAKEUP = 0x8 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go index 684a5168..8aa6d77c 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go @@ -153,9 +153,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go index 61d74b59..da428f42 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go @@ -153,9 +153,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go index a28c9e3e..bf45bfec 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go @@ -150,9 +150,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go index ab5d1fe8..71c67162 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go @@ -154,9 +154,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go index c523090e..9476628f 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go @@ -154,9 +154,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go index 01e6ea78..b9e85f3c 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go @@ -150,9 +150,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go index 7aa610b1..a48b68a7 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go @@ -150,9 +150,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go index 92af771b..ea00e852 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go @@ -150,9 +150,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go index b27ef5e6..91c64687 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go @@ -150,9 +150,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go index 237a2cef..8cbf38d6 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go @@ -152,9 +152,14 @@ const ( NL3 = 0x300 NLDLY = 0x300 NOFLSH = 0x80000000 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x4 ONLCR = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go index 4a5c555a..a2df7341 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go @@ -152,9 +152,14 @@ const ( NL3 = 0x300 NLDLY = 0x300 NOFLSH = 0x80000000 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x4 ONLCR = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go index a02fb49a..24791379 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go @@ -152,9 +152,14 @@ const ( NL3 = 0x300 NLDLY = 0x300 NOFLSH = 0x80000000 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x4 ONLCR = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go index e26a7c61..d265f146 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go @@ -150,9 +150,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go index c48f7c21..3f2d6443 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go @@ -150,9 +150,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go index ad4b9aac..5d8b727a 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go @@ -155,9 +155,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go index da08b2ab..1ec2b140 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go +++ b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go @@ -581,6 +581,8 @@ const ( AT_EMPTY_PATH = 0x1000 AT_REMOVEDIR = 0x200 RENAME_NOREPLACE = 1 << 0 + ST_RDONLY = 1 + ST_NOSUID = 2 ) const ( diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go index b622533e..24b346e1 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go @@ -841,6 +841,26 @@ var libc_pthread_fchdir_np_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) { + var _p0 unsafe.Pointer + if len(iov) > 0 { + _p0 = unsafe.Pointer(&iov[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + _, _, e1 := syscall_syscall9(libc_connectx_trampoline_addr, uintptr(fd), uintptr(unsafe.Pointer(endpoints)), uintptr(associd), uintptr(flags), uintptr(_p0), uintptr(len(iov)), uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(connid)), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_connectx_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_connectx connectx "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) { _, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags)) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s index cfe6646b..ebd21310 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s @@ -248,6 +248,11 @@ TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8 DATA ·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB) +TEXT libc_connectx_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_connectx(SB) +GLOBL ·libc_connectx_trampoline_addr(SB), RODATA, $8 +DATA ·libc_connectx_trampoline_addr(SB)/8, $libc_connectx_trampoline<>(SB) + TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sendfile(SB) GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go index 13f624f6..824b9c2d 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go @@ -841,6 +841,26 @@ var libc_pthread_fchdir_np_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) { + var _p0 unsafe.Pointer + if len(iov) > 0 { + _p0 = unsafe.Pointer(&iov[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + _, _, e1 := syscall_syscall9(libc_connectx_trampoline_addr, uintptr(fd), uintptr(unsafe.Pointer(endpoints)), uintptr(associd), uintptr(flags), uintptr(_p0), uintptr(len(iov)), uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(connid)), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_connectx_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_connectx connectx "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) { _, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags)) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s index fe222b75..4f178a22 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s @@ -248,6 +248,11 @@ TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8 DATA ·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB) +TEXT libc_connectx_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_connectx(SB) +GLOBL ·libc_connectx_trampoline_addr(SB), RODATA, $8 +DATA ·libc_connectx_trampoline_addr(SB)/8, $libc_connectx_trampoline<>(SB) + TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sendfile(SB) GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux.go b/vendor/golang.org/x/sys/unix/zsyscall_linux.go index 1bc1a5ad..af30da55 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux.go @@ -971,23 +971,6 @@ func Getpriority(which int, who int) (prio int, err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Getrandom(buf []byte, flags int) (n int, err error) { - var _p0 unsafe.Pointer - if len(buf) > 0 { - _p0 = unsafe.Pointer(&buf[0]) - } else { - _p0 = unsafe.Pointer(&_zero) - } - r0, _, e1 := Syscall(SYS_GETRANDOM, uintptr(_p0), uintptr(len(buf)), uintptr(flags)) - n = int(r0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Getrusage(who int, rusage *Rusage) (err error) { _, _, e1 := RawSyscall(SYS_GETRUSAGE, uintptr(who), uintptr(unsafe.Pointer(rusage)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go index d3e38f68..f485dbf4 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go @@ -341,6 +341,7 @@ const ( SYS_STATX = 332 SYS_IO_PGETEVENTS = 333 SYS_RSEQ = 334 + SYS_URETPROBE = 335 SYS_PIDFD_SEND_SIGNAL = 424 SYS_IO_URING_SETUP = 425 SYS_IO_URING_ENTER = 426 diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go index 6c778c23..1893e2fe 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go @@ -85,7 +85,7 @@ const ( SYS_SPLICE = 76 SYS_TEE = 77 SYS_READLINKAT = 78 - SYS_FSTATAT = 79 + SYS_NEWFSTATAT = 79 SYS_FSTAT = 80 SYS_SYNC = 81 SYS_FSYNC = 82 diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go index 37281cf5..16a4017d 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go @@ -84,6 +84,8 @@ const ( SYS_SPLICE = 76 SYS_TEE = 77 SYS_READLINKAT = 78 + SYS_NEWFSTATAT = 79 + SYS_FSTAT = 80 SYS_SYNC = 81 SYS_FSYNC = 82 SYS_FDATASYNC = 83 diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go index 9889f6a5..a5459e76 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go @@ -84,7 +84,7 @@ const ( SYS_SPLICE = 76 SYS_TEE = 77 SYS_READLINKAT = 78 - SYS_FSTATAT = 79 + SYS_NEWFSTATAT = 79 SYS_FSTAT = 80 SYS_SYNC = 81 SYS_FSYNC = 82 diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go index 091d107f..d003c3d4 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go @@ -306,6 +306,19 @@ type XVSockPgen struct { type _Socklen uint32 +type SaeAssocID uint32 + +type SaeConnID uint32 + +type SaEndpoints struct { + Srcif uint32 + Srcaddr *RawSockaddr + Srcaddrlen uint32 + Dstaddr *RawSockaddr + Dstaddrlen uint32 + _ [4]byte +} + type Xucred struct { Version uint32 Uid uint32 diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go index 28ff4ef7..0d45a941 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go @@ -306,6 +306,19 @@ type XVSockPgen struct { type _Socklen uint32 +type SaeAssocID uint32 + +type SaeConnID uint32 + +type SaEndpoints struct { + Srcif uint32 + Srcaddr *RawSockaddr + Srcaddrlen uint32 + Dstaddr *RawSockaddr + Dstaddrlen uint32 + _ [4]byte +} + type Xucred struct { Version uint32 Uid uint32 diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go index 6cbd094a..51e13eb0 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go @@ -625,6 +625,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go index 7c03b6ee..d002d8ef 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go @@ -630,6 +630,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go index 422107ee..3f863d89 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go @@ -616,6 +616,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go index 505a12ac..61c72931 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go @@ -610,6 +610,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go index cc986c79..b5d17414 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go @@ -612,6 +612,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux.go b/vendor/golang.org/x/sys/unix/ztypes_linux.go index 7f1961b9..3a69e454 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux.go @@ -87,31 +87,35 @@ type StatxTimestamp struct { } type Statx_t struct { - Mask uint32 - Blksize uint32 - Attributes uint64 - Nlink uint32 - Uid uint32 - Gid uint32 - Mode uint16 - _ [1]uint16 - Ino uint64 - Size uint64 - Blocks uint64 - Attributes_mask uint64 - Atime StatxTimestamp - Btime StatxTimestamp - Ctime StatxTimestamp - Mtime StatxTimestamp - Rdev_major uint32 - Rdev_minor uint32 - Dev_major uint32 - Dev_minor uint32 - Mnt_id uint64 - Dio_mem_align uint32 - Dio_offset_align uint32 - Subvol uint64 - _ [11]uint64 + Mask uint32 + Blksize uint32 + Attributes uint64 + Nlink uint32 + Uid uint32 + Gid uint32 + Mode uint16 + _ [1]uint16 + Ino uint64 + Size uint64 + Blocks uint64 + Attributes_mask uint64 + Atime StatxTimestamp + Btime StatxTimestamp + Ctime StatxTimestamp + Mtime StatxTimestamp + Rdev_major uint32 + Rdev_minor uint32 + Dev_major uint32 + Dev_minor uint32 + Mnt_id uint64 + Dio_mem_align uint32 + Dio_offset_align uint32 + Subvol uint64 + Atomic_write_unit_min uint32 + Atomic_write_unit_max uint32 + Atomic_write_segments_max uint32 + _ [1]uint32 + _ [9]uint64 } type Fsid struct { @@ -516,6 +520,29 @@ type TCPInfo struct { Total_rto_time uint32 } +type TCPVegasInfo struct { + Enabled uint32 + Rttcnt uint32 + Rtt uint32 + Minrtt uint32 +} + +type TCPDCTCPInfo struct { + Enabled uint16 + Ce_state uint16 + Alpha uint32 + Ab_ecn uint32 + Ab_tot uint32 +} + +type TCPBBRInfo struct { + Bw_lo uint32 + Bw_hi uint32 + Min_rtt uint32 + Pacing_gain uint32 + Cwnd_gain uint32 +} + type CanFilter struct { Id uint32 Mask uint32 @@ -557,6 +584,7 @@ const ( SizeofICMPv6Filter = 0x20 SizeofUcred = 0xc SizeofTCPInfo = 0xf8 + SizeofTCPCCInfo = 0x14 SizeofCanFilter = 0x8 SizeofTCPRepairOpt = 0x8 ) @@ -2486,7 +2514,7 @@ type XDPMmapOffsets struct { type XDPUmemReg struct { Addr uint64 Len uint64 - Chunk_size uint32 + Size uint32 Headroom uint32 Flags uint32 Tx_metadata_len uint32 @@ -3766,7 +3794,7 @@ const ( ETHTOOL_MSG_PSE_GET = 0x24 ETHTOOL_MSG_PSE_SET = 0x25 ETHTOOL_MSG_RSS_GET = 0x26 - ETHTOOL_MSG_USER_MAX = 0x2b + ETHTOOL_MSG_USER_MAX = 0x2c ETHTOOL_MSG_KERNEL_NONE = 0x0 ETHTOOL_MSG_STRSET_GET_REPLY = 0x1 ETHTOOL_MSG_LINKINFO_GET_REPLY = 0x2 @@ -3806,7 +3834,7 @@ const ( ETHTOOL_MSG_MODULE_NTF = 0x24 ETHTOOL_MSG_PSE_GET_REPLY = 0x25 ETHTOOL_MSG_RSS_GET_REPLY = 0x26 - ETHTOOL_MSG_KERNEL_MAX = 0x2b + ETHTOOL_MSG_KERNEL_MAX = 0x2c ETHTOOL_FLAG_COMPACT_BITSETS = 0x1 ETHTOOL_FLAG_OMIT_REPLY = 0x2 ETHTOOL_FLAG_STATS = 0x4 @@ -3951,7 +3979,7 @@ const ( ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL = 0x17 ETHTOOL_A_COALESCE_USE_CQE_MODE_TX = 0x18 ETHTOOL_A_COALESCE_USE_CQE_MODE_RX = 0x19 - ETHTOOL_A_COALESCE_MAX = 0x1c + ETHTOOL_A_COALESCE_MAX = 0x1e ETHTOOL_A_PAUSE_UNSPEC = 0x0 ETHTOOL_A_PAUSE_HEADER = 0x1 ETHTOOL_A_PAUSE_AUTONEG = 0x2 @@ -4609,7 +4637,7 @@ const ( NL80211_ATTR_MAC_HINT = 0xc8 NL80211_ATTR_MAC_MASK = 0xd7 NL80211_ATTR_MAX_AP_ASSOC_STA = 0xca - NL80211_ATTR_MAX = 0x14a + NL80211_ATTR_MAX = 0x14c NL80211_ATTR_MAX_CRIT_PROT_DURATION = 0xb4 NL80211_ATTR_MAX_CSA_COUNTERS = 0xce NL80211_ATTR_MAX_MATCH_SETS = 0x85 @@ -5213,7 +5241,7 @@ const ( NL80211_FREQUENCY_ATTR_GO_CONCURRENT = 0xf NL80211_FREQUENCY_ATTR_INDOOR_ONLY = 0xe NL80211_FREQUENCY_ATTR_IR_CONCURRENT = 0xf - NL80211_FREQUENCY_ATTR_MAX = 0x20 + NL80211_FREQUENCY_ATTR_MAX = 0x21 NL80211_FREQUENCY_ATTR_MAX_TX_POWER = 0x6 NL80211_FREQUENCY_ATTR_NO_10MHZ = 0x11 NL80211_FREQUENCY_ATTR_NO_160MHZ = 0xc diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go index 15adc041..ad05b51a 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go @@ -727,6 +727,37 @@ const ( RISCV_HWPROBE_EXT_ZBA = 0x8 RISCV_HWPROBE_EXT_ZBB = 0x10 RISCV_HWPROBE_EXT_ZBS = 0x20 + RISCV_HWPROBE_EXT_ZICBOZ = 0x40 + RISCV_HWPROBE_EXT_ZBC = 0x80 + RISCV_HWPROBE_EXT_ZBKB = 0x100 + RISCV_HWPROBE_EXT_ZBKC = 0x200 + RISCV_HWPROBE_EXT_ZBKX = 0x400 + RISCV_HWPROBE_EXT_ZKND = 0x800 + RISCV_HWPROBE_EXT_ZKNE = 0x1000 + RISCV_HWPROBE_EXT_ZKNH = 0x2000 + RISCV_HWPROBE_EXT_ZKSED = 0x4000 + RISCV_HWPROBE_EXT_ZKSH = 0x8000 + RISCV_HWPROBE_EXT_ZKT = 0x10000 + RISCV_HWPROBE_EXT_ZVBB = 0x20000 + RISCV_HWPROBE_EXT_ZVBC = 0x40000 + RISCV_HWPROBE_EXT_ZVKB = 0x80000 + RISCV_HWPROBE_EXT_ZVKG = 0x100000 + RISCV_HWPROBE_EXT_ZVKNED = 0x200000 + RISCV_HWPROBE_EXT_ZVKNHA = 0x400000 + RISCV_HWPROBE_EXT_ZVKNHB = 0x800000 + RISCV_HWPROBE_EXT_ZVKSED = 0x1000000 + RISCV_HWPROBE_EXT_ZVKSH = 0x2000000 + RISCV_HWPROBE_EXT_ZVKT = 0x4000000 + RISCV_HWPROBE_EXT_ZFH = 0x8000000 + RISCV_HWPROBE_EXT_ZFHMIN = 0x10000000 + RISCV_HWPROBE_EXT_ZIHINTNTL = 0x20000000 + RISCV_HWPROBE_EXT_ZVFH = 0x40000000 + RISCV_HWPROBE_EXT_ZVFHMIN = 0x80000000 + RISCV_HWPROBE_EXT_ZFA = 0x100000000 + RISCV_HWPROBE_EXT_ZTSO = 0x200000000 + RISCV_HWPROBE_EXT_ZACAS = 0x400000000 + RISCV_HWPROBE_EXT_ZICOND = 0x800000000 + RISCV_HWPROBE_EXT_ZIHINTPAUSE = 0x1000000000 RISCV_HWPROBE_KEY_CPUPERF_0 = 0x5 RISCV_HWPROBE_MISALIGNED_UNKNOWN = 0x0 RISCV_HWPROBE_MISALIGNED_EMULATED = 0x1 @@ -734,4 +765,6 @@ const ( RISCV_HWPROBE_MISALIGNED_FAST = 0x3 RISCV_HWPROBE_MISALIGNED_UNSUPPORTED = 0x4 RISCV_HWPROBE_MISALIGNED_MASK = 0x7 + RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE = 0x6 + RISCV_HWPROBE_WHICH_CPUS = 0x1 ) diff --git a/vendor/golang.org/x/sys/windows/dll_windows.go b/vendor/golang.org/x/sys/windows/dll_windows.go index 115341fb..4e613cf6 100644 --- a/vendor/golang.org/x/sys/windows/dll_windows.go +++ b/vendor/golang.org/x/sys/windows/dll_windows.go @@ -65,7 +65,7 @@ func LoadDLL(name string) (dll *DLL, err error) { return d, nil } -// MustLoadDLL is like LoadDLL but panics if load operation failes. +// MustLoadDLL is like LoadDLL but panics if load operation fails. func MustLoadDLL(name string) *DLL { d, e := LoadDLL(name) if e != nil { diff --git a/vendor/golang.org/x/sys/windows/syscall_windows.go b/vendor/golang.org/x/sys/windows/syscall_windows.go index 1fa34fd1..5cee9a31 100644 --- a/vendor/golang.org/x/sys/windows/syscall_windows.go +++ b/vendor/golang.org/x/sys/windows/syscall_windows.go @@ -313,6 +313,10 @@ func NewCallbackCDecl(fn interface{}) uintptr { //sys SetConsoleMode(console Handle, mode uint32) (err error) = kernel32.SetConsoleMode //sys GetConsoleScreenBufferInfo(console Handle, info *ConsoleScreenBufferInfo) (err error) = kernel32.GetConsoleScreenBufferInfo //sys setConsoleCursorPosition(console Handle, position uint32) (err error) = kernel32.SetConsoleCursorPosition +//sys GetConsoleCP() (cp uint32, err error) = kernel32.GetConsoleCP +//sys GetConsoleOutputCP() (cp uint32, err error) = kernel32.GetConsoleOutputCP +//sys SetConsoleCP(cp uint32) (err error) = kernel32.SetConsoleCP +//sys SetConsoleOutputCP(cp uint32) (err error) = kernel32.SetConsoleOutputCP //sys WriteConsole(console Handle, buf *uint16, towrite uint32, written *uint32, reserved *byte) (err error) = kernel32.WriteConsoleW //sys ReadConsole(console Handle, buf *uint16, toread uint32, read *uint32, inputControl *byte) (err error) = kernel32.ReadConsoleW //sys resizePseudoConsole(pconsole Handle, size uint32) (hr error) = kernel32.ResizePseudoConsole diff --git a/vendor/golang.org/x/sys/windows/types_windows.go b/vendor/golang.org/x/sys/windows/types_windows.go index 3f03b3d5..7b97a154 100644 --- a/vendor/golang.org/x/sys/windows/types_windows.go +++ b/vendor/golang.org/x/sys/windows/types_windows.go @@ -1060,6 +1060,7 @@ const ( SIO_GET_EXTENSION_FUNCTION_POINTER = IOC_INOUT | IOC_WS2 | 6 SIO_KEEPALIVE_VALS = IOC_IN | IOC_VENDOR | 4 SIO_UDP_CONNRESET = IOC_IN | IOC_VENDOR | 12 + SIO_UDP_NETRESET = IOC_IN | IOC_VENDOR | 15 // cf. http://support.microsoft.com/default.aspx?scid=kb;en-us;257460 diff --git a/vendor/golang.org/x/sys/windows/zsyscall_windows.go b/vendor/golang.org/x/sys/windows/zsyscall_windows.go index 9bb979a3..4c2e1bdc 100644 --- a/vendor/golang.org/x/sys/windows/zsyscall_windows.go +++ b/vendor/golang.org/x/sys/windows/zsyscall_windows.go @@ -247,7 +247,9 @@ var ( procGetCommandLineW = modkernel32.NewProc("GetCommandLineW") procGetComputerNameExW = modkernel32.NewProc("GetComputerNameExW") procGetComputerNameW = modkernel32.NewProc("GetComputerNameW") + procGetConsoleCP = modkernel32.NewProc("GetConsoleCP") procGetConsoleMode = modkernel32.NewProc("GetConsoleMode") + procGetConsoleOutputCP = modkernel32.NewProc("GetConsoleOutputCP") procGetConsoleScreenBufferInfo = modkernel32.NewProc("GetConsoleScreenBufferInfo") procGetCurrentDirectoryW = modkernel32.NewProc("GetCurrentDirectoryW") procGetCurrentProcessId = modkernel32.NewProc("GetCurrentProcessId") @@ -347,8 +349,10 @@ var ( procSetCommMask = modkernel32.NewProc("SetCommMask") procSetCommState = modkernel32.NewProc("SetCommState") procSetCommTimeouts = modkernel32.NewProc("SetCommTimeouts") + procSetConsoleCP = modkernel32.NewProc("SetConsoleCP") procSetConsoleCursorPosition = modkernel32.NewProc("SetConsoleCursorPosition") procSetConsoleMode = modkernel32.NewProc("SetConsoleMode") + procSetConsoleOutputCP = modkernel32.NewProc("SetConsoleOutputCP") procSetCurrentDirectoryW = modkernel32.NewProc("SetCurrentDirectoryW") procSetDefaultDllDirectories = modkernel32.NewProc("SetDefaultDllDirectories") procSetDllDirectoryW = modkernel32.NewProc("SetDllDirectoryW") @@ -2162,6 +2166,15 @@ func GetComputerName(buf *uint16, n *uint32) (err error) { return } +func GetConsoleCP() (cp uint32, err error) { + r0, _, e1 := syscall.Syscall(procGetConsoleCP.Addr(), 0, 0, 0, 0) + cp = uint32(r0) + if cp == 0 { + err = errnoErr(e1) + } + return +} + func GetConsoleMode(console Handle, mode *uint32) (err error) { r1, _, e1 := syscall.Syscall(procGetConsoleMode.Addr(), 2, uintptr(console), uintptr(unsafe.Pointer(mode)), 0) if r1 == 0 { @@ -2170,6 +2183,15 @@ func GetConsoleMode(console Handle, mode *uint32) (err error) { return } +func GetConsoleOutputCP() (cp uint32, err error) { + r0, _, e1 := syscall.Syscall(procGetConsoleOutputCP.Addr(), 0, 0, 0, 0) + cp = uint32(r0) + if cp == 0 { + err = errnoErr(e1) + } + return +} + func GetConsoleScreenBufferInfo(console Handle, info *ConsoleScreenBufferInfo) (err error) { r1, _, e1 := syscall.Syscall(procGetConsoleScreenBufferInfo.Addr(), 2, uintptr(console), uintptr(unsafe.Pointer(info)), 0) if r1 == 0 { @@ -3038,6 +3060,14 @@ func SetCommTimeouts(handle Handle, timeouts *CommTimeouts) (err error) { return } +func SetConsoleCP(cp uint32) (err error) { + r1, _, e1 := syscall.Syscall(procSetConsoleCP.Addr(), 1, uintptr(cp), 0, 0) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + func setConsoleCursorPosition(console Handle, position uint32) (err error) { r1, _, e1 := syscall.Syscall(procSetConsoleCursorPosition.Addr(), 2, uintptr(console), uintptr(position), 0) if r1 == 0 { @@ -3054,6 +3084,14 @@ func SetConsoleMode(console Handle, mode uint32) (err error) { return } +func SetConsoleOutputCP(cp uint32) (err error) { + r1, _, e1 := syscall.Syscall(procSetConsoleOutputCP.Addr(), 1, uintptr(cp), 0, 0) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + func SetCurrentDirectory(path *uint16) (err error) { r1, _, e1 := syscall.Syscall(procSetCurrentDirectoryW.Addr(), 1, uintptr(unsafe.Pointer(path)), 0, 0) if r1 == 0 { diff --git a/vendor/golang.org/x/term/term_windows.go b/vendor/golang.org/x/term/term_windows.go index 465f5606..df6bf948 100644 --- a/vendor/golang.org/x/term/term_windows.go +++ b/vendor/golang.org/x/term/term_windows.go @@ -26,6 +26,7 @@ func makeRaw(fd int) (*State, error) { return nil, err } raw := st &^ (windows.ENABLE_ECHO_INPUT | windows.ENABLE_PROCESSED_INPUT | windows.ENABLE_LINE_INPUT | windows.ENABLE_PROCESSED_OUTPUT) + raw |= windows.ENABLE_VIRTUAL_TERMINAL_INPUT if err := windows.SetConsoleMode(windows.Handle(fd), raw); err != nil { return nil, err } diff --git a/vendor/google.golang.org/protobuf/encoding/protojson/decode.go b/vendor/google.golang.org/protobuf/encoding/protojson/decode.go index bb2966e3..8f9e592f 100644 --- a/vendor/google.golang.org/protobuf/encoding/protojson/decode.go +++ b/vendor/google.golang.org/protobuf/encoding/protojson/decode.go @@ -351,7 +351,7 @@ func (d decoder) unmarshalScalar(fd protoreflect.FieldDescriptor) (protoreflect. panic(fmt.Sprintf("unmarshalScalar: invalid scalar kind %v", kind)) } - return protoreflect.Value{}, d.newError(tok.Pos(), "invalid value for %v type: %v", kind, tok.RawString()) + return protoreflect.Value{}, d.newError(tok.Pos(), "invalid value for %v field %v: %v", kind, fd.JSONName(), tok.RawString()) } func unmarshalInt(tok json.Token, bitSize int) (protoreflect.Value, bool) { diff --git a/vendor/google.golang.org/protobuf/encoding/protojson/encode.go b/vendor/google.golang.org/protobuf/encoding/protojson/encode.go index 29846df2..0e72d853 100644 --- a/vendor/google.golang.org/protobuf/encoding/protojson/encode.go +++ b/vendor/google.golang.org/protobuf/encoding/protojson/encode.go @@ -216,9 +216,7 @@ func (m unpopulatedFieldRanger) Range(f func(protoreflect.FieldDescriptor, proto } v := m.Get(fd) - isProto2Scalar := fd.Syntax() == protoreflect.Proto2 && fd.Default().IsValid() - isSingularMessage := fd.Cardinality() != protoreflect.Repeated && fd.Message() != nil - if isProto2Scalar || isSingularMessage { + if fd.HasPresence() { if m.skipNull { continue } diff --git a/vendor/google.golang.org/protobuf/internal/descopts/options.go b/vendor/google.golang.org/protobuf/internal/descopts/options.go index 8401be8c..024ffebd 100644 --- a/vendor/google.golang.org/protobuf/internal/descopts/options.go +++ b/vendor/google.golang.org/protobuf/internal/descopts/options.go @@ -9,7 +9,7 @@ // dependency on the descriptor proto package). package descopts -import pref "google.golang.org/protobuf/reflect/protoreflect" +import "google.golang.org/protobuf/reflect/protoreflect" // These variables are set by the init function in descriptor.pb.go via logic // in internal/filetype. In other words, so long as the descriptor proto package @@ -17,13 +17,13 @@ import pref "google.golang.org/protobuf/reflect/protoreflect" // // Each variable is populated with a nil pointer to the options struct. var ( - File pref.ProtoMessage - Enum pref.ProtoMessage - EnumValue pref.ProtoMessage - Message pref.ProtoMessage - Field pref.ProtoMessage - Oneof pref.ProtoMessage - ExtensionRange pref.ProtoMessage - Service pref.ProtoMessage - Method pref.ProtoMessage + File protoreflect.ProtoMessage + Enum protoreflect.ProtoMessage + EnumValue protoreflect.ProtoMessage + Message protoreflect.ProtoMessage + Field protoreflect.ProtoMessage + Oneof protoreflect.ProtoMessage + ExtensionRange protoreflect.ProtoMessage + Service protoreflect.ProtoMessage + Method protoreflect.ProtoMessage ) diff --git a/vendor/google.golang.org/protobuf/internal/editionssupport/editions.go b/vendor/google.golang.org/protobuf/internal/editionssupport/editions.go index 029a6a12..08dad769 100644 --- a/vendor/google.golang.org/protobuf/internal/editionssupport/editions.go +++ b/vendor/google.golang.org/protobuf/internal/editionssupport/editions.go @@ -5,7 +5,7 @@ // Package editionssupport defines constants for editions that are supported. package editionssupport -import descriptorpb "google.golang.org/protobuf/types/descriptorpb" +import "google.golang.org/protobuf/types/descriptorpb" const ( Minimum = descriptorpb.Edition_EDITION_PROTO2 diff --git a/vendor/google.golang.org/protobuf/internal/filedesc/desc.go b/vendor/google.golang.org/protobuf/internal/filedesc/desc.go index df53ff40..fa790e0f 100644 --- a/vendor/google.golang.org/protobuf/internal/filedesc/desc.go +++ b/vendor/google.golang.org/protobuf/internal/filedesc/desc.go @@ -258,6 +258,7 @@ type ( StringName stringName IsProto3Optional bool // promoted from google.protobuf.FieldDescriptorProto IsWeak bool // promoted from google.protobuf.FieldOptions + IsLazy bool // promoted from google.protobuf.FieldOptions Default defaultValue ContainingOneof protoreflect.OneofDescriptor // must be consistent with Message.Oneofs.Fields Enum protoreflect.EnumDescriptor @@ -351,6 +352,7 @@ func (fd *Field) IsPacked() bool { } func (fd *Field) IsExtension() bool { return false } func (fd *Field) IsWeak() bool { return fd.L1.IsWeak } +func (fd *Field) IsLazy() bool { return fd.L1.IsLazy } func (fd *Field) IsList() bool { return fd.Cardinality() == protoreflect.Repeated && !fd.IsMap() } func (fd *Field) IsMap() bool { return fd.Message() != nil && fd.Message().IsMapEntry() } func (fd *Field) MapKey() protoreflect.FieldDescriptor { @@ -425,6 +427,7 @@ type ( Extendee protoreflect.MessageDescriptor Cardinality protoreflect.Cardinality Kind protoreflect.Kind + IsLazy bool EditionFeatures EditionFeatures } ExtensionL2 struct { @@ -465,6 +468,7 @@ func (xd *Extension) IsPacked() bool { } func (xd *Extension) IsExtension() bool { return true } func (xd *Extension) IsWeak() bool { return false } +func (xd *Extension) IsLazy() bool { return xd.L1.IsLazy } func (xd *Extension) IsList() bool { return xd.Cardinality() == protoreflect.Repeated } func (xd *Extension) IsMap() bool { return false } func (xd *Extension) MapKey() protoreflect.FieldDescriptor { return nil } diff --git a/vendor/google.golang.org/protobuf/internal/filedesc/desc_init.go b/vendor/google.golang.org/protobuf/internal/filedesc/desc_init.go index 8a57d60b..d2f54949 100644 --- a/vendor/google.golang.org/protobuf/internal/filedesc/desc_init.go +++ b/vendor/google.golang.org/protobuf/internal/filedesc/desc_init.go @@ -495,6 +495,8 @@ func (xd *Extension) unmarshalOptions(b []byte) { switch num { case genid.FieldOptions_Packed_field_number: xd.L1.EditionFeatures.IsPacked = protowire.DecodeBool(v) + case genid.FieldOptions_Lazy_field_number: + xd.L1.IsLazy = protowire.DecodeBool(v) } case protowire.BytesType: v, m := protowire.ConsumeBytes(b) diff --git a/vendor/google.golang.org/protobuf/internal/filedesc/desc_lazy.go b/vendor/google.golang.org/protobuf/internal/filedesc/desc_lazy.go index e56c91a8..67a51b32 100644 --- a/vendor/google.golang.org/protobuf/internal/filedesc/desc_lazy.go +++ b/vendor/google.golang.org/protobuf/internal/filedesc/desc_lazy.go @@ -504,6 +504,8 @@ func (fd *Field) unmarshalOptions(b []byte) { fd.L1.EditionFeatures.IsPacked = protowire.DecodeBool(v) case genid.FieldOptions_Weak_field_number: fd.L1.IsWeak = protowire.DecodeBool(v) + case genid.FieldOptions_Lazy_field_number: + fd.L1.IsLazy = protowire.DecodeBool(v) case FieldOptions_EnforceUTF8: fd.L1.EditionFeatures.IsUTF8Validated = protowire.DecodeBool(v) } diff --git a/vendor/google.golang.org/protobuf/internal/filedesc/editions.go b/vendor/google.golang.org/protobuf/internal/filedesc/editions.go index 11f5f356..fd4d0c83 100644 --- a/vendor/google.golang.org/protobuf/internal/filedesc/editions.go +++ b/vendor/google.golang.org/protobuf/internal/filedesc/editions.go @@ -68,7 +68,7 @@ func unmarshalFeatureSet(b []byte, parent EditionFeatures) EditionFeatures { v, m := protowire.ConsumeBytes(b) b = b[m:] switch num { - case genid.GoFeatures_LegacyUnmarshalJsonEnum_field_number: + case genid.FeatureSet_Go_ext_number: parent = unmarshalGoFeature(v, parent) } } diff --git a/vendor/google.golang.org/protobuf/internal/genid/doc.go b/vendor/google.golang.org/protobuf/internal/genid/doc.go index 45ccd012..d9b9d916 100644 --- a/vendor/google.golang.org/protobuf/internal/genid/doc.go +++ b/vendor/google.golang.org/protobuf/internal/genid/doc.go @@ -6,6 +6,6 @@ // and the well-known types. package genid -import protoreflect "google.golang.org/protobuf/reflect/protoreflect" +import "google.golang.org/protobuf/reflect/protoreflect" const GoogleProtobuf_package protoreflect.FullName = "google.protobuf" diff --git a/vendor/google.golang.org/protobuf/internal/genid/go_features_gen.go b/vendor/google.golang.org/protobuf/internal/genid/go_features_gen.go index 9a652a2b..7f67cbb6 100644 --- a/vendor/google.golang.org/protobuf/internal/genid/go_features_gen.go +++ b/vendor/google.golang.org/protobuf/internal/genid/go_features_gen.go @@ -12,20 +12,25 @@ import ( const File_google_protobuf_go_features_proto = "google/protobuf/go_features.proto" -// Names for google.protobuf.GoFeatures. +// Names for pb.GoFeatures. const ( GoFeatures_message_name protoreflect.Name = "GoFeatures" - GoFeatures_message_fullname protoreflect.FullName = "google.protobuf.GoFeatures" + GoFeatures_message_fullname protoreflect.FullName = "pb.GoFeatures" ) -// Field names for google.protobuf.GoFeatures. +// Field names for pb.GoFeatures. const ( GoFeatures_LegacyUnmarshalJsonEnum_field_name protoreflect.Name = "legacy_unmarshal_json_enum" - GoFeatures_LegacyUnmarshalJsonEnum_field_fullname protoreflect.FullName = "google.protobuf.GoFeatures.legacy_unmarshal_json_enum" + GoFeatures_LegacyUnmarshalJsonEnum_field_fullname protoreflect.FullName = "pb.GoFeatures.legacy_unmarshal_json_enum" ) -// Field numbers for google.protobuf.GoFeatures. +// Field numbers for pb.GoFeatures. const ( GoFeatures_LegacyUnmarshalJsonEnum_field_number protoreflect.FieldNumber = 1 ) + +// Extension numbers +const ( + FeatureSet_Go_ext_number protoreflect.FieldNumber = 1002 +) diff --git a/vendor/google.golang.org/protobuf/internal/genid/map_entry.go b/vendor/google.golang.org/protobuf/internal/genid/map_entry.go index 8f9ea02f..bef5a25f 100644 --- a/vendor/google.golang.org/protobuf/internal/genid/map_entry.go +++ b/vendor/google.golang.org/protobuf/internal/genid/map_entry.go @@ -4,7 +4,7 @@ package genid -import protoreflect "google.golang.org/protobuf/reflect/protoreflect" +import "google.golang.org/protobuf/reflect/protoreflect" // Generic field names and numbers for synthetic map entry messages. const ( diff --git a/vendor/google.golang.org/protobuf/internal/genid/wrappers.go b/vendor/google.golang.org/protobuf/internal/genid/wrappers.go index 429384b8..9404270d 100644 --- a/vendor/google.golang.org/protobuf/internal/genid/wrappers.go +++ b/vendor/google.golang.org/protobuf/internal/genid/wrappers.go @@ -4,7 +4,7 @@ package genid -import protoreflect "google.golang.org/protobuf/reflect/protoreflect" +import "google.golang.org/protobuf/reflect/protoreflect" // Generic field name and number for messages in wrappers.proto. const ( diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_extension.go b/vendor/google.golang.org/protobuf/internal/impl/codec_extension.go index 4bb0a7a2..0d5b546e 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_extension.go +++ b/vendor/google.golang.org/protobuf/internal/impl/codec_extension.go @@ -67,7 +67,6 @@ type lazyExtensionValue struct { xi *extensionFieldInfo value protoreflect.Value b []byte - fn func() protoreflect.Value } type ExtensionField struct { @@ -158,10 +157,9 @@ func (f *ExtensionField) lazyInit() { } f.lazy.value = val } else { - f.lazy.value = f.lazy.fn() + panic("No support for lazy fns for ExtensionField") } f.lazy.xi = nil - f.lazy.fn = nil f.lazy.b = nil atomic.StoreUint32(&f.lazy.atomicOnce, 1) } @@ -174,13 +172,6 @@ func (f *ExtensionField) Set(t protoreflect.ExtensionType, v protoreflect.Value) f.lazy = nil } -// SetLazy sets the type and a value that is to be lazily evaluated upon first use. -// This must not be called concurrently. -func (f *ExtensionField) SetLazy(t protoreflect.ExtensionType, fn func() protoreflect.Value) { - f.typ = t - f.lazy = &lazyExtensionValue{fn: fn} -} - // Value returns the value of the extension field. // This may be called concurrently. func (f *ExtensionField) Value() protoreflect.Value { diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_field.go b/vendor/google.golang.org/protobuf/internal/impl/codec_field.go index 78ee47e4..7c1f66c8 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_field.go +++ b/vendor/google.golang.org/protobuf/internal/impl/codec_field.go @@ -65,6 +65,9 @@ func (mi *MessageInfo) initOneofFieldCoders(od protoreflect.OneofDescriptor, si if err != nil { return out, err } + if cf.funcs.isInit == nil { + out.initialized = true + } vi.Set(vw) return out, nil } diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_message.go b/vendor/google.golang.org/protobuf/internal/impl/codec_message.go index 6b2fdbb7..78be9df3 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_message.go +++ b/vendor/google.golang.org/protobuf/internal/impl/codec_message.go @@ -189,6 +189,9 @@ func (mi *MessageInfo) makeCoderMethods(t reflect.Type, si structInfo) { if mi.methods.Merge == nil { mi.methods.Merge = mi.merge } + if mi.methods.Equal == nil { + mi.methods.Equal = equal + } } // getUnknownBytes returns a *[]byte for the unknown fields. diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_reflect.go b/vendor/google.golang.org/protobuf/internal/impl/codec_reflect.go deleted file mode 100644 index 145c577b..00000000 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_reflect.go +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright 2019 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build purego || appengine -// +build purego appengine - -package impl - -import ( - "reflect" - - "google.golang.org/protobuf/encoding/protowire" -) - -func sizeEnum(p pointer, f *coderFieldInfo, _ marshalOptions) (size int) { - v := p.v.Elem().Int() - return f.tagsize + protowire.SizeVarint(uint64(v)) -} - -func appendEnum(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions) ([]byte, error) { - v := p.v.Elem().Int() - b = protowire.AppendVarint(b, f.wiretag) - b = protowire.AppendVarint(b, uint64(v)) - return b, nil -} - -func consumeEnum(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, _ unmarshalOptions) (out unmarshalOutput, err error) { - if wtyp != protowire.VarintType { - return out, errUnknown - } - v, n := protowire.ConsumeVarint(b) - if n < 0 { - return out, errDecode - } - p.v.Elem().SetInt(int64(v)) - out.n = n - return out, nil -} - -func mergeEnum(dst, src pointer, _ *coderFieldInfo, _ mergeOptions) { - dst.v.Elem().Set(src.v.Elem()) -} - -var coderEnum = pointerCoderFuncs{ - size: sizeEnum, - marshal: appendEnum, - unmarshal: consumeEnum, - merge: mergeEnum, -} - -func sizeEnumNoZero(p pointer, f *coderFieldInfo, opts marshalOptions) (size int) { - if p.v.Elem().Int() == 0 { - return 0 - } - return sizeEnum(p, f, opts) -} - -func appendEnumNoZero(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions) ([]byte, error) { - if p.v.Elem().Int() == 0 { - return b, nil - } - return appendEnum(b, p, f, opts) -} - -func mergeEnumNoZero(dst, src pointer, _ *coderFieldInfo, _ mergeOptions) { - if src.v.Elem().Int() != 0 { - dst.v.Elem().Set(src.v.Elem()) - } -} - -var coderEnumNoZero = pointerCoderFuncs{ - size: sizeEnumNoZero, - marshal: appendEnumNoZero, - unmarshal: consumeEnum, - merge: mergeEnumNoZero, -} - -func sizeEnumPtr(p pointer, f *coderFieldInfo, opts marshalOptions) (size int) { - return sizeEnum(pointer{p.v.Elem()}, f, opts) -} - -func appendEnumPtr(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions) ([]byte, error) { - return appendEnum(b, pointer{p.v.Elem()}, f, opts) -} - -func consumeEnumPtr(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { - if wtyp != protowire.VarintType { - return out, errUnknown - } - if p.v.Elem().IsNil() { - p.v.Elem().Set(reflect.New(p.v.Elem().Type().Elem())) - } - return consumeEnum(b, pointer{p.v.Elem()}, wtyp, f, opts) -} - -func mergeEnumPtr(dst, src pointer, _ *coderFieldInfo, _ mergeOptions) { - if !src.v.Elem().IsNil() { - v := reflect.New(dst.v.Type().Elem().Elem()) - v.Elem().Set(src.v.Elem().Elem()) - dst.v.Elem().Set(v) - } -} - -var coderEnumPtr = pointerCoderFuncs{ - size: sizeEnumPtr, - marshal: appendEnumPtr, - unmarshal: consumeEnumPtr, - merge: mergeEnumPtr, -} - -func sizeEnumSlice(p pointer, f *coderFieldInfo, opts marshalOptions) (size int) { - s := p.v.Elem() - for i, llen := 0, s.Len(); i < llen; i++ { - size += protowire.SizeVarint(uint64(s.Index(i).Int())) + f.tagsize - } - return size -} - -func appendEnumSlice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions) ([]byte, error) { - s := p.v.Elem() - for i, llen := 0, s.Len(); i < llen; i++ { - b = protowire.AppendVarint(b, f.wiretag) - b = protowire.AppendVarint(b, uint64(s.Index(i).Int())) - } - return b, nil -} - -func consumeEnumSlice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { - s := p.v.Elem() - if wtyp == protowire.BytesType { - b, n := protowire.ConsumeBytes(b) - if n < 0 { - return out, errDecode - } - for len(b) > 0 { - v, n := protowire.ConsumeVarint(b) - if n < 0 { - return out, errDecode - } - rv := reflect.New(s.Type().Elem()).Elem() - rv.SetInt(int64(v)) - s.Set(reflect.Append(s, rv)) - b = b[n:] - } - out.n = n - return out, nil - } - if wtyp != protowire.VarintType { - return out, errUnknown - } - v, n := protowire.ConsumeVarint(b) - if n < 0 { - return out, errDecode - } - rv := reflect.New(s.Type().Elem()).Elem() - rv.SetInt(int64(v)) - s.Set(reflect.Append(s, rv)) - out.n = n - return out, nil -} - -func mergeEnumSlice(dst, src pointer, _ *coderFieldInfo, _ mergeOptions) { - dst.v.Elem().Set(reflect.AppendSlice(dst.v.Elem(), src.v.Elem())) -} - -var coderEnumSlice = pointerCoderFuncs{ - size: sizeEnumSlice, - marshal: appendEnumSlice, - unmarshal: consumeEnumSlice, - merge: mergeEnumSlice, -} - -func sizeEnumPackedSlice(p pointer, f *coderFieldInfo, opts marshalOptions) (size int) { - s := p.v.Elem() - llen := s.Len() - if llen == 0 { - return 0 - } - n := 0 - for i := 0; i < llen; i++ { - n += protowire.SizeVarint(uint64(s.Index(i).Int())) - } - return f.tagsize + protowire.SizeBytes(n) -} - -func appendEnumPackedSlice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions) ([]byte, error) { - s := p.v.Elem() - llen := s.Len() - if llen == 0 { - return b, nil - } - b = protowire.AppendVarint(b, f.wiretag) - n := 0 - for i := 0; i < llen; i++ { - n += protowire.SizeVarint(uint64(s.Index(i).Int())) - } - b = protowire.AppendVarint(b, uint64(n)) - for i := 0; i < llen; i++ { - b = protowire.AppendVarint(b, uint64(s.Index(i).Int())) - } - return b, nil -} - -var coderEnumPackedSlice = pointerCoderFuncs{ - size: sizeEnumPackedSlice, - marshal: appendEnumPackedSlice, - unmarshal: consumeEnumSlice, - merge: mergeEnumSlice, -} diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_unsafe.go b/vendor/google.golang.org/protobuf/internal/impl/codec_unsafe.go index 757642e2..077712c2 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_unsafe.go +++ b/vendor/google.golang.org/protobuf/internal/impl/codec_unsafe.go @@ -2,9 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !purego && !appengine -// +build !purego,!appengine - package impl // When using unsafe pointers, we can just treat enum values as int32s. diff --git a/vendor/google.golang.org/protobuf/internal/impl/convert.go b/vendor/google.golang.org/protobuf/internal/impl/convert.go index e06ece55..f72ddd88 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/convert.go +++ b/vendor/google.golang.org/protobuf/internal/impl/convert.go @@ -322,7 +322,7 @@ func (c *stringConverter) PBValueOf(v reflect.Value) protoreflect.Value { return protoreflect.ValueOfString(v.Convert(stringType).String()) } func (c *stringConverter) GoValueOf(v protoreflect.Value) reflect.Value { - // pref.Value.String never panics, so we go through an interface + // protoreflect.Value.String never panics, so we go through an interface // conversion here to check the type. s := v.Interface().(string) if c.goType.Kind() == reflect.Slice && s == "" { diff --git a/vendor/google.golang.org/protobuf/internal/impl/encode.go b/vendor/google.golang.org/protobuf/internal/impl/encode.go index febd2122..6254f5de 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/encode.go +++ b/vendor/google.golang.org/protobuf/internal/impl/encode.go @@ -10,7 +10,7 @@ import ( "sync/atomic" "google.golang.org/protobuf/internal/flags" - proto "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/proto" piface "google.golang.org/protobuf/runtime/protoiface" ) diff --git a/vendor/google.golang.org/protobuf/internal/impl/equal.go b/vendor/google.golang.org/protobuf/internal/impl/equal.go new file mode 100644 index 00000000..9f6c32a7 --- /dev/null +++ b/vendor/google.golang.org/protobuf/internal/impl/equal.go @@ -0,0 +1,224 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package impl + +import ( + "bytes" + + "google.golang.org/protobuf/encoding/protowire" + "google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/runtime/protoiface" +) + +func equal(in protoiface.EqualInput) protoiface.EqualOutput { + return protoiface.EqualOutput{Equal: equalMessage(in.MessageA, in.MessageB)} +} + +// equalMessage is a fast-path variant of protoreflect.equalMessage. +// It takes advantage of the internal messageState type to avoid +// unnecessary allocations, type assertions. +func equalMessage(mx, my protoreflect.Message) bool { + if mx == nil || my == nil { + return mx == my + } + if mx.Descriptor() != my.Descriptor() { + return false + } + + msx, ok := mx.(*messageState) + if !ok { + return protoreflect.ValueOfMessage(mx).Equal(protoreflect.ValueOfMessage(my)) + } + msy, ok := my.(*messageState) + if !ok { + return protoreflect.ValueOfMessage(mx).Equal(protoreflect.ValueOfMessage(my)) + } + + mi := msx.messageInfo() + miy := msy.messageInfo() + if mi != miy { + return protoreflect.ValueOfMessage(mx).Equal(protoreflect.ValueOfMessage(my)) + } + mi.init() + // Compares regular fields + // Modified Message.Range code that compares two messages of the same type + // while going over the fields. + for _, ri := range mi.rangeInfos { + var fd protoreflect.FieldDescriptor + var vx, vy protoreflect.Value + + switch ri := ri.(type) { + case *fieldInfo: + hx := ri.has(msx.pointer()) + hy := ri.has(msy.pointer()) + if hx != hy { + return false + } + if !hx { + continue + } + fd = ri.fieldDesc + vx = ri.get(msx.pointer()) + vy = ri.get(msy.pointer()) + case *oneofInfo: + fnx := ri.which(msx.pointer()) + fny := ri.which(msy.pointer()) + if fnx != fny { + return false + } + if fnx <= 0 { + continue + } + fi := mi.fields[fnx] + fd = fi.fieldDesc + vx = fi.get(msx.pointer()) + vy = fi.get(msy.pointer()) + } + + if !equalValue(fd, vx, vy) { + return false + } + } + + // Compare extensions. + // This is more complicated because mx or my could have empty/nil extension maps, + // however some populated extension map values are equal to nil extension maps. + emx := mi.extensionMap(msx.pointer()) + emy := mi.extensionMap(msy.pointer()) + if emx != nil { + for k, x := range *emx { + xd := x.Type().TypeDescriptor() + xv := x.Value() + var y ExtensionField + ok := false + if emy != nil { + y, ok = (*emy)[k] + } + // We need to treat empty lists as equal to nil values + if emy == nil || !ok { + if xd.IsList() && xv.List().Len() == 0 { + continue + } + return false + } + + if !equalValue(xd, xv, y.Value()) { + return false + } + } + } + if emy != nil { + // emy may have extensions emx does not have, need to check them as well + for k, y := range *emy { + if emx != nil { + // emx has the field, so we already checked it + if _, ok := (*emx)[k]; ok { + continue + } + } + // Empty lists are equal to nil + if y.Type().TypeDescriptor().IsList() && y.Value().List().Len() == 0 { + continue + } + + // Cant be equal if the extension is populated + return false + } + } + + return equalUnknown(mx.GetUnknown(), my.GetUnknown()) +} + +func equalValue(fd protoreflect.FieldDescriptor, vx, vy protoreflect.Value) bool { + // slow path + if fd.Kind() != protoreflect.MessageKind { + return vx.Equal(vy) + } + + // fast path special cases + if fd.IsMap() { + if fd.MapValue().Kind() == protoreflect.MessageKind { + return equalMessageMap(vx.Map(), vy.Map()) + } + return vx.Equal(vy) + } + + if fd.IsList() { + return equalMessageList(vx.List(), vy.List()) + } + + return equalMessage(vx.Message(), vy.Message()) +} + +// Mostly copied from protoreflect.equalMap. +// This variant only works for messages as map types. +// All other map types should be handled via Value.Equal. +func equalMessageMap(mx, my protoreflect.Map) bool { + if mx.Len() != my.Len() { + return false + } + equal := true + mx.Range(func(k protoreflect.MapKey, vx protoreflect.Value) bool { + if !my.Has(k) { + equal = false + return false + } + vy := my.Get(k) + equal = equalMessage(vx.Message(), vy.Message()) + return equal + }) + return equal +} + +// Mostly copied from protoreflect.equalList. +// The only change is the usage of equalImpl instead of protoreflect.equalValue. +func equalMessageList(lx, ly protoreflect.List) bool { + if lx.Len() != ly.Len() { + return false + } + for i := 0; i < lx.Len(); i++ { + // We only operate on messages here since equalImpl will not call us in any other case. + if !equalMessage(lx.Get(i).Message(), ly.Get(i).Message()) { + return false + } + } + return true +} + +// equalUnknown compares unknown fields by direct comparison on the raw bytes +// of each individual field number. +// Copied from protoreflect.equalUnknown. +func equalUnknown(x, y protoreflect.RawFields) bool { + if len(x) != len(y) { + return false + } + if bytes.Equal([]byte(x), []byte(y)) { + return true + } + + mx := make(map[protoreflect.FieldNumber]protoreflect.RawFields) + my := make(map[protoreflect.FieldNumber]protoreflect.RawFields) + for len(x) > 0 { + fnum, _, n := protowire.ConsumeField(x) + mx[fnum] = append(mx[fnum], x[:n]...) + x = x[n:] + } + for len(y) > 0 { + fnum, _, n := protowire.ConsumeField(y) + my[fnum] = append(my[fnum], y[:n]...) + y = y[n:] + } + if len(mx) != len(my) { + return false + } + + for k, v1 := range mx { + if v2, ok := my[k]; !ok || !bytes.Equal([]byte(v1), []byte(v2)) { + return false + } + } + + return true +} diff --git a/vendor/google.golang.org/protobuf/internal/impl/legacy_extension.go b/vendor/google.golang.org/protobuf/internal/impl/legacy_extension.go index 6e8677ee..b6849d66 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/legacy_extension.go +++ b/vendor/google.golang.org/protobuf/internal/impl/legacy_extension.go @@ -160,6 +160,7 @@ func (x placeholderExtension) HasPresence() bool func (x placeholderExtension) HasOptionalKeyword() bool { return false } func (x placeholderExtension) IsExtension() bool { return true } func (x placeholderExtension) IsWeak() bool { return false } +func (x placeholderExtension) IsLazy() bool { return false } func (x placeholderExtension) IsPacked() bool { return false } func (x placeholderExtension) IsList() bool { return false } func (x placeholderExtension) IsMap() bool { return false } diff --git a/vendor/google.golang.org/protobuf/internal/impl/message.go b/vendor/google.golang.org/protobuf/internal/impl/message.go index 019399d4..741b5ed2 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/message.go +++ b/vendor/google.golang.org/protobuf/internal/impl/message.go @@ -30,8 +30,8 @@ type MessageInfo struct { // Desc is the underlying message descriptor type and must be populated. Desc protoreflect.MessageDescriptor - // Exporter must be provided in a purego environment in order to provide - // access to unexported fields. + // Deprecated: Exporter will be removed the next time we bump + // protoimpl.GenVersion. See https://github.com/golang/protobuf/issues/1640 Exporter exporter // OneofWrappers is list of pointers to oneof wrapper struct types. diff --git a/vendor/google.golang.org/protobuf/internal/impl/pointer_reflect.go b/vendor/google.golang.org/protobuf/internal/impl/pointer_reflect.go deleted file mode 100644 index da685e8a..00000000 --- a/vendor/google.golang.org/protobuf/internal/impl/pointer_reflect.go +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build purego || appengine -// +build purego appengine - -package impl - -import ( - "fmt" - "reflect" - "sync" -) - -const UnsafeEnabled = false - -// Pointer is an opaque pointer type. -type Pointer any - -// offset represents the offset to a struct field, accessible from a pointer. -// The offset is the field index into a struct. -type offset struct { - index int - export exporter -} - -// offsetOf returns a field offset for the struct field. -func offsetOf(f reflect.StructField, x exporter) offset { - if len(f.Index) != 1 { - panic("embedded structs are not supported") - } - if f.PkgPath == "" { - return offset{index: f.Index[0]} // field is already exported - } - if x == nil { - panic("exporter must be provided for unexported field") - } - return offset{index: f.Index[0], export: x} -} - -// IsValid reports whether the offset is valid. -func (f offset) IsValid() bool { return f.index >= 0 } - -// invalidOffset is an invalid field offset. -var invalidOffset = offset{index: -1} - -// zeroOffset is a noop when calling pointer.Apply. -var zeroOffset = offset{index: 0} - -// pointer is an abstract representation of a pointer to a struct or field. -type pointer struct{ v reflect.Value } - -// pointerOf returns p as a pointer. -func pointerOf(p Pointer) pointer { - return pointerOfIface(p) -} - -// pointerOfValue returns v as a pointer. -func pointerOfValue(v reflect.Value) pointer { - return pointer{v: v} -} - -// pointerOfIface returns the pointer portion of an interface. -func pointerOfIface(v any) pointer { - return pointer{v: reflect.ValueOf(v)} -} - -// IsNil reports whether the pointer is nil. -func (p pointer) IsNil() bool { - return p.v.IsNil() -} - -// Apply adds an offset to the pointer to derive a new pointer -// to a specified field. The current pointer must be pointing at a struct. -func (p pointer) Apply(f offset) pointer { - if f.export != nil { - if v := reflect.ValueOf(f.export(p.v.Interface(), f.index)); v.IsValid() { - return pointer{v: v} - } - } - return pointer{v: p.v.Elem().Field(f.index).Addr()} -} - -// AsValueOf treats p as a pointer to an object of type t and returns the value. -// It is equivalent to reflect.ValueOf(p.AsIfaceOf(t)) -func (p pointer) AsValueOf(t reflect.Type) reflect.Value { - if got := p.v.Type().Elem(); got != t { - panic(fmt.Sprintf("invalid type: got %v, want %v", got, t)) - } - return p.v -} - -// AsIfaceOf treats p as a pointer to an object of type t and returns the value. -// It is equivalent to p.AsValueOf(t).Interface() -func (p pointer) AsIfaceOf(t reflect.Type) any { - return p.AsValueOf(t).Interface() -} - -func (p pointer) Bool() *bool { return p.v.Interface().(*bool) } -func (p pointer) BoolPtr() **bool { return p.v.Interface().(**bool) } -func (p pointer) BoolSlice() *[]bool { return p.v.Interface().(*[]bool) } -func (p pointer) Int32() *int32 { return p.v.Interface().(*int32) } -func (p pointer) Int32Ptr() **int32 { return p.v.Interface().(**int32) } -func (p pointer) Int32Slice() *[]int32 { return p.v.Interface().(*[]int32) } -func (p pointer) Int64() *int64 { return p.v.Interface().(*int64) } -func (p pointer) Int64Ptr() **int64 { return p.v.Interface().(**int64) } -func (p pointer) Int64Slice() *[]int64 { return p.v.Interface().(*[]int64) } -func (p pointer) Uint32() *uint32 { return p.v.Interface().(*uint32) } -func (p pointer) Uint32Ptr() **uint32 { return p.v.Interface().(**uint32) } -func (p pointer) Uint32Slice() *[]uint32 { return p.v.Interface().(*[]uint32) } -func (p pointer) Uint64() *uint64 { return p.v.Interface().(*uint64) } -func (p pointer) Uint64Ptr() **uint64 { return p.v.Interface().(**uint64) } -func (p pointer) Uint64Slice() *[]uint64 { return p.v.Interface().(*[]uint64) } -func (p pointer) Float32() *float32 { return p.v.Interface().(*float32) } -func (p pointer) Float32Ptr() **float32 { return p.v.Interface().(**float32) } -func (p pointer) Float32Slice() *[]float32 { return p.v.Interface().(*[]float32) } -func (p pointer) Float64() *float64 { return p.v.Interface().(*float64) } -func (p pointer) Float64Ptr() **float64 { return p.v.Interface().(**float64) } -func (p pointer) Float64Slice() *[]float64 { return p.v.Interface().(*[]float64) } -func (p pointer) String() *string { return p.v.Interface().(*string) } -func (p pointer) StringPtr() **string { return p.v.Interface().(**string) } -func (p pointer) StringSlice() *[]string { return p.v.Interface().(*[]string) } -func (p pointer) Bytes() *[]byte { return p.v.Interface().(*[]byte) } -func (p pointer) BytesPtr() **[]byte { return p.v.Interface().(**[]byte) } -func (p pointer) BytesSlice() *[][]byte { return p.v.Interface().(*[][]byte) } -func (p pointer) WeakFields() *weakFields { return (*weakFields)(p.v.Interface().(*WeakFields)) } -func (p pointer) Extensions() *map[int32]ExtensionField { - return p.v.Interface().(*map[int32]ExtensionField) -} - -func (p pointer) Elem() pointer { - return pointer{v: p.v.Elem()} -} - -// PointerSlice copies []*T from p as a new []pointer. -// This behavior differs from the implementation in pointer_unsafe.go. -func (p pointer) PointerSlice() []pointer { - // TODO: reconsider this - if p.v.IsNil() { - return nil - } - n := p.v.Elem().Len() - s := make([]pointer, n) - for i := 0; i < n; i++ { - s[i] = pointer{v: p.v.Elem().Index(i)} - } - return s -} - -// AppendPointerSlice appends v to p, which must be a []*T. -func (p pointer) AppendPointerSlice(v pointer) { - sp := p.v.Elem() - sp.Set(reflect.Append(sp, v.v)) -} - -// SetPointer sets *p to v. -func (p pointer) SetPointer(v pointer) { - p.v.Elem().Set(v.v) -} - -func growSlice(p pointer, addCap int) { - // TODO: Once we only support Go 1.20 and newer, use reflect.Grow. - in := p.v.Elem() - out := reflect.MakeSlice(in.Type(), in.Len(), in.Len()+addCap) - reflect.Copy(out, in) - p.v.Elem().Set(out) -} - -func (p pointer) growBoolSlice(addCap int) { - growSlice(p, addCap) -} - -func (p pointer) growInt32Slice(addCap int) { - growSlice(p, addCap) -} - -func (p pointer) growUint32Slice(addCap int) { - growSlice(p, addCap) -} - -func (p pointer) growInt64Slice(addCap int) { - growSlice(p, addCap) -} - -func (p pointer) growUint64Slice(addCap int) { - growSlice(p, addCap) -} - -func (p pointer) growFloat64Slice(addCap int) { - growSlice(p, addCap) -} - -func (p pointer) growFloat32Slice(addCap int) { - growSlice(p, addCap) -} - -func (Export) MessageStateOf(p Pointer) *messageState { panic("not supported") } -func (ms *messageState) pointer() pointer { panic("not supported") } -func (ms *messageState) messageInfo() *MessageInfo { panic("not supported") } -func (ms *messageState) LoadMessageInfo() *MessageInfo { panic("not supported") } -func (ms *messageState) StoreMessageInfo(mi *MessageInfo) { panic("not supported") } - -type atomicNilMessage struct { - once sync.Once - m messageReflectWrapper -} - -func (m *atomicNilMessage) Init(mi *MessageInfo) *messageReflectWrapper { - m.once.Do(func() { - m.m.p = pointerOfIface(reflect.Zero(mi.GoReflectType).Interface()) - m.m.mi = mi - }) - return &m.m -} diff --git a/vendor/google.golang.org/protobuf/internal/impl/pointer_unsafe.go b/vendor/google.golang.org/protobuf/internal/impl/pointer_unsafe.go index 5f20ca5d..79e18666 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/pointer_unsafe.go +++ b/vendor/google.golang.org/protobuf/internal/impl/pointer_unsafe.go @@ -2,9 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !purego && !appengine -// +build !purego,!appengine - package impl import ( diff --git a/vendor/google.golang.org/protobuf/internal/strs/strings_pure.go b/vendor/google.golang.org/protobuf/internal/strs/strings_pure.go deleted file mode 100644 index a1f6f333..00000000 --- a/vendor/google.golang.org/protobuf/internal/strs/strings_pure.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build purego || appengine -// +build purego appengine - -package strs - -import pref "google.golang.org/protobuf/reflect/protoreflect" - -func UnsafeString(b []byte) string { - return string(b) -} - -func UnsafeBytes(s string) []byte { - return []byte(s) -} - -type Builder struct{} - -func (*Builder) AppendFullName(prefix pref.FullName, name pref.Name) pref.FullName { - return prefix.Append(name) -} - -func (*Builder) MakeString(b []byte) string { - return string(b) -} diff --git a/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go120.go b/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go120.go index a008acd0..832a7988 100644 --- a/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go120.go +++ b/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go120.go @@ -2,8 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !purego && !appengine && !go1.21 -// +build !purego,!appengine,!go1.21 +//go:build !go1.21 package strs diff --git a/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go121.go b/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go121.go index 60166f2b..1ffddf68 100644 --- a/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go121.go +++ b/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go121.go @@ -2,8 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !purego && !appengine && go1.21 -// +build !purego,!appengine,go1.21 +//go:build go1.21 package strs diff --git a/vendor/google.golang.org/protobuf/internal/version/version.go b/vendor/google.golang.org/protobuf/internal/version/version.go index dbbf1f68..fb8e15e8 100644 --- a/vendor/google.golang.org/protobuf/internal/version/version.go +++ b/vendor/google.golang.org/protobuf/internal/version/version.go @@ -51,8 +51,8 @@ import ( // 10. Send out the CL for review and submit it. const ( Major = 1 - Minor = 34 - Patch = 2 + Minor = 35 + Patch = 1 PreRelease = "" ) diff --git a/vendor/google.golang.org/protobuf/proto/equal.go b/vendor/google.golang.org/protobuf/proto/equal.go index 1a0be1b0..c36d4a9c 100644 --- a/vendor/google.golang.org/protobuf/proto/equal.go +++ b/vendor/google.golang.org/protobuf/proto/equal.go @@ -8,6 +8,7 @@ import ( "reflect" "google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/runtime/protoiface" ) // Equal reports whether two messages are equal, @@ -51,6 +52,14 @@ func Equal(x, y Message) bool { if mx.IsValid() != my.IsValid() { return false } + + // Only one of the messages needs to implement the fast-path for it to work. + pmx := protoMethods(mx) + pmy := protoMethods(my) + if pmx != nil && pmy != nil && pmx.Equal != nil && pmy.Equal != nil { + return pmx.Equal(protoiface.EqualInput{MessageA: mx, MessageB: my}).Equal + } + vx := protoreflect.ValueOfMessage(mx) vy := protoreflect.ValueOfMessage(my) return vx.Equal(vy) diff --git a/vendor/google.golang.org/protobuf/proto/extension.go b/vendor/google.golang.org/protobuf/proto/extension.go index d248f292..78445d11 100644 --- a/vendor/google.golang.org/protobuf/proto/extension.go +++ b/vendor/google.golang.org/protobuf/proto/extension.go @@ -39,6 +39,48 @@ func ClearExtension(m Message, xt protoreflect.ExtensionType) { // If the field is unpopulated, it returns the default value for // scalars and an immutable, empty value for lists or messages. // It panics if xt does not extend m. +// +// The type of the value is dependent on the field type of the extension. +// For extensions generated by protoc-gen-go, the Go type is as follows: +// +// ╔═══════════════════╤═════════════════════════╗ +// ║ Go type │ Protobuf kind ║ +// ╠═══════════════════╪═════════════════════════╣ +// ║ bool │ bool ║ +// ║ int32 │ int32, sint32, sfixed32 ║ +// ║ int64 │ int64, sint64, sfixed64 ║ +// ║ uint32 │ uint32, fixed32 ║ +// ║ uint64 │ uint64, fixed64 ║ +// ║ float32 │ float ║ +// ║ float64 │ double ║ +// ║ string │ string ║ +// ║ []byte │ bytes ║ +// ║ protoreflect.Enum │ enum ║ +// ║ proto.Message │ message, group ║ +// ╚═══════════════════╧═════════════════════════╝ +// +// The protoreflect.Enum and proto.Message types are the concrete Go type +// associated with the named enum or message. Repeated fields are represented +// using a Go slice of the base element type. +// +// If a generated extension descriptor variable is directly passed to +// GetExtension, then the call should be followed immediately by a +// type assertion to the expected output value. For example: +// +// mm := proto.GetExtension(m, foopb.E_MyExtension).(*foopb.MyMessage) +// +// This pattern enables static analysis tools to verify that the asserted type +// matches the Go type associated with the extension field and +// also enables a possible future migration to a type-safe extension API. +// +// Since singular messages are the most common extension type, the pattern of +// calling HasExtension followed by GetExtension may be simplified to: +// +// if mm := proto.GetExtension(m, foopb.E_MyExtension).(*foopb.MyMessage); mm != nil { +// ... // make use of mm +// } +// +// The mm variable is non-nil if and only if HasExtension reports true. func GetExtension(m Message, xt protoreflect.ExtensionType) any { // Treat nil message interface as an empty message; return the default. if m == nil { @@ -51,6 +93,35 @@ func GetExtension(m Message, xt protoreflect.ExtensionType) any { // SetExtension stores the value of an extension field. // It panics if m is invalid, xt does not extend m, or if type of v // is invalid for the specified extension field. +// +// The type of the value is dependent on the field type of the extension. +// For extensions generated by protoc-gen-go, the Go type is as follows: +// +// ╔═══════════════════╤═════════════════════════╗ +// ║ Go type │ Protobuf kind ║ +// ╠═══════════════════╪═════════════════════════╣ +// ║ bool │ bool ║ +// ║ int32 │ int32, sint32, sfixed32 ║ +// ║ int64 │ int64, sint64, sfixed64 ║ +// ║ uint32 │ uint32, fixed32 ║ +// ║ uint64 │ uint64, fixed64 ║ +// ║ float32 │ float ║ +// ║ float64 │ double ║ +// ║ string │ string ║ +// ║ []byte │ bytes ║ +// ║ protoreflect.Enum │ enum ║ +// ║ proto.Message │ message, group ║ +// ╚═══════════════════╧═════════════════════════╝ +// +// The protoreflect.Enum and proto.Message types are the concrete Go type +// associated with the named enum or message. Repeated fields are represented +// using a Go slice of the base element type. +// +// If a generated extension descriptor variable is directly passed to +// SetExtension (e.g., foopb.E_MyExtension), then the value should be a +// concrete type that matches the expected Go type for the extension descriptor +// so that static analysis tools can verify type correctness. +// This also enables a possible future migration to a type-safe extension API. func SetExtension(m Message, xt protoreflect.ExtensionType, v any) { xd := xt.TypeDescriptor() pv := xt.ValueOf(v) diff --git a/vendor/google.golang.org/protobuf/reflect/protodesc/desc_init.go b/vendor/google.golang.org/protobuf/reflect/protodesc/desc_init.go index 85617554..ebcb4a8a 100644 --- a/vendor/google.golang.org/protobuf/reflect/protodesc/desc_init.go +++ b/vendor/google.golang.org/protobuf/reflect/protodesc/desc_init.go @@ -150,6 +150,7 @@ func (r descsByName) initFieldsFromDescriptorProto(fds []*descriptorpb.FieldDesc opts = proto.Clone(opts).(*descriptorpb.FieldOptions) f.L1.Options = func() protoreflect.ProtoMessage { return opts } f.L1.IsWeak = opts.GetWeak() + f.L1.IsLazy = opts.GetLazy() if opts.Packed != nil { f.L1.EditionFeatures.IsPacked = opts.GetPacked() } @@ -214,6 +215,9 @@ func (r descsByName) initExtensionDeclarations(xds []*descriptorpb.FieldDescript if xd.JsonName != nil { x.L2.StringName.InitJSON(xd.GetJsonName()) } + if x.L1.Kind == protoreflect.MessageKind && x.L1.EditionFeatures.IsDelimitedEncoded { + x.L1.Kind = protoreflect.GroupKind + } } return xs, nil } diff --git a/vendor/google.golang.org/protobuf/reflect/protodesc/editions.go b/vendor/google.golang.org/protobuf/reflect/protodesc/editions.go index 804830ed..002e0047 100644 --- a/vendor/google.golang.org/protobuf/reflect/protodesc/editions.go +++ b/vendor/google.golang.org/protobuf/reflect/protodesc/editions.go @@ -14,7 +14,7 @@ import ( "google.golang.org/protobuf/proto" "google.golang.org/protobuf/reflect/protoreflect" "google.golang.org/protobuf/types/descriptorpb" - gofeaturespb "google.golang.org/protobuf/types/gofeaturespb" + "google.golang.org/protobuf/types/gofeaturespb" ) var defaults = &descriptorpb.FeatureSetDefaults{} diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/methods.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/methods.go index d5d5af6e..742cb518 100644 --- a/vendor/google.golang.org/protobuf/reflect/protoreflect/methods.go +++ b/vendor/google.golang.org/protobuf/reflect/protoreflect/methods.go @@ -23,6 +23,7 @@ type ( Unmarshal func(unmarshalInput) (unmarshalOutput, error) Merge func(mergeInput) mergeOutput CheckInitialized func(checkInitializedInput) (checkInitializedOutput, error) + Equal func(equalInput) equalOutput } supportFlags = uint64 sizeInput = struct { @@ -75,4 +76,13 @@ type ( checkInitializedOutput = struct { pragma.NoUnkeyedLiterals } + equalInput = struct { + pragma.NoUnkeyedLiterals + MessageA Message + MessageB Message + } + equalOutput = struct { + pragma.NoUnkeyedLiterals + Equal bool + } ) diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_pure.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_pure.go deleted file mode 100644 index 75f83a2a..00000000 --- a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_pure.go +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build purego || appengine -// +build purego appengine - -package protoreflect - -import "google.golang.org/protobuf/internal/pragma" - -type valueType int - -const ( - nilType valueType = iota - boolType - int32Type - int64Type - uint32Type - uint64Type - float32Type - float64Type - stringType - bytesType - enumType - ifaceType -) - -// value is a union where only one type can be represented at a time. -// This uses a distinct field for each type. This is type safe in Go, but -// occupies more memory than necessary (72B). -type value struct { - pragma.DoNotCompare // 0B - - typ valueType // 8B - num uint64 // 8B - str string // 16B - bin []byte // 24B - iface any // 16B -} - -func valueOfString(v string) Value { - return Value{typ: stringType, str: v} -} -func valueOfBytes(v []byte) Value { - return Value{typ: bytesType, bin: v} -} -func valueOfIface(v any) Value { - return Value{typ: ifaceType, iface: v} -} - -func (v Value) getString() string { - return v.str -} -func (v Value) getBytes() []byte { - return v.bin -} -func (v Value) getIface() any { - return v.iface -} diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go120.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go120.go index 7f3583ea..0015fcb3 100644 --- a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go120.go +++ b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go120.go @@ -2,8 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !purego && !appengine && !go1.21 -// +build !purego,!appengine,!go1.21 +//go:build !go1.21 package protoreflect diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go121.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go121.go index f7d38699..479527b5 100644 --- a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go121.go +++ b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go121.go @@ -2,8 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !purego && !appengine && go1.21 -// +build !purego,!appengine,go1.21 +//go:build go1.21 package protoreflect diff --git a/vendor/google.golang.org/protobuf/runtime/protoiface/methods.go b/vendor/google.golang.org/protobuf/runtime/protoiface/methods.go index 44cf467d..24615656 100644 --- a/vendor/google.golang.org/protobuf/runtime/protoiface/methods.go +++ b/vendor/google.golang.org/protobuf/runtime/protoiface/methods.go @@ -39,6 +39,9 @@ type Methods = struct { // CheckInitialized returns an error if any required fields in the message are not set. CheckInitialized func(CheckInitializedInput) (CheckInitializedOutput, error) + + // Equal compares two messages and returns EqualOutput.Equal == true if they are equal. + Equal func(EqualInput) EqualOutput } // SupportFlags indicate support for optional features. @@ -166,3 +169,18 @@ type CheckInitializedInput = struct { type CheckInitializedOutput = struct { pragma.NoUnkeyedLiterals } + +// EqualInput is input to the Equal method. +type EqualInput = struct { + pragma.NoUnkeyedLiterals + + MessageA protoreflect.Message + MessageB protoreflect.Message +} + +// EqualOutput is output from the Equal method. +type EqualOutput = struct { + pragma.NoUnkeyedLiterals + + Equal bool +} diff --git a/vendor/google.golang.org/protobuf/types/descriptorpb/descriptor.pb.go b/vendor/google.golang.org/protobuf/types/descriptorpb/descriptor.pb.go index 9403eb07..6dea75cd 100644 --- a/vendor/google.golang.org/protobuf/types/descriptorpb/descriptor.pb.go +++ b/vendor/google.golang.org/protobuf/types/descriptorpb/descriptor.pb.go @@ -1217,11 +1217,9 @@ type FileDescriptorSet struct { func (x *FileDescriptorSet) Reset() { *x = FileDescriptorSet{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FileDescriptorSet) String() string { @@ -1232,7 +1230,7 @@ func (*FileDescriptorSet) ProtoMessage() {} func (x *FileDescriptorSet) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1291,11 +1289,9 @@ type FileDescriptorProto struct { func (x *FileDescriptorProto) Reset() { *x = FileDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[1] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FileDescriptorProto) String() string { @@ -1306,7 +1302,7 @@ func (*FileDescriptorProto) ProtoMessage() {} func (x *FileDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[1] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1434,11 +1430,9 @@ type DescriptorProto struct { func (x *DescriptorProto) Reset() { *x = DescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[2] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *DescriptorProto) String() string { @@ -1449,7 +1443,7 @@ func (*DescriptorProto) ProtoMessage() {} func (x *DescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[2] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1561,11 +1555,9 @@ const ( func (x *ExtensionRangeOptions) Reset() { *x = ExtensionRangeOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[3] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *ExtensionRangeOptions) String() string { @@ -1576,7 +1568,7 @@ func (*ExtensionRangeOptions) ProtoMessage() {} func (x *ExtensionRangeOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[3] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1680,11 +1672,9 @@ type FieldDescriptorProto struct { func (x *FieldDescriptorProto) Reset() { *x = FieldDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[4] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FieldDescriptorProto) String() string { @@ -1695,7 +1685,7 @@ func (*FieldDescriptorProto) ProtoMessage() {} func (x *FieldDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[4] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1799,11 +1789,9 @@ type OneofDescriptorProto struct { func (x *OneofDescriptorProto) Reset() { *x = OneofDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[5] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *OneofDescriptorProto) String() string { @@ -1814,7 +1802,7 @@ func (*OneofDescriptorProto) ProtoMessage() {} func (x *OneofDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[5] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1863,11 +1851,9 @@ type EnumDescriptorProto struct { func (x *EnumDescriptorProto) Reset() { *x = EnumDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[6] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *EnumDescriptorProto) String() string { @@ -1878,7 +1864,7 @@ func (*EnumDescriptorProto) ProtoMessage() {} func (x *EnumDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[6] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1941,11 +1927,9 @@ type EnumValueDescriptorProto struct { func (x *EnumValueDescriptorProto) Reset() { *x = EnumValueDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[7] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *EnumValueDescriptorProto) String() string { @@ -1956,7 +1940,7 @@ func (*EnumValueDescriptorProto) ProtoMessage() {} func (x *EnumValueDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[7] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2005,11 +1989,9 @@ type ServiceDescriptorProto struct { func (x *ServiceDescriptorProto) Reset() { *x = ServiceDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[8] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *ServiceDescriptorProto) String() string { @@ -2020,7 +2002,7 @@ func (*ServiceDescriptorProto) ProtoMessage() {} func (x *ServiceDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[8] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2082,11 +2064,9 @@ const ( func (x *MethodDescriptorProto) Reset() { *x = MethodDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[9] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[9] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *MethodDescriptorProto) String() string { @@ -2097,7 +2077,7 @@ func (*MethodDescriptorProto) ProtoMessage() {} func (x *MethodDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[9] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2267,11 +2247,9 @@ const ( func (x *FileOptions) Reset() { *x = FileOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[10] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[10] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FileOptions) String() string { @@ -2282,7 +2260,7 @@ func (*FileOptions) ProtoMessage() {} func (x *FileOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[10] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2534,11 +2512,9 @@ const ( func (x *MessageOptions) Reset() { *x = MessageOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[11] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[11] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *MessageOptions) String() string { @@ -2549,7 +2525,7 @@ func (*MessageOptions) ProtoMessage() {} func (x *MessageOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[11] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2707,11 +2683,9 @@ const ( func (x *FieldOptions) Reset() { *x = FieldOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[12] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[12] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FieldOptions) String() string { @@ -2722,7 +2696,7 @@ func (*FieldOptions) ProtoMessage() {} func (x *FieldOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[12] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2849,11 +2823,9 @@ type OneofOptions struct { func (x *OneofOptions) Reset() { *x = OneofOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[13] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[13] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *OneofOptions) String() string { @@ -2864,7 +2836,7 @@ func (*OneofOptions) ProtoMessage() {} func (x *OneofOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[13] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2929,11 +2901,9 @@ const ( func (x *EnumOptions) Reset() { *x = EnumOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[14] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[14] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *EnumOptions) String() string { @@ -2944,7 +2914,7 @@ func (*EnumOptions) ProtoMessage() {} func (x *EnumOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[14] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3026,11 +2996,9 @@ const ( func (x *EnumValueOptions) Reset() { *x = EnumValueOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[15] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[15] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *EnumValueOptions) String() string { @@ -3041,7 +3009,7 @@ func (*EnumValueOptions) ProtoMessage() {} func (x *EnumValueOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[15] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3115,11 +3083,9 @@ const ( func (x *ServiceOptions) Reset() { *x = ServiceOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[16] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[16] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *ServiceOptions) String() string { @@ -3130,7 +3096,7 @@ func (*ServiceOptions) ProtoMessage() {} func (x *ServiceOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[16] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3192,11 +3158,9 @@ const ( func (x *MethodOptions) Reset() { *x = MethodOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[17] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[17] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *MethodOptions) String() string { @@ -3207,7 +3171,7 @@ func (*MethodOptions) ProtoMessage() {} func (x *MethodOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[17] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3274,11 +3238,9 @@ type UninterpretedOption struct { func (x *UninterpretedOption) Reset() { *x = UninterpretedOption{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[18] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[18] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *UninterpretedOption) String() string { @@ -3289,7 +3251,7 @@ func (*UninterpretedOption) ProtoMessage() {} func (x *UninterpretedOption) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[18] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3375,11 +3337,9 @@ type FeatureSet struct { func (x *FeatureSet) Reset() { *x = FeatureSet{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[19] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[19] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FeatureSet) String() string { @@ -3390,7 +3350,7 @@ func (*FeatureSet) ProtoMessage() {} func (x *FeatureSet) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[19] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3467,11 +3427,9 @@ type FeatureSetDefaults struct { func (x *FeatureSetDefaults) Reset() { *x = FeatureSetDefaults{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[20] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[20] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FeatureSetDefaults) String() string { @@ -3482,7 +3440,7 @@ func (*FeatureSetDefaults) ProtoMessage() {} func (x *FeatureSetDefaults) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[20] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3578,11 +3536,9 @@ type SourceCodeInfo struct { func (x *SourceCodeInfo) Reset() { *x = SourceCodeInfo{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[21] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[21] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *SourceCodeInfo) String() string { @@ -3593,7 +3549,7 @@ func (*SourceCodeInfo) ProtoMessage() {} func (x *SourceCodeInfo) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[21] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3630,11 +3586,9 @@ type GeneratedCodeInfo struct { func (x *GeneratedCodeInfo) Reset() { *x = GeneratedCodeInfo{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[22] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[22] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *GeneratedCodeInfo) String() string { @@ -3645,7 +3599,7 @@ func (*GeneratedCodeInfo) ProtoMessage() {} func (x *GeneratedCodeInfo) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[22] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3679,11 +3633,9 @@ type DescriptorProto_ExtensionRange struct { func (x *DescriptorProto_ExtensionRange) Reset() { *x = DescriptorProto_ExtensionRange{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[23] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[23] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *DescriptorProto_ExtensionRange) String() string { @@ -3694,7 +3646,7 @@ func (*DescriptorProto_ExtensionRange) ProtoMessage() {} func (x *DescriptorProto_ExtensionRange) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[23] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3744,11 +3696,9 @@ type DescriptorProto_ReservedRange struct { func (x *DescriptorProto_ReservedRange) Reset() { *x = DescriptorProto_ReservedRange{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[24] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[24] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *DescriptorProto_ReservedRange) String() string { @@ -3759,7 +3709,7 @@ func (*DescriptorProto_ReservedRange) ProtoMessage() {} func (x *DescriptorProto_ReservedRange) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[24] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3813,11 +3763,9 @@ type ExtensionRangeOptions_Declaration struct { func (x *ExtensionRangeOptions_Declaration) Reset() { *x = ExtensionRangeOptions_Declaration{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[25] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[25] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *ExtensionRangeOptions_Declaration) String() string { @@ -3828,7 +3776,7 @@ func (*ExtensionRangeOptions_Declaration) ProtoMessage() {} func (x *ExtensionRangeOptions_Declaration) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[25] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3895,11 +3843,9 @@ type EnumDescriptorProto_EnumReservedRange struct { func (x *EnumDescriptorProto_EnumReservedRange) Reset() { *x = EnumDescriptorProto_EnumReservedRange{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[26] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[26] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *EnumDescriptorProto_EnumReservedRange) String() string { @@ -3910,7 +3856,7 @@ func (*EnumDescriptorProto_EnumReservedRange) ProtoMessage() {} func (x *EnumDescriptorProto_EnumReservedRange) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[26] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3950,11 +3896,9 @@ type FieldOptions_EditionDefault struct { func (x *FieldOptions_EditionDefault) Reset() { *x = FieldOptions_EditionDefault{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[27] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[27] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FieldOptions_EditionDefault) String() string { @@ -3965,7 +3909,7 @@ func (*FieldOptions_EditionDefault) ProtoMessage() {} func (x *FieldOptions_EditionDefault) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[27] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -4018,11 +3962,9 @@ type FieldOptions_FeatureSupport struct { func (x *FieldOptions_FeatureSupport) Reset() { *x = FieldOptions_FeatureSupport{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[28] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[28] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FieldOptions_FeatureSupport) String() string { @@ -4033,7 +3975,7 @@ func (*FieldOptions_FeatureSupport) ProtoMessage() {} func (x *FieldOptions_FeatureSupport) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[28] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -4092,11 +4034,9 @@ type UninterpretedOption_NamePart struct { func (x *UninterpretedOption_NamePart) Reset() { *x = UninterpretedOption_NamePart{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[29] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[29] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *UninterpretedOption_NamePart) String() string { @@ -4107,7 +4047,7 @@ func (*UninterpretedOption_NamePart) ProtoMessage() {} func (x *UninterpretedOption_NamePart) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[29] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -4154,11 +4094,9 @@ type FeatureSetDefaults_FeatureSetEditionDefault struct { func (x *FeatureSetDefaults_FeatureSetEditionDefault) Reset() { *x = FeatureSetDefaults_FeatureSetEditionDefault{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[30] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[30] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FeatureSetDefaults_FeatureSetEditionDefault) String() string { @@ -4169,7 +4107,7 @@ func (*FeatureSetDefaults_FeatureSetEditionDefault) ProtoMessage() {} func (x *FeatureSetDefaults_FeatureSetEditionDefault) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[30] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -4305,11 +4243,9 @@ type SourceCodeInfo_Location struct { func (x *SourceCodeInfo_Location) Reset() { *x = SourceCodeInfo_Location{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[31] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[31] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *SourceCodeInfo_Location) String() string { @@ -4320,7 +4256,7 @@ func (*SourceCodeInfo_Location) ProtoMessage() {} func (x *SourceCodeInfo_Location) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[31] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -4392,11 +4328,9 @@ type GeneratedCodeInfo_Annotation struct { func (x *GeneratedCodeInfo_Annotation) Reset() { *x = GeneratedCodeInfo_Annotation{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[32] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[32] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *GeneratedCodeInfo_Annotation) String() string { @@ -4407,7 +4341,7 @@ func (*GeneratedCodeInfo_Annotation) ProtoMessage() {} func (x *GeneratedCodeInfo_Annotation) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[32] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -5385,424 +5319,6 @@ func file_google_protobuf_descriptor_proto_init() { if File_google_protobuf_descriptor_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_google_protobuf_descriptor_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*FileDescriptorSet); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[1].Exporter = func(v any, i int) any { - switch v := v.(*FileDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[2].Exporter = func(v any, i int) any { - switch v := v.(*DescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[3].Exporter = func(v any, i int) any { - switch v := v.(*ExtensionRangeOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[4].Exporter = func(v any, i int) any { - switch v := v.(*FieldDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[5].Exporter = func(v any, i int) any { - switch v := v.(*OneofDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[6].Exporter = func(v any, i int) any { - switch v := v.(*EnumDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[7].Exporter = func(v any, i int) any { - switch v := v.(*EnumValueDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[8].Exporter = func(v any, i int) any { - switch v := v.(*ServiceDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[9].Exporter = func(v any, i int) any { - switch v := v.(*MethodDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[10].Exporter = func(v any, i int) any { - switch v := v.(*FileOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[11].Exporter = func(v any, i int) any { - switch v := v.(*MessageOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[12].Exporter = func(v any, i int) any { - switch v := v.(*FieldOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[13].Exporter = func(v any, i int) any { - switch v := v.(*OneofOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[14].Exporter = func(v any, i int) any { - switch v := v.(*EnumOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[15].Exporter = func(v any, i int) any { - switch v := v.(*EnumValueOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[16].Exporter = func(v any, i int) any { - switch v := v.(*ServiceOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[17].Exporter = func(v any, i int) any { - switch v := v.(*MethodOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[18].Exporter = func(v any, i int) any { - switch v := v.(*UninterpretedOption); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[19].Exporter = func(v any, i int) any { - switch v := v.(*FeatureSet); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[20].Exporter = func(v any, i int) any { - switch v := v.(*FeatureSetDefaults); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[21].Exporter = func(v any, i int) any { - switch v := v.(*SourceCodeInfo); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[22].Exporter = func(v any, i int) any { - switch v := v.(*GeneratedCodeInfo); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[23].Exporter = func(v any, i int) any { - switch v := v.(*DescriptorProto_ExtensionRange); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[24].Exporter = func(v any, i int) any { - switch v := v.(*DescriptorProto_ReservedRange); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[25].Exporter = func(v any, i int) any { - switch v := v.(*ExtensionRangeOptions_Declaration); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[26].Exporter = func(v any, i int) any { - switch v := v.(*EnumDescriptorProto_EnumReservedRange); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[27].Exporter = func(v any, i int) any { - switch v := v.(*FieldOptions_EditionDefault); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[28].Exporter = func(v any, i int) any { - switch v := v.(*FieldOptions_FeatureSupport); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[29].Exporter = func(v any, i int) any { - switch v := v.(*UninterpretedOption_NamePart); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[30].Exporter = func(v any, i int) any { - switch v := v.(*FeatureSetDefaults_FeatureSetEditionDefault); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[31].Exporter = func(v any, i int) any { - switch v := v.(*SourceCodeInfo_Location); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[32].Exporter = func(v any, i int) any { - switch v := v.(*GeneratedCodeInfo_Annotation); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/vendor/google.golang.org/protobuf/types/gofeaturespb/go_features.pb.go b/vendor/google.golang.org/protobuf/types/gofeaturespb/go_features.pb.go index a2ca940c..c7e860fc 100644 --- a/vendor/google.golang.org/protobuf/types/gofeaturespb/go_features.pb.go +++ b/vendor/google.golang.org/protobuf/types/gofeaturespb/go_features.pb.go @@ -29,11 +29,9 @@ type GoFeatures struct { func (x *GoFeatures) Reset() { *x = GoFeatures{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_go_features_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_go_features_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *GoFeatures) String() string { @@ -44,7 +42,7 @@ func (*GoFeatures) ProtoMessage() {} func (x *GoFeatures) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_go_features_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -145,20 +143,6 @@ func file_google_protobuf_go_features_proto_init() { if File_google_protobuf_go_features_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_google_protobuf_go_features_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*GoFeatures); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/vendor/google.golang.org/protobuf/types/known/anypb/any.pb.go b/vendor/google.golang.org/protobuf/types/known/anypb/any.pb.go index 7172b43d..87da199a 100644 --- a/vendor/google.golang.org/protobuf/types/known/anypb/any.pb.go +++ b/vendor/google.golang.org/protobuf/types/known/anypb/any.pb.go @@ -368,11 +368,9 @@ func (x *Any) UnmarshalNew() (proto.Message, error) { func (x *Any) Reset() { *x = Any{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_any_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_any_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *Any) String() string { @@ -383,7 +381,7 @@ func (*Any) ProtoMessage() {} func (x *Any) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_any_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -461,20 +459,6 @@ func file_google_protobuf_any_proto_init() { if File_google_protobuf_any_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_google_protobuf_any_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*Any); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/vendor/google.golang.org/protobuf/types/known/durationpb/duration.pb.go b/vendor/google.golang.org/protobuf/types/known/durationpb/duration.pb.go index 1b71bcd9..b99d4d24 100644 --- a/vendor/google.golang.org/protobuf/types/known/durationpb/duration.pb.go +++ b/vendor/google.golang.org/protobuf/types/known/durationpb/duration.pb.go @@ -245,11 +245,9 @@ func (x *Duration) check() uint { func (x *Duration) Reset() { *x = Duration{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_duration_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_duration_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *Duration) String() string { @@ -260,7 +258,7 @@ func (*Duration) ProtoMessage() {} func (x *Duration) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_duration_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -339,20 +337,6 @@ func file_google_protobuf_duration_proto_init() { if File_google_protobuf_duration_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_google_protobuf_duration_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*Duration); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/vendor/google.golang.org/protobuf/types/known/emptypb/empty.pb.go b/vendor/google.golang.org/protobuf/types/known/emptypb/empty.pb.go index d87b4fb8..1761bc9c 100644 --- a/vendor/google.golang.org/protobuf/types/known/emptypb/empty.pb.go +++ b/vendor/google.golang.org/protobuf/types/known/emptypb/empty.pb.go @@ -55,11 +55,9 @@ type Empty struct { func (x *Empty) Reset() { *x = Empty{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_empty_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_empty_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *Empty) String() string { @@ -70,7 +68,7 @@ func (*Empty) ProtoMessage() {} func (x *Empty) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_empty_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -131,20 +129,6 @@ func file_google_protobuf_empty_proto_init() { if File_google_protobuf_empty_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_google_protobuf_empty_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*Empty); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/vendor/google.golang.org/protobuf/types/known/fieldmaskpb/field_mask.pb.go b/vendor/google.golang.org/protobuf/types/known/fieldmaskpb/field_mask.pb.go index ac1e91bb..19de8d37 100644 --- a/vendor/google.golang.org/protobuf/types/known/fieldmaskpb/field_mask.pb.go +++ b/vendor/google.golang.org/protobuf/types/known/fieldmaskpb/field_mask.pb.go @@ -467,11 +467,9 @@ func rangeFields(path string, f func(field string) bool) bool { func (x *FieldMask) Reset() { *x = FieldMask{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_field_mask_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_field_mask_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FieldMask) String() string { @@ -482,7 +480,7 @@ func (*FieldMask) ProtoMessage() {} func (x *FieldMask) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_field_mask_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -553,20 +551,6 @@ func file_google_protobuf_field_mask_proto_init() { if File_google_protobuf_field_mask_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_google_protobuf_field_mask_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*FieldMask); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/vendor/google.golang.org/protobuf/types/known/structpb/struct.pb.go b/vendor/google.golang.org/protobuf/types/known/structpb/struct.pb.go index d45361cb..8f206a66 100644 --- a/vendor/google.golang.org/protobuf/types/known/structpb/struct.pb.go +++ b/vendor/google.golang.org/protobuf/types/known/structpb/struct.pb.go @@ -120,6 +120,7 @@ package structpb import ( base64 "encoding/base64" + json "encoding/json" protojson "google.golang.org/protobuf/encoding/protojson" protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" @@ -233,11 +234,9 @@ func (x *Struct) UnmarshalJSON(b []byte) error { func (x *Struct) Reset() { *x = Struct{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_struct_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_struct_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *Struct) String() string { @@ -248,7 +247,7 @@ func (*Struct) ProtoMessage() {} func (x *Struct) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_struct_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -296,19 +295,20 @@ type Value struct { // NewValue constructs a Value from a general-purpose Go interface. // -// ╔════════════════════════╤════════════════════════════════════════════╗ -// ║ Go type │ Conversion ║ -// ╠════════════════════════╪════════════════════════════════════════════╣ -// ║ nil │ stored as NullValue ║ -// ║ bool │ stored as BoolValue ║ -// ║ int, int32, int64 │ stored as NumberValue ║ -// ║ uint, uint32, uint64 │ stored as NumberValue ║ -// ║ float32, float64 │ stored as NumberValue ║ -// ║ string │ stored as StringValue; must be valid UTF-8 ║ -// ║ []byte │ stored as StringValue; base64-encoded ║ -// ║ map[string]any │ stored as StructValue ║ -// ║ []any │ stored as ListValue ║ -// ╚════════════════════════╧════════════════════════════════════════════╝ +// ╔═══════════════════════════════════════╤════════════════════════════════════════════╗ +// ║ Go type │ Conversion ║ +// ╠═══════════════════════════════════════╪════════════════════════════════════════════╣ +// ║ nil │ stored as NullValue ║ +// ║ bool │ stored as BoolValue ║ +// ║ int, int8, int16, int32, int64 │ stored as NumberValue ║ +// ║ uint, uint8, uint16, uint32, uint64 │ stored as NumberValue ║ +// ║ float32, float64 │ stored as NumberValue ║ +// ║ json.Number │ stored as NumberValue ║ +// ║ string │ stored as StringValue; must be valid UTF-8 ║ +// ║ []byte │ stored as StringValue; base64-encoded ║ +// ║ map[string]any │ stored as StructValue ║ +// ║ []any │ stored as ListValue ║ +// ╚═══════════════════════════════════════╧════════════════════════════════════════════╝ // // When converting an int64 or uint64 to a NumberValue, numeric precision loss // is possible since they are stored as a float64. @@ -320,12 +320,20 @@ func NewValue(v any) (*Value, error) { return NewBoolValue(v), nil case int: return NewNumberValue(float64(v)), nil + case int8: + return NewNumberValue(float64(v)), nil + case int16: + return NewNumberValue(float64(v)), nil case int32: return NewNumberValue(float64(v)), nil case int64: return NewNumberValue(float64(v)), nil case uint: return NewNumberValue(float64(v)), nil + case uint8: + return NewNumberValue(float64(v)), nil + case uint16: + return NewNumberValue(float64(v)), nil case uint32: return NewNumberValue(float64(v)), nil case uint64: @@ -334,6 +342,12 @@ func NewValue(v any) (*Value, error) { return NewNumberValue(float64(v)), nil case float64: return NewNumberValue(float64(v)), nil + case json.Number: + n, err := v.Float64() + if err != nil { + return nil, protoimpl.X.NewError("invalid number format %q, expected a float64: %v", v, err) + } + return NewNumberValue(n), nil case string: if !utf8.ValidString(v) { return nil, protoimpl.X.NewError("invalid UTF-8 in string: %q", v) @@ -441,11 +455,9 @@ func (x *Value) UnmarshalJSON(b []byte) error { func (x *Value) Reset() { *x = Value{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_struct_proto_msgTypes[1] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_struct_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *Value) String() string { @@ -456,7 +468,7 @@ func (*Value) ProtoMessage() {} func (x *Value) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_struct_proto_msgTypes[1] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -613,11 +625,9 @@ func (x *ListValue) UnmarshalJSON(b []byte) error { func (x *ListValue) Reset() { *x = ListValue{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_struct_proto_msgTypes[2] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_struct_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *ListValue) String() string { @@ -628,7 +638,7 @@ func (*ListValue) ProtoMessage() {} func (x *ListValue) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_struct_proto_msgTypes[2] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -742,44 +752,6 @@ func file_google_protobuf_struct_proto_init() { if File_google_protobuf_struct_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_google_protobuf_struct_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*Struct); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_struct_proto_msgTypes[1].Exporter = func(v any, i int) any { - switch v := v.(*Value); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_struct_proto_msgTypes[2].Exporter = func(v any, i int) any { - switch v := v.(*ListValue); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } file_google_protobuf_struct_proto_msgTypes[1].OneofWrappers = []any{ (*Value_NullValue)(nil), (*Value_NumberValue)(nil), diff --git a/vendor/google.golang.org/protobuf/types/known/timestamppb/timestamp.pb.go b/vendor/google.golang.org/protobuf/types/known/timestamppb/timestamp.pb.go index 83a5a645..0d20722d 100644 --- a/vendor/google.golang.org/protobuf/types/known/timestamppb/timestamp.pb.go +++ b/vendor/google.golang.org/protobuf/types/known/timestamppb/timestamp.pb.go @@ -254,11 +254,9 @@ func (x *Timestamp) check() uint { func (x *Timestamp) Reset() { *x = Timestamp{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_timestamp_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_timestamp_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *Timestamp) String() string { @@ -269,7 +267,7 @@ func (*Timestamp) ProtoMessage() {} func (x *Timestamp) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_timestamp_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -348,20 +346,6 @@ func file_google_protobuf_timestamp_proto_init() { if File_google_protobuf_timestamp_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_google_protobuf_timestamp_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*Timestamp); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/vendor/google.golang.org/protobuf/types/known/wrapperspb/wrappers.pb.go b/vendor/google.golang.org/protobuf/types/known/wrapperspb/wrappers.pb.go index e473f826..006060e5 100644 --- a/vendor/google.golang.org/protobuf/types/known/wrapperspb/wrappers.pb.go +++ b/vendor/google.golang.org/protobuf/types/known/wrapperspb/wrappers.pb.go @@ -69,11 +69,9 @@ func Double(v float64) *DoubleValue { func (x *DoubleValue) Reset() { *x = DoubleValue{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_wrappers_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_wrappers_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *DoubleValue) String() string { @@ -84,7 +82,7 @@ func (*DoubleValue) ProtoMessage() {} func (x *DoubleValue) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_wrappers_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -125,11 +123,9 @@ func Float(v float32) *FloatValue { func (x *FloatValue) Reset() { *x = FloatValue{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_wrappers_proto_msgTypes[1] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_wrappers_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FloatValue) String() string { @@ -140,7 +136,7 @@ func (*FloatValue) ProtoMessage() {} func (x *FloatValue) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_wrappers_proto_msgTypes[1] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -181,11 +177,9 @@ func Int64(v int64) *Int64Value { func (x *Int64Value) Reset() { *x = Int64Value{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_wrappers_proto_msgTypes[2] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_wrappers_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *Int64Value) String() string { @@ -196,7 +190,7 @@ func (*Int64Value) ProtoMessage() {} func (x *Int64Value) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_wrappers_proto_msgTypes[2] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -237,11 +231,9 @@ func UInt64(v uint64) *UInt64Value { func (x *UInt64Value) Reset() { *x = UInt64Value{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_wrappers_proto_msgTypes[3] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_wrappers_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *UInt64Value) String() string { @@ -252,7 +244,7 @@ func (*UInt64Value) ProtoMessage() {} func (x *UInt64Value) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_wrappers_proto_msgTypes[3] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -293,11 +285,9 @@ func Int32(v int32) *Int32Value { func (x *Int32Value) Reset() { *x = Int32Value{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_wrappers_proto_msgTypes[4] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_wrappers_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *Int32Value) String() string { @@ -308,7 +298,7 @@ func (*Int32Value) ProtoMessage() {} func (x *Int32Value) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_wrappers_proto_msgTypes[4] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -349,11 +339,9 @@ func UInt32(v uint32) *UInt32Value { func (x *UInt32Value) Reset() { *x = UInt32Value{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_wrappers_proto_msgTypes[5] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_wrappers_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *UInt32Value) String() string { @@ -364,7 +352,7 @@ func (*UInt32Value) ProtoMessage() {} func (x *UInt32Value) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_wrappers_proto_msgTypes[5] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -405,11 +393,9 @@ func Bool(v bool) *BoolValue { func (x *BoolValue) Reset() { *x = BoolValue{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_wrappers_proto_msgTypes[6] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_wrappers_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *BoolValue) String() string { @@ -420,7 +406,7 @@ func (*BoolValue) ProtoMessage() {} func (x *BoolValue) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_wrappers_proto_msgTypes[6] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -461,11 +447,9 @@ func String(v string) *StringValue { func (x *StringValue) Reset() { *x = StringValue{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_wrappers_proto_msgTypes[7] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_wrappers_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *StringValue) String() string { @@ -476,7 +460,7 @@ func (*StringValue) ProtoMessage() {} func (x *StringValue) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_wrappers_proto_msgTypes[7] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -517,11 +501,9 @@ func Bytes(v []byte) *BytesValue { func (x *BytesValue) Reset() { *x = BytesValue{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_wrappers_proto_msgTypes[8] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_wrappers_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *BytesValue) String() string { @@ -532,7 +514,7 @@ func (*BytesValue) ProtoMessage() {} func (x *BytesValue) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_wrappers_proto_msgTypes[8] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -629,116 +611,6 @@ func file_google_protobuf_wrappers_proto_init() { if File_google_protobuf_wrappers_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_google_protobuf_wrappers_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*DoubleValue); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_wrappers_proto_msgTypes[1].Exporter = func(v any, i int) any { - switch v := v.(*FloatValue); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_wrappers_proto_msgTypes[2].Exporter = func(v any, i int) any { - switch v := v.(*Int64Value); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_wrappers_proto_msgTypes[3].Exporter = func(v any, i int) any { - switch v := v.(*UInt64Value); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_wrappers_proto_msgTypes[4].Exporter = func(v any, i int) any { - switch v := v.(*Int32Value); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_wrappers_proto_msgTypes[5].Exporter = func(v any, i int) any { - switch v := v.(*UInt32Value); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_wrappers_proto_msgTypes[6].Exporter = func(v any, i int) any { - switch v := v.(*BoolValue); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_wrappers_proto_msgTypes[7].Exporter = func(v any, i int) any { - switch v := v.(*StringValue); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_wrappers_proto_msgTypes[8].Exporter = func(v any, i int) any { - switch v := v.(*BytesValue); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/vendor/modules.txt b/vendor/modules.txt index 19ad397a..e2b57f63 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -875,7 +875,7 @@ github.com/oklog/ulid # github.com/oleiade/reflections v1.0.1 ## explicit; go 1.15 github.com/oleiade/reflections -# github.com/onsi/gomega v1.34.2 +# github.com/onsi/gomega v1.35.1 ## explicit; go 1.22 github.com/onsi/gomega github.com/onsi/gomega/format @@ -1434,7 +1434,7 @@ go.uber.org/zap/internal/exit go.uber.org/zap/internal/pool go.uber.org/zap/internal/stacktrace go.uber.org/zap/zapcore -# golang.org/x/crypto v0.26.0 +# golang.org/x/crypto v0.28.0 ## explicit; go 1.20 golang.org/x/crypto/argon2 golang.org/x/crypto/blake2b @@ -1478,7 +1478,7 @@ golang.org/x/exp/slog/internal/buffer # golang.org/x/mod v0.19.0 ## explicit; go 1.18 golang.org/x/mod/sumdb/note -# golang.org/x/net v0.28.0 +# golang.org/x/net v0.30.0 ## explicit; go 1.18 golang.org/x/net/html golang.org/x/net/html/atom @@ -1507,17 +1507,17 @@ golang.org/x/oauth2/jwt ## explicit; go 1.18 golang.org/x/sync/errgroup golang.org/x/sync/semaphore -# golang.org/x/sys v0.24.0 +# golang.org/x/sys v0.26.0 ## explicit; go 1.18 golang.org/x/sys/cpu golang.org/x/sys/plan9 golang.org/x/sys/unix golang.org/x/sys/windows golang.org/x/sys/windows/registry -# golang.org/x/term v0.23.0 +# golang.org/x/term v0.25.0 ## explicit; go 1.18 golang.org/x/term -# golang.org/x/text v0.17.0 +# golang.org/x/text v0.19.0 ## explicit; go 1.18 golang.org/x/text/encoding golang.org/x/text/encoding/charmap @@ -1624,8 +1624,8 @@ google.golang.org/grpc/serviceconfig google.golang.org/grpc/stats google.golang.org/grpc/status google.golang.org/grpc/tap -# google.golang.org/protobuf v1.34.2 -## explicit; go 1.20 +# google.golang.org/protobuf v1.35.1 +## explicit; go 1.21 google.golang.org/protobuf/encoding/protodelim google.golang.org/protobuf/encoding/protojson google.golang.org/protobuf/encoding/prototext