From ddc2de15325dc4933a2e03d956c27182f11733bd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Nov 2023 09:29:20 -0700 Subject: [PATCH] Bump github.com/klauspost/compress from 1.16.7 to 1.17.2 (#5626) Bumps [github.com/klauspost/compress](https://github.com/klauspost/compress) from 1.16.7 to 1.17.2. - [Release notes](https://github.com/klauspost/compress/releases) - [Changelog](https://github.com/klauspost/compress/blob/master/.goreleaser.yml) - [Commits](https://github.com/klauspost/compress/compare/v1.16.7...v1.17.2) --- updated-dependencies: - dependency-name: github.com/klauspost/compress dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 +- .../klauspost/compress/.goreleaser.yml | 20 +- .../github.com/klauspost/compress/README.md | 19 + .../klauspost/compress/flate/deflate.go | 29 + .../klauspost/compress/flate/fast_encoder.go | 23 - .../klauspost/compress/flate/inflate.go | 66 +- .../klauspost/compress/flate/inflate_gen.go | 34 +- .../klauspost/compress/flate/level5.go | 398 ++++ .../compress/flate/matchlen_amd64.go | 16 + .../klauspost/compress/flate/matchlen_amd64.s | 68 + .../compress/flate/matchlen_generic.go | 33 + .../klauspost/compress/fse/bitwriter.go | 3 +- .../klauspost/compress/fse/compress.go | 3 +- .../klauspost/compress/gzip/gunzip.go | 1 + .../klauspost/compress/gzip/gzip.go | 21 + .../klauspost/compress/huff0/bitwriter.go | 3 +- .../klauspost/compress/huff0/compress.go | 20 +- .../github.com/klauspost/compress/s2/dict.go | 19 + .../klauspost/compress/s2/encode.go | 2 +- .../klauspost/compress/s2/encode_best.go | 3 + .../klauspost/compress/s2/encode_go.go | 2 + .../klauspost/compress/s2/encodeblock_amd64.s | 1616 ++++++++++++----- .../github.com/klauspost/compress/s2/index.go | 20 +- .../klauspost/compress/zstd/bitreader.go | 34 +- .../klauspost/compress/zstd/bitwriter.go | 3 +- .../klauspost/compress/zstd/blockenc.go | 29 +- .../klauspost/compress/zstd/dict.go | 379 +++- .../klauspost/compress/zstd/enc_best.go | 11 +- .../klauspost/compress/zstd/encoder.go | 13 +- .../klauspost/compress/zstd/frameenc.go | 4 +- .../klauspost/compress/zstd/seqdec.go | 17 +- .../klauspost/compress/zstd/seqdec_amd64.s | 128 +- .../klauspost/compress/zstd/seqdec_generic.go | 2 +- .../klauspost/compress/zstd/snappy.go | 5 +- vendor/modules.txt | 2 +- 36 files changed, 2396 insertions(+), 656 deletions(-) create mode 100644 vendor/github.com/klauspost/compress/flate/matchlen_amd64.go create mode 100644 vendor/github.com/klauspost/compress/flate/matchlen_amd64.s create mode 100644 vendor/github.com/klauspost/compress/flate/matchlen_generic.go diff --git a/go.mod b/go.mod index 1186e5326c..2aa7693f44 100644 --- a/go.mod +++ b/go.mod @@ -32,7 +32,7 @@ require ( github.com/hashicorp/go-sockaddr v1.0.2 github.com/hashicorp/memberlist v0.5.0 github.com/json-iterator/go v1.1.12 - github.com/klauspost/compress v1.16.7 + github.com/klauspost/compress v1.17.2 github.com/lib/pq v1.10.9 github.com/minio/minio-go/v7 v7.0.63 github.com/mitchellh/go-wordwrap v1.0.1 diff --git a/go.sum b/go.sum index 4266ccf739..97be76a152 100644 --- a/go.sum +++ b/go.sum @@ -994,8 +994,8 @@ github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQL github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= -github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4= +github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= diff --git a/vendor/github.com/klauspost/compress/.goreleaser.yml b/vendor/github.com/klauspost/compress/.goreleaser.yml index 7a008a4d23..4c28dff465 100644 --- a/vendor/github.com/klauspost/compress/.goreleaser.yml +++ b/vendor/github.com/klauspost/compress/.goreleaser.yml @@ -3,7 +3,7 @@ before: hooks: - ./gen.sh - - go install mvdan.cc/garble@v0.9.3 + - go install mvdan.cc/garble@v0.10.1 builds: - @@ -92,16 +92,7 @@ builds: archives: - id: s2-binaries - name_template: "s2-{{ .Os }}_{{ .Arch }}_{{ .Version }}" - replacements: - aix: AIX - darwin: OSX - linux: Linux - windows: Windows - 386: i386 - amd64: x86_64 - freebsd: FreeBSD - netbsd: NetBSD + name_template: "s2-{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}" format_overrides: - goos: windows format: zip @@ -125,7 +116,7 @@ changelog: nfpms: - - file_name_template: "s2_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}" + file_name_template: "s2_package__{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}" vendor: Klaus Post homepage: https://github.com/klauspost/compress maintainer: Klaus Post @@ -134,8 +125,3 @@ nfpms: formats: - deb - rpm - replacements: - darwin: Darwin - linux: Linux - freebsd: FreeBSD - amd64: x86_64 diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md index 4002a16a63..43de486775 100644 --- a/vendor/github.com/klauspost/compress/README.md +++ b/vendor/github.com/klauspost/compress/README.md @@ -16,6 +16,18 @@ This package provides various compression algorithms. # changelog +* Sept 19th, 2023 - [v1.17.0](https://github.com/klauspost/compress/releases/tag/v1.17.0) + * Add experimental dictionary builder https://github.com/klauspost/compress/pull/853 + * Add xerial snappy read/writer https://github.com/klauspost/compress/pull/838 + * flate: Add limited window compression https://github.com/klauspost/compress/pull/843 + * s2: Do 2 overlapping match checks https://github.com/klauspost/compress/pull/839 + * flate: Add amd64 assembly matchlen https://github.com/klauspost/compress/pull/837 + * gzip: Copy bufio.Reader on Reset by @thatguystone in https://github.com/klauspost/compress/pull/860 + +* July 1st, 2023 - [v1.16.7](https://github.com/klauspost/compress/releases/tag/v1.16.7) + * zstd: Fix default level first dictionary encode https://github.com/klauspost/compress/pull/829 + * s2: add GetBufferCapacity() method by @GiedriusS in https://github.com/klauspost/compress/pull/832 + * June 13, 2023 - [v1.16.6](https://github.com/klauspost/compress/releases/tag/v1.16.6) * zstd: correctly ignore WithEncoderPadding(1) by @ianlancetaylor in https://github.com/klauspost/compress/pull/806 * zstd: Add amd64 match length assembly https://github.com/klauspost/compress/pull/824 @@ -50,6 +62,9 @@ This package provides various compression algorithms. * s2: Support io.ReaderAt in ReadSeeker. https://github.com/klauspost/compress/pull/747 * s2c/s2sx: Use concurrent decoding. https://github.com/klauspost/compress/pull/746 +
+ See changes to v1.15.x + * Jan 21st, 2023 (v1.15.15) * deflate: Improve level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/739 * zstd: Add delta encoding support by @greatroar in https://github.com/klauspost/compress/pull/728 @@ -176,6 +191,8 @@ Stream decompression is now faster on asynchronous, since the goroutine allocati While the release has been extensively tested, it is recommended to testing when upgrading. +
+
See changes to v1.14.x @@ -636,6 +653,8 @@ Here are other packages of good quality and pure Go (no cgo wrappers or autoconv * [github.com/dsnet/compress](https://github.com/dsnet/compress) - brotli decompression, bzip2 writer. * [github.com/ronanh/intcomp](https://github.com/ronanh/intcomp) - Integer compression. * [github.com/spenczar/fpc](https://github.com/spenczar/fpc) - Float compression. +* [github.com/minio/zipindex](https://github.com/minio/zipindex) - External ZIP directory index. +* [github.com/ybirader/pzip](https://github.com/ybirader/pzip) - Fast concurrent zip archiver and extractor. # license diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go index 5faea0b2b3..de912e187c 100644 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -7,6 +7,7 @@ package flate import ( "encoding/binary" + "errors" "fmt" "io" "math" @@ -833,6 +834,12 @@ func (d *compressor) init(w io.Writer, level int) (err error) { d.initDeflate() d.fill = (*compressor).fillDeflate d.step = (*compressor).deflateLazy + case -level >= MinCustomWindowSize && -level <= MaxCustomWindowSize: + d.w.logNewTablePenalty = 7 + d.fast = &fastEncL5Window{maxOffset: int32(-level), cur: maxStoreBlockSize} + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).storeFast default: return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level) } @@ -929,6 +936,28 @@ func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) { return zw, err } +// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow. +const MinCustomWindowSize = 32 + +// MaxCustomWindowSize is the maximum custom window that can be sent to NewWriterWindow. +const MaxCustomWindowSize = windowSize + +// NewWriterWindow returns a new Writer compressing data with a custom window size. +// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize. +func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) { + if windowSize < MinCustomWindowSize { + return nil, errors.New("flate: requested window size less than MinWindowSize") + } + if windowSize > MaxCustomWindowSize { + return nil, errors.New("flate: requested window size bigger than MaxCustomWindowSize") + } + var dw Writer + if err := dw.d.init(w, -windowSize); err != nil { + return nil, err + } + return &dw, nil +} + // A Writer takes data written to it and writes the compressed // form of that data to an underlying writer (see NewWriter). type Writer struct { diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go index 24caf5f70b..c8124b5c49 100644 --- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go +++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go @@ -8,7 +8,6 @@ package flate import ( "encoding/binary" "fmt" - "math/bits" ) type fastEnc interface { @@ -192,25 +191,3 @@ func (e *fastGen) Reset() { } e.hist = e.hist[:0] } - -// matchLen returns the maximum length. -// 'a' must be the shortest of the two. -func matchLen(a, b []byte) int { - var checked int - - for len(a) >= 8 { - if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 { - return checked + (bits.TrailingZeros64(diff) >> 3) - } - checked += 8 - a = a[8:] - b = b[8:] - } - b = b[:len(a)] - for i := range a { - if a[i] != b[i] { - return i + checked - } - } - return len(a) + checked -} diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go index 414c0bea9f..2f410d64f5 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate.go +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -120,8 +120,9 @@ func (h *huffmanDecoder) init(lengths []int) bool { const sanity = false if h.chunks == nil { - h.chunks = &[huffmanNumChunks]uint16{} + h.chunks = new([huffmanNumChunks]uint16) } + if h.maxRead != 0 { *h = huffmanDecoder{chunks: h.chunks, links: h.links} } @@ -175,6 +176,7 @@ func (h *huffmanDecoder) init(lengths []int) bool { } h.maxRead = min + chunks := h.chunks[:] for i := range chunks { chunks[i] = 0 @@ -202,8 +204,7 @@ func (h *huffmanDecoder) init(lengths []int) bool { if cap(h.links[off]) < numLinks { h.links[off] = make([]uint16, numLinks) } else { - links := h.links[off][:0] - h.links[off] = links[:numLinks] + h.links[off] = h.links[off][:numLinks] } } } else { @@ -277,7 +278,7 @@ func (h *huffmanDecoder) init(lengths []int) bool { return true } -// The actual read interface needed by NewReader. +// Reader is the actual read interface needed by NewReader. // If the passed in io.Reader does not also have ReadByte, // the NewReader will introduce its own buffering. type Reader interface { @@ -285,6 +286,18 @@ type Reader interface { io.ByteReader } +type step uint8 + +const ( + copyData step = iota + 1 + nextBlock + huffmanBytesBuffer + huffmanBytesReader + huffmanBufioReader + huffmanStringsReader + huffmanGenericReader +) + // Decompress state. type decompressor struct { // Input source. @@ -303,7 +316,7 @@ type decompressor struct { // Next step in the decompression, // and decompression state. - step func(*decompressor) + step step stepState int err error toRead []byte @@ -342,7 +355,7 @@ func (f *decompressor) nextBlock() { // compressed, fixed Huffman tables f.hl = &fixedHuffmanDecoder f.hd = nil - f.huffmanBlockDecoder()() + f.huffmanBlockDecoder() if debugDecode { fmt.Println("predefinied huffman block") } @@ -353,7 +366,7 @@ func (f *decompressor) nextBlock() { } f.hl = &f.h1 f.hd = &f.h2 - f.huffmanBlockDecoder()() + f.huffmanBlockDecoder() if debugDecode { fmt.Println("dynamic huffman block") } @@ -379,14 +392,16 @@ func (f *decompressor) Read(b []byte) (int, error) { if f.err != nil { return 0, f.err } - f.step(f) + + f.doStep() + if f.err != nil && len(f.toRead) == 0 { f.toRead = f.dict.readFlush() // Flush what's left in case of error } } } -// Support the io.WriteTo interface for io.Copy and friends. +// WriteTo implements the io.WriteTo interface for io.Copy and friends. func (f *decompressor) WriteTo(w io.Writer) (int64, error) { total := int64(0) flushed := false @@ -410,7 +425,7 @@ func (f *decompressor) WriteTo(w io.Writer) (int64, error) { return total, f.err } if f.err == nil { - f.step(f) + f.doStep() } if len(f.toRead) == 0 && f.err != nil && !flushed { f.toRead = f.dict.readFlush() // Flush what's left in case of error @@ -631,7 +646,7 @@ func (f *decompressor) copyData() { if f.dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = f.dict.readFlush() - f.step = (*decompressor).copyData + f.step = copyData return } f.finishBlock() @@ -644,7 +659,28 @@ func (f *decompressor) finishBlock() { } f.err = io.EOF } - f.step = (*decompressor).nextBlock + f.step = nextBlock +} + +func (f *decompressor) doStep() { + switch f.step { + case copyData: + f.copyData() + case nextBlock: + f.nextBlock() + case huffmanBytesBuffer: + f.huffmanBytesBuffer() + case huffmanBytesReader: + f.huffmanBytesReader() + case huffmanBufioReader: + f.huffmanBufioReader() + case huffmanStringsReader: + f.huffmanStringsReader() + case huffmanGenericReader: + f.huffmanGenericReader() + default: + panic("BUG: unexpected step state") + } } // noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. @@ -747,7 +783,7 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error { h1: f.h1, h2: f.h2, dict: f.dict, - step: (*decompressor).nextBlock, + step: nextBlock, } f.dict.init(maxMatchOffset, dict) return nil @@ -768,7 +804,7 @@ func NewReader(r io.Reader) io.ReadCloser { f.r = makeReader(r) f.bits = new([maxNumLit + maxNumDist]int) f.codebits = new([numCodes]int) - f.step = (*decompressor).nextBlock + f.step = nextBlock f.dict.init(maxMatchOffset, nil) return &f } @@ -787,7 +823,7 @@ func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { f.r = makeReader(r) f.bits = new([maxNumLit + maxNumDist]int) f.codebits = new([numCodes]int) - f.step = (*decompressor).nextBlock + f.step = nextBlock f.dict.init(maxMatchOffset, dict) return &f } diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go index 61342b6b88..2b2f993f75 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate_gen.go +++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go @@ -85,7 +85,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesBuffer + f.step = huffmanBytesBuffer f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -251,7 +251,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work + f.step = huffmanBytesBuffer // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -336,7 +336,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesReader + f.step = huffmanBytesReader f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -502,7 +502,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesReader // We need to continue this work + f.step = huffmanBytesReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -587,7 +587,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBufioReader + f.step = huffmanBufioReader f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -753,7 +753,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBufioReader // We need to continue this work + f.step = huffmanBufioReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -838,7 +838,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanStringsReader + f.step = huffmanStringsReader f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -1004,7 +1004,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanStringsReader // We need to continue this work + f.step = huffmanStringsReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -1089,7 +1089,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanGenericReader + f.step = huffmanGenericReader f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -1255,7 +1255,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanGenericReader // We need to continue this work + f.step = huffmanGenericReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -1265,19 +1265,19 @@ copyHistory: // Not reached } -func (f *decompressor) huffmanBlockDecoder() func() { +func (f *decompressor) huffmanBlockDecoder() { switch f.r.(type) { case *bytes.Buffer: - return f.huffmanBytesBuffer + f.huffmanBytesBuffer() case *bytes.Reader: - return f.huffmanBytesReader + f.huffmanBytesReader() case *bufio.Reader: - return f.huffmanBufioReader + f.huffmanBufioReader() case *strings.Reader: - return f.huffmanStringsReader + f.huffmanStringsReader() case Reader: - return f.huffmanGenericReader + f.huffmanGenericReader() default: - return f.huffmanGenericReader + f.huffmanGenericReader() } } diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go index 83ef50ba45..1f61ec1829 100644 --- a/vendor/github.com/klauspost/compress/flate/level5.go +++ b/vendor/github.com/klauspost/compress/flate/level5.go @@ -308,3 +308,401 @@ emitRemainder: emitLiteral(dst, src[nextEmit:]) } } + +// fastEncL5Window is a level 5 encoder, +// but with a custom window size. +type fastEncL5Window struct { + hist []byte + cur int32 + maxOffset int32 + table [tableSize]tableEntry + bTable [tableSize]tableEntryPrev +} + +func (e *fastEncL5Window) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + hashShortBytes = 4 + ) + maxMatchOffset := e.maxOffset + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + v.Prev.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + } + e.bTable[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + for { + const skipLog = 6 + const doEvery = 1 + + nextS := s + var l int32 + var t int32 + for { + nextHashS := hashLen(cv, tableBits, hashShortBytes) + nextHashL := hash7(cv, tableBits) + + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur} + e.table[nextHashS] = entry + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = entry, eLong.Cur + + nextHashS = hashLen(next, tableBits, hashShortBytes) + nextHashL = hash7(next, tableBits) + + t = lCandidate.Cur.offset - e.cur + if s-t < maxMatchOffset { + if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + t2 := lCandidate.Prev.offset - e.cur + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + l = e.matchlen(s+4, t+4, src) + 4 + ml1 := e.matchlen(s+4, t2+4, src) + 4 + if ml1 > l { + t = t2 + l = ml1 + break + } + } + break + } + t = lCandidate.Prev.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + break + } + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { + // Found a 4 match... + l = e.matchlen(s+4, t+4, src) + 4 + lCandidate = e.bTable[nextHashL] + // Store the next match + + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + // If the next long is a candidate, use that... + t2 := lCandidate.Cur.offset - e.cur + if nextS-t2 < maxMatchOffset { + if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + // If the previous long is a candidate, use that... + t2 = lCandidate.Prev.offset - e.cur + if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + if l == 0 { + // Extend the 4-byte match as long as possible. + l = e.matchlenLong(s+4, t+4, src) + 4 + } else if l == maxMatchLength { + l += e.matchlenLong(s+l, t+l, src) + } + + // Try to locate a better match by checking the end of best match... + if sAt := s + l; l < 30 && sAt < sLimit { + // Allow some bytes at the beginning to mismatch. + // Sweet spot is 2/3 bytes depending on input. + // 3 is only a little better when it is but sometimes a lot worse. + // The skipped bytes are tested in Extend backwards, + // and still picked up as part of the match if they do. + const skipBeginning = 2 + eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset + t2 := eLong - e.cur - l + skipBeginning + s2 := s + skipBeginning + off := s2 - t2 + if t2 >= 0 && off < maxMatchOffset && off > 0 { + if l2 := e.matchlenLong(s2, t2, src); l2 > l { + t = t2 + l = l2 + s = s2 + } + } + } + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + if debugDeflate { + if t >= s { + panic(fmt.Sprintln("s-t", s, t)) + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", s-t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + goto emitRemainder + } + + // Store every 3rd hash in-between. + if true { + const hashEvery = 3 + i := s - l + 1 + if i < s-1 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + e.table[hashLen(cv, tableBits, hashShortBytes)] = t + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // Do an long at i+1 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + eLong = &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // We only have enough bits for a short entry at i+2 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + e.table[hashLen(cv, tableBits, hashShortBytes)] = t + + // Skip one - otherwise we risk hitting 's' + i += 4 + for ; i < s-1; i += hashEvery { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 + } + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + x := load6432(src, s-1) + o := e.cur + s - 1 + prevHashS := hashLen(x, tableBits, hashShortBytes) + prevHashL := hash7(x, tableBits) + e.table[prevHashS] = tableEntry{offset: o} + eLong := &e.bTable[prevHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur + cv = x >> 8 + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} + +// Reset the encoding table. +func (e *fastEncL5Window) Reset() { + // We keep the same allocs, since we are compressing the same block sizes. + if cap(e.hist) < allocHistory { + e.hist = make([]byte, 0, allocHistory) + } + + // We offset current position so everything will be out of reach. + // If we are above the buffer reset it will be cleared anyway since len(hist) == 0. + if e.cur <= int32(bufferReset) { + e.cur += e.maxOffset + int32(len(e.hist)) + } + e.hist = e.hist[:0] +} + +func (e *fastEncL5Window) addBlock(src []byte) int32 { + // check if we have space already + maxMatchOffset := e.maxOffset + + if len(e.hist)+len(src) > cap(e.hist) { + if cap(e.hist) == 0 { + e.hist = make([]byte, 0, allocHistory) + } else { + if cap(e.hist) < int(maxMatchOffset*2) { + panic("unexpected buffer size") + } + // Move down + offset := int32(len(e.hist)) - maxMatchOffset + copy(e.hist[0:maxMatchOffset], e.hist[offset:]) + e.cur += offset + e.hist = e.hist[:maxMatchOffset] + } + } + s := int32(len(e.hist)) + e.hist = append(e.hist, src...) + return s +} + +// matchlen will return the match length between offsets and t in src. +// The maximum length returned is maxMatchLength - 4. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastEncL5Window) matchlen(s, t int32, src []byte) int32 { + if debugDecode { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > e.maxOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + s1 := int(s) + maxMatchLength - 4 + if s1 > len(src) { + s1 = len(src) + } + + // Extend the match to be as long as possible. + return int32(matchLen(src[s:s1], src[t:])) +} + +// matchlenLong will return the match length between offsets and t in src. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastEncL5Window) matchlenLong(s, t int32, src []byte) int32 { + if debugDeflate { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > e.maxOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + // Extend the match to be as long as possible. + return int32(matchLen(src[s:], src[t:])) +} diff --git a/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go new file mode 100644 index 0000000000..4bd3885841 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go @@ -0,0 +1,16 @@ +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. + +package flate + +// matchLen returns how many bytes match in a and b +// +// It assumes that: +// +// len(a) <= len(b) and len(a) > 0 +// +//go:noescape +func matchLen(a []byte, b []byte) int diff --git a/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s new file mode 100644 index 0000000000..9a7655c0f7 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s @@ -0,0 +1,68 @@ +// Copied from S2 implementation. + +//go:build !appengine && !noasm && gc && !noasm + +#include "textflag.h" + +// func matchLen(a []byte, b []byte) int +// Requires: BMI +TEXT ·matchLen(SB), NOSPLIT, $0-56 + MOVQ a_base+0(FP), AX + MOVQ b_base+24(FP), CX + MOVQ a_len+8(FP), DX + + // matchLen + XORL SI, SI + CMPL DX, $0x08 + JB matchlen_match4_standalone + +matchlen_loopback_standalone: + MOVQ (AX)(SI*1), BX + XORQ (CX)(SI*1), BX + TESTQ BX, BX + JZ matchlen_loop_standalone + +#ifdef GOAMD64_v3 + TZCNTQ BX, BX +#else + BSFQ BX, BX +#endif + SARQ $0x03, BX + LEAL (SI)(BX*1), SI + JMP gen_match_len_end + +matchlen_loop_standalone: + LEAL -8(DX), DX + LEAL 8(SI), SI + CMPL DX, $0x08 + JAE matchlen_loopback_standalone + +matchlen_match4_standalone: + CMPL DX, $0x04 + JB matchlen_match2_standalone + MOVL (AX)(SI*1), BX + CMPL (CX)(SI*1), BX + JNE matchlen_match2_standalone + LEAL -4(DX), DX + LEAL 4(SI), SI + +matchlen_match2_standalone: + CMPL DX, $0x02 + JB matchlen_match1_standalone + MOVW (AX)(SI*1), BX + CMPW (CX)(SI*1), BX + JNE matchlen_match1_standalone + LEAL -2(DX), DX + LEAL 2(SI), SI + +matchlen_match1_standalone: + CMPL DX, $0x01 + JB gen_match_len_end + MOVB (AX)(SI*1), BL + CMPB (CX)(SI*1), BL + JNE gen_match_len_end + INCL SI + +gen_match_len_end: + MOVQ SI, ret+48(FP) + RET diff --git a/vendor/github.com/klauspost/compress/flate/matchlen_generic.go b/vendor/github.com/klauspost/compress/flate/matchlen_generic.go new file mode 100644 index 0000000000..ad5cd814b9 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/matchlen_generic.go @@ -0,0 +1,33 @@ +//go:build !amd64 || appengine || !gc || noasm +// +build !amd64 appengine !gc noasm + +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. + +package flate + +import ( + "encoding/binary" + "math/bits" +) + +// matchLen returns the maximum common prefix length of a and b. +// a must be the shortest of the two. +func matchLen(a, b []byte) (n int) { + for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] { + diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b) + if diff != 0 { + return n + bits.TrailingZeros64(diff)>>3 + } + n += 8 + } + + for i := range a { + if a[i] != b[i] { + break + } + n++ + } + return n + +} diff --git a/vendor/github.com/klauspost/compress/fse/bitwriter.go b/vendor/github.com/klauspost/compress/fse/bitwriter.go index 43e463611b..e82fa3bb7b 100644 --- a/vendor/github.com/klauspost/compress/fse/bitwriter.go +++ b/vendor/github.com/klauspost/compress/fse/bitwriter.go @@ -152,12 +152,11 @@ func (b *bitWriter) flushAlign() { // close will write the alignment bit and write the final byte(s) // to the output. -func (b *bitWriter) close() error { +func (b *bitWriter) close() { // End mark b.addBits16Clean(1, 1) // flush until next byte. b.flushAlign() - return nil } // reset and continue writing by appending to out. diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go index dac97e58a2..65d777357a 100644 --- a/vendor/github.com/klauspost/compress/fse/compress.go +++ b/vendor/github.com/klauspost/compress/fse/compress.go @@ -199,7 +199,8 @@ func (s *Scratch) compress(src []byte) error { c2.flush(s.actualTableLog) c1.flush(s.actualTableLog) - return s.bw.close() + s.bw.close() + return nil } // writeCount will write the normalized histogram count to header. diff --git a/vendor/github.com/klauspost/compress/gzip/gunzip.go b/vendor/github.com/klauspost/compress/gzip/gunzip.go index 6d630c390d..dc2362a63b 100644 --- a/vendor/github.com/klauspost/compress/gzip/gunzip.go +++ b/vendor/github.com/klauspost/compress/gzip/gunzip.go @@ -106,6 +106,7 @@ func (z *Reader) Reset(r io.Reader) error { *z = Reader{ decompressor: z.decompressor, multistream: true, + br: z.br, } if rr, ok := r.(flate.Reader); ok { z.r = rr diff --git a/vendor/github.com/klauspost/compress/gzip/gzip.go b/vendor/github.com/klauspost/compress/gzip/gzip.go index 26203851bd..5bc720593e 100644 --- a/vendor/github.com/klauspost/compress/gzip/gzip.go +++ b/vendor/github.com/klauspost/compress/gzip/gzip.go @@ -74,6 +74,27 @@ func NewWriterLevel(w io.Writer, level int) (*Writer, error) { return z, nil } +// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow. +const MinCustomWindowSize = flate.MinCustomWindowSize + +// MaxCustomWindowSize is the maximum custom window that can be sent to NewWriterWindow. +const MaxCustomWindowSize = flate.MaxCustomWindowSize + +// NewWriterWindow returns a new Writer compressing data with a custom window size. +// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize. +func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) { + if windowSize < MinCustomWindowSize { + return nil, errors.New("gzip: requested window size less than MinWindowSize") + } + if windowSize > MaxCustomWindowSize { + return nil, errors.New("gzip: requested window size bigger than MaxCustomWindowSize") + } + + z := new(Writer) + z.init(w, -windowSize) + return z, nil +} + func (z *Writer) init(w io.Writer, level int) { compressor := z.compressor if level != StatelessCompression { diff --git a/vendor/github.com/klauspost/compress/huff0/bitwriter.go b/vendor/github.com/klauspost/compress/huff0/bitwriter.go index b4d7164e3f..0ebc9aaac7 100644 --- a/vendor/github.com/klauspost/compress/huff0/bitwriter.go +++ b/vendor/github.com/klauspost/compress/huff0/bitwriter.go @@ -94,10 +94,9 @@ func (b *bitWriter) flushAlign() { // close will write the alignment bit and write the final byte(s) // to the output. -func (b *bitWriter) close() error { +func (b *bitWriter) close() { // End mark b.addBits16Clean(1, 1) // flush until next byte. b.flushAlign() - return nil } diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go index 4ee4fa18dd..518436cf3d 100644 --- a/vendor/github.com/klauspost/compress/huff0/compress.go +++ b/vendor/github.com/klauspost/compress/huff0/compress.go @@ -227,10 +227,10 @@ func EstimateSizes(in []byte, s *Scratch) (tableSz, dataSz, reuseSz int, err err } func (s *Scratch) compress1X(src []byte) ([]byte, error) { - return s.compress1xDo(s.Out, src) + return s.compress1xDo(s.Out, src), nil } -func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) { +func (s *Scratch) compress1xDo(dst, src []byte) []byte { var bw = bitWriter{out: dst} // N is length divisible by 4. @@ -260,8 +260,8 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) { bw.encTwoSymbols(cTable, tmp[1], tmp[0]) } } - err := bw.close() - return bw.out, err + bw.close() + return bw.out } var sixZeros [6]byte @@ -283,12 +283,8 @@ func (s *Scratch) compress4X(src []byte) ([]byte, error) { } src = src[len(toDo):] - var err error idx := len(s.Out) - s.Out, err = s.compress1xDo(s.Out, toDo) - if err != nil { - return nil, err - } + s.Out = s.compress1xDo(s.Out, toDo) if len(s.Out)-idx > math.MaxUint16 { // We cannot store the size in the jump table return nil, ErrIncompressible @@ -315,7 +311,6 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) { segmentSize := (len(src) + 3) / 4 var wg sync.WaitGroup - var errs [4]error wg.Add(4) for i := 0; i < 4; i++ { toDo := src @@ -326,15 +321,12 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) { // Separate goroutine for each block. go func(i int) { - s.tmpOut[i], errs[i] = s.compress1xDo(s.tmpOut[i][:0], toDo) + s.tmpOut[i] = s.compress1xDo(s.tmpOut[i][:0], toDo) wg.Done() }(i) } wg.Wait() for i := 0; i < 4; i++ { - if errs[i] != nil { - return nil, errs[i] - } o := s.tmpOut[i] if len(o) > math.MaxUint16 { // We cannot store the size in the jump table diff --git a/vendor/github.com/klauspost/compress/s2/dict.go b/vendor/github.com/klauspost/compress/s2/dict.go index 24f7ce80bc..f125ad0963 100644 --- a/vendor/github.com/klauspost/compress/s2/dict.go +++ b/vendor/github.com/klauspost/compress/s2/dict.go @@ -106,6 +106,25 @@ func MakeDict(data []byte, searchStart []byte) *Dict { return &d } +// MakeDictManual will create a dictionary. +// 'data' must be at least MinDictSize and less than or equal to MaxDictSize. +// A manual first repeat index into data must be provided. +// It must be less than len(data)-8. +func MakeDictManual(data []byte, firstIdx uint16) *Dict { + if len(data) < MinDictSize || int(firstIdx) >= len(data)-8 || len(data) > MaxDictSize { + return nil + } + var d Dict + dict := data + d.dict = dict + if cap(d.dict) < len(d.dict)+16 { + d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...) + } + + d.repeat = int(firstIdx) + return &d +} + // Encode returns the encoded form of src. The returned slice may be a sub- // slice of dst if dst was large enough to hold the entire encoded block. // Otherwise, a newly allocated slice will be returned. diff --git a/vendor/github.com/klauspost/compress/s2/encode.go b/vendor/github.com/klauspost/compress/s2/encode.go index e6c2310212..0c9088adfe 100644 --- a/vendor/github.com/klauspost/compress/s2/encode.go +++ b/vendor/github.com/klauspost/compress/s2/encode.go @@ -57,7 +57,7 @@ func Encode(dst, src []byte) []byte { // The function returns -1 if no improvement could be achieved. // Using actual compression will most often produce better compression than the estimate. func EstimateBlockSize(src []byte) (d int) { - if len(src) < 6 || int64(len(src)) > 0xffffffff { + if len(src) <= inputMargin || int64(len(src)) > 0xffffffff { return -1 } if len(src) <= 1024 { diff --git a/vendor/github.com/klauspost/compress/s2/encode_best.go b/vendor/github.com/klauspost/compress/s2/encode_best.go index 1d13e869a1..47bac74234 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_best.go +++ b/vendor/github.com/klauspost/compress/s2/encode_best.go @@ -157,6 +157,9 @@ func encodeBlockBest(dst, src []byte, dict *Dict) (d int) { return m } matchDict := func(candidate, s int, first uint32, rep bool) match { + if s >= MaxDictSrcOffset { + return match{offset: candidate, s: s} + } // Calculate offset as if in continuous array with s offset := -len(dict.dict) + candidate if best.length != 0 && best.s-best.offset == s-offset && !rep { diff --git a/vendor/github.com/klauspost/compress/s2/encode_go.go b/vendor/github.com/klauspost/compress/s2/encode_go.go index 0d39c7b0e0..6b393c34d3 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_go.go +++ b/vendor/github.com/klauspost/compress/s2/encode_go.go @@ -316,6 +316,7 @@ func matchLen(a []byte, b []byte) int { return len(a) + checked } +// input must be > inputMargin func calcBlockSize(src []byte) (d int) { // Initialize the hash table. const ( @@ -501,6 +502,7 @@ emitRemainder: return d } +// length must be > inputMargin. func calcBlockSizeSmall(src []byte) (d int) { // Initialize the hash table. const ( diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s index 54031aa313..5f110d1940 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s @@ -249,15 +249,43 @@ emit_literal_done_repeat_emit_encodeBlockAsm: // matchLen XORL R11, R11 + +matchlen_loopback_16_repeat_extend_encodeBlockAsm: + CMPL R8, $0x10 + JB matchlen_match8_repeat_extend_encodeBlockAsm + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm + XORQ 8(BX)(R11*1), R12 + JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm + LEAL -16(R8), R8 + LEAL 16(R11), R11 + JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm + +matchlen_bsf_16repeat_extend_encodeBlockAsm: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP repeat_extend_forward_end_encodeBlockAsm + +matchlen_match8_repeat_extend_encodeBlockAsm: CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm + MOVQ (R9)(R11*1), R10 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm + LEAL -8(R8), R8 + LEAL 8(R11), R11 + JMP matchlen_match4_repeat_extend_encodeBlockAsm -matchlen_loopback_repeat_extend_encodeBlockAsm: - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_repeat_extend_encodeBlockAsm - +matchlen_bsf_8_repeat_extend_encodeBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -269,12 +297,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm: LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm -matchlen_loop_repeat_extend_encodeBlockAsm: - LEAL -8(R8), R8 - LEAL 8(R11), R11 - CMPL R8, $0x08 - JAE matchlen_loopback_repeat_extend_encodeBlockAsm - matchlen_match4_repeat_extend_encodeBlockAsm: CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm @@ -851,15 +873,43 @@ match_nolit_loop_encodeBlockAsm: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeBlockAsm: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeBlockAsm + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeBlockAsm + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeBlockAsm + +matchlen_bsf_16match_nolit_encodeBlockAsm: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeBlockAsm + +matchlen_match8_match_nolit_encodeBlockAsm: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeBlockAsm -matchlen_loopback_match_nolit_encodeBlockAsm: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeBlockAsm - +matchlen_bsf_8_match_nolit_encodeBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -871,12 +921,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm -matchlen_loop_match_nolit_encodeBlockAsm: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBlockAsm - matchlen_match4_match_nolit_encodeBlockAsm: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm @@ -1610,15 +1654,43 @@ emit_literal_done_repeat_emit_encodeBlockAsm4MB: // matchLen XORL R11, R11 + +matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB: + CMPL R8, $0x10 + JB matchlen_match8_repeat_extend_encodeBlockAsm4MB + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB + XORQ 8(BX)(R11*1), R12 + JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm4MB + LEAL -16(R8), R8 + LEAL 16(R11), R11 + JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB + +matchlen_bsf_16repeat_extend_encodeBlockAsm4MB: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP repeat_extend_forward_end_encodeBlockAsm4MB + +matchlen_match8_repeat_extend_encodeBlockAsm4MB: CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm4MB + MOVQ (R9)(R11*1), R10 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB + LEAL -8(R8), R8 + LEAL 8(R11), R11 + JMP matchlen_match4_repeat_extend_encodeBlockAsm4MB -matchlen_loopback_repeat_extend_encodeBlockAsm4MB: - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_repeat_extend_encodeBlockAsm4MB - +matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -1630,12 +1702,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm4MB: LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm4MB -matchlen_loop_repeat_extend_encodeBlockAsm4MB: - LEAL -8(R8), R8 - LEAL 8(R11), R11 - CMPL R8, $0x08 - JAE matchlen_loopback_repeat_extend_encodeBlockAsm4MB - matchlen_match4_repeat_extend_encodeBlockAsm4MB: CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm4MB @@ -2162,15 +2228,43 @@ match_nolit_loop_encodeBlockAsm4MB: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeBlockAsm4MB: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeBlockAsm4MB + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeBlockAsm4MB + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeBlockAsm4MB + +matchlen_bsf_16match_nolit_encodeBlockAsm4MB: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeBlockAsm4MB + +matchlen_match8_match_nolit_encodeBlockAsm4MB: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm4MB + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeBlockAsm4MB -matchlen_loopback_match_nolit_encodeBlockAsm4MB: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeBlockAsm4MB - +matchlen_bsf_8_match_nolit_encodeBlockAsm4MB: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -2182,12 +2276,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm4MB: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm4MB -matchlen_loop_match_nolit_encodeBlockAsm4MB: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBlockAsm4MB - matchlen_match4_match_nolit_encodeBlockAsm4MB: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm4MB @@ -2873,15 +2961,43 @@ emit_literal_done_repeat_emit_encodeBlockAsm12B: // matchLen XORL R11, R11 + +matchlen_loopback_16_repeat_extend_encodeBlockAsm12B: + CMPL R8, $0x10 + JB matchlen_match8_repeat_extend_encodeBlockAsm12B + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B + XORQ 8(BX)(R11*1), R12 + JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm12B + LEAL -16(R8), R8 + LEAL 16(R11), R11 + JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm12B + +matchlen_bsf_16repeat_extend_encodeBlockAsm12B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP repeat_extend_forward_end_encodeBlockAsm12B + +matchlen_match8_repeat_extend_encodeBlockAsm12B: CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm12B + MOVQ (R9)(R11*1), R10 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B + LEAL -8(R8), R8 + LEAL 8(R11), R11 + JMP matchlen_match4_repeat_extend_encodeBlockAsm12B -matchlen_loopback_repeat_extend_encodeBlockAsm12B: - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_repeat_extend_encodeBlockAsm12B - +matchlen_bsf_8_repeat_extend_encodeBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -2893,12 +3009,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm12B: LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm12B -matchlen_loop_repeat_extend_encodeBlockAsm12B: - LEAL -8(R8), R8 - LEAL 8(R11), R11 - CMPL R8, $0x08 - JAE matchlen_loopback_repeat_extend_encodeBlockAsm12B - matchlen_match4_repeat_extend_encodeBlockAsm12B: CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm12B @@ -3303,15 +3413,43 @@ match_nolit_loop_encodeBlockAsm12B: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeBlockAsm12B: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeBlockAsm12B + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeBlockAsm12B + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeBlockAsm12B + +matchlen_bsf_16match_nolit_encodeBlockAsm12B: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeBlockAsm12B + +matchlen_match8_match_nolit_encodeBlockAsm12B: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm12B + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeBlockAsm12B -matchlen_loopback_match_nolit_encodeBlockAsm12B: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeBlockAsm12B - +matchlen_bsf_8_match_nolit_encodeBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -3323,12 +3461,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm12B: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm12B -matchlen_loop_match_nolit_encodeBlockAsm12B: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBlockAsm12B - matchlen_match4_match_nolit_encodeBlockAsm12B: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm12B @@ -3904,15 +4036,43 @@ emit_literal_done_repeat_emit_encodeBlockAsm10B: // matchLen XORL R11, R11 + +matchlen_loopback_16_repeat_extend_encodeBlockAsm10B: + CMPL R8, $0x10 + JB matchlen_match8_repeat_extend_encodeBlockAsm10B + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B + XORQ 8(BX)(R11*1), R12 + JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm10B + LEAL -16(R8), R8 + LEAL 16(R11), R11 + JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm10B + +matchlen_bsf_16repeat_extend_encodeBlockAsm10B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP repeat_extend_forward_end_encodeBlockAsm10B + +matchlen_match8_repeat_extend_encodeBlockAsm10B: CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm10B + MOVQ (R9)(R11*1), R10 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B + LEAL -8(R8), R8 + LEAL 8(R11), R11 + JMP matchlen_match4_repeat_extend_encodeBlockAsm10B -matchlen_loopback_repeat_extend_encodeBlockAsm10B: - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_repeat_extend_encodeBlockAsm10B - +matchlen_bsf_8_repeat_extend_encodeBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -3924,12 +4084,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm10B: LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm10B -matchlen_loop_repeat_extend_encodeBlockAsm10B: - LEAL -8(R8), R8 - LEAL 8(R11), R11 - CMPL R8, $0x08 - JAE matchlen_loopback_repeat_extend_encodeBlockAsm10B - matchlen_match4_repeat_extend_encodeBlockAsm10B: CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm10B @@ -4334,15 +4488,43 @@ match_nolit_loop_encodeBlockAsm10B: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeBlockAsm10B: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeBlockAsm10B + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeBlockAsm10B + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeBlockAsm10B + +matchlen_bsf_16match_nolit_encodeBlockAsm10B: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeBlockAsm10B + +matchlen_match8_match_nolit_encodeBlockAsm10B: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm10B + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeBlockAsm10B -matchlen_loopback_match_nolit_encodeBlockAsm10B: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeBlockAsm10B - +matchlen_bsf_8_match_nolit_encodeBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -4354,12 +4536,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm10B: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm10B -matchlen_loop_match_nolit_encodeBlockAsm10B: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBlockAsm10B - matchlen_match4_match_nolit_encodeBlockAsm10B: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm10B @@ -4935,15 +5111,43 @@ emit_literal_done_repeat_emit_encodeBlockAsm8B: // matchLen XORL R11, R11 + +matchlen_loopback_16_repeat_extend_encodeBlockAsm8B: + CMPL R8, $0x10 + JB matchlen_match8_repeat_extend_encodeBlockAsm8B + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B + XORQ 8(BX)(R11*1), R12 + JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm8B + LEAL -16(R8), R8 + LEAL 16(R11), R11 + JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm8B + +matchlen_bsf_16repeat_extend_encodeBlockAsm8B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP repeat_extend_forward_end_encodeBlockAsm8B + +matchlen_match8_repeat_extend_encodeBlockAsm8B: CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm8B + MOVQ (R9)(R11*1), R10 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B + LEAL -8(R8), R8 + LEAL 8(R11), R11 + JMP matchlen_match4_repeat_extend_encodeBlockAsm8B -matchlen_loopback_repeat_extend_encodeBlockAsm8B: - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_repeat_extend_encodeBlockAsm8B - +matchlen_bsf_8_repeat_extend_encodeBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -4955,12 +5159,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm8B: LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm8B -matchlen_loop_repeat_extend_encodeBlockAsm8B: - LEAL -8(R8), R8 - LEAL 8(R11), R11 - CMPL R8, $0x08 - JAE matchlen_loopback_repeat_extend_encodeBlockAsm8B - matchlen_match4_repeat_extend_encodeBlockAsm8B: CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm8B @@ -5351,15 +5549,43 @@ match_nolit_loop_encodeBlockAsm8B: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeBlockAsm8B: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeBlockAsm8B + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeBlockAsm8B + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeBlockAsm8B + +matchlen_bsf_16match_nolit_encodeBlockAsm8B: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeBlockAsm8B + +matchlen_match8_match_nolit_encodeBlockAsm8B: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm8B + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeBlockAsm8B -matchlen_loopback_match_nolit_encodeBlockAsm8B: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeBlockAsm8B - +matchlen_bsf_8_match_nolit_encodeBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -5371,12 +5597,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm8B: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm8B -matchlen_loop_match_nolit_encodeBlockAsm8B: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBlockAsm8B - matchlen_match4_match_nolit_encodeBlockAsm8B: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm8B @@ -5854,15 +6074,43 @@ match_dst_size_check_encodeBetterBlockAsm: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeBetterBlockAsm: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeBetterBlockAsm + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm + +matchlen_bsf_16match_nolit_encodeBetterBlockAsm: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeBetterBlockAsm + +matchlen_match8_match_nolit_encodeBetterBlockAsm: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeBetterBlockAsm -matchlen_loopback_match_nolit_encodeBetterBlockAsm: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeBetterBlockAsm - +matchlen_bsf_8_match_nolit_encodeBetterBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -5874,12 +6122,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm -matchlen_loop_match_nolit_encodeBetterBlockAsm: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm - matchlen_match4_match_nolit_encodeBetterBlockAsm: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm @@ -6926,15 +7168,43 @@ match_dst_size_check_encodeBetterBlockAsm4MB: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeBetterBlockAsm4MB + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB + +matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeBetterBlockAsm4MB + +matchlen_match8_match_nolit_encodeBetterBlockAsm4MB: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeBetterBlockAsm4MB -matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeBetterBlockAsm4MB - +matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -6946,12 +7216,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm4MB -matchlen_loop_match_nolit_encodeBetterBlockAsm4MB: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB - matchlen_match4_match_nolit_encodeBetterBlockAsm4MB: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm4MB @@ -7924,15 +8188,43 @@ match_dst_size_check_encodeBetterBlockAsm12B: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeBetterBlockAsm12B + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B + +matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeBetterBlockAsm12B + +matchlen_match8_match_nolit_encodeBetterBlockAsm12B: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm12B + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeBetterBlockAsm12B -matchlen_loopback_match_nolit_encodeBetterBlockAsm12B: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeBetterBlockAsm12B - +matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -7944,12 +8236,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm12B: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm12B -matchlen_loop_match_nolit_encodeBetterBlockAsm12B: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm12B - matchlen_match4_match_nolit_encodeBetterBlockAsm12B: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm12B @@ -8775,15 +9061,43 @@ match_dst_size_check_encodeBetterBlockAsm10B: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeBetterBlockAsm10B + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B + +matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeBetterBlockAsm10B + +matchlen_match8_match_nolit_encodeBetterBlockAsm10B: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm10B + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeBetterBlockAsm10B -matchlen_loopback_match_nolit_encodeBetterBlockAsm10B: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeBetterBlockAsm10B - +matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -8795,12 +9109,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm10B: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm10B -matchlen_loop_match_nolit_encodeBetterBlockAsm10B: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm10B - matchlen_match4_match_nolit_encodeBetterBlockAsm10B: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm10B @@ -9626,15 +9934,43 @@ match_dst_size_check_encodeBetterBlockAsm8B: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeBetterBlockAsm8B + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B + +matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeBetterBlockAsm8B + +matchlen_match8_match_nolit_encodeBetterBlockAsm8B: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm8B + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeBetterBlockAsm8B -matchlen_loopback_match_nolit_encodeBetterBlockAsm8B: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeBetterBlockAsm8B - +matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -9646,12 +9982,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm8B: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm8B -matchlen_loop_match_nolit_encodeBetterBlockAsm8B: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm8B - matchlen_match4_match_nolit_encodeBetterBlockAsm8B: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm8B @@ -10575,31 +10905,53 @@ emit_literal_done_repeat_emit_encodeSnappyBlockAsm: // matchLen XORL R10, R10 - CMPL DI, $0x08 - JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm - -matchlen_loopback_repeat_extend_encodeSnappyBlockAsm: - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 - TESTQ R9, R9 - JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm +matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm: + CMPL DI, $0x10 + JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm + XORQ 8(BX)(R10*1), R11 + JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm + LEAL -16(DI), DI + LEAL 16(R10), R10 + JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm + +matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R11, R11 #else - BSFQ R9, R9 + BSFQ R11, R11 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm -matchlen_loop_repeat_extend_encodeSnappyBlockAsm: +matchlen_match8_repeat_extend_encodeSnappyBlockAsm: + CMPL DI, $0x08 + JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm + MOVQ (R8)(R10*1), R9 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm LEAL -8(DI), DI LEAL 8(R10), R10 - CMPL DI, $0x08 - JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm + JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm + +matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm: +#ifdef GOAMD64_v3 + TZCNTQ R9, R9 + +#else + BSFQ R9, R9 + +#endif + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 + JMP repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match4_repeat_extend_encodeSnappyBlockAsm: CMPL DI, $0x04 @@ -10897,15 +11249,43 @@ match_nolit_loop_encodeSnappyBlockAsm: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBlockAsm + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm + +matchlen_bsf_16match_nolit_encodeSnappyBlockAsm: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeSnappyBlockAsm + +matchlen_match8_match_nolit_encodeSnappyBlockAsm: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm -matchlen_loopback_match_nolit_encodeSnappyBlockAsm: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm - +matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -10917,12 +11297,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm -matchlen_loop_match_nolit_encodeSnappyBlockAsm: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm - matchlen_match4_match_nolit_encodeSnappyBlockAsm: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm @@ -11437,15 +11811,43 @@ emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K: // matchLen XORL R10, R10 + +matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K: + CMPL DI, $0x10 + JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K + XORQ 8(BX)(R10*1), R11 + JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K + LEAL -16(DI), DI + LEAL 16(R10), R10 + JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K + +matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K: +#ifdef GOAMD64_v3 + TZCNTQ R11, R11 + +#else + BSFQ R11, R11 + +#endif + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 + JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K + +matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K: CMPL DI, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K + MOVQ (R8)(R10*1), R9 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K + LEAL -8(DI), DI + LEAL 8(R10), R10 + JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K -matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K: - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 - TESTQ R9, R9 - JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K - +matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -11457,12 +11859,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K: LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K -matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K - matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K: CMPL DI, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K @@ -11719,15 +12115,43 @@ match_nolit_loop_encodeSnappyBlockAsm64K: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBlockAsm64K + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K + +matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeSnappyBlockAsm64K + +matchlen_match8_match_nolit_encodeSnappyBlockAsm64K: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm64K + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm64K -matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm64K - +matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -11739,12 +12163,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm64K -matchlen_loop_match_nolit_encodeSnappyBlockAsm64K: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K - matchlen_match4_match_nolit_encodeSnappyBlockAsm64K: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm64K @@ -12219,15 +12637,43 @@ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B: // matchLen XORL R10, R10 + +matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B: + CMPL DI, $0x10 + JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B + XORQ 8(BX)(R10*1), R11 + JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B + LEAL -16(DI), DI + LEAL 16(R10), R10 + JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B + +matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B: +#ifdef GOAMD64_v3 + TZCNTQ R11, R11 + +#else + BSFQ R11, R11 + +#endif + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 + JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B + +matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B: CMPL DI, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B + MOVQ (R8)(R10*1), R9 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B + LEAL -8(DI), DI + LEAL 8(R10), R10 + JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B -matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B: - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 - TESTQ R9, R9 - JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B - +matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -12239,12 +12685,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B: LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B -matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B - matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B: CMPL DI, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B @@ -12501,15 +12941,43 @@ match_nolit_loop_encodeSnappyBlockAsm12B: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBlockAsm12B + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B + +matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeSnappyBlockAsm12B + +matchlen_match8_match_nolit_encodeSnappyBlockAsm12B: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm12B + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm12B -matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm12B - +matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -12521,12 +12989,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm12B -matchlen_loop_match_nolit_encodeSnappyBlockAsm12B: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B - matchlen_match4_match_nolit_encodeSnappyBlockAsm12B: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm12B @@ -13001,15 +13463,43 @@ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B: // matchLen XORL R10, R10 + +matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B: + CMPL DI, $0x10 + JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B + XORQ 8(BX)(R10*1), R11 + JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B + LEAL -16(DI), DI + LEAL 16(R10), R10 + JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B + +matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B: +#ifdef GOAMD64_v3 + TZCNTQ R11, R11 + +#else + BSFQ R11, R11 + +#endif + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 + JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B + +matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B: CMPL DI, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B + MOVQ (R8)(R10*1), R9 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B + LEAL -8(DI), DI + LEAL 8(R10), R10 + JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B -matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B: - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 - TESTQ R9, R9 - JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B - +matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -13021,12 +13511,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B: LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B -matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B - matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B: CMPL DI, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B @@ -13283,15 +13767,43 @@ match_nolit_loop_encodeSnappyBlockAsm10B: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBlockAsm10B + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B + +matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeSnappyBlockAsm10B + +matchlen_match8_match_nolit_encodeSnappyBlockAsm10B: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm10B + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm10B -matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm10B - +matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -13303,12 +13815,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm10B -matchlen_loop_match_nolit_encodeSnappyBlockAsm10B: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B - matchlen_match4_match_nolit_encodeSnappyBlockAsm10B: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm10B @@ -13783,15 +14289,43 @@ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B: // matchLen XORL R10, R10 + +matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B: + CMPL DI, $0x10 + JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B + XORQ 8(BX)(R10*1), R11 + JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B + LEAL -16(DI), DI + LEAL 16(R10), R10 + JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B + +matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B: +#ifdef GOAMD64_v3 + TZCNTQ R11, R11 + +#else + BSFQ R11, R11 + +#endif + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 + JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B + +matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B: CMPL DI, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B + MOVQ (R8)(R10*1), R9 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B + LEAL -8(DI), DI + LEAL 8(R10), R10 + JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B -matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B: - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 - TESTQ R9, R9 - JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B - +matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -13803,12 +14337,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B: LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B -matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B - matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B: CMPL DI, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B @@ -14063,15 +14591,43 @@ match_nolit_loop_encodeSnappyBlockAsm8B: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBlockAsm8B + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B + +matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeSnappyBlockAsm8B + +matchlen_match8_match_nolit_encodeSnappyBlockAsm8B: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm8B + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm8B -matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm8B - +matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -14083,12 +14639,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm8B -matchlen_loop_match_nolit_encodeSnappyBlockAsm8B: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B - matchlen_match4_match_nolit_encodeSnappyBlockAsm8B: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm8B @@ -14473,15 +15023,43 @@ match_dst_size_check_encodeSnappyBetterBlockAsm: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm + +matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeSnappyBetterBlockAsm + +matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm -matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm - +matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -14493,12 +15071,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeSnappyBetterBlockAsm -matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm - matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm @@ -15096,15 +15668,43 @@ match_dst_size_check_encodeSnappyBetterBlockAsm64K: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K + +matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeSnappyBetterBlockAsm64K + +matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K -matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K - +matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -15116,12 +15716,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeSnappyBetterBlockAsm64K -matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K - matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K @@ -15654,15 +16248,43 @@ match_dst_size_check_encodeSnappyBetterBlockAsm12B: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B + +matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeSnappyBetterBlockAsm12B + +matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B -matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B - +matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -15674,12 +16296,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeSnappyBetterBlockAsm12B -matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B - matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B @@ -16212,15 +16828,43 @@ match_dst_size_check_encodeSnappyBetterBlockAsm10B: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B + +matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeSnappyBetterBlockAsm10B + +matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B -matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B - +matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -16232,12 +16876,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeSnappyBetterBlockAsm10B -matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B - matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B @@ -16770,15 +17408,43 @@ match_dst_size_check_encodeSnappyBetterBlockAsm8B: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B + +matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeSnappyBetterBlockAsm8B + +matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B -matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B - +matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -16790,12 +17456,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeSnappyBetterBlockAsm8B -matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B - matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B @@ -17343,15 +18003,43 @@ emit_literal_done_repeat_emit_calcBlockSize: // matchLen XORL R10, R10 + +matchlen_loopback_16_repeat_extend_calcBlockSize: + CMPL DI, $0x10 + JB matchlen_match8_repeat_extend_calcBlockSize + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_calcBlockSize + XORQ 8(BX)(R10*1), R11 + JNZ matchlen_bsf_16repeat_extend_calcBlockSize + LEAL -16(DI), DI + LEAL 16(R10), R10 + JMP matchlen_loopback_16_repeat_extend_calcBlockSize + +matchlen_bsf_16repeat_extend_calcBlockSize: +#ifdef GOAMD64_v3 + TZCNTQ R11, R11 + +#else + BSFQ R11, R11 + +#endif + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 + JMP repeat_extend_forward_end_calcBlockSize + +matchlen_match8_repeat_extend_calcBlockSize: CMPL DI, $0x08 JB matchlen_match4_repeat_extend_calcBlockSize + MOVQ (R8)(R10*1), R9 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_calcBlockSize + LEAL -8(DI), DI + LEAL 8(R10), R10 + JMP matchlen_match4_repeat_extend_calcBlockSize -matchlen_loopback_repeat_extend_calcBlockSize: - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 - TESTQ R9, R9 - JZ matchlen_loop_repeat_extend_calcBlockSize - +matchlen_bsf_8_repeat_extend_calcBlockSize: #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -17363,12 +18051,6 @@ matchlen_loopback_repeat_extend_calcBlockSize: LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_calcBlockSize -matchlen_loop_repeat_extend_calcBlockSize: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JAE matchlen_loopback_repeat_extend_calcBlockSize - matchlen_match4_repeat_extend_calcBlockSize: CMPL DI, $0x04 JB matchlen_match2_repeat_extend_calcBlockSize @@ -17554,15 +18236,43 @@ match_nolit_loop_calcBlockSize: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_calcBlockSize: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_calcBlockSize + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_calcBlockSize + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_calcBlockSize + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_calcBlockSize + +matchlen_bsf_16match_nolit_calcBlockSize: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_calcBlockSize + +matchlen_match8_match_nolit_calcBlockSize: CMPL SI, $0x08 JB matchlen_match4_match_nolit_calcBlockSize + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_calcBlockSize + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_calcBlockSize -matchlen_loopback_match_nolit_calcBlockSize: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_calcBlockSize - +matchlen_bsf_8_match_nolit_calcBlockSize: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -17574,12 +18284,6 @@ matchlen_loopback_match_nolit_calcBlockSize: LEAL (R9)(R8*1), R9 JMP match_nolit_end_calcBlockSize -matchlen_loop_match_nolit_calcBlockSize: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_calcBlockSize - matchlen_match4_match_nolit_calcBlockSize: CMPL SI, $0x04 JB matchlen_match2_match_nolit_calcBlockSize @@ -17872,15 +18576,43 @@ emit_literal_done_repeat_emit_calcBlockSizeSmall: // matchLen XORL R10, R10 + +matchlen_loopback_16_repeat_extend_calcBlockSizeSmall: + CMPL DI, $0x10 + JB matchlen_match8_repeat_extend_calcBlockSizeSmall + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall + XORQ 8(BX)(R10*1), R11 + JNZ matchlen_bsf_16repeat_extend_calcBlockSizeSmall + LEAL -16(DI), DI + LEAL 16(R10), R10 + JMP matchlen_loopback_16_repeat_extend_calcBlockSizeSmall + +matchlen_bsf_16repeat_extend_calcBlockSizeSmall: +#ifdef GOAMD64_v3 + TZCNTQ R11, R11 + +#else + BSFQ R11, R11 + +#endif + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 + JMP repeat_extend_forward_end_calcBlockSizeSmall + +matchlen_match8_repeat_extend_calcBlockSizeSmall: CMPL DI, $0x08 JB matchlen_match4_repeat_extend_calcBlockSizeSmall + MOVQ (R8)(R10*1), R9 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall + LEAL -8(DI), DI + LEAL 8(R10), R10 + JMP matchlen_match4_repeat_extend_calcBlockSizeSmall -matchlen_loopback_repeat_extend_calcBlockSizeSmall: - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 - TESTQ R9, R9 - JZ matchlen_loop_repeat_extend_calcBlockSizeSmall - +matchlen_bsf_8_repeat_extend_calcBlockSizeSmall: #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -17892,12 +18624,6 @@ matchlen_loopback_repeat_extend_calcBlockSizeSmall: LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_calcBlockSizeSmall -matchlen_loop_repeat_extend_calcBlockSizeSmall: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JAE matchlen_loopback_repeat_extend_calcBlockSizeSmall - matchlen_match4_repeat_extend_calcBlockSizeSmall: CMPL DI, $0x04 JB matchlen_match2_repeat_extend_calcBlockSizeSmall @@ -18053,15 +18779,43 @@ match_nolit_loop_calcBlockSizeSmall: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_calcBlockSizeSmall: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_calcBlockSizeSmall + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_calcBlockSizeSmall + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_calcBlockSizeSmall + +matchlen_bsf_16match_nolit_calcBlockSizeSmall: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_calcBlockSizeSmall + +matchlen_match8_match_nolit_calcBlockSizeSmall: CMPL SI, $0x08 JB matchlen_match4_match_nolit_calcBlockSizeSmall + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_calcBlockSizeSmall -matchlen_loopback_match_nolit_calcBlockSizeSmall: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_calcBlockSizeSmall - +matchlen_bsf_8_match_nolit_calcBlockSizeSmall: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -18073,12 +18827,6 @@ matchlen_loopback_match_nolit_calcBlockSizeSmall: LEAL (R9)(R8*1), R9 JMP match_nolit_end_calcBlockSizeSmall -matchlen_loop_match_nolit_calcBlockSizeSmall: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_calcBlockSizeSmall - matchlen_match4_match_nolit_calcBlockSizeSmall: CMPL SI, $0x04 JB matchlen_match2_match_nolit_calcBlockSizeSmall @@ -18840,15 +19588,43 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56 // matchLen XORL SI, SI + +matchlen_loopback_16_standalone: + CMPL DX, $0x10 + JB matchlen_match8_standalone + MOVQ (AX)(SI*1), BX + MOVQ 8(AX)(SI*1), DI + XORQ (CX)(SI*1), BX + JNZ matchlen_bsf_8_standalone + XORQ 8(CX)(SI*1), DI + JNZ matchlen_bsf_16standalone + LEAL -16(DX), DX + LEAL 16(SI), SI + JMP matchlen_loopback_16_standalone + +matchlen_bsf_16standalone: +#ifdef GOAMD64_v3 + TZCNTQ DI, DI + +#else + BSFQ DI, DI + +#endif + SARQ $0x03, DI + LEAL 8(SI)(DI*1), SI + JMP gen_match_len_end + +matchlen_match8_standalone: CMPL DX, $0x08 JB matchlen_match4_standalone + MOVQ (AX)(SI*1), BX + XORQ (CX)(SI*1), BX + JNZ matchlen_bsf_8_standalone + LEAL -8(DX), DX + LEAL 8(SI), SI + JMP matchlen_match4_standalone -matchlen_loopback_standalone: - MOVQ (AX)(SI*1), BX - XORQ (CX)(SI*1), BX - TESTQ BX, BX - JZ matchlen_loop_standalone - +matchlen_bsf_8_standalone: #ifdef GOAMD64_v3 TZCNTQ BX, BX @@ -18860,12 +19636,6 @@ matchlen_loopback_standalone: LEAL (SI)(BX*1), SI JMP gen_match_len_end -matchlen_loop_standalone: - LEAL -8(DX), DX - LEAL 8(SI), SI - CMPL DX, $0x08 - JAE matchlen_loopback_standalone - matchlen_match4_standalone: CMPL DX, $0x04 JB matchlen_match2_standalone diff --git a/vendor/github.com/klauspost/compress/s2/index.go b/vendor/github.com/klauspost/compress/s2/index.go index dd9ecfe718..18a4f7acd6 100644 --- a/vendor/github.com/klauspost/compress/s2/index.go +++ b/vendor/github.com/klauspost/compress/s2/index.go @@ -511,24 +511,22 @@ func IndexStream(r io.Reader) ([]byte, error) { // JSON returns the index as JSON text. func (i *Index) JSON() []byte { + type offset struct { + CompressedOffset int64 `json:"compressed"` + UncompressedOffset int64 `json:"uncompressed"` + } x := struct { - TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown. - TotalCompressed int64 `json:"total_compressed"` // Total Compressed size if known. Will be -1 if unknown. - Offsets []struct { - CompressedOffset int64 `json:"compressed"` - UncompressedOffset int64 `json:"uncompressed"` - } `json:"offsets"` - EstBlockUncomp int64 `json:"est_block_uncompressed"` + TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown. + TotalCompressed int64 `json:"total_compressed"` // Total Compressed size if known. Will be -1 if unknown. + Offsets []offset `json:"offsets"` + EstBlockUncomp int64 `json:"est_block_uncompressed"` }{ TotalUncompressed: i.TotalUncompressed, TotalCompressed: i.TotalCompressed, EstBlockUncomp: i.estBlockUncomp, } for _, v := range i.info { - x.Offsets = append(x.Offsets, struct { - CompressedOffset int64 `json:"compressed"` - UncompressedOffset int64 `json:"uncompressed"` - }{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset}) + x.Offsets = append(x.Offsets, offset{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset}) } b, _ := json.MarshalIndent(x, "", " ") return b diff --git a/vendor/github.com/klauspost/compress/zstd/bitreader.go b/vendor/github.com/klauspost/compress/zstd/bitreader.go index 97299d499c..25ca983941 100644 --- a/vendor/github.com/klauspost/compress/zstd/bitreader.go +++ b/vendor/github.com/klauspost/compress/zstd/bitreader.go @@ -17,7 +17,6 @@ import ( // for aligning the input. type bitReader struct { in []byte - off uint // next byte to read is at in[off - 1] value uint64 // Maybe use [16]byte, but shifting is awkward. bitsRead uint8 } @@ -28,7 +27,6 @@ func (b *bitReader) init(in []byte) error { return errors.New("corrupt stream: too short") } b.in = in - b.off = uint(len(in)) // The highest bit of the last byte indicates where to start v := in[len(in)-1] if v == 0 { @@ -69,21 +67,19 @@ func (b *bitReader) fillFast() { if b.bitsRead < 32 { return } - // 2 bounds checks. - v := b.in[b.off-4:] - v = v[:4] + v := b.in[len(b.in)-4:] + b.in = b.in[:len(b.in)-4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 - b.off -= 4 } // fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. func (b *bitReader) fillFastStart() { - // Do single re-slice to avoid bounds checks. - b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + v := b.in[len(b.in)-8:] + b.in = b.in[:len(b.in)-8] + b.value = binary.LittleEndian.Uint64(v) b.bitsRead = 0 - b.off -= 8 } // fill() will make sure at least 32 bits are available. @@ -91,25 +87,25 @@ func (b *bitReader) fill() { if b.bitsRead < 32 { return } - if b.off >= 4 { - v := b.in[b.off-4:] - v = v[:4] + if len(b.in) >= 4 { + v := b.in[len(b.in)-4:] + b.in = b.in[:len(b.in)-4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 - b.off -= 4 return } - for b.off > 0 { - b.value = (b.value << 8) | uint64(b.in[b.off-1]) - b.bitsRead -= 8 - b.off-- + + b.bitsRead -= uint8(8 * len(b.in)) + for len(b.in) > 0 { + b.value = (b.value << 8) | uint64(b.in[len(b.in)-1]) + b.in = b.in[:len(b.in)-1] } } // finished returns true if all bits have been read from the bit stream. func (b *bitReader) finished() bool { - return b.off == 0 && b.bitsRead >= 64 + return len(b.in) == 0 && b.bitsRead >= 64 } // overread returns true if more bits have been requested than is on the stream. @@ -119,7 +115,7 @@ func (b *bitReader) overread() bool { // remain returns the number of bits remaining. func (b *bitReader) remain() uint { - return b.off*8 + 64 - uint(b.bitsRead) + return 8*uint(len(b.in)) + 64 - uint(b.bitsRead) } // close the bitstream and returns an error if out-of-buffer reads occurred. diff --git a/vendor/github.com/klauspost/compress/zstd/bitwriter.go b/vendor/github.com/klauspost/compress/zstd/bitwriter.go index 78b3c61be3..1952f175b0 100644 --- a/vendor/github.com/klauspost/compress/zstd/bitwriter.go +++ b/vendor/github.com/klauspost/compress/zstd/bitwriter.go @@ -97,12 +97,11 @@ func (b *bitWriter) flushAlign() { // close will write the alignment bit and write the final byte(s) // to the output. -func (b *bitWriter) close() error { +func (b *bitWriter) close() { // End mark b.addBits16Clean(1, 1) // flush until next byte. b.flushAlign() - return nil } // reset and continue writing by appending to out. diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go index fd4a36f730..2cfe925ade 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockenc.go +++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go @@ -361,14 +361,21 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error { if len(lits) >= 1024 { // Use 4 Streams. out, reUsed, err = huff0.Compress4X(lits, b.litEnc) - } else if len(lits) > 32 { + } else if len(lits) > 16 { // Use 1 stream single = true out, reUsed, err = huff0.Compress1X(lits, b.litEnc) } else { err = huff0.ErrIncompressible } - + if err == nil && len(out)+5 > len(lits) { + // If we are close, we may still be worse or equal to raw. + var lh literalsHeader + lh.setSizes(len(out), len(lits), single) + if len(out)+lh.size() >= len(lits) { + err = huff0.ErrIncompressible + } + } switch err { case huff0.ErrIncompressible: if debugEncoder { @@ -503,7 +510,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { if len(b.literals) >= 1024 && !raw { // Use 4 Streams. out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc) - } else if len(b.literals) > 32 && !raw { + } else if len(b.literals) > 16 && !raw { // Use 1 stream single = true out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc) @@ -511,6 +518,17 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { err = huff0.ErrIncompressible } + if err == nil && len(out)+5 > len(b.literals) { + // If we are close, we may still be worse or equal to raw. + var lh literalsHeader + lh.setSize(len(b.literals)) + szRaw := lh.size() + lh.setSizes(len(out), len(b.literals), single) + szComp := lh.size() + if len(out)+szComp >= len(b.literals)+szRaw { + err = huff0.ErrIncompressible + } + } switch err { case huff0.ErrIncompressible: lh.setType(literalsBlockRaw) @@ -773,10 +791,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { ml.flush(mlEnc.actualTableLog) of.flush(ofEnc.actualTableLog) ll.flush(llEnc.actualTableLog) - err = wr.close() - if err != nil { - return err - } + wr.close() b.output = wr.out // Maybe even add a bigger margin. diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go index ca0951452e..8d5567fe64 100644 --- a/vendor/github.com/klauspost/compress/zstd/dict.go +++ b/vendor/github.com/klauspost/compress/zstd/dict.go @@ -1,10 +1,13 @@ package zstd import ( + "bytes" "encoding/binary" "errors" "fmt" "io" + "math" + "sort" "github.com/klauspost/compress/huff0" ) @@ -14,9 +17,8 @@ type dict struct { litEnc *huff0.Scratch llDec, ofDec, mlDec sequenceDec - //llEnc, ofEnc, mlEnc []*fseEncoder - offsets [3]int - content []byte + offsets [3]int + content []byte } const dictMagic = "\x37\xa4\x30\xec" @@ -159,3 +161,374 @@ func InspectDictionary(b []byte) (interface { d, err := loadDict(b) return d, err } + +type BuildDictOptions struct { + // Dictionary ID. + ID uint32 + + // Content to use to create dictionary tables. + Contents [][]byte + + // History to use for all blocks. + History []byte + + // Offsets to use. + Offsets [3]int + + // CompatV155 will make the dictionary compatible with Zstd v1.5.5 and earlier. + // See https://github.com/facebook/zstd/issues/3724 + CompatV155 bool + + // Use the specified encoder level. + // The dictionary will be built using the specified encoder level, + // which will reflect speed and make the dictionary tailored for that level. + // If not set SpeedBestCompression will be used. + Level EncoderLevel + + // DebugOut will write stats and other details here if set. + DebugOut io.Writer +} + +func BuildDict(o BuildDictOptions) ([]byte, error) { + initPredefined() + hist := o.History + contents := o.Contents + debug := o.DebugOut != nil + println := func(args ...interface{}) { + if o.DebugOut != nil { + fmt.Fprintln(o.DebugOut, args...) + } + } + printf := func(s string, args ...interface{}) { + if o.DebugOut != nil { + fmt.Fprintf(o.DebugOut, s, args...) + } + } + print := func(args ...interface{}) { + if o.DebugOut != nil { + fmt.Fprint(o.DebugOut, args...) + } + } + + if int64(len(hist)) > dictMaxLength { + return nil, fmt.Errorf("dictionary of size %d > %d", len(hist), int64(dictMaxLength)) + } + if len(hist) < 8 { + return nil, fmt.Errorf("dictionary of size %d < %d", len(hist), 8) + } + if len(contents) == 0 { + return nil, errors.New("no content provided") + } + d := dict{ + id: o.ID, + litEnc: nil, + llDec: sequenceDec{}, + ofDec: sequenceDec{}, + mlDec: sequenceDec{}, + offsets: o.Offsets, + content: hist, + } + block := blockEnc{lowMem: false} + block.init() + enc := encoder(&bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(maxMatchLen), bufferReset: math.MaxInt32 - int32(maxMatchLen*2), lowMem: false}}) + if o.Level != 0 { + eOpts := encoderOptions{ + level: o.Level, + blockSize: maxMatchLen, + windowSize: maxMatchLen, + dict: &d, + lowMem: false, + } + enc = eOpts.encoder() + } else { + o.Level = SpeedBestCompression + } + var ( + remain [256]int + ll [256]int + ml [256]int + of [256]int + ) + addValues := func(dst *[256]int, src []byte) { + for _, v := range src { + dst[v]++ + } + } + addHist := func(dst *[256]int, src *[256]uint32) { + for i, v := range src { + dst[i] += int(v) + } + } + seqs := 0 + nUsed := 0 + litTotal := 0 + newOffsets := make(map[uint32]int, 1000) + for _, b := range contents { + block.reset(nil) + if len(b) < 8 { + continue + } + nUsed++ + enc.Reset(&d, true) + enc.Encode(&block, b) + addValues(&remain, block.literals) + litTotal += len(block.literals) + seqs += len(block.sequences) + block.genCodes() + addHist(&ll, block.coders.llEnc.Histogram()) + addHist(&ml, block.coders.mlEnc.Histogram()) + addHist(&of, block.coders.ofEnc.Histogram()) + for i, seq := range block.sequences { + if i > 3 { + break + } + offset := seq.offset + if offset == 0 { + continue + } + if offset > 3 { + newOffsets[offset-3]++ + } else { + newOffsets[uint32(o.Offsets[offset-1])]++ + } + } + } + // Find most used offsets. + var sortedOffsets []uint32 + for k := range newOffsets { + sortedOffsets = append(sortedOffsets, k) + } + sort.Slice(sortedOffsets, func(i, j int) bool { + a, b := sortedOffsets[i], sortedOffsets[j] + if a == b { + // Prefer the longer offset + return sortedOffsets[i] > sortedOffsets[j] + } + return newOffsets[sortedOffsets[i]] > newOffsets[sortedOffsets[j]] + }) + if len(sortedOffsets) > 3 { + if debug { + print("Offsets:") + for i, v := range sortedOffsets { + if i > 20 { + break + } + printf("[%d: %d],", v, newOffsets[v]) + } + println("") + } + + sortedOffsets = sortedOffsets[:3] + } + for i, v := range sortedOffsets { + o.Offsets[i] = int(v) + } + if debug { + println("New repeat offsets", o.Offsets) + } + + if nUsed == 0 || seqs == 0 { + return nil, fmt.Errorf("%d blocks, %d sequences found", nUsed, seqs) + } + if debug { + println("Sequences:", seqs, "Blocks:", nUsed, "Literals:", litTotal) + } + if seqs/nUsed < 512 { + // Use 512 as minimum. + nUsed = seqs / 512 + } + copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) { + hist := dst.Histogram() + var maxSym uint8 + var maxCount int + var fakeLength int + for i, v := range src { + if v > 0 { + v = v / nUsed + if v == 0 { + v = 1 + } + } + if v > maxCount { + maxCount = v + } + if v != 0 { + maxSym = uint8(i) + } + fakeLength += v + hist[i] = uint32(v) + } + dst.HistogramFinished(maxSym, maxCount) + dst.reUsed = false + dst.useRLE = false + err := dst.normalizeCount(fakeLength) + if err != nil { + return nil, err + } + if debug { + println("RAW:", dst.count[:maxSym+1], "NORM:", dst.norm[:maxSym+1], "LEN:", fakeLength) + } + return dst.writeCount(nil) + } + if debug { + print("Literal lengths: ") + } + llTable, err := copyHist(block.coders.llEnc, &ll) + if err != nil { + return nil, err + } + if debug { + print("Match lengths: ") + } + mlTable, err := copyHist(block.coders.mlEnc, &ml) + if err != nil { + return nil, err + } + if debug { + print("Offsets: ") + } + ofTable, err := copyHist(block.coders.ofEnc, &of) + if err != nil { + return nil, err + } + + // Literal table + avgSize := litTotal + if avgSize > huff0.BlockSizeMax/2 { + avgSize = huff0.BlockSizeMax / 2 + } + huffBuff := make([]byte, 0, avgSize) + // Target size + div := litTotal / avgSize + if div < 1 { + div = 1 + } + if debug { + println("Huffman weights:") + } + for i, n := range remain[:] { + if n > 0 { + n = n / div + // Allow all entries to be represented. + if n == 0 { + n = 1 + } + huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...) + if debug { + printf("[%d: %d], ", i, n) + } + } + } + if o.CompatV155 && remain[255]/div == 0 { + huffBuff = append(huffBuff, 255) + } + scratch := &huff0.Scratch{TableLog: 11} + for tries := 0; tries < 255; tries++ { + scratch = &huff0.Scratch{TableLog: 11} + _, _, err = huff0.Compress1X(huffBuff, scratch) + if err == nil { + break + } + if debug { + printf("Try %d: Huffman error: %v\n", tries+1, err) + } + huffBuff = huffBuff[:0] + if tries == 250 { + if debug { + println("Huffman: Bailing out with predefined table") + } + + // Bail out.... Just generate something + huffBuff = append(huffBuff, bytes.Repeat([]byte{255}, 10000)...) + for i := 0; i < 128; i++ { + huffBuff = append(huffBuff, byte(i)) + } + continue + } + if errors.Is(err, huff0.ErrIncompressible) { + // Try truncating least common. + for i, n := range remain[:] { + if n > 0 { + n = n / (div * (i + 1)) + if n > 0 { + huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...) + } + } + } + if o.CompatV155 && len(huffBuff) > 0 && huffBuff[len(huffBuff)-1] != 255 { + huffBuff = append(huffBuff, 255) + } + if len(huffBuff) == 0 { + huffBuff = append(huffBuff, 0, 255) + } + } + if errors.Is(err, huff0.ErrUseRLE) { + for i, n := range remain[:] { + n = n / (div * (i + 1)) + // Allow all entries to be represented. + if n == 0 { + n = 1 + } + huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...) + } + } + } + + var out bytes.Buffer + out.Write([]byte(dictMagic)) + out.Write(binary.LittleEndian.AppendUint32(nil, o.ID)) + out.Write(scratch.OutTable) + if debug { + println("huff table:", len(scratch.OutTable), "bytes") + println("of table:", len(ofTable), "bytes") + println("ml table:", len(mlTable), "bytes") + println("ll table:", len(llTable), "bytes") + } + out.Write(ofTable) + out.Write(mlTable) + out.Write(llTable) + out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[0]))) + out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[1]))) + out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[2]))) + out.Write(hist) + if debug { + _, err := loadDict(out.Bytes()) + if err != nil { + panic(err) + } + i, err := InspectDictionary(out.Bytes()) + if err != nil { + panic(err) + } + println("ID:", i.ID()) + println("Content size:", i.ContentSize()) + println("Encoder:", i.LitEncoder() != nil) + println("Offsets:", i.Offsets()) + var totalSize int + for _, b := range contents { + totalSize += len(b) + } + + encWith := func(opts ...EOption) int { + enc, err := NewWriter(nil, opts...) + if err != nil { + panic(err) + } + defer enc.Close() + var dst []byte + var totalSize int + for _, b := range contents { + dst = enc.EncodeAll(b, dst[:0]) + totalSize += len(dst) + } + return totalSize + } + plain := encWith(WithEncoderLevel(o.Level)) + withDict := encWith(WithEncoderLevel(o.Level), WithEncoderDict(out.Bytes())) + println("Input size:", totalSize) + println("Plain Compressed:", plain) + println("Dict Compressed:", withDict) + println("Saved:", plain-withDict, (plain-withDict)/len(contents), "bytes per input (rounded down)") + } + return out.Bytes(), nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go index 9819d41453..858f8f43a5 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_best.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go @@ -197,12 +197,13 @@ encodeLoop: // Set m to a match at offset if it looks like that will improve compression. improve := func(m *match, offset int32, s int32, first uint32, rep int32) { - if s-offset >= e.maxMatchOff || load3232(src, offset) != first { + delta := s - offset + if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first { return } if debugAsserts { - if offset <= 0 { - panic(offset) + if offset >= s { + panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff)) } if !bytes.Equal(src[s:s+4], src[offset:offset+4]) { panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first)) @@ -343,8 +344,8 @@ encodeLoop: if best.rep > 0 { var seq seq seq.matchLen = uint32(best.length - zstdMinMatch) - if debugAsserts && s <= nextEmit { - panic("s <= nextEmit") + if debugAsserts && s < nextEmit { + panic("s < nextEmit") } addLiterals(&seq, best.s) diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index 4de0aed0d0..72af7ef0fe 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -227,10 +227,7 @@ func (e *Encoder) nextBlock(final bool) error { DictID: e.o.dict.ID(), } - dst, err := fh.appendTo(tmp[:0]) - if err != nil { - return err - } + dst := fh.appendTo(tmp[:0]) s.headerWritten = true s.wWg.Wait() var n2 int @@ -483,7 +480,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { Checksum: false, DictID: 0, } - dst, _ = fh.appendTo(dst) + dst = fh.appendTo(dst) // Write raw block as last one only. var blk blockHeader @@ -518,10 +515,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem { dst = make([]byte, 0, len(src)) } - dst, err := fh.appendTo(dst) - if err != nil { - panic(err) - } + dst = fh.appendTo(dst) // If we can do everything in one block, prefer that. if len(src) <= e.o.blockSize { @@ -581,6 +575,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { // Add padding with content from crypto/rand.Reader if e.o.pad > 0 { add := calcSkippableFrame(int64(len(dst)), int64(e.o.pad)) + var err error dst, err = skippableFrame(dst, add, rand.Reader) if err != nil { panic(err) diff --git a/vendor/github.com/klauspost/compress/zstd/frameenc.go b/vendor/github.com/klauspost/compress/zstd/frameenc.go index 4ef7f5a3e3..2f5d5ed454 100644 --- a/vendor/github.com/klauspost/compress/zstd/frameenc.go +++ b/vendor/github.com/klauspost/compress/zstd/frameenc.go @@ -22,7 +22,7 @@ type frameHeader struct { const maxHeaderSize = 14 -func (f frameHeader) appendTo(dst []byte) ([]byte, error) { +func (f frameHeader) appendTo(dst []byte) []byte { dst = append(dst, frameMagic...) var fhd uint8 if f.Checksum { @@ -88,7 +88,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) { default: panic("invalid fcs") } - return dst, nil + return dst } const skippableFrameHeader = 4 + 4 diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go index 9405fcf101..d7fe6d82d9 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go @@ -245,7 +245,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error { return io.ErrUnexpectedEOF } var ll, mo, ml int - if br.off > 4+((maxOffsetBits+16+16)>>3) { + if len(br.in) > 4+((maxOffsetBits+16+16)>>3) { // inlined function: // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) @@ -452,18 +452,13 @@ func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) // extra bits are stored in reverse order. br.fill() - if s.maxBits <= 32 { - mo += br.getBits(moB) - ml += br.getBits(mlB) - ll += br.getBits(llB) - } else { - mo += br.getBits(moB) + mo += br.getBits(moB) + if s.maxBits > 32 { br.fill() - // matchlength+literal length, max 32 bits - ml += br.getBits(mlB) - ll += br.getBits(llB) - } + // matchlength+literal length, max 32 bits + ml += br.getBits(mlB) + ll += br.getBits(llB) mo = s.adjustOffset(mo, ll, moB) return } diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s index b6f4ba6fc5..974b99725f 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s @@ -5,11 +5,11 @@ // func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: CMOV TEXT ·sequenceDecs_decode_amd64(SB), $8-32 - MOVQ br+8(FP), AX - MOVQ 32(AX), DX - MOVBQZX 40(AX), BX - MOVQ 24(AX), SI - MOVQ (AX), AX + MOVQ br+8(FP), CX + MOVQ 24(CX), DX + MOVBQZX 32(CX), BX + MOVQ (CX), AX + MOVQ 8(CX), SI ADDQ SI, AX MOVQ AX, (SP) MOVQ ctx+16(FP), AX @@ -301,9 +301,9 @@ sequenceDecs_decode_amd64_match_len_ofs_ok: MOVQ R12, 152(AX) MOVQ R13, 160(AX) MOVQ br+8(FP), AX - MOVQ DX, 32(AX) - MOVB BL, 40(AX) - MOVQ SI, 24(AX) + MOVQ DX, 24(AX) + MOVB BL, 32(AX) + MOVQ SI, 8(AX) // Return success MOVQ $0x00000000, ret+24(FP) @@ -336,11 +336,11 @@ error_overread: // func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: CMOV TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32 - MOVQ br+8(FP), AX - MOVQ 32(AX), DX - MOVBQZX 40(AX), BX - MOVQ 24(AX), SI - MOVQ (AX), AX + MOVQ br+8(FP), CX + MOVQ 24(CX), DX + MOVBQZX 32(CX), BX + MOVQ (CX), AX + MOVQ 8(CX), SI ADDQ SI, AX MOVQ AX, (SP) MOVQ ctx+16(FP), AX @@ -603,9 +603,9 @@ sequenceDecs_decode_56_amd64_match_len_ofs_ok: MOVQ R12, 152(AX) MOVQ R13, 160(AX) MOVQ br+8(FP), AX - MOVQ DX, 32(AX) - MOVB BL, 40(AX) - MOVQ SI, 24(AX) + MOVQ DX, 24(AX) + MOVB BL, 32(AX) + MOVQ SI, 8(AX) // Return success MOVQ $0x00000000, ret+24(FP) @@ -638,11 +638,11 @@ error_overread: // func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: BMI, BMI2, CMOV TEXT ·sequenceDecs_decode_bmi2(SB), $8-32 - MOVQ br+8(FP), CX - MOVQ 32(CX), AX - MOVBQZX 40(CX), DX - MOVQ 24(CX), BX - MOVQ (CX), CX + MOVQ br+8(FP), BX + MOVQ 24(BX), AX + MOVBQZX 32(BX), DX + MOVQ (BX), CX + MOVQ 8(BX), BX ADDQ BX, CX MOVQ CX, (SP) MOVQ ctx+16(FP), CX @@ -892,9 +892,9 @@ sequenceDecs_decode_bmi2_match_len_ofs_ok: MOVQ R11, 152(CX) MOVQ R12, 160(CX) MOVQ br+8(FP), CX - MOVQ AX, 32(CX) - MOVB DL, 40(CX) - MOVQ BX, 24(CX) + MOVQ AX, 24(CX) + MOVB DL, 32(CX) + MOVQ BX, 8(CX) // Return success MOVQ $0x00000000, ret+24(FP) @@ -927,11 +927,11 @@ error_overread: // func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: BMI, BMI2, CMOV TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32 - MOVQ br+8(FP), CX - MOVQ 32(CX), AX - MOVBQZX 40(CX), DX - MOVQ 24(CX), BX - MOVQ (CX), CX + MOVQ br+8(FP), BX + MOVQ 24(BX), AX + MOVBQZX 32(BX), DX + MOVQ (BX), CX + MOVQ 8(BX), BX ADDQ BX, CX MOVQ CX, (SP) MOVQ ctx+16(FP), CX @@ -1152,9 +1152,9 @@ sequenceDecs_decode_56_bmi2_match_len_ofs_ok: MOVQ R11, 152(CX) MOVQ R12, 160(CX) MOVQ br+8(FP), CX - MOVQ AX, 32(CX) - MOVB DL, 40(CX) - MOVQ BX, 24(CX) + MOVQ AX, 24(CX) + MOVB DL, 32(CX) + MOVQ BX, 8(CX) // Return success MOVQ $0x00000000, ret+24(FP) @@ -1797,11 +1797,11 @@ empty_seqs: // func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // Requires: CMOV, SSE TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 - MOVQ br+8(FP), AX - MOVQ 32(AX), DX - MOVBQZX 40(AX), BX - MOVQ 24(AX), SI - MOVQ (AX), AX + MOVQ br+8(FP), CX + MOVQ 24(CX), DX + MOVBQZX 32(CX), BX + MOVQ (CX), AX + MOVQ 8(CX), SI ADDQ SI, AX MOVQ AX, (SP) MOVQ ctx+16(FP), AX @@ -2295,9 +2295,9 @@ handle_loop: loop_finished: MOVQ br+8(FP), AX - MOVQ DX, 32(AX) - MOVB BL, 40(AX) - MOVQ SI, 24(AX) + MOVQ DX, 24(AX) + MOVB BL, 32(AX) + MOVQ SI, 8(AX) // Update the context MOVQ ctx+16(FP), AX @@ -2362,11 +2362,11 @@ error_not_enough_space: // func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // Requires: BMI, BMI2, CMOV, SSE TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 - MOVQ br+8(FP), CX - MOVQ 32(CX), AX - MOVBQZX 40(CX), DX - MOVQ 24(CX), BX - MOVQ (CX), CX + MOVQ br+8(FP), BX + MOVQ 24(BX), AX + MOVBQZX 32(BX), DX + MOVQ (BX), CX + MOVQ 8(BX), BX ADDQ BX, CX MOVQ CX, (SP) MOVQ ctx+16(FP), CX @@ -2818,9 +2818,9 @@ handle_loop: loop_finished: MOVQ br+8(FP), CX - MOVQ AX, 32(CX) - MOVB DL, 40(CX) - MOVQ BX, 24(CX) + MOVQ AX, 24(CX) + MOVB DL, 32(CX) + MOVQ BX, 8(CX) // Update the context MOVQ ctx+16(FP), AX @@ -2885,11 +2885,11 @@ error_not_enough_space: // func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // Requires: CMOV, SSE TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 - MOVQ br+8(FP), AX - MOVQ 32(AX), DX - MOVBQZX 40(AX), BX - MOVQ 24(AX), SI - MOVQ (AX), AX + MOVQ br+8(FP), CX + MOVQ 24(CX), DX + MOVBQZX 32(CX), BX + MOVQ (CX), AX + MOVQ 8(CX), SI ADDQ SI, AX MOVQ AX, (SP) MOVQ ctx+16(FP), AX @@ -3485,9 +3485,9 @@ handle_loop: loop_finished: MOVQ br+8(FP), AX - MOVQ DX, 32(AX) - MOVB BL, 40(AX) - MOVQ SI, 24(AX) + MOVQ DX, 24(AX) + MOVB BL, 32(AX) + MOVQ SI, 8(AX) // Update the context MOVQ ctx+16(FP), AX @@ -3552,11 +3552,11 @@ error_not_enough_space: // func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // Requires: BMI, BMI2, CMOV, SSE TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 - MOVQ br+8(FP), CX - MOVQ 32(CX), AX - MOVBQZX 40(CX), DX - MOVQ 24(CX), BX - MOVQ (CX), CX + MOVQ br+8(FP), BX + MOVQ 24(BX), AX + MOVBQZX 32(BX), DX + MOVQ (BX), CX + MOVQ 8(BX), BX ADDQ BX, CX MOVQ CX, (SP) MOVQ ctx+16(FP), CX @@ -4110,9 +4110,9 @@ handle_loop: loop_finished: MOVQ br+8(FP), CX - MOVQ AX, 32(CX) - MOVB DL, 40(CX) - MOVQ BX, 24(CX) + MOVQ AX, 24(CX) + MOVB DL, 32(CX) + MOVQ BX, 8(CX) // Update the context MOVQ ctx+16(FP), AX diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go index ac2a80d291..2fb35b788c 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go @@ -29,7 +29,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error { } for i := range seqs { var ll, mo, ml int - if br.off > 4+((maxOffsetBits+16+16)>>3) { + if len(br.in) > 4+((maxOffsetBits+16+16)>>3) { // inlined function: // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go index 9e1baad73b..ec13594e89 100644 --- a/vendor/github.com/klauspost/compress/zstd/snappy.go +++ b/vendor/github.com/klauspost/compress/zstd/snappy.go @@ -95,10 +95,9 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) { var written int64 var readHeader bool { - var header []byte - var n int - header, r.err = frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0]) + header := frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0]) + var n int n, r.err = w.Write(header) if r.err != nil { return written, r.err diff --git a/vendor/modules.txt b/vendor/modules.txt index 243ac06c39..eefb2284a6 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -567,7 +567,7 @@ github.com/json-iterator/go # github.com/julienschmidt/httprouter v1.3.0 ## explicit; go 1.7 github.com/julienschmidt/httprouter -# github.com/klauspost/compress v1.16.7 +# github.com/klauspost/compress v1.17.2 ## explicit; go 1.18 github.com/klauspost/compress github.com/klauspost/compress/flate